Diffstat (limited to 'drivers/net/ethernet')
513 files changed, 42237 insertions, 11235 deletions
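Two of the larger ENA changes in this diff are easier to follow with a sketch in hand. The first is the LLQ tx-burst accounting added in ena_eth_com.c/ena_eth_com.h below: each packet occupies one or more LLQ entries, a per-queue entry budget is decremented as entries are written to the device, and the doorbell is written early (which resets the budget) when the next packet would not fit in the remaining burst. A minimal stand-alone sketch with hypothetical types and names (in the driver this state lives in struct ena_com_io_sq and struct ena_com_llq_info; the meta-descriptor bookkeeping is omitted):

#include <stdbool.h>

struct llq_sq {
	unsigned int descs_num_before_header;  /* descs carried by the first entry */
	unsigned int descs_per_entry;          /* descs per subsequent entry */
	unsigned int max_entries_in_tx_burst;  /* device-reported burst limit */
	unsigned int entries_in_tx_burst_left; /* budget left in current burst */
};

/* LLQ entries a packet with num_descs descriptors will consume. */
static unsigned int entries_needed(const struct llq_sq *sq, unsigned int num_descs)
{
	unsigned int n = 1;

	if (num_descs > sq->descs_num_before_header) {
		unsigned int rest = num_descs - sq->descs_num_before_header;

		n += (rest + sq->descs_per_entry - 1) / sq->descs_per_entry; /* DIV_ROUND_UP */
	}
	return n;
}

/* True when the doorbell must be rung before queueing this packet. */
static bool doorbell_needed(const struct llq_sq *sq, unsigned int num_descs)
{
	return entries_needed(sq, num_descs) > sq->entries_in_tx_burst_left;
}

/* Ringing the doorbell hands the burst to the device and resets the budget. */
static void ring_doorbell(struct llq_sq *sq)
{
	/* in the driver: writel(tail, io_sq->db_addr); */
	sq->entries_in_tx_burst_left = sq->max_entries_in_tx_burst;
}

This mirrors ena_com_is_doorbell_needed() and ena_com_write_sq_doorbell() in the hunks below; ena_start_xmit() runs the check before preparing each packet, so a full burst is flushed to the device as soon as the next packet would overflow it.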
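The second is the queue-creation backoff added in ena_netdev.c (create_queues_with_size_backoff()): when queue allocation fails with -ENOMEM, the driver halves the larger of the TX and RX ring sizes (both when they are equal) and retries, giving up once either ring would drop below ENA_MIN_RING_SIZE. A sketch under the same caveats; try_alloc_queues() is a made-up stand-in for the four setup/create steps in the patch:

#include <stdbool.h>

#define MIN_RING_SIZE 256 /* ENA_MIN_RING_SIZE in the patch */

/* Pretend allocator for the example: fails while the rings are too big. */
static bool try_alloc_queues(unsigned int tx, unsigned int rx)
{
	return tx + rx <= 1536;
}

static int create_queues_with_backoff(unsigned int tx, unsigned int rx)
{
	while (!try_alloc_queues(tx, rx)) {
		unsigned int cur_tx = tx, cur_rx = rx;

		/* Halve the larger ring, or both when they are the same size. */
		if (cur_rx <= cur_tx)
			tx = cur_tx / 2;
		if (cur_rx >= cur_tx)
			rx = cur_rx / 2;

		if (tx < MIN_RING_SIZE || rx < MIN_RING_SIZE)
			return -1; /* don't retry below the minimum ring size */
	}
	return 0; /* rings of size tx/rx were created */
}

Starting from the 1024-entry defaults this tries 1024/1024, then 512/512, and so on, never letting a ring drop below 256 entries, which matches the error path in the hunk.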
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index fe115b7caba0..93a2d4deb27c 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -76,6 +76,7 @@ source "drivers/net/ethernet/ezchip/Kconfig" source "drivers/net/ethernet/faraday/Kconfig" source "drivers/net/ethernet/freescale/Kconfig" source "drivers/net/ethernet/fujitsu/Kconfig" +source "drivers/net/ethernet/google/Kconfig" source "drivers/net/ethernet/hisilicon/Kconfig" source "drivers/net/ethernet/hp/Kconfig" source "drivers/net/ethernet/huawei/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 7b5bf9682066..fb9155cffcff 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -39,6 +39,7 @@ obj-$(CONFIG_NET_VENDOR_EZCHIP) += ezchip/ obj-$(CONFIG_NET_VENDOR_FARADAY) += faraday/ obj-$(CONFIG_NET_VENDOR_FREESCALE) += freescale/ obj-$(CONFIG_NET_VENDOR_FUJITSU) += fujitsu/ +obj-$(CONFIG_NET_VENDOR_GOOGLE) += google/ obj-$(CONFIG_NET_VENDOR_HISILICON) += hisilicon/ obj-$(CONFIG_NET_VENDOR_HP) += hp/ obj-$(CONFIG_NET_VENDOR_HUAWEI) += huawei/ diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c index 9e06dff619c3..3434730a7699 100644 --- a/drivers/net/ethernet/allwinner/sun4i-emac.c +++ b/drivers/net/ethernet/allwinner/sun4i-emac.c @@ -224,8 +224,8 @@ static int emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) static void emac_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, DRV_NAME, sizeof(DRV_NAME)); - strlcpy(info->version, DRV_VERSION, sizeof(DRV_VERSION)); + strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_VERSION, sizeof(info->version)); strlcpy(info->bus_info, dev_name(&dev->dev), sizeof(info->bus_info)); } @@ -818,7 +818,6 @@ static int emac_probe(struct platform_device *pdev) SET_NETDEV_DEV(ndev, &pdev->dev); db = netdev_priv(ndev); - memset(db, 0, sizeof(*db)); db->dev = &pdev->dev; db->ndev = ndev; diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h index 9f80b73f90b1..d19f2ecf8e84 100644 --- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -60,6 +60,7 @@ enum ena_admin_aq_feature_id { ENA_ADMIN_MAX_QUEUES_NUM = 2, ENA_ADMIN_HW_HINTS = 3, ENA_ADMIN_LLQ = 4, + ENA_ADMIN_MAX_QUEUES_EXT = 7, ENA_ADMIN_RSS_HASH_FUNCTION = 10, ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, @@ -421,7 +422,13 @@ struct ena_admin_get_set_feature_common_desc { /* as appears in ena_admin_aq_feature_id */ u8 feature_id; - u16 reserved16; + /* The driver specifies the max feature version it supports and the + * device responds with the currently supported feature version. The + * field is zero based + */ + u8 feature_version; + + u8 reserved8; }; struct ena_admin_device_attr_feature_desc { @@ -524,6 +531,39 @@ struct ena_admin_feature_llq_desc { /* the stride control the driver selected to use */ u16 descriptors_stride_ctrl_enabled; + + /* Maximum size in bytes taken by llq entries in a single tx burst. + * Set to 0 when there is no such limit. 
+ */ + u32 max_tx_burst_size; +}; + +struct ena_admin_queue_ext_feature_fields { + u32 max_tx_sq_num; + + u32 max_tx_cq_num; + + u32 max_rx_sq_num; + + u32 max_rx_cq_num; + + u32 max_tx_sq_depth; + + u32 max_tx_cq_depth; + + u32 max_rx_sq_depth; + + u32 max_rx_cq_depth; + + u32 max_tx_header_size; + + /* Maximum Descriptors number, including meta descriptor, allowed for + * a single Tx packet + */ + u16 max_per_packet_tx_descs; + + /* Maximum Descriptors number allowed for a single Rx packet */ + u16 max_per_packet_rx_descs; }; struct ena_admin_queue_feature_desc { @@ -832,6 +872,19 @@ struct ena_admin_get_feat_cmd { u32 raw[11]; }; +struct ena_admin_queue_ext_feature_desc { + /* version */ + u8 version; + + u8 reserved1[3]; + + union { + struct ena_admin_queue_ext_feature_fields max_queue_ext; + + u32 raw[10]; + }; +}; + struct ena_admin_get_feat_resp { struct ena_admin_acq_common_desc acq_common_desc; @@ -844,6 +897,8 @@ struct ena_admin_get_feat_resp { struct ena_admin_queue_feature_desc max_queue; + struct ena_admin_queue_ext_feature_desc max_queue_ext; + struct ena_admin_feature_aenq_desc aenq; struct ena_admin_get_feature_link_desc link; @@ -908,7 +963,9 @@ struct ena_admin_aenq_common_desc { u16 syndrom; - /* 0 : phase */ + /* 0 : phase + * 7:1 : reserved - MBZ + */ u8 flags; u8 reserved1[3]; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 7f8266b191ae..911a2e7a375a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -91,7 +91,7 @@ struct ena_com_stats_ctx { struct ena_admin_acq_get_stats_resp get_resp; }; -static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, +static int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, struct ena_common_mem_addr *ena_addr, dma_addr_t addr) { @@ -115,7 +115,7 @@ static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue) GFP_KERNEL); if (!sq->entries) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -137,7 +137,7 @@ static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue) GFP_KERNEL); if (!cq->entries) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -160,7 +160,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *dev, GFP_KERNEL); if (!aenq->entries) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -190,7 +190,7 @@ static int ena_com_admin_init_aenq(struct ena_com_dev *dev, return 0; } -static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, +static void comp_ctxt_release(struct ena_com_admin_queue *queue, struct ena_comp_ctx *comp_ctx) { comp_ctx->occupied = false; @@ -277,7 +277,7 @@ static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queu return comp_ctx; } -static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) +static int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) { size_t size = queue->q_depth * sizeof(struct ena_comp_ctx); struct ena_comp_ctx *comp_ctx; @@ -285,7 +285,7 @@ static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL); if (unlikely(!queue->comp_ctx)) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -356,7 +356,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, } if (!io_sq->desc_addr.virt_addr) { - 
pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } } @@ -382,7 +382,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); if (!io_sq->bounce_buf_ctrl.base_buffer) { - pr_err("bounce buffer memory allocation failed"); + pr_err("bounce buffer memory allocation failed\n"); return -ENOMEM; } @@ -396,6 +396,10 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, 0x0, io_sq->llq_info.desc_list_entry_size); io_sq->llq_buf_ctrl.descs_left_in_line = io_sq->llq_info.descs_num_before_header; + + if (io_sq->llq_info.max_entries_in_tx_burst > 0) + io_sq->entries_in_tx_burst_left = + io_sq->llq_info.max_entries_in_tx_burst; } io_sq->tail = 0; @@ -436,7 +440,7 @@ static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, } if (!io_cq->cdesc_addr.virt_addr) { - pr_err("memory allocation failed"); + pr_err("memory allocation failed\n"); return -ENOMEM; } @@ -727,6 +731,9 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev, supported_feat, llq_info->descs_num_before_header); } + llq_info->max_entries_in_tx_burst = + (u16)(llq_features->max_tx_burst_size / llq_default_cfg->llq_ring_entry_size_value); + rc = ena_com_set_llq(ena_dev); if (rc) pr_err("Cannot set LLQ configuration: %d\n", rc); @@ -755,16 +762,26 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com admin_queue->stats.no_completion++; spin_unlock_irqrestore(&admin_queue->q_lock, flags); - if (comp_ctx->status == ENA_CMD_COMPLETED) - pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n", - comp_ctx->cmd_opcode); - else - pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n", + if (comp_ctx->status == ENA_CMD_COMPLETED) { + pr_err("The ena device sent a completion but the driver didn't receive a MSI-X interrupt (cmd %d), autopolling mode is %s\n", + comp_ctx->cmd_opcode, + admin_queue->auto_polling ? "ON" : "OFF"); + /* Check if fallback to polling is enabled */ + if (admin_queue->auto_polling) + admin_queue->polling = true; + } else { + pr_err("The ena device doesn't send a completion for the admin cmd %d status %d\n", comp_ctx->cmd_opcode, comp_ctx->status); - - admin_queue->running_state = false; - ret = -ETIME; - goto err; + } + /* Check if shifted to polling mode. + * This will happen if there is a completion without an interrupt + * and autopolling mode is enabled. 
Continuing normal execution in such case + */ + if (!admin_queue->polling) { + admin_queue->running_state = false; + ret = -ETIME; + goto err; + } } ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); @@ -822,7 +839,7 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) } if (read_resp->reg_off != offset) { - pr_err("Read failure: wrong offset provided"); + pr_err("Read failure: wrong offset provided\n"); ret = ENA_MMIO_READ_TIMEOUT; } else { ret = read_resp->reg_val; @@ -961,7 +978,8 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, struct ena_admin_get_feat_resp *get_resp, enum ena_admin_aq_feature_id feature_id, dma_addr_t control_buf_dma_addr, - u32 control_buff_size) + u32 control_buff_size, + u8 feature_ver) { struct ena_com_admin_queue *admin_queue; struct ena_admin_get_feat_cmd get_cmd; @@ -992,7 +1010,7 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, } get_cmd.control_buffer.length = control_buff_size; - + get_cmd.feat_common.feature_version = feature_ver; get_cmd.feat_common.feature_id = feature_id; ret = ena_com_execute_admin_command(admin_queue, @@ -1012,13 +1030,15 @@ static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, static int ena_com_get_feature(struct ena_com_dev *ena_dev, struct ena_admin_get_feat_resp *get_resp, - enum ena_admin_aq_feature_id feature_id) + enum ena_admin_aq_feature_id feature_id, + u8 feature_ver) { return ena_com_get_feature_ex(ena_dev, get_resp, feature_id, 0, - 0); + 0, + feature_ver); } static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) @@ -1078,7 +1098,7 @@ static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, int ret; ret = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, 0); if (unlikely(ret)) return ret; @@ -1498,7 +1518,7 @@ int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) struct ena_admin_get_feat_resp get_resp; int ret; - ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG); + ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG, 0); if (ret) { pr_info("Can't get aenq configuration\n"); return ret; @@ -1643,6 +1663,12 @@ void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) ena_dev->admin_queue.polling = polling; } +void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev, + bool polling) +{ + ena_dev->admin_queue.auto_polling = polling; +} + int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) { struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; @@ -1867,7 +1893,7 @@ void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid) int ena_com_get_link_params(struct ena_com_dev *ena_dev, struct ena_admin_get_feat_resp *resp) { - return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG); + return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG, 0); } int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, @@ -1877,7 +1903,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, int rc; rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_DEVICE_ATTRIBUTES); + ENA_ADMIN_DEVICE_ATTRIBUTES, 0); if (rc) return rc; @@ -1885,17 +1911,34 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, sizeof(get_resp.u.dev_attr)); ena_dev->supported_features = get_resp.u.dev_attr.supported_features; - rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_MAX_QUEUES_NUM); - if (rc) - return rc; + if 
(ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_MAX_QUEUES_EXT, + ENA_FEATURE_MAX_QUEUE_EXT_VER); + if (rc) + return rc; - memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, - sizeof(get_resp.u.max_queue)); - ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; + if (get_resp.u.max_queue_ext.version != ENA_FEATURE_MAX_QUEUE_EXT_VER) + return -EINVAL; + + memcpy(&get_feat_ctx->max_queue_ext, &get_resp.u.max_queue_ext, + sizeof(get_resp.u.max_queue_ext)); + ena_dev->tx_max_header_size = + get_resp.u.max_queue_ext.max_queue_ext.max_tx_header_size; + } else { + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_MAX_QUEUES_NUM, 0); + memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, + sizeof(get_resp.u.max_queue)); + ena_dev->tx_max_header_size = + get_resp.u.max_queue.max_header_size; + + if (rc) + return rc; + } rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_AENQ_CONFIG); + ENA_ADMIN_AENQ_CONFIG, 0); if (rc) return rc; @@ -1903,7 +1946,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, sizeof(get_resp.u.aenq)); rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0); if (rc) return rc; @@ -1913,7 +1956,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, /* Driver hints isn't mandatory admin command. So in case the * command isn't supported set driver hints to 0 */ - rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS); + rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_HW_HINTS, 0); if (!rc) memcpy(&get_feat_ctx->hw_hints, &get_resp.u.hw_hints, @@ -1924,7 +1967,7 @@ int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, else return rc; - rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ); + rc = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_LLQ, 0); if (!rc) memcpy(&get_feat_ctx->llq, &get_resp.u.llq, sizeof(get_resp.u.llq)); @@ -2161,7 +2204,7 @@ int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, struct ena_admin_get_feat_resp resp; ret = ena_com_get_feature(ena_dev, &resp, - ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG, 0); if (unlikely(ret)) { pr_err("Failed to get offload capabilities %d\n", ret); return ret; @@ -2190,7 +2233,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) /* Validate hash function is supported */ ret = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_RSS_HASH_FUNCTION); + ENA_ADMIN_RSS_HASH_FUNCTION, 0); if (unlikely(ret)) return ret; @@ -2250,7 +2293,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, rc = ena_com_get_feature_ex(ena_dev, &get_resp, ENA_ADMIN_RSS_HASH_FUNCTION, rss->hash_key_dma_addr, - sizeof(*rss->hash_key)); + sizeof(*rss->hash_key), 0); if (unlikely(rc)) return rc; @@ -2302,7 +2345,7 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, rc = ena_com_get_feature_ex(ena_dev, &get_resp, ENA_ADMIN_RSS_HASH_FUNCTION, rss->hash_key_dma_addr, - sizeof(*rss->hash_key)); + sizeof(*rss->hash_key), 0); if (unlikely(rc)) return rc; @@ -2327,7 +2370,7 @@ int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, rc = ena_com_get_feature_ex(ena_dev, &get_resp, ENA_ADMIN_RSS_HASH_INPUT, rss->hash_ctrl_dma_addr, - sizeof(*rss->hash_ctrl)); + sizeof(*rss->hash_ctrl), 0); if (unlikely(rc)) return rc; @@ -2563,7 +2606,7 @@ int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) rc = ena_com_get_feature_ex(ena_dev, &get_resp, 
ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, rss->rss_ind_tbl_dma_addr, - tbl_size); + tbl_size, 0); if (unlikely(rc)) return rc; @@ -2778,7 +2821,7 @@ int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) int rc; rc = ena_com_get_feature(ena_dev, &get_resp, - ENA_ADMIN_INTERRUPT_MODERATION); + ENA_ADMIN_INTERRUPT_MODERATION, 0); if (rc) { if (rc == -EOPNOTSUPP) { @@ -2913,8 +2956,8 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, struct ena_admin_feature_llq_desc *llq_features, struct ena_llq_configurations *llq_default_cfg) { + struct ena_com_llq_info *llq_info = &ena_dev->llq_info; int rc; - int size; if (!llq_features->max_llq_num) { ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; @@ -2925,12 +2968,10 @@ int ena_com_config_dev_mode(struct ena_com_dev *ena_dev, if (rc) return rc; - /* Validate the descriptor is not too big */ - size = ena_dev->tx_max_header_size; - size += ena_dev->llq_info.descs_num_before_header * - sizeof(struct ena_eth_io_tx_desc); + ena_dev->tx_max_header_size = llq_info->desc_list_entry_size - + (llq_info->descs_num_before_header * sizeof(struct ena_eth_io_tx_desc)); - if (unlikely(ena_dev->llq_info.desc_list_entry_size < size)) { + if (unlikely(ena_dev->tx_max_header_size == 0)) { pr_err("the size of the LLQ entry is smaller than needed\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 078d6f2b4f39..0d3664fe260d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -101,6 +101,8 @@ #define ENA_HW_HINTS_NO_TIMEOUT 0xFFFF +#define ENA_FEATURE_MAX_QUEUE_EXT_VER 1 + enum ena_intr_moder_level { ENA_INTR_MODER_LOWEST = 0, ENA_INTR_MODER_LOW, @@ -159,6 +161,7 @@ struct ena_com_llq_info { u16 desc_list_entry_size; u16 descs_num_before_header; u16 descs_per_entry; + u16 max_entries_in_tx_burst; }; struct ena_com_io_cq { @@ -238,6 +241,7 @@ struct ena_com_io_sq { u8 phase; u8 desc_entry_size; u8 dma_addr_bits; + u16 entries_in_tx_burst_left; } ____cacheline_aligned; struct ena_com_admin_cq { @@ -281,6 +285,9 @@ struct ena_com_admin_queue { /* Indicate if the admin queue should poll for completion */ bool polling; + /* Define if fallback to polling mode should occur */ + bool auto_polling; + u16 curr_cmd_id; /* Indicate that the ena was initialized and can @@ -377,6 +384,7 @@ struct ena_com_dev { struct ena_com_dev_get_features_ctx { struct ena_admin_queue_feature_desc max_queues; + struct ena_admin_queue_ext_feature_desc max_queue_ext; struct ena_admin_device_attr_feature_desc dev_attr; struct ena_admin_feature_aenq_desc aenq; struct ena_admin_feature_offload_desc offload; @@ -536,6 +544,17 @@ void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling); */ bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); +/* ena_com_set_admin_auto_polling_mode - Enable autoswitch to polling mode + * @ena_dev: ENA communication layer struct + * @polling: Enable/Disable polling mode + * + * Set the autopolling mode. + * If autopolling is on: + * In case of missing interrupt when data is available switch to polling. 
+ */ +void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev, + bool polling); + /* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler * @ena_dev: ENA communication layer struct * diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index f6c2d3855be8..38046bf0ff44 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -32,7 +32,7 @@ #include "ena_eth_com.h" -static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( +static struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( struct ena_com_io_cq *io_cq) { struct ena_eth_io_rx_cdesc_base *cdesc; @@ -59,7 +59,7 @@ static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( return cdesc; } -static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) +static void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) { u16 tail_masked; u32 offset; @@ -71,7 +71,7 @@ static inline void *get_sq_desc_regular_queue(struct ena_com_io_sq *io_sq) return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); } -static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, +static int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq, u8 *bounce_buffer) { struct ena_com_llq_info *llq_info = &io_sq->llq_info; @@ -82,6 +82,17 @@ static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq dst_tail_mask = io_sq->tail & (io_sq->q_depth - 1); dst_offset = dst_tail_mask * llq_info->desc_list_entry_size; + if (is_llq_max_tx_burst_exists(io_sq)) { + if (unlikely(!io_sq->entries_in_tx_burst_left)) { + pr_err("Error: trying to send more packets than tx burst allows\n"); + return -ENOSPC; + } + + io_sq->entries_in_tx_burst_left--; + pr_debug("decreasing entries_in_tx_burst_left of queue %d to %d\n", + io_sq->qid, io_sq->entries_in_tx_burst_left); + } + /* Make sure everything was written into the bounce buffer before * writing the bounce buffer to the device */ @@ -100,7 +111,7 @@ static inline int ena_com_write_bounce_buffer_to_dev(struct ena_com_io_sq *io_sq return 0; } -static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, +static int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, u8 *header_src, u16 header_len) { @@ -131,7 +142,7 @@ static inline int ena_com_write_header_to_bounce(struct ena_com_io_sq *io_sq, return 0; } -static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) +static void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) { struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; u8 *bounce_buffer; @@ -151,7 +162,7 @@ static inline void *get_sq_desc_llq(struct ena_com_io_sq *io_sq) return sq_desc; } -static inline int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) +static int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) { struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; struct ena_com_llq_info *llq_info = &io_sq->llq_info; @@ -178,7 +189,7 @@ static inline int ena_com_close_bounce_buffer(struct ena_com_io_sq *io_sq) return 0; } -static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) +static void *get_sq_desc(struct ena_com_io_sq *io_sq) { if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) return get_sq_desc_llq(io_sq); @@ -186,7 +197,7 @@ static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) return get_sq_desc_regular_queue(io_sq); } -static inline int ena_com_sq_update_llq_tail(struct 
ena_com_io_sq *io_sq) +static int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq) { struct ena_com_llq_pkt_ctrl *pkt_ctrl = &io_sq->llq_buf_ctrl; struct ena_com_llq_info *llq_info = &io_sq->llq_info; @@ -214,7 +225,7 @@ static inline int ena_com_sq_update_llq_tail(struct ena_com_io_sq *io_sq) return 0; } -static inline int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) +static int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) { if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) return ena_com_sq_update_llq_tail(io_sq); @@ -228,7 +239,7 @@ static inline int ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) return 0; } -static inline struct ena_eth_io_rx_cdesc_base * +static struct ena_eth_io_rx_cdesc_base * ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx) { idx &= (io_cq->q_depth - 1); @@ -237,7 +248,7 @@ static inline struct ena_eth_io_rx_cdesc_base * idx * io_cq->cdesc_entry_size_in_bytes); } -static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, +static u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, u16 *first_cdesc_idx) { struct ena_eth_io_rx_cdesc_base *cdesc; @@ -274,24 +285,7 @@ static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, return count; } -static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, - struct ena_com_tx_ctx *ena_tx_ctx) -{ - int rc; - - if (ena_tx_ctx->meta_valid) { - rc = memcmp(&io_sq->cached_tx_meta, - &ena_tx_ctx->ena_meta, - sizeof(struct ena_com_tx_meta)); - - if (unlikely(rc != 0)) - return true; - } - - return false; -} - -static inline int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, +static int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, struct ena_com_tx_ctx *ena_tx_ctx) { struct ena_eth_io_tx_meta_desc *meta_desc = NULL; @@ -340,7 +334,7 @@ static inline int ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io return ena_com_sq_update_tail(io_sq); } -static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, +static void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, struct ena_eth_io_rx_cdesc_base *cdesc) { ena_rx_ctx->l3_proto = cdesc->status & diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h index 340d02b64ca6..77986c0ea52c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -125,8 +125,55 @@ static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq, return ena_com_free_desc(io_sq) > temp; } +static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + if (!ena_tx_ctx->meta_valid) + return false; + + return !!memcmp(&io_sq->cached_tx_meta, + &ena_tx_ctx->ena_meta, + sizeof(struct ena_com_tx_meta)); +} + +static inline bool is_llq_max_tx_burst_exists(struct ena_com_io_sq *io_sq) +{ + return (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) && + io_sq->llq_info.max_entries_in_tx_burst > 0; +} + +static inline bool ena_com_is_doorbell_needed(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + struct ena_com_llq_info *llq_info; + int descs_after_first_entry; + int num_entries_needed = 1; + u16 num_descs; + + if (!is_llq_max_tx_burst_exists(io_sq)) + return false; + + llq_info = &io_sq->llq_info; + num_descs = ena_tx_ctx->num_bufs; + + if (unlikely(ena_com_meta_desc_changed(io_sq, ena_tx_ctx))) + ++num_descs; + + if (num_descs > 
llq_info->descs_num_before_header) { + descs_after_first_entry = num_descs - llq_info->descs_num_before_header; + num_entries_needed += DIV_ROUND_UP(descs_after_first_entry, + llq_info->descs_per_entry); + } + + pr_debug("queue: %d num_descs: %d num_entries_needed: %d\n", io_sq->qid, + num_descs, num_entries_needed); + + return num_entries_needed > io_sq->entries_in_tx_burst_left; +} + static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) { + u16 max_entries_in_tx_burst = io_sq->llq_info.max_entries_in_tx_burst; u16 tail = io_sq->tail; pr_debug("write submission queue doorbell for queue: %d tail: %d\n", @@ -134,6 +181,12 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) writel(tail, io_sq->db_addr); + if (is_llq_max_tx_burst_exists(io_sq)) { + pr_debug("reset available entries in tx burst for queue %d to %d\n", + io_sq->qid, max_entries_in_tx_burst); + io_sq->entries_in_tx_burst_left = max_entries_in_tx_burst; + } + return 0; } @@ -142,15 +195,17 @@ static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq) u16 unreported_comp, head; bool need_update; - head = io_cq->head; - unreported_comp = head - io_cq->last_head_update; - need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); - - if (io_cq->cq_head_db_reg && need_update) { - pr_debug("Write completion queue doorbell for queue %d: head: %d\n", - io_cq->qid, head); - writel(head, io_cq->cq_head_db_reg); - io_cq->last_head_update = head; + if (unlikely(io_cq->cq_head_db_reg)) { + head = io_cq->head; + unreported_comp = head - io_cq->last_head_update; + need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); + + if (unlikely(need_update)) { + pr_debug("Write completion queue doorbell for queue %d: head: %d\n", + io_cq->qid, head); + writel(head, io_cq->cq_head_db_reg); + io_cq->last_head_update = head; + } } return 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index fe596bc30a96..b997c3ce9e2b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -88,13 +88,14 @@ static const struct ena_stats ena_stats_tx_strings[] = { static const struct ena_stats ena_stats_rx_strings[] = { ENA_STAT_RX_ENTRY(cnt), ENA_STAT_RX_ENTRY(bytes), + ENA_STAT_RX_ENTRY(rx_copybreak_pkt), + ENA_STAT_RX_ENTRY(csum_good), ENA_STAT_RX_ENTRY(refil_partial), ENA_STAT_RX_ENTRY(bad_csum), ENA_STAT_RX_ENTRY(page_alloc_fail), ENA_STAT_RX_ENTRY(skb_alloc_fail), ENA_STAT_RX_ENTRY(dma_mapping_err), ENA_STAT_RX_ENTRY(bad_desc_num), - ENA_STAT_RX_ENTRY(rx_copybreak_pkt), ENA_STAT_RX_ENTRY(bad_req_id), ENA_STAT_RX_ENTRY(empty_rx_ring), ENA_STAT_RX_ENTRY(csum_unchecked), @@ -447,13 +448,32 @@ static void ena_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { struct ena_adapter *adapter = netdev_priv(netdev); - struct ena_ring *tx_ring = &adapter->tx_ring[0]; - struct ena_ring *rx_ring = &adapter->rx_ring[0]; - ring->rx_max_pending = rx_ring->ring_size; - ring->tx_max_pending = tx_ring->ring_size; - ring->rx_pending = rx_ring->ring_size; - ring->tx_pending = tx_ring->ring_size; + ring->tx_max_pending = adapter->max_tx_ring_size; + ring->rx_max_pending = adapter->max_rx_ring_size; + ring->tx_pending = adapter->tx_ring[0].ring_size; + ring->rx_pending = adapter->rx_ring[0].ring_size; +} + +static int ena_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + u32 
new_tx_size, new_rx_size; + + new_tx_size = ring->tx_pending < ENA_MIN_RING_SIZE ? + ENA_MIN_RING_SIZE : ring->tx_pending; + new_tx_size = rounddown_pow_of_two(new_tx_size); + + new_rx_size = ring->rx_pending < ENA_MIN_RING_SIZE ? + ENA_MIN_RING_SIZE : ring->rx_pending; + new_rx_size = rounddown_pow_of_two(new_rx_size); + + if (new_tx_size == adapter->requested_tx_ring_size && + new_rx_size == adapter->requested_rx_ring_size) + return 0; + + return ena_update_queue_sizes(adapter, new_tx_size, new_rx_size); } static u32 ena_flow_hash_to_flow_type(u16 hash_fields) @@ -807,6 +827,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_coalesce = ena_get_coalesce, .set_coalesce = ena_set_coalesce, .get_ringparam = ena_get_ringparam, + .set_ringparam = ena_set_ringparam, .get_sset_count = ena_get_sset_count, .get_strings = ena_get_strings, .get_ethtool_stats = ena_get_ethtool_stats, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 9c83642922c7..664e3ed97ea9 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -182,7 +182,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_init_io_rings_common(adapter, rxr, i); /* TX specific ring state */ - txr->ring_size = adapter->tx_ring_size; + txr->ring_size = adapter->requested_tx_ring_size; txr->tx_max_header_size = ena_dev->tx_max_header_size; txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; txr->sgl_size = adapter->max_tx_sgl_size; @@ -190,7 +190,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter) ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); /* RX specific ring state */ - rxr->ring_size = adapter->rx_ring_size; + rxr->ring_size = adapter->requested_rx_ring_size; rxr->rx_copybreak = adapter->rx_copybreak; rxr->sgl_size = adapter->max_rx_sgl_size; rxr->smoothed_interval = @@ -228,11 +228,11 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) } size = sizeof(u16) * tx_ring->ring_size; - tx_ring->free_tx_ids = vzalloc_node(size, node); - if (!tx_ring->free_tx_ids) { - tx_ring->free_tx_ids = vzalloc(size); - if (!tx_ring->free_tx_ids) - goto err_free_tx_ids; + tx_ring->free_ids = vzalloc_node(size, node); + if (!tx_ring->free_ids) { + tx_ring->free_ids = vzalloc(size); + if (!tx_ring->free_ids) + goto err_tx_free_ids; } size = tx_ring->tx_max_header_size; @@ -245,7 +245,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) /* Req id ring for TX out of order completions */ for (i = 0; i < tx_ring->ring_size; i++) - tx_ring->free_tx_ids[i] = i; + tx_ring->free_ids[i] = i; /* Reset tx statistics */ memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); @@ -256,9 +256,9 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) return 0; err_push_buf_intermediate_buf: - vfree(tx_ring->free_tx_ids); - tx_ring->free_tx_ids = NULL; -err_free_tx_ids: + vfree(tx_ring->free_ids); + tx_ring->free_ids = NULL; +err_tx_free_ids: vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; err_tx_buffer_info: @@ -278,8 +278,8 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; - vfree(tx_ring->free_tx_ids); - tx_ring->free_tx_ids = NULL; + vfree(tx_ring->free_ids); + tx_ring->free_ids = NULL; vfree(tx_ring->push_buf_intermediate_buf); tx_ring->push_buf_intermediate_buf = NULL; @@ -326,7 +326,7 @@ static void ena_free_all_io_tx_resources(struct 
ena_adapter *adapter) ena_free_tx_resources(adapter, i); } -static inline int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) +static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) { if (likely(req_id < rx_ring->ring_size)) return 0; @@ -377,10 +377,10 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, } size = sizeof(u16) * rx_ring->ring_size; - rx_ring->free_rx_ids = vzalloc_node(size, node); - if (!rx_ring->free_rx_ids) { - rx_ring->free_rx_ids = vzalloc(size); - if (!rx_ring->free_rx_ids) { + rx_ring->free_ids = vzalloc_node(size, node); + if (!rx_ring->free_ids) { + rx_ring->free_ids = vzalloc(size); + if (!rx_ring->free_ids) { vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; return -ENOMEM; @@ -389,7 +389,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, /* Req id ring for receiving RX pkts out of order */ for (i = 0; i < rx_ring->ring_size; i++) - rx_ring->free_rx_ids[i] = i; + rx_ring->free_ids[i] = i; /* Reset rx statistics */ memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); @@ -415,8 +415,8 @@ static void ena_free_rx_resources(struct ena_adapter *adapter, vfree(rx_ring->rx_buffer_info); rx_ring->rx_buffer_info = NULL; - vfree(rx_ring->free_rx_ids); - rx_ring->free_rx_ids = NULL; + vfree(rx_ring->free_ids); + rx_ring->free_ids = NULL; } /* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues @@ -460,7 +460,7 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) ena_free_rx_resources(adapter, i); } -static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, +static int ena_alloc_rx_page(struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info, gfp_t gfp) { struct ena_com_buf *ena_buf; @@ -531,7 +531,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) for (i = 0; i < num; i++) { struct ena_rx_buffer *rx_info; - req_id = rx_ring->free_rx_ids[next_to_use]; + req_id = rx_ring->free_ids[next_to_use]; rc = validate_rx_req_id(rx_ring, req_id); if (unlikely(rc < 0)) break; @@ -594,7 +594,6 @@ static void ena_free_rx_bufs(struct ena_adapter *adapter, /* ena_refill_all_rx_bufs - allocate all queues Rx buffers * @adapter: board private structure - * */ static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) { @@ -621,7 +620,7 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) ena_free_rx_bufs(adapter, i); } -static inline void ena_unmap_tx_skb(struct ena_ring *tx_ring, +static void ena_unmap_tx_skb(struct ena_ring *tx_ring, struct ena_tx_buffer *tx_info) { struct ena_com_buf *ena_buf; @@ -797,7 +796,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) tx_pkts++; total_done += tx_info->tx_descs; - tx_ring->free_tx_ids[next_to_clean] = req_id; + tx_ring->free_ids[next_to_clean] = req_id; next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, tx_ring->ring_size); } @@ -911,7 +910,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, skb_put(skb, len); skb->protocol = eth_type_trans(skb, rx_ring->netdev); - rx_ring->free_rx_ids[*next_to_clean] = req_id; + rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, rx_ring->ring_size); return skb; @@ -935,7 +934,7 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, rx_info->page = NULL; - rx_ring->free_rx_ids[*next_to_clean] = req_id; + rx_ring->free_ids[*next_to_clean] = req_id; *next_to_clean = ENA_RX_RING_IDX_NEXT(*next_to_clean, rx_ring->ring_size); @@ -956,7 +955,7 @@ static struct sk_buff 
*ena_rx_skb(struct ena_ring *rx_ring, * @ena_rx_ctx: received packet context/metadata * @skb: skb currently being received and modified */ -static inline void ena_rx_checksum(struct ena_ring *rx_ring, +static void ena_rx_checksum(struct ena_ring *rx_ring, struct ena_com_rx_ctx *ena_rx_ctx, struct sk_buff *skb) { @@ -1001,6 +1000,9 @@ static inline void ena_rx_checksum(struct ena_ring *rx_ring, if (likely(ena_rx_ctx->l4_csum_checked)) { skb->ip_summed = CHECKSUM_UNNECESSARY; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.csum_good++; + u64_stats_update_end(&rx_ring->syncp); } else { u64_stats_update_begin(&rx_ring->syncp); rx_ring->rx_stats.csum_unchecked++; @@ -1088,7 +1090,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* exit if we failed to retrieve a buffer */ if (unlikely(!skb)) { for (i = 0; i < ena_rx_ctx.descs; i++) { - rx_ring->free_tx_ids[next_to_clean] = + rx_ring->free_ids[next_to_clean] = rx_ring->ena_bufs[i].req_id; next_to_clean = ENA_RX_RING_IDX_NEXT(next_to_clean, @@ -1153,7 +1155,7 @@ error: return 0; } -inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, +void ena_adjust_intr_moderation(struct ena_ring *rx_ring, struct ena_ring *tx_ring) { /* We apply adaptive moderation on Rx path only. @@ -1172,7 +1174,7 @@ inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, rx_ring->per_napi_bytes = 0; } -static inline void ena_unmask_interrupt(struct ena_ring *tx_ring, +static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { struct ena_eth_io_intr_reg intr_reg; @@ -1192,7 +1194,7 @@ static inline void ena_unmask_interrupt(struct ena_ring *tx_ring, ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); } -static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, +static void ena_update_ring_numa_node(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { int cpu = get_cpu(); @@ -1635,7 +1637,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) ctx.qid = ena_qid; ctx.mem_queue_type = ena_dev->tx_mem_queue_type; ctx.msix_vector = msix_vector; - ctx.queue_size = adapter->tx_ring_size; + ctx.queue_size = tx_ring->ring_size; ctx.numa_node = cpu_to_node(tx_ring->cpu); rc = ena_com_create_io_queue(ena_dev, &ctx); @@ -1702,7 +1704,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ctx.msix_vector = msix_vector; - ctx.queue_size = adapter->rx_ring_size; + ctx.queue_size = rx_ring->ring_size; ctx.numa_node = cpu_to_node(rx_ring->cpu); rc = ena_com_create_io_queue(ena_dev, &ctx); @@ -1749,6 +1751,112 @@ create_err: return rc; } +static void set_io_rings_size(struct ena_adapter *adapter, + int new_tx_size, int new_rx_size) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) { + adapter->tx_ring[i].ring_size = new_tx_size; + adapter->rx_ring[i].ring_size = new_rx_size; + } +} + +/* This function allows queue allocation to backoff when the system is + * low on memory. If there is not enough memory to allocate io queues + * the driver will try to allocate smaller queues. + * + * The backoff algorithm is as follows: + * 1. Try to allocate TX and RX and if successful. + * 1.1. return success + * + * 2. Divide by 2 the size of the larger of RX and TX queues (or both if their size is the same). + * + * 3. If TX or RX is smaller than 256 + * 3.1. return failure. + * 4. else + * 4.1. go back to 1. 
+ */ +static int create_queues_with_size_backoff(struct ena_adapter *adapter) +{ + int rc, cur_rx_ring_size, cur_tx_ring_size; + int new_rx_ring_size, new_tx_ring_size; + + /* current queue sizes might be set to smaller than the requested + * ones due to past queue allocation failures. + */ + set_io_rings_size(adapter, adapter->requested_tx_ring_size, + adapter->requested_rx_ring_size); + + while (1) { + rc = ena_setup_all_tx_resources(adapter); + if (rc) + goto err_setup_tx; + + rc = ena_create_all_io_tx_queues(adapter); + if (rc) + goto err_create_tx_queues; + + rc = ena_setup_all_rx_resources(adapter); + if (rc) + goto err_setup_rx; + + rc = ena_create_all_io_rx_queues(adapter); + if (rc) + goto err_create_rx_queues; + + return 0; + +err_create_rx_queues: + ena_free_all_io_rx_resources(adapter); +err_setup_rx: + ena_destroy_all_tx_queues(adapter); +err_create_tx_queues: + ena_free_all_io_tx_resources(adapter); +err_setup_tx: + if (rc != -ENOMEM) { + netif_err(adapter, ifup, adapter->netdev, + "Queue creation failed with error code %d\n", + rc); + return rc; + } + + cur_tx_ring_size = adapter->tx_ring[0].ring_size; + cur_rx_ring_size = adapter->rx_ring[0].ring_size; + + netif_err(adapter, ifup, adapter->netdev, + "Not enough memory to create queues with sizes TX=%d, RX=%d\n", + cur_tx_ring_size, cur_rx_ring_size); + + new_tx_ring_size = cur_tx_ring_size; + new_rx_ring_size = cur_rx_ring_size; + + /* Decrease the size of the larger queue, or + * decrease both if they are the same size. + */ + if (cur_rx_ring_size <= cur_tx_ring_size) + new_tx_ring_size = cur_tx_ring_size / 2; + if (cur_rx_ring_size >= cur_tx_ring_size) + new_rx_ring_size = cur_rx_ring_size / 2; + + if (new_tx_ring_size < ENA_MIN_RING_SIZE || + new_rx_ring_size < ENA_MIN_RING_SIZE) { + netif_err(adapter, ifup, adapter->netdev, + "Queue creation failed with the smallest possible queue size of %d for both queues. 
Not retrying with smaller queues\n", + ENA_MIN_RING_SIZE); + return rc; + } + + netif_err(adapter, ifup, adapter->netdev, + "Retrying queue creation with sizes TX=%d, RX=%d\n", + new_tx_ring_size, + new_rx_ring_size); + + set_io_rings_size(adapter, new_tx_ring_size, + new_rx_ring_size); + } +} + static int ena_up(struct ena_adapter *adapter) { int rc, i; @@ -1768,25 +1876,9 @@ static int ena_up(struct ena_adapter *adapter) if (rc) goto err_req_irq; - /* allocate transmit descriptors */ - rc = ena_setup_all_tx_resources(adapter); + rc = create_queues_with_size_backoff(adapter); if (rc) - goto err_setup_tx; - - /* allocate receive descriptors */ - rc = ena_setup_all_rx_resources(adapter); - if (rc) - goto err_setup_rx; - - /* Create TX queues */ - rc = ena_create_all_io_tx_queues(adapter); - if (rc) - goto err_create_tx_queues; - - /* Create RX queues */ - rc = ena_create_all_io_rx_queues(adapter); - if (rc) - goto err_create_rx_queues; + goto err_create_queues_with_backoff; rc = ena_up_complete(adapter); if (rc) @@ -1815,14 +1907,11 @@ static int ena_up(struct ena_adapter *adapter) return rc; err_up: - ena_destroy_all_rx_queues(adapter); -err_create_rx_queues: ena_destroy_all_tx_queues(adapter); -err_create_tx_queues: - ena_free_all_io_rx_resources(adapter); -err_setup_rx: ena_free_all_io_tx_resources(adapter); -err_setup_tx: + ena_destroy_all_rx_queues(adapter); + ena_free_all_io_rx_resources(adapter); +err_create_queues_with_backoff: ena_free_io_irq(adapter); err_req_irq: ena_del_napi(adapter); @@ -1942,6 +2031,20 @@ static int ena_close(struct net_device *netdev) return 0; } +int ena_update_queue_sizes(struct ena_adapter *adapter, + u32 new_tx_size, + u32 new_rx_size) +{ + bool dev_up; + + dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + ena_close(adapter->netdev); + adapter->requested_tx_ring_size = new_tx_size; + adapter->requested_rx_ring_size = new_rx_size; + ena_init_io_rings(adapter); + return dev_up ? 
ena_up(adapter) : 0; +} + static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) { u32 mss = skb_shinfo(skb)->gso_size; @@ -2152,7 +2255,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); next_to_use = tx_ring->next_to_use; - req_id = tx_ring->free_tx_ids[next_to_use]; + req_id = tx_ring->free_ids[next_to_use]; tx_info = &tx_ring->tx_buffer_info[req_id]; tx_info->num_of_bufs = 0; @@ -2172,6 +2275,13 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set flags and meta data */ ena_tx_csum(&ena_tx_ctx, skb); + if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) { + netif_dbg(adapter, tx_queued, dev, + "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", + qid); + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + } + /* prepare the packet's descriptors to dma engine */ rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, &nb_hw_desc); @@ -2447,13 +2557,6 @@ static int ena_device_validate_params(struct ena_adapter *adapter, return -EINVAL; } - if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) || - (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) { - netif_err(adapter, drv, netdev, - "Error, device doesn't support enough queues\n"); - return -EINVAL; - } - if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { netif_err(adapter, drv, netdev, "Error, device max mtu is smaller than netdev MTU\n"); @@ -3027,18 +3130,32 @@ static int ena_calc_io_queue_num(struct pci_dev *pdev, struct ena_com_dev *ena_dev, struct ena_com_dev_get_features_ctx *get_feat_ctx) { - int io_sq_num, io_queue_num; + int io_tx_sq_num, io_tx_cq_num, io_rx_num, io_queue_num; - /* In case of LLQ use the llq number in the get feature cmd */ + if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + struct ena_admin_queue_ext_feature_fields *max_queue_ext = + &get_feat_ctx->max_queue_ext.max_queue_ext; + io_rx_num = min_t(int, max_queue_ext->max_rx_sq_num, + max_queue_ext->max_rx_cq_num); + + io_tx_sq_num = max_queue_ext->max_tx_sq_num; + io_tx_cq_num = max_queue_ext->max_tx_cq_num; + } else { + struct ena_admin_queue_feature_desc *max_queues = + &get_feat_ctx->max_queues; + io_tx_sq_num = max_queues->max_sq_num; + io_tx_cq_num = max_queues->max_cq_num; + io_rx_num = min_t(int, io_tx_sq_num, io_tx_cq_num); + } + + /* In case of LLQ use the llq fields for the tx SQ/CQ */ if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) - io_sq_num = get_feat_ctx->llq.max_llq_num; - else - io_sq_num = get_feat_ctx->max_queues.max_sq_num; + io_tx_sq_num = get_feat_ctx->llq.max_llq_num; io_queue_num = min_t(int, num_online_cpus(), ENA_MAX_NUM_IO_QUEUES); - io_queue_num = min_t(int, io_queue_num, io_sq_num); - io_queue_num = min_t(int, io_queue_num, - get_feat_ctx->max_queues.max_cq_num); + io_queue_num = min_t(int, io_queue_num, io_rx_num); + io_queue_num = min_t(int, io_queue_num, io_tx_sq_num); + io_queue_num = min_t(int, io_queue_num, io_tx_cq_num); /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); if (unlikely(!io_queue_num)) { @@ -3212,7 +3329,7 @@ static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) pci_release_selected_regions(pdev, release_bars); } -static inline void set_default_llq_configurations(struct ena_llq_configurations *llq_config) +static void set_default_llq_configurations(struct 
ena_llq_configurations *llq_config) { llq_config->llq_header_location = ENA_ADMIN_INLINE_HEADER; llq_config->llq_ring_entry_size = ENA_ADMIN_LIST_ENTRY_SIZE_128B; @@ -3221,36 +3338,70 @@ static inline void set_default_llq_configurations(struct ena_llq_configurations llq_config->llq_ring_entry_size_value = 128; } -static int ena_calc_queue_size(struct pci_dev *pdev, - struct ena_com_dev *ena_dev, - u16 *max_tx_sgl_size, - u16 *max_rx_sgl_size, - struct ena_com_dev_get_features_ctx *get_feat_ctx) +static int ena_calc_queue_size(struct ena_calc_queue_size_ctx *ctx) { - u32 queue_size = ENA_DEFAULT_RING_SIZE; + struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq; + struct ena_com_dev *ena_dev = ctx->ena_dev; + u32 tx_queue_size = ENA_DEFAULT_RING_SIZE; + u32 rx_queue_size = ENA_DEFAULT_RING_SIZE; + u32 max_tx_queue_size; + u32 max_rx_queue_size; - queue_size = min_t(u32, queue_size, - get_feat_ctx->max_queues.max_cq_depth); - queue_size = min_t(u32, queue_size, - get_feat_ctx->max_queues.max_sq_depth); + if (ctx->ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) { + struct ena_admin_queue_ext_feature_fields *max_queue_ext = + &ctx->get_feat_ctx->max_queue_ext.max_queue_ext; + max_rx_queue_size = min_t(u32, max_queue_ext->max_rx_cq_depth, + max_queue_ext->max_rx_sq_depth); + max_tx_queue_size = max_queue_ext->max_tx_cq_depth; - if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) - queue_size = min_t(u32, queue_size, - get_feat_ctx->llq.max_llq_depth); + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + max_tx_queue_size = min_t(u32, max_tx_queue_size, + llq->max_llq_depth); + else + max_tx_queue_size = min_t(u32, max_tx_queue_size, + max_queue_ext->max_tx_sq_depth); - queue_size = rounddown_pow_of_two(queue_size); + ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queue_ext->max_per_packet_tx_descs); + ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queue_ext->max_per_packet_rx_descs); + } else { + struct ena_admin_queue_feature_desc *max_queues = + &ctx->get_feat_ctx->max_queues; + max_rx_queue_size = min_t(u32, max_queues->max_cq_depth, + max_queues->max_sq_depth); + max_tx_queue_size = max_queues->max_cq_depth; + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + max_tx_queue_size = min_t(u32, max_tx_queue_size, + llq->max_llq_depth); + else + max_tx_queue_size = min_t(u32, max_tx_queue_size, + max_queues->max_sq_depth); - if (unlikely(!queue_size)) { - dev_err(&pdev->dev, "Invalid queue size\n"); - return -EFAULT; + ctx->max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queues->max_packet_tx_descs); + ctx->max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + max_queues->max_packet_rx_descs); } - *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, - get_feat_ctx->max_queues.max_packet_tx_descs); - *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, - get_feat_ctx->max_queues.max_packet_rx_descs); + max_tx_queue_size = rounddown_pow_of_two(max_tx_queue_size); + max_rx_queue_size = rounddown_pow_of_two(max_rx_queue_size); + + tx_queue_size = clamp_val(tx_queue_size, ENA_MIN_RING_SIZE, + max_tx_queue_size); + rx_queue_size = clamp_val(rx_queue_size, ENA_MIN_RING_SIZE, + max_rx_queue_size); - return queue_size; + tx_queue_size = rounddown_pow_of_two(tx_queue_size); + rx_queue_size = rounddown_pow_of_two(rx_queue_size); + + ctx->max_tx_queue_size = max_tx_queue_size; + ctx->max_rx_queue_size = max_rx_queue_size; + ctx->tx_queue_size = tx_queue_size; + ctx->rx_queue_size = rx_queue_size; + + return 0; } /* 
ena_probe - Device Initialization Routine @@ -3266,23 +3417,19 @@ static int ena_calc_queue_size(struct pci_dev *pdev, static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct ena_com_dev_get_features_ctx get_feat_ctx; - static int version_printed; - struct net_device *netdev; - struct ena_adapter *adapter; + struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 }; struct ena_llq_configurations llq_config; struct ena_com_dev *ena_dev = NULL; - char *queue_type_str; - static int adapters_found; + struct ena_adapter *adapter; int io_queue_num, bars, rc; - int queue_size; - u16 tx_sgl_size = 0; - u16 rx_sgl_size = 0; + struct net_device *netdev; + static int adapters_found; + char *queue_type_str; bool wd_state; dev_dbg(&pdev->dev, "%s\n", __func__); - if (version_printed++ == 0) - dev_info(&pdev->dev, "%s", version); + dev_info_once(&pdev->dev, "%s", version); rc = pci_enable_device_mem(pdev); if (rc) { @@ -3334,20 +3481,25 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_device_destroy; } + calc_queue_ctx.ena_dev = ena_dev; + calc_queue_ctx.get_feat_ctx = &get_feat_ctx; + calc_queue_ctx.pdev = pdev; + /* initial Tx interrupt delay, Assumes 1 usec granularity. * Updated during device initialization with the real granularity */ ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); - queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size, - &rx_sgl_size, &get_feat_ctx); - if ((queue_size <= 0) || (io_queue_num <= 0)) { + rc = ena_calc_queue_size(&calc_queue_ctx); + if (rc || io_queue_num <= 0) { rc = -EFAULT; goto err_device_destroy; } - dev_info(&pdev->dev, "creating %d io queues. queue size: %d. LLQ is %s\n", - io_queue_num, queue_size, + dev_info(&pdev->dev, "creating %d io queues. rx queue size: %d tx queue size: %d LLQ is %s\n", + io_queue_num, + calc_queue_ctx.rx_queue_size, + calc_queue_ctx.tx_queue_size, + (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) ? 
"ENABLED" : "DISABLED"); @@ -3373,11 +3525,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); adapter->reset_reason = ENA_REGS_RESET_NORMAL; - adapter->tx_ring_size = queue_size; - adapter->rx_ring_size = queue_size; - - adapter->max_tx_sgl_size = tx_sgl_size; - adapter->max_rx_sgl_size = rx_sgl_size; + adapter->requested_tx_ring_size = calc_queue_ctx.tx_queue_size; + adapter->requested_rx_ring_size = calc_queue_ctx.rx_queue_size; + adapter->max_tx_ring_size = calc_queue_ctx.max_tx_queue_size; + adapter->max_rx_ring_size = calc_queue_ctx.max_rx_queue_size; + adapter->max_tx_sgl_size = calc_queue_ctx.max_tx_sgl_size; + adapter->max_rx_sgl_size = calc_queue_ctx.max_rx_sgl_size; adapter->num_queues = io_queue_num; adapter->last_monitored_tx_qid = 0; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 63870072cbbd..efbcffd22215 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -44,8 +44,8 @@ #include "ena_eth_com.h" #define DRV_MODULE_VER_MAJOR 2 -#define DRV_MODULE_VER_MINOR 0 -#define DRV_MODULE_VER_SUBMINOR 3 +#define DRV_MODULE_VER_MINOR 1 +#define DRV_MODULE_VER_SUBMINOR 0 #define DRV_MODULE_NAME "ena" #ifndef DRV_MODULE_VERSION @@ -79,6 +79,7 @@ #define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR)) #define ENA_DEFAULT_RING_SIZE (1024) +#define ENA_MIN_RING_SIZE (256) #define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) #define ENA_DEFAULT_RX_COPYBREAK (256 - NET_IP_ALIGN) @@ -154,6 +155,18 @@ struct ena_napi { u32 qid; }; +struct ena_calc_queue_size_ctx { + struct ena_com_dev_get_features_ctx *get_feat_ctx; + struct ena_com_dev *ena_dev; + struct pci_dev *pdev; + u16 tx_queue_size; + u16 rx_queue_size; + u16 max_tx_queue_size; + u16 max_rx_queue_size; + u16 max_tx_sgl_size; + u16 max_rx_sgl_size; +}; + struct ena_tx_buffer { struct sk_buff *skb; /* num of ena desc for this specific skb @@ -208,26 +221,24 @@ struct ena_stats_tx { struct ena_stats_rx { u64 cnt; u64 bytes; + u64 rx_copybreak_pkt; + u64 csum_good; u64 refil_partial; u64 bad_csum; u64 page_alloc_fail; u64 skb_alloc_fail; u64 dma_mapping_err; u64 bad_desc_num; - u64 rx_copybreak_pkt; u64 bad_req_id; u64 empty_rx_ring; u64 csum_unchecked; }; struct ena_ring { - union { - /* Holds the empty requests for TX/RX - * out of order completions - */ - u16 *free_tx_ids; - u16 *free_rx_ids; - }; + /* Holds the empty requests for TX/RX + * out of order completions + */ + u16 *free_ids; union { struct ena_tx_buffer *tx_buffer_info; @@ -321,8 +332,11 @@ struct ena_adapter { u32 tx_usecs, rx_usecs; /* interrupt moderation */ u32 tx_frames, rx_frames; /* interrupt moderation */ - u32 tx_ring_size; - u32 rx_ring_size; + u32 requested_tx_ring_size; + u32 requested_rx_ring_size; + + u32 max_tx_ring_size; + u32 max_rx_ring_size; u32 msg_enable; @@ -372,6 +386,10 @@ void ena_dump_stats_to_dmesg(struct ena_adapter *adapter); void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); +int ena_update_queue_sizes(struct ena_adapter *adapter, + u32 new_tx_size, + u32 new_rx_size); + int ena_get_sset_count(struct net_device *netdev, int sset); #endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 173be45463ee..02f1b70c4e25 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -9,6 +9,8 
@@ #ifndef AQ_CFG_H #define AQ_CFG_H +#include <generated/utsrelease.h> + #define AQ_CFG_VECS_DEF 8U #define AQ_CFG_TCS_DEF 1U @@ -86,10 +88,7 @@ #define AQ_CFG_DRV_AUTHOR "aQuantia" #define AQ_CFG_DRV_DESC "aQuantia Corporation(R) Network Driver" #define AQ_CFG_DRV_NAME "atlantic" -#define AQ_CFG_DRV_VERSION __stringify(NIC_MAJOR_DRIVER_VERSION)"."\ - __stringify(NIC_MINOR_DRIVER_VERSION)"."\ - __stringify(NIC_BUILD_DRIVER_VERSION)"."\ - __stringify(NIC_REVISION_DRIVER_VERSION) \ +#define AQ_CFG_DRV_VERSION UTS_RELEASE \ AQ_CFG_DRV_VERSION_SUFFIX #endif /* AQ_CFG_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c index adad6a7acabe..6da65099047d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2014-2019 aQuantia Corporation. */ /* File aq_drvinfo.c: Definition of common code for firmware info in sys.*/ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h index 41fbb1358068..23a0487893a7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_drvinfo.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (C) 2014-2017 aQuantia Corporation. */ /* File aq_drvinfo.h: Declaration of common code for firmware info in sys.*/ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 1fff462a4175..440690b18734 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2014-2017 aQuantia Corporation. */ /* File aq_filters.c: RX filters related functions. */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.h b/drivers/net/ethernet/aquantia/atlantic/aq_filters.h index c6a08c6585d5..122e06c88a33 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (C) 2014-2017 aQuantia Corporation. */ /* File aq_filters.h: RX filters related functions. 
*/ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c index 5315df5ff6f8..100722ad5c2d 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c @@ -108,11 +108,16 @@ err_exit: static int aq_ndev_set_features(struct net_device *ndev, netdev_features_t features) { + bool is_vlan_rx_strip = !!(features & NETIF_F_HW_VLAN_CTAG_RX); + bool is_vlan_tx_insert = !!(features & NETIF_F_HW_VLAN_CTAG_TX); struct aq_nic_s *aq_nic = netdev_priv(ndev); - struct aq_nic_cfg_s *aq_cfg = aq_nic_get_cfg(aq_nic); + bool need_ndev_restart = false; + struct aq_nic_cfg_s *aq_cfg; bool is_lro = false; int err = 0; + aq_cfg = aq_nic_get_cfg(aq_nic); + if (!(features & NETIF_F_NTUPLE)) { if (aq_nic->ndev->features & NETIF_F_NTUPLE) { err = aq_clear_rxnfc_all_rules(aq_nic); @@ -135,17 +140,32 @@ static int aq_ndev_set_features(struct net_device *ndev, if (aq_cfg->is_lro != is_lro) { aq_cfg->is_lro = is_lro; - - if (netif_running(ndev)) { - aq_ndev_close(ndev); - aq_ndev_open(ndev); - } + need_ndev_restart = true; } } - if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM) + + if ((aq_nic->ndev->features ^ features) & NETIF_F_RXCSUM) { err = aq_nic->aq_hw_ops->hw_set_offload(aq_nic->aq_hw, aq_cfg); + if (unlikely(err)) + goto err_exit; + } + + if (aq_cfg->is_vlan_rx_strip != is_vlan_rx_strip) { + aq_cfg->is_vlan_rx_strip = is_vlan_rx_strip; + need_ndev_restart = true; + } + if (aq_cfg->is_vlan_tx_insert != is_vlan_tx_insert) { + aq_cfg->is_vlan_tx_insert = is_vlan_tx_insert; + need_ndev_restart = true; + } + + if (need_ndev_restart && netif_running(ndev)) { + aq_ndev_close(ndev); + aq_ndev_open(ndev); + } + err_exit: return err; } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 41172fbebddd..e1392766e21e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -126,6 +126,8 @@ void aq_nic_cfg_start(struct aq_nic_s *self) cfg->link_speed_msk &= cfg->aq_hw_caps->link_speed_msk; cfg->features = cfg->aq_hw_caps->hw_features; + cfg->is_vlan_rx_strip = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_RX); + cfg->is_vlan_tx_insert = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_TX); cfg->is_vlan_force_promisc = true; } @@ -286,7 +288,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->hw_features |= aq_hw_caps->hw_features; self->ndev->features = aq_hw_caps->hw_features; self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM | - NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_LRO; + NETIF_F_RXHASH | NETIF_F_SG | + NETIF_F_LRO | NETIF_F_TSO; self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; @@ -427,26 +430,37 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, unsigned int dx = ring->sw_tail; struct aq_ring_buff_s *first = NULL; struct aq_ring_buff_s *dx_buff = &ring->buff_ring[dx]; + bool need_context_tag = false; + + dx_buff->flags = 0U; if (unlikely(skb_is_gso(skb))) { - dx_buff->flags = 0U; + dx_buff->mss = skb_shinfo(skb)->gso_size; + dx_buff->is_gso = 1U; dx_buff->len_pkt = skb->len; dx_buff->len_l2 = ETH_HLEN; dx_buff->len_l3 = ip_hdrlen(skb); dx_buff->len_l4 = tcp_hdrlen(skb); - dx_buff->mss = skb_shinfo(skb)->gso_size; - dx_buff->is_txc = 1U; dx_buff->eop_index = 0xffffU; - dx_buff->is_ipv6 = (ip_hdr(skb)->version == 6) ? 
1U : 0U; + need_context_tag = true; + } + + if (self->aq_nic_cfg.is_vlan_tx_insert && skb_vlan_tag_present(skb)) { + dx_buff->vlan_tx_tag = skb_vlan_tag_get(skb); + dx_buff->len_pkt = skb->len; + dx_buff->is_vlan = 1U; + need_context_tag = true; + } + if (need_context_tag) { dx = aq_ring_next_dx(ring, dx); dx_buff = &ring->buff_ring[dx]; + dx_buff->flags = 0U; ++ret; } - dx_buff->flags = 0U; dx_buff->len = skb_headlen(skb); dx_buff->pa = dma_map_single(aq_nic_get_dev(self), skb->data, @@ -535,7 +549,7 @@ mapping_error: --ret, dx = aq_ring_next_dx(ring, dx)) { dx_buff = &ring->buff_ring[dx]; - if (!dx_buff->is_txc && dx_buff->pa) { + if (!dx_buff->is_gso && !dx_buff->is_vlan && dx_buff->pa) { if (unlikely(dx_buff->is_sop)) { dma_unmap_single(aq_nic_get_dev(self), dx_buff->pa, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 0f22f5d5691b..255b54a6ae07 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -35,6 +35,8 @@ struct aq_nic_cfg_s { u32 flow_control; u32 link_speed_msk; u32 wol; + u8 is_vlan_rx_strip; + u8 is_vlan_tx_insert; bool is_vlan_force_promisc; u16 is_mc_list_enabled; u16 mc_list_count; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 2a7b91ed17c5..3901d7994ca1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -409,6 +409,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, } } + if (buff->is_vlan) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + buff->vlan_rx_tag); + skb->protocol = eth_type_trans(skb, ndev); aq_rx_checksum(self, buff, skb); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h index 6bd67210d0b7..47abd09d06c2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h @@ -27,7 +27,7 @@ struct aq_rxpage { * +----------+----------+----------+----------- * 4/8bytes|len pkt |len pkt | | skb * +----------+----------+----------+----------- - * 4/8bytes|is_txc |len,flags |len |len,is_eop + * 4/8bytes|is_gso |len,flags |len |len,is_eop * +----------+----------+----------+----------- * * This aq_ring_buff_s doesn't have endianness dependency. 
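[Editor's note: the remapped Tx path above lets LSO and hardware VLAN insertion share a single context descriptor: aq_nic_map_skb() raises need_context_tag for either feature and advances the ring only once, which is also why the unwind path now tests !is_gso && !is_vlan instead of the old !is_txc. A minimal sketch of that decision, using stand-in types rather than the driver's aq_ring_buff_s:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct txc {				/* stand-in for the context fields */
	bool is_gso, is_vlan;
	uint16_t mss, vlan_tag;
};

/* Returns how many extra descriptors the context costs: 0 or 1. */
static int fill_context(struct txc *c, bool gso, uint16_t mss,
			bool vlan, uint16_t tag)
{
	bool need = false;

	if (gso) {			/* LSO carries the MSS out of band */
		c->is_gso = true;
		c->mss = mss;
		need = true;
	}
	if (vlan) {			/* tag insertion reuses the same slot */
		c->is_vlan = true;
		c->vlan_tag = tag;
		need = true;
	}
	return need ? 1 : 0;
}

int main(void)
{
	struct txc c = { 0 };

	/* GSO and VLAN together still cost only one context descriptor */
	printf("%d\n", fill_context(&c, true, 1448, true, 100));
	return 0;
}
]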
@@ -44,6 +44,7 @@ struct __packed aq_ring_buff_s { u8 is_hash_l4; u8 rsvd1; struct aq_rxpage rxdata; + u16 vlan_rx_tag; }; /* EOP */ struct { @@ -59,6 +60,7 @@ struct __packed aq_ring_buff_s { u8 is_ipv6:1; u8 rsvd2:7; u32 len_pkt; + u16 vlan_tx_tag; }; }; union { @@ -70,11 +72,12 @@ struct __packed aq_ring_buff_s { u32 is_cso_err:1; u32 is_sop:1; u32 is_eop:1; - u32 is_txc:1; + u32 is_gso:1; u32 is_mapped:1; u32 is_cleaned:1; u32 is_error:1; - u32 rsvd3:6; + u32 is_vlan:1; + u32 rsvd3:5; u16 eop_index; u16 rsvd4; }; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index 0f140a9fe404..359a4d387185 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -451,7 +451,7 @@ static int hw_atl_a0_hw_ring_tx_xmit(struct aq_hw_s *self, buff = &ring->buff_ring[ring->sw_tail]; - if (buff->is_txc) { + if (buff->is_gso) { txd->ctl |= (buff->len_l3 << 31) | (buff->len_l2 << 24) | HW_ATL_A0_TXD_CTL_CMD_TCP | diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 13ac2661a473..30f7fc4c97ff 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -40,7 +40,9 @@ NETIF_F_TSO | \ NETIF_F_LRO | \ NETIF_F_NTUPLE | \ - NETIF_F_HW_VLAN_CTAG_FILTER, \ + NETIF_F_HW_VLAN_CTAG_FILTER | \ + NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX, \ .hw_priv_flags = IFF_UNICAST_FLT, \ .flow_control = true, \ .mtu = HW_ATL_B0_MTU_JUMBO, \ @@ -245,6 +247,9 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self, /* LSO offloads*/ hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU); + /* Outer VLAN tag offload */ + hw_atl_rpo_outer_vlan_tag_mode_set(self, 1U); + /* LRO offloads */ { unsigned int val = (8U < HW_ATL_B0_LRO_RXD_MAX) ? 
0x3U : @@ -487,6 +492,7 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, unsigned int buff_pa_len = 0U; unsigned int pkt_len = 0U; unsigned int frag_count = 0U; + bool is_vlan = false; bool is_gso = false; buff = &ring->buff_ring[ring->sw_tail]; @@ -501,36 +507,44 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, buff = &ring->buff_ring[ring->sw_tail]; - if (buff->is_txc) { + if (buff->is_gso) { + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TCP; + txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC; txd->ctl |= (buff->len_l3 << 31) | - (buff->len_l2 << 24) | - HW_ATL_B0_TXD_CTL_CMD_TCP | - HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC; - txd->ctl2 |= (buff->mss << 16) | - (buff->len_l4 << 8) | - (buff->len_l3 >> 1); + (buff->len_l2 << 24); + txd->ctl2 |= (buff->mss << 16); + is_gso = true; pkt_len -= (buff->len_l4 + buff->len_l3 + buff->len_l2); - is_gso = true; - if (buff->is_ipv6) txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_IPV6; - } else { + txd->ctl2 |= (buff->len_l4 << 8) | + (buff->len_l3 >> 1); + } + if (buff->is_vlan) { + txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC; + txd->ctl |= buff->vlan_tx_tag << 4; + is_vlan = true; + } + if (!buff->is_gso && !buff->is_vlan) { buff_pa_len = buff->len; txd->buf_addr = buff->pa; txd->ctl |= (HW_ATL_B0_TXD_CTL_BLEN & ((u32)buff_pa_len << 4)); txd->ctl |= HW_ATL_B0_TXD_CTL_DESC_TYPE_TXD; + /* PAY_LEN */ txd->ctl2 |= HW_ATL_B0_TXD_CTL2_LEN & (pkt_len << 14); - if (is_gso) { - txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_LSO; + if (is_gso || is_vlan) { + /* enable tx context */ txd->ctl2 |= HW_ATL_B0_TXD_CTL2_CTX_EN; } + if (is_gso) + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_LSO; /* Tx checksum offloads */ if (buff->is_ip_cso) @@ -539,13 +553,16 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, if (buff->is_udp_cso || buff->is_tcp_cso) txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_TUCSO; + if (is_vlan) + txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_VLAN; + if (unlikely(buff->is_eop)) { txd->ctl |= HW_ATL_B0_TXD_CTL_EOP; txd->ctl |= HW_ATL_B0_TXD_CTL_CMD_WB; is_gso = false; + is_vlan = false; } } - ring->sw_tail = aq_ring_next_dx(ring, ring->sw_tail); } @@ -559,6 +576,7 @@ static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self, { u32 dma_desc_addr_lsw = (u32)aq_ring->dx_ring_pa; u32 dma_desc_addr_msw = (u32)(((u64)aq_ring->dx_ring_pa) >> 32); + u32 vlan_rx_stripping = self->aq_nic_cfg->is_vlan_rx_strip; hw_atl_rdm_rx_desc_en_set(self, false, aq_ring->idx); @@ -578,7 +596,8 @@ static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self, hw_atl_rdm_rx_desc_head_buff_size_set(self, 0U, aq_ring->idx); hw_atl_rdm_rx_desc_head_splitting_set(self, 0U, aq_ring->idx); - hw_atl_rpo_rx_desc_vlan_stripping_set(self, 0U, aq_ring->idx); + hw_atl_rpo_rx_desc_vlan_stripping_set(self, !!vlan_rx_stripping, + aq_ring->idx); /* Rx ring set mode */ @@ -681,11 +700,15 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, buff = &ring->buff_ring[ring->hw_head]; + buff->flags = 0U; + buff->is_hash_l4 = 0U; + rx_stat = (0x0000003CU & rxd_wb->status) >> 2; is_rx_check_sum_enabled = (rxd_wb->type >> 19) & 0x3U; - pkt_type = 0xFFU & (rxd_wb->type >> 4); + pkt_type = (rxd_wb->type & HW_ATL_B0_RXD_WB_STAT_PKTTYPE) >> + HW_ATL_B0_RXD_WB_STAT_PKTTYPE_SHIFT; if (is_rx_check_sum_enabled & BIT(0) && (0x0U == (pkt_type & 0x3U))) @@ -706,6 +729,13 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, buff->is_cso_err = 0U; } + if (self->aq_nic_cfg->is_vlan_rx_strip && + ((pkt_type & HW_ATL_B0_RXD_WB_PKTTYPE_VLAN) || + (pkt_type & HW_ATL_B0_RXD_WB_PKTTYPE_VLAN_DOUBLE))) { + buff->is_vlan = 1; + 
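		/* the stripped outer tag is reported back in the Rx writeback descriptor */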
buff->vlan_rx_tag = le16_to_cpu(rxd_wb->vlan); + } + if ((rx_stat & BIT(0)) || rxd_wb->type & 0x1000U) { /* MAC error or DMA error */ buff->is_error = 1U; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h index e4ba2ccf9830..808d8cd4252a 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h @@ -107,10 +107,17 @@ #define HW_ATL_B0_RXD_NCEA0 (0x1) #define HW_ATL_B0_RXD_WB_STAT_RSSTYPE (0x0000000F) +#define HW_ATL_B0_RXD_WB_STAT_RSSTYPE_SHIFT (0x0) #define HW_ATL_B0_RXD_WB_STAT_PKTTYPE (0x00000FF0) +#define HW_ATL_B0_RXD_WB_STAT_PKTTYPE_SHIFT (0x4) #define HW_ATL_B0_RXD_WB_STAT_RXCTRL (0x00180000) +#define HW_ATL_B0_RXD_WB_STAT_RXCTRL_SHIFT (0x13) #define HW_ATL_B0_RXD_WB_STAT_SPLHDR (0x00200000) #define HW_ATL_B0_RXD_WB_STAT_HDRLEN (0xFFC00000) +#define HW_ATL_B0_RXD_WB_STAT_HDRLEN_SHIFT (0x16) + +#define HW_ATL_B0_RXD_WB_PKTTYPE_VLAN BIT(5) +#define HW_ATL_B0_RXD_WB_PKTTYPE_VLAN_DOUBLE BIT(6) #define HW_ATL_B0_RXD_WB_STAT2_DD (0x0001) #define HW_ATL_B0_RXD_WB_STAT2_EOP (0x0002) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c index 451529069f28..1149812ae463 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c @@ -1004,6 +1004,22 @@ void hw_atl_rpo_rx_desc_vlan_stripping_set(struct aq_hw_s *aq_hw, rx_desc_vlan_stripping); } +void hw_atl_rpo_outer_vlan_tag_mode_set(void *context, + u32 outervlantagmode) +{ + aq_hw_write_reg_bit(context, HW_ATL_RPO_OUTER_VL_INS_MODE_ADR, + HW_ATL_RPO_OUTER_VL_INS_MODE_MSK, + HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT, + outervlantagmode); +} + +u32 hw_atl_rpo_outer_vlan_tag_mode_get(void *context) +{ + return aq_hw_read_reg_bit(context, HW_ATL_RPO_OUTER_VL_INS_MODE_ADR, + HW_ATL_RPO_OUTER_VL_INS_MODE_MSK, + HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT); +} + void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw, u32 tcp_udp_crc_offload_en) { diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h index 34b42ce43512..0c37abbabca5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h @@ -488,6 +488,11 @@ void hw_atl_rpo_rx_desc_vlan_stripping_set(struct aq_hw_s *aq_hw, u32 rx_desc_vlan_stripping, u32 descriptor); +void hw_atl_rpo_outer_vlan_tag_mode_set(void *context, + u32 outervlantagmode); + +u32 hw_atl_rpo_outer_vlan_tag_mode_get(void *context); + /* set tcp/udp checksum offload enable */ void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw, u32 tcp_udp_crc_offload_en); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h index fc1446f737bb..c3febcdfa92e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h @@ -1383,6 +1383,24 @@ /* default value of bitfield l4_chk_en */ #define HW_ATL_RPOL4CHK_EN_DEFAULT 0x0 +/* RX outer_vl_ins_mode Bitfield Definitions + * Preprocessor definitions for the bitfield "outer_vl_ins_mode". 
+ * PORT="pif_rpo_outer_vl_mode_i" + */ + +/* Register address for bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_ADR 0x00005580 +/* Bitmask for bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_MSK 0x00000004 +/* Inverted bitmask for bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_MSKN 0xFFFFFFFB +/* Lower bit position of bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_SHIFT 2 +/* Width of bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_WIDTH 1 +/* Default value of bitfield outer_vl_ins_mode */ +#define HW_ATL_RPO_OUTER_VL_INS_MODE_DEFAULT 0x0 + /* rx reg_res_dsbl bitfield definitions * preprocessor definitions for the bitfield "reg_res_dsbl". * port="pif_rx_reg_res_dsbl_i" diff --git a/drivers/net/ethernet/aquantia/atlantic/ver.h b/drivers/net/ethernet/aquantia/atlantic/ver.h index 23374bffa92b..597654b51e01 100644 --- a/drivers/net/ethernet/aquantia/atlantic/ver.h +++ b/drivers/net/ethernet/aquantia/atlantic/ver.h @@ -7,11 +7,6 @@ #ifndef VER_H #define VER_H -#define NIC_MAJOR_DRIVER_VERSION 2 -#define NIC_MINOR_DRIVER_VERSION 0 -#define NIC_BUILD_DRIVER_VERSION 4 -#define NIC_REVISION_DRIVER_VERSION 0 - #define AQ_CFG_DRV_VERSION_SUFFIX "-kern" #endif /* VER_H */ diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig index 953ff1f9ac70..0058051ba925 100644 --- a/drivers/net/ethernet/atheros/Kconfig +++ b/drivers/net/ethernet/atheros/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_ATHEROS bool "Atheros devices" default y - depends on PCI + depends on (PCI || ATH79) ---help--- If you have a network (Ethernet) card belonging to this class, say Y. @@ -17,6 +17,14 @@ config NET_VENDOR_ATHEROS if NET_VENDOR_ATHEROS +config AG71XX + tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support" + depends on ATH79 + select PHYLIB + help + If you wish to compile a kernel for AR7XXX/91XXX and enable + ethernet support, then you should always answer Y to this. + config ATL2 tristate "Atheros L2 Fast Ethernet support" depends on PCI diff --git a/drivers/net/ethernet/atheros/Makefile b/drivers/net/ethernet/atheros/Makefile index aa3d394b87e6..aca696cb6425 100644 --- a/drivers/net/ethernet/atheros/Makefile +++ b/drivers/net/ethernet/atheros/Makefile @@ -3,6 +3,7 @@ # Makefile for the Atheros network device drivers. # +obj-$(CONFIG_AG71XX) += ag71xx.o obj-$(CONFIG_ATL1) += atlx/ obj-$(CONFIG_ATL2) += atlx/ obj-$(CONFIG_ATL1E) += atl1e/ diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c new file mode 100644 index 000000000000..72a57c6cd254 --- /dev/null +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -0,0 +1,1898 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Atheros AR71xx built-in ethernet mac driver + * + * Copyright (C) 2019 Oleksij Rempel <o.rempel@pengutronix.de> + * + * List of authors contributed to this driver before mainlining: + * Alexander Couzens <lynxis@fe80.eu> + * Christian Lamparter <chunkeey@gmail.com> + * Chuanhong Guo <gch981213@gmail.com> + * Daniel F. 
Dickinson <cshored@thecshore.com> + * David Bauer <mail@david-bauer.net> + * Felix Fietkau <nbd@nbd.name> + * Gabor Juhos <juhosg@freemail.hu> + * Hauke Mehrtens <hauke@hauke-m.de> + * Johann Neuhauser <johann@it-neuhauser.de> + * John Crispin <john@phrozen.org> + * Jo-Philipp Wich <jo@mein.io> + * Koen Vandeputte <koen.vandeputte@ncentric.com> + * Lucian Cristian <lucian.cristian@gmail.com> + * Matt Merhar <mattmerhar@protonmail.com> + * Milan Krstic <milan.krstic@gmail.com> + * Petr Štetiar <ynezz@true.cz> + * Rosen Penev <rosenp@gmail.com> + * Stephen Walker <stephendwalker+github@gmail.com> + * Vittorio Gambaletta <openwrt@vittgam.net> + * Weijie Gao <hackpascal@gmail.com> + * Imre Kaloz <kaloz@openwrt.org> + */ + +#include <linux/if_vlan.h> +#include <linux/mfd/syscon.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/of_platform.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <linux/clk.h> + +/* For our NAPI weight bigger does *NOT* mean better - it means more + * D-cache misses and lots more wasted cycles than we'll ever + * possibly gain from saving instructions. + */ +#define AG71XX_NAPI_WEIGHT 32 +#define AG71XX_OOM_REFILL (1 + HZ / 10) + +#define AG71XX_INT_ERR (AG71XX_INT_RX_BE | AG71XX_INT_TX_BE) +#define AG71XX_INT_TX (AG71XX_INT_TX_PS) +#define AG71XX_INT_RX (AG71XX_INT_RX_PR | AG71XX_INT_RX_OF) + +#define AG71XX_INT_POLL (AG71XX_INT_RX | AG71XX_INT_TX) +#define AG71XX_INT_INIT (AG71XX_INT_ERR | AG71XX_INT_POLL) + +#define AG71XX_TX_MTU_LEN 1540 + +#define AG71XX_TX_RING_SPLIT 512 +#define AG71XX_TX_RING_DS_PER_PKT DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \ + AG71XX_TX_RING_SPLIT) +#define AG71XX_TX_RING_SIZE_DEFAULT 128 +#define AG71XX_RX_RING_SIZE_DEFAULT 256 + +#define AG71XX_MDIO_RETRY 1000 +#define AG71XX_MDIO_DELAY 5 +#define AG71XX_MDIO_MAX_CLK 5000000 + +/* Register offsets */ +#define AG71XX_REG_MAC_CFG1 0x0000 +#define MAC_CFG1_TXE BIT(0) /* Tx Enable */ +#define MAC_CFG1_STX BIT(1) /* Synchronize Tx Enable */ +#define MAC_CFG1_RXE BIT(2) /* Rx Enable */ +#define MAC_CFG1_SRX BIT(3) /* Synchronize Rx Enable */ +#define MAC_CFG1_TFC BIT(4) /* Tx Flow Control Enable */ +#define MAC_CFG1_RFC BIT(5) /* Rx Flow Control Enable */ +#define MAC_CFG1_SR BIT(31) /* Soft Reset */ +#define MAC_CFG1_INIT (MAC_CFG1_RXE | MAC_CFG1_TXE | \ + MAC_CFG1_SRX | MAC_CFG1_STX) + +#define AG71XX_REG_MAC_CFG2 0x0004 +#define MAC_CFG2_FDX BIT(0) +#define MAC_CFG2_PAD_CRC_EN BIT(2) +#define MAC_CFG2_LEN_CHECK BIT(4) +#define MAC_CFG2_IF_1000 BIT(9) +#define MAC_CFG2_IF_10_100 BIT(8) + +#define AG71XX_REG_MAC_MFL 0x0010 + +#define AG71XX_REG_MII_CFG 0x0020 +#define MII_CFG_CLK_DIV_4 0 +#define MII_CFG_CLK_DIV_6 2 +#define MII_CFG_CLK_DIV_8 3 +#define MII_CFG_CLK_DIV_10 4 +#define MII_CFG_CLK_DIV_14 5 +#define MII_CFG_CLK_DIV_20 6 +#define MII_CFG_CLK_DIV_28 7 +#define MII_CFG_CLK_DIV_34 8 +#define MII_CFG_CLK_DIV_42 9 +#define MII_CFG_CLK_DIV_50 10 +#define MII_CFG_CLK_DIV_58 11 +#define MII_CFG_CLK_DIV_66 12 +#define MII_CFG_CLK_DIV_74 13 +#define MII_CFG_CLK_DIV_82 14 +#define MII_CFG_CLK_DIV_98 15 +#define MII_CFG_RESET BIT(31) + +#define AG71XX_REG_MII_CMD 0x0024 +#define MII_CMD_READ BIT(0) + +#define AG71XX_REG_MII_ADDR 0x0028 +#define MII_ADDR_SHIFT 8 + +#define AG71XX_REG_MII_CTRL 0x002c +#define AG71XX_REG_MII_STATUS 0x0030 +#define AG71XX_REG_MII_IND 0x0034 +#define MII_IND_BUSY BIT(0) +#define MII_IND_INVALID BIT(2) + +#define AG71XX_REG_MAC_IFCTL 0x0038 +#define MAC_IFCTL_SPEED BIT(16) + +#define AG71XX_REG_MAC_ADDR1 0x0040 +#define
AG71XX_REG_MAC_ADDR2 0x0044 +#define AG71XX_REG_FIFO_CFG0 0x0048 +#define FIFO_CFG0_WTM BIT(0) /* Watermark Module */ +#define FIFO_CFG0_RXS BIT(1) /* Rx System Module */ +#define FIFO_CFG0_RXF BIT(2) /* Rx Fabric Module */ +#define FIFO_CFG0_TXS BIT(3) /* Tx System Module */ +#define FIFO_CFG0_TXF BIT(4) /* Tx Fabric Module */ +#define FIFO_CFG0_ALL (FIFO_CFG0_WTM | FIFO_CFG0_RXS | FIFO_CFG0_RXF \ + | FIFO_CFG0_TXS | FIFO_CFG0_TXF) +#define FIFO_CFG0_INIT (FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT) + +#define FIFO_CFG0_ENABLE_SHIFT 8 + +#define AG71XX_REG_FIFO_CFG1 0x004c +#define AG71XX_REG_FIFO_CFG2 0x0050 +#define AG71XX_REG_FIFO_CFG3 0x0054 +#define AG71XX_REG_FIFO_CFG4 0x0058 +#define FIFO_CFG4_DE BIT(0) /* Drop Event */ +#define FIFO_CFG4_DV BIT(1) /* RX_DV Event */ +#define FIFO_CFG4_FC BIT(2) /* False Carrier */ +#define FIFO_CFG4_CE BIT(3) /* Code Error */ +#define FIFO_CFG4_CR BIT(4) /* CRC error */ +#define FIFO_CFG4_LM BIT(5) /* Length Mismatch */ +#define FIFO_CFG4_LO BIT(6) /* Length out of range */ +#define FIFO_CFG4_OK BIT(7) /* Packet is OK */ +#define FIFO_CFG4_MC BIT(8) /* Multicast Packet */ +#define FIFO_CFG4_BC BIT(9) /* Broadcast Packet */ +#define FIFO_CFG4_DR BIT(10) /* Dribble */ +#define FIFO_CFG4_LE BIT(11) /* Long Event */ +#define FIFO_CFG4_CF BIT(12) /* Control Frame */ +#define FIFO_CFG4_PF BIT(13) /* Pause Frame */ +#define FIFO_CFG4_UO BIT(14) /* Unsupported Opcode */ +#define FIFO_CFG4_VT BIT(15) /* VLAN tag detected */ +#define FIFO_CFG4_FT BIT(16) /* Frame Truncated */ +#define FIFO_CFG4_UC BIT(17) /* Unicast Packet */ +#define FIFO_CFG4_INIT (FIFO_CFG4_DE | FIFO_CFG4_DV | FIFO_CFG4_FC | \ + FIFO_CFG4_CE | FIFO_CFG4_CR | FIFO_CFG4_LM | \ + FIFO_CFG4_LO | FIFO_CFG4_OK | FIFO_CFG4_MC | \ + FIFO_CFG4_BC | FIFO_CFG4_DR | FIFO_CFG4_LE | \ + FIFO_CFG4_CF | FIFO_CFG4_PF | FIFO_CFG4_UO | \ + FIFO_CFG4_VT) + +#define AG71XX_REG_FIFO_CFG5 0x005c +#define FIFO_CFG5_DE BIT(0) /* Drop Event */ +#define FIFO_CFG5_DV BIT(1) /* RX_DV Event */ +#define FIFO_CFG5_FC BIT(2) /* False Carrier */ +#define FIFO_CFG5_CE BIT(3) /* Code Error */ +#define FIFO_CFG5_LM BIT(4) /* Length Mismatch */ +#define FIFO_CFG5_LO BIT(5) /* Length Out of Range */ +#define FIFO_CFG5_OK BIT(6) /* Packet is OK */ +#define FIFO_CFG5_MC BIT(7) /* Multicast Packet */ +#define FIFO_CFG5_BC BIT(8) /* Broadcast Packet */ +#define FIFO_CFG5_DR BIT(9) /* Dribble */ +#define FIFO_CFG5_CF BIT(10) /* Control Frame */ +#define FIFO_CFG5_PF BIT(11) /* Pause Frame */ +#define FIFO_CFG5_UO BIT(12) /* Unsupported Opcode */ +#define FIFO_CFG5_VT BIT(13) /* VLAN tag detected */ +#define FIFO_CFG5_LE BIT(14) /* Long Event */ +#define FIFO_CFG5_FT BIT(15) /* Frame Truncated */ +#define FIFO_CFG5_16 BIT(16) /* unknown */ +#define FIFO_CFG5_17 BIT(17) /* unknown */ +#define FIFO_CFG5_SF BIT(18) /* Short Frame */ +#define FIFO_CFG5_BM BIT(19) /* Byte Mode */ +#define FIFO_CFG5_INIT (FIFO_CFG5_DE | FIFO_CFG5_DV | FIFO_CFG5_FC | \ + FIFO_CFG5_CE | FIFO_CFG5_LO | FIFO_CFG5_OK | \ + FIFO_CFG5_MC | FIFO_CFG5_BC | FIFO_CFG5_DR | \ + FIFO_CFG5_CF | FIFO_CFG5_PF | FIFO_CFG5_VT | \ + FIFO_CFG5_LE | FIFO_CFG5_FT | FIFO_CFG5_16 | \ + FIFO_CFG5_17 | FIFO_CFG5_SF) + +#define AG71XX_REG_TX_CTRL 0x0180 +#define TX_CTRL_TXE BIT(0) /* Tx Enable */ + +#define AG71XX_REG_TX_DESC 0x0184 +#define AG71XX_REG_TX_STATUS 0x0188 +#define TX_STATUS_PS BIT(0) /* Packet Sent */ +#define TX_STATUS_UR BIT(1) /* Tx Underrun */ +#define TX_STATUS_BE BIT(3) /* Bus Error */ + +#define AG71XX_REG_RX_CTRL 0x018c +#define RX_CTRL_RXE BIT(0) /* Rx Enable */ 
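[Editor's note: a quick sanity check of the FIFO_CFG0 encoding above — the five module-enable flags occupy bits 4:0 and are mirrored into the enable field at bit 8, so FIFO_CFG0_INIT comes out as 0x1f00. A throwaway userspace check, with BIT() expanded by hand:

#include <assert.h>

#define FIFO_CFG0_ALL		0x1fu	/* WTM | RXS | RXF | TXS | TXF */
#define FIFO_CFG0_ENABLE_SHIFT	8
#define FIFO_CFG0_INIT		(FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT)

int main(void)
{
	assert(FIFO_CFG0_INIT == 0x1f00u);	/* enables land in bits 12:8 */
	return 0;
}
]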
+ +#define AG71XX_DMA_RETRY 10 +#define AG71XX_DMA_DELAY 1 + +#define AG71XX_REG_RX_DESC 0x0190 +#define AG71XX_REG_RX_STATUS 0x0194 +#define RX_STATUS_PR BIT(0) /* Packet Received */ +#define RX_STATUS_OF BIT(2) /* Rx Overflow */ +#define RX_STATUS_BE BIT(3) /* Bus Error */ + +#define AG71XX_REG_INT_ENABLE 0x0198 +#define AG71XX_REG_INT_STATUS 0x019c +#define AG71XX_INT_TX_PS BIT(0) +#define AG71XX_INT_TX_UR BIT(1) +#define AG71XX_INT_TX_BE BIT(3) +#define AG71XX_INT_RX_PR BIT(4) +#define AG71XX_INT_RX_OF BIT(6) +#define AG71XX_INT_RX_BE BIT(7) + +#define AG71XX_REG_FIFO_DEPTH 0x01a8 +#define AG71XX_REG_RX_SM 0x01b0 +#define AG71XX_REG_TX_SM 0x01b4 + +#define ETH_SWITCH_HEADER_LEN 2 + +#define AG71XX_DEFAULT_MSG_ENABLE \ + (NETIF_MSG_DRV \ + | NETIF_MSG_PROBE \ + | NETIF_MSG_LINK \ + | NETIF_MSG_TIMER \ + | NETIF_MSG_IFDOWN \ + | NETIF_MSG_IFUP \ + | NETIF_MSG_RX_ERR \ + | NETIF_MSG_TX_ERR) + +#define DESC_EMPTY BIT(31) +#define DESC_MORE BIT(24) +#define DESC_PKTLEN_M 0xfff +struct ag71xx_desc { + u32 data; + u32 ctrl; + u32 next; + u32 pad; +} __aligned(4); + +#define AG71XX_DESC_SIZE roundup(sizeof(struct ag71xx_desc), \ + L1_CACHE_BYTES) + +struct ag71xx_buf { + union { + struct { + struct sk_buff *skb; + unsigned int len; + } tx; + struct { + dma_addr_t dma_addr; + void *rx_buf; + } rx; + }; +}; + +struct ag71xx_ring { + /* "Hot" fields in the data path. */ + unsigned int curr; + unsigned int dirty; + + /* "Cold" fields - not used in the data path. */ + struct ag71xx_buf *buf; + u16 order; + u16 desc_split; + dma_addr_t descs_dma; + u8 *descs_cpu; +}; + +enum ag71xx_type { + AR7100, + AR7240, + AR9130, + AR9330, + AR9340, + QCA9530, + QCA9550, +}; + +struct ag71xx_dcfg { + u32 max_frame_len; + const u32 *fifodata; + u16 desc_pktlen_mask; + bool tx_hang_workaround; + enum ag71xx_type type; +}; + +struct ag71xx { + /* Critical data related to the per-packet data path are clustered + * early in this structure to help improve the D-cache footprint. + */ + struct ag71xx_ring rx_ring ____cacheline_aligned; + struct ag71xx_ring tx_ring ____cacheline_aligned; + + u16 rx_buf_size; + u8 rx_buf_offset; + + struct net_device *ndev; + struct platform_device *pdev; + struct napi_struct napi; + u32 msg_enable; + const struct ag71xx_dcfg *dcfg; + + /* From this point onwards we're not looking at per-packet fields. 
*/ + void __iomem *mac_base; + + struct ag71xx_desc *stop_desc; + dma_addr_t stop_desc_dma; + + int phy_if_mode; + + struct delayed_work restart_work; + struct timer_list oom_timer; + + struct reset_control *mac_reset; + + u32 fifodata[3]; + int mac_idx; + + struct reset_control *mdio_reset; + struct mii_bus *mii_bus; + struct clk *clk_mdio; + struct clk *clk_eth; +}; + +static int ag71xx_desc_empty(struct ag71xx_desc *desc) +{ + return (desc->ctrl & DESC_EMPTY) != 0; +} + +static struct ag71xx_desc *ag71xx_ring_desc(struct ag71xx_ring *ring, int idx) +{ + return (struct ag71xx_desc *)&ring->descs_cpu[idx * AG71XX_DESC_SIZE]; +} + +static int ag71xx_ring_size_order(int size) +{ + return fls(size - 1); +} + +static bool ag71xx_is(struct ag71xx *ag, enum ag71xx_type type) +{ + return ag->dcfg->type == type; +} + +static void ag71xx_wr(struct ag71xx *ag, unsigned int reg, u32 value) +{ + iowrite32(value, ag->mac_base + reg); + /* flush write */ + (void)ioread32(ag->mac_base + reg); +} + +static u32 ag71xx_rr(struct ag71xx *ag, unsigned int reg) +{ + return ioread32(ag->mac_base + reg); +} + +static void ag71xx_sb(struct ag71xx *ag, unsigned int reg, u32 mask) +{ + void __iomem *r; + + r = ag->mac_base + reg; + iowrite32(ioread32(r) | mask, r); + /* flush write */ + (void)ioread32(r); +} + +static void ag71xx_cb(struct ag71xx *ag, unsigned int reg, u32 mask) +{ + void __iomem *r; + + r = ag->mac_base + reg; + iowrite32(ioread32(r) & ~mask, r); + /* flush write */ + (void)ioread32(r); +} + +static void ag71xx_int_enable(struct ag71xx *ag, u32 ints) +{ + ag71xx_sb(ag, AG71XX_REG_INT_ENABLE, ints); +} + +static void ag71xx_int_disable(struct ag71xx *ag, u32 ints) +{ + ag71xx_cb(ag, AG71XX_REG_INT_ENABLE, ints); +} + +static int ag71xx_mdio_wait_busy(struct ag71xx *ag) +{ + struct net_device *ndev = ag->ndev; + int i; + + for (i = 0; i < AG71XX_MDIO_RETRY; i++) { + u32 busy; + + udelay(AG71XX_MDIO_DELAY); + + busy = ag71xx_rr(ag, AG71XX_REG_MII_IND); + if (!busy) + return 0; + + udelay(AG71XX_MDIO_DELAY); + } + + netif_err(ag, link, ndev, "MDIO operation timed out\n"); + + return -ETIMEDOUT; +} + +static int ag71xx_mdio_mii_read(struct mii_bus *bus, int addr, int reg) +{ + struct ag71xx *ag = bus->priv; + int err, val; + + err = ag71xx_mdio_wait_busy(ag); + if (err) + return err; + + ag71xx_wr(ag, AG71XX_REG_MII_ADDR, + ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff)); + /* enable read mode */ + ag71xx_wr(ag, AG71XX_REG_MII_CMD, MII_CMD_READ); + + err = ag71xx_mdio_wait_busy(ag); + if (err) + return err; + + val = ag71xx_rr(ag, AG71XX_REG_MII_STATUS); + /* disable read mode */ + ag71xx_wr(ag, AG71XX_REG_MII_CMD, 0); + + netif_dbg(ag, link, ag->ndev, "mii_read: addr=%04x, reg=%04x, value=%04x\n", + addr, reg, val); + + return val; +} + +static int ag71xx_mdio_mii_write(struct mii_bus *bus, int addr, int reg, + u16 val) +{ + struct ag71xx *ag = bus->priv; + + netif_dbg(ag, link, ag->ndev, "mii_write: addr=%04x, reg=%04x, value=%04x\n", + addr, reg, val); + + ag71xx_wr(ag, AG71XX_REG_MII_ADDR, + ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff)); + ag71xx_wr(ag, AG71XX_REG_MII_CTRL, val); + + return ag71xx_mdio_wait_busy(ag); +} + +static const u32 ar71xx_mdio_div_table[] = { + 4, 4, 6, 8, 10, 14, 20, 28, +}; + +static const u32 ar7240_mdio_div_table[] = { + 2, 2, 4, 6, 8, 12, 18, 26, 32, 40, 48, 56, 62, 70, 78, 96, +}; + +static const u32 ar933x_mdio_div_table[] = { + 4, 4, 6, 8, 10, 14, 20, 28, 34, 42, 50, 58, 66, 74, 82, 98, +}; + +static int ag71xx_mdio_get_divider(struct ag71xx *ag, u32 *div) +{ 
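+	/* Walk the SoC-specific divider table in ascending order and pick
+	 * the first divider that keeps the resulting MDC frequency at or
+	 * below AG71XX_MDIO_MAX_CLK (5 MHz).
+	 */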
+ unsigned long ref_clock; + const u32 *table; + int ndivs, i; + + ref_clock = clk_get_rate(ag->clk_mdio); + if (!ref_clock) + return -EINVAL; + + if (ag71xx_is(ag, AR9330) || ag71xx_is(ag, AR9340)) { + table = ar933x_mdio_div_table; + ndivs = ARRAY_SIZE(ar933x_mdio_div_table); + } else if (ag71xx_is(ag, AR7240)) { + table = ar7240_mdio_div_table; + ndivs = ARRAY_SIZE(ar7240_mdio_div_table); + } else { + table = ar71xx_mdio_div_table; + ndivs = ARRAY_SIZE(ar71xx_mdio_div_table); + } + + for (i = 0; i < ndivs; i++) { + unsigned long t; + + t = ref_clock / table[i]; + if (t <= AG71XX_MDIO_MAX_CLK) { + *div = i; + return 0; + } + } + + return -ENOENT; +} + +static int ag71xx_mdio_reset(struct mii_bus *bus) +{ + struct ag71xx *ag = bus->priv; + int err; + u32 t; + + err = ag71xx_mdio_get_divider(ag, &t); + if (err) + return err; + + ag71xx_wr(ag, AG71XX_REG_MII_CFG, t | MII_CFG_RESET); + usleep_range(100, 200); + + ag71xx_wr(ag, AG71XX_REG_MII_CFG, t); + usleep_range(100, 200); + + return 0; +} + +static int ag71xx_mdio_probe(struct ag71xx *ag) +{ + struct device *dev = &ag->pdev->dev; + struct net_device *ndev = ag->ndev; + static struct mii_bus *mii_bus; + struct device_node *np; + int err; + + np = dev->of_node; + ag->mii_bus = NULL; + + ag->clk_mdio = devm_clk_get(dev, "mdio"); + if (IS_ERR(ag->clk_mdio)) { + netif_err(ag, probe, ndev, "Failed to get mdio clk.\n"); + return PTR_ERR(ag->clk_mdio); + } + + err = clk_prepare_enable(ag->clk_mdio); + if (err) { + netif_err(ag, probe, ndev, "Failed to enable mdio clk.\n"); + return err; + } + + mii_bus = devm_mdiobus_alloc(dev); + if (!mii_bus) { + err = -ENOMEM; + goto mdio_err_put_clk; + } + + ag->mdio_reset = of_reset_control_get_exclusive(np, "mdio"); + if (IS_ERR(ag->mdio_reset)) { + netif_err(ag, probe, ndev, "Failed to get reset mdio.\n"); + return PTR_ERR(ag->mdio_reset); + } + + mii_bus->name = "ag71xx_mdio"; + mii_bus->read = ag71xx_mdio_mii_read; + mii_bus->write = ag71xx_mdio_mii_write; + mii_bus->reset = ag71xx_mdio_reset; + mii_bus->priv = ag; + mii_bus->parent = dev; + snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s.%d", np->name, ag->mac_idx); + + if (!IS_ERR(ag->mdio_reset)) { + reset_control_assert(ag->mdio_reset); + msleep(100); + reset_control_deassert(ag->mdio_reset); + msleep(200); + } + + err = of_mdiobus_register(mii_bus, np); + if (err) + goto mdio_err_put_clk; + + ag->mii_bus = mii_bus; + + return 0; + +mdio_err_put_clk: + clk_disable_unprepare(ag->clk_mdio); + return err; +} + +static void ag71xx_mdio_remove(struct ag71xx *ag) +{ + if (ag->mii_bus) + mdiobus_unregister(ag->mii_bus); + clk_disable_unprepare(ag->clk_mdio); +} + +static void ag71xx_hw_stop(struct ag71xx *ag) +{ + /* disable all interrupts and stop the rx/tx engine */ + ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, 0); + ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0); + ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0); +} + +static bool ag71xx_check_dma_stuck(struct ag71xx *ag) +{ + unsigned long timestamp; + u32 rx_sm, tx_sm, rx_fd; + + timestamp = netdev_get_tx_queue(ag->ndev, 0)->trans_start; + if (likely(time_before(jiffies, timestamp + HZ / 10))) + return false; + + if (!netif_carrier_ok(ag->ndev)) + return false; + + rx_sm = ag71xx_rr(ag, AG71XX_REG_RX_SM); + if ((rx_sm & 0x7) == 0x3 && ((rx_sm >> 4) & 0x7) == 0x6) + return true; + + tx_sm = ag71xx_rr(ag, AG71XX_REG_TX_SM); + rx_fd = ag71xx_rr(ag, AG71XX_REG_FIFO_DEPTH); + if (((tx_sm >> 4) & 0x7) == 0 && ((rx_sm & 0x7) == 0) && + ((rx_sm >> 4) & 0x7) == 0 && rx_fd == 0) + return true; + + return false; +} + +static int 
ag71xx_tx_packets(struct ag71xx *ag, bool flush) +{ + struct ag71xx_ring *ring = &ag->tx_ring; + int sent = 0, bytes_compl = 0, n = 0; + struct net_device *ndev = ag->ndev; + int ring_mask, ring_size; + bool dma_stuck = false; + + ring_mask = BIT(ring->order) - 1; + ring_size = BIT(ring->order); + + netif_dbg(ag, tx_queued, ndev, "processing TX ring\n"); + + while (ring->dirty + n != ring->curr) { + struct ag71xx_desc *desc; + struct sk_buff *skb; + unsigned int i; + + i = (ring->dirty + n) & ring_mask; + desc = ag71xx_ring_desc(ring, i); + skb = ring->buf[i].tx.skb; + + if (!flush && !ag71xx_desc_empty(desc)) { + if (ag->dcfg->tx_hang_workaround && + ag71xx_check_dma_stuck(ag)) { + schedule_delayed_work(&ag->restart_work, + HZ / 2); + dma_stuck = true; + } + break; + } + + if (flush) + desc->ctrl |= DESC_EMPTY; + + n++; + if (!skb) + continue; + + dev_kfree_skb_any(skb); + ring->buf[i].tx.skb = NULL; + + bytes_compl += ring->buf[i].tx.len; + + sent++; + ring->dirty += n; + + while (n > 0) { + ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS); + n--; + } + } + + netif_dbg(ag, tx_done, ndev, "%d packets sent out\n", sent); + + if (!sent) + return 0; + + ag->ndev->stats.tx_bytes += bytes_compl; + ag->ndev->stats.tx_packets += sent; + + netdev_completed_queue(ag->ndev, sent, bytes_compl); + if ((ring->curr - ring->dirty) < (ring_size * 3) / 4) + netif_wake_queue(ag->ndev); + + if (!dma_stuck) + cancel_delayed_work(&ag->restart_work); + + return sent; +} + +static void ag71xx_dma_wait_stop(struct ag71xx *ag) +{ + struct net_device *ndev = ag->ndev; + int i; + + for (i = 0; i < AG71XX_DMA_RETRY; i++) { + u32 rx, tx; + + mdelay(AG71XX_DMA_DELAY); + + rx = ag71xx_rr(ag, AG71XX_REG_RX_CTRL) & RX_CTRL_RXE; + tx = ag71xx_rr(ag, AG71XX_REG_TX_CTRL) & TX_CTRL_TXE; + if (!rx && !tx) + return; + } + + netif_err(ag, hw, ndev, "DMA stop operation timed out\n"); +} + +static void ag71xx_dma_reset(struct ag71xx *ag) +{ + struct net_device *ndev = ag->ndev; + u32 val; + int i; + + /* stop RX and TX */ + ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0); + ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0); + + /* give the hardware some time to really stop all rx/tx activity + * clearing the descriptors too early causes random memory corruption + */ + ag71xx_dma_wait_stop(ag); + + /* clear descriptor addresses */ + ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->stop_desc_dma); + ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->stop_desc_dma); + + /* clear pending RX/TX interrupts */ + for (i = 0; i < 256; i++) { + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR); + ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS); + } + + /* clear pending errors */ + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE | RX_STATUS_OF); + ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE | TX_STATUS_UR); + + val = ag71xx_rr(ag, AG71XX_REG_RX_STATUS); + if (val) + netif_err(ag, hw, ndev, "unable to clear DMA Rx status: %08x\n", + val); + + val = ag71xx_rr(ag, AG71XX_REG_TX_STATUS); + + /* mask out reserved bits */ + val &= ~0xff000000; + + if (val) + netif_err(ag, hw, ndev, "unable to clear DMA Tx status: %08x\n", + val); +} + +static void ag71xx_hw_setup(struct ag71xx *ag) +{ + u32 init = MAC_CFG1_INIT; + + /* setup MAC configuration registers */ + ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, init); + + ag71xx_sb(ag, AG71XX_REG_MAC_CFG2, + MAC_CFG2_PAD_CRC_EN | MAC_CFG2_LEN_CHECK); + + /* setup max frame length to zero */ + ag71xx_wr(ag, AG71XX_REG_MAC_MFL, 0); + + /* setup FIFO configuration registers */ + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG0, FIFO_CFG0_INIT); + ag71xx_wr(ag, 
AG71XX_REG_FIFO_CFG1, ag->fifodata[0]); + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG2, ag->fifodata[1]); + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG4, FIFO_CFG4_INIT); + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, FIFO_CFG5_INIT); +} + +static unsigned int ag71xx_max_frame_len(unsigned int mtu) +{ + return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN; +} + +static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac) +{ + u32 t; + + t = (((u32)mac[5]) << 24) | (((u32)mac[4]) << 16) + | (((u32)mac[3]) << 8) | ((u32)mac[2]); + + ag71xx_wr(ag, AG71XX_REG_MAC_ADDR1, t); + + t = (((u32)mac[1]) << 24) | (((u32)mac[0]) << 16); + ag71xx_wr(ag, AG71XX_REG_MAC_ADDR2, t); +} + +static void ag71xx_fast_reset(struct ag71xx *ag) +{ + struct net_device *dev = ag->ndev; + u32 rx_ds; + u32 mii_reg; + + ag71xx_hw_stop(ag); + + mii_reg = ag71xx_rr(ag, AG71XX_REG_MII_CFG); + rx_ds = ag71xx_rr(ag, AG71XX_REG_RX_DESC); + + ag71xx_tx_packets(ag, true); + + reset_control_assert(ag->mac_reset); + usleep_range(10, 20); + reset_control_deassert(ag->mac_reset); + usleep_range(10, 20); + + ag71xx_dma_reset(ag); + ag71xx_hw_setup(ag); + ag->tx_ring.curr = 0; + ag->tx_ring.dirty = 0; + netdev_reset_queue(ag->ndev); + + /* setup max frame length */ + ag71xx_wr(ag, AG71XX_REG_MAC_MFL, + ag71xx_max_frame_len(ag->ndev->mtu)); + + ag71xx_wr(ag, AG71XX_REG_RX_DESC, rx_ds); + ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma); + ag71xx_wr(ag, AG71XX_REG_MII_CFG, mii_reg); + + ag71xx_hw_set_macaddr(ag, dev->dev_addr); +} + +static void ag71xx_hw_start(struct ag71xx *ag) +{ + /* start RX engine */ + ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE); + + /* enable interrupts */ + ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, AG71XX_INT_INIT); + + netif_wake_queue(ag->ndev); +} + +static void ag71xx_link_adjust(struct ag71xx *ag, bool update) +{ + struct phy_device *phydev = ag->ndev->phydev; + u32 cfg2; + u32 ifctl; + u32 fifo5; + + if (!phydev->link && update) { + ag71xx_hw_stop(ag); + return; + } + + if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130)) + ag71xx_fast_reset(ag); + + cfg2 = ag71xx_rr(ag, AG71XX_REG_MAC_CFG2); + cfg2 &= ~(MAC_CFG2_IF_1000 | MAC_CFG2_IF_10_100 | MAC_CFG2_FDX); + cfg2 |= (phydev->duplex) ? 
MAC_CFG2_FDX : 0; + + ifctl = ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL); + ifctl &= ~(MAC_IFCTL_SPEED); + + fifo5 = ag71xx_rr(ag, AG71XX_REG_FIFO_CFG5); + fifo5 &= ~FIFO_CFG5_BM; + + switch (phydev->speed) { + case SPEED_1000: + cfg2 |= MAC_CFG2_IF_1000; + fifo5 |= FIFO_CFG5_BM; + break; + case SPEED_100: + cfg2 |= MAC_CFG2_IF_10_100; + ifctl |= MAC_IFCTL_SPEED; + break; + case SPEED_10: + cfg2 |= MAC_CFG2_IF_10_100; + break; + default: + WARN(1, "not supported speed %i\n", phydev->speed); + return; + } + + if (ag->tx_ring.desc_split) { + ag->fifodata[2] &= 0xffff; + ag->fifodata[2] |= ((2048 - ag->tx_ring.desc_split) / 4) << 16; + } + + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, ag->fifodata[2]); + + ag71xx_wr(ag, AG71XX_REG_MAC_CFG2, cfg2); + ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, fifo5); + ag71xx_wr(ag, AG71XX_REG_MAC_IFCTL, ifctl); + + ag71xx_hw_start(ag); + + if (update) + phy_print_status(phydev); +} + +static void ag71xx_phy_link_adjust(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + + ag71xx_link_adjust(ag, true); +} + +static int ag71xx_phy_connect(struct ag71xx *ag) +{ + struct device_node *np = ag->pdev->dev.of_node; + struct net_device *ndev = ag->ndev; + struct device_node *phy_node; + struct phy_device *phydev; + int ret; + + if (of_phy_is_fixed_link(np)) { + ret = of_phy_register_fixed_link(np); + if (ret < 0) { + netif_err(ag, probe, ndev, "Failed to register fixed PHY link: %d\n", + ret); + return ret; + } + + phy_node = of_node_get(np); + } else { + phy_node = of_parse_phandle(np, "phy-handle", 0); + } + + if (!phy_node) { + netif_err(ag, probe, ndev, "Could not find valid phy node\n"); + return -ENODEV; + } + + phydev = of_phy_connect(ag->ndev, phy_node, ag71xx_phy_link_adjust, + 0, ag->phy_if_mode); + + of_node_put(phy_node); + + if (!phydev) { + netif_err(ag, probe, ndev, "Could not connect to PHY device\n"); + return -ENODEV; + } + + phy_attached_info(phydev); + + return 0; +} + +static void ag71xx_ring_tx_clean(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->tx_ring; + int ring_mask = BIT(ring->order) - 1; + u32 bytes_compl = 0, pkts_compl = 0; + struct net_device *ndev = ag->ndev; + + while (ring->curr != ring->dirty) { + struct ag71xx_desc *desc; + u32 i = ring->dirty & ring_mask; + + desc = ag71xx_ring_desc(ring, i); + if (!ag71xx_desc_empty(desc)) { + desc->ctrl = 0; + ndev->stats.tx_errors++; + } + + if (ring->buf[i].tx.skb) { + bytes_compl += ring->buf[i].tx.len; + pkts_compl++; + dev_kfree_skb_any(ring->buf[i].tx.skb); + } + ring->buf[i].tx.skb = NULL; + ring->dirty++; + } + + /* flush descriptors */ + wmb(); + + netdev_completed_queue(ndev, pkts_compl, bytes_compl); +} + +static void ag71xx_ring_tx_init(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->tx_ring; + int ring_size = BIT(ring->order); + int ring_mask = ring_size - 1; + int i; + + for (i = 0; i < ring_size; i++) { + struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); + + desc->next = (u32)(ring->descs_dma + + AG71XX_DESC_SIZE * ((i + 1) & ring_mask)); + + desc->ctrl = DESC_EMPTY; + ring->buf[i].tx.skb = NULL; + } + + /* flush descriptors */ + wmb(); + + ring->curr = 0; + ring->dirty = 0; + netdev_reset_queue(ag->ndev); +} + +static void ag71xx_ring_rx_clean(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->rx_ring; + int ring_size = BIT(ring->order); + int i; + + if (!ring->buf) + return; + + for (i = 0; i < ring_size; i++) + if (ring->buf[i].rx.rx_buf) { + dma_unmap_single(&ag->pdev->dev, + ring->buf[i].rx.dma_addr, + ag->rx_buf_size, DMA_FROM_DEVICE); + 
skb_free_frag(ring->buf[i].rx.rx_buf); + } +} + +static int ag71xx_buffer_size(struct ag71xx *ag) +{ + return ag->rx_buf_size + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); +} + +static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf, + int offset, + void *(*alloc)(unsigned int size)) +{ + struct ag71xx_ring *ring = &ag->rx_ring; + struct ag71xx_desc *desc; + void *data; + + desc = ag71xx_ring_desc(ring, buf - &ring->buf[0]); + + data = alloc(ag71xx_buffer_size(ag)); + if (!data) + return false; + + buf->rx.rx_buf = data; + buf->rx.dma_addr = dma_map_single(&ag->pdev->dev, data, ag->rx_buf_size, + DMA_FROM_DEVICE); + desc->data = (u32)buf->rx.dma_addr + offset; + return true; +} + +static int ag71xx_ring_rx_init(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->rx_ring; + struct net_device *ndev = ag->ndev; + int ring_mask = BIT(ring->order) - 1; + int ring_size = BIT(ring->order); + unsigned int i; + int ret; + + ret = 0; + for (i = 0; i < ring_size; i++) { + struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); + + desc->next = (u32)(ring->descs_dma + + AG71XX_DESC_SIZE * ((i + 1) & ring_mask)); + + netif_dbg(ag, rx_status, ndev, "RX desc at %p, next is %08x\n", + desc, desc->next); + } + + for (i = 0; i < ring_size; i++) { + struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); + + if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], ag->rx_buf_offset, + netdev_alloc_frag)) { + ret = -ENOMEM; + break; + } + + desc->ctrl = DESC_EMPTY; + } + + /* flush descriptors */ + wmb(); + + ring->curr = 0; + ring->dirty = 0; + + return ret; +} + +static int ag71xx_ring_rx_refill(struct ag71xx *ag) +{ + struct ag71xx_ring *ring = &ag->rx_ring; + int ring_mask = BIT(ring->order) - 1; + int offset = ag->rx_buf_offset; + unsigned int count; + + count = 0; + for (; ring->curr - ring->dirty > 0; ring->dirty++) { + struct ag71xx_desc *desc; + unsigned int i; + + i = ring->dirty & ring_mask; + desc = ag71xx_ring_desc(ring, i); + + if (!ring->buf[i].rx.rx_buf && + !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset, + napi_alloc_frag)) + break; + + desc->ctrl = DESC_EMPTY; + count++; + } + + /* flush descriptors */ + wmb(); + + netif_dbg(ag, rx_status, ag->ndev, "%u rx descriptors refilled\n", + count); + + return count; +} + +static int ag71xx_rings_init(struct ag71xx *ag) +{ + struct ag71xx_ring *tx = &ag->tx_ring; + struct ag71xx_ring *rx = &ag->rx_ring; + int ring_size, tx_size; + + ring_size = BIT(tx->order) + BIT(rx->order); + tx_size = BIT(tx->order); + + tx->buf = kcalloc(ring_size, sizeof(*tx->buf), GFP_KERNEL); + if (!tx->buf) + return -ENOMEM; + + tx->descs_cpu = dma_alloc_coherent(&ag->pdev->dev, + ring_size * AG71XX_DESC_SIZE, + &tx->descs_dma, GFP_ATOMIC); + if (!tx->descs_cpu) { + kfree(tx->buf); + tx->buf = NULL; + return -ENOMEM; + } + + rx->buf = &tx->buf[BIT(tx->order)]; + rx->descs_cpu = ((void *)tx->descs_cpu) + tx_size * AG71XX_DESC_SIZE; + rx->descs_dma = tx->descs_dma + tx_size * AG71XX_DESC_SIZE; + + ag71xx_ring_tx_init(ag); + return ag71xx_ring_rx_init(ag); +} + +static void ag71xx_rings_free(struct ag71xx *ag) +{ + struct ag71xx_ring *tx = &ag->tx_ring; + struct ag71xx_ring *rx = &ag->rx_ring; + int ring_size; + + ring_size = BIT(tx->order) + BIT(rx->order); + + if (tx->descs_cpu) + dma_free_coherent(&ag->pdev->dev, ring_size * AG71XX_DESC_SIZE, + tx->descs_cpu, tx->descs_dma); + + kfree(tx->buf); + + tx->descs_cpu = NULL; + rx->descs_cpu = NULL; + tx->buf = NULL; + rx->buf = NULL; +} + +static void ag71xx_rings_cleanup(struct ag71xx *ag) +{ + ag71xx_ring_rx_clean(ag); 
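+	/* tx and rx share the buf array and the coherent descriptor block
+	 * allocated in ag71xx_rings_init(), so drain both rings before
+	 * ag71xx_rings_free() releases that memory
+	 */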
+ ag71xx_ring_tx_clean(ag); + ag71xx_rings_free(ag); + + netdev_reset_queue(ag->ndev); +} + +static void ag71xx_hw_init(struct ag71xx *ag) +{ + ag71xx_hw_stop(ag); + + ag71xx_sb(ag, AG71XX_REG_MAC_CFG1, MAC_CFG1_SR); + usleep_range(20, 30); + + reset_control_assert(ag->mac_reset); + msleep(100); + reset_control_deassert(ag->mac_reset); + msleep(200); + + ag71xx_hw_setup(ag); + + ag71xx_dma_reset(ag); +} + +static int ag71xx_hw_enable(struct ag71xx *ag) +{ + int ret; + + ret = ag71xx_rings_init(ag); + if (ret) + return ret; + + napi_enable(&ag->napi); + ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma); + ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma); + netif_start_queue(ag->ndev); + + return 0; +} + +static void ag71xx_hw_disable(struct ag71xx *ag) +{ + netif_stop_queue(ag->ndev); + + ag71xx_hw_stop(ag); + ag71xx_dma_reset(ag); + + napi_disable(&ag->napi); + del_timer_sync(&ag->oom_timer); + + ag71xx_rings_cleanup(ag); +} + +static int ag71xx_open(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + unsigned int max_frame_len; + int ret; + + max_frame_len = ag71xx_max_frame_len(ndev->mtu); + ag->rx_buf_size = + SKB_DATA_ALIGN(max_frame_len + NET_SKB_PAD + NET_IP_ALIGN); + + /* setup max frame length */ + ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len); + ag71xx_hw_set_macaddr(ag, ndev->dev_addr); + + ret = ag71xx_hw_enable(ag); + if (ret) + goto err; + + ret = ag71xx_phy_connect(ag); + if (ret) + goto err; + + phy_start(ndev->phydev); + + return 0; + +err: + ag71xx_rings_cleanup(ag); + return ret; +} + +static int ag71xx_stop(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); + ag71xx_hw_disable(ag); + + return 0; +} + +static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len) +{ + int i, ring_mask, ndesc, split; + struct ag71xx_desc *desc; + + ring_mask = BIT(ring->order) - 1; + ndesc = 0; + split = ring->desc_split; + + if (!split) + split = len; + + while (len > 0) { + unsigned int cur_len = len; + + i = (ring->curr + ndesc) & ring_mask; + desc = ag71xx_ring_desc(ring, i); + + if (!ag71xx_desc_empty(desc)) + return -1; + + if (cur_len > split) { + cur_len = split; + + /* TX will hang if DMA transfers <= 4 bytes, + * make sure next segment is more than 4 bytes long. 
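+			 * e.g. with a 512 byte split, a 514 byte remainder
+			 * is cut at 508 so the tail segment becomes 6 bytes
+			 * instead of 2.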
+ */ + if (len <= split + 4) + cur_len -= 4; + } + + desc->data = addr; + addr += cur_len; + len -= cur_len; + + if (len > 0) + cur_len |= DESC_MORE; + + /* prevent early tx attempt of this descriptor */ + if (!ndesc) + cur_len |= DESC_EMPTY; + + desc->ctrl = cur_len; + ndesc++; + } + + return ndesc; +} + +static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + int i, n, ring_min, ring_mask, ring_size; + struct ag71xx *ag = netdev_priv(ndev); + struct ag71xx_ring *ring; + struct ag71xx_desc *desc; + dma_addr_t dma_addr; + + ring = &ag->tx_ring; + ring_mask = BIT(ring->order) - 1; + ring_size = BIT(ring->order); + + if (skb->len <= 4) { + netif_dbg(ag, tx_err, ndev, "packet len is too small\n"); + goto err_drop; + } + + dma_addr = dma_map_single(&ag->pdev->dev, skb->data, skb->len, + DMA_TO_DEVICE); + + i = ring->curr & ring_mask; + desc = ag71xx_ring_desc(ring, i); + + /* setup descriptor fields */ + n = ag71xx_fill_dma_desc(ring, (u32)dma_addr, + skb->len & ag->dcfg->desc_pktlen_mask); + if (n < 0) + goto err_drop_unmap; + + i = (ring->curr + n - 1) & ring_mask; + ring->buf[i].tx.len = skb->len; + ring->buf[i].tx.skb = skb; + + netdev_sent_queue(ndev, skb->len); + + skb_tx_timestamp(skb); + + desc->ctrl &= ~DESC_EMPTY; + ring->curr += n; + + /* flush descriptor */ + wmb(); + + ring_min = 2; + if (ring->desc_split) + ring_min *= AG71XX_TX_RING_DS_PER_PKT; + + if (ring->curr - ring->dirty >= ring_size - ring_min) { + netif_dbg(ag, tx_err, ndev, "tx queue full\n"); + netif_stop_queue(ndev); + } + + netif_dbg(ag, tx_queued, ndev, "packet injected into TX queue\n"); + + /* enable TX engine */ + ag71xx_wr(ag, AG71XX_REG_TX_CTRL, TX_CTRL_TXE); + + return NETDEV_TX_OK; + +err_drop_unmap: + dma_unmap_single(&ag->pdev->dev, dma_addr, skb->len, DMA_TO_DEVICE); + +err_drop: + ndev->stats.tx_dropped++; + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int ag71xx_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd) +{ + if (!ndev->phydev) + return -EINVAL; + + return phy_mii_ioctl(ndev->phydev, ifr, cmd); +} + +static void ag71xx_oom_timer_handler(struct timer_list *t) +{ + struct ag71xx *ag = from_timer(ag, t, oom_timer); + + napi_schedule(&ag->napi); +} + +static void ag71xx_tx_timeout(struct net_device *ndev) +{ + struct ag71xx *ag = netdev_priv(ndev); + + netif_err(ag, tx_err, ndev, "tx timeout\n"); + + schedule_delayed_work(&ag->restart_work, 1); +} + +static void ag71xx_restart_work_func(struct work_struct *work) +{ + struct ag71xx *ag = container_of(work, struct ag71xx, + restart_work.work); + struct net_device *ndev = ag->ndev; + + rtnl_lock(); + ag71xx_hw_disable(ag); + ag71xx_hw_enable(ag); + if (ndev->phydev->link) + ag71xx_link_adjust(ag, false); + rtnl_unlock(); +} + +static int ag71xx_rx_packets(struct ag71xx *ag, int limit) +{ + struct net_device *ndev = ag->ndev; + int ring_mask, ring_size, done = 0; + unsigned int pktlen_mask, offset; + struct sk_buff *next, *skb; + struct ag71xx_ring *ring; + struct list_head rx_list; + + ring = &ag->rx_ring; + pktlen_mask = ag->dcfg->desc_pktlen_mask; + offset = ag->rx_buf_offset; + ring_mask = BIT(ring->order) - 1; + ring_size = BIT(ring->order); + + netif_dbg(ag, rx_status, ndev, "rx packets, limit=%d, curr=%u, dirty=%u\n", + limit, ring->curr, ring->dirty); + + INIT_LIST_HEAD(&rx_list); + + while (done < limit) { + unsigned int i = ring->curr & ring_mask; + struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i); + int pktlen; + int err = 0; + + if (ag71xx_desc_empty(desc)) + break; + + if 
((ring->dirty + ring_size) == ring->curr) { + WARN_ONCE(1, "RX out of ring"); + break; + } + + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR); + + pktlen = desc->ctrl & pktlen_mask; + pktlen -= ETH_FCS_LEN; + + dma_unmap_single(&ag->pdev->dev, ring->buf[i].rx.dma_addr, + ag->rx_buf_size, DMA_FROM_DEVICE); + + ndev->stats.rx_packets++; + ndev->stats.rx_bytes += pktlen; + + skb = build_skb(ring->buf[i].rx.rx_buf, ag71xx_buffer_size(ag)); + if (!skb) { + skb_free_frag(ring->buf[i].rx.rx_buf); + goto next; + } + + skb_reserve(skb, offset); + skb_put(skb, pktlen); + + if (err) { + ndev->stats.rx_dropped++; + kfree_skb(skb); + } else { + skb->dev = ndev; + skb->ip_summed = CHECKSUM_NONE; + list_add_tail(&skb->list, &rx_list); + } + +next: + ring->buf[i].rx.rx_buf = NULL; + done++; + + ring->curr++; + } + + ag71xx_ring_rx_refill(ag); + + list_for_each_entry_safe(skb, next, &rx_list, list) + skb->protocol = eth_type_trans(skb, ndev); + netif_receive_skb_list(&rx_list); + + netif_dbg(ag, rx_status, ndev, "rx finish, curr=%u, dirty=%u, done=%d\n", + ring->curr, ring->dirty, done); + + return done; +} + +static int ag71xx_poll(struct napi_struct *napi, int limit) +{ + struct ag71xx *ag = container_of(napi, struct ag71xx, napi); + struct ag71xx_ring *rx_ring = &ag->rx_ring; + int rx_ring_size = BIT(rx_ring->order); + struct net_device *ndev = ag->ndev; + int tx_done, rx_done; + u32 status; + + tx_done = ag71xx_tx_packets(ag, false); + + netif_dbg(ag, rx_status, ndev, "processing RX ring\n"); + rx_done = ag71xx_rx_packets(ag, limit); + + if (!rx_ring->buf[rx_ring->dirty % rx_ring_size].rx.rx_buf) + goto oom; + + status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS); + if (unlikely(status & RX_STATUS_OF)) { + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_OF); + ndev->stats.rx_fifo_errors++; + + /* restart RX */ + ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE); + } + + if (rx_done < limit) { + if (status & RX_STATUS_PR) + goto more; + + status = ag71xx_rr(ag, AG71XX_REG_TX_STATUS); + if (status & TX_STATUS_PS) + goto more; + + netif_dbg(ag, rx_status, ndev, "disable polling mode, rx=%d, tx=%d,limit=%d\n", + rx_done, tx_done, limit); + + napi_complete(napi); + + /* enable interrupts */ + ag71xx_int_enable(ag, AG71XX_INT_POLL); + return rx_done; + } + +more: + netif_dbg(ag, rx_status, ndev, "stay in polling mode, rx=%d, tx=%d, limit=%d\n", + rx_done, tx_done, limit); + return limit; + +oom: + netif_err(ag, rx_err, ndev, "out of memory\n"); + + mod_timer(&ag->oom_timer, jiffies + AG71XX_OOM_REFILL); + napi_complete(napi); + return 0; +} + +static irqreturn_t ag71xx_interrupt(int irq, void *dev_id) +{ + struct net_device *ndev = dev_id; + struct ag71xx *ag; + u32 status; + + ag = netdev_priv(ndev); + status = ag71xx_rr(ag, AG71XX_REG_INT_STATUS); + + if (unlikely(!status)) + return IRQ_NONE; + + if (unlikely(status & AG71XX_INT_ERR)) { + if (status & AG71XX_INT_TX_BE) { + ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE); + netif_err(ag, intr, ndev, "TX BUS error\n"); + } + if (status & AG71XX_INT_RX_BE) { + ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE); + netif_err(ag, intr, ndev, "RX BUS error\n"); + } + } + + if (likely(status & AG71XX_INT_POLL)) { + ag71xx_int_disable(ag, AG71XX_INT_POLL); + netif_dbg(ag, intr, ndev, "enable polling mode\n"); + napi_schedule(&ag->napi); + } + + return IRQ_HANDLED; +} + +static int ag71xx_change_mtu(struct net_device *ndev, int new_mtu) +{ + struct ag71xx *ag = netdev_priv(ndev); + + ndev->mtu = new_mtu; + ag71xx_wr(ag, AG71XX_REG_MAC_MFL, + 
ag71xx_max_frame_len(ndev->mtu)); + + return 0; +} + +static const struct net_device_ops ag71xx_netdev_ops = { + .ndo_open = ag71xx_open, + .ndo_stop = ag71xx_stop, + .ndo_start_xmit = ag71xx_hard_start_xmit, + .ndo_do_ioctl = ag71xx_do_ioctl, + .ndo_tx_timeout = ag71xx_tx_timeout, + .ndo_change_mtu = ag71xx_change_mtu, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, +}; + +static const u32 ar71xx_addr_ar7100[] = { + 0x19000000, 0x1a000000, +}; + +static int ag71xx_probe(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + const struct ag71xx_dcfg *dcfg; + struct net_device *ndev; + struct resource *res; + const void *mac_addr; + int tx_size, err, i; + struct ag71xx *ag; + + if (!np) + return -ENODEV; + + ndev = devm_alloc_etherdev(&pdev->dev, sizeof(*ag)); + if (!ndev) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + + dcfg = of_device_get_match_data(&pdev->dev); + if (!dcfg) + return -EINVAL; + + ag = netdev_priv(ndev); + ag->mac_idx = -1; + for (i = 0; i < ARRAY_SIZE(ar71xx_addr_ar7100); i++) { + if (ar71xx_addr_ar7100[i] == res->start) + ag->mac_idx = i; + } + + if (ag->mac_idx < 0) { + netif_err(ag, probe, ndev, "unknown mac idx\n"); + return -EINVAL; + } + + ag->clk_eth = devm_clk_get(&pdev->dev, "eth"); + if (IS_ERR(ag->clk_eth)) { + netif_err(ag, probe, ndev, "Failed to get eth clk.\n"); + return PTR_ERR(ag->clk_eth); + } + + SET_NETDEV_DEV(ndev, &pdev->dev); + + ag->pdev = pdev; + ag->ndev = ndev; + ag->dcfg = dcfg; + ag->msg_enable = netif_msg_init(-1, AG71XX_DEFAULT_MSG_ENABLE); + memcpy(ag->fifodata, dcfg->fifodata, sizeof(ag->fifodata)); + + ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); + if (IS_ERR(ag->mac_reset)) { + netif_err(ag, probe, ndev, "missing mac reset\n"); + err = PTR_ERR(ag->mac_reset); + goto err_free; + } + + ag->mac_base = devm_ioremap_nocache(&pdev->dev, res->start, + res->end - res->start + 1); + if (!ag->mac_base) { + err = -ENOMEM; + goto err_free; + } + + ndev->irq = platform_get_irq(pdev, 0); + err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, + 0x0, dev_name(&pdev->dev), ndev); + if (err) { + netif_err(ag, probe, ndev, "unable to request IRQ %d\n", + ndev->irq); + goto err_free; + } + + ndev->netdev_ops = &ag71xx_netdev_ops; + + INIT_DELAYED_WORK(&ag->restart_work, ag71xx_restart_work_func); + timer_setup(&ag->oom_timer, ag71xx_oom_timer_handler, 0); + + tx_size = AG71XX_TX_RING_SIZE_DEFAULT; + ag->rx_ring.order = ag71xx_ring_size_order(AG71XX_RX_RING_SIZE_DEFAULT); + + ndev->min_mtu = 68; + ndev->max_mtu = dcfg->max_frame_len - ag71xx_max_frame_len(0); + + ag->rx_buf_offset = NET_SKB_PAD; + if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130)) + ag->rx_buf_offset += NET_IP_ALIGN; + + if (ag71xx_is(ag, AR7100)) { + ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT; + tx_size *= AG71XX_TX_RING_DS_PER_PKT; + } + ag->tx_ring.order = ag71xx_ring_size_order(tx_size); + + ag->stop_desc = dmam_alloc_coherent(&pdev->dev, + sizeof(struct ag71xx_desc), + &ag->stop_desc_dma, GFP_KERNEL); + if (!ag->stop_desc) + goto err_free; + + ag->stop_desc->data = 0; + ag->stop_desc->ctrl = 0; + ag->stop_desc->next = (u32)ag->stop_desc_dma; + + mac_addr = of_get_mac_address(np); + if (mac_addr) + memcpy(ndev->dev_addr, mac_addr, ETH_ALEN); + if (!mac_addr || !is_valid_ether_addr(ndev->dev_addr)) { + netif_err(ag, probe, ndev, "invalid MAC address, using random address\n"); + eth_random_addr(ndev->dev_addr); + } + + ag->phy_if_mode = 
of_get_phy_mode(np); + if (ag->phy_if_mode < 0) { + netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); + err = ag->phy_if_mode; + goto err_free; + } + + netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); + + err = clk_prepare_enable(ag->clk_eth); + if (err) { + netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); + goto err_free; + } + + ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); + + ag71xx_hw_init(ag); + + err = ag71xx_mdio_probe(ag); + if (err) + goto err_put_clk; + + platform_set_drvdata(pdev, ndev); + + err = register_netdev(ndev); + if (err) { + netif_err(ag, probe, ndev, "unable to register net device\n"); + platform_set_drvdata(pdev, NULL); + goto err_mdio_remove; + } + + netif_info(ag, probe, ndev, "Atheros AG71xx at 0x%08lx, irq %d, mode:%s\n", + (unsigned long)ag->mac_base, ndev->irq, + phy_modes(ag->phy_if_mode)); + + return 0; + +err_mdio_remove: + ag71xx_mdio_remove(ag); +err_put_clk: + clk_disable_unprepare(ag->clk_eth); +err_free: + free_netdev(ndev); + return err; +} + +static int ag71xx_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct ag71xx *ag; + + if (!ndev) + return 0; + + ag = netdev_priv(ndev); + unregister_netdev(ndev); + ag71xx_mdio_remove(ag); + clk_disable_unprepare(ag->clk_eth); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +static const u32 ar71xx_fifo_ar7100[] = { + 0x0fff0000, 0x00001fff, 0x00780fff, +}; + +static const u32 ar71xx_fifo_ar9130[] = { + 0x0fff0000, 0x00001fff, 0x008001ff, +}; + +static const u32 ar71xx_fifo_ar9330[] = { + 0x0010ffff, 0x015500aa, 0x01f00140, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar7100 = { + .type = AR7100, + .fifodata = ar71xx_fifo_ar7100, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_4K - 1, + .tx_hang_workaround = false, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar7240 = { + .type = AR7240, + .fifodata = ar71xx_fifo_ar7100, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_4K - 1, + .tx_hang_workaround = true, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar9130 = { + .type = AR9130, + .fifodata = ar71xx_fifo_ar9130, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_4K - 1, + .tx_hang_workaround = false, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar9330 = { + .type = AR9330, + .fifodata = ar71xx_fifo_ar9330, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_4K - 1, + .tx_hang_workaround = true, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_ar9340 = { + .type = AR9340, + .fifodata = ar71xx_fifo_ar9330, + .max_frame_len = SZ_16K - 1, + .desc_pktlen_mask = SZ_16K - 1, + .tx_hang_workaround = true, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_qca9530 = { + .type = QCA9530, + .fifodata = ar71xx_fifo_ar9330, + .max_frame_len = SZ_16K - 1, + .desc_pktlen_mask = SZ_16K - 1, + .tx_hang_workaround = true, +}; + +static const struct ag71xx_dcfg ag71xx_dcfg_qca9550 = { + .type = QCA9550, + .fifodata = ar71xx_fifo_ar9330, + .max_frame_len = 1540, + .desc_pktlen_mask = SZ_16K - 1, + .tx_hang_workaround = true, +}; + +static const struct of_device_id ag71xx_match[] = { + { .compatible = "qca,ar7100-eth", .data = &ag71xx_dcfg_ar7100 }, + { .compatible = "qca,ar7240-eth", .data = &ag71xx_dcfg_ar7240 }, + { .compatible = "qca,ar7241-eth", .data = &ag71xx_dcfg_ar7240 }, + { .compatible = "qca,ar7242-eth", .data = &ag71xx_dcfg_ar7240 }, + { .compatible = "qca,ar9130-eth", .data = &ag71xx_dcfg_ar9130 }, + { .compatible = "qca,ar9330-eth", .data = &ag71xx_dcfg_ar9330 }, + { .compatible = "qca,ar9340-eth", 
.data = &ag71xx_dcfg_ar9340 }, + { .compatible = "qca,qca9530-eth", .data = &ag71xx_dcfg_qca9530 }, + { .compatible = "qca,qca9550-eth", .data = &ag71xx_dcfg_qca9550 }, + { .compatible = "qca,qca9560-eth", .data = &ag71xx_dcfg_qca9550 }, + {} +}; + +static struct platform_driver ag71xx_driver = { + .probe = ag71xx_probe, + .remove = ag71xx_remove, + .driver = { + .name = "ag71xx", + .of_match_table = ag71xx_match, + } +}; + +module_platform_driver(ag71xx_driver); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 25bf085324b8..be7f9cebb675 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2201,7 +2201,7 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct atl1c_adapter *adapter = netdev_priv(netdev); - u16 tpd_req = 1; + u16 tpd_req; struct atl1c_tpd_desc *tpd; enum atl1c_trans_queue type = atl1c_trans_normal; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index b123509d385f..e9017caf024d 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -8,6 +8,7 @@ config NET_VENDOR_BROADCOM default y depends on (SSB_POSSIBLE && HAS_DMA) || PCI || BCM63XX || \ SIBYTE_SB1xxx_SOC + select DIMLIB ---help--- If you have a network (Ethernet) chipset belonging to this class, say Y. @@ -198,6 +199,7 @@ config BNXT select FW_LOADER select LIBCRC32C select NET_DEVLINK + select PAGE_POOL ---help--- This driver supports Broadcom NetXtreme-C/E 10/25/40/50 gigabit Ethernet cards. To compile this driver as a module, choose M here: diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 85e610210477..291e4afd4a1a 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -2659,7 +2659,6 @@ static int bcm_enetsw_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; priv = netdev_priv(dev); - memset(priv, 0, sizeof(*priv)); /* initialize default and fetch platform data */ priv->enet_is_sw = true; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index cae9b77ff44b..b9c5cea8db16 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -609,7 +609,7 @@ static int bcm_sysport_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) { struct bcm_sysport_priv *priv = netdev_priv(dev); - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; unsigned int i; @@ -992,7 +992,7 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) { struct bcm_sysport_priv *priv = container_of(napi, struct bcm_sysport_priv, napi); - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; unsigned int work_done = 0; work_done = bcm_sysport_desc_rx(priv, budget); @@ -1016,8 +1016,8 @@ static int bcm_sysport_poll(struct napi_struct *napi, int budget) } if (priv->dim.use_dim) { - net_dim_sample(priv->dim.event_ctr, priv->dim.packets, - priv->dim.bytes, &dim_sample); + dim_update_sample(priv->dim.event_ctr, priv->dim.packets, + priv->dim.bytes, &dim_sample); net_dim(&priv->dim.dim, dim_sample); } @@ -1087,16 +1087,16 @@ static void bcm_sysport_resume_from_wol(struct bcm_sysport_priv *priv) static void bcm_sysport_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, 
struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct bcm_sysport_net_dim *ndim = container_of(dim, struct bcm_sysport_net_dim, dim); struct bcm_sysport_priv *priv = container_of(ndim, struct bcm_sysport_priv, dim); - struct net_dim_cq_moder cur_profile = - net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode, + dim->profile_ix); bcm_sysport_set_rx_coalesce(priv, cur_profile.usec, cur_profile.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } /* RX and misc interrupt routine */ @@ -1437,7 +1437,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv, struct bcm_sysport_net_dim *dim = &priv->dim; INIT_WORK(&dim->dim.work, cb); - dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; dim->event_ctr = 0; dim->packets = 0; dim->bytes = 0; @@ -1446,7 +1446,7 @@ static void bcm_sysport_init_dim(struct bcm_sysport_priv *priv, static void bcm_sysport_init_rx_coalesce(struct bcm_sysport_priv *priv) { struct bcm_sysport_net_dim *dim = &priv->dim; - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; usecs = priv->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 86193931203a..6d80735fbc7f 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -11,7 +11,7 @@ #include <linux/bitmap.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> -#include <linux/net_dim.h> +#include <linux/dim.h> /* Receive/transmit descriptor format */ #define DESC_ADDR_HI_STATUS_LEN 0x00 @@ -702,7 +702,7 @@ struct bcm_sysport_net_dim { u16 event_ctr; unsigned long packets; unsigned long bytes; - struct net_dim dim; + struct dim dim; }; /* Software view of the TX ring */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 008ad0ca89ba..656ed80647f0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -684,7 +684,7 @@ static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask) if (unlikely(gfpflags_allow_blocking(gfp_mask))) return (void *)__get_free_page(gfp_mask); - return netdev_alloc_frag(fp->rx_frag_size); + return napi_alloc_frag(fp->rx_frag_size); } return kmalloc(fp->rx_buf_size + NET_SKB_PAD, gfp_mask); @@ -3857,9 +3857,12 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { if (!(bp->flags & TX_TIMESTAMPING_EN)) { + bp->eth_stats.ptp_skip_tx_ts++; BNX2X_ERR("Tx timestamping was not enabled, this packet will not be timestamped\n"); } else if (bp->ptp_tx_skb) { - BNX2X_ERR("The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n"); + bp->eth_stats.ptp_skip_tx_ts++; + netdev_err_once(bp->dev, + "Device supports only a single outstanding packet to timestamp, this packet won't be timestamped\n"); } else { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* schedule check for Tx timestamp */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index 51fc845de31a..4a0ba6801c9e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -182,7 +182,9 @@ static const 
struct { { STATS_OFFSET32(driver_filtered_tx_pkt), 4, false, "driver_filtered_tx_pkt" }, { STATS_OFFSET32(eee_tx_lpi), - 4, true, "Tx LPI entry count"} + 4, true, "Tx LPI entry count"}, + { STATS_OFFSET32(ptp_skip_tx_ts), + 4, false, "ptp_skipped_tx_tstamp" }, }; #define BNX2X_NUM_STATS ARRAY_SIZE(bnx2x_stats_arr) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 03ac10b1cd1e..2cc14db8f0ec 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -15214,11 +15214,24 @@ static void bnx2x_ptp_task(struct work_struct *work) u32 val_seq; u64 timestamp, ns; struct skb_shared_hwtstamps shhwtstamps; + bool bail = true; + int i; + + /* FW may take a while to complete timestamping; try a bit and if it's + * still not complete, may indicate an error state - bail out then. + */ + for (i = 0; i < 10; i++) { + /* Read Tx timestamp registers */ + val_seq = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_SEQID : + NIG_REG_P0_TLLH_PTP_BUF_SEQID); + if (val_seq & 0x10000) { + bail = false; + break; + } + msleep(1 << i); + } - /* Read Tx timestamp registers */ - val_seq = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_SEQID : - NIG_REG_P0_TLLH_PTP_BUF_SEQID); - if (val_seq & 0x10000) { + if (!bail) { /* There is a valid timestamp value */ timestamp = REG_RD(bp, port ? NIG_REG_P1_TLLH_PTP_BUF_TS_MSB : NIG_REG_P0_TLLH_PTP_BUF_TS_MSB); @@ -15233,16 +15246,18 @@ static void bnx2x_ptp_task(struct work_struct *work) memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(ns); skb_tstamp_tx(bp->ptp_tx_skb, &shhwtstamps); - dev_kfree_skb_any(bp->ptp_tx_skb); - bp->ptp_tx_skb = NULL; DP(BNX2X_MSG_PTP, "Tx timestamp, timestamp cycles = %llu, ns = %llu\n", timestamp, ns); } else { - DP(BNX2X_MSG_PTP, "There is no valid Tx timestamp yet\n"); - /* Reschedule to keep checking for a valid timestamp value */ - schedule_work(&bp->ptp_task); + DP(BNX2X_MSG_PTP, + "Tx timestamp is not recorded (register read=%u)\n", + val_seq); + bp->eth_stats.ptp_skip_tx_ts++; } + + dev_kfree_skb_any(bp->ptp_tx_skb); + bp->ptp_tx_skb = NULL; } void bnx2x_set_rx_ts(struct bnx2x *bp, struct sk_buff *skb) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h index b2644ed13d06..d55e63692cf3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h @@ -207,6 +207,9 @@ struct bnx2x_eth_stats { u32 driver_filtered_tx_pkt; /* src: Clear-on-Read register; Will not survive PMF Migration */ u32 eee_tx_lpi; + + /* PTP */ + u32 ptp_skip_tx_ts; }; struct bnx2x_eth_q_stats { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f758b2e0591f..3f632028eff0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -54,6 +54,7 @@ #include <net/pkt_cls.h> #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> +#include <net/page_pool.h> #include "bnxt_hsi.h" #include "bnxt.h" @@ -668,19 +669,20 @@ next_tx_int: } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, + struct bnxt_rx_ring_info *rxr, gfp_t gfp) { struct device *dev = &bp->pdev->dev; struct page *page; - page = alloc_page(gfp); + page = page_pool_dev_alloc_pages(rxr->page_pool); if (!page) return NULL; *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); if 
(dma_mapping_error(dev, *mapping)) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } *mapping += bp->rx_dma_offset; @@ -716,7 +718,8 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, dma_addr_t mapping; if (BNXT_RX_PAGE_MODE(bp)) { - struct page *page = __bnxt_alloc_rx_page(bp, &mapping, gfp); + struct page *page = + __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp); if (!page) return -ENOMEM; @@ -1989,6 +1992,9 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } + if (event & BNXT_REDIRECT_EVENT) + xdp_do_flush_map(); + if (event & BNXT_TX_EVENT) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; u16 prod = txr->tx_prod; @@ -2130,12 +2136,12 @@ static int bnxt_poll(struct napi_struct *napi, int budget) } } if (bp->flags & BNXT_FLAG_DIM) { - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; - net_dim_sample(cpr->event_ctr, - cpr->rx_packets, - cpr->rx_bytes, - &dim_sample); + dim_update_sample(cpr->event_ctr, + cpr->rx_packets, + cpr->rx_bytes, + &dim_sample); net_dim(&cpr->dim, dim_sample); } return work_done; @@ -2254,9 +2260,23 @@ static void bnxt_free_tx_skbs(struct bnxt *bp) for (j = 0; j < max_idx;) { struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[j]; - struct sk_buff *skb = tx_buf->skb; + struct sk_buff *skb; int k, last; + if (i < bp->tx_nr_rings_xdp && + tx_buf->action == XDP_REDIRECT) { + dma_unmap_single(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + dma_unmap_len(tx_buf, len), + PCI_DMA_TODEVICE); + xdp_return_frame(tx_buf->xdpf); + tx_buf->action = 0; + tx_buf->xdpf = NULL; + j++; + continue; + } + + skb = tx_buf->skb; if (!skb) { j++; continue; @@ -2343,7 +2363,7 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); - __free_page(data); + page_pool_recycle_direct(rxr->page_pool, data); } else { dma_unmap_single_attrs(&pdev->dev, mapping, bp->rx_buf_use_size, @@ -2480,6 +2500,9 @@ static void bnxt_free_rx_rings(struct bnxt *bp) if (xdp_rxq_info_is_reg(&rxr->xdp_rxq)) xdp_rxq_info_unreg(&rxr->xdp_rxq); + page_pool_destroy(rxr->page_pool); + rxr->page_pool = NULL; + kfree(rxr->rx_tpa); rxr->rx_tpa = NULL; @@ -2494,6 +2517,26 @@ static void bnxt_free_rx_rings(struct bnxt *bp) } } +static int bnxt_alloc_rx_page_pool(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr) +{ + struct page_pool_params pp = { 0 }; + + pp.pool_size = bp->rx_ring_size; + pp.nid = dev_to_node(&bp->pdev->dev); + pp.dev = &bp->pdev->dev; + pp.dma_dir = DMA_BIDIRECTIONAL; + + rxr->page_pool = page_pool_create(&pp); + if (IS_ERR(rxr->page_pool)) { + int err = PTR_ERR(rxr->page_pool); + + rxr->page_pool = NULL; + return err; + } + return 0; +} + static int bnxt_alloc_rx_rings(struct bnxt *bp) { int i, rc, agg_rings = 0, tpa_rings = 0; @@ -2513,10 +2556,22 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp) ring = &rxr->rx_ring_struct; + rc = bnxt_alloc_rx_page_pool(bp, rxr); + if (rc) + return rc; + rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i); if (rc < 0) return rc; + rc = xdp_rxq_info_reg_mem_model(&rxr->xdp_rxq, + MEM_TYPE_PAGE_POOL, + rxr->page_pool); + if (rc) { + xdp_rxq_info_unreg(&rxr->xdp_rxq); + return rc; + } + rc = bnxt_alloc_ring(bp, &ring->ring_mem); if (rc) return rc; @@ -5508,7 +5563,16 @@ static int bnxt_cp_rings_in_use(struct bnxt *bp) static int bnxt_get_func_stat_ctxs(struct bnxt *bp) { - return bp->cp_nr_rings + bnxt_get_ulp_stat_ctxs(bp); + int ulp_stat = bnxt_get_ulp_stat_ctxs(bp); + int cp = 
bp->cp_nr_rings; + + if (!ulp_stat) + return cp; + + if (bnxt_nq_rings_in_use(bp) > cp + bnxt_get_ulp_msix_num(bp)) + return bnxt_get_ulp_msix_base(bp) + ulp_stat; + + return cp + ulp_stat; } static bool bnxt_need_reserve_rings(struct bnxt *bp) @@ -7477,11 +7541,7 @@ unsigned int bnxt_get_avail_cp_rings_for_en(struct bnxt *bp) unsigned int bnxt_get_avail_stat_ctxs_for_en(struct bnxt *bp) { - unsigned int stat; - - stat = bnxt_get_max_func_stat_ctxs(bp) - bnxt_get_ulp_stat_ctxs(bp); - stat -= bp->cp_nr_rings; - return stat; + return bnxt_get_max_func_stat_ctxs(bp) - bnxt_get_func_stat_ctxs(bp); } int bnxt_get_avail_msix(struct bnxt *bp, int num) @@ -7813,7 +7873,7 @@ static void bnxt_enable_napi(struct bnxt *bp) if (bp->bnapi[i]->rx_ring) { INIT_WORK(&cpr->dim.work, bnxt_dim_work); - cpr->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } napi_enable(&bp->bnapi[i]->napi); } @@ -9847,32 +9907,19 @@ static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int bnxt_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct bnxt *bp = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb, - bp, bp, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(bnxt_block_cb_list); static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct bnxt *bp = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return bnxt_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &bnxt_block_cb_list, + bnxt_setup_tc_block_cb, + bp, bp, true); case TC_SETUP_QDISC_MQPRIO: { struct tc_mqprio_qopt *mqprio = type_data; @@ -10233,6 +10280,7 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_udp_tunnel_add = bnxt_udp_tunnel_add, .ndo_udp_tunnel_del = bnxt_udp_tunnel_del, .ndo_bpf = bnxt_xdp, + .ndo_xdp_xmit = bnxt_xdp_xmit, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, .ndo_get_devlink_port = bnxt_get_devlink_port, @@ -10262,10 +10310,10 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_dcb_free(bp); kfree(bp->edev); bp->edev = NULL; + bnxt_cleanup_pci(bp); bnxt_free_ctx_mem(bp); kfree(bp->ctx); bp->ctx = NULL; - bnxt_cleanup_pci(bp); bnxt_free_port_stats(bp); free_netdev(dev); } @@ -10859,6 +10907,7 @@ static void bnxt_shutdown(struct pci_dev *pdev) if (system_state == SYSTEM_POWER_OFF) { bnxt_clear_int_mode(bp); + pci_disable_device(pdev); pci_wake_from_d3(pdev, bp->wol); pci_set_power_state(pdev, PCI_D3hot); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index be438d82f939..16694b704d15 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -24,7 +24,9 @@ #include <net/devlink.h> #include <net/dst_metadata.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> + +struct page_pool; struct tx_bd { __le32 tx_bd_len_flags_type; @@ -587,15 +589,21 @@ struct nqe_cn { #define BNXT_HWRM_CHNL_CHIMP 0 #define BNXT_HWRM_CHNL_KONG 1 -#define BNXT_RX_EVENT 1 -#define BNXT_AGG_EVENT 2 -#define BNXT_TX_EVENT 4 +#define BNXT_RX_EVENT 1 +#define BNXT_AGG_EVENT 2 +#define BNXT_TX_EVENT 4 +#define 
BNXT_REDIRECT_EVENT 8 struct bnxt_sw_tx_bd { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; DEFINE_DMA_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_LEN(len); u8 is_gso; u8 is_push; + u8 action; union { unsigned short nr_frags; u16 rx_prod; @@ -793,6 +801,7 @@ struct bnxt_rx_ring_info { struct bnxt_ring_struct rx_ring_struct; struct bnxt_ring_struct rx_agg_ring_struct; struct xdp_rxq_info xdp_rxq; + struct page_pool *page_pool; }; struct bnxt_cp_ring_info { @@ -810,7 +819,7 @@ struct bnxt_cp_ring_info { u64 rx_bytes; u64 event_ctr; - struct net_dim dim; + struct dim dim; union { struct tx_cmp *cp_desc_ring[MAX_CP_PAGES]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index 70775158c8c4..07301cb87c03 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -396,7 +396,7 @@ static int bnxt_hwrm_queue_dscp_qcaps(struct bnxt *bp) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_DSCP_QCAPS, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + rc = _hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { bp->max_dscp_value = (1 << resp->num_dscp_bits) - 1; if (bp->max_dscp_value < 0x3f) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c index 94e208e9789f..61393f351a77 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_debugfs.c @@ -11,7 +11,7 @@ #include <linux/module.h> #include <linux/pci.h> #include "bnxt_hsi.h" -#include <linux/net_dim.h> +#include <linux/dim.h> #include "bnxt.h" #include "bnxt_debugfs.h" @@ -21,7 +21,7 @@ static ssize_t debugfs_dim_read(struct file *filep, char __user *buffer, size_t count, loff_t *ppos) { - struct net_dim *dim = filep->private_data; + struct dim *dim = filep->private_data; int len; char *buf; @@ -61,7 +61,7 @@ static const struct file_operations debugfs_dim_fops = { .read = debugfs_dim_read, }; -static struct dentry *debugfs_dim_ring_init(struct net_dim *dim, int ring_idx, +static struct dentry *debugfs_dim_ring_init(struct dim *dim, int ring_idx, struct dentry *dd) { static char qname[16]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c index afa97c8bb081..6f6576dc417a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dim.c @@ -7,26 +7,25 @@ * the Free Software Foundation. 
*/ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "bnxt_hsi.h" #include "bnxt.h" void bnxt_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, - work); + struct dim *dim = container_of(work, struct dim, work); struct bnxt_cp_ring_info *cpr = container_of(dim, struct bnxt_cp_ring_info, dim); struct bnxt_napi *bnapi = container_of(cpr, struct bnxt_napi, cp_ring); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); cpr->rx_ring_coal.coal_ticks = cur_moder.usec; cpr->rx_ring_coal.coal_bufs = cur_moder.pkts; bnxt_hwrm_set_ring_coal(bnapi->bp, bnapi); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a6c7baf38036..c7ee63d69679 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2799,7 +2799,7 @@ static int bnxt_run_loopback(struct bnxt *bp) dev_kfree_skb(skb); return -EIO; } - bnxt_xmit_xdp(bp, txr, map, pkt_size, 0); + bnxt_xmit_bd(bp, txr, map, pkt_size); /* Sync BD data before updating doorbell */ wmb(); @@ -2842,7 +2842,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bool offline = false; u8 test_results = 0; u8 test_mask = 0; - int rc, i; + int rc = 0, i; if (!bp->num_tests || !BNXT_SINGLE_PF(bp)) return; @@ -2913,9 +2913,9 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } bnxt_hwrm_phy_loopback(bp, false, false); bnxt_half_close_nic(bp); - bnxt_open_nic(bp, false, true); + rc = bnxt_open_nic(bp, false, true); } - if (bnxt_test_irq(bp)) { + if (rc || bnxt_test_irq(bp)) { buf[BNXT_IRQ_TEST_IDX] = 1; etest->flags |= ETH_TEST_FL_FAILED; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 44d6c5743fb9..6fe4a7174271 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -170,10 +170,10 @@ static int bnxt_tc_parse_actions(struct bnxt *bp, } static int bnxt_tc_parse_flow(struct bnxt *bp, - struct tc_cls_flower_offload *tc_flow_cmd, + struct flow_cls_offload *tc_flow_cmd, struct bnxt_tc_flow *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(tc_flow_cmd); + struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd); struct flow_dissector *dissector = rule->match.dissector; /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */ @@ -1262,7 +1262,7 @@ static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow, * The hash-tables are already protected by the rhashtable API. 
*/ static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid, - struct tc_cls_flower_offload *tc_flow_cmd) + struct flow_cls_offload *tc_flow_cmd) { struct bnxt_tc_flow_node *new_node, *old_node; struct bnxt_tc_info *tc_info = bp->tc_info; @@ -1348,7 +1348,7 @@ done: } static int bnxt_tc_del_flow(struct bnxt *bp, - struct tc_cls_flower_offload *tc_flow_cmd) + struct flow_cls_offload *tc_flow_cmd) { struct bnxt_tc_info *tc_info = bp->tc_info; struct bnxt_tc_flow_node *flow_node; @@ -1363,7 +1363,7 @@ static int bnxt_tc_del_flow(struct bnxt *bp, } static int bnxt_tc_get_flow_stats(struct bnxt *bp, - struct tc_cls_flower_offload *tc_flow_cmd) + struct flow_cls_offload *tc_flow_cmd) { struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats; struct bnxt_tc_info *tc_info = bp->tc_info; @@ -1585,14 +1585,14 @@ void bnxt_tc_flow_stats_work(struct bnxt *bp) } int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return bnxt_tc_add_flow(bp, src_fid, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return bnxt_tc_del_flow(bp, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return bnxt_tc_get_flow_stats(bp, cls_flower); default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h index 8a0968967bc5..ffec57d1a5ec 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.h @@ -196,7 +196,7 @@ struct bnxt_tc_flow_node { }; int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, - struct tc_cls_flower_offload *cls_flower); + struct flow_cls_offload *cls_flower); int bnxt_init_tc(struct bnxt *bp); void bnxt_shutdown_tc(struct bnxt *bp); void bnxt_tc_flow_stats_work(struct bnxt *bp); @@ -209,7 +209,7 @@ static inline bool bnxt_tc_flower_enabled(struct bnxt *bp) #else /* CONFIG_BNXT_FLOWER_OFFLOAD */ static inline int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index bfa342a98d08..fc77caf0a076 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -157,8 +157,10 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id, if (BNXT_NEW_RM(bp)) { struct bnxt_hw_resc *hw_resc = &bp->hw_resc; + int resv_msix; - avail_msix = hw_resc->resv_irqs - bp->cp_nr_rings; + resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings; + avail_msix = min_t(int, resv_msix, avail_msix); edev->ulp_tbl[ulp_id].msix_requested = avail_msix; } bnxt_fill_msix_vecs(bp, ent); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c index f760921389a3..f9bf7d7250ab 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c @@ -161,34 +161,19 @@ static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type, } } -static int bnxt_vf_rep_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct bnxt_vf_rep *vf_rep = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - 
bnxt_vf_rep_setup_tc_block_cb, - vf_rep, vf_rep, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, - bnxt_vf_rep_setup_tc_block_cb, vf_rep); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(bnxt_vf_block_cb_list); static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct bnxt_vf_rep *vf_rep = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return bnxt_vf_rep_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &bnxt_vf_block_cb_list, + bnxt_vf_rep_setup_tc_block_cb, + vf_rep, vf_rep, true); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 0184ef6f05a7..c6f6f2033880 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -15,12 +15,14 @@ #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/filter.h> +#include <net/page_pool.h> #include "bnxt_hsi.h" #include "bnxt.h" #include "bnxt_xdp.h" -void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - dma_addr_t mapping, u32 len, u16 rx_prod) +struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len) { struct bnxt_sw_tx_bd *tx_buf; struct tx_bd *txbd; @@ -29,7 +31,6 @@ void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, prod = txr->tx_prod; tx_buf = &txr->tx_buf_ring[prod]; - tx_buf->rx_prod = rx_prod; txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; flags = (len << TX_BD_LEN_SHIFT) | (1 << TX_BD_FLAGS_BD_CNT_SHIFT) | @@ -40,30 +41,67 @@ void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, prod = NEXT_TX(prod); txr->tx_prod = prod; + return tx_buf; +} + +static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len, u16 rx_prod) +{ + struct bnxt_sw_tx_bd *tx_buf; + + tx_buf = bnxt_xmit_bd(bp, txr, mapping, len); + tx_buf->rx_prod = rx_prod; + tx_buf->action = XDP_TX; +} + +static void __bnxt_xmit_xdp_redirect(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len, + struct xdp_frame *xdpf) +{ + struct bnxt_sw_tx_bd *tx_buf; + + tx_buf = bnxt_xmit_bd(bp, txr, mapping, len); + tx_buf->action = XDP_REDIRECT; + tx_buf->xdpf = xdpf; + dma_unmap_addr_set(tx_buf, mapping, mapping); + dma_unmap_len_set(tx_buf, len, 0); } void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; + bool rx_doorbell_needed = false; struct bnxt_sw_tx_bd *tx_buf; u16 tx_cons = txr->tx_cons; u16 last_tx_cons = tx_cons; - u16 rx_prod; int i; for (i = 0; i < nr_pkts; i++) { - last_tx_cons = tx_cons; + tx_buf = &txr->tx_buf_ring[tx_cons]; + + if (tx_buf->action == XDP_REDIRECT) { + struct pci_dev *pdev = bp->pdev; + + dma_unmap_single(&pdev->dev, + dma_unmap_addr(tx_buf, mapping), + dma_unmap_len(tx_buf, len), + PCI_DMA_TODEVICE); + xdp_return_frame(tx_buf->xdpf); + tx_buf->action = 0; + tx_buf->xdpf = NULL; + } else if (tx_buf->action == XDP_TX) { + rx_doorbell_needed = true; + last_tx_cons = tx_cons; + } tx_cons = NEXT_TX(tx_cons); } txr->tx_cons = tx_cons; - if (bnxt_tx_avail(bp, txr) == bp->tx_ring_size) { - rx_prod = rxr->rx_prod; - } else { + if (rx_doorbell_needed) { tx_buf = &txr->tx_buf_ring[last_tx_cons]; - rx_prod = tx_buf->rx_prod; + bnxt_db_write(bp, &rxr->rx_db, tx_buf->rx_prod); } - 
bnxt_db_write(bp, &rxr->rx_db, rx_prod); } /* returns the following: @@ -88,19 +126,19 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, return false; pdev = bp->pdev; - txr = rxr->bnapi->tx_ring; rx_buf = &rxr->rx_buf_ring[cons]; offset = bp->rx_offset; + mapping = rx_buf->mapping - bp->rx_dma_offset; + dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); + + txr = rxr->bnapi->tx_ring; xdp.data_hard_start = *data_ptr - offset; xdp.data = *data_ptr; xdp_set_data_meta_invalid(&xdp); xdp.data_end = *data_ptr + *len; xdp.rxq = &rxr->xdp_rxq; orig_data = xdp.data; - mapping = rx_buf->mapping - bp->rx_dma_offset; - - dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); rcu_read_lock(); act = bpf_prog_run_xdp(xdp_prog, &xdp); @@ -132,10 +170,34 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, *event = BNXT_TX_EVENT; dma_sync_single_for_device(&pdev->dev, mapping + offset, *len, bp->rx_dir); - bnxt_xmit_xdp(bp, txr, mapping + offset, *len, - NEXT_RX(rxr->rx_prod)); + __bnxt_xmit_xdp(bp, txr, mapping + offset, *len, + NEXT_RX(rxr->rx_prod)); bnxt_reuse_rx_data(rxr, cons, page); return true; + case XDP_REDIRECT: + /* if we are calling this here then we know that the + * redirect is coming from a frame received by the + * bnxt_en driver. + */ + dma_unmap_page_attrs(&pdev->dev, mapping, + PAGE_SIZE, bp->rx_dir, + DMA_ATTR_WEAK_ORDERING); + + /* if we are unable to allocate a new buffer, abort and reuse */ + if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { + trace_xdp_exception(bp->dev, xdp_prog, act); + bnxt_reuse_rx_data(rxr, cons, page); + return true; + } + + if (xdp_do_redirect(bp->dev, &xdp, xdp_prog)) { + trace_xdp_exception(bp->dev, xdp_prog, act); + page_pool_recycle_direct(rxr->page_pool, page); + return true; + } + + *event |= BNXT_REDIRECT_EVENT; + break; default: bpf_warn_invalid_xdp_action(act); /* Fall thru */ @@ -149,6 +211,56 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, return true; } +int bnxt_xdp_xmit(struct net_device *dev, int num_frames, + struct xdp_frame **frames, u32 flags) +{ + struct bnxt *bp = netdev_priv(dev); + struct bpf_prog *xdp_prog = READ_ONCE(bp->xdp_prog); + struct pci_dev *pdev = bp->pdev; + struct bnxt_tx_ring_info *txr; + dma_addr_t mapping; + int drops = 0; + int ring; + int i; + + if (!test_bit(BNXT_STATE_OPEN, &bp->state) || + !bp->tx_nr_rings_xdp || + !xdp_prog) + return -EINVAL; + + ring = smp_processor_id() % bp->tx_nr_rings_xdp; + txr = &bp->tx_ring[ring]; + + for (i = 0; i < num_frames; i++) { + struct xdp_frame *xdp = frames[i]; + + if (!txr || !bnxt_tx_avail(bp, txr) || + !(bp->bnapi[ring]->flags & BNXT_NAPI_FLAG_XDP)) { + xdp_return_frame_rx_napi(xdp); + drops++; + continue; + } + + mapping = dma_map_single(&pdev->dev, xdp->data, xdp->len, + DMA_TO_DEVICE); + + if (dma_mapping_error(&pdev->dev, mapping)) { + xdp_return_frame_rx_napi(xdp); + drops++; + continue; + } + __bnxt_xmit_xdp_redirect(bp, txr, mapping, xdp->len, xdp); + } + + if (flags & XDP_XMIT_FLUSH) { + /* Sync BD data before updating doorbell */ + wmb(); + bnxt_db_write(bp, &txr->tx_db, txr->tx_prod); + } + + return num_frames - drops; +} + /* Under rtnl_lock */ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 414b748038ca..0df40c3beb05 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ 
b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -10,12 +10,15 @@ #ifndef BNXT_XDP_H #define BNXT_XDP_H -void bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr, - dma_addr_t mapping, u32 len, u16 rx_prod); +struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + dma_addr_t mapping, u32 len); void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, struct page *page, u8 **data_ptr, unsigned int *len, u8 *event); int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); +int bnxt_xdp_xmit(struct net_device *dev, int num_frames, + struct xdp_frame **frames, u32 flags); #endif diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 41b50e6570ea..34466b827dde 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -640,7 +640,7 @@ static void bcmgenet_set_rx_coalesce(struct bcmgenet_rx_ring *ring, static void bcmgenet_set_ring_rx_coalesce(struct bcmgenet_rx_ring *ring, struct ethtool_coalesce *ec) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; ring->rx_coalesce_usecs = ec->rx_coalesce_usecs; @@ -1895,7 +1895,7 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) { struct bcmgenet_rx_ring *ring = container_of(napi, struct bcmgenet_rx_ring, napi); - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; unsigned int work_done; work_done = bcmgenet_desc_rx(ring, budget); @@ -1906,8 +1906,8 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) } if (ring->dim.use_dim) { - net_dim_sample(ring->dim.event_ctr, ring->dim.packets, - ring->dim.bytes, &dim_sample); + dim_update_sample(ring->dim.event_ctr, ring->dim.packets, + ring->dim.bytes, &dim_sample); net_dim(&ring->dim.dim, dim_sample); } @@ -1916,16 +1916,16 @@ static int bcmgenet_rx_poll(struct napi_struct *napi, int budget) static void bcmgenet_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct bcmgenet_net_dim *ndim = container_of(dim, struct bcmgenet_net_dim, dim); struct bcmgenet_rx_ring *ring = container_of(ndim, struct bcmgenet_rx_ring, dim); - struct net_dim_cq_moder cur_profile = + struct dim_cq_moder cur_profile = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); bcmgenet_set_rx_coalesce(ring, cur_profile.usec, cur_profile.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } /* Assign skb to RX DMA descriptor. 
*/ @@ -2082,7 +2082,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring, struct bcmgenet_net_dim *dim = &ring->dim; INIT_WORK(&dim->dim.work, cb); - dim->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + dim->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; dim->event_ctr = 0; dim->packets = 0; dim->bytes = 0; @@ -2091,7 +2091,7 @@ static void bcmgenet_init_dim(struct bcmgenet_rx_ring *ring, static void bcmgenet_init_rx_coalesce(struct bcmgenet_rx_ring *ring) { struct bcmgenet_net_dim *dim = &ring->dim; - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; u32 usecs, pkts; usecs = ring->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 9ad835aee1bc..4a8fc03d82fd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -13,7 +13,7 @@ #include <linux/mii.h> #include <linux/if_vlan.h> #include <linux/phy.h> -#include <linux/net_dim.h> +#include <linux/dim.h> /* total number of Buffer Descriptors, same for Rx/Tx */ #define TOTAL_DESC 256 @@ -578,7 +578,7 @@ struct bcmgenet_net_dim { u16 event_ctr; unsigned long packets; unsigned long bytes; - struct net_dim dim; + struct dim dim; }; struct bcmgenet_rx_ring { diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 6d1f9c822548..4c404d2213f9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -6710,7 +6710,7 @@ static int tg3_alloc_rx_data(struct tg3 *tp, struct tg3_rx_prodring_set *tpr, skb_size = SKB_DATA_ALIGN(data_size + TG3_RX_OFFSET(tp)) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); if (skb_size <= PAGE_SIZE) { - data = netdev_alloc_frag(skb_size); + data = napi_alloc_frag(skb_size); *frag_size = skb_size; } else { data = kmalloc(skb_size, GFP_ATOMIC); diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 1766697c9c5a..f4b3bd85dfe3 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only # -# Atmel device configuration +# Cadence device configuration # config NET_VENDOR_CADENCE @@ -13,15 +13,15 @@ config NET_VENDOR_CADENCE If unsure, say Y. Note that the answer to this question doesn't directly affect the - kernel: saying N will just cause the configurator to skip all - the remaining Atmel network card questions. If you say Y, you will be + kernel: saying N will just cause the configurator to skip all the + remaining Cadence network card questions. If you say Y, you will be asked for your specific card in the following questions. if NET_VENDOR_CADENCE config MACB tristate "Cadence MACB/GEM support" - depends on HAS_DMA + depends on HAS_DMA && COMMON_CLK select PHYLIB ---help--- The Cadence MACB ethernet interface is found on many Atmel AT32 and @@ -42,7 +42,7 @@ config MACB_USE_HWSTAMP config MACB_PCI tristate "Cadence PCI MACB/GEM support" - depends on MACB && PCI && COMMON_CLK + depends on MACB && PCI ---help--- This is PCI wrapper for MACB driver. 
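The bcm_sysport, bnxt and bcmgenet hunks in this series all make the same mechanical conversion from the old <linux/net_dim.h> interface to the shared <linux/dim.h> library (selected via DIMLIB in the Broadcom Kconfig): struct net_dim becomes struct dim, net_dim_sample() becomes dim_update_sample(), and the NET_DIM_* constants become DIM_*. For reference, a minimal sketch of the converted pattern follows; struct my_ring, my_poll(), my_dim_work() and my_ring_write_coalesce() are hypothetical stand-ins for the per-driver ring state and coalescing register write-out, not code from this series.

#include <linux/dim.h>
#include <linux/netdevice.h>
#include <linux/workqueue.h>

/* Hypothetical per-ring state: only the fields the DIM pattern touches. */
struct my_ring {
	struct napi_struct napi;
	u16 event_ctr;		/* bumped by the IRQ handler */
	unsigned long packets;	/* accumulated by the RX path */
	unsigned long bytes;
	struct dim dim;
};

/* Stand-in for the device-specific coalescing register write. */
static void my_ring_write_coalesce(struct my_ring *ring, u16 usec, u16 pkts)
{
	/* program the device's RX interrupt coalescing registers here */
}

/* Apply the profile DIM picked, then re-arm the algorithm, as
 * bcm_sysport_dim_work(), bnxt_dim_work() and bcmgenet_dim_work() do.
 */
static void my_dim_work(struct work_struct *work)
{
	struct dim *dim = container_of(work, struct dim, work);
	struct my_ring *ring = container_of(dim, struct my_ring, dim);
	struct dim_cq_moder moder =
		net_dim_get_rx_moderation(dim->mode, dim->profile_ix);

	my_ring_write_coalesce(ring, moder.usec, moder.pkts);
	dim->state = DIM_START_MEASURE;
}

/* Called once at ring setup time, e.g. from probe or open. */
static void my_init_dim(struct my_ring *ring)
{
	INIT_WORK(&ring->dim.work, my_dim_work);
	ring->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
}

/* Feed DIM one sample per NAPI poll; net_dim() schedules my_dim_work()
 * whenever it decides the moderation profile should change.
 */
static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_ring *ring = container_of(napi, struct my_ring, napi);
	struct dim_sample dim_sample;
	int work_done = 0;	/* RX processing elided */

	dim_update_sample(ring->event_ctr, ring->packets, ring->bytes,
			  &dim_sample);
	net_dim(&ring->dim, dim_sample);

	if (work_done < budget)
		napi_complete_done(napi, work_done);
	return work_done;
}

The same three touch points — the sample feed in the NAPI poll, the profile apply in the work handler, and the DIM_START_MEASURE re-arm — appear in each converted driver above; only the register write-out differs per device.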
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 6ff123da6a14..03983bd46eef 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -496,7 +496,11 @@ /* Bitfields in TISUBN */ #define GEM_SUBNSINCR_OFFSET 0 -#define GEM_SUBNSINCR_SIZE 16 +#define GEM_SUBNSINCRL_OFFSET 24 +#define GEM_SUBNSINCRL_SIZE 8 +#define GEM_SUBNSINCRH_OFFSET 0 +#define GEM_SUBNSINCRH_SIZE 16 +#define GEM_SUBNSINCR_SIZE 24 /* Bitfields in TI */ #define GEM_NSINCR_OFFSET 0 @@ -834,6 +838,9 @@ struct gem_tx_ts { /* limit RX checksum offload to TCP and UDP packets */ #define GEM_RX_CSUM_CHECKED_MASK 2 +/* Scaled PPM fraction */ +#define PPM_FRACTION 16 + /* struct macb_tx_skb - data about an skb which is being transmitted * @skb: skb currently being transmitted, only set for the last buffer * of the frame @@ -1060,7 +1067,8 @@ struct macb_or_gem_ops { int (*mog_alloc_rx_buffers)(struct macb *bp); void (*mog_free_rx_buffers)(struct macb *bp); void (*mog_init_rings)(struct macb *bp); - int (*mog_rx)(struct macb_queue *queue, int budget); + int (*mog_rx)(struct macb_queue *queue, struct napi_struct *napi, + int budget); }; /* MACB-PTP interface: adapt to platform needs. */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 262a28ff81fc..5ca17e62dc3e 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/clk.h> +#include <linux/clk-provider.h> #include <linux/crc32.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -37,6 +38,13 @@ #include <linux/pm_runtime.h> #include "macb.h" +/* This structure is only used for MACB on SiFive FU540 devices */ +struct sifive_fu540_macb_mgmt { + void __iomem *reg; + unsigned long rate; + struct clk_hw hw; +}; + #define MACB_RX_BUFFER_SIZE 128 #define RX_BUFFER_MULTIPLE 64 /* bytes */ @@ -981,7 +989,8 @@ static void discard_partial_frame(struct macb_queue *queue, unsigned int begin, */ } -static int gem_rx(struct macb_queue *queue, int budget) +static int gem_rx(struct macb_queue *queue, struct napi_struct *napi, + int budget) { struct macb *bp = queue->bp; unsigned int len; @@ -1063,7 +1072,7 @@ static int gem_rx(struct macb_queue *queue, int budget) skb->data, 32, true); #endif - netif_receive_skb(skb); + napi_gro_receive(napi, skb); } gem_rx_refill(queue); @@ -1071,8 +1080,8 @@ static int gem_rx(struct macb_queue *queue, int budget) return count; } -static int macb_rx_frame(struct macb_queue *queue, unsigned int first_frag, - unsigned int last_frag) +static int macb_rx_frame(struct macb_queue *queue, struct napi_struct *napi, + unsigned int first_frag, unsigned int last_frag) { unsigned int len; unsigned int frag; @@ -1148,7 +1157,7 @@ static int macb_rx_frame(struct macb_queue *queue, unsigned int first_frag, bp->dev->stats.rx_bytes += skb->len; netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n", skb->len, skb->csum); - netif_receive_skb(skb); + napi_gro_receive(napi, skb); return 0; } @@ -1171,7 +1180,8 @@ static inline void macb_init_rx_ring(struct macb_queue *queue) queue->rx_tail = 0; } -static int macb_rx(struct macb_queue *queue, int budget) +static int macb_rx(struct macb_queue *queue, struct napi_struct *napi, + int budget) { struct macb *bp = queue->bp; bool reset_rx_queue = false; @@ -1208,7 +1218,7 @@ static int macb_rx(struct macb_queue *queue, int budget) continue; } - dropped = macb_rx_frame(queue, 
first_frag, tail); + dropped = macb_rx_frame(queue, napi, first_frag, tail); first_frag = -1; if (unlikely(dropped < 0)) { reset_rx_queue = true; @@ -1262,7 +1272,7 @@ static int macb_poll(struct napi_struct *napi, int budget) netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n", (unsigned long)status, budget); - work_done = bp->macbgem_ops.mog_rx(queue, budget); + work_done = bp->macbgem_ops.mog_rx(queue, napi, budget); if (work_done < budget) { napi_complete_done(napi, work_done); @@ -3477,7 +3487,7 @@ static int macb_init(struct platform_device *pdev) queue = &bp->queues[q]; queue->bp = bp; - netif_napi_add(dev, &queue->napi, macb_poll, 64); + netif_napi_add(dev, &queue->napi, macb_poll, NAPI_POLL_WEIGHT); if (hw_q) { queue->ISR = GEM_ISR(hw_q - 1); queue->IER = GEM_IER(hw_q - 1); @@ -3616,6 +3626,8 @@ static int macb_init(struct platform_device *pdev) /* max number of receive buffers */ #define AT91ETHER_MAX_RX_DESCR 9 +static struct sifive_fu540_macb_mgmt *mgmt; + /* Initialize and start the Receiver and Transmit subsystems */ static int at91ether_start(struct net_device *dev) { @@ -3943,6 +3955,116 @@ static int at91ether_init(struct platform_device *pdev) return 0; } +static unsigned long fu540_macb_tx_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + return mgmt->rate; +} + +static long fu540_macb_tx_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *parent_rate) +{ + if (WARN_ON(rate < 2500000)) + return 2500000; + else if (rate == 2500000) + return 2500000; + else if (WARN_ON(rate < 13750000)) + return 2500000; + else if (WARN_ON(rate < 25000000)) + return 25000000; + else if (rate == 25000000) + return 25000000; + else if (WARN_ON(rate < 75000000)) + return 25000000; + else if (WARN_ON(rate < 125000000)) + return 125000000; + else if (rate == 125000000) + return 125000000; + + WARN_ON(rate > 125000000); + + return 125000000; +} + +static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + rate = fu540_macb_tx_round_rate(hw, rate, &parent_rate); + if (rate != 125000000) + iowrite32(1, mgmt->reg); + else + iowrite32(0, mgmt->reg); + mgmt->rate = rate; + + return 0; +} + +static const struct clk_ops fu540_c000_ops = { + .recalc_rate = fu540_macb_tx_recalc_rate, + .round_rate = fu540_macb_tx_round_rate, + .set_rate = fu540_macb_tx_set_rate, +}; + +static int fu540_c000_clk_init(struct platform_device *pdev, struct clk **pclk, + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk, struct clk **tsu_clk) +{ + struct clk_init_data init; + int err = 0; + + err = macb_clk_init(pdev, pclk, hclk, tx_clk, rx_clk, tsu_clk); + if (err) + return err; + + mgmt = devm_kzalloc(&pdev->dev, sizeof(*mgmt), GFP_KERNEL); + if (!mgmt) + return -ENOMEM; + + init.name = "sifive-gemgxl-mgmt"; + init.ops = &fu540_c000_ops; + init.flags = 0; + init.num_parents = 0; + + mgmt->rate = 0; + mgmt->hw.init = &init; + + *tx_clk = clk_register(NULL, &mgmt->hw); + if (IS_ERR(*tx_clk)) + return PTR_ERR(*tx_clk); + + err = clk_prepare_enable(*tx_clk); + if (err) + dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + else + dev_info(&pdev->dev, "Registered clk switch '%s'\n", init.name); + + return 0; +} + +static int fu540_c000_init(struct platform_device *pdev) +{ + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + return -ENODEV; + + mgmt->reg = ioremap(res->start, resource_size(res)); + if (!mgmt->reg) + return -ENOMEM; + + return macb_init(pdev); +} + +static const 
struct macb_config fu540_c000_config = { + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | + MACB_CAPS_GEM_HAS_PTP, + .dma_burst_length = 16, + .clk_init = fu540_c000_clk_init, + .init = fu540_c000_init, + .jumbo_max_len = 10240, +}; + static const struct macb_config at91sam9260_config = { .caps = MACB_CAPS_USRIO_HAS_CLKEN | MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII, .clk_init = macb_clk_init, @@ -4032,6 +4154,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,emac", .data = &emac_config }, { .compatible = "cdns,zynqmp-gem", .data = &zynqmp_config}, { .compatible = "cdns,zynq-gem", .data = &zynq_config }, + { .compatible = "sifive,fu540-macb", .data = &fu540_c000_config }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, macb_dt_ids); @@ -4239,6 +4362,7 @@ err_out_free_netdev: err_disable_clocks: clk_disable_unprepare(tx_clk); + clk_unregister(tx_clk); clk_disable_unprepare(hclk); clk_disable_unprepare(pclk); clk_disable_unprepare(rx_clk); @@ -4273,6 +4397,7 @@ static int macb_remove(struct platform_device *pdev) pm_runtime_dont_use_autosuspend(&pdev->dev); if (!pm_runtime_suspended(&pdev->dev)) { clk_disable_unprepare(bp->tx_clk); + clk_unregister(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); clk_disable_unprepare(bp->rx_clk); diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 0a8aca8d3634..43a3f0dbf857 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -104,7 +104,10 @@ static int gem_tsu_incr_set(struct macb *bp, struct tsu_incr *incr_spec) * to take effect. */ spin_lock_irqsave(&bp->tsu_clk_lock, flags); - gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns)); + /* RegBit[15:0] = Subns[23:8]; RegBit[31:24] = Subns[7:0] */ + gem_writel(bp, TISUBN, GEM_BF(SUBNSINCRL, incr_spec->sub_ns) | + GEM_BF(SUBNSINCRH, (incr_spec->sub_ns >> + GEM_SUBNSINCRL_SIZE))); gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns)); spin_unlock_irqrestore(&bp->tsu_clk_lock, flags); @@ -135,7 +138,7 @@ static int gem_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) * (temp / USEC_PER_SEC) + 0.5 */ adj += (USEC_PER_SEC >> 1); - adj >>= GEM_SUBNSINCR_SIZE; /* remove fractions */ + adj >>= PPM_FRACTION; /* remove fractions */ adj = div_u64(adj, USEC_PER_SEC); adj = neg_adj ? 
(word - adj) : (word + adj); diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 11d4e91ea754..99f49d059414 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -1855,7 +1855,7 @@ static void xgmac_pmt(void __iomem *ioaddr, unsigned long mode) static int xgmac_suspend(struct device *dev) { - struct net_device *ndev = platform_get_drvdata(to_platform_device(dev)); + struct net_device *ndev = dev_get_drvdata(dev); struct xgmac_priv *priv = netdev_priv(ndev); u32 value; @@ -1881,7 +1881,7 @@ static int xgmac_suspend(struct device *dev) static int xgmac_resume(struct device *dev) { - struct net_device *ndev = platform_get_drvdata(to_platform_device(dev)); + struct net_device *ndev = dev_get_drvdata(dev); struct xgmac_priv *priv = netdev_priv(ndev); void __iomem *ioaddr = priv->base; diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index 91d8a885deba..20390f6afbb4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \ cxgb4_uld.o srq.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \ - cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \ + cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o cxgb4_mps.o \ cudbg_common.o cudbg_lib.o cudbg_zlib.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a8fe0808823d..1fbb640e896a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -280,6 +280,7 @@ struct tp_params { unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */ u32 vlan_pri_map; /* cached TP_VLAN_PRI_MAP */ + u32 filter_mask; u32 ingress_config; /* cached TP_INGRESS_CONFIG */ /* cached TP_OUT_CONFIG compressed error vector @@ -600,6 +601,7 @@ struct port_info { u8 vin; u8 vivld; u8 smt_idx; + u8 rx_cchan; }; struct dentry; @@ -878,6 +880,7 @@ struct uld_msix_info { unsigned short vec; char desc[IFNAMSIZ + 10]; unsigned int idx; + cpumask_var_t aff_mask; }; struct vf_info { @@ -902,10 +905,6 @@ struct mbox_list { struct list_head list; }; -struct mps_encap_entry { - atomic_t refcnt; -}; - #if IS_ENABLED(CONFIG_THERMAL) struct ch_thermal { struct thermal_zone_device *tzdev; @@ -914,6 +913,14 @@ struct ch_thermal { }; #endif +struct mps_entries_ref { + struct list_head list; + u8 addr[ETH_ALEN]; + u8 mask[ETH_ALEN]; + u16 idx; + refcount_t refcnt; +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -938,9 +945,10 @@ struct adapter { struct cxgb4_virt_res vres; unsigned int swintr; - struct { + struct msix_info { unsigned short vec; char desc[IFNAMSIZ + 10]; + cpumask_var_t aff_mask; } msix_info[MAX_INGQ + 1]; struct uld_msix_info *msix_info_ulds; /* msix info for uld's */ struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */ @@ -965,7 +973,6 @@ struct adapter { unsigned int rawf_start; unsigned int rawf_cnt; struct smt_data *smt; - struct mps_encap_entry *mps_encap; struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; unsigned int num_uld; @@ -973,6 +980,8 @@ struct adapter { struct list_head list_node; struct list_head rcu_node; struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ + struct list_head mps_ref; + spinlock_t mps_ref_lock; 
/* lock for syncing mps ref/def activities */ void *iscsi_ppm; @@ -1898,5 +1907,46 @@ int cxgb4_dcb_enabled(const struct net_device *dev); int cxgb4_thermal_init(struct adapter *adap); int cxgb4_thermal_remove(struct adapter *adap); +int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec, + cpumask_var_t *aff_mask, int idx); +void cxgb4_clear_msix_aff(unsigned short vec, cpumask_var_t aff_mask); + +int cxgb4_change_mac(struct port_info *pi, unsigned int viid, + int *tcam_idx, const u8 *addr, + bool persistent, u8 *smt_idx); + +int cxgb4_alloc_mac_filt(struct adapter *adap, unsigned int viid, + bool free, unsigned int naddr, + const u8 **addr, u16 *idx, + u64 *hash, bool sleep_ok); +int cxgb4_free_mac_filt(struct adapter *adap, unsigned int viid, + unsigned int naddr, const u8 **addr, bool sleep_ok); +int cxgb4_init_mps_ref_entries(struct adapter *adap); +void cxgb4_free_mps_ref_entries(struct adapter *adap); +int cxgb4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid, + const u8 *addr, const u8 *mask, + unsigned int vni, unsigned int vni_mask, + u8 dip_hit, u8 lookup_type, bool sleep_ok); +int cxgb4_free_encap_mac_filt(struct adapter *adap, unsigned int viid, + int idx, bool sleep_ok); +int cxgb4_free_raw_mac_filt(struct adapter *adap, + unsigned int viid, + const u8 *addr, + const u8 *mask, + unsigned int idx, + u8 lookup_type, + u8 port_id, + bool sleep_ok); +int cxgb4_alloc_raw_mac_filt(struct adapter *adap, + unsigned int viid, + const u8 *addr, + const u8 *mask, + unsigned int idx, + u8 lookup_type, + u8 port_id, + bool sleep_ok); +int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid, + int *tcam_idx, const u8 *addr, + bool persistent, u8 *smt_idx); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 4107007b6ec4..43b0f8c57da7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -248,8 +248,9 @@ static int validate_filter(struct net_device *dev, u32 fconf, iconf; /* Check for unconfigured fields being used. */ - fconf = adapter->params.tp.vlan_pri_map; iconf = adapter->params.tp.ingress_config; + fconf = fs->hash ? 
adapter->params.tp.filter_mask : + adapter->params.tp.vlan_pri_map; if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) || unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) || @@ -726,10 +727,8 @@ void clear_filter(struct adapter *adap, struct filter_entry *f) cxgb4_smt_release(f->smt); if (f->fs.val.encap_vld && f->fs.val.ovlan_vld) - if (atomic_dec_and_test(&adap->mps_encap[f->fs.val.ovlan & - 0x1ff].refcnt)) - t4_free_encap_mac_filt(adap, pi->viid, - f->fs.val.ovlan & 0x1ff, 0); + t4_free_encap_mac_filt(adap, pi->viid, + f->fs.val.ovlan & 0x1ff, 0); if ((f->fs.hash || is_t6(adap->params.chip)) && f->fs.type) cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1); @@ -1041,7 +1040,7 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb, RSS_QUEUE_V(f->fs.iq) | TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | - RX_CHANNEL_F | + RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | CONG_CNTRL_V((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | @@ -1081,7 +1080,7 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb, RSS_QUEUE_V(f->fs.iq) | TX_QUEUE_V(f->fs.nat_mode) | T5_OPT_2_VALID_F | - RX_CHANNEL_F | + RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) | CONG_CNTRL_V((f->fs.action == FILTER_DROP) | (f->fs.dirsteer << 1)) | PACE_V((f->fs.maskhash) | @@ -1176,7 +1175,6 @@ static int cxgb4_set_hash_filter(struct net_device *dev, if (ret < 0) goto free_atid; - atomic_inc(&adapter->mps_encap[ret].refcnt); f->fs.val.ovlan = ret; f->fs.mask.ovlan = 0xffff; f->fs.val.ovlan_vld = 1; @@ -1419,7 +1417,6 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id, if (ret < 0) goto free_clip; - atomic_inc(&adapter->mps_encap[ret].refcnt); f->fs.val.ovlan = ret; f->fs.mask.ovlan = 0x1ff; f->fs.val.ovlan_vld = 1; @@ -1833,24 +1830,38 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) } } -int init_hash_filter(struct adapter *adap) +void init_hash_filter(struct adapter *adap) { + u32 reg; + /* On T6, verify the necessary register configs and warn the user in * case of improper config */ if (is_t6(adap->params.chip)) { - if (TCAM_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_0_A)) != 4) - goto err; + if (is_offload(adap)) { + if (!(t4_read_reg(adap, TP_GLOBAL_CONFIG_A) + & ACTIVEFILTERCOUNTS_F)) { + dev_err(adap->pdev_dev, "Invalid hash filter + ofld config\n"); + return; + } + } else { + reg = t4_read_reg(adap, LE_DB_RSP_CODE_0_A); + if (TCAM_ACTV_HIT_G(reg) != 4) { + dev_err(adap->pdev_dev, "Invalid hash filter config\n"); + return; + } + + reg = t4_read_reg(adap, LE_DB_RSP_CODE_1_A); + if (HASH_ACTV_HIT_G(reg) != 4) { + dev_err(adap->pdev_dev, "Invalid hash filter config\n"); + return; + } + } - if (HASH_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_1_A)) != 4) - goto err; } else { dev_err(adap->pdev_dev, "Hash filter supported only on T6\n"); - return -EINVAL; + return; } + adap->params.hash_filter = 1; - return 0; -err: - dev_warn(adap->pdev_dev, "Invalid hash filter config!\n"); - return -EINVAL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h index 8db5fca6dcc9..b0751c0611ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -50,7 +50,7 @@ int delete_filter(struct adapter *adapter, unsigned int fidx); int writable_filter(struct filter_entry *f); void clear_all_filters(struct adapter *adapter); -int init_hash_filter(struct adapter *adap); +void 
init_hash_filter(struct adapter *adap); bool is_filter_exact_match(struct adapter *adap, struct ch_filter_specification *fs); #endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 715e4edcf4a2..67202b6f352e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -366,13 +366,19 @@ static int cxgb4_mac_sync(struct net_device *netdev, const u8 *mac_addr) int ret; u64 mhash = 0; u64 uhash = 0; + /* idx stores the index of allocated filters, + * its size should be modified based on the number of + * MAC addresses that we allocate filters for + */ + + u16 idx[1] = {}; bool free = false; bool ucast = is_unicast_ether_addr(mac_addr); const u8 *maclist[1] = {mac_addr}; struct hash_mac_addr *new_entry; - ret = t4_alloc_mac_filt(adap, adap->mbox, pi->viid, free, 1, maclist, - NULL, ucast ? &uhash : &mhash, false); + ret = cxgb4_alloc_mac_filt(adap, pi->viid, free, 1, maclist, + idx, ucast ? &uhash : &mhash, false); if (ret < 0) goto out; /* if hash != 0, then add the addr to hash addr list @@ -410,7 +416,7 @@ static int cxgb4_mac_unsync(struct net_device *netdev, const u8 *mac_addr) } } - ret = t4_free_mac_filt(adap, adap->mbox, pi->viid, 1, maclist, false); + ret = cxgb4_free_mac_filt(adap, pi->viid, 1, maclist, false); return ret < 0 ? -EINVAL : 0; } @@ -449,9 +455,9 @@ static int set_rxmode(struct net_device *dev, int mtu, bool sleep_ok) * Addresses are programmed to hash region, if tcam runs out of entries. * */ -static int cxgb4_change_mac(struct port_info *pi, unsigned int viid, - int *tcam_idx, const u8 *addr, bool persist, - u8 *smt_idx) +int cxgb4_change_mac(struct port_info *pi, unsigned int viid, + int *tcam_idx, const u8 *addr, bool persist, + u8 *smt_idx) { struct adapter *adapter = pi->adapter; struct hash_mac_addr *entry, *new_entry; @@ -505,8 +511,8 @@ static int link_start(struct net_device *dev) ret = t4_set_rxmode(pi->adapter, mb, pi->viid, dev->mtu, -1, -1, -1, !!(dev->features & NETIF_F_HW_VLAN_CTAG_RX), true); if (ret == 0) - ret = cxgb4_change_mac(pi, pi->viid, &pi->xact_addr_filt, - dev->dev_addr, true, &pi->smt_idx); + ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt, + dev->dev_addr, true, &pi->smt_idx); if (ret == 0) ret = t4_link_l1cfg(pi->adapter, mb, pi->tx_chan, &pi->link_cfg); @@ -702,9 +708,38 @@ static void name_msix_vecs(struct adapter *adap) } } +int cxgb4_set_msix_aff(struct adapter *adap, unsigned short vec, + cpumask_var_t *aff_mask, int idx) +{ + int rv; + + if (!zalloc_cpumask_var(aff_mask, GFP_KERNEL)) { + dev_err(adap->pdev_dev, "alloc_cpumask_var failed\n"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(idx, dev_to_node(adap->pdev_dev)), + *aff_mask); + + rv = irq_set_affinity_hint(vec, *aff_mask); + if (rv) + dev_warn(adap->pdev_dev, + "irq_set_affinity_hint %u failed %d\n", + vec, rv); + + return 0; +} + +void cxgb4_clear_msix_aff(unsigned short vec, cpumask_var_t aff_mask) +{ + irq_set_affinity_hint(vec, NULL); + free_cpumask_var(aff_mask); +} + static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; + struct msix_info *minfo; int err, ethqidx; int msi_index = 2; @@ -714,32 +749,77 @@ static int request_msix_queue_irqs(struct adapter *adap) return err; for_each_ethrxq(s, ethqidx) { - err = request_irq(adap->msix_info[msi_index].vec, + minfo = &adap->msix_info[msi_index]; + err = request_irq(minfo->vec, t4_sge_intr_msix, 0, - 
adap->msix_info[msi_index].desc, + minfo->desc, &s->ethrxq[ethqidx].rspq); if (err) goto unwind; + + cxgb4_set_msix_aff(adap, minfo->vec, + &minfo->aff_mask, ethqidx); msi_index++; } return 0; unwind: - while (--ethqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->ethrxq[ethqidx].rspq); + while (--ethqidx >= 0) { + msi_index--; + minfo = &adap->msix_info[msi_index]; + cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask); + free_irq(minfo->vec, &s->ethrxq[ethqidx].rspq); + } free_irq(adap->msix_info[1].vec, &s->fw_evtq); return err; } static void free_msix_queue_irqs(struct adapter *adap) { - int i, msi_index = 2; struct sge *s = &adap->sge; + struct msix_info *minfo; + int i, msi_index = 2; free_irq(adap->msix_info[1].vec, &s->fw_evtq); - for_each_ethrxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); + for_each_ethrxq(s, i) { + minfo = &adap->msix_info[msi_index++]; + cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask); + free_irq(minfo->vec, &s->ethrxq[i].rspq); + } +} + +static int setup_ppod_edram(struct adapter *adap) +{ + unsigned int param, val; + int ret; + + /* The driver sends a FW_PARAMS_PARAM_DEV_PPOD_EDRAM read command to + * check whether firmware supports the ppod edram feature. If firmware + * returns 1, the driver can then enable the feature by sending a + * FW_PARAMS_PARAM_DEV_PPOD_EDRAM write command with value 1. + */ + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_PPOD_EDRAM)); + + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val); + if (ret < 0) { + dev_warn(adap->pdev_dev, + "querying PPOD_EDRAM support failed: %d\n", + ret); + return -1; + } + + if (val != 1) + return -1; + + ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val); + if (ret < 0) { + dev_err(adap->pdev_dev, + "setting PPOD_EDRAM failed: %d\n", ret); + return -1; + } + return 0; } /** @@ -1646,6 +1726,18 @@ unsigned int cxgb4_port_chan(const struct net_device *dev) } EXPORT_SYMBOL(cxgb4_port_chan); +/** + * cxgb4_port_e2cchan - get the HW c-channel of a port + * @dev: the net device for the port + * + * Return the HW RX c-channel of the given port.
+ */ +unsigned int cxgb4_port_e2cchan(const struct net_device *dev) +{ + return netdev2pinfo(dev)->rx_cchan; +} +EXPORT_SYMBOL(cxgb4_port_e2cchan); + unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo) { struct adapter *adap = netdev2adap(dev); @@ -2934,8 +3026,8 @@ static int cxgb_set_mac_addr(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ret = cxgb4_change_mac(pi, pi->viid, &pi->xact_addr_filt, - addr->sa_data, true, &pi->smt_idx); + ret = cxgb4_update_mac_filt(pi, pi->viid, &pi->xact_addr_filt, + addr->sa_data, true, &pi->smt_idx); if (ret < 0) return ret; @@ -3043,14 +3135,14 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) } static int cxgb_setup_tc_flower(struct net_device *dev, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return cxgb4_tc_flower_replace(dev, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return cxgb4_tc_flower_destroy(dev, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return cxgb4_tc_flower_stats(dev, cls_flower); default: return -EOPNOTSUPP; @@ -3098,32 +3190,19 @@ static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int cxgb_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct port_info *pi = netdev2pinfo(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb, - pi, dev, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(cxgb_block_cb_list); static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct port_info *pi = netdev2pinfo(dev); + switch (type) { case TC_SETUP_BLOCK: - return cxgb_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &cxgb_block_cb_list, + cxgb_setup_tc_block_cb, + pi, dev, true); default: return -EOPNOTSUPP; } @@ -3187,8 +3266,6 @@ static void cxgb_del_udp_tunnel(struct net_device *netdev, i); return; } - atomic_dec(&adapter->mps_encap[adapter->rawf_start + - pi->port_id].refcnt); } } @@ -3277,7 +3354,6 @@ static void cxgb_add_udp_tunnel(struct net_device *netdev, cxgb_del_udp_tunnel(netdev, ti); return; } - atomic_inc(&adapter->mps_encap[ret].refcnt); } } @@ -3905,14 +3981,14 @@ static int adap_init0_phy(struct adapter *adap) */ static int adap_init0_config(struct adapter *adapter, int reset) { + char *fw_config_file, fw_config_file_path[256]; + u32 finiver, finicsum, cfcsum, param, val; struct fw_caps_config_cmd caps_cmd; - const struct firmware *cf; unsigned long mtype = 0, maddr = 0; - u32 finiver, finicsum, cfcsum; - int ret; - int config_issued = 0; - char *fw_config_file, fw_config_file_path[256]; + const struct firmware *cf; char *config_name = NULL; + int config_issued = 0; + int ret; /* * Reset device if necessary. @@ -4020,6 +4096,24 @@ static int adap_init0_config(struct adapter *adapter, int reset) goto bye; } + val = 0; + + /* Ofld + Hash filter is supported. Older fw will fail this request and + * it is fine. 
+ */ + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD)); + ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0, + 1, &param, &val); + + /* FW doesn't know about Hash filter + ofld support; + * it's not a problem, so don't return an error. + */ + if (ret < 0) { + dev_warn(adapter->pdev_dev, + "Hash filter with ofld is not supported by FW\n"); + } + /* * Issue a Capability Configuration command to the firmware to get it * to parse the Configuration File. We don't use t4_fw_config_file() @@ -4096,6 +4190,13 @@ static int adap_init0_config(struct adapter *adapter, int reset) dev_err(adapter->pdev_dev, "HMA configuration failed with error %d\n", ret); + if (is_t6(adapter->params.chip)) { + ret = setup_ppod_edram(adapter); + if (!ret) + dev_info(adapter->pdev_dev, "Successfully enabled " + "ppod edram feature\n"); + } + /* * And finally tell the firmware to initialize itself using the * parameters from the Configuration File. @@ -4580,6 +4681,13 @@ static int adap_init0(struct adapter *adap) if (ret < 0) goto bye; + /* Hash filter has some mandatory register settings that must be + * checked, and that check needs to know whether offload is enabled, + * hence check and set it here. + */ + if (caps_cmd.ofldcaps) + adap->params.offload = 1; + if (caps_cmd.ofldcaps || (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) { /* query offload-related parameters */ @@ -4619,11 +4727,8 @@ static int adap_init0(struct adapter *adap) adap->params.ofldq_wr_cred = val[5]; if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) { - ret = init_hash_filter(adap); - if (ret < 0) - goto bye; + init_hash_filter(adap); } else { - adap->params.offload = 1; adap->num_ofld_uld += 1; } } @@ -4715,6 +4820,22 @@ static int adap_init0(struct adapter *adap) goto bye; adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; + if (is_t6(adap->params.chip)) { + params[0] = FW_PARAM_PFVF(PPOD_EDRAM_START); + params[1] = FW_PARAM_PFVF(PPOD_EDRAM_END); + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, + params, val); + if (!ret) { + adap->vres.ppod_edram.start = val[0]; + adap->vres.ppod_edram.size = + val[1] - val[0] + 1; + + dev_info(adap->pdev_dev, + "ppod edram start 0x%x end 0x%x size 0x%x\n", + val[0], val[1], + adap->vres.ppod_edram.size); + } + } /* LIO target and cxgb4i initiator */ adap->num_ofld_uld += 2; } @@ -5315,7 +5436,6 @@ static void free_some_resources(struct adapter *adapter) { unsigned int i; - kvfree(adapter->mps_encap); kvfree(adapter->smt); kvfree(adapter->l2t); kvfree(adapter->srq); @@ -5841,12 +5961,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->params.offload = 0; } - adapter->mps_encap = kvcalloc(adapter->params.arch.mps_tcam_size, - sizeof(struct mps_encap_entry), - GFP_KERNEL); - if (!adapter->mps_encap) - dev_warn(&pdev->dev, "could not allocate MPS Encap entries, continuing\n"); - #if IS_ENABLED(CONFIG_IPV6) if (chip_ver <= CHELSIO_T5 && (!(t4_read_reg(adapter, LE_DB_CONFIG_A) & ASLIPCOMPEN_F))) { @@ -5922,6 +6036,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* check for PCI Express bandwidth capabilities */ pcie_print_link_status(pdev); + cxgb4_init_mps_ref_entries(adapter); + err = init_rss(adapter); if (err) goto out_free_dev; @@ -6048,6 +6164,8 @@ static void remove_one(struct pci_dev *pdev) disable_interrupts(adapter); + cxgb4_free_mps_ref_entries(adapter); + for_each_port(adapter, i) if
(adapter->port[i]->reg_state == NETREG_REGISTERED) unregister_netdev(adapter->port[i]); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c new file mode 100644 index 000000000000..b1a073eea60b --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Chelsio Communications, Inc. All rights reserved. */ + +#include "cxgb4.h" + +static int cxgb4_mps_ref_dec_by_mac(struct adapter *adap, + const u8 *addr, const u8 *mask) +{ + u8 bitmask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + struct mps_entries_ref *mps_entry, *tmp; + int ret = -EINVAL; + + spin_lock_bh(&adap->mps_ref_lock); + list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) { + if (ether_addr_equal(mps_entry->addr, addr) && + ether_addr_equal(mps_entry->mask, mask ? mask : bitmask)) { + if (!refcount_dec_and_test(&mps_entry->refcnt)) { + spin_unlock_bh(&adap->mps_ref_lock); + return -EBUSY; + } + list_del(&mps_entry->list); + kfree(mps_entry); + ret = 0; + break; + } + } + spin_unlock_bh(&adap->mps_ref_lock); + return ret; +} + +static int cxgb4_mps_ref_dec(struct adapter *adap, u16 idx) +{ + struct mps_entries_ref *mps_entry, *tmp; + int ret = -EINVAL; + + spin_lock(&adap->mps_ref_lock); + list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) { + if (mps_entry->idx == idx) { + if (!refcount_dec_and_test(&mps_entry->refcnt)) { + spin_unlock(&adap->mps_ref_lock); + return -EBUSY; + } + list_del(&mps_entry->list); + kfree(mps_entry); + ret = 0; + break; + } + } + spin_unlock(&adap->mps_ref_lock); + return ret; +} + +static int cxgb4_mps_ref_inc(struct adapter *adap, const u8 *mac_addr, + u16 idx, const u8 *mask) +{ + u8 bitmask[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + struct mps_entries_ref *mps_entry; + int ret = 0; + + spin_lock_bh(&adap->mps_ref_lock); + list_for_each_entry(mps_entry, &adap->mps_ref, list) { + if (mps_entry->idx == idx) { + refcount_inc(&mps_entry->refcnt); + goto unlock; + } + } + mps_entry = kzalloc(sizeof(*mps_entry), GFP_ATOMIC); + if (!mps_entry) { + ret = -ENOMEM; + goto unlock; + } + ether_addr_copy(mps_entry->mask, mask ? 
mask : bitmask); + ether_addr_copy(mps_entry->addr, mac_addr); + mps_entry->idx = idx; + refcount_set(&mps_entry->refcnt, 1); + list_add_tail(&mps_entry->list, &adap->mps_ref); +unlock: + spin_unlock_bh(&adap->mps_ref_lock); + return ret; +} + +int cxgb4_free_mac_filt(struct adapter *adap, unsigned int viid, + unsigned int naddr, const u8 **addr, bool sleep_ok) +{ + int ret, i; + + for (i = 0; i < naddr; i++) { + if (!cxgb4_mps_ref_dec_by_mac(adap, addr[i], NULL)) { + ret = t4_free_mac_filt(adap, adap->mbox, viid, + 1, &addr[i], sleep_ok); + if (ret < 0) + return ret; + } + } + + /* return number of filters freed */ + return naddr; +} + +int cxgb4_alloc_mac_filt(struct adapter *adap, unsigned int viid, + bool free, unsigned int naddr, const u8 **addr, + u16 *idx, u64 *hash, bool sleep_ok) +{ + int ret, i; + + ret = t4_alloc_mac_filt(adap, adap->mbox, viid, free, + naddr, addr, idx, hash, sleep_ok); + if (ret < 0) + return ret; + + for (i = 0; i < naddr; i++) { + if (idx[i] != 0xffff) { + if (cxgb4_mps_ref_inc(adap, addr[i], idx[i], NULL)) { + ret = -ENOMEM; + goto error; + } + } + } + + goto out; +error: + cxgb4_free_mac_filt(adap, viid, naddr, addr, sleep_ok); + +out: + /* Returns a negative error number or the number of filters allocated */ + return ret; +} + +int cxgb4_update_mac_filt(struct port_info *pi, unsigned int viid, + int *tcam_idx, const u8 *addr, + bool persistent, u8 *smt_idx) +{ + int ret; + + ret = cxgb4_change_mac(pi, viid, tcam_idx, + addr, persistent, smt_idx); + if (ret < 0) + return ret; + + cxgb4_mps_ref_inc(pi->adapter, addr, *tcam_idx, NULL); + return ret; +} + +int cxgb4_free_raw_mac_filt(struct adapter *adap, + unsigned int viid, + const u8 *addr, + const u8 *mask, + unsigned int idx, + u8 lookup_type, + u8 port_id, + bool sleep_ok) +{ + int ret = 0; + + if (!cxgb4_mps_ref_dec(adap, idx)) + ret = t4_free_raw_mac_filt(adap, viid, addr, + mask, idx, lookup_type, + port_id, sleep_ok); + + return ret; +} + +int cxgb4_alloc_raw_mac_filt(struct adapter *adap, + unsigned int viid, + const u8 *addr, + const u8 *mask, + unsigned int idx, + u8 lookup_type, + u8 port_id, + bool sleep_ok) +{ + int ret; + + ret = t4_alloc_raw_mac_filt(adap, viid, addr, + mask, idx, lookup_type, + port_id, sleep_ok); + if (ret < 0) + return ret; + + if (cxgb4_mps_ref_inc(adap, addr, ret, mask)) { + ret = -ENOMEM; + t4_free_raw_mac_filt(adap, viid, addr, + mask, idx, lookup_type, + port_id, sleep_ok); + } + + return ret; +} + +int cxgb4_free_encap_mac_filt(struct adapter *adap, unsigned int viid, + int idx, bool sleep_ok) +{ + int ret = 0; + + if (!cxgb4_mps_ref_dec(adap, idx)) + ret = t4_free_encap_mac_filt(adap, viid, idx, sleep_ok); + + return ret; +} + +int cxgb4_alloc_encap_mac_filt(struct adapter *adap, unsigned int viid, + const u8 *addr, const u8 *mask, + unsigned int vni, unsigned int vni_mask, + u8 dip_hit, u8 lookup_type, bool sleep_ok) +{ + int ret; + + ret = t4_alloc_encap_mac_filt(adap, viid, addr, mask, vni, vni_mask, + dip_hit, lookup_type, sleep_ok); + if (ret < 0) + return ret; + + if (cxgb4_mps_ref_inc(adap, addr, ret, mask)) { + ret = -ENOMEM; + t4_free_encap_mac_filt(adap, viid, ret, sleep_ok); + } + return ret; +} + +int cxgb4_init_mps_ref_entries(struct adapter *adap) +{ + spin_lock_init(&adap->mps_ref_lock); + INIT_LIST_HEAD(&adap->mps_ref); + + return 0; +} + +void cxgb4_free_mps_ref_entries(struct adapter *adap) +{ + struct mps_entries_ref *mps_entry, *tmp; + + if (list_empty(&adap->mps_ref)) + return; + + spin_lock(&adap->mps_ref_lock); +
list_for_each_entry_safe(mps_entry, tmp, &adap->mps_ref, list) { + list_del(&mps_entry->list); + kfree(mps_entry); + } + spin_unlock(&adap->mps_ref_lock); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c index cfaf8f618d1f..312599c6b35a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c @@ -80,10 +80,10 @@ static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap, } static void cxgb4_process_flow_match(struct net_device *dev, - struct tc_cls_flower_offload *cls, + struct flow_cls_offload *cls, struct ch_filter_specification *fs) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); u16 addr_type = 0; if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { @@ -223,9 +223,9 @@ static void cxgb4_process_flow_match(struct net_device *dev, } static int cxgb4_validate_flow_match(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_dissector *dissector = rule->match.dissector; u16 ethtype_mask = 0; u16 ethtype_key = 0; @@ -378,10 +378,10 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val, } static void cxgb4_process_flow_actions(struct net_device *in, - struct tc_cls_flower_offload *cls, + struct flow_cls_offload *cls, struct ch_filter_specification *fs) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_action_entry *act; int i; @@ -544,9 +544,9 @@ static bool valid_pedit_action(struct net_device *dev, } static int cxgb4_validate_flow_actions(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(cls); + struct flow_rule *rule = flow_cls_offload_flow_rule(cls); struct flow_action_entry *act; bool act_redir = false; bool act_pedit = false; @@ -633,7 +633,7 @@ static int cxgb4_validate_flow_actions(struct net_device *dev, } int cxgb4_tc_flower_replace(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { struct adapter *adap = netdev2adap(dev); struct ch_tc_flower_entry *ch_flower; @@ -709,7 +709,7 @@ free_entry: } int cxgb4_tc_flower_destroy(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { struct adapter *adap = netdev2adap(dev); struct ch_tc_flower_entry *ch_flower; @@ -783,7 +783,7 @@ static void ch_flower_stats_cb(struct timer_list *t) } int cxgb4_tc_flower_stats(struct net_device *dev, - struct tc_cls_flower_offload *cls) + struct flow_cls_offload *cls) { struct adapter *adap = netdev2adap(dev); struct ch_tc_flower_stats *ofld_stats; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h index 050c8a50ae41..eb4c95248baf 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h @@ -109,11 +109,11 @@ struct ch_tc_pedit_fields { #define PEDIT_UDP_SPORT_DPORT 0x0 int cxgb4_tc_flower_replace(struct net_device *dev, - struct tc_cls_flower_offload *cls); + struct flow_cls_offload *cls); int cxgb4_tc_flower_destroy(struct net_device *dev, - struct 
tc_cls_flower_offload *cls); + struct flow_cls_offload *cls); int cxgb4_tc_flower_stats(struct net_device *dev, - struct tc_cls_flower_offload *cls); + struct flow_cls_offload *cls); int cxgb4_init_tc_flower(struct adapter *adap); void cxgb4_cleanup_tc_flower(struct adapter *adap); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 6c685b920713..5b602243d573 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -352,25 +352,32 @@ static int request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + struct uld_msix_info *minfo; int err = 0; unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { bmap_idx = rxq_info->msix_tbl[idx]; - err = request_irq(adap->msix_info_ulds[bmap_idx].vec, + minfo = &adap->msix_info_ulds[bmap_idx]; + err = request_irq(minfo->vec, t4_sge_intr_msix, 0, - adap->msix_info_ulds[bmap_idx].desc, + minfo->desc, &rxq_info->uldrxq[idx].rspq); if (err) goto unwind; + + cxgb4_set_msix_aff(adap, minfo->vec, + &minfo->aff_mask, idx); } return 0; + unwind: while (idx-- > 0) { bmap_idx = rxq_info->msix_tbl[idx]; + minfo = &adap->msix_info_ulds[bmap_idx]; + cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask); free_msix_idx_in_bmap(adap, bmap_idx); - free_irq(adap->msix_info_ulds[bmap_idx].vec, - &rxq_info->uldrxq[idx].rspq); + free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq); } return err; } @@ -379,14 +386,16 @@ static void free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) { struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + struct uld_msix_info *minfo; unsigned int idx, bmap_idx; for_each_uldrxq(rxq_info, idx) { bmap_idx = rxq_info->msix_tbl[idx]; + minfo = &adap->msix_info_ulds[bmap_idx]; + cxgb4_clear_msix_aff(minfo->vec, minfo->aff_mask); free_msix_idx_in_bmap(adap, bmap_idx); - free_irq(adap->msix_info_ulds[bmap_idx].vec, - &rxq_info->uldrxq[idx].rspq); + free_irq(minfo->vec, &rxq_info->uldrxq[idx].rspq); } } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 21da34a4ca24..cee582e36134 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -292,6 +292,7 @@ struct cxgb4_virt_res { /* virtualized HW resources */ struct cxgb4_range ocq; struct cxgb4_range key; unsigned int ncrypto_fc; + struct cxgb4_range ppod_edram; }; struct chcr_stats_debug { @@ -393,6 +394,7 @@ int cxgb4_immdata_send(struct net_device *dev, unsigned int idx, int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb); unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo); unsigned int cxgb4_port_chan(const struct net_device *dev); +unsigned int cxgb4_port_e2cchan(const struct net_device *dev); unsigned int cxgb4_port_viid(const struct net_device *dev); unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid); unsigned int cxgb4_port_idx(const struct net_device *dev); diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 93feb258067b..9dd5ed9a2965 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -6209,6 +6209,37 @@ unsigned int t4_get_mps_bg_map(struct adapter *adapter, int pidx) } /** + * t4_get_tp_e2c_map - return the E2C channel map associated with a port + * @adapter: the 
adapter + * @pidx: the port index + */ +static unsigned int t4_get_tp_e2c_map(struct adapter *adapter, int pidx) +{ + unsigned int nports; + u32 param, val = 0; + int ret; + + nports = 1 << NUMPORTS_G(t4_read_reg(adapter, MPS_CMN_CTL_A)); + if (pidx >= nports) { + CH_WARN(adapter, "TP E2C Channel Port Index %d >= Nports %d\n", + pidx, nports); + return 0; + } + + /* FW version >= 1.16.44.0 can determine E2C channel map using + * FW_PARAMS_PARAM_DEV_TPCHMAP API. + */ + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_TPCHMAP)); + ret = t4_query_params_ns(adapter, adapter->mbox, adapter->pf, + 0, 1, &param, &val); + if (!ret) + return (val >> (8 * pidx)) & 0xff; + + return 0; +} + +/** * t4_get_tp_ch_map - return TP ingress channels associated with a port * @adapter: the adapter * @pidx: the port index @@ -9368,8 +9399,9 @@ int t4_init_sge_params(struct adapter *adapter) */ int t4_init_tp_params(struct adapter *adap, bool sleep_ok) { - int chan; - u32 v; + u32 param, val, v; + int chan, ret; + v = t4_read_reg(adap, TP_TIMER_RESOLUTION_A); adap->params.tp.tre = TIMERRESOLUTION_G(v); @@ -9379,11 +9411,47 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok) for (chan = 0; chan < NCHAN; chan++) adap->params.tp.tx_modq[chan] = chan; - /* Cache the adapter's Compressed Filter Mode and global Incress + /* Cache the adapter's Compressed Filter Mode/Mask and global Ingress * Configuration. */ - t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1, - TP_VLAN_PRI_MAP_A, sleep_ok); + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FILTER) | + FW_PARAMS_PARAM_Y_V(FW_PARAM_DEV_FILTER_MODE_MASK)); + + /* Read current value */ + ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1, + &param, &val); + if (ret == 0) { + dev_info(adap->pdev_dev, + "Current filter mode/mask 0x%x:0x%x\n", + FW_PARAMS_PARAM_FILTER_MODE_G(val), + FW_PARAMS_PARAM_FILTER_MASK_G(val)); + adap->params.tp.vlan_pri_map = + FW_PARAMS_PARAM_FILTER_MODE_G(val); + adap->params.tp.filter_mask = + FW_PARAMS_PARAM_FILTER_MASK_G(val); + } else { + dev_info(adap->pdev_dev, + "Failed to read filter mode/mask via fw api, using indirect-reg-read\n"); + + /* In case of an older-fw (which doesn't expose the + * FW_PARAM_DEV_FILTER_MODE_MASK api) and newer-driver (which + * uses the fw api) combination, fall back to the older method + * of reading the filter mode from the indirect register. + */ + t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1, + TP_VLAN_PRI_MAP_A, sleep_ok); + + /* With the older-fw and newer-driver combination we might run + * into an issue when the user wants to use the hash filter + * region but the filter_mask is zero; in this case filter_mask + * validation is tough. To avoid that, we set the filter_mask to + * the same value as the filter mode, which behaves exactly like + * the older way of ignoring the filter mask validation. + */ + adap->params.tp.filter_mask = adap->params.tp.vlan_pri_map; + } + t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1, TP_INGRESS_CONFIG_A, sleep_ok); @@ -9594,6 +9662,7 @@ int t4_init_portinfo(struct port_info *pi, int mbox, pi->tx_chan = port; pi->lport = port; pi->rss_size = rss_size; + pi->rx_cchan = t4_get_tp_e2c_map(pi->adapter, port); /* If fw supports returning the VIN as part of FW_VI_CMD, * save the returned values.
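
For readers tracing the packed firmware replies above: FW_PARAMS_PARAM_DEV_TPCHMAP returns one 8-bit c-channel per port in a single 32-bit word, and FW_PARAM_DEV_FILTER_MODE_MASK returns the 16-bit filter mode and 16-bit filter mask in one word (see the FW_PARAMS_PARAM_FILTER_* accessors added to t4fw_api.h below). A minimal standalone sketch of the decoding, assuming only the layouts shown in this patch; the helper names and sample values are illustrative, not part of the driver:

#include <stdint.h>
#include <stdio.h>

/* One byte per port, port 0 in bits 7:0 -- the same math as
 * t4_get_tp_e2c_map() above.
 */
static unsigned int tpchmap_port_cchan(uint32_t chmap, int pidx)
{
	return (chmap >> (8 * pidx)) & 0xff;
}

/* Mode in bits 31:16, mask in bits 15:0 -- mirrors the
 * FW_PARAMS_PARAM_FILTER_MODE_G/FW_PARAMS_PARAM_FILTER_MASK_G macros.
 */
static uint16_t filter_word_mode(uint32_t val)
{
	return (val >> 16) & 0xffff;
}

static uint16_t filter_word_mask(uint32_t val)
{
	return val & 0xffff;
}

int main(void)
{
	uint32_t chmap = 0x03020100;		/* hypothetical: port N -> channel N */
	uint32_t mode_mask = 0x12340fff;	/* hypothetical fw reply */

	printf("port 2 cchan: %u\n", tpchmap_port_cchan(chmap, 2));
	printf("mode 0x%x mask 0x%x\n", filter_word_mode(mode_mask),
	       filter_word_mask(mode_mask));
	return 0;
}
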
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h index eb222d40ddbf..a957a6e4d4c4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h @@ -1334,6 +1334,10 @@ #define TP_OUT_CONFIG_A 0x7d04 #define TP_GLOBAL_CONFIG_A 0x7d08 +#define ACTIVEFILTERCOUNTS_S 22 +#define ACTIVEFILTERCOUNTS_V(x) ((x) << ACTIVEFILTERCOUNTS_S) +#define ACTIVEFILTERCOUNTS_F ACTIVEFILTERCOUNTS_V(1U) + #define TP_CMM_TCB_BASE_A 0x7d10 #define TP_CMM_MM_BASE_A 0x7d14 #define TP_CMM_TIMER_BASE_A 0x7d18 diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index b2a618e72fcf..65313f6b5704 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1221,6 +1221,23 @@ enum fw_params_mnem { /* * device parameters */ + +#define FW_PARAMS_PARAM_FILTER_MODE_S 16 +#define FW_PARAMS_PARAM_FILTER_MODE_M 0xffff +#define FW_PARAMS_PARAM_FILTER_MODE_V(x) \ + ((x) << FW_PARAMS_PARAM_FILTER_MODE_S) +#define FW_PARAMS_PARAM_FILTER_MODE_G(x) \ + (((x) >> FW_PARAMS_PARAM_FILTER_MODE_S) & \ + FW_PARAMS_PARAM_FILTER_MODE_M) + +#define FW_PARAMS_PARAM_FILTER_MASK_S 0 +#define FW_PARAMS_PARAM_FILTER_MASK_M 0xffff +#define FW_PARAMS_PARAM_FILTER_MASK_V(x) \ + ((x) << FW_PARAMS_PARAM_FILTER_MASK_S) +#define FW_PARAMS_PARAM_FILTER_MASK_G(x) \ + (((x) >> FW_PARAMS_PARAM_FILTER_MASK_S) & \ + FW_PARAMS_PARAM_FILTER_MASK_M) + enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_CCLK = 0x00, /* chip core clock in khz */ FW_PARAMS_PARAM_DEV_PORTVEC = 0x01, /* the port vector */ @@ -1250,12 +1267,16 @@ enum fw_params_param_dev { FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C, FW_PARAMS_PARAM_DEV_FILTER2_WR = 0x1D, FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E, + FW_PARAMS_PARAM_DEV_TPCHMAP = 0x1F, FW_PARAMS_PARAM_DEV_HMA_SIZE = 0x20, FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21, + FW_PARAMS_PARAM_DEV_PPOD_EDRAM = 0x23, FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24, FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27, + FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28, FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29, FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A, + FW_PARAMS_PARAM_DEV_FILTER = 0x2E, }; /* @@ -1312,6 +1333,8 @@ enum fw_params_param_pfvf { FW_PARAMS_PARAM_PFVF_RAWF_END = 0x37, FW_PARAMS_PARAM_PFVF_NCRYPTO_LOOKASIDE = 0x39, FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A, + FW_PARAMS_PARAM_PFVF_PPOD_EDRAM_START = 0x3B, + FW_PARAMS_PARAM_PFVF_PPOD_EDRAM_END = 0x3C, FW_PARAMS_PARAM_PFVF_LINK_STATE = 0x40, }; @@ -1347,6 +1370,11 @@ enum fw_params_param_dev_diag { FW_PARAM_DEV_DIAG_MAXTMPTHRESH = 0x02, }; +enum fw_params_param_dev_filter { + FW_PARAM_DEV_FILTER_VNIC_MODE = 0x00, + FW_PARAM_DEV_FILTER_MODE_MASK = 0x01, +}; + enum fw_params_param_dev_fwcache { FW_PARAM_DEV_FWCACHE_FLUSH = 0x00, FW_PARAM_DEV_FWCACHE_FLUSHINV = 0x01, diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c index e2919005ead3..21034536c9c5 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.c @@ -123,6 +123,9 @@ static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count, unsigned int cpu; int i; + if (!ppm->pool) + return -EINVAL; + cpu = get_cpu(); pool = per_cpu_ptr(ppm->pool, cpu); spin_lock_bh(&pool->lock); @@ -169,7 +172,9 @@ static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count, } ppm->next = i + count; - if (ppm->next >= 
ppm->bmap_index_max) + if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram)) + ppm->next = 0; + else if (ppm->next >= ppm->bmap_index_max) ppm->next = 0; spin_unlock_bh(&ppm->map_lock); @@ -382,18 +387,36 @@ static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, struct pci_dev *pdev, void *lldev, - struct cxgbi_tag_format *tformat, - unsigned int ppmax, - unsigned int llimit, - unsigned int start, - unsigned int reserve_factor) + struct cxgbi_tag_format *tformat, unsigned int iscsi_size, + unsigned int llimit, unsigned int start, + unsigned int reserve_factor, unsigned int iscsi_edram_start, + unsigned int iscsi_edram_size) { struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp); struct cxgbi_ppm_pool *pool = NULL; - unsigned int ppmax_pool = 0; unsigned int pool_index_max = 0; - unsigned int alloc_sz; + unsigned int ppmax_pool = 0; unsigned int ppod_bmap_size; + unsigned int alloc_sz; + unsigned int ppmax; + + if (!iscsi_edram_start) + iscsi_edram_size = 0; + + if (iscsi_edram_size && + ((iscsi_edram_start + iscsi_edram_size) != start)) { + pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x " + "size 0x%x DDR start 0x%x\n", + iscsi_edram_start, iscsi_edram_size, start); + return -EINVAL; + } + + if (iscsi_edram_size) { + reserve_factor = 0; + start = iscsi_edram_start; + } + + ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT; if (ppm) { pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n", @@ -434,6 +457,14 @@ int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, __func__, ppmax, ppmax_pool, ppod_bmap_size, start, end); } + if (iscsi_edram_size) { + unsigned int first_ddr_idx = + iscsi_edram_size >> PPOD_SIZE_SHIFT; + + ppm->max_index_in_edram = first_ddr_idx - 1; + bitmap_set(ppm->ppod_bmap, first_ddr_idx, 1); + pr_debug("reserved %u ppod in bitmap\n", first_ddr_idx); + } spin_lock_init(&ppm->map_lock); kref_init(&ppm->refcnt); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h index a91ad766cef0..7b02c200dd1e 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_ppm.h @@ -143,6 +143,7 @@ struct cxgbi_ppm { spinlock_t map_lock; /* ppm map lock */ unsigned int bmap_index_max; unsigned int next; + unsigned int max_index_in_edram; unsigned long *ppod_bmap; struct cxgbi_ppod_data ppod_data[0]; }; @@ -324,9 +325,9 @@ int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *, unsigned short nr_pages, unsigned long caller_data); int cxgbi_ppm_init(void **ppm_pp, struct net_device *, struct pci_dev *, void *lldev, struct cxgbi_tag_format *, - unsigned int ppmax, unsigned int llimit, - unsigned int start, - unsigned int reserve_factor); + unsigned int iscsi_size, unsigned int llimit, + unsigned int start, unsigned int reserve_factor, + unsigned int edram_start, unsigned int edram_size); int cxgbi_ppm_release(struct cxgbi_ppm *ppm); void cxgbi_tagmask_check(unsigned int tagmask, struct cxgbi_tag_format *); unsigned int cxgbi_tagmask_set(unsigned int ppmax); diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig index 8bd384720f80..fbef2829f3de 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig @@ -10,8 +10,7 @@ config FSL_DPAA2_ETH config FSL_DPAA2_PTP_CLOCK tristate "Freescale DPAA2 PTP Clock" - depends on FSL_DPAA2_ETH - imply PTP_1588_CLOCK + depends 
on FSL_DPAA2_ETH && PTP_1588_CLOCK_QORIQ default y help This driver adds support for using the DPAA2 1588 timer module diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 7d2390e3df77..0acb11557ed1 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -555,7 +555,7 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv, /* Prepare the HW SGT structure */ sgt_buf_size = priv->tx_data_offset + sizeof(struct dpaa2_sg_entry) * num_dma_bufs; - sgt_buf = netdev_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN); + sgt_buf = napi_alloc_frag(sgt_buf_size + DPAA2_ETH_TX_BUF_ALIGN); if (unlikely(!sgt_buf)) { err = -ENOMEM; goto sgt_buf_alloc_failed; @@ -757,6 +757,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) u16 queue_mapping; unsigned int needed_headroom; u32 fd_len; + u8 prio = 0; int err, i; percpu_stats = this_cpu_ptr(priv->percpu_stats); @@ -814,6 +815,18 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) * a queue affined to the same core that processed the Rx frame */ queue_mapping = skb_get_queue_mapping(skb); + + if (net_dev->num_tc) { + prio = netdev_txq_to_tc(net_dev, queue_mapping); + /* Hardware interprets priority level 0 as being the highest, + * so we need to do a reverse mapping to the netdev tc index + */ + prio = net_dev->num_tc - prio - 1; + /* We have only one FQ array entry for all Tx hardware queues + * with the same flow id (but different priority levels) + */ + queue_mapping %= dpaa2_eth_queue_count(priv); + } fq = &priv->fq[queue_mapping]; fd_len = dpaa2_fd_get_len(&fd); @@ -824,7 +837,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) * the Tx confirmation callback for this frame */ for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { - err = priv->enqueue(priv, fq, &fd, 0); + err = priv->enqueue(priv, fq, &fd, prio); if (err != -EBUSY) break; } @@ -997,13 +1010,6 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) int i, j; int new_count; - /* This is the lazy seeding of Rx buffer pools. - * dpaa2_add_bufs() is also used on the Rx hotpath and calls - * napi_alloc_frag(). The trouble with that is that it in turn ends up - * calling this_cpu_ptr(), which mandates execution in atomic context. - * Rather than splitting up the code, do a one-off preempt disable. - */ - preempt_disable(); for (j = 0; j < priv->num_channels; j++) { for (i = 0; i < DPAA2_ETH_NUM_BUFS; i += DPAA2_ETH_BUFS_PER_CMD) { @@ -1011,12 +1017,10 @@ static int seed_pool(struct dpaa2_eth_priv *priv, u16 bpid) priv->channel[j]->buf_count += new_count; if (new_count < DPAA2_ETH_BUFS_PER_CMD) { - preempt_enable(); return -ENOMEM; } } } - preempt_enable(); return 0; } @@ -1872,6 +1876,78 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n, return n - drops; } +static int update_xps(struct dpaa2_eth_priv *priv) +{ + struct net_device *net_dev = priv->net_dev; + struct cpumask xps_mask; + struct dpaa2_eth_fq *fq; + int i, num_queues, netdev_queues; + int err = 0; + + num_queues = dpaa2_eth_queue_count(priv); + netdev_queues = (net_dev->num_tc ? 
: 1) * num_queues; + + /* The first <num_queues> entries in priv->fq array are Tx/Tx conf + * queues, so only process those + */ + for (i = 0; i < netdev_queues; i++) { + fq = &priv->fq[i % num_queues]; + + cpumask_clear(&xps_mask); + cpumask_set_cpu(fq->target_cpu, &xps_mask); + + err = netif_set_xps_queue(net_dev, &xps_mask, i); + if (err) { + netdev_warn_once(net_dev, "Error setting XPS queue\n"); + break; + } + } + + return err; +} + +static int dpaa2_eth_setup_tc(struct net_device *net_dev, + enum tc_setup_type type, void *type_data) +{ + struct dpaa2_eth_priv *priv = netdev_priv(net_dev); + struct tc_mqprio_qopt *mqprio = type_data; + u8 num_tc, num_queues; + int i; + + if (type != TC_SETUP_QDISC_MQPRIO) + return -EINVAL; + + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + num_queues = dpaa2_eth_queue_count(priv); + num_tc = mqprio->num_tc; + + if (num_tc == net_dev->num_tc) + return 0; + + if (num_tc > dpaa2_eth_tc_count(priv)) { + netdev_err(net_dev, "Max %d traffic classes supported\n", + dpaa2_eth_tc_count(priv)); + return -EINVAL; + } + + if (!num_tc) { + netdev_reset_tc(net_dev); + netif_set_real_num_tx_queues(net_dev, num_queues); + goto out; + } + + netdev_set_num_tc(net_dev, num_tc); + netif_set_real_num_tx_queues(net_dev, num_tc * num_queues); + + for (i = 0; i < num_tc; i++) + netdev_set_tc_queue(net_dev, i, num_queues, i * num_queues); + +out: + update_xps(priv); + + return 0; +} + static const struct net_device_ops dpaa2_eth_ops = { .ndo_open = dpaa2_eth_open, .ndo_start_xmit = dpaa2_eth_tx, @@ -1884,6 +1960,7 @@ static const struct net_device_ops dpaa2_eth_ops = { .ndo_change_mtu = dpaa2_eth_change_mtu, .ndo_bpf = dpaa2_eth_xdp, .ndo_xdp_xmit = dpaa2_eth_xdp_xmit, + .ndo_setup_tc = dpaa2_eth_setup_tc, }; static void cdan_cb(struct dpaa2_io_notification_ctx *ctx) @@ -2138,10 +2215,9 @@ static struct dpaa2_eth_channel *get_affine_channel(struct dpaa2_eth_priv *priv, static void set_fq_affinity(struct dpaa2_eth_priv *priv) { struct device *dev = priv->net_dev->dev.parent; - struct cpumask xps_mask; struct dpaa2_eth_fq *fq; int rx_cpu, txc_cpu; - int i, err; + int i; /* For each FQ, pick one channel/CPU to deliver frames to. 
* This may well change at runtime, either through irqbalance or @@ -2160,17 +2236,6 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv) break; case DPAA2_TX_CONF_FQ: fq->target_cpu = txc_cpu; - - /* Tell the stack to affine to txc_cpu the Tx queue - * associated with the confirmation one - */ - cpumask_clear(&xps_mask); - cpumask_set_cpu(txc_cpu, &xps_mask); - err = netif_set_xps_queue(priv->net_dev, &xps_mask, - fq->flowid); - if (err) - dev_err(dev, "Error setting XPS queue\n"); - txc_cpu = cpumask_next(txc_cpu, &priv->dpio_cpumask); if (txc_cpu >= nr_cpu_ids) txc_cpu = cpumask_first(&priv->dpio_cpumask); @@ -2180,6 +2245,8 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv) } fq->channel = get_affine_channel(priv, fq->target_cpu); } + + update_xps(priv); } static void setup_fqs(struct dpaa2_eth_priv *priv) @@ -2361,11 +2428,10 @@ static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv, static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv, struct dpaa2_eth_fq *fq, - struct dpaa2_fd *fd, - u8 prio __always_unused) + struct dpaa2_fd *fd, u8 prio) { return dpaa2_io_service_enqueue_fq(fq->channel->dpio, - fq->tx_fqid, fd); + fq->tx_fqid[prio], fd); } static void set_enqueue_mode(struct dpaa2_eth_priv *priv) @@ -2479,14 +2545,9 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv, queue.destination.type = DPNI_DEST_DPCON; queue.destination.priority = 1; queue.user_context = (u64)(uintptr_t)fq; - queue.flc.stash_control = 1; - queue.flc.value &= 0xFFFFFFFFFFFFFFC0; - /* 01 01 00 - data, annotation, flow context */ - queue.flc.value |= 0x14; err = dpni_set_queue(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_RX, 0, fq->flowid, - DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST | - DPNI_QUEUE_OPT_FLC, + DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST, &queue); if (err) { dev_err(dev, "dpni_set_queue(RX) failed\n"); @@ -2526,17 +2587,21 @@ static int setup_tx_flow(struct dpaa2_eth_priv *priv, struct device *dev = priv->net_dev->dev.parent; struct dpni_queue queue; struct dpni_queue_id qid; - int err; + int i, err; - err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, - DPNI_QUEUE_TX, 0, fq->flowid, &queue, &qid); - if (err) { - dev_err(dev, "dpni_get_queue(TX) failed\n"); - return err; + for (i = 0; i < dpaa2_eth_tc_count(priv); i++) { + err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, + DPNI_QUEUE_TX, i, fq->flowid, + &queue, &qid); + if (err) { + dev_err(dev, "dpni_get_queue(TX) failed\n"); + return err; + } + fq->tx_fqid[i] = qid.fqid; } + /* All Tx queues belonging to the same flowid have the same qdbin */ fq->tx_qdbin = qid.qdbin; - fq->tx_fqid = qid.fqid; err = dpni_get_queue(priv->mc_io, 0, priv->mc_token, DPNI_QUEUE_TX_CONFIRM, 0, fq->flowid, @@ -3236,7 +3301,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) dev = &dpni_dev->dev; /* Net device */ - net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA2_ETH_MAX_TX_QUEUES); + net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA2_ETH_MAX_NETDEV_QUEUES); if (!net_dev) { dev_err(dev, "alloc_etherdev_mq() failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h index e180d5a68c98..9af18c24221f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h @@ -282,10 +282,13 @@ struct dpaa2_eth_ch_stats { }; /* Maximum number of queues associated with a DPNI */ +#define DPAA2_ETH_MAX_TCS 8 #define DPAA2_ETH_MAX_RX_QUEUES 16 #define DPAA2_ETH_MAX_TX_QUEUES 16 #define 
DPAA2_ETH_MAX_QUEUES (DPAA2_ETH_MAX_RX_QUEUES + \ DPAA2_ETH_MAX_TX_QUEUES) +#define DPAA2_ETH_MAX_NETDEV_QUEUES \ + (DPAA2_ETH_MAX_TX_QUEUES * DPAA2_ETH_MAX_TCS) #define DPAA2_ETH_MAX_DPCONS 16 @@ -299,8 +302,9 @@ struct dpaa2_eth_priv; struct dpaa2_eth_fq { u32 fqid; u32 tx_qdbin; - u32 tx_fqid; + u32 tx_fqid[DPAA2_ETH_MAX_TCS]; u16 flowid; + u8 tc; int target_cpu; u32 dq_frames; u32 dq_bytes; @@ -448,6 +452,9 @@ static inline int dpaa2_eth_cmp_dpni_ver(struct dpaa2_eth_priv *priv, #define dpaa2_eth_fs_count(priv) \ ((priv)->dpni_attrs.fs_entries) +#define dpaa2_eth_tc_count(priv) \ + ((priv)->dpni_attrs.num_tcs) + /* We have exactly one {Rx, Tx conf} queue per channel */ #define dpaa2_eth_queue_count(priv) \ ((priv)->num_channels) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c index 9b150db3b510..a9503aea527f 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c @@ -5,114 +5,58 @@ */ #include <linux/module.h> -#include <linux/slab.h> -#include <linux/ptp_clock_kernel.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/msi.h> #include <linux/fsl/mc.h> +#include <linux/fsl/ptp_qoriq.h> #include "dpaa2-ptp.h" -struct ptp_dpaa2_priv { - struct fsl_mc_device *ptp_mc_dev; - struct ptp_clock *clock; - struct ptp_clock_info caps; - u32 freq_comp; -}; - -/* PTP clock operations */ -static int ptp_dpaa2_adjfreq(struct ptp_clock_info *ptp, s32 ppb) +static int dpaa2_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) { - struct ptp_dpaa2_priv *ptp_dpaa2 = - container_of(ptp, struct ptp_dpaa2_priv, caps); - struct fsl_mc_device *mc_dev = ptp_dpaa2->ptp_mc_dev; - struct device *dev = &mc_dev->dev; - u64 adj; - u32 diff, tmr_add; - int neg_adj = 0; - int err = 0; - - if (ppb < 0) { - neg_adj = 1; - ppb = -ppb; - } - - tmr_add = ptp_dpaa2->freq_comp; - adj = tmr_add; - adj *= ppb; - diff = div_u64(adj, 1000000000ULL); - - tmr_add = neg_adj ? 
tmr_add - diff : tmr_add + diff; + struct ptp_qoriq *ptp_qoriq = container_of(ptp, struct ptp_qoriq, caps); + struct fsl_mc_device *mc_dev; + struct device *dev; + u32 mask = 0; + u32 bit; + int err; - err = dprtc_set_freq_compensation(mc_dev->mc_io, 0, - mc_dev->mc_handle, tmr_add); - if (err) - dev_err(dev, "dprtc_set_freq_compensation err %d\n", err); - return err; -} + dev = ptp_qoriq->dev; + mc_dev = to_fsl_mc_device(dev); -static int ptp_dpaa2_adjtime(struct ptp_clock_info *ptp, s64 delta) -{ - struct ptp_dpaa2_priv *ptp_dpaa2 = - container_of(ptp, struct ptp_dpaa2_priv, caps); - struct fsl_mc_device *mc_dev = ptp_dpaa2->ptp_mc_dev; - struct device *dev = &mc_dev->dev; - s64 now; - int err = 0; + switch (rq->type) { + case PTP_CLK_REQ_PPS: + bit = DPRTC_EVENT_PPS; + break; + default: + return -EOPNOTSUPP; + } - err = dprtc_get_time(mc_dev->mc_io, 0, mc_dev->mc_handle, &now); - if (err) { - dev_err(dev, "dprtc_get_time err %d\n", err); + err = dprtc_get_irq_mask(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, &mask); + if (err < 0) { + dev_err(dev, "dprtc_get_irq_mask(): %d\n", err); return err; } - now += delta; + if (on) + mask |= bit; + else + mask &= ~bit; - err = dprtc_set_time(mc_dev->mc_io, 0, mc_dev->mc_handle, now); - if (err) - dev_err(dev, "dprtc_set_time err %d\n", err); - return err; -} - -static int ptp_dpaa2_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) -{ - struct ptp_dpaa2_priv *ptp_dpaa2 = - container_of(ptp, struct ptp_dpaa2_priv, caps); - struct fsl_mc_device *mc_dev = ptp_dpaa2->ptp_mc_dev; - struct device *dev = &mc_dev->dev; - u64 ns; - u32 remainder; - int err = 0; - - err = dprtc_get_time(mc_dev->mc_io, 0, mc_dev->mc_handle, &ns); - if (err) { - dev_err(dev, "dprtc_get_time err %d\n", err); + err = dprtc_set_irq_mask(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, mask); + if (err < 0) { + dev_err(dev, "dprtc_set_irq_mask(): %d\n", err); return err; } - ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); - ts->tv_nsec = remainder; - return err; -} - -static int ptp_dpaa2_settime(struct ptp_clock_info *ptp, - const struct timespec64 *ts) -{ - struct ptp_dpaa2_priv *ptp_dpaa2 = - container_of(ptp, struct ptp_dpaa2_priv, caps); - struct fsl_mc_device *mc_dev = ptp_dpaa2->ptp_mc_dev; - struct device *dev = &mc_dev->dev; - u64 ns; - int err = 0; - - ns = ts->tv_sec * 1000000000ULL; - ns += ts->tv_nsec; - - err = dprtc_set_time(mc_dev->mc_io, 0, mc_dev->mc_handle, ns); - if (err) - dev_err(dev, "dprtc_set_time err %d\n", err); - return err; + return 0; } -static const struct ptp_clock_info ptp_dpaa2_caps = { +static const struct ptp_clock_info dpaa2_ptp_caps = { .owner = THIS_MODULE, .name = "DPAA2 PTP Clock", .max_adj = 512000, @@ -121,21 +65,58 @@ static const struct ptp_clock_info ptp_dpaa2_caps = { .n_per_out = 3, .n_pins = 0, .pps = 1, - .adjfreq = ptp_dpaa2_adjfreq, - .adjtime = ptp_dpaa2_adjtime, - .gettime64 = ptp_dpaa2_gettime, - .settime64 = ptp_dpaa2_settime, + .adjfine = ptp_qoriq_adjfine, + .adjtime = ptp_qoriq_adjtime, + .gettime64 = ptp_qoriq_gettime, + .settime64 = ptp_qoriq_settime, + .enable = dpaa2_ptp_enable, }; +static irqreturn_t dpaa2_ptp_irq_handler_thread(int irq, void *priv) +{ + struct ptp_qoriq *ptp_qoriq = priv; + struct ptp_clock_event event; + struct fsl_mc_device *mc_dev; + struct device *dev; + u32 status = 0; + int err; + + dev = ptp_qoriq->dev; + mc_dev = to_fsl_mc_device(dev); + + err = dprtc_get_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, &status); + if (unlikely(err)) { + 
dev_err(dev, "dprtc_get_irq_status err %d\n", err); + return IRQ_NONE; + } + + if (status & DPRTC_EVENT_PPS) { + event.type = PTP_CLOCK_PPS; + ptp_clock_event(ptp_qoriq->clock, &event); + } + + err = dprtc_clear_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, status); + if (unlikely(err)) { + dev_err(dev, "dprtc_clear_irq_status err %d\n", err); + return IRQ_NONE; + } + + return IRQ_HANDLED; +} + static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) { struct device *dev = &mc_dev->dev; - struct ptp_dpaa2_priv *ptp_dpaa2; - u32 tmr_add = 0; + struct fsl_mc_device_irq *irq; + struct ptp_qoriq *ptp_qoriq; + struct device_node *node; + void __iomem *base; int err; - ptp_dpaa2 = devm_kzalloc(dev, sizeof(*ptp_dpaa2), GFP_KERNEL); - if (!ptp_dpaa2) + ptp_qoriq = devm_kzalloc(dev, sizeof(*ptp_qoriq), GFP_KERNEL); + if (!ptp_qoriq) return -ENOMEM; err = fsl_mc_portal_allocate(mc_dev, 0, &mc_dev->mc_io); @@ -154,30 +135,60 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev) goto err_free_mcp; } - ptp_dpaa2->ptp_mc_dev = mc_dev; + ptp_qoriq->dev = dev; - err = dprtc_get_freq_compensation(mc_dev->mc_io, 0, - mc_dev->mc_handle, &tmr_add); - if (err) { - dev_err(dev, "dprtc_get_freq_compensation err %d\n", err); + node = of_find_compatible_node(NULL, NULL, "fsl,dpaa2-ptp"); + if (!node) { + err = -ENODEV; goto err_close; } - ptp_dpaa2->freq_comp = tmr_add; - ptp_dpaa2->caps = ptp_dpaa2_caps; + dev->of_node = node; - ptp_dpaa2->clock = ptp_clock_register(&ptp_dpaa2->caps, dev); - if (IS_ERR(ptp_dpaa2->clock)) { - err = PTR_ERR(ptp_dpaa2->clock); + base = of_iomap(node, 0); + if (!base) { + err = -ENOMEM; goto err_close; } - dpaa2_phc_index = ptp_clock_index(ptp_dpaa2->clock); + err = fsl_mc_allocate_irqs(mc_dev); + if (err) { + dev_err(dev, "MC irqs allocation failed\n"); + goto err_unmap; + } + + irq = mc_dev->irqs[0]; + ptp_qoriq->irq = irq->msi_desc->irq; - dev_set_drvdata(dev, ptp_dpaa2); + err = devm_request_threaded_irq(dev, ptp_qoriq->irq, NULL, + dpaa2_ptp_irq_handler_thread, + IRQF_NO_SUSPEND | IRQF_ONESHOT, + dev_name(dev), ptp_qoriq); + if (err < 0) { + dev_err(dev, "devm_request_threaded_irq(): %d\n", err); + goto err_free_mc_irq; + } + + err = dprtc_set_irq_enable(mc_dev->mc_io, 0, mc_dev->mc_handle, + DPRTC_IRQ_INDEX, 1); + if (err < 0) { + dev_err(dev, "dprtc_set_irq_enable(): %d\n", err); + goto err_free_mc_irq; + } + + err = ptp_qoriq_init(ptp_qoriq, base, &dpaa2_ptp_caps); + if (err) + goto err_free_mc_irq; + + dpaa2_phc_index = ptp_qoriq->phc_index; + dev_set_drvdata(dev, ptp_qoriq); return 0; +err_free_mc_irq: + fsl_mc_free_irqs(mc_dev); +err_unmap: + iounmap(base); err_close: dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); err_free_mcp: @@ -188,12 +199,15 @@ err_exit: static int dpaa2_ptp_remove(struct fsl_mc_device *mc_dev) { - struct ptp_dpaa2_priv *ptp_dpaa2; struct device *dev = &mc_dev->dev; + struct ptp_qoriq *ptp_qoriq; + + ptp_qoriq = dev_get_drvdata(dev); - ptp_dpaa2 = dev_get_drvdata(dev); - ptp_clock_unregister(ptp_dpaa2->clock); + dpaa2_phc_index = -1; + ptp_qoriq_free(ptp_qoriq); + fsl_mc_free_irqs(mc_dev); dprtc_close(mc_dev->mc_io, 0, mc_dev->mc_handle); fsl_mc_portal_free(mc_dev->mc_io); diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h index 9af4ac71f347..720cd50f5895 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h @@ -17,22 +17,54 @@ #define DPRTC_CMDID_CLOSE DPRTC_CMD(0x800) #define DPRTC_CMDID_OPEN 
DPRTC_CMD(0x810)

-#define DPRTC_CMDID_SET_FREQ_COMPENSATION	DPRTC_CMD(0x1d1)
-#define DPRTC_CMDID_GET_FREQ_COMPENSATION	DPRTC_CMD(0x1d2)
-#define DPRTC_CMDID_GET_TIME			DPRTC_CMD(0x1d3)
-#define DPRTC_CMDID_SET_TIME			DPRTC_CMD(0x1d4)
+#define DPRTC_CMDID_SET_IRQ_ENABLE	DPRTC_CMD(0x012)
+#define DPRTC_CMDID_GET_IRQ_ENABLE	DPRTC_CMD(0x013)
+#define DPRTC_CMDID_SET_IRQ_MASK	DPRTC_CMD(0x014)
+#define DPRTC_CMDID_GET_IRQ_MASK	DPRTC_CMD(0x015)
+#define DPRTC_CMDID_GET_IRQ_STATUS	DPRTC_CMD(0x016)
+#define DPRTC_CMDID_CLEAR_IRQ_STATUS	DPRTC_CMD(0x017)
 
 #pragma pack(push, 1)
 struct dprtc_cmd_open {
 	__le32 dprtc_id;
 };
 
-struct dprtc_get_freq_compensation {
-	__le32 freq_compensation;
+struct dprtc_cmd_get_irq {
+	__le32 pad;
+	u8 irq_index;
 };
 
-struct dprtc_time {
-	__le64 time;
+struct dprtc_cmd_set_irq_enable {
+	u8 en;
+	u8 pad[3];
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_enable {
+	u8 en;
+};
+
+struct dprtc_cmd_set_irq_mask {
+	__le32 mask;
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_mask {
+	__le32 mask;
+};
+
+struct dprtc_cmd_get_irq_status {
+	__le32 status;
+	u8 irq_index;
+};
+
+struct dprtc_rsp_get_irq_status {
+	__le32 status;
+};
+
+struct dprtc_cmd_clear_irq_status {
+	__le32 status;
+	u8 irq_index;
 };
 #pragma pack(pop)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.c b/drivers/net/ethernet/freescale/dpaa2/dprtc.c
index c13e09bc7b9d..ed52a34fa6a1 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.c
@@ -74,121 +74,220 @@ int dprtc_close(struct fsl_mc_io *mc_io,
 }
 
 /**
- * dprtc_set_freq_compensation() - Sets a new frequency compensation value.
+ * dprtc_set_irq_enable() - Set overall interrupt state.
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPRTC object
+ * @irq_index:	The interrupt index to configure
+ * @en:		Interrupt state - enable = 1, disable = 0
 *
- * @mc_io:	Pointer to MC portal's I/O object
- * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
- * @token:	Token of DPRTC object
- * @freq_compensation:	The new frequency compensation value to set.
+ * Allows GPP software to control when interrupts are generated.
+ * Each interrupt can have up to 32 causes. The enable/disable controls the
+ * overall interrupt state: if the interrupt is disabled, no cause will
+ * assert an interrupt.
 *
 * Return: '0' on Success; Error code otherwise.
*/ -int dprtc_set_freq_compensation(struct fsl_mc_io *mc_io, - u32 cmd_flags, - u16 token, - u32 freq_compensation) +int dprtc_set_irq_enable(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u8 en) { - struct dprtc_get_freq_compensation *cmd_params; + struct dprtc_cmd_set_irq_enable *cmd_params; struct fsl_mc_command cmd = { 0 }; - cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_FREQ_COMPENSATION, + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ_ENABLE, cmd_flags, token); - cmd_params = (struct dprtc_get_freq_compensation *)cmd.params; - cmd_params->freq_compensation = cpu_to_le32(freq_compensation); + cmd_params = (struct dprtc_cmd_set_irq_enable *)cmd.params; + cmd_params->irq_index = irq_index; + cmd_params->en = en; return mc_send_command(mc_io, &cmd); } /** - * dprtc_get_freq_compensation() - Retrieves the frequency compensation value + * dprtc_get_irq_enable() - Get overall interrupt state + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPRTC object + * @irq_index: The interrupt index to configure + * @en: Returned interrupt state - enable = 1, disable = 0 + * + * Return: '0' on Success; Error code otherwise. + */ +int dprtc_get_irq_enable(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u8 *en) +{ + struct dprtc_rsp_get_irq_enable *rsp_params; + struct dprtc_cmd_get_irq *cmd_params; + struct fsl_mc_command cmd = { 0 }; + int err; + + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_ENABLE, + cmd_flags, + token); + cmd_params = (struct dprtc_cmd_get_irq *)cmd.params; + cmd_params->irq_index = irq_index; + + err = mc_send_command(mc_io, &cmd); + if (err) + return err; + + rsp_params = (struct dprtc_rsp_get_irq_enable *)cmd.params; + *en = rsp_params->en; + + return 0; +} + +/** + * dprtc_set_irq_mask() - Set interrupt mask. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPRTC object + * @irq_index: The interrupt index to configure + * @mask: Event mask to trigger interrupt; + * each bit: + * 0 = ignore event + * 1 = consider event for asserting IRQ + * + * Every interrupt can have up to 32 causes and the interrupt model supports + * masking/unmasking each cause independently + * + * Return: '0' on Success; Error code otherwise. + */ +int dprtc_set_irq_mask(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u32 mask) +{ + struct dprtc_cmd_set_irq_mask *cmd_params; + struct fsl_mc_command cmd = { 0 }; + + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_IRQ_MASK, + cmd_flags, + token); + cmd_params = (struct dprtc_cmd_set_irq_mask *)cmd.params; + cmd_params->mask = cpu_to_le32(mask); + cmd_params->irq_index = irq_index; + + return mc_send_command(mc_io, &cmd); +} + +/** + * dprtc_get_irq_mask() - Get interrupt mask. + * @mc_io: Pointer to MC portal's I/O object + * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' + * @token: Token of DPRTC object + * @irq_index: The interrupt index to configure + * @mask: Returned event mask to trigger interrupt * - * @mc_io: Pointer to MC portal's I/O object - * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' - * @token: Token of DPRTC object - * @freq_compensation: Frequency compensation value + * Every interrupt can have up to 32 causes and the interrupt model supports + * masking/unmasking each cause independently * * Return: '0' on Success; Error code otherwise. 
*/ -int dprtc_get_freq_compensation(struct fsl_mc_io *mc_io, - u32 cmd_flags, - u16 token, - u32 *freq_compensation) +int dprtc_get_irq_mask(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u32 *mask) { - struct dprtc_get_freq_compensation *rsp_params; + struct dprtc_rsp_get_irq_mask *rsp_params; + struct dprtc_cmd_get_irq *cmd_params; struct fsl_mc_command cmd = { 0 }; int err; - cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_FREQ_COMPENSATION, + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_MASK, cmd_flags, token); + cmd_params = (struct dprtc_cmd_get_irq *)cmd.params; + cmd_params->irq_index = irq_index; err = mc_send_command(mc_io, &cmd); if (err) return err; - rsp_params = (struct dprtc_get_freq_compensation *)cmd.params; - *freq_compensation = le32_to_cpu(rsp_params->freq_compensation); + rsp_params = (struct dprtc_rsp_get_irq_mask *)cmd.params; + *mask = le32_to_cpu(rsp_params->mask); return 0; } /** - * dprtc_get_time() - Returns the current RTC time. + * dprtc_get_irq_status() - Get the current status of any pending interrupts. * * @mc_io: Pointer to MC portal's I/O object * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' * @token: Token of DPRTC object - * @time: Current RTC time. + * @irq_index: The interrupt index to configure + * @status: Returned interrupts status - one bit per cause: + * 0 = no interrupt pending + * 1 = interrupt pending * * Return: '0' on Success; Error code otherwise. */ -int dprtc_get_time(struct fsl_mc_io *mc_io, - u32 cmd_flags, - u16 token, - uint64_t *time) +int dprtc_get_irq_status(struct fsl_mc_io *mc_io, + u32 cmd_flags, + u16 token, + u8 irq_index, + u32 *status) { - struct dprtc_time *rsp_params; + struct dprtc_cmd_get_irq_status *cmd_params; + struct dprtc_rsp_get_irq_status *rsp_params; struct fsl_mc_command cmd = { 0 }; int err; - cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_TIME, + cmd.header = mc_encode_cmd_header(DPRTC_CMDID_GET_IRQ_STATUS, cmd_flags, token); + cmd_params = (struct dprtc_cmd_get_irq_status *)cmd.params; + cmd_params->status = cpu_to_le32(*status); + cmd_params->irq_index = irq_index; err = mc_send_command(mc_io, &cmd); if (err) return err; - rsp_params = (struct dprtc_time *)cmd.params; - *time = le64_to_cpu(rsp_params->time); + rsp_params = (struct dprtc_rsp_get_irq_status *)cmd.params; + *status = le32_to_cpu(rsp_params->status); return 0; } /** - * dprtc_set_time() - Updates current RTC time. + * dprtc_clear_irq_status() - Clear a pending interrupt's status * * @mc_io: Pointer to MC portal's I/O object * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_' * @token: Token of DPRTC object - * @time: New RTC time. + * @irq_index: The interrupt index to configure + * @status: Bits to clear (W1C) - one bit per cause: + * 0 = don't change + * 1 = clear status bit * * Return: '0' on Success; Error code otherwise. 
*/
-int dprtc_set_time(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   uint64_t time)
+int dprtc_clear_irq_status(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u8 irq_index,
+			   u32 status)
 {
-	struct dprtc_time *cmd_params;
+	struct dprtc_cmd_clear_irq_status *cmd_params;
 	struct fsl_mc_command cmd = { 0 };
 
-	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_SET_TIME,
+	cmd.header = mc_encode_cmd_header(DPRTC_CMDID_CLEAR_IRQ_STATUS,
 					  cmd_flags,
 					  token);
-	cmd_params = (struct dprtc_time *)cmd.params;
-	cmd_params->time = cpu_to_le64(time);
+	cmd_params = (struct dprtc_cmd_clear_irq_status *)cmd.params;
+	cmd_params->irq_index = irq_index;
+	cmd_params->status = cpu_to_le32(status);
 
 	return mc_send_command(mc_io, &cmd);
 }
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
index fe19618d6cdf..be7914c1634d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
@@ -13,6 +13,14 @@
 
 struct fsl_mc_io;
 
+/**
+ * Number of IRQs
+ */
+#define DPRTC_MAX_IRQ_NUM	1
+#define DPRTC_IRQ_INDEX		0
+
+#define DPRTC_EVENT_PPS		0x08000000
+
 int dprtc_open(struct fsl_mc_io *mc_io,
 	       u32 cmd_flags,
 	       int dprtc_id,
@@ -22,24 +30,40 @@ int dprtc_close(struct fsl_mc_io *mc_io,
 		u32 cmd_flags,
 		u16 token);
 
-int dprtc_set_freq_compensation(struct fsl_mc_io *mc_io,
-				u32 cmd_flags,
-				u16 token,
-				u32 freq_compensation);
-
-int dprtc_get_freq_compensation(struct fsl_mc_io *mc_io,
-				u32 cmd_flags,
-				u16 token,
-				u32 *freq_compensation);
-
-int dprtc_get_time(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   uint64_t *time);
-
-int dprtc_set_time(struct fsl_mc_io *mc_io,
-		   u32 cmd_flags,
-		   u16 token,
-		   uint64_t time);
+int dprtc_set_irq_enable(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u8 en);
+
+int dprtc_get_irq_enable(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u8 *en);
+
+int dprtc_set_irq_mask(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       u8 irq_index,
+		       u32 mask);
+
+int dprtc_get_irq_mask(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       u8 irq_index,
+		       u32 *mask);
+
+int dprtc_get_irq_status(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token,
+			 u8 irq_index,
+			 u32 *status);
+
+int dprtc_clear_irq_status(struct fsl_mc_io *mc_io,
+			   u32 cmd_flags,
+			   u16 token,
+			   u8 irq_index,
+			   u32 status);
 
 #endif /* __FSL_DPRTC_H */
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index 8429f5c1d810..ed0d010c7cf2 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -29,3 +29,13 @@ config FSL_ENETC_PTP_CLOCK
 	  packets using the SO_TIMESTAMPING API.
 
 	  If compiled as module (M), the module name is fsl-enetc-ptp.
+
+config FSL_ENETC_HW_TIMESTAMPING
+	bool "ENETC hardware timestamping support"
+	depends on FSL_ENETC || FSL_ENETC_VF
+	help
+	  Enable hardware timestamping of Ethernet packets, using the
+	  SO_TIMESTAMPING API. Because dynamic allocation of the RX BD
+	  ring is not yet supported, and extended RX BDs are too expensive
+	  to use when timestamping is not needed, this option enables
+	  extended RX BDs in order to support hardware timestamping.
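For reference, the timestamping that FSL_ENETC_HW_TIMESTAMPING enables is driven from user space through the standard kernel interfaces wired up later in this patch (enetc_hwtstamp_set()/enetc_hwtstamp_get() behind SIOCSHWTSTAMP/SIOCGHWTSTAMP, plus SO_TIMESTAMPING on the socket). A minimal sketch of the consumer side; the interface name "eno0" and the bare-bones error handling are illustrative, not part of the patch:

#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

/* Ask the NIC for hardware timestamps, then enable reporting on 'sock'. */
static int enable_hw_tstamps(int sock, const char *ifname)
{
	struct hwtstamp_config cfg = {
		.tx_type   = HWTSTAMP_TX_ON,      /* driver sets ENETC_F_TX_TSTAMP */
		.rx_filter = HWTSTAMP_FILTER_ALL, /* driver sets ENETC_F_RX_TSTAMP */
	};
	struct ifreq ifr;
	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
		    SOF_TIMESTAMPING_RX_HARDWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1); /* e.g. "eno0" */
	ifr.ifr_data = (void *)&cfg;

	if (ioctl(sock, SIOCSHWTSTAMP, &ifr) < 0) /* lands in enetc_hwtstamp_set() */
		return -1;

	return setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}

With this in place, RX timestamps arrive as SCM_TIMESTAMPING control messages on the socket, and TX timestamps are looped back via the socket error queue.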
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 491475d87736..223709443ea4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -13,7 +13,8 @@ #define ENETC_MAX_SKB_FRAGS 13 #define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1) -static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb); +static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, + int active_offloads); netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) { @@ -33,7 +34,7 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) return NETDEV_TX_BUSY; } - count = enetc_map_tx_buffs(tx_ring, skb); + count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads); if (unlikely(!count)) goto drop_packet_err; @@ -105,7 +106,8 @@ static void enetc_free_tx_skb(struct enetc_bdr *tx_ring, } } -static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) +static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb, + int active_offloads) { struct enetc_tx_swbd *tx_swbd; struct skb_frag_struct *frag; @@ -137,7 +139,10 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) count++; do_vlan = skb_vlan_tag_present(skb); - do_tstamp = skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP; + do_tstamp = (active_offloads & ENETC_F_TX_TSTAMP) && + (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP); + tx_swbd->do_tstamp = do_tstamp; + tx_swbd->check_wb = tx_swbd->do_tstamp; if (do_vlan || do_tstamp) flags |= ENETC_TXBD_FLAGS_EX; @@ -299,24 +304,70 @@ static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci) return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi; } +static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd, + u64 *tstamp) +{ + u32 lo, hi, tstamp_lo; + + lo = enetc_rd(hw, ENETC_SICTR0); + hi = enetc_rd(hw, ENETC_SICTR1); + tstamp_lo = le32_to_cpu(txbd->wb.tstamp); + if (lo <= tstamp_lo) + hi -= 1; + *tstamp = (u64)hi << 32 | tstamp_lo; +} + +static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp) +{ + struct skb_shared_hwtstamps shhwtstamps; + + if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) { + memset(&shhwtstamps, 0, sizeof(shhwtstamps)); + shhwtstamps.hwtstamp = ns_to_ktime(tstamp); + skb_tstamp_tx(skb, &shhwtstamps); + } +} + static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) { struct net_device *ndev = tx_ring->ndev; int tx_frm_cnt = 0, tx_byte_cnt = 0; struct enetc_tx_swbd *tx_swbd; int i, bds_to_clean; + bool do_tstamp; + u64 tstamp = 0; i = tx_ring->next_to_clean; tx_swbd = &tx_ring->tx_swbd[i]; bds_to_clean = enetc_bd_ready_count(tx_ring, i); + do_tstamp = false; + while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) { bool is_eof = !!tx_swbd->skb; + if (unlikely(tx_swbd->check_wb)) { + struct enetc_ndev_priv *priv = netdev_priv(ndev); + union enetc_tx_bd *txbd; + + txbd = ENETC_TXBD(*tx_ring, i); + + if (txbd->flags & ENETC_TXBD_FLAGS_W && + tx_swbd->do_tstamp) { + enetc_get_tx_tstamp(&priv->si->hw, txbd, + &tstamp); + do_tstamp = true; + } + } + if (likely(tx_swbd->dma)) enetc_unmap_tx_buff(tx_ring, tx_swbd); if (is_eof) { + if (unlikely(do_tstamp)) { + enetc_tstamp_tx(tx_swbd->skb, tstamp); + do_tstamp = false; + } napi_consume_skb(tx_swbd->skb, napi_budget); tx_swbd->skb = NULL; } @@ -425,10 +476,38 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt) return j; } 
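/* Reviewer sketch, not part of the patch: enetc_get_tx_tstamp() above and
 * enetc_get_rx_tstamp() below share the same trick for widening the 32-bit
 * timestamp captured in the BD writeback to 64 bits: read the free-running
 * counter (ENETC_SICTR0/1) now, and if its low word is already at or below
 * the captured low word, the high word must have incremented since the
 * capture, so step it back one epoch. Standalone user-space illustration:
 */
#include <assert.h>
#include <stdint.h>

static uint64_t rebuild_tstamp(uint32_t cur_lo, uint32_t cur_hi,
			       uint32_t captured_lo)
{
	if (cur_lo <= captured_lo)	/* low word wrapped after capture */
		cur_hi -= 1;
	return (uint64_t)cur_hi << 32 | captured_lo;
}

int main(void)
{
	/* Captured at hi=5 near the top of the low word; the counter has
	 * wrapped to hi=6 by the time the driver reads SICTR0/1.
	 */
	assert(rebuild_tstamp(0x10, 6, 0xFFFFFFF0) ==
	       (((uint64_t)5 << 32) | 0xFFFFFFF0));
	return 0;
}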
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING +static void enetc_get_rx_tstamp(struct net_device *ndev, + union enetc_rx_bd *rxbd, + struct sk_buff *skb) +{ + struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_hw *hw = &priv->si->hw; + u32 lo, hi, tstamp_lo; + u64 tstamp; + + if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TSTMP) { + lo = enetc_rd(hw, ENETC_SICTR0); + hi = enetc_rd(hw, ENETC_SICTR1); + tstamp_lo = le32_to_cpu(rxbd->r.tstamp); + if (lo <= tstamp_lo) + hi -= 1; + + tstamp = (u64)hi << 32 | tstamp_lo; + memset(shhwtstamps, 0, sizeof(*shhwtstamps)); + shhwtstamps->hwtstamp = ns_to_ktime(tstamp); + } +} +#endif + static void enetc_get_offloads(struct enetc_bdr *rx_ring, union enetc_rx_bd *rxbd, struct sk_buff *skb) { - /* TODO: add tstamp, hashing */ +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev); +#endif + /* TODO: hashing */ if (rx_ring->ndev->features & NETIF_F_RXCSUM) { u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum); @@ -442,6 +521,10 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(rxbd->r.vlan_opt)); +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + if (priv->active_offloads & ENETC_F_RX_TSTAMP) + enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb); +#endif } static void enetc_process_skb(struct enetc_bdr *rx_ring, @@ -1074,6 +1157,9 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) enetc_rxbdr_wr(hw, idx, ENETC_RBICIR0, ENETC_RBICIR0_ICEN | 0x1); rbmr = ENETC_RBMR_EN; +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + rbmr |= ENETC_RBMR_BDS; +#endif if (rx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_RX) rbmr |= ENETC_RBMR_VTE; @@ -1341,6 +1427,62 @@ int enetc_close(struct net_device *ndev) return 0; } +int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, + void *type_data) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct tc_mqprio_qopt *mqprio = type_data; + struct enetc_bdr *tx_ring; + u8 num_tc; + int i; + + if (type != TC_SETUP_QDISC_MQPRIO) + return -EOPNOTSUPP; + + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + num_tc = mqprio->num_tc; + + if (!num_tc) { + netdev_reset_tc(ndev); + netif_set_real_num_tx_queues(ndev, priv->num_tx_rings); + + /* Reset all ring priorities to 0 */ + for (i = 0; i < priv->num_tx_rings; i++) { + tx_ring = priv->tx_ring[i]; + enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, 0); + } + + return 0; + } + + /* Check if we have enough BD rings available to accommodate all TCs */ + if (num_tc > priv->num_tx_rings) { + netdev_err(ndev, "Max %d traffic classes supported\n", + priv->num_tx_rings); + return -EINVAL; + } + + /* For the moment, we use only one BD ring per TC. + * + * Configure num_tc BD rings with increasing priorities. 
+ */ + for (i = 0; i < num_tc; i++) { + tx_ring = priv->tx_ring[i]; + enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, i); + } + + /* Reset the number of netdev queues based on the TC count */ + netif_set_real_num_tx_queues(ndev, num_tc); + + netdev_set_num_tc(ndev, num_tc); + + /* Each TC is associated with one netdev queue */ + for (i = 0; i < num_tc; i++) + netdev_set_tc_queue(ndev, i, 1, i); + + return 0; +} + struct net_device_stats *enetc_get_stats(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); @@ -1396,6 +1538,70 @@ int enetc_set_features(struct net_device *ndev, return 0; } +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING +static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct hwtstamp_config config; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + priv->active_offloads &= ~ENETC_F_TX_TSTAMP; + break; + case HWTSTAMP_TX_ON: + priv->active_offloads |= ENETC_F_TX_TSTAMP; + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + priv->active_offloads &= ~ENETC_F_RX_TSTAMP; + break; + default: + priv->active_offloads |= ENETC_F_RX_TSTAMP; + config.rx_filter = HWTSTAMP_FILTER_ALL; + } + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct hwtstamp_config config; + + config.flags = 0; + + if (priv->active_offloads & ENETC_F_TX_TSTAMP) + config.tx_type = HWTSTAMP_TX_ON; + else + config.tx_type = HWTSTAMP_TX_OFF; + + config.rx_filter = (priv->active_offloads & ENETC_F_RX_TSTAMP) ? + HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE; + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? 
+ -EFAULT : 0; +} +#endif + +int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) +{ +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + if (cmd == SIOCSHWTSTAMP) + return enetc_hwtstamp_set(ndev, rq); + if (cmd == SIOCGHWTSTAMP) + return enetc_hwtstamp_get(ndev, rq); +#endif + return -EINVAL; +} + int enetc_alloc_msix(struct enetc_ndev_priv *priv) { struct pci_dev *pdev = priv->si->pdev; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index b274135c5103..541b4e2073fe 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -21,7 +21,9 @@ struct enetc_tx_swbd { struct sk_buff *skb; dma_addr_t dma; u16 len; - u16 is_dma_page; + u8 is_dma_page:1; + u8 check_wb:1; + u8 do_tstamp:1; }; #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE @@ -167,6 +169,12 @@ struct enetc_cls_rule { #define ENETC_MAX_BDR_INT 2 /* fixed to max # of available cpus */ +/* TODO: more hardware offloads */ +enum enetc_active_offloads { + ENETC_F_RX_TSTAMP = BIT(0), + ENETC_F_TX_TSTAMP = BIT(1), +}; + struct enetc_ndev_priv { struct net_device *ndev; struct device *dev; /* dma-mapping device */ @@ -178,6 +186,7 @@ struct enetc_ndev_priv { u16 rx_bd_count, tx_bd_count; u16 msg_enable; + int active_offloads; struct enetc_bdr *tx_ring[16]; struct enetc_bdr *rx_ring[16]; @@ -200,6 +209,9 @@ struct enetc_msg_cmd_set_primary_mac { #define ENETC_CBDR_TIMEOUT 1000 /* usecs */ +/* PTP driver exports */ +extern int enetc_phc_index; + /* SI common */ int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv); void enetc_pci_remove(struct pci_dev *pdev); @@ -216,6 +228,10 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev); struct net_device_stats *enetc_get_stats(struct net_device *ndev); int enetc_set_features(struct net_device *ndev, netdev_features_t features); +int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd); +int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, + void *type_data); + /* ethtool */ void enetc_set_ethtool_ops(struct net_device *ndev); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index b9519b6ad727..fcb52efec075 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -555,6 +555,35 @@ static void enetc_get_ringparam(struct net_device *ndev, } } +static int enetc_get_ts_info(struct net_device *ndev, + struct ethtool_ts_info *info) +{ + int *phc_idx; + + phc_idx = symbol_get(enetc_phc_index); + if (phc_idx) { + info->phc_index = *phc_idx; + symbol_put(enetc_phc_index); + } else { + info->phc_index = -1; + } + +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | + (1 << HWTSTAMP_FILTER_ALL); +#else + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; +#endif + return 0; +} + static const struct ethtool_ops enetc_pf_ethtool_ops = { .get_regs_len = enetc_get_reglen, .get_regs = enetc_get_regs, @@ -571,6 +600,7 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = { .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_link = ethtool_op_get_link, + .get_ts_info = 
enetc_get_ts_info, }; static const struct ethtool_ops enetc_vf_ethtool_ops = { @@ -586,6 +616,7 @@ static const struct ethtool_ops enetc_vf_ethtool_ops = { .set_rxfh = enetc_set_rxfh, .get_ringparam = enetc_get_ringparam, .get_link = ethtool_op_get_link, + .get_ts_info = enetc_get_ts_info, }; void enetc_set_ethtool_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index df8eb8882d92..88276299f447 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -127,7 +127,7 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_TBSR_BUSY BIT(0) #define ENETC_TBMR_VIH BIT(9) #define ENETC_TBMR_PRIO_MASK GENMASK(2, 0) -#define ENETC_TBMR_PRIO_SET(val) val +#define ENETC_TBMR_SET_PRIO(val) ((val) & ENETC_TBMR_PRIO_MASK) #define ENETC_TBMR_EN BIT(31) #define ENETC_TBSR 0x4 #define ENETC_TBBAR0 0x10 @@ -361,6 +361,12 @@ union enetc_tx_bd { u8 e_flags; u8 flags; } ext; /* Tx BD extension */ + struct { + __le32 tstamp; + u8 reserved[10]; + u8 status; + u8 flags; + } wb; /* writeback descriptor */ }; #define ENETC_TXBD_FLAGS_L4CS BIT(0) @@ -399,6 +405,9 @@ union enetc_rx_bd { struct { __le64 addr; u8 reserved[8]; +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + u8 reserved1[16]; +#endif } w; struct { __le16 inet_csum; @@ -413,6 +422,10 @@ union enetc_rx_bd { }; __le32 lstatus; }; +#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING + __le32 tstamp; + u8 reserved[12]; +#endif } r; }; @@ -531,3 +544,13 @@ static inline void enetc_enable_txvlan(struct enetc_hw *hw, int si_idx, val = (val & ~ENETC_TBMR_VIH) | (en ? ENETC_TBMR_VIH : 0); enetc_txbdr_wr(hw, si_idx, ENETC_TBMR, val); } + +static inline void enetc_set_bdr_prio(struct enetc_hw *hw, int bdr_idx, + int prio) +{ + u32 val = enetc_txbdr_rd(hw, bdr_idx, ENETC_TBMR); + + val &= ~ENETC_TBMR_PRIO_MASK; + val |= ENETC_TBMR_SET_PRIO(prio); + enetc_txbdr_wr(hw, bdr_idx, ENETC_TBMR, val); +} diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 78287c517095..258b3cb38a6f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -702,6 +702,8 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_set_vf_vlan = enetc_pf_set_vf_vlan, .ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk, .ndo_set_features = enetc_pf_set_features, + .ndo_do_ioctl = enetc_ioctl, + .ndo_setup_tc = enetc_setup_tc, }; static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c index 8c1497e7d9c5..2fd2586e42bf 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -7,6 +7,9 @@ #include "enetc.h" +int enetc_phc_index = -1; +EXPORT_SYMBOL(enetc_phc_index); + static struct ptp_clock_info enetc_ptp_caps = { .owner = THIS_MODULE, .name = "ENETC PTP clock", @@ -96,6 +99,7 @@ static int enetc_ptp_probe(struct pci_dev *pdev, if (err) goto err_no_clock; + enetc_phc_index = ptp_qoriq->phc_index; pci_set_drvdata(pdev, ptp_qoriq); return 0; @@ -119,6 +123,7 @@ static void enetc_ptp_remove(struct pci_dev *pdev) { struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev); + enetc_phc_index = -1; ptp_qoriq_free(ptp_qoriq); kfree(ptp_qoriq); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 
72c3ea887bcf..ebd21bf4cfa1 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -111,6 +111,8 @@ static const struct net_device_ops enetc_ndev_ops = { .ndo_get_stats = enetc_get_stats, .ndo_set_mac_address = enetc_vf_set_mac_addr, .ndo_set_features = enetc_vf_set_features, + .ndo_do_ioctl = enetc_ioctl, + .ndo_setup_tc = enetc_setup_tc, }; static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 38f10f7dcbc3..9d459ccf251d 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1689,10 +1689,10 @@ static void fec_get_mac(struct net_device *ndev) */ if (!is_valid_ether_addr(iap)) { /* Report it and use a random ethernet address instead */ - netdev_err(ndev, "Invalid MAC address: %pM\n", iap); + dev_err(&fep->pdev->dev, "Invalid MAC address: %pM\n", iap); eth_hw_addr_random(ndev); - netdev_info(ndev, "Using random MAC address: %pM\n", - ndev->dev_addr); + dev_info(&fep->pdev->dev, "Using random MAC address: %pM\n", + ndev->dev_addr); return; } @@ -2446,30 +2446,31 @@ static int fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) { struct fec_enet_private *fep = netdev_priv(ndev); + struct device *dev = &fep->pdev->dev; unsigned int cycle; if (!(fep->quirks & FEC_QUIRK_HAS_COALESCE)) return -EOPNOTSUPP; if (ec->rx_max_coalesced_frames > 255) { - pr_err("Rx coalesced frames exceed hardware limitation\n"); + dev_err(dev, "Rx coalesced frames exceed hardware limitation\n"); return -EINVAL; } if (ec->tx_max_coalesced_frames > 255) { - pr_err("Tx coalesced frame exceed hardware limitation\n"); + dev_err(dev, "Tx coalesced frame exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesced usec exceed hardware limitation\n"); + dev_err(dev, "Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); if (cycle > 0xFFFF) { - pr_err("Rx coalesced usec exceed hardware limitation\n"); + dev_err(dev, "Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } @@ -3473,7 +3474,6 @@ fec_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to enable phy regulator: %d\n", ret); - clk_disable_unprepare(fep->clk_ipg); goto failed_regulator; } } else { diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 7e892b1cbd3d..19e2365be7d8 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -617,7 +617,7 @@ void fec_ptp_init(struct platform_device *pdev, int irq_idx) fep->ptp_clock = ptp_clock_register(&fep->ptp_caps, &pdev->dev); if (IS_ERR(fep->ptp_clock)) { fep->ptp_clock = NULL; - pr_err("ptp_clock_register failed\n"); + dev_err(&pdev->dev, "ptp_clock_register failed\n"); } schedule_delayed_work(&fep->time_keep, HZ); diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.c b/drivers/net/ethernet/freescale/fman/fman_keygen.c index f54da3c684d0..e1bdfed16134 100644 --- a/drivers/net/ethernet/freescale/fman/fman_keygen.c +++ b/drivers/net/ethernet/freescale/fman/fman_keygen.c @@ -144,7 +144,8 @@ /* Hash Key extraction fields: */ #define DEFAULT_HASH_KEY_EXTRACT_FIELDS \ (KG_SCH_KN_IPSRC1 | KG_SCH_KN_IPDST1 | \ - KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST) + 
KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST | \
+	 KG_SCH_KN_IPSEC_SPI)
 
 /* Default values to be used as hash key in case IPv4 or L4 (TCP, UDP)
  * don't exist in the frame
diff --git a/drivers/net/ethernet/google/Kconfig b/drivers/net/ethernet/google/Kconfig
new file mode 100644
index 000000000000..b8f04d052fda
--- /dev/null
+++ b/drivers/net/ethernet/google/Kconfig
@@ -0,0 +1,27 @@
+#
+# Google network device configuration
+#
+
+config NET_VENDOR_GOOGLE
+	bool "Google Devices"
+	default y
+	help
+	  If you have a network (Ethernet) device belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all
+	  the questions about Google devices. If you say Y, you will be asked
+	  for your specific device in the following questions.
+
+if NET_VENDOR_GOOGLE
+
+config GVE
+	tristate "Google Virtual NIC (gVNIC) support"
+	depends on PCI_MSI
+	help
+	  This driver supports the Google Virtual NIC (gVNIC).
+
+	  To compile this driver as a module, choose M here.
+	  The module will be called gve.
+
+endif #NET_VENDOR_GOOGLE
diff --git a/drivers/net/ethernet/google/Makefile b/drivers/net/ethernet/google/Makefile
new file mode 100644
index 000000000000..402cc3ba1639
--- /dev/null
+++ b/drivers/net/ethernet/google/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Google network device drivers.
+#
+
+obj-$(CONFIG_GVE) += gve/
diff --git a/drivers/net/ethernet/google/gve/Makefile b/drivers/net/ethernet/google/gve/Makefile
new file mode 100644
index 000000000000..3354ce40eb97
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/Makefile
@@ -0,0 +1,4 @@
+# Makefile for the Google virtual Ethernet (gve) driver
+
+obj-$(CONFIG_GVE) += gve.o
+gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h
new file mode 100644
index 000000000000..92372dc43be8
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -0,0 +1,459 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_H_
+#define _GVE_H_
+
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/u64_stats_sync.h>
+#include "gve_desc.h"
+
+#ifndef PCI_VENDOR_ID_GOOGLE
+#define PCI_VENDOR_ID_GOOGLE	0x1ae0
+#endif
+
+#define PCI_DEV_ID_GVNIC	0x0042
+
+#define GVE_REGISTER_BAR	0
+#define GVE_DOORBELL_BAR	2
+
+/* Driver can alloc up to 2 segments for the header and 2 for the payload.
*/ +#define GVE_TX_MAX_IOVEC 4 +/* 1 for management, 1 for rx, 1 for tx */ +#define GVE_MIN_MSIX 3 + +/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ +struct gve_rx_desc_queue { + struct gve_rx_desc *desc_ring; /* the descriptor ring */ + dma_addr_t bus; /* the bus for the desc_ring */ + u32 cnt; /* free-running total number of completed packets */ + u32 fill_cnt; /* free-running total number of descriptors posted */ + u32 mask; /* masks the cnt to the size of the ring */ + u8 seqno; /* the next expected seqno for this desc*/ +}; + +/* The page info for a single slot in the RX data queue */ +struct gve_rx_slot_page_info { + struct page *page; + void *page_address; + u32 page_offset; /* offset to write to in page */ +}; + +/* A list of pages registered with the device during setup and used by a queue + * as buffers + */ +struct gve_queue_page_list { + u32 id; /* unique id */ + u32 num_entries; + struct page **pages; /* list of num_entries pages */ + dma_addr_t *page_buses; /* the dma addrs of the pages */ +}; + +/* Each slot in the data ring has a 1:1 mapping to a slot in the desc ring */ +struct gve_rx_data_queue { + struct gve_rx_data_slot *data_ring; /* read by NIC */ + dma_addr_t data_bus; /* dma mapping of the slots */ + struct gve_rx_slot_page_info *page_info; /* page info of the buffers */ + struct gve_queue_page_list *qpl; /* qpl assigned to this queue */ + u32 mask; /* masks the cnt to the size of the ring */ + u32 cnt; /* free-running total number of completed packets */ +}; + +struct gve_priv; + +/* An RX ring that contains a power-of-two sized desc and data ring. */ +struct gve_rx_ring { + struct gve_priv *gve; + struct gve_rx_desc_queue desc; + struct gve_rx_data_queue data; + u64 rbytes; /* free-running bytes received */ + u64 rpackets; /* free-running packets received */ + u32 q_num; /* queue index */ + u32 ntfy_id; /* notification block index */ + struct gve_queue_resources *q_resources; /* head and tail pointer idx */ + dma_addr_t q_resources_bus; /* dma address for the queue resources */ + struct u64_stats_sync statss; /* sync stats for 32bit archs */ +}; + +/* A TX desc ring entry */ +union gve_tx_desc { + struct gve_tx_pkt_desc pkt; /* first desc for a packet */ + struct gve_tx_seg_desc seg; /* subsequent descs for a packet */ +}; + +/* Tracks the memory in the fifo occupied by a segment of a packet */ +struct gve_tx_iovec { + u32 iov_offset; /* offset into this segment */ + u32 iov_len; /* length */ + u32 iov_padding; /* padding associated with this segment */ +}; + +/* Tracks the memory in the fifo occupied by the skb. 
Mapped 1:1 to a desc + * ring entry but only used for a pkt_desc not a seg_desc + */ +struct gve_tx_buffer_state { + struct sk_buff *skb; /* skb for this pkt */ + struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */ +}; + +/* A TX buffer - each queue has one */ +struct gve_tx_fifo { + void *base; /* address of base of FIFO */ + u32 size; /* total size */ + atomic_t available; /* how much space is still available */ + u32 head; /* offset to write at */ + struct gve_queue_page_list *qpl; /* QPL mapped into this FIFO */ +}; + +/* A TX ring that contains a power-of-two sized desc ring and a FIFO buffer */ +struct gve_tx_ring { + /* Cacheline 0 -- Accessed & dirtied during transmit */ + struct gve_tx_fifo tx_fifo; + u32 req; /* driver tracked head pointer */ + u32 done; /* driver tracked tail pointer */ + + /* Cacheline 1 -- Accessed & dirtied during gve_clean_tx_done */ + __be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */ + u64 pkt_done; /* free-running - total packets completed */ + u64 bytes_done; /* free-running - total bytes completed */ + + /* Cacheline 2 -- Read-mostly fields */ + union gve_tx_desc *desc ____cacheline_aligned; + struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */ + struct netdev_queue *netdev_txq; + struct gve_queue_resources *q_resources; /* head and tail pointer idx */ + u32 mask; /* masks req and done down to queue size */ + + /* Slow-path fields */ + u32 q_num ____cacheline_aligned; /* queue idx */ + u32 stop_queue; /* count of queue stops */ + u32 wake_queue; /* count of queue wakes */ + u32 ntfy_id; /* notification block index */ + dma_addr_t bus; /* dma address of the descr ring */ + dma_addr_t q_resources_bus; /* dma address of the queue resources */ + struct u64_stats_sync statss; /* sync stats for 32bit archs */ +} ____cacheline_aligned; + +/* Wraps the info for one irq including the napi struct and the queues + * associated with that irq. 
+ */ +struct gve_notify_block { + __be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */ + char name[IFNAMSIZ + 16]; /* name registered with the kernel */ + struct napi_struct napi; /* kernel napi struct for this block */ + struct gve_priv *priv; + struct gve_tx_ring *tx; /* tx rings on this block */ + struct gve_rx_ring *rx; /* rx rings on this block */ +} ____cacheline_aligned; + +/* Tracks allowed and current queue settings */ +struct gve_queue_config { + u16 max_queues; + u16 num_queues; /* current */ +}; + +/* Tracks the available and used qpl IDs */ +struct gve_qpl_config { + u32 qpl_map_size; /* map memory size */ + unsigned long *qpl_id_map; /* bitmap of used qpl ids */ +}; + +struct gve_priv { + struct net_device *dev; + struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ + struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ + struct gve_queue_page_list *qpls; /* array of num qpls */ + struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ + dma_addr_t ntfy_block_bus; + struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */ + char mgmt_msix_name[IFNAMSIZ + 16]; + u32 mgmt_msix_idx; + __be32 *counter_array; /* array of num_event_counters */ + dma_addr_t counter_array_bus; + + u16 num_event_counters; + u16 tx_desc_cnt; /* num desc per ring */ + u16 rx_desc_cnt; /* num desc per ring */ + u16 tx_pages_per_qpl; /* tx buffer length */ + u16 rx_pages_per_qpl; /* rx buffer length */ + u64 max_registered_pages; + u64 num_registered_pages; /* num pages registered with NIC */ + u32 rx_copybreak; /* copy packets smaller than this */ + u16 default_num_queues; /* default num queues to set up */ + + struct gve_queue_config tx_cfg; + struct gve_queue_config rx_cfg; + struct gve_qpl_config qpl_cfg; /* map used QPL ids */ + u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ + + struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ + __be32 __iomem *db_bar2; /* "array" of doorbells */ + u32 msg_enable; /* level for netif* netdev print macros */ + struct pci_dev *pdev; + + /* metrics */ + u32 tx_timeo_cnt; + + /* Admin queue - see gve_adminq.h*/ + union gve_adminq_command *adminq; + dma_addr_t adminq_bus_addr; + u32 adminq_mask; /* masks prod_cnt to adminq size */ + u32 adminq_prod_cnt; /* free-running count of AQ cmds executed */ + + struct workqueue_struct *gve_wq; + struct work_struct service_task; + unsigned long service_task_flags; + unsigned long state_flags; +}; + +enum gve_service_task_flags { + GVE_PRIV_FLAGS_DO_RESET = BIT(1), + GVE_PRIV_FLAGS_RESET_IN_PROGRESS = BIT(2), + GVE_PRIV_FLAGS_PROBE_IN_PROGRESS = BIT(3), +}; + +enum gve_state_flags { + GVE_PRIV_FLAGS_ADMIN_QUEUE_OK = BIT(1), + GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK = BIT(2), + GVE_PRIV_FLAGS_DEVICE_RINGS_OK = BIT(3), + GVE_PRIV_FLAGS_NAPI_ENABLED = BIT(4), +}; + +static inline bool gve_get_do_reset(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags); +} + +static inline void gve_set_do_reset(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags); +} + +static inline void gve_clear_do_reset(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_DO_RESET, &priv->service_task_flags); +} + +static inline bool gve_get_reset_in_progress(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, + &priv->service_task_flags); +} + +static inline void gve_set_reset_in_progress(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, &priv->service_task_flags); +} + 
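/* Reviewer sketch, not part of the patch: the Linux set_bit()/clear_bit()/
 * test_bit() helpers take a bit *number*, not a mask, while the two flag
 * enums above are defined with BIT(). GVE_PRIV_FLAGS_DO_RESET == BIT(1) == 2,
 * so gve_set_do_reset() really operates on bit number 2 (mask 0x4). This is
 * still correct, because every accessor passes the same enum value, but the
 * bits consumed are sparser than the definitions suggest. Standalone
 * user-space illustration (the local BIT() mirrors the kernel macro):
 */
#include <assert.h>

#define BIT(n) (1UL << (n))

int main(void)
{
	unsigned long flags = 0;
	unsigned long do_reset = BIT(1);   /* == 2, used as a bit number */

	flags |= 1UL << do_reset;          /* what set_bit(do_reset, &flags) does */
	assert(flags == 0x4UL);            /* bit 2 is set, not bit 1 */
	assert((flags >> do_reset) & 1UL); /* test_bit(do_reset, &flags) agrees */
	return 0;
}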
+static inline void gve_clear_reset_in_progress(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_RESET_IN_PROGRESS, &priv->service_task_flags); +} + +static inline bool gve_get_probe_in_progress(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, + &priv->service_task_flags); +} + +static inline void gve_set_probe_in_progress(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags); +} + +static inline void gve_clear_probe_in_progress(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_PROBE_IN_PROGRESS, &priv->service_task_flags); +} + +static inline bool gve_get_admin_queue_ok(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags); +} + +static inline void gve_set_admin_queue_ok(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags); +} + +static inline void gve_clear_admin_queue_ok(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_ADMIN_QUEUE_OK, &priv->state_flags); +} + +static inline bool gve_get_device_resources_ok(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags); +} + +static inline void gve_set_device_resources_ok(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags); +} + +static inline void gve_clear_device_resources_ok(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_DEVICE_RESOURCES_OK, &priv->state_flags); +} + +static inline bool gve_get_device_rings_ok(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags); +} + +static inline void gve_set_device_rings_ok(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags); +} + +static inline void gve_clear_device_rings_ok(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_DEVICE_RINGS_OK, &priv->state_flags); +} + +static inline bool gve_get_napi_enabled(struct gve_priv *priv) +{ + return test_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags); +} + +static inline void gve_set_napi_enabled(struct gve_priv *priv) +{ + set_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags); +} + +static inline void gve_clear_napi_enabled(struct gve_priv *priv) +{ + clear_bit(GVE_PRIV_FLAGS_NAPI_ENABLED, &priv->state_flags); +} + +/* Returns the address of the ntfy_blocks irq doorbell + */ +static inline __be32 __iomem *gve_irq_doorbell(struct gve_priv *priv, + struct gve_notify_block *block) +{ + return &priv->db_bar2[be32_to_cpu(block->irq_db_index)]; +} + +/* Returns the index into ntfy_blocks of the given tx ring's block + */ +static inline u32 gve_tx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx) +{ + return queue_idx; +} + +/* Returns the index into ntfy_blocks of the given rx ring's block + */ +static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx) +{ + return (priv->num_ntfy_blks / 2) + queue_idx; +} + +/* Returns the number of tx queue page lists + */ +static inline u32 gve_num_tx_qpls(struct gve_priv *priv) +{ + return priv->tx_cfg.num_queues; +} + +/* Returns the number of rx queue page lists + */ +static inline u32 gve_num_rx_qpls(struct gve_priv *priv) +{ + return priv->rx_cfg.num_queues; +} + +/* Returns a pointer to the next available tx qpl in the list of qpls + */ +static inline +struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv) +{ + int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map, + priv->qpl_cfg.qpl_map_size); + + /* we are out of tx qpls */ + if (id >= gve_num_tx_qpls(priv)) 
+ return NULL; + + set_bit(id, priv->qpl_cfg.qpl_id_map); + return &priv->qpls[id]; +} + +/* Returns a pointer to the next available rx qpl in the list of qpls + */ +static inline +struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv) +{ + int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map, + priv->qpl_cfg.qpl_map_size, + gve_num_tx_qpls(priv)); + + /* we are out of rx qpls */ + if (id == priv->qpl_cfg.qpl_map_size) + return NULL; + + set_bit(id, priv->qpl_cfg.qpl_id_map); + return &priv->qpls[id]; +} + +/* Unassigns the qpl with the given id + */ +static inline void gve_unassign_qpl(struct gve_priv *priv, int id) +{ + clear_bit(id, priv->qpl_cfg.qpl_id_map); +} + +/* Returns the correct dma direction for tx and rx qpls + */ +static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, + int id) +{ + if (id < gve_num_tx_qpls(priv)) + return DMA_TO_DEVICE; + else + return DMA_FROM_DEVICE; +} + +/* Returns true if the max mtu allows page recycling */ +static inline bool gve_can_recycle_pages(struct net_device *dev) +{ + /* We can't recycle the pages if we can't fit a packet into half a + * page. + */ + return dev->max_mtu <= PAGE_SIZE / 2; +} + +/* buffers */ +int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma, + enum dma_data_direction); +void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, + enum dma_data_direction); +/* tx handling */ +netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); +bool gve_tx_poll(struct gve_notify_block *block, int budget); +int gve_tx_alloc_rings(struct gve_priv *priv); +void gve_tx_free_rings(struct gve_priv *priv); +__be32 gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx); +/* rx handling */ +void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); +bool gve_rx_poll(struct gve_notify_block *block, int budget); +int gve_rx_alloc_rings(struct gve_priv *priv); +void gve_rx_free_rings(struct gve_priv *priv); +bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + netdev_features_t feat); +/* Reset */ +void gve_schedule_reset(struct gve_priv *priv); +int gve_reset(struct gve_priv *priv, bool attempt_teardown); +int gve_adjust_queues(struct gve_priv *priv, + struct gve_queue_config new_rx_config, + struct gve_queue_config new_tx_config); +/* exported by ethtool.c */ +extern const struct ethtool_ops gve_ethtool_ops; +/* needed by ethtool */ +extern const char gve_version_str[]; +#endif /* _GVE_H_ */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c new file mode 100644 index 000000000000..c3ba7baf0107 --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2019 Google, Inc. 
+ */ + +#include <linux/etherdevice.h> +#include <linux/pci.h> +#include "gve.h" +#include "gve_adminq.h" +#include "gve_register.h" + +#define GVE_MAX_ADMINQ_RELEASE_CHECK 500 +#define GVE_ADMINQ_SLEEP_LEN 20 +#define GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK 100 + +int gve_adminq_alloc(struct device *dev, struct gve_priv *priv) +{ + priv->adminq = dma_alloc_coherent(dev, PAGE_SIZE, + &priv->adminq_bus_addr, GFP_KERNEL); + if (unlikely(!priv->adminq)) + return -ENOMEM; + + priv->adminq_mask = (PAGE_SIZE / sizeof(union gve_adminq_command)) - 1; + priv->adminq_prod_cnt = 0; + + /* Setup Admin queue with the device */ + iowrite32be(priv->adminq_bus_addr / PAGE_SIZE, + &priv->reg_bar0->adminq_pfn); + + gve_set_admin_queue_ok(priv); + return 0; +} + +void gve_adminq_release(struct gve_priv *priv) +{ + int i = 0; + + /* Tell the device the adminq is leaving */ + iowrite32be(0x0, &priv->reg_bar0->adminq_pfn); + while (ioread32be(&priv->reg_bar0->adminq_pfn)) { + /* If this is reached the device is unrecoverable and still + * holding memory. Continue looping to avoid memory corruption, + * but WARN so it is visible what is going on. + */ + if (i == GVE_MAX_ADMINQ_RELEASE_CHECK) + WARN(1, "Unrecoverable platform error!"); + i++; + msleep(GVE_ADMINQ_SLEEP_LEN); + } + gve_clear_device_rings_ok(priv); + gve_clear_device_resources_ok(priv); + gve_clear_admin_queue_ok(priv); +} + +void gve_adminq_free(struct device *dev, struct gve_priv *priv) +{ + if (!gve_get_admin_queue_ok(priv)) + return; + gve_adminq_release(priv); + dma_free_coherent(dev, PAGE_SIZE, priv->adminq, priv->adminq_bus_addr); + gve_clear_admin_queue_ok(priv); +} + +static void gve_adminq_kick_cmd(struct gve_priv *priv, u32 prod_cnt) +{ + iowrite32be(prod_cnt, &priv->reg_bar0->adminq_doorbell); +} + +static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt) +{ + int i; + + for (i = 0; i < GVE_MAX_ADMINQ_EVENT_COUNTER_CHECK; i++) { + if (ioread32be(&priv->reg_bar0->adminq_event_counter) + == prod_cnt) + return true; + msleep(GVE_ADMINQ_SLEEP_LEN); + } + + return false; +} + +static int gve_adminq_parse_err(struct device *dev, u32 status) +{ + if (status != GVE_ADMINQ_COMMAND_PASSED && + status != GVE_ADMINQ_COMMAND_UNSET) + dev_err(dev, "AQ command failed with status %d\n", status); + + switch (status) { + case GVE_ADMINQ_COMMAND_PASSED: + return 0; + case GVE_ADMINQ_COMMAND_UNSET: + dev_err(dev, "parse_aq_err: err and status both unset, this should not be possible.\n"); + return -EINVAL; + case GVE_ADMINQ_COMMAND_ERROR_ABORTED: + case GVE_ADMINQ_COMMAND_ERROR_CANCELLED: + case GVE_ADMINQ_COMMAND_ERROR_DATALOSS: + case GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION: + case GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE: + return -EAGAIN; + case GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS: + case GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR: + case GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT: + case GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND: + case GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE: + case GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR: + return -EINVAL; + case GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED: + return -ETIME; + case GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED: + case GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED: + return -EACCES; + case GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED: + return -ENOMEM; + case GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED: + return -ENOTSUPP; + default: + dev_err(dev, "parse_aq_err: unknown status code %d\n", status); + return -EINVAL; + } +} + +/* This function is not threadsafe - the caller is responsible for any + * necessary locks. 
+ */ +int gve_adminq_execute_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd_orig) +{ + union gve_adminq_command *cmd; + u32 status = 0; + u32 prod_cnt; + + cmd = &priv->adminq[priv->adminq_prod_cnt & priv->adminq_mask]; + priv->adminq_prod_cnt++; + prod_cnt = priv->adminq_prod_cnt; + + memcpy(cmd, cmd_orig, sizeof(*cmd_orig)); + + gve_adminq_kick_cmd(priv, prod_cnt); + if (!gve_adminq_wait_for_cmd(priv, prod_cnt)) { + dev_err(&priv->pdev->dev, "AQ command timed out, need to reset AQ\n"); + return -ENOTRECOVERABLE; + } + + memcpy(cmd_orig, cmd, sizeof(*cmd)); + status = be32_to_cpu(READ_ONCE(cmd->status)); + return gve_adminq_parse_err(&priv->pdev->dev, status); +} + +/* The device specifies that the management vector can either be the first irq + * or the last irq. ntfy_blk_msix_base_idx indicates the first irq assigned to + * the ntfy blks. It if is 0 then the management vector is last, if it is 1 then + * the management vector is first. + * + * gve arranges the msix vectors so that the management vector is last. + */ +#define GVE_NTFY_BLK_BASE_MSIX_IDX 0 +int gve_adminq_configure_device_resources(struct gve_priv *priv, + dma_addr_t counter_array_bus_addr, + u32 num_counters, + dma_addr_t db_array_bus_addr, + u32 num_ntfy_blks) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES); + cmd.configure_device_resources = + (struct gve_adminq_configure_device_resources) { + .counter_array = cpu_to_be64(counter_array_bus_addr), + .num_counters = cpu_to_be32(num_counters), + .irq_db_addr = cpu_to_be64(db_array_bus_addr), + .num_irq_dbs = cpu_to_be32(num_ntfy_blks), + .irq_db_stride = cpu_to_be32(sizeof(priv->ntfy_blocks[0])), + .ntfy_blk_msix_base_idx = + cpu_to_be32(GVE_NTFY_BLK_BASE_MSIX_IDX), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_deconfigure_device_resources(struct gve_priv *priv) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES); + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) +{ + struct gve_tx_ring *tx = &priv->tx[queue_index]; + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE); + cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) { + .queue_id = cpu_to_be32(queue_index), + .reserved = 0, + .queue_resources_addr = cpu_to_be64(tx->q_resources_bus), + .tx_ring_addr = cpu_to_be64(tx->bus), + .queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id), + .ntfy_id = cpu_to_be32(tx->ntfy_id), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + struct gve_rx_ring *rx = &priv->rx[queue_index]; + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); + cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) { + .queue_id = cpu_to_be32(queue_index), + .index = cpu_to_be32(queue_index), + .reserved = 0, + .ntfy_id = cpu_to_be32(rx->ntfy_id), + .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), + .rx_desc_ring_addr = cpu_to_be64(rx->desc.bus), + .rx_data_ring_addr = cpu_to_be64(rx->data.data_bus), + .queue_page_list_id = cpu_to_be32(rx->data.qpl->id), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) +{ + union 
gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE); + cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) { + .queue_id = cpu_to_be32(queue_index), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); + cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { + .queue_id = cpu_to_be32(queue_index), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_describe_device(struct gve_priv *priv) +{ + struct gve_device_descriptor *descriptor; + union gve_adminq_command cmd; + dma_addr_t descriptor_bus; + int err = 0; + u8 *mac; + u16 mtu; + + memset(&cmd, 0, sizeof(cmd)); + descriptor = dma_alloc_coherent(&priv->pdev->dev, PAGE_SIZE, + &descriptor_bus, GFP_KERNEL); + if (!descriptor) + return -ENOMEM; + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESCRIBE_DEVICE); + cmd.describe_device.device_descriptor_addr = + cpu_to_be64(descriptor_bus); + cmd.describe_device.device_descriptor_version = + cpu_to_be32(GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION); + cmd.describe_device.available_length = cpu_to_be32(PAGE_SIZE); + + err = gve_adminq_execute_cmd(priv, &cmd); + if (err) + goto free_device_descriptor; + + priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); + if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) { + netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n", + priv->tx_desc_cnt); + err = -EINVAL; + goto free_device_descriptor; + } + priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); + if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0]) + < PAGE_SIZE || + priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0]) + < PAGE_SIZE) { + netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n", + priv->rx_desc_cnt); + err = -EINVAL; + goto free_device_descriptor; + } + priv->max_registered_pages = + be64_to_cpu(descriptor->max_registered_pages); + mtu = be16_to_cpu(descriptor->mtu); + if (mtu < ETH_MIN_MTU) { + netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n", + mtu); + err = -EINVAL; + goto free_device_descriptor; + } + priv->dev->max_mtu = mtu; + priv->num_event_counters = be16_to_cpu(descriptor->counters); + ether_addr_copy(priv->dev->dev_addr, descriptor->mac); + mac = descriptor->mac; + netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac); + priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl); + priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl); + if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) { + netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting rx_desc_cnt down to %d.\n", + priv->rx_pages_per_qpl); + priv->rx_desc_cnt = priv->rx_pages_per_qpl; + } + priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); + +free_device_descriptor: + dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor, + descriptor_bus); + return err; +} + +int gve_adminq_register_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl) +{ + struct device *hdev = &priv->pdev->dev; + u32 num_entries = qpl->num_entries; + u32 size = num_entries * sizeof(qpl->page_buses[0]); + union gve_adminq_command cmd; + dma_addr_t page_list_bus; + __be64 *page_list; + int err; + int i; + + memset(&cmd, 0, sizeof(cmd)); + page_list = dma_alloc_coherent(hdev, size, &page_list_bus, 
GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + for (i = 0; i < num_entries; i++) + page_list[i] = cpu_to_be64(qpl->page_buses[i]); + + cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST); + cmd.reg_page_list = (struct gve_adminq_register_page_list) { + .page_list_id = cpu_to_be32(qpl->id), + .num_pages = cpu_to_be32(num_entries), + .page_address_list_addr = cpu_to_be64(page_list_bus), + }; + + err = gve_adminq_execute_cmd(priv, &cmd); + dma_free_coherent(hdev, size, page_list, page_list_bus); + return err; +} + +int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST); + cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) { + .page_list_id = cpu_to_be32(page_list_id), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} + +int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu) +{ + union gve_adminq_command cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER); + cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) { + .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU), + .parameter_value = cpu_to_be64(mtu), + }; + + return gve_adminq_execute_cmd(priv, &cmd); +} diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h new file mode 100644 index 000000000000..4dfa06edc0f8 --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) + * Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2019 Google, Inc. + */ + +#ifndef _GVE_ADMINQ_H +#define _GVE_ADMINQ_H + +#include <linux/build_bug.h> + +/* Admin queue opcodes */ +enum gve_adminq_opcodes { + GVE_ADMINQ_DESCRIBE_DEVICE = 0x1, + GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2, + GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3, + GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4, + GVE_ADMINQ_CREATE_TX_QUEUE = 0x5, + GVE_ADMINQ_CREATE_RX_QUEUE = 0x6, + GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7, + GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, + GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, + GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, +}; + +/* Admin queue status codes */ +enum gve_adminq_statuses { + GVE_ADMINQ_COMMAND_UNSET = 0x0, + GVE_ADMINQ_COMMAND_PASSED = 0x1, + GVE_ADMINQ_COMMAND_ERROR_ABORTED = 0xFFFFFFF0, + GVE_ADMINQ_COMMAND_ERROR_ALREADY_EXISTS = 0xFFFFFFF1, + GVE_ADMINQ_COMMAND_ERROR_CANCELLED = 0xFFFFFFF2, + GVE_ADMINQ_COMMAND_ERROR_DATALOSS = 0xFFFFFFF3, + GVE_ADMINQ_COMMAND_ERROR_DEADLINE_EXCEEDED = 0xFFFFFFF4, + GVE_ADMINQ_COMMAND_ERROR_FAILED_PRECONDITION = 0xFFFFFFF5, + GVE_ADMINQ_COMMAND_ERROR_INTERNAL_ERROR = 0xFFFFFFF6, + GVE_ADMINQ_COMMAND_ERROR_INVALID_ARGUMENT = 0xFFFFFFF7, + GVE_ADMINQ_COMMAND_ERROR_NOT_FOUND = 0xFFFFFFF8, + GVE_ADMINQ_COMMAND_ERROR_OUT_OF_RANGE = 0xFFFFFFF9, + GVE_ADMINQ_COMMAND_ERROR_PERMISSION_DENIED = 0xFFFFFFFA, + GVE_ADMINQ_COMMAND_ERROR_UNAUTHENTICATED = 0xFFFFFFFB, + GVE_ADMINQ_COMMAND_ERROR_RESOURCE_EXHAUSTED = 0xFFFFFFFC, + GVE_ADMINQ_COMMAND_ERROR_UNAVAILABLE = 0xFFFFFFFD, + GVE_ADMINQ_COMMAND_ERROR_UNIMPLEMENTED = 0xFFFFFFFE, + GVE_ADMINQ_COMMAND_ERROR_UNKNOWN_ERROR = 0xFFFFFFFF, +}; + +#define GVE_ADMINQ_DEVICE_DESCRIPTOR_VERSION 1 + +/* All AdminQ command structs should be naturally packed. The static_assert + * calls make sure this is the case at compile time. 
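+ * As one example from this file, gve_adminq_create_rx_queue ends in an
+ * explicit u8 padding[4]; spelling the padding out keeps the 48-byte wire
+ * layout visible in the struct itself rather than leaving it to implicit
+ * trailing padding.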
+ */ + +struct gve_adminq_describe_device { + __be64 device_descriptor_addr; + __be32 device_descriptor_version; + __be32 available_length; +}; + +static_assert(sizeof(struct gve_adminq_describe_device) == 16); + +struct gve_device_descriptor { + __be64 max_registered_pages; + __be16 reserved1; + __be16 tx_queue_entries; + __be16 rx_queue_entries; + __be16 default_num_queues; + __be16 mtu; + __be16 counters; + __be16 tx_pages_per_qpl; + __be16 rx_pages_per_qpl; + u8 mac[ETH_ALEN]; + __be16 num_device_options; + __be16 total_length; + u8 reserved2[6]; +}; + +static_assert(sizeof(struct gve_device_descriptor) == 40); + +struct device_option { + __be32 option_id; + __be32 option_length; +}; + +static_assert(sizeof(struct device_option) == 8); + +struct gve_adminq_configure_device_resources { + __be64 counter_array; + __be64 irq_db_addr; + __be32 num_counters; + __be32 num_irq_dbs; + __be32 irq_db_stride; + __be32 ntfy_blk_msix_base_idx; +}; + +static_assert(sizeof(struct gve_adminq_configure_device_resources) == 32); + +struct gve_adminq_register_page_list { + __be32 page_list_id; + __be32 num_pages; + __be64 page_address_list_addr; +}; + +static_assert(sizeof(struct gve_adminq_register_page_list) == 16); + +struct gve_adminq_unregister_page_list { + __be32 page_list_id; +}; + +static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4); + +struct gve_adminq_create_tx_queue { + __be32 queue_id; + __be32 reserved; + __be64 queue_resources_addr; + __be64 tx_ring_addr; + __be32 queue_page_list_id; + __be32 ntfy_id; +}; + +static_assert(sizeof(struct gve_adminq_create_tx_queue) == 32); + +struct gve_adminq_create_rx_queue { + __be32 queue_id; + __be32 index; + __be32 reserved; + __be32 ntfy_id; + __be64 queue_resources_addr; + __be64 rx_desc_ring_addr; + __be64 rx_data_ring_addr; + __be32 queue_page_list_id; + u8 padding[4]; +}; + +static_assert(sizeof(struct gve_adminq_create_rx_queue) == 48); + +/* Queue resources that are shared with the device */ +struct gve_queue_resources { + union { + struct { + __be32 db_index; /* Device -> Guest */ + __be32 counter_index; /* Device -> Guest */ + }; + u8 reserved[64]; + }; +}; + +static_assert(sizeof(struct gve_queue_resources) == 64); + +struct gve_adminq_destroy_tx_queue { + __be32 queue_id; +}; + +static_assert(sizeof(struct gve_adminq_destroy_tx_queue) == 4); + +struct gve_adminq_destroy_rx_queue { + __be32 queue_id; +}; + +static_assert(sizeof(struct gve_adminq_destroy_rx_queue) == 4); + +/* GVE Set Driver Parameter Types */ +enum gve_set_driver_param_types { + GVE_SET_PARAM_MTU = 0x1, +}; + +struct gve_adminq_set_driver_parameter { + __be32 parameter_type; + u8 reserved[4]; + __be64 parameter_value; +}; + +static_assert(sizeof(struct gve_adminq_set_driver_parameter) == 16); + +union gve_adminq_command { + struct { + __be32 opcode; + __be32 status; + union { + struct gve_adminq_configure_device_resources + configure_device_resources; + struct gve_adminq_create_tx_queue create_tx_queue; + struct gve_adminq_create_rx_queue create_rx_queue; + struct gve_adminq_destroy_tx_queue destroy_tx_queue; + struct gve_adminq_destroy_rx_queue destroy_rx_queue; + struct gve_adminq_describe_device describe_device; + struct gve_adminq_register_page_list reg_page_list; + struct gve_adminq_unregister_page_list unreg_page_list; + struct gve_adminq_set_driver_parameter set_driver_param; + }; + }; + u8 reserved[64]; +}; + +static_assert(sizeof(union gve_adminq_command) == 64); + +int gve_adminq_alloc(struct device *dev, struct gve_priv *priv); +void 
gve_adminq_free(struct device *dev, struct gve_priv *priv); +void gve_adminq_release(struct gve_priv *priv); +int gve_adminq_execute_cmd(struct gve_priv *priv, + union gve_adminq_command *cmd_orig); +int gve_adminq_describe_device(struct gve_priv *priv); +int gve_adminq_configure_device_resources(struct gve_priv *priv, + dma_addr_t counter_array_bus_addr, + u32 num_counters, + dma_addr_t db_array_bus_addr, + u32 num_ntfy_blks); +int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); +int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id); +int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id); +int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id); +int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_id); +int gve_adminq_register_page_list(struct gve_priv *priv, + struct gve_queue_page_list *qpl); +int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id); +int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu); +#endif /* _GVE_ADMINQ_H */ diff --git a/drivers/net/ethernet/google/gve/gve_desc.h b/drivers/net/ethernet/google/gve/gve_desc.h new file mode 100644 index 000000000000..54779871d52e --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_desc.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) + * Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2019 Google, Inc. + */ + +/* GVE Transmit Descriptor formats */ + +#ifndef _GVE_DESC_H_ +#define _GVE_DESC_H_ + +#include <linux/build_bug.h> + +/* A note on seg_addrs + * + * Base addresses encoded in seg_addr are not assumed to be physical + * addresses. The ring format assumes these come from some linear address + * space. This could be physical memory, kernel virtual memory, user virtual + * memory. gVNIC uses lists of registered pages. Each queue is assumed + * to be associated with a single such linear address space to ensure a + * consistent meaning for seg_addrs posted to its rings. + */ + +struct gve_tx_pkt_desc { + u8 type_flags; /* desc type is lower 4 bits, flags upper */ + u8 l4_csum_offset; /* relative offset of L4 csum word */ + u8 l4_hdr_offset; /* Offset of start of L4 headers in packet */ + u8 desc_cnt; /* Total descriptors for this packet */ + __be16 len; /* Total length of this packet (in bytes) */ + __be16 seg_len; /* Length of this descriptor's segment */ + __be64 seg_addr; /* Base address (see note) of this segment */ +} __packed; + +struct gve_tx_seg_desc { + u8 type_flags; /* type is lower 4 bits, flags upper */ + u8 l3_offset; /* TSO: 2 byte units to start of IPH */ + __be16 reserved; + __be16 mss; /* TSO MSS */ + __be16 seg_len; + __be64 seg_addr; +} __packed; + +/* GVE Transmit Descriptor Types */ +#define GVE_TXD_STD (0x0 << 4) /* Std with Host Address */ +#define GVE_TXD_TSO (0x1 << 4) /* TSO with Host Address */ +#define GVE_TXD_SEG (0x2 << 4) /* Seg with Host Address */ + +/* GVE Transmit Descriptor Flags for Std Pkts */ +#define GVE_TXF_L4CSUM BIT(0) /* Need csum offload */ +#define GVE_TXF_TSTAMP BIT(2) /* Timestamp required */ + +/* GVE Transmit Descriptor Flags for TSO Segs */ +#define GVE_TXSF_IPV6 BIT(1) /* IPv6 TSO */ + +/* GVE Receive Packet Descriptor */ +/* The start of an ethernet packet comes 2 bytes into the rx buffer. + * gVNIC adds this padding so that both the DMA and the L3/4 protocol header + * access is aligned. 
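+ * With the 2-byte pad, the 14-byte Ethernet header ends at offset 16, so
+ * the L3 header that follows starts on a 4-byte boundary.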
+ */
+#define GVE_RX_PAD 2
+
+struct gve_rx_desc {
+ u8 padding[48];
+ __be32 rss_hash; /* Receive-side scaling hash (Toeplitz for gVNIC) */
+ __be16 mss;
+ __be16 reserved; /* Reserved to zero */
+ u8 hdr_len; /* Header length (L2-L4) including padding */
+ u8 hdr_off; /* 64-byte-scaled offset into RX_DATA entry */
+ __sum16 csum; /* 1's-complement partial checksum of L3+ bytes */
+ __be16 len; /* Length of the received packet */
+ __be16 flags_seq; /* Flags [15:3] and sequence number [2:0] (1-7) */
+} __packed;
+static_assert(sizeof(struct gve_rx_desc) == 64);
+
+/* As with the Tx ring format, the qpl_offset entries below are offsets into an
+ * ordered list of registered pages.
+ */
+struct gve_rx_data_slot {
+ /* byte offset into the rx registered segment of this slot */
+ __be64 qpl_offset;
+};
+
+/* GVE Receive Packet Descriptor Seq No */
+#define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7)
+
+/* GVE Receive Packet Descriptor Flags */
+#define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x)))
+#define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */
+#define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */
+#define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */
+#define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */
+#define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */
+#define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */
+
+/* GVE IRQ */
+#define GVE_IRQ_ACK BIT(31)
+#define GVE_IRQ_MASK BIT(30)
+#define GVE_IRQ_EVENT BIT(29)
+
+static inline bool gve_needs_rss(__be16 flag)
+{
+ if (flag & GVE_RXF_FRAG)
+ return false;
+ if (flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
+ return true;
+ return false;
+}
+
+static inline u8 gve_next_seqno(u8 seq)
+{
+ return (seq + 1) == 8 ? 1 : seq + 1;
+}
+#endif /* _GVE_DESC_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c
new file mode 100644
index 000000000000..26540b856541
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */ + +#include <linux/rtnetlink.h> +#include "gve.h" + +static void gve_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct gve_priv *priv = netdev_priv(netdev); + + strlcpy(info->driver, "gve", sizeof(info->driver)); + strlcpy(info->version, gve_version_str, sizeof(info->version)); + strlcpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info)); +} + +static void gve_set_msglevel(struct net_device *netdev, u32 value) +{ + struct gve_priv *priv = netdev_priv(netdev); + + priv->msg_enable = value; +} + +static u32 gve_get_msglevel(struct net_device *netdev) +{ + struct gve_priv *priv = netdev_priv(netdev); + + return priv->msg_enable; +} + +static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { + "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", + "rx_dropped", "tx_dropped", "tx_timeouts", +}; + +#define GVE_MAIN_STATS_LEN ARRAY_SIZE(gve_gstrings_main_stats) +#define NUM_GVE_TX_CNTS 5 +#define NUM_GVE_RX_CNTS 2 + +static void gve_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct gve_priv *priv = netdev_priv(netdev); + char *s = (char *)data; + int i; + + if (stringset != ETH_SS_STATS) + return; + + memcpy(s, *gve_gstrings_main_stats, + sizeof(gve_gstrings_main_stats)); + s += sizeof(gve_gstrings_main_stats); + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + snprintf(s, ETH_GSTRING_LEN, "rx_desc_cnt[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "rx_desc_fill_cnt[%u]", i); + s += ETH_GSTRING_LEN; + } + for (i = 0; i < priv->tx_cfg.num_queues; i++) { + snprintf(s, ETH_GSTRING_LEN, "tx_req[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "tx_done[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "tx_wake[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "tx_stop[%u]", i); + s += ETH_GSTRING_LEN; + snprintf(s, ETH_GSTRING_LEN, "tx_event_counter[%u]", i); + s += ETH_GSTRING_LEN; + } +} + +static int gve_get_sset_count(struct net_device *netdev, int sset) +{ + struct gve_priv *priv = netdev_priv(netdev); + + switch (sset) { + case ETH_SS_STATS: + return GVE_MAIN_STATS_LEN + + (priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS) + + (priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS); + default: + return -EOPNOTSUPP; + } +} + +static void +gve_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct gve_priv *priv = netdev_priv(netdev); + u64 rx_pkts, rx_bytes, tx_pkts, tx_bytes; + unsigned int start; + int ring; + int i; + + ASSERT_RTNL(); + + for (rx_pkts = 0, rx_bytes = 0, ring = 0; + ring < priv->rx_cfg.num_queues; ring++) { + if (priv->rx) { + do { + start = + u64_stats_fetch_begin(&priv->rx[ring].statss); + rx_pkts += priv->rx[ring].rpackets; + rx_bytes += priv->rx[ring].rbytes; + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, + start)); + } + } + for (tx_pkts = 0, tx_bytes = 0, ring = 0; + ring < priv->tx_cfg.num_queues; ring++) { + if (priv->tx) { + do { + start = + u64_stats_fetch_begin(&priv->tx[ring].statss); + tx_pkts += priv->tx[ring].pkt_done; + tx_bytes += priv->tx[ring].bytes_done; + } while (u64_stats_fetch_retry(&priv->tx[ring].statss, + start)); + } + } + + i = 0; + data[i++] = rx_pkts; + data[i++] = tx_pkts; + data[i++] = rx_bytes; + data[i++] = tx_bytes; + /* Skip rx_dropped and tx_dropped */ + i += 2; + data[i++] = priv->tx_timeo_cnt; + i = GVE_MAIN_STATS_LEN; + + /* walk RX rings */ + if (priv->rx) { + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { + struct gve_rx_ring *rx = 
&priv->rx[ring];
+
+ data[i++] = rx->desc.cnt;
+ data[i++] = rx->desc.fill_cnt;
+ }
+ } else {
+ i += priv->rx_cfg.num_queues * NUM_GVE_RX_CNTS;
+ }
+ /* walk TX rings */
+ if (priv->tx) {
+ for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ struct gve_tx_ring *tx = &priv->tx[ring];
+
+ data[i++] = tx->req;
+ data[i++] = tx->done;
+ data[i++] = tx->wake_queue;
+ data[i++] = tx->stop_queue;
+ data[i++] = be32_to_cpu(gve_tx_load_event_counter(priv,
+ tx));
+ }
+ } else {
+ i += priv->tx_cfg.num_queues * NUM_GVE_TX_CNTS;
+ }
+}
+
+static void gve_get_channels(struct net_device *netdev,
+ struct ethtool_channels *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ cmd->max_rx = priv->rx_cfg.max_queues;
+ cmd->max_tx = priv->tx_cfg.max_queues;
+ cmd->max_other = 0;
+ cmd->max_combined = 0;
+ cmd->rx_count = priv->rx_cfg.num_queues;
+ cmd->tx_count = priv->tx_cfg.num_queues;
+ cmd->other_count = 0;
+ cmd->combined_count = 0;
+}
+
+static int gve_set_channels(struct net_device *netdev,
+ struct ethtool_channels *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+ struct gve_queue_config new_tx_cfg = priv->tx_cfg;
+ struct gve_queue_config new_rx_cfg = priv->rx_cfg;
+ struct ethtool_channels old_settings;
+ int new_tx = cmd->tx_count;
+ int new_rx = cmd->rx_count;
+
+ gve_get_channels(netdev, &old_settings);
+
+ /* Changing combined is not allowed */
+ if (cmd->combined_count != old_settings.combined_count)
+ return -EINVAL;
+
+ if (!new_rx || !new_tx)
+ return -EINVAL;
+
+ if (!netif_carrier_ok(netdev)) {
+ priv->tx_cfg.num_queues = new_tx;
+ priv->rx_cfg.num_queues = new_rx;
+ return 0;
+ }
+
+ new_tx_cfg.num_queues = new_tx;
+ new_rx_cfg.num_queues = new_rx;
+
+ return gve_adjust_queues(priv, new_rx_cfg, new_tx_cfg);
+}
+
+static void gve_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *cmd)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ cmd->rx_max_pending = priv->rx_desc_cnt;
+ cmd->tx_max_pending = priv->tx_desc_cnt;
+ cmd->rx_pending = priv->rx_desc_cnt;
+ cmd->tx_pending = priv->tx_desc_cnt;
+}
+
+static int gve_user_reset(struct net_device *netdev, u32 *flags)
+{
+ struct gve_priv *priv = netdev_priv(netdev);
+
+ if (*flags == ETH_RESET_ALL) {
+ *flags = 0;
+ return gve_reset(priv, true);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+const struct ethtool_ops gve_ethtool_ops = {
+ .get_drvinfo = gve_get_drvinfo,
+ .get_strings = gve_get_strings,
+ .get_sset_count = gve_get_sset_count,
+ .get_ethtool_stats = gve_get_ethtool_stats,
+ .set_msglevel = gve_set_msglevel,
+ .get_msglevel = gve_get_msglevel,
+ .set_channels = gve_set_channels,
+ .get_channels = gve_get_channels,
+ .get_link = ethtool_op_get_link,
+ .get_ringparam = gve_get_ringparam,
+ .reset = gve_user_reset,
+};
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
new file mode 100644
index 000000000000..24f16e3368cd
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -0,0 +1,1232 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <net/sch_generic.h>
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_register.h"
+
+#define GVE_DEFAULT_RX_COPYBREAK (256)
+
+#define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK)
+#define GVE_VERSION "1.0.0"
+#define GVE_VERSION_PREFIX "GVE-"
+
+const char gve_version_str[] = GVE_VERSION;
+static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
+
+static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+ unsigned int start;
+ int ring;
+
+ if (priv->rx) {
+ for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) {
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->rx[ring].statss);
+ s->rx_packets += priv->rx[ring].rpackets;
+ s->rx_bytes += priv->rx[ring].rbytes;
+ } while (u64_stats_fetch_retry(&priv->rx[ring].statss,
+ start));
+ }
+ }
+ if (priv->tx) {
+ for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
+ do {
+ start =
+ u64_stats_fetch_begin(&priv->tx[ring].statss);
+ s->tx_packets += priv->tx[ring].pkt_done;
+ s->tx_bytes += priv->tx[ring].bytes_done;
+ } while (u64_stats_fetch_retry(&priv->tx[ring].statss,
+ start));
+ }
+ }
+}
+
+static int gve_alloc_counter_array(struct gve_priv *priv)
+{
+ priv->counter_array =
+ dma_alloc_coherent(&priv->pdev->dev,
+ priv->num_event_counters *
+ sizeof(*priv->counter_array),
+ &priv->counter_array_bus, GFP_KERNEL);
+ if (!priv->counter_array)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void gve_free_counter_array(struct gve_priv *priv)
+{
+ dma_free_coherent(&priv->pdev->dev,
+ priv->num_event_counters *
+ sizeof(*priv->counter_array),
+ priv->counter_array, priv->counter_array_bus);
+ priv->counter_array = NULL;
+}
+
+static irqreturn_t gve_mgmnt_intr(int irq, void *arg)
+{
+ struct gve_priv *priv = arg;
+
+ queue_work(priv->gve_wq, &priv->service_task);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t gve_intr(int irq, void *arg)
+{
+ struct gve_notify_block *block = arg;
+ struct gve_priv *priv = block->priv;
+
+ iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
+ napi_schedule_irqoff(&block->napi);
+ return IRQ_HANDLED;
+}
+
+static int gve_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct gve_notify_block *block;
+ __be32 __iomem *irq_doorbell;
+ bool reschedule = false;
+ struct gve_priv *priv;
+
+ block = container_of(napi, struct gve_notify_block, napi);
+ priv = block->priv;
+
+ if (block->tx)
+ reschedule |= gve_tx_poll(block, budget);
+ if (block->rx)
+ reschedule |= gve_rx_poll(block, budget);
+
+ if (reschedule)
+ return budget;
+
+ napi_complete(napi);
+ irq_doorbell = gve_irq_doorbell(priv, block);
+ iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell);
+
+ /* Double check we have no extra work.
+ * Ensure unmask synchronizes with checking for work.
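+ * Work that arrived while the interrupt was masked would not raise a
+ * fresh interrupt, so it has to be picked up by this final poll.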
+ */ + dma_rmb(); + if (block->tx) + reschedule |= gve_tx_poll(block, -1); + if (block->rx) + reschedule |= gve_rx_poll(block, -1); + if (reschedule && napi_reschedule(napi)) + iowrite32be(GVE_IRQ_MASK, irq_doorbell); + + return 0; +} + +static int gve_alloc_notify_blocks(struct gve_priv *priv) +{ + int num_vecs_requested = priv->num_ntfy_blks + 1; + char *name = priv->dev->name; + unsigned int active_cpus; + int vecs_enabled; + int i, j; + int err; + + priv->msix_vectors = kvzalloc(num_vecs_requested * + sizeof(*priv->msix_vectors), GFP_KERNEL); + if (!priv->msix_vectors) + return -ENOMEM; + for (i = 0; i < num_vecs_requested; i++) + priv->msix_vectors[i].entry = i; + vecs_enabled = pci_enable_msix_range(priv->pdev, priv->msix_vectors, + GVE_MIN_MSIX, num_vecs_requested); + if (vecs_enabled < 0) { + dev_err(&priv->pdev->dev, "Could not enable min msix %d/%d\n", + GVE_MIN_MSIX, vecs_enabled); + err = vecs_enabled; + goto abort_with_msix_vectors; + } + if (vecs_enabled != num_vecs_requested) { + int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; + int vecs_per_type = new_num_ntfy_blks / 2; + int vecs_left = new_num_ntfy_blks % 2; + + priv->num_ntfy_blks = new_num_ntfy_blks; + priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, + vecs_per_type); + priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, + vecs_per_type + vecs_left); + dev_err(&priv->pdev->dev, + "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", + vecs_enabled, priv->tx_cfg.max_queues, + priv->rx_cfg.max_queues); + if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) + priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; + if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) + priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; + } + /* Half the notification blocks go to TX and half to RX */ + active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); + + /* Setup Management Vector - the last vector */ + snprintf(priv->mgmt_msix_name, sizeof(priv->mgmt_msix_name), "%s-mgmnt", + name); + err = request_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, + gve_mgmnt_intr, 0, priv->mgmt_msix_name, priv); + if (err) { + dev_err(&priv->pdev->dev, "Did not receive management vector.\n"); + goto abort_with_msix_enabled; + } + priv->ntfy_blocks = + dma_alloc_coherent(&priv->pdev->dev, + priv->num_ntfy_blks * + sizeof(*priv->ntfy_blocks), + &priv->ntfy_block_bus, GFP_KERNEL); + if (!priv->ntfy_blocks) { + err = -ENOMEM; + goto abort_with_mgmt_vector; + } + /* Setup the other blocks - the first n-1 vectors */ + for (i = 0; i < priv->num_ntfy_blks; i++) { + struct gve_notify_block *block = &priv->ntfy_blocks[i]; + int msix_idx = i; + + snprintf(block->name, sizeof(block->name), "%s-ntfy-block.%d", + name, i); + block->priv = priv; + err = request_irq(priv->msix_vectors[msix_idx].vector, + gve_intr, 0, block->name, block); + if (err) { + dev_err(&priv->pdev->dev, + "Failed to receive msix vector %d\n", i); + goto abort_with_some_ntfy_blocks; + } + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + get_cpu_mask(i % active_cpus)); + } + return 0; +abort_with_some_ntfy_blocks: + for (j = 0; j < i; j++) { + struct gve_notify_block *block = &priv->ntfy_blocks[j]; + int msix_idx = j; + + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + NULL); + free_irq(priv->msix_vectors[msix_idx].vector, block); + } + dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * + sizeof(*priv->ntfy_blocks), + priv->ntfy_blocks, priv->ntfy_block_bus); + 
priv->ntfy_blocks = NULL;
+abort_with_mgmt_vector:
+ free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+abort_with_msix_enabled:
+ pci_disable_msix(priv->pdev);
+abort_with_msix_vectors:
+ kvfree(priv->msix_vectors);
+ priv->msix_vectors = NULL;
+ return err;
+}
+
+static void gve_free_notify_blocks(struct gve_priv *priv)
+{
+ int i;
+
+ /* Free the irqs */
+ for (i = 0; i < priv->num_ntfy_blks; i++) {
+ struct gve_notify_block *block = &priv->ntfy_blocks[i];
+ int msix_idx = i;
+
+ irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector,
+ NULL);
+ free_irq(priv->msix_vectors[msix_idx].vector, block);
+ }
+ dma_free_coherent(&priv->pdev->dev,
+ priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks),
+ priv->ntfy_blocks, priv->ntfy_block_bus);
+ priv->ntfy_blocks = NULL;
+ free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv);
+ pci_disable_msix(priv->pdev);
+ kvfree(priv->msix_vectors);
+ priv->msix_vectors = NULL;
+}
+
+static int gve_setup_device_resources(struct gve_priv *priv)
+{
+ int err;
+
+ err = gve_alloc_counter_array(priv);
+ if (err)
+ return err;
+ err = gve_alloc_notify_blocks(priv);
+ if (err)
+ goto abort_with_counter;
+ err = gve_adminq_configure_device_resources(priv,
+ priv->counter_array_bus,
+ priv->num_event_counters,
+ priv->ntfy_block_bus,
+ priv->num_ntfy_blks);
+ if (unlikely(err)) {
+ dev_err(&priv->pdev->dev,
+ "could not setup device_resources: err=%d\n", err);
+ err = -ENXIO;
+ goto abort_with_ntfy_blocks;
+ }
+ gve_set_device_resources_ok(priv);
+ return 0;
+abort_with_ntfy_blocks:
+ gve_free_notify_blocks(priv);
+abort_with_counter:
+ gve_free_counter_array(priv);
+ return err;
+}
+
+static void gve_trigger_reset(struct gve_priv *priv);
+
+static void gve_teardown_device_resources(struct gve_priv *priv)
+{
+ int err;
+
+ /* Tell device its resources are being freed */
+ if (gve_get_device_resources_ok(priv)) {
+ err = gve_adminq_deconfigure_device_resources(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Could not deconfigure device resources: err=%d\n",
+ err);
+ gve_trigger_reset(priv);
+ }
+ }
+ gve_free_counter_array(priv);
+ gve_free_notify_blocks(priv);
+ gve_clear_device_resources_ok(priv);
+}
+
+static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
+ NAPI_POLL_WEIGHT);
+}
+
+static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx)
+{
+ struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
+
+ netif_napi_del(&block->napi);
+}
+
+static int gve_register_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int err;
+ int i;
+
+ for (i = 0; i < num_qpls; i++) {
+ err = gve_adminq_register_page_list(priv, &priv->qpls[i]);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to register queue page list %d\n",
+ priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int gve_unregister_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int err;
+ int i;
+
+ for (i = 0; i < num_qpls; i++) {
+ err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id);
+ /* This failure will trigger a reset - no need to clean up */
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "Failed to unregister queue page list %d\n",
+ priv->qpls[i].id);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int gve_create_rings(struct gve_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ err = gve_adminq_create_tx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i);
+ }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ err = gve_adminq_create_rx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ /* Rx data ring has been prefilled with packet buffers at
+ * queue allocation time.
+ * Write the doorbell to provide descriptor slots and packet
+ * buffers to the NIC.
+ */
+ gve_rx_write_doorbell(priv, &priv->rx[i]);
+ netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i);
+ }
+
+ return 0;
+}
+
+static int gve_alloc_rings(struct gve_priv *priv)
+{
+ int ntfy_idx;
+ int err;
+ int i;
+
+ /* Setup tx rings */
+ priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
+ GFP_KERNEL);
+ if (!priv->tx)
+ return -ENOMEM;
+ err = gve_tx_alloc_rings(priv);
+ if (err)
+ goto free_tx;
+ /* Setup rx rings */
+ priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx),
+ GFP_KERNEL);
+ if (!priv->rx) {
+ err = -ENOMEM;
+ goto free_tx_queue;
+ }
+ err = gve_rx_alloc_rings(priv);
+ if (err)
+ goto free_rx;
+ /* Add tx napi & init sync stats */
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ u64_stats_init(&priv->tx[i].statss);
+ ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+ gve_add_napi(priv, ntfy_idx);
+ }
+ /* Add rx napi & init sync stats */
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ u64_stats_init(&priv->rx[i].statss);
+ ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+ gve_add_napi(priv, ntfy_idx);
+ }
+
+ return 0;
+
+free_rx:
+ kvfree(priv->rx);
+ priv->rx = NULL;
+free_tx_queue:
+ gve_tx_free_rings(priv);
+free_tx:
+ kvfree(priv->tx);
+ priv->tx = NULL;
+ return err;
+}
+
+static int gve_destroy_rings(struct gve_priv *priv)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ err = gve_adminq_destroy_tx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy tx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i);
+ }
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ err = gve_adminq_destroy_rx_queue(priv, i);
+ if (err) {
+ netif_err(priv, drv, priv->dev,
+ "failed to destroy rx queue %d\n",
+ i);
+ /* This failure will trigger a reset - no need to clean
+ * up
+ */
+ return err;
+ }
+ netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i);
+ }
+ return 0;
+}
+
+static void gve_free_rings(struct gve_priv *priv)
+{
+ int ntfy_idx;
+ int i;
+
+ if (priv->tx) {
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+ gve_remove_napi(priv, ntfy_idx);
+ }
+ gve_tx_free_rings(priv);
+ kvfree(priv->tx);
+ priv->tx = NULL;
+ }
+ if (priv->rx) {
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+ gve_remove_napi(priv, ntfy_idx);
+ }
+ gve_rx_free_rings(priv);
+ kvfree(priv->rx);
+ priv->rx = NULL;
+ }
+}
+
+int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma,
+ enum dma_data_direction dir)
+{
+ *page = alloc_page(GFP_KERNEL);
+ if (!*page)
+ return -ENOMEM;
+ *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir);
+ if (dma_mapping_error(dev, *dma)) {
+ put_page(*page);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id,
+ int pages)
+{
+ struct gve_queue_page_list *qpl = &priv->qpls[id];
+ int err;
+ int i;
+
+ if (pages + priv->num_registered_pages > priv->max_registered_pages) {
+ netif_err(priv, drv, priv->dev,
+ "Reached max number of registered pages %llu > %llu\n",
+ pages + priv->num_registered_pages,
+ priv->max_registered_pages);
+ return -EINVAL;
+ }
+
+ qpl->id = id;
+ qpl->num_entries = pages;
+ qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL);
+ /* caller handles clean up */
+ if (!qpl->pages)
+ return -ENOMEM;
+ qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses),
+ GFP_KERNEL);
+ /* caller handles clean up */
+ if (!qpl->page_buses)
+ return -ENOMEM;
+
+ for (i = 0; i < pages; i++) {
+ err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i],
+ &qpl->page_buses[i],
+ gve_qpl_dma_dir(priv, id));
+ /* caller handles clean up */
+ if (err)
+ return -ENOMEM;
+ }
+ priv->num_registered_pages += pages;
+
+ return 0;
+}
+
+void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma,
+ enum dma_data_direction dir)
+{
+ if (!dma_mapping_error(dev, dma))
+ dma_unmap_page(dev, dma, PAGE_SIZE, dir);
+ if (page)
+ put_page(page);
+}
+
+static void gve_free_queue_page_list(struct gve_priv *priv,
+ int id)
+{
+ struct gve_queue_page_list *qpl = &priv->qpls[id];
+ int i;
+
+ if (!qpl->pages)
+ return;
+ if (!qpl->page_buses)
+ goto free_pages;
+
+ for (i = 0; i < qpl->num_entries; i++)
+ gve_free_page(&priv->pdev->dev, qpl->pages[i],
+ qpl->page_buses[i], gve_qpl_dma_dir(priv, id));
+
+ kvfree(qpl->page_buses);
+free_pages:
+ kvfree(qpl->pages);
+ priv->num_registered_pages -= qpl->num_entries;
+}
+
+static int gve_alloc_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int i, j;
+ int err;
+
+ priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL);
+ if (!priv->qpls)
+ return -ENOMEM;
+
+ for (i = 0; i < gve_num_tx_qpls(priv); i++) {
+ err = gve_alloc_queue_page_list(priv, i,
+ priv->tx_pages_per_qpl);
+ if (err)
+ goto free_qpls;
+ }
+ for (; i < num_qpls; i++) {
+ err = gve_alloc_queue_page_list(priv, i,
+ priv->rx_pages_per_qpl);
+ if (err)
+ goto free_qpls;
+ }
+
+ priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) *
+ sizeof(unsigned long) * BITS_PER_BYTE;
+ priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) *
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!priv->qpl_cfg.qpl_id_map) {
+ err = -ENOMEM;
+ goto free_qpls;
+ }
+
+ return 0;
+
+free_qpls:
+ for (j = 0; j <= i && j < num_qpls; j++)
+ gve_free_queue_page_list(priv, j);
+ kvfree(priv->qpls);
+ return err;
+}
+
+static void gve_free_qpls(struct gve_priv *priv)
+{
+ int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv);
+ int i;
+
+ kvfree(priv->qpl_cfg.qpl_id_map);
+
+ for (i = 0; i < num_qpls; i++)
+ gve_free_queue_page_list(priv, i);
+
+ kvfree(priv->qpls);
+}
+
+/* Use this to schedule a reset when the device is capable of continuing
+ * to handle other requests in its current state. If it is not, do a reset
+ * in thread instead.
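+ * The Tx watchdog (gve_tx_timeout) takes this path; paths that cannot
+ * wait for the service task, such as a failed gve_open, call
+ * gve_reset_and_teardown directly.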
+ */ +void gve_schedule_reset(struct gve_priv *priv) +{ + gve_set_do_reset(priv); + queue_work(priv->gve_wq, &priv->service_task); +} + +static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up); +static int gve_reset_recovery(struct gve_priv *priv, bool was_up); +static void gve_turndown(struct gve_priv *priv); +static void gve_turnup(struct gve_priv *priv); + +static int gve_open(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + err = gve_alloc_qpls(priv); + if (err) + return err; + err = gve_alloc_rings(priv); + if (err) + goto free_qpls; + + err = netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); + if (err) + goto free_rings; + err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); + if (err) + goto free_rings; + + err = gve_register_qpls(priv); + if (err) + goto reset; + err = gve_create_rings(priv); + if (err) + goto reset; + gve_set_device_rings_ok(priv); + + gve_turnup(priv); + netif_carrier_on(dev); + return 0; + +free_rings: + gve_free_rings(priv); +free_qpls: + gve_free_qpls(priv); + return err; + +reset: + /* This must have been called from a reset due to the rtnl lock + * so just return at this point. + */ + if (gve_get_reset_in_progress(priv)) + return err; + /* Otherwise reset before returning */ + gve_reset_and_teardown(priv, true); + /* if this fails there is nothing we can do so just ignore the return */ + gve_reset_recovery(priv, false); + /* return the original error */ + return err; +} + +static int gve_close(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + int err; + + netif_carrier_off(dev); + if (gve_get_device_rings_ok(priv)) { + gve_turndown(priv); + err = gve_destroy_rings(priv); + if (err) + goto err; + err = gve_unregister_qpls(priv); + if (err) + goto err; + gve_clear_device_rings_ok(priv); + } + + gve_free_rings(priv); + gve_free_qpls(priv); + return 0; + +err: + /* This must have been called from a reset due to the rtnl lock + * so just return at this point. + */ + if (gve_get_reset_in_progress(priv)) + return err; + /* Otherwise reset before returning */ + gve_reset_and_teardown(priv, true); + return gve_reset_recovery(priv, false); +} + +int gve_adjust_queues(struct gve_priv *priv, + struct gve_queue_config new_rx_config, + struct gve_queue_config new_tx_config) +{ + int err; + + if (netif_carrier_ok(priv->dev)) { + /* To make this process as simple as possible we teardown the + * device, set the new configuration, and then bring the device + * up again. + */ + err = gve_close(priv->dev); + /* we have already tried to reset in close, + * just fail at this point + */ + if (err) + return err; + priv->tx_cfg = new_tx_config; + priv->rx_cfg = new_rx_config; + + err = gve_open(priv->dev); + if (err) + goto err; + + return 0; + } + /* Set the config for the next up. */ + priv->tx_cfg = new_tx_config; + priv->rx_cfg = new_rx_config; + + return 0; +err: + netif_err(priv, drv, priv->dev, + "Adjust queues failed! !!! 
DISABLING ALL QUEUES !!!\n"); + gve_turndown(priv); + return err; +} + +static void gve_turndown(struct gve_priv *priv) +{ + int idx; + + if (netif_carrier_ok(priv->dev)) + netif_carrier_off(priv->dev); + + if (!gve_get_napi_enabled(priv)) + return; + + /* Disable napi to prevent more work from coming in */ + for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + napi_disable(&block->napi); + } + for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + napi_disable(&block->napi); + } + + /* Stop tx queues */ + netif_tx_disable(priv->dev); + + gve_clear_napi_enabled(priv); +} + +static void gve_turnup(struct gve_priv *priv) +{ + int idx; + + /* Start the tx queues */ + netif_tx_start_all_queues(priv->dev); + + /* Enable napi and unmask interrupts for all queues */ + for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + napi_enable(&block->napi); + iowrite32be(0, gve_irq_doorbell(priv, block)); + } + for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + + napi_enable(&block->napi); + iowrite32be(0, gve_irq_doorbell(priv, block)); + } + + gve_set_napi_enabled(priv); +} + +static void gve_tx_timeout(struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + + gve_schedule_reset(priv); + priv->tx_timeo_cnt++; +} + +static const struct net_device_ops gve_netdev_ops = { + .ndo_start_xmit = gve_tx, + .ndo_open = gve_open, + .ndo_stop = gve_close, + .ndo_get_stats64 = gve_get_stats, + .ndo_tx_timeout = gve_tx_timeout, +}; + +static void gve_handle_status(struct gve_priv *priv, u32 status) +{ + if (GVE_DEVICE_STATUS_RESET_MASK & status) { + dev_info(&priv->pdev->dev, "Device requested reset.\n"); + gve_set_do_reset(priv); + } +} + +static void gve_handle_reset(struct gve_priv *priv) +{ + /* A service task will be scheduled at the end of probe to catch any + * resets that need to happen, and we don't want to reset until + * probe is done. 
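+ * gve_probe queues the service task just before returning, so a reset
+ * deferred here still runs once probing completes.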
+ */
+ if (gve_get_probe_in_progress(priv))
+ return;
+
+ if (gve_get_do_reset(priv)) {
+ rtnl_lock();
+ gve_reset(priv, false);
+ rtnl_unlock();
+ }
+}
+
+/* Handle NIC status register changes and reset requests */
+static void gve_service_task(struct work_struct *work)
+{
+ struct gve_priv *priv = container_of(work, struct gve_priv,
+ service_task);
+
+ gve_handle_status(priv,
+ ioread32be(&priv->reg_bar0->device_status));
+
+ gve_handle_reset(priv);
+}
+
+static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
+{
+ int num_ntfy;
+ int err;
+
+ /* Set up the adminq */
+ err = gve_adminq_alloc(&priv->pdev->dev, priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Failed to alloc admin queue: err=%d\n", err);
+ return err;
+ }
+
+ if (skip_describe_device)
+ goto setup_device;
+
+ /* Get the initial information we need from the device */
+ err = gve_adminq_describe_device(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev,
+ "Could not get device information: err=%d\n", err);
+ goto err;
+ }
+ if (priv->dev->max_mtu > PAGE_SIZE) {
+ priv->dev->max_mtu = PAGE_SIZE;
+ err = gve_adminq_set_mtu(priv, priv->dev->mtu);
+ if (err) {
+ netif_err(priv, drv, priv->dev, "Could not set mtu\n");
+ goto err;
+ }
+ }
+ priv->dev->mtu = priv->dev->max_mtu;
+ num_ntfy = pci_msix_vec_count(priv->pdev);
+ if (num_ntfy <= 0) {
+ dev_err(&priv->pdev->dev,
+ "could not count MSI-x vectors: err=%d\n", num_ntfy);
+ err = num_ntfy;
+ goto err;
+ } else if (num_ntfy < GVE_MIN_MSIX) {
+ dev_err(&priv->pdev->dev, "gve needs at least %d MSI-x vectors, but only has %d\n",
+ GVE_MIN_MSIX, num_ntfy);
+ err = -EINVAL;
+ goto err;
+ }
+
+ priv->num_registered_pages = 0;
+ priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK;
+ /* gvnic has one Notification Block per MSI-x vector, except for the
+ * management vector
+ */
+ priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1;
+ priv->mgmt_msix_idx = priv->num_ntfy_blks;
+
+ priv->tx_cfg.max_queues =
+ min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2);
+ priv->rx_cfg.max_queues =
+ min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2);
+
+ priv->tx_cfg.num_queues = priv->tx_cfg.max_queues;
+ priv->rx_cfg.num_queues = priv->rx_cfg.max_queues;
+ if (priv->default_num_queues > 0) {
+ priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues,
+ priv->tx_cfg.num_queues);
+ priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues,
+ priv->rx_cfg.num_queues);
+ }
+
+ netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n",
+ priv->tx_cfg.num_queues, priv->rx_cfg.num_queues);
+ netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n",
+ priv->tx_cfg.max_queues, priv->rx_cfg.max_queues);
+
+setup_device:
+ err = gve_setup_device_resources(priv);
+ if (!err)
+ return 0;
+err:
+ gve_adminq_free(&priv->pdev->dev, priv);
+ return err;
+}
+
+static void gve_teardown_priv_resources(struct gve_priv *priv)
+{
+ gve_teardown_device_resources(priv);
+ gve_adminq_free(&priv->pdev->dev, priv);
+}
+
+static void gve_trigger_reset(struct gve_priv *priv)
+{
+ /* Reset the device by releasing the AQ */
+ gve_adminq_release(priv);
+}
+
+static void gve_reset_and_teardown(struct gve_priv *priv, bool was_up)
+{
+ gve_trigger_reset(priv);
+ /* With the reset having already happened, close cannot fail */
+ if (was_up)
+ gve_close(priv->dev);
+ gve_teardown_priv_resources(priv);
+}
+
+static int gve_reset_recovery(struct gve_priv *priv, bool was_up)
+{
+ int err;
+
+ err = gve_init_priv(priv, true);
+ if (err)
+ goto err;
+ if (was_up) {
+ err = gve_open(priv->dev);
+ if (err)
+ goto err;
+ }
+ return 0;
+err:
+ dev_err(&priv->pdev->dev, "Reset failed! !!! DISABLING ALL QUEUES !!!\n");
+ gve_turndown(priv);
+ return err;
+}
+
+int gve_reset(struct gve_priv *priv, bool attempt_teardown)
+{
+ bool was_up = netif_carrier_ok(priv->dev);
+ int err;
+
+ dev_info(&priv->pdev->dev, "Performing reset\n");
+ gve_clear_do_reset(priv);
+ gve_set_reset_in_progress(priv);
+ /* If we aren't attempting to teardown normally, just go turndown and
+ * reset right away.
+ */
+ if (!attempt_teardown) {
+ gve_turndown(priv);
+ gve_reset_and_teardown(priv, was_up);
+ } else {
+ /* Otherwise attempt to close normally */
+ if (was_up) {
+ err = gve_close(priv->dev);
+ /* If that fails reset as we did above */
+ if (err)
+ gve_reset_and_teardown(priv, was_up);
+ }
+ /* Clean up any remaining resources */
+ gve_teardown_priv_resources(priv);
+ }
+
+ /* Set it all back up */
+ err = gve_reset_recovery(priv, was_up);
+ gve_clear_reset_in_progress(priv);
+ return err;
+}
+
+static void gve_write_version(u8 __iomem *driver_version_register)
+{
+ const char *c = gve_version_prefix;
+
+ while (*c) {
+ writeb(*c, driver_version_register);
+ c++;
+ }
+
+ c = gve_version_str;
+ while (*c) {
+ writeb(*c, driver_version_register);
+ c++;
+ }
+ writeb('\n', driver_version_register);
+}
+
+static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ int max_tx_queues, max_rx_queues;
+ struct net_device *dev;
+ __be32 __iomem *db_bar;
+ struct gve_registers __iomem *reg_bar;
+ struct gve_priv *priv;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err)
+ return -ENXIO;
+
+ err = pci_request_regions(pdev, "gvnic-cfg");
+ if (err)
+ goto abort_with_enabled;
+
+ pci_set_master(pdev);
+
+ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set dma mask: err=%d\n", err);
+ goto abort_with_pci_region;
+ }
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (err) {
+ dev_err(&pdev->dev,
+ "Failed to set consistent dma mask: err=%d\n", err);
+ goto abort_with_pci_region;
+ }
+
+ reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0);
+ if (!reg_bar) {
+ dev_err(&pdev->dev, "Failed to map pci bar!\n");
+ err = -ENOMEM;
+ goto abort_with_pci_region;
+ }
+
+ db_bar = pci_iomap(pdev, GVE_DOORBELL_BAR, 0);
+ if (!db_bar) {
+ dev_err(&pdev->dev, "Failed to map doorbell bar!\n");
+ err = -ENOMEM;
+ goto abort_with_reg_bar;
+ }
+
+ gve_write_version(&reg_bar->driver_version);
+ /* Get max queues to alloc etherdev */
+ max_tx_queues = ioread32be(&reg_bar->max_tx_queues);
+ max_rx_queues = ioread32be(&reg_bar->max_rx_queues);
+ /* Alloc and setup the netdev and priv */
+ dev = alloc_etherdev_mqs(sizeof(*priv), max_tx_queues, max_rx_queues);
+ if (!dev) {
+ dev_err(&pdev->dev, "could not allocate netdev\n");
+ err = -ENOMEM;
+ goto abort_with_db_bar;
+ }
+ SET_NETDEV_DEV(dev, &pdev->dev);
+ pci_set_drvdata(pdev, dev);
+ dev->ethtool_ops = &gve_ethtool_ops;
+ dev->netdev_ops = &gve_netdev_ops;
+ /* advertise features */
+ dev->hw_features = NETIF_F_HIGHDMA;
+ dev->hw_features |= NETIF_F_SG;
+ dev->hw_features |= NETIF_F_HW_CSUM;
+ dev->hw_features |= NETIF_F_TSO;
+ dev->hw_features |= NETIF_F_TSO6;
+ dev->hw_features |= NETIF_F_TSO_ECN;
+ dev->hw_features |= NETIF_F_RXCSUM;
+ dev->hw_features |= NETIF_F_RXHASH;
+ dev->features = dev->hw_features;
+ dev->watchdog_timeo = 5 * HZ;
+ dev->min_mtu = ETH_MIN_MTU;
+ netif_carrier_off(dev);
+
+ priv = netdev_priv(dev);
+ priv->dev = dev;
+ priv->pdev = pdev;
+ priv->msg_enable = DEFAULT_MSG_LEVEL;
+ priv->reg_bar0 = reg_bar;
+ priv->db_bar2 = db_bar;
+ priv->service_task_flags = 0x0;
+ priv->state_flags = 0x0;
+
+ gve_set_probe_in_progress(priv);
+ priv->gve_wq = alloc_ordered_workqueue("gve", 0);
+ if (!priv->gve_wq) {
+ dev_err(&pdev->dev, "Could not allocate workqueue\n");
+ err = -ENOMEM;
+ goto abort_with_netdev;
+ }
+ INIT_WORK(&priv->service_task, gve_service_task);
+ priv->tx_cfg.max_queues = max_tx_queues;
+ priv->rx_cfg.max_queues = max_rx_queues;
+
+ err = gve_init_priv(priv, false);
+ if (err)
+ goto abort_with_wq;
+
+ err = register_netdev(dev);
+ if (err)
+ goto abort_with_wq;
+
+ dev_info(&pdev->dev, "GVE version %s\n", gve_version_str);
+ gve_clear_probe_in_progress(priv);
+ queue_work(priv->gve_wq, &priv->service_task);
+ return 0;
+
+abort_with_wq:
+ destroy_workqueue(priv->gve_wq);
+
+abort_with_netdev:
+ free_netdev(dev);
+
+abort_with_db_bar:
+ pci_iounmap(pdev, db_bar);
+
+abort_with_reg_bar:
+ pci_iounmap(pdev, reg_bar);
+
+abort_with_pci_region:
+ pci_release_regions(pdev);
+
+abort_with_enabled:
+ pci_disable_device(pdev);
+ return err;
+}
+
+static void gve_remove(struct pci_dev *pdev)
+{
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct gve_priv *priv = netdev_priv(netdev);
+ __be32 __iomem *db_bar = priv->db_bar2;
+ void __iomem *reg_bar = priv->reg_bar0;
+
+ unregister_netdev(netdev);
+ gve_teardown_priv_resources(priv);
+ destroy_workqueue(priv->gve_wq);
+ free_netdev(netdev);
+ pci_iounmap(pdev, db_bar);
+ pci_iounmap(pdev, reg_bar);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static const struct pci_device_id gve_id_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_GOOGLE, PCI_DEV_ID_GVNIC) },
+ { }
+};
+
+static struct pci_driver gvnic_driver = {
+ .name = "gvnic",
+ .id_table = gve_id_table,
+ .probe = gve_probe,
+ .remove = gve_remove,
+};
+
+module_pci_driver(gvnic_driver);
+
+MODULE_DEVICE_TABLE(pci, gve_id_table);
+MODULE_AUTHOR("Google, Inc.");
+MODULE_DESCRIPTION("gVNIC Driver");
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_VERSION(GVE_VERSION);
diff --git a/drivers/net/ethernet/google/gve/gve_register.h b/drivers/net/ethernet/google/gve/gve_register.h
new file mode 100644
index 000000000000..84ab8893aadd
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_register.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */
+
+#ifndef _GVE_REGISTER_H_
+#define _GVE_REGISTER_H_
+
+/* Fixed Configuration Registers */
+struct gve_registers {
+ __be32 device_status;
+ __be32 driver_status;
+ __be32 max_tx_queues;
+ __be32 max_rx_queues;
+ __be32 adminq_pfn;
+ __be32 adminq_doorbell;
+ __be32 adminq_event_counter;
+ u8 reserved[3];
+ u8 driver_version;
+};
+
+enum gve_device_status_flags {
+ GVE_DEVICE_STATUS_RESET_MASK = BIT(1),
+ GVE_DEVICE_STATUS_LINK_STATUS_MASK = BIT(2),
+};
+#endif /* _GVE_REGISTER_H_ */
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
new file mode 100644
index 000000000000..c1aeabd1c594
--- /dev/null
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -0,0 +1,446 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2019 Google, Inc.
+ */ + +#include "gve.h" +#include "gve_adminq.h" +#include <linux/etherdevice.h> + +static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx) +{ + struct gve_notify_block *block = + &priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)]; + + block->rx = NULL; +} + +static void gve_rx_free_ring(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + struct device *dev = &priv->pdev->dev; + size_t bytes; + u32 slots; + + gve_rx_remove_from_block(priv, idx); + + bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; + dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus); + rx->desc.desc_ring = NULL; + + dma_free_coherent(dev, sizeof(*rx->q_resources), + rx->q_resources, rx->q_resources_bus); + rx->q_resources = NULL; + + gve_unassign_qpl(priv, rx->data.qpl->id); + rx->data.qpl = NULL; + kfree(rx->data.page_info); + + slots = rx->data.mask + 1; + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(dev, bytes, rx->data.data_ring, + rx->data.data_bus); + rx->data.data_ring = NULL; + netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx); +} + +static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info, + struct gve_rx_data_slot *slot, + dma_addr_t addr, struct page *page) +{ + page_info->page = page; + page_info->page_offset = 0; + page_info->page_address = page_address(page); + slot->qpl_offset = cpu_to_be64(addr); +} + +static int gve_prefill_rx_pages(struct gve_rx_ring *rx) +{ + struct gve_priv *priv = rx->gve; + u32 slots; + int i; + + /* Allocate one page per Rx queue slot. Each page is split into two + * packet buffers, when possible we "page flip" between the two. + */ + slots = rx->data.mask + 1; + + rx->data.page_info = kvzalloc(slots * + sizeof(*rx->data.page_info), GFP_KERNEL); + if (!rx->data.page_info) + return -ENOMEM; + + rx->data.qpl = gve_assign_rx_qpl(priv); + + for (i = 0; i < slots; i++) { + struct page *page = rx->data.qpl->pages[i]; + dma_addr_t addr = i * PAGE_SIZE; + + gve_setup_rx_buffer(&rx->data.page_info[i], + &rx->data.data_ring[i], addr, page); + } + + return slots; +} + +static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx) +{ + u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + struct gve_rx_ring *rx = &priv->rx[queue_idx]; + + block->rx = rx; + rx->ntfy_id = ntfy_idx; +} + +static int gve_rx_alloc_ring(struct gve_priv *priv, int idx) +{ + struct gve_rx_ring *rx = &priv->rx[idx]; + struct device *hdev = &priv->pdev->dev; + u32 slots, npages; + int filled_pages; + size_t bytes; + int err; + + netif_dbg(priv, drv, priv->dev, "allocating rx ring\n"); + /* Make sure everything is zeroed to start with */ + memset(rx, 0, sizeof(*rx)); + + rx->gve = priv; + rx->q_num = idx; + + slots = priv->rx_pages_per_qpl; + rx->data.mask = slots - 1; + + /* alloc rx data ring */ + bytes = sizeof(*rx->data.data_ring) * slots; + rx->data.data_ring = dma_alloc_coherent(hdev, bytes, + &rx->data.data_bus, + GFP_KERNEL); + if (!rx->data.data_ring) + return -ENOMEM; + filled_pages = gve_prefill_rx_pages(rx); + if (filled_pages < 0) { + err = -ENOMEM; + goto abort_with_slots; + } + rx->desc.fill_cnt = filled_pages; + /* Ensure data ring slots (packet buffers) are visible. 
*/ + dma_wmb(); + + /* Alloc gve_queue_resources */ + rx->q_resources = + dma_alloc_coherent(hdev, + sizeof(*rx->q_resources), + &rx->q_resources_bus, + GFP_KERNEL); + if (!rx->q_resources) { + err = -ENOMEM; + goto abort_filled; + } + netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx, + (unsigned long)rx->data.data_bus); + + /* alloc rx desc ring */ + bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt; + npages = bytes / PAGE_SIZE; + if (npages * PAGE_SIZE != bytes) { + err = -EIO; + goto abort_with_q_resources; + } + + rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus, + GFP_KERNEL); + if (!rx->desc.desc_ring) { + err = -ENOMEM; + goto abort_with_q_resources; + } + rx->desc.mask = slots - 1; + rx->desc.cnt = 0; + rx->desc.seqno = 1; + gve_rx_add_to_block(priv, idx); + + return 0; + +abort_with_q_resources: + dma_free_coherent(hdev, sizeof(*rx->q_resources), + rx->q_resources, rx->q_resources_bus); + rx->q_resources = NULL; +abort_filled: + kfree(rx->data.page_info); +abort_with_slots: + bytes = sizeof(*rx->data.data_ring) * slots; + dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus); + rx->data.data_ring = NULL; + + return err; +} + +int gve_rx_alloc_rings(struct gve_priv *priv) +{ + int err = 0; + int i; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + err = gve_rx_alloc_ring(priv, i); + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to alloc rx ring=%d: err=%d\n", + i, err); + break; + } + } + /* Unallocate if there was an error */ + if (err) { + int j; + + for (j = 0; j < i; j++) + gve_rx_free_ring(priv, j); + } + return err; +} + +void gve_rx_free_rings(struct gve_priv *priv) +{ + int i; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) + gve_rx_free_ring(priv, i); +} + +void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx) +{ + u32 db_idx = be32_to_cpu(rx->q_resources->db_index); + + iowrite32be(rx->desc.fill_cnt, &priv->db_bar2[db_idx]); +} + +static enum pkt_hash_types gve_rss_type(__be16 pkt_flags) +{ + if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP))) + return PKT_HASH_TYPE_L4; + if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) + return PKT_HASH_TYPE_L3; + return PKT_HASH_TYPE_L2; +} + +static struct sk_buff *gve_rx_copy(struct net_device *dev, + struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, + u16 len) +{ + struct sk_buff *skb = napi_alloc_skb(napi, len); + void *va = page_info->page_address + GVE_RX_PAD + + page_info->page_offset; + + if (unlikely(!skb)) + return NULL; + + __skb_put(skb, len); + + skb_copy_to_linear_data(skb, va, len); + + skb->protocol = eth_type_trans(skb, dev); + return skb; +} + +static struct sk_buff *gve_rx_add_frags(struct net_device *dev, + struct napi_struct *napi, + struct gve_rx_slot_page_info *page_info, + u16 len) +{ + struct sk_buff *skb = napi_get_frags(napi); + + if (unlikely(!skb)) + return NULL; + + skb_add_rx_frag(skb, 0, page_info->page, + page_info->page_offset + + GVE_RX_PAD, len, PAGE_SIZE / 2); + + return skb; +} + +static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, + struct gve_rx_data_slot *data_ring) +{ + u64 addr = be64_to_cpu(data_ring->qpl_offset); + + page_info->page_offset ^= PAGE_SIZE / 2; + addr ^= PAGE_SIZE / 2; + data_ring->qpl_offset = cpu_to_be64(addr); +} + +static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc, + netdev_features_t feat) +{ + struct gve_rx_slot_page_info *page_info; + struct gve_priv *priv = rx->gve; + struct napi_struct *napi = 
&priv->ntfy_blocks[rx->ntfy_id].napi; + struct net_device *dev = priv->dev; + struct sk_buff *skb; + int pagecount; + u16 len; + u32 idx; + + /* drop this packet */ + if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR)) + return true; + + len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD; + idx = rx->data.cnt & rx->data.mask; + page_info = &rx->data.page_info[idx]; + + /* gvnic can only receive into registered segments. If the buffer + * can't be recycled, our only choice is to copy the data out of + * it so that we can return it to the device. + */ + + if (PAGE_SIZE == 4096) { + if (len <= priv->rx_copybreak) { + /* Just copy small packets */ + skb = gve_rx_copy(dev, napi, page_info, len); + goto have_skb; + } + if (unlikely(!gve_can_recycle_pages(dev))) { + skb = gve_rx_copy(dev, napi, page_info, len); + goto have_skb; + } + pagecount = page_count(page_info->page); + if (pagecount == 1) { + /* No part of this page is used by any SKBs; we attach + * the page fragment to a new SKB and pass it up the + * stack. + */ + skb = gve_rx_add_frags(dev, napi, page_info, len); + if (!skb) + return true; + /* Make sure the kernel stack can't release the page */ + get_page(page_info->page); + /* "flip" to other packet buffer on this page */ + gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]); + } else if (pagecount >= 2) { + /* We have previously passed the other half of this + * page up the stack, but it has not yet been freed. + */ + skb = gve_rx_copy(dev, napi, page_info, len); + } else { + WARN(pagecount < 1, "Pagecount should never be < 1"); + return false; + } + } else { + skb = gve_rx_copy(dev, napi, page_info, len); + } + +have_skb: + /* We didn't manage to allocate an skb but we haven't had any + * reset worthy failures. + */ + if (!skb) + return true; + + rx->data.cnt++; + + if (likely(feat & NETIF_F_RXCSUM)) { + /* NIC passes up the partial sum */ + if (rx_desc->csum) + skb->ip_summed = CHECKSUM_COMPLETE; + else + skb->ip_summed = CHECKSUM_NONE; + skb->csum = csum_unfold(rx_desc->csum); + } + + /* parse flags & pass relevant info up */ + if (likely(feat & NETIF_F_RXHASH) && + gve_needs_rss(rx_desc->flags_seq)) + skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash), + gve_rss_type(rx_desc->flags_seq)); + + if (skb_is_nonlinear(skb)) + napi_gro_frags(napi); + else + napi_gro_receive(napi, skb); + return true; +} + +static bool gve_rx_work_pending(struct gve_rx_ring *rx) +{ + struct gve_rx_desc *desc; + __be16 flags_seq; + u32 next_idx; + + next_idx = rx->desc.cnt & rx->desc.mask; + desc = rx->desc.desc_ring + next_idx; + + flags_seq = desc->flags_seq; + /* Make sure we have synchronized the seq no with the device */ + smp_rmb(); + + return (GVE_SEQNO(flags_seq) == rx->desc.seqno); +} + +bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, + netdev_features_t feat) +{ + struct gve_priv *priv = rx->gve; + struct gve_rx_desc *desc; + u32 cnt = rx->desc.cnt; + u32 idx = cnt & rx->desc.mask; + u32 work_done = 0; + u64 bytes = 0; + + desc = rx->desc.desc_ring + idx; + while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) && + work_done < budget) { + netif_info(priv, rx_status, priv->dev, + "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n", + rx->q_num, idx, desc, desc->flags_seq); + netif_info(priv, rx_status, priv->dev, + "[%d] seqno=%d rx->desc.seqno=%d\n", + rx->q_num, GVE_SEQNO(desc->flags_seq), + rx->desc.seqno); + bytes += be16_to_cpu(desc->len) - GVE_RX_PAD; + if (!gve_rx(rx, desc, feat)) + gve_schedule_reset(priv); + cnt++; + idx = cnt & rx->desc.mask; + desc = rx->desc.desc_ring + idx; + 
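+ /* Editor's note: gve_next_seqno() never yields 0, so a zeroed
+ * descriptor that the device has not yet written back can never match
+ * the sequence number the driver expects next.
+ */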
rx->desc.seqno = gve_next_seqno(rx->desc.seqno); + work_done++; + } + + if (!work_done) + return false; + + u64_stats_update_begin(&rx->statss); + rx->rpackets += work_done; + rx->rbytes += bytes; + u64_stats_update_end(&rx->statss); + rx->desc.cnt = cnt; + rx->desc.fill_cnt += work_done; + + /* restock desc ring slots */ + dma_wmb(); /* Ensure descs are visible before ringing doorbell */ + gve_rx_write_doorbell(priv, rx); + return gve_rx_work_pending(rx); +} + +bool gve_rx_poll(struct gve_notify_block *block, int budget) +{ + struct gve_rx_ring *rx = block->rx; + netdev_features_t feat; + bool repoll = false; + + feat = block->napi.dev->features; + + /* If budget is 0, do all the work */ + if (budget == 0) + budget = INT_MAX; + + if (budget > 0) + repoll |= gve_clean_rx_done(rx, budget, feat); + else + repoll |= gve_rx_work_pending(rx); + return repoll; +} diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c new file mode 100644 index 000000000000..778b87b5a06c --- /dev/null +++ b/drivers/net/ethernet/google/gve/gve_tx.c @@ -0,0 +1,584 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Google virtual Ethernet (gve) driver + * + * Copyright (C) 2015-2019 Google, Inc. + */ + +#include "gve.h" +#include "gve_adminq.h" +#include <linux/ip.h> +#include <linux/tcp.h> +#include <linux/vmalloc.h> +#include <linux/skbuff.h> + +static inline void gve_tx_put_doorbell(struct gve_priv *priv, + struct gve_queue_resources *q_resources, + u32 val) +{ + iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]); +} + +/* gvnic can only transmit from a Registered Segment. + * We copy skb payloads into the registered segment before writing Tx + * descriptors and ringing the Tx doorbell. + * + * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must + * free allocations in the order they were allocated. + */ + +static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo) +{ + fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP, + PAGE_KERNEL); + if (unlikely(!fifo->base)) { + netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n", + fifo->qpl->id); + return -ENOMEM; + } + + fifo->size = fifo->qpl->num_entries * PAGE_SIZE; + atomic_set(&fifo->available, fifo->size); + fifo->head = 0; + return 0; +} + +static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo) +{ + WARN(atomic_read(&fifo->available) != fifo->size, + "Releasing non-empty fifo"); + + vunmap(fifo->base); +} + +static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo, + size_t bytes) +{ + return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head; +} + +static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes) +{ + return (atomic_read(&fifo->available) <= bytes) ? false : true; +} + +/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO + * @fifo: FIFO to allocate from + * @bytes: Allocation size + * @iov: Scatter-gather elements to fill with allocation fragment base/len + * + * Returns number of valid elements in iov[] or negative on error. + * + * Allocations from a given FIFO must be externally synchronized but concurrent + * allocation and frees are allowed. 
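+ *
+ * Worked example (editor's sketch, assuming 64-byte L1 cachelines):
+ * with fifo->size = 8192 and fifo->head = 8000, a 400-byte request
+ * wraps and returns two fragments, iov[0] = {offset 8000, len 192}
+ * and iov[1] = {offset 0, len 208}; the head is then rounded up from
+ * 208 to 256 and the 48 padding bytes are charged to iov[1].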
+ */ +static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes, + struct gve_tx_iovec iov[2]) +{ + size_t overflow, padding; + u32 aligned_head; + int nfrags = 0; + + if (!bytes) + return 0; + + /* This check happens before we know how much padding is needed to + * align to a cacheline boundary for the payload, but that is fine, + * because the FIFO head always starts aligned, and the FIFO's boundaries + * are aligned, so if there is space for the data, there is space for + * the padding to the next alignment. + */ + WARN(!gve_tx_fifo_can_alloc(fifo, bytes), + "Reached %s when there's not enough space in the fifo", __func__); + + nfrags++; + + iov[0].iov_offset = fifo->head; + iov[0].iov_len = bytes; + fifo->head += bytes; + + if (fifo->head > fifo->size) { + /* If the allocation did not fit in the tail fragment of the + * FIFO, also use the head fragment. + */ + nfrags++; + overflow = fifo->head - fifo->size; + iov[0].iov_len -= overflow; + iov[1].iov_offset = 0; /* Start of fifo */ + iov[1].iov_len = overflow; + + fifo->head = overflow; + } + + /* Re-align to a cacheline boundary */ + aligned_head = L1_CACHE_ALIGN(fifo->head); + padding = aligned_head - fifo->head; + iov[nfrags - 1].iov_padding = padding; + atomic_sub(bytes + padding, &fifo->available); + fifo->head = aligned_head; + + if (fifo->head == fifo->size) + fifo->head = 0; + + return nfrags; +} + +/* gve_tx_free_fifo - Return space to Tx FIFO + * @fifo: FIFO to return fragments to + * @bytes: Bytes to free + */ +static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes) +{ + atomic_add(bytes, &fifo->available); +} + +static void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx) +{ + struct gve_notify_block *block = + &priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)]; + + block->tx = NULL; +} + +static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, + u32 to_do, bool try_to_wake); + +static void gve_tx_free_ring(struct gve_priv *priv, int idx) +{ + struct gve_tx_ring *tx = &priv->tx[idx]; + struct device *hdev = &priv->pdev->dev; + size_t bytes; + u32 slots; + + gve_tx_remove_from_block(priv, idx); + slots = tx->mask + 1; + gve_clean_tx_done(priv, tx, tx->req, false); + netdev_tx_reset_queue(tx->netdev_txq); + + dma_free_coherent(hdev, sizeof(*tx->q_resources), + tx->q_resources, tx->q_resources_bus); + tx->q_resources = NULL; + + gve_tx_fifo_release(priv, &tx->tx_fifo); + gve_unassign_qpl(priv, tx->tx_fifo.qpl->id); + tx->tx_fifo.qpl = NULL; + + bytes = sizeof(*tx->desc) * slots; + dma_free_coherent(hdev, bytes, tx->desc, tx->bus); + tx->desc = NULL; + + vfree(tx->info); + tx->info = NULL; + + netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx); +} + +static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx) +{ + int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx); + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; + struct gve_tx_ring *tx = &priv->tx[queue_idx]; + + block->tx = tx; + tx->ntfy_id = ntfy_idx; +} + +static int gve_tx_alloc_ring(struct gve_priv *priv, int idx) +{ + struct gve_tx_ring *tx = &priv->tx[idx]; + struct device *hdev = &priv->pdev->dev; + u32 slots = priv->tx_desc_cnt; + size_t bytes; + + /* Make sure everything is zeroed to start */ + memset(tx, 0, sizeof(*tx)); + tx->q_num = idx; + + tx->mask = slots - 1; + + /* alloc metadata */ + tx->info = vzalloc(sizeof(*tx->info) * slots); + if (!tx->info) + return -ENOMEM; + + /* alloc tx queue */ + bytes = sizeof(*tx->desc) * slots; + tx->desc = dma_alloc_coherent(hdev, 
bytes, &tx->bus, GFP_KERNEL); + if (!tx->desc) + goto abort_with_info; + + tx->tx_fifo.qpl = gve_assign_tx_qpl(priv); + + /* map Tx FIFO */ + if (gve_tx_fifo_init(priv, &tx->tx_fifo)) + goto abort_with_desc; + + tx->q_resources = + dma_alloc_coherent(hdev, + sizeof(*tx->q_resources), + &tx->q_resources_bus, + GFP_KERNEL); + if (!tx->q_resources) + goto abort_with_fifo; + + netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx, + (unsigned long)tx->bus); + tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx); + gve_tx_add_to_block(priv, idx); + + return 0; + +abort_with_fifo: + gve_tx_fifo_release(priv, &tx->tx_fifo); +abort_with_desc: + dma_free_coherent(hdev, bytes, tx->desc, tx->bus); + tx->desc = NULL; +abort_with_info: + vfree(tx->info); + tx->info = NULL; + return -ENOMEM; +} + +int gve_tx_alloc_rings(struct gve_priv *priv) +{ + int err = 0; + int i; + + for (i = 0; i < priv->tx_cfg.num_queues; i++) { + err = gve_tx_alloc_ring(priv, i); + if (err) { + netif_err(priv, drv, priv->dev, + "Failed to alloc tx ring=%d: err=%d\n", + i, err); + break; + } + } + /* Unallocate if there was an error */ + if (err) { + int j; + + for (j = 0; j < i; j++) + gve_tx_free_ring(priv, j); + } + return err; +} + +void gve_tx_free_rings(struct gve_priv *priv) +{ + int i; + + for (i = 0; i < priv->tx_cfg.num_queues; i++) + gve_tx_free_ring(priv, i); +} + +/* gve_tx_avail - Calculates the number of slots available in the ring + * @tx: tx ring to check + * + * Returns the number of slots available + * + * The capacity of the queue is mask + 1. We don't need to reserve an entry. + **/ +static inline u32 gve_tx_avail(struct gve_tx_ring *tx) +{ + return tx->mask + 1 - (tx->req - tx->done); +} + +static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx, + struct sk_buff *skb) +{ + int pad_bytes, align_hdr_pad; + int bytes; + int hlen; + + hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) + + tcp_hdrlen(skb) : skb_headlen(skb); + + pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, + hlen); + /* We need to take into account the header alignment padding. */ + align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen; + bytes = align_hdr_pad + pad_bytes + skb->len; + + return bytes; +} + +/* The most descriptors we could need are 3 - 1 for the headers, 1 for + * the beginning of the payload at the end of the FIFO, and 1 if the + * payload wraps to the beginning of the FIFO. + */ +#define MAX_TX_DESC_NEEDED 3 + +/* Check if sufficient resources (descriptor ring space, FIFO space) are + * available to transmit the given number of bytes. + */ +static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required) +{ + return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && + gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required)); +} + +/* Stops the queue if the skb cannot be transmitted. */ +static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb) +{ + int bytes_required; + + bytes_required = gve_skb_fifo_bytes_required(tx, skb); + if (likely(gve_can_tx(tx, bytes_required))) + return 0; + + /* No space, so stop the queue */ + tx->stop_queue++; + netif_tx_stop_queue(tx->netdev_txq); + smp_mb(); /* sync with restarting queue in gve_clean_tx_done() */ + + /* Now check for resources again, in case gve_clean_tx_done() freed + * resources after we checked and we stopped the queue after + * gve_clean_tx_done() checked. 
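+	 * The smp_mb() above pairs with the netif_tx_queue_stopped() check
+	 * in gve_clean_tx_done(): either this path sees the space freed by
+	 * the cleanup, or the cleanup sees the stopped queue and wakes it.
+	 * The interleaving being guarded against: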
+ * + * gve_maybe_stop_tx() gve_clean_tx_done() + * nsegs/can_alloc test failed + * gve_tx_free_fifo() + * if (tx queue stopped) + * netif_tx_queue_wake() + * netif_tx_stop_queue() + * Need to check again for space here! + */ + if (likely(!gve_can_tx(tx, bytes_required))) + return -EBUSY; + + netif_tx_start_queue(tx->netdev_txq); + tx->wake_queue++; + return 0; +} + +static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, + struct sk_buff *skb, bool is_gso, + int l4_hdr_offset, u32 desc_cnt, + u16 hlen, u64 addr) +{ + /* l4_hdr_offset and csum_offset are in units of 16-bit words */ + if (is_gso) { + pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM; + pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; + pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM; + pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; + pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; + } else { + pkt_desc->pkt.type_flags = GVE_TXD_STD; + pkt_desc->pkt.l4_csum_offset = 0; + pkt_desc->pkt.l4_hdr_offset = 0; + } + pkt_desc->pkt.desc_cnt = desc_cnt; + pkt_desc->pkt.len = cpu_to_be16(skb->len); + pkt_desc->pkt.seg_len = cpu_to_be16(hlen); + pkt_desc->pkt.seg_addr = cpu_to_be64(addr); +} + +static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc, + struct sk_buff *skb, bool is_gso, + u16 len, u64 addr) +{ + seg_desc->seg.type_flags = GVE_TXD_SEG; + if (is_gso) { + if (skb_is_gso_v6(skb)) + seg_desc->seg.type_flags |= GVE_TXSF_IPV6; + seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1; + seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); + } + seg_desc->seg.seg_len = cpu_to_be16(len); + seg_desc->seg.seg_addr = cpu_to_be64(addr); +} + +static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb) +{ + int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset; + union gve_tx_desc *pkt_desc, *seg_desc; + struct gve_tx_buffer_state *info; + bool is_gso = skb_is_gso(skb); + u32 idx = tx->req & tx->mask; + int payload_iov = 2; + int copy_offset; + u32 next_idx; + int i; + + info = &tx->info[idx]; + pkt_desc = &tx->desc[idx]; + + l4_hdr_offset = skb_checksum_start_offset(skb); + /* If the skb is gso, then we want the tcp header in the first segment + * otherwise we want the linear portion of the skb (which will contain + * the checksum because skb->csum_start and skb->csum_offset are given + * relative to skb->head) in the first segment. + */ + hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : + skb_headlen(skb); + + info->skb = skb; + /* We don't want to split the header, so if necessary, pad to the end + * of the fifo and then put the header at the beginning of the fifo. 
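+	 * Example (editor's sketch): with fifo->size = 8192, fifo->head =
+	 * 8150 and hlen = 100, gve_tx_fifo_pad_alloc_one_frag() returns 42
+	 * pad bytes, so the allocation below lands the whole header at
+	 * offset 0 instead of splitting it across the wrap.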
+ */ + pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen); + hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes, + &info->iov[0]); + WARN(!hdr_nfrags, "hdr_nfrags should never be 0!"); + payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen, + &info->iov[payload_iov]); + + gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, + 1 + payload_nfrags, hlen, + info->iov[hdr_nfrags - 1].iov_offset); + + skb_copy_bits(skb, 0, + tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset, + hlen); + copy_offset = hlen; + + for (i = payload_iov; i < payload_nfrags + payload_iov; i++) { + next_idx = (tx->req + 1 + i - payload_iov) & tx->mask; + seg_desc = &tx->desc[next_idx]; + + gve_tx_fill_seg_desc(seg_desc, skb, is_gso, + info->iov[i].iov_len, + info->iov[i].iov_offset); + + skb_copy_bits(skb, copy_offset, + tx->tx_fifo.base + info->iov[i].iov_offset, + info->iov[i].iov_len); + copy_offset += info->iov[i].iov_len; + } + + return 1 + payload_nfrags; +} + +netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct gve_priv *priv = netdev_priv(dev); + struct gve_tx_ring *tx; + int nsegs; + + WARN(skb_get_queue_mapping(skb) > priv->tx_cfg.num_queues, + "skb queue index out of range"); + tx = &priv->tx[skb_get_queue_mapping(skb)]; + if (unlikely(gve_maybe_stop_tx(tx, skb))) { + /* We need to ring the txq doorbell -- we have stopped the Tx + * queue for want of resources, but prior calls to gve_tx() + * may have added descriptors without ringing the doorbell. + */ + + /* Ensure tx descs from a prior gve_tx are visible before + * ringing doorbell. + */ + dma_wmb(); + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); + return NETDEV_TX_BUSY; + } + nsegs = gve_tx_add_skb(tx, skb); + + netdev_tx_sent_queue(tx->netdev_txq, skb->len); + skb_tx_timestamp(skb); + + /* give packets to NIC */ + tx->req += nsegs; + + if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more()) + return NETDEV_TX_OK; + + /* Ensure tx descs are visible before ringing doorbell */ + dma_wmb(); + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); + return NETDEV_TX_OK; +} + +#define GVE_TX_START_THRESH PAGE_SIZE + +static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, + u32 to_do, bool try_to_wake) +{ + struct gve_tx_buffer_state *info; + u64 pkts = 0, bytes = 0; + size_t space_freed = 0; + struct sk_buff *skb; + int i, j; + u32 idx; + + for (j = 0; j < to_do; j++) { + idx = tx->done & tx->mask; + netif_info(priv, tx_done, priv->dev, + "[%d] %s: idx=%d (req=%u done=%u)\n", + tx->q_num, __func__, idx, tx->req, tx->done); + info = &tx->info[idx]; + skb = info->skb; + + /* Mark as free */ + if (skb) { + info->skb = NULL; + bytes += skb->len; + pkts++; + dev_consume_skb_any(skb); + /* FIFO free */ + for (i = 0; i < ARRAY_SIZE(info->iov); i++) { + space_freed += info->iov[i].iov_len + + info->iov[i].iov_padding; + info->iov[i].iov_len = 0; + info->iov[i].iov_padding = 0; + } + } + tx->done++; + } + + gve_tx_free_fifo(&tx->tx_fifo, space_freed); + u64_stats_update_begin(&tx->statss); + tx->bytes_done += bytes; + tx->pkt_done += pkts; + u64_stats_update_end(&tx->statss); + netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes); + + /* start the queue if we've stopped it */ +#ifndef CONFIG_BQL + /* Make sure that the doorbells are synced */ + smp_mb(); +#endif + if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) && + likely(gve_can_tx(tx, GVE_TX_START_THRESH))) { + tx->wake_queue++; + netif_tx_wake_queue(tx->netdev_txq); + } + + return pkts; +} + +__be32 
gve_tx_load_event_counter(struct gve_priv *priv, + struct gve_tx_ring *tx) +{ + u32 counter_index = be32_to_cpu((tx->q_resources->counter_index)); + + return READ_ONCE(priv->counter_array[counter_index]); +} + +bool gve_tx_poll(struct gve_notify_block *block, int budget) +{ + struct gve_priv *priv = block->priv; + struct gve_tx_ring *tx = block->tx; + bool repoll = false; + u32 nic_done; + u32 to_do; + + /* If budget is 0, do all the work */ + if (budget == 0) + budget = INT_MAX; + + /* Find out how much work there is to be done */ + tx->last_nic_done = gve_tx_load_event_counter(priv, tx); + nic_done = be32_to_cpu(tx->last_nic_done); + if (budget > 0) { + /* Do as much work as we have that the budget will + * allow + */ + to_do = min_t(u32, (nic_done - tx->done), budget); + gve_clean_tx_done(priv, tx, to_do, true); + } + /* If we still have work we want to repoll */ + repoll |= (nic_done != tx->done); + return repoll; +} diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index a0d780c14e60..3892a2062404 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -46,6 +46,16 @@ config HIP04_ETH If you wish to compile a kernel for a hardware with hisilicon p04 SoC and want to use the internal ethernet then you should answer Y to this. +config HI13X1_GMAC + bool "Hisilicon HI13X1 Network Device Support" + depends on HIP04_ETH + help + If you wish to compile a kernel for a hardware with hisilicon hi13x1_gmac + then you should answer Y to this. This makes this driver suitable for use + on certain boards such as the HI13X1. + + If you are unsure, say N. + config HNS_MDIO tristate select PHYLIB diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index e1f2978506fd..625635771b83 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -16,6 +16,8 @@ #include <linux/mfd/syscon.h> #include <linux/regmap.h> +#define SC_PPE_RESET_DREQ 0x026C + #define PPE_CFG_RX_ADDR 0x100 #define PPE_CFG_POOL_GRP 0x300 #define PPE_CFG_RX_BUF_SIZE 0x400 @@ -33,10 +35,23 @@ #define GE_MODE_CHANGE_REG 0x1b4 #define GE_RECV_CONTROL_REG 0x1e0 #define GE_STATION_MAC_ADDRESS 0x210 -#define PPE_CFG_CPU_ADD_ADDR 0x580 -#define PPE_CFG_MAX_FRAME_LEN_REG 0x408 + #define PPE_CFG_BUS_CTRL_REG 0x424 #define PPE_CFG_RX_CTRL_REG 0x428 + +#if defined(CONFIG_HI13X1_GMAC) +#define PPE_CFG_CPU_ADD_ADDR 0x6D0 +#define PPE_CFG_MAX_FRAME_LEN_REG 0x500 +#define PPE_CFG_RX_PKT_MODE_REG 0x504 +#define PPE_CFG_QOS_VMID_GEN 0x520 +#define PPE_CFG_RX_PKT_INT 0x740 +#define PPE_INTEN 0x700 +#define PPE_INTSTS 0x708 +#define PPE_RINT 0x704 +#define PPE_CFG_STS_MODE 0x880 +#else +#define PPE_CFG_CPU_ADD_ADDR 0x580 +#define PPE_CFG_MAX_FRAME_LEN_REG 0x408 #define PPE_CFG_RX_PKT_MODE_REG 0x438 #define PPE_CFG_QOS_VMID_GEN 0x500 #define PPE_CFG_RX_PKT_INT 0x538 @@ -44,8 +59,12 @@ #define PPE_INTSTS 0x608 #define PPE_RINT 0x604 #define PPE_CFG_STS_MODE 0x700 +#endif /* CONFIG_HI13X1_GMAC */ + #define PPE_HIS_RX_PKT_CNT 0x804 +#define RESET_DREQ_ALL 0xffffffff + /* REG_INTERRUPT */ #define RCV_INT BIT(10) #define RCV_NOBUF BIT(8) @@ -57,8 +76,15 @@ /* TX descriptor config */ #define TX_FREE_MEM BIT(0) #define TX_READ_ALLOC_L3 BIT(1) -#define TX_FINISH_CACHE_INV BIT(2) +#if defined(CONFIG_HI13X1_GMAC) +#define TX_CLEAR_WB BIT(7) +#define TX_RELEASE_TO_PPE BIT(4) +#define TX_FINISH_CACHE_INV BIT(6) +#define TX_POOL_SHIFT 16 +#else #define TX_CLEAR_WB BIT(4) +#define 
TX_FINISH_CACHE_INV BIT(2) +#endif #define TX_L3_CHECKSUM BIT(5) #define TX_LOOP_BACK BIT(11) @@ -93,18 +119,35 @@ #define GE_RX_PORT_EN BIT(1) #define GE_TX_PORT_EN BIT(2) -#define PPE_CFG_STS_RX_PKT_CNT_RC BIT(12) - #define PPE_CFG_RX_PKT_ALIGN BIT(18) -#define PPE_CFG_QOS_VMID_MODE BIT(14) + +#if defined(CONFIG_HI13X1_GMAC) +#define PPE_CFG_QOS_VMID_GRP_SHIFT 4 +#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 7 +#define PPE_CFG_STS_RX_PKT_CNT_RC BIT(0) +#define PPE_CFG_QOS_VMID_MODE BIT(15) +#define PPE_CFG_BUS_LOCAL_REL (BIT(9) | BIT(15) | BIT(19) | BIT(23)) + +/* buf unit size is cache_line_size, which is 64, so the shift is 6 */ +#define PPE_BUF_SIZE_SHIFT 6 +#define PPE_TX_BUF_HOLD BIT(31) +#define CACHE_LINE_MASK 0x3F +#else #define PPE_CFG_QOS_VMID_GRP_SHIFT 8 +#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 +#define PPE_CFG_STS_RX_PKT_CNT_RC BIT(12) +#define PPE_CFG_QOS_VMID_MODE BIT(14) +#define PPE_CFG_BUS_LOCAL_REL BIT(14) + +/* buf unit size is 1, so the shift is 0 */ +#define PPE_BUF_SIZE_SHIFT 0 +#define PPE_TX_BUF_HOLD 0 +#endif /* CONFIG_HI13X1_GMAC */ #define PPE_CFG_RX_FIFO_FSFU BIT(11) #define PPE_CFG_RX_DEPTH_SHIFT 16 #define PPE_CFG_RX_START_SHIFT 0 -#define PPE_CFG_RX_CTRL_ALIGN_SHIFT 11 -#define PPE_CFG_BUS_LOCAL_REL BIT(14) #define PPE_CFG_BUS_BIG_ENDIEN BIT(0) #define RX_DESC_NUM 128 @@ -128,26 +171,50 @@ #define HIP04_MIN_TX_COALESCE_FRAMES 100 struct tx_desc { +#if defined(CONFIG_HI13X1_GMAC) + u32 reserved1[2]; + u32 send_addr; + u16 send_size; + u16 data_offset; + u32 reserved2[7]; + u32 cfg; + u32 wb_addr; + u32 reserved3[3]; +#else u32 send_addr; u32 send_size; u32 next_addr; u32 cfg; u32 wb_addr; +#endif } __aligned(64); struct rx_desc { +#if defined(CONFIG_HI13X1_GMAC) + u32 reserved1[3]; + u16 pkt_len; + u16 reserved_16; + u32 reserved2[6]; + u32 pkt_err; + u32 reserved3[5]; +#else u16 reserved_16; u16 pkt_len; u32 reserve1[3]; u32 pkt_err; u32 reserve2[4]; +#endif }; struct hip04_priv { void __iomem *base; +#if defined(CONFIG_HI13X1_GMAC) + void __iomem *sysctrl_base; +#endif int phy_mode; int chan; unsigned int port; + unsigned int group; unsigned int speed; unsigned int duplex; unsigned int reg_inten; @@ -221,6 +288,13 @@ static void hip04_config_port(struct net_device *ndev, u32 speed, u32 duplex) writel_relaxed(val, priv->base + GE_MODE_CHANGE_REG); } +static void hip04_reset_dreq(struct hip04_priv *priv) +{ +#if defined(CONFIG_HI13X1_GMAC) + writel_relaxed(RESET_DREQ_ALL, priv->sysctrl_base + SC_PPE_RESET_DREQ); +#endif +} + static void hip04_reset_ppe(struct hip04_priv *priv) { u32 val, tmp, timeout = 0; @@ -241,14 +315,14 @@ static void hip04_config_fifo(struct hip04_priv *priv) val |= PPE_CFG_STS_RX_PKT_CNT_RC; writel_relaxed(val, priv->base + PPE_CFG_STS_MODE); - val = BIT(priv->port); + val = BIT(priv->group); regmap_write(priv->map, priv->port * 4 + PPE_CFG_POOL_GRP, val); - val = priv->port << PPE_CFG_QOS_VMID_GRP_SHIFT; + val = priv->group << PPE_CFG_QOS_VMID_GRP_SHIFT; val |= PPE_CFG_QOS_VMID_MODE; writel_relaxed(val, priv->base + PPE_CFG_QOS_VMID_GEN); - val = RX_BUF_SIZE; + val = RX_BUF_SIZE >> PPE_BUF_SIZE_SHIFT; regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_BUF_SIZE, val); val = RX_DESC_NUM << PPE_CFG_RX_DEPTH_SHIFT; @@ -285,8 +359,10 @@ static void hip04_config_fifo(struct hip04_priv *priv) val |= GE_RX_STRIP_PAD | GE_RX_PAD_EN; writel_relaxed(val, priv->base + GE_RECV_CONTROL_REG); +#ifndef CONFIG_HI13X1_GMAC val = GE_AUTO_NEG_CTL; writel_relaxed(val, priv->base + GE_TX_LOCAL_PAGE_REG); +#endif } static void hip04_mac_enable(struct net_device 
*ndev) @@ -329,12 +405,18 @@ static void hip04_mac_disable(struct net_device *ndev) static void hip04_set_xmit_desc(struct hip04_priv *priv, dma_addr_t phys) { - writel(phys, priv->base + PPE_CFG_CPU_ADD_ADDR); + u32 val; + + val = phys >> PPE_BUF_SIZE_SHIFT | PPE_TX_BUF_HOLD; + writel(val, priv->base + PPE_CFG_CPU_ADD_ADDR); } static void hip04_set_recv_desc(struct hip04_priv *priv, dma_addr_t phys) { - regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, phys); + u32 val; + + val = phys >> PPE_BUF_SIZE_SHIFT; + regmap_write(priv->map, priv->port * 4 + PPE_CFG_RX_ADDR, val); } static u32 hip04_recv_cnt(struct hip04_priv *priv) @@ -442,11 +524,20 @@ hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) priv->tx_skb[tx_head] = skb; priv->tx_phys[tx_head] = phys; - desc->send_addr = cpu_to_be32(phys); - desc->send_size = cpu_to_be32(skb->len); - desc->cfg = cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); + + desc->send_size = (__force u32)cpu_to_be32(skb->len); +#if defined(CONFIG_HI13X1_GMAC) + desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV + | TX_RELEASE_TO_PPE | priv->port << TX_POOL_SHIFT); + desc->data_offset = (__force u32)cpu_to_be32(phys & CACHE_LINE_MASK); + desc->send_addr = (__force u32)cpu_to_be32(phys & ~CACHE_LINE_MASK); +#else + desc->cfg = (__force u32)cpu_to_be32(TX_CLEAR_WB | TX_FINISH_CACHE_INV); + desc->send_addr = (__force u32)cpu_to_be32(phys); +#endif phys = priv->tx_desc_dma + tx_head * sizeof(struct tx_desc); - desc->wb_addr = cpu_to_be32(phys); + desc->wb_addr = (__force u32)cpu_to_be32(phys + + offsetof(struct tx_desc, send_addr)); skb_tx_timestamp(skb); hip04_set_xmit_desc(priv, phys); @@ -507,8 +598,8 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) priv->rx_phys[priv->rx_head] = 0; desc = (struct rx_desc *)skb->data; - len = be16_to_cpu(desc->pkt_len); - err = be32_to_cpu(desc->pkt_err); + len = be16_to_cpu((__force __be16)desc->pkt_len); + err = be32_to_cpu((__force __be32)desc->pkt_err); if (0 == len) { dev_kfree_skb_any(skb); @@ -828,7 +919,16 @@ static int hip04_mac_probe(struct platform_device *pdev) goto init_fail; } - ret = of_parse_phandle_with_fixed_args(node, "port-handle", 2, 0, &arg); +#if defined(CONFIG_HI13X1_GMAC) + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + priv->sysctrl_base = devm_ioremap_resource(d, res); + if (IS_ERR(priv->sysctrl_base)) { + ret = PTR_ERR(priv->sysctrl_base); + goto init_fail; + } +#endif + + ret = of_parse_phandle_with_fixed_args(node, "port-handle", 3, 0, &arg); if (ret < 0) { dev_warn(d, "no port-handle\n"); goto init_fail; @@ -836,6 +936,7 @@ static int hip04_mac_probe(struct platform_device *pdev) priv->port = arg.args[0]; priv->chan = arg.args[1] * RX_DESC_NUM; + priv->group = arg.args[2]; hrtimer_init(&priv->tx_coalesce_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); @@ -896,6 +997,7 @@ static int hip04_mac_probe(struct platform_device *pdev) ndev->irq = irq; netif_napi_add(ndev, &priv->napi, hip04_rx_poll, NAPI_POLL_WEIGHT); + hip04_reset_dreq(priv); hip04_reset_ppe(priv); if (priv->phy_mode == PHY_INTERFACE_MODE_MII) hip04_config_port(ndev, SPEED_100, DUPLEX_FULL); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index fe879c07ae3c..2235dd55fab2 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -2370,6 +2370,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev) ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 
NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6; + ndev->vlan_features |= NETIF_F_TSO | NETIF_F_TSO6; ndev->max_mtu = MAC_MAX_MTU_V2 - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN); break; diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 83e19c6b974e..8ad5292eebbe 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -69,7 +69,7 @@ enum hclge_mbx_vlan_cfg_subcode { }; #define HCLGE_MBX_MAX_MSG_SIZE 16 -#define HCLGE_MBX_MAX_RESP_DATA_SIZE 16 +#define HCLGE_MBX_MAX_RESP_DATA_SIZE 8 #define HCLGE_MBX_RING_MAP_BASIC_MSG_NUM 3 #define HCLGE_MBX_RING_NODE_VARIABLE_NUM 3 diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index fa8b8506b120..908d4f45c06a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -16,21 +16,18 @@ static LIST_HEAD(hnae3_ae_dev_list); */ static DEFINE_MUTEX(hnae3_common_lock); -static bool hnae3_client_match(enum hnae3_client_type client_type, - enum hnae3_dev_type dev_type) +static bool hnae3_client_match(enum hnae3_client_type client_type) { - if ((dev_type == HNAE3_DEV_KNIC) && (client_type == HNAE3_CLIENT_KNIC || - client_type == HNAE3_CLIENT_ROCE)) - return true; - - if (dev_type == HNAE3_DEV_UNIC && client_type == HNAE3_CLIENT_UNIC) + if (client_type == HNAE3_CLIENT_KNIC || + client_type == HNAE3_CLIENT_ROCE) return true; return false; } void hnae3_set_client_init_flag(struct hnae3_client *client, - struct hnae3_ae_dev *ae_dev, int inited) + struct hnae3_ae_dev *ae_dev, + unsigned int inited) { if (!client || !ae_dev) return; @@ -39,9 +36,6 @@ void hnae3_set_client_init_flag(struct hnae3_client *client, case HNAE3_CLIENT_KNIC: hnae3_set_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B, inited); break; - case HNAE3_CLIENT_UNIC: - hnae3_set_bit(ae_dev->flag, HNAE3_UNIC_CLIENT_INITED_B, inited); - break; case HNAE3_CLIENT_ROCE: hnae3_set_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B, inited); break; @@ -61,10 +55,6 @@ static int hnae3_get_client_init_flag(struct hnae3_client *client, inited = hnae3_get_bit(ae_dev->flag, HNAE3_KNIC_CLIENT_INITED_B); break; - case HNAE3_CLIENT_UNIC: - inited = hnae3_get_bit(ae_dev->flag, - HNAE3_UNIC_CLIENT_INITED_B); - break; case HNAE3_CLIENT_ROCE: inited = hnae3_get_bit(ae_dev->flag, HNAE3_ROCE_CLIENT_INITED_B); @@ -82,7 +72,7 @@ static int hnae3_init_client_instance(struct hnae3_client *client, int ret; /* check if this client matches the type of ae_dev */ - if (!(hnae3_client_match(client->type, ae_dev->dev_type) && + if (!(hnae3_client_match(client->type) && hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) { return 0; } @@ -99,7 +89,7 @@ static void hnae3_uninit_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { /* check if this client matches the type of ae_dev */ - if (!(hnae3_client_match(client->type, ae_dev->dev_type) && + if (!(hnae3_client_match(client->type) && hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B))) return; @@ -251,6 +241,7 @@ void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo) ae_algo->ops->uninit_ae_dev(ae_dev); hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0); + ae_dev->ops = NULL; } list_del(&ae_algo->node); @@ -351,6 +342,7 @@ void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev) ae_algo->ops->uninit_ae_dev(ae_dev); hnae3_set_bit(ae_dev->flag, HNAE3_DEV_INITED_B, 0); + ae_dev->ops = NULL; } 
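+ /* Editor's note: ae_dev->ops was just cleared so that the PCI error
+ * handlers in hns3_enet.c can use a simple NULL check to tell a
+ * torn-down ae_dev from a live one.
+ */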
list_del(&ae_dev->node); diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index ad21b0ef1946..48c7b70fc2c4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -102,15 +102,9 @@ enum hnae3_loop { enum hnae3_client_type { HNAE3_CLIENT_KNIC, - HNAE3_CLIENT_UNIC, HNAE3_CLIENT_ROCE, }; -enum hnae3_dev_type { - HNAE3_DEV_KNIC, - HNAE3_DEV_UNIC, -}; - /* mac media type */ enum hnae3_media_type { HNAE3_MEDIA_TYPE_UNKNOWN, @@ -154,7 +148,6 @@ enum hnae3_reset_type { HNAE3_VF_FULL_RESET, HNAE3_FLR_RESET, HNAE3_FUNC_RESET, - HNAE3_CORE_RESET, HNAE3_GLOBAL_RESET, HNAE3_IMP_RESET, HNAE3_UNKNOWN_RESET, @@ -220,8 +213,7 @@ struct hnae3_ae_dev { const struct hnae3_ae_ops *ops; struct list_head node; u32 flag; - u8 override_pci_need_reset; /* fix to stop multiple reset happening */ - enum hnae3_dev_type dev_type; + unsigned long hw_err_reset_req; enum hnae3_reset_type reset_type; void *priv; }; @@ -271,6 +263,8 @@ struct hnae3_ae_dev { * get auto autonegotiation of pause frame use * restart_autoneg() * restart autonegotiation + * halt_autoneg() + * halt/resume autonegotiation when autonegotiation on * get_coalesce_usecs() * get usecs to delay a TX interrupt after a packet is sent * get_rx_max_coalesced_frames() @@ -339,10 +333,14 @@ struct hnae3_ae_dev { * Set vlan filter config of Ports * set_vf_vlan_filter() * Set vlan filter config of vf + * restore_vlan_table() + * Restore vlan filter entries after reset * enable_hw_strip_rxvtag() * Enable/disable hardware strip vlan tag of packets received * set_gro_en * Enable/disable HW GRO + * add_arfs_entry + * Check the 5-tuples of flow, and create flow director rule */ struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); @@ -386,6 +384,7 @@ struct hnae3_ae_ops { int (*set_autoneg)(struct hnae3_handle *handle, bool enable); int (*get_autoneg)(struct hnae3_handle *handle); int (*restart_autoneg)(struct hnae3_handle *handle); + int (*halt_autoneg)(struct hnae3_handle *handle, bool halt); void (*get_coalesce_usecs)(struct hnae3_handle *handle, u32 *tx_usecs, u32 *rx_usecs); @@ -463,6 +462,8 @@ struct hnae3_ae_ops { u16 vlan, u8 qos, __be16 proto); int (*enable_hw_strip_rxvtag)(struct hnae3_handle *handle, bool enable); void (*reset_event)(struct pci_dev *pdev, struct hnae3_handle *handle); + enum hnae3_reset_type (*get_reset_level)(struct hnae3_ae_dev *ae_dev, + unsigned long *addr); void (*set_default_reset_request)(struct hnae3_ae_dev *ae_dev, enum hnae3_reset_type rst_type); void (*get_channels)(struct hnae3_handle *handle, @@ -492,7 +493,9 @@ struct hnae3_ae_ops { struct ethtool_rxnfc *cmd, u32 *rule_locs); int (*restore_fd_rules)(struct hnae3_handle *handle); void (*enable_fd)(struct hnae3_handle *handle, bool enable); - int (*dbg_run_cmd)(struct hnae3_handle *handle, char *cmd_buf); + int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id, + u16 flow_id, struct flow_keys *fkeys); + int (*dbg_run_cmd)(struct hnae3_handle *handle, const char *cmd_buf); pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev); bool (*get_hw_reset_stat)(struct hnae3_handle *handle); bool (*ae_dev_resetting)(struct hnae3_handle *handle); @@ -502,6 +505,7 @@ struct hnae3_ae_ops { void (*set_timer_task)(struct hnae3_handle *handle, bool enable); int (*mac_connect_phy)(struct hnae3_handle *handle); void (*mac_disconnect_phy)(struct hnae3_handle *handle); + void (*restore_vlan_table)(struct hnae3_handle *handle); }; struct hnae3_dcb_ops 
{ @@ -643,5 +647,6 @@ void hnae3_unregister_client(struct hnae3_client *client); int hnae3_register_client(struct hnae3_client *client); void hnae3_set_client_init_flag(struct hnae3_client *client, - struct hnae3_ae_dev *ae_dev, int inited); + struct hnae3_ae_dev *ae_dev, + unsigned int inited); #endif diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c index b6fabbbdfd5b..d2ec4c573bf8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_dcbnl.c @@ -4,8 +4,7 @@ #include "hnae3.h" #include "hns3_enet.h" -static -int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) +static int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) { struct hnae3_handle *h = hns3_get_handle(ndev); @@ -18,8 +17,7 @@ int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets) return -EOPNOTSUPP; } -static -int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) +static int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) { struct hnae3_handle *h = hns3_get_handle(ndev); @@ -32,8 +30,7 @@ int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets) return -EOPNOTSUPP; } -static -int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) +static int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) { struct hnae3_handle *h = hns3_get_handle(ndev); @@ -46,8 +43,7 @@ int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc) return -EOPNOTSUPP; } -static -int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) +static int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc) { struct hnae3_handle *h = hns3_get_handle(ndev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index fc4917ac44be..a4b937286f55 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -11,7 +11,8 @@ static struct dentry *hns3_dbgfs_root; -static int hns3_dbg_queue_info(struct hnae3_handle *h, char *cmd_buf) +static int hns3_dbg_queue_info(struct hnae3_handle *h, + const char *cmd_buf) { struct hns3_nic_priv *priv = h->priv; struct hns3_nic_ring_data *ring_data; @@ -155,7 +156,7 @@ static int hns3_dbg_queue_map(struct hnae3_handle *h) return 0; } -static int hns3_dbg_bd_info(struct hnae3_handle *h, char *cmd_buf) +static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf) { struct hns3_nic_priv *priv = h->priv; struct hns3_nic_ring_data *ring_data; @@ -252,6 +253,7 @@ static void hns3_dbg_help(struct hnae3_handle *h) dev_info(&h->pdev->dev, "dump qos buf cfg\n"); dev_info(&h->pdev->dev, "dump mng tbl\n"); dev_info(&h->pdev->dev, "dump reset info\n"); + dev_info(&h->pdev->dev, "dump m7 info\n"); dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n"); dev_info(&h->pdev->dev, "dump mac tnl status\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index f326805543a4..310afa708831 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4,6 +4,9 @@ #include <linux/dma-mapping.h> #include <linux/etherdevice.h> #include <linux/interrupt.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif #include <linux/if_vlan.h> #include <linux/ip.h> #include 
<linux/ipv6.h> @@ -14,6 +17,7 @@ #include <linux/sctp.h> #include <linux/vermagic.h> #include <net/gre.h> +#include <net/ip6_checksum.h> #include <net/pkt_cls.h> #include <net/tcp.h> #include <net/vxlan.h> @@ -24,8 +28,7 @@ #define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift))) #define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE) -static void hns3_clear_all_ring(struct hnae3_handle *h); -static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h); +static void hns3_clear_all_ring(struct hnae3_handle *h, bool force); static void hns3_remove_hw_addr(struct net_device *netdev); static const char hns3_driver_name[] = "hns3"; @@ -79,23 +82,6 @@ static irqreturn_t hns3_irq_handle(int irq, void *vector) return IRQ_HANDLED; } -/* This callback function is used to set affinity changes to the irq affinity - * masks when the irq_set_affinity_notifier function is used. - */ -static void hns3_nic_irq_affinity_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) -{ - struct hns3_enet_tqp_vector *tqp_vectors = - container_of(notify, struct hns3_enet_tqp_vector, - affinity_notify); - - tqp_vectors->affinity_mask = *mask; -} - -static void hns3_nic_irq_affinity_release(struct kref *ref) -{ -} - static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv) { struct hns3_enet_tqp_vector *tqp_vectors; @@ -107,8 +93,7 @@ static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv) if (tqp_vectors->irq_init_flag != HNS3_VECTOR_INITED) continue; - /* clear the affinity notifier and affinity mask */ - irq_set_affinity_notifier(tqp_vectors->vector_irq, NULL); + /* clear the affinity mask */ irq_set_affinity_hint(tqp_vectors->vector_irq, NULL); /* release the irq resource */ @@ -153,20 +138,14 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv) tqp_vectors->name[HNAE3_INT_NAME_LEN - 1] = '\0'; ret = request_irq(tqp_vectors->vector_irq, hns3_irq_handle, 0, - tqp_vectors->name, - tqp_vectors); + tqp_vectors->name, tqp_vectors); if (ret) { netdev_err(priv->netdev, "request irq(%d) fail\n", tqp_vectors->vector_irq); + hns3_nic_uninit_irq(priv); return ret; } - tqp_vectors->affinity_notify.notify = - hns3_nic_irq_affinity_notify; - tqp_vectors->affinity_notify.release = - hns3_nic_irq_affinity_release; - irq_set_affinity_notifier(tqp_vectors->vector_irq, - &tqp_vectors->affinity_notify); irq_set_affinity_hint(tqp_vectors->vector_irq, &tqp_vectors->affinity_mask); @@ -297,8 +276,7 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev) ret = netif_set_real_num_tx_queues(netdev, queue_size); if (ret) { netdev_err(netdev, - "netif_set_real_num_tx_queues fail, ret=%d!\n", - ret); + "netif_set_real_num_tx_queues fail, ret=%d!\n", ret); return ret; } @@ -340,6 +318,40 @@ static void hns3_tqp_disable(struct hnae3_queue *tqp) hns3_write_dev(tqp, HNS3_RING_EN_REG, rcb_reg); } +static void hns3_free_rx_cpu_rmap(struct net_device *netdev) +{ +#ifdef CONFIG_RFS_ACCEL + free_irq_cpu_rmap(netdev->rx_cpu_rmap); + netdev->rx_cpu_rmap = NULL; +#endif +} + +static int hns3_set_rx_cpu_rmap(struct net_device *netdev) +{ +#ifdef CONFIG_RFS_ACCEL + struct hns3_nic_priv *priv = netdev_priv(netdev); + struct hns3_enet_tqp_vector *tqp_vector; + int i, ret; + + if (!netdev->rx_cpu_rmap) { + netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->vector_num); + if (!netdev->rx_cpu_rmap) + return -ENOMEM; + } + + for (i = 0; i < priv->vector_num; i++) { + tqp_vector = &priv->tqp_vector[i]; + ret = irq_cpu_rmap_add(netdev->rx_cpu_rmap, + tqp_vector->vector_irq); + if (ret) { + 
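+ /* aRFS is optional: free the whole rmap on a partial failure
+ * and let hns3_nic_net_up() just warn and carry on.
+ */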
hns3_free_rx_cpu_rmap(netdev); + return ret; + } + } +#endif + return 0; +} + static int hns3_nic_net_up(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -351,11 +363,16 @@ static int hns3_nic_net_up(struct net_device *netdev) if (ret) return ret; + /* the device can work without cpu rmap, only aRFS needs it */ + ret = hns3_set_rx_cpu_rmap(netdev); + if (ret) + netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret); + /* get irq resource for all vectors */ ret = hns3_nic_init_irq(priv); if (ret) { - netdev_err(netdev, "hns init irq failed! ret=%d\n", ret); - return ret; + netdev_err(netdev, "init irq failed! ret=%d\n", ret); + goto free_rmap; } clear_bit(HNS3_NIC_STATE_DOWN, &priv->state); @@ -384,7 +401,8 @@ out_start_err: hns3_vector_disable(&priv->tqp_vector[j]); hns3_nic_uninit_irq(priv); - +free_rmap: + hns3_free_rx_cpu_rmap(netdev); return ret; } @@ -429,16 +447,13 @@ static int hns3_nic_net_open(struct net_device *netdev) ret = hns3_nic_net_up(netdev); if (ret) { - netdev_err(netdev, - "hns net up fail, ret=%d!\n", ret); + netdev_err(netdev, "net up fail, ret=%d!\n", ret); return ret; } kinfo = &h->kinfo; - for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) { - netdev_set_prio_tc_map(netdev, i, - kinfo->prio_tc[i]); - } + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) + netdev_set_prio_tc_map(netdev, i, kinfo->prio_tc[i]); if (h->ae_algo->ops->set_timer_task) h->ae_algo->ops->set_timer_task(priv->ae_handle, true); @@ -447,6 +462,20 @@ static int hns3_nic_net_open(struct net_device *netdev) return 0; } +static void hns3_reset_tx_queue(struct hnae3_handle *h) +{ + struct net_device *ndev = h->kinfo.netdev; + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct netdev_queue *dev_queue; + u32 i; + + for (i = 0; i < h->kinfo.num_tqps; i++) { + dev_queue = netdev_get_tx_queue(ndev, + priv->ring_data[i].queue_index); + netdev_tx_reset_queue(dev_queue); + } +} + static void hns3_nic_net_down(struct net_device *netdev) { struct hns3_nic_priv *priv = netdev_priv(netdev); @@ -467,10 +496,19 @@ static void hns3_nic_net_down(struct net_device *netdev) if (ops->stop) ops->stop(priv->ae_handle); + hns3_free_rx_cpu_rmap(netdev); + /* free irq resources */ hns3_nic_uninit_irq(priv); - hns3_clear_all_ring(priv->ae_handle); + /* delay ring buffer clearing to hns3_reset_notify_uninit_enet + * during reset process, because driver may not be able + * to disable the ring through firmware when downing the netdev. 
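+	 * Clearing a ring the hardware may still own could let the device
+	 * DMA into freed buffers; the deferred path runs only after the
+	 * reset has quiesced the hardware (editor's note).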
+ */ + if (!hns3_nic_resetting(netdev)) + hns3_clear_all_ring(priv->ae_handle, false); + + hns3_reset_tx_queue(priv->ae_handle); } static int hns3_nic_net_stop(struct net_device *netdev) @@ -641,7 +679,7 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, if (l3.v4->version == 4) l3.v4->check = 0; - /* tunnel packet.*/ + /* tunnel packet */ if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_UDP_TUNNEL | @@ -666,11 +704,11 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen, l3.v4->check = 0; } - /* normal or tunnel packet*/ + /* normal or tunnel packet */ l4_offset = l4.hdr - skb->data; hdr_len = (l4.tcp->doff << 2) + l4_offset; - /* remove payload length from inner pseudo checksum when tso*/ + /* remove payload length from inner pseudo checksum when tso */ l4_paylen = skb->len - l4_offset; csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(l4_paylen)); @@ -778,7 +816,7 @@ static void hns3_set_outer_l2l3l4(struct sk_buff *skb, u8 ol4_proto, hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L3LEN_S, l3_len >> 2); il2_hdr = skb_inner_mac_header(skb); - /* compute OL4 header size, defined in 4 Bytes. */ + /* compute OL4 header size, defined in 4 Bytes */ l4_len = il2_hdr - l4.hdr; hns3_set_field(*ol_type_vlan_len_msec, HNS3_TXD_L4LEN_S, l4_len >> 2); @@ -913,8 +951,9 @@ static int hns3_set_l2l3l4(struct sk_buff *skb, u8 ol4_proto, static void hns3_set_txbd_baseinfo(u16 *bdtp_fe_sc_vld_ra_ri, int frag_end) { /* Config bd buffer end */ - hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, !!frag_end); - hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1); + if (!!frag_end) + hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_FE_B, 1U); + hns3_set_field(*bdtp_fe_sc_vld_ra_ri, HNS3_TXD_VLD_B, 1U); } static int hns3_fill_desc_vtags(struct sk_buff *skb, @@ -988,7 +1027,8 @@ static int hns3_fill_desc_vtags(struct sk_buff *skb, } static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, - int size, int frag_end, enum hns_desc_type type) + unsigned int size, int frag_end, + enum hns_desc_type type) { struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_use]; struct hns3_desc *desc = &ring->desc[ring->next_to_use]; @@ -1038,8 +1078,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, /* Set txbd */ desc->tx.ol_type_vlan_len_msec = cpu_to_le32(ol_type_vlan_len_msec); - desc->tx.type_cs_vlan_tso_len = - cpu_to_le32(type_cs_vlan_tso); + desc->tx.type_cs_vlan_tso_len = cpu_to_le32(type_cs_vlan_tso); desc->tx.paylen = cpu_to_le32(paylen); desc->tx.mss = cpu_to_le16(mss); desc->tx.vlan_tag = cpu_to_le16(inner_vtag); @@ -1086,19 +1125,19 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc_cb->priv = priv; desc_cb->dma = dma + HNS3_MAX_BD_SIZE * k; desc_cb->type = (type == DESC_TYPE_SKB && !k) ? - DESC_TYPE_SKB : DESC_TYPE_PAGE; + DESC_TYPE_SKB : DESC_TYPE_PAGE; /* now, fill the descriptor */ desc->addr = cpu_to_le64(dma + HNS3_MAX_BD_SIZE * k); desc->tx.send_size = cpu_to_le16((k == frag_buf_num - 1) ? - (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE); + (u16)sizeoflast : (u16)HNS3_MAX_BD_SIZE); hns3_set_txbd_baseinfo(&bdtp_fe_sc_vld_ra_ri, frag_end && (k == frag_buf_num - 1) ? 
1 : 0); desc->tx.bdtp_fe_sc_vld_ra_ri = cpu_to_le16(bdtp_fe_sc_vld_ra_ri); - /* move ring pointer to next.*/ + /* move ring pointer to next */ ring_ptr_move_fw(ring, next_to_use); desc_cb = &ring->desc_cb[ring->next_to_use]; @@ -1452,12 +1491,10 @@ static void hns3_nic_get_stats64(struct net_device *netdev, start = u64_stats_fetch_begin_irq(&ring->syncp); rx_bytes += ring->stats.rx_bytes; rx_pkts += ring->stats.rx_pkts; - rx_drop += ring->stats.non_vld_descs; rx_drop += ring->stats.l2_err; - rx_errors += ring->stats.non_vld_descs; rx_errors += ring->stats.l2_err; + rx_errors += ring->stats.l3l4_csum_err; rx_crc_errors += ring->stats.l2_err; - rx_crc_errors += ring->stats.l3l4_csum_err; rx_multicast += ring->stats.rx_multicast; rx_length_errors += ring->stats.err_pkt_len; } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); @@ -1493,12 +1530,12 @@ static void hns3_nic_get_stats64(struct net_device *netdev, static int hns3_setup_tc(struct net_device *netdev, void *type_data) { struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; - struct hnae3_handle *h = hns3_get_handle(netdev); - struct hnae3_knic_private_info *kinfo = &h->kinfo; u8 *prio_tc = mqprio_qopt->qopt.prio_tc_map; + struct hnae3_knic_private_info *kinfo; u8 tc = mqprio_qopt->qopt.num_tc; u16 mode = mqprio_qopt->mode; u8 hw = mqprio_qopt->qopt.hw; + struct hnae3_handle *h; if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS && mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0))) @@ -1510,6 +1547,9 @@ static int hns3_setup_tc(struct net_device *netdev, void *type_data) if (!netdev) return -EINVAL; + h = hns3_get_handle(netdev); + kinfo = &h->kinfo; + return (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ? kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP; } @@ -1527,15 +1567,11 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct hnae3_handle *h = hns3_get_handle(netdev); - struct hns3_nic_priv *priv = netdev_priv(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vlan_filter) ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, false); - if (!ret) - set_bit(vid, priv->active_vlans); - return ret; } @@ -1543,33 +1579,11 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct hnae3_handle *h = hns3_get_handle(netdev); - struct hns3_nic_priv *priv = netdev_priv(netdev); int ret = -EIO; if (h->ae_algo->ops->set_vlan_filter) ret = h->ae_algo->ops->set_vlan_filter(h, proto, vid, true); - if (!ret) - clear_bit(vid, priv->active_vlans); - - return ret; -} - -static int hns3_restore_vlan(struct net_device *netdev) -{ - struct hns3_nic_priv *priv = netdev_priv(netdev); - int ret = 0; - u16 vid; - - for_each_set_bit(vid, priv->active_vlans, VLAN_N_VID) { - ret = hns3_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid); - if (ret) { - netdev_err(netdev, "Restore vlan: %d filter, ret:%d\n", - vid, ret); - return ret; - } - } - return ret; } @@ -1581,7 +1595,7 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, if (h->ae_algo->ops->set_vf_vlan_filter) ret = h->ae_algo->ops->set_vf_vlan_filter(h, vf, vlan, - qos, vlan_proto); + qos, vlan_proto); return ret; } @@ -1722,6 +1736,32 @@ static void hns3_nic_net_timeout(struct net_device *ndev) h->ae_algo->ops->reset_event(h->pdev, h); } +#ifdef CONFIG_RFS_ACCEL +static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct hnae3_handle *h = hns3_get_handle(dev); + struct flow_keys fkeys; + + if (!h->ae_algo->ops->add_arfs_entry) + return 
-EOPNOTSUPP; + + if (skb->encapsulation) + return -EPROTONOSUPPORT; + + if (!skb_flow_dissect_flow_keys(skb, &fkeys, 0)) + return -EPROTONOSUPPORT; + + if ((fkeys.basic.n_proto != htons(ETH_P_IP) && + fkeys.basic.n_proto != htons(ETH_P_IPV6)) || + (fkeys.basic.ip_proto != IPPROTO_TCP && + fkeys.basic.ip_proto != IPPROTO_UDP)) + return -EPROTONOSUPPORT; + + return h->ae_algo->ops->add_arfs_entry(h, rxq_index, flow_id, &fkeys); +} +#endif + static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_open = hns3_nic_net_open, .ndo_stop = hns3_nic_net_stop, @@ -1737,6 +1777,10 @@ static const struct net_device_ops hns3_nic_netdev_ops = { .ndo_vlan_rx_add_vid = hns3_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = hns3_vlan_rx_kill_vid, .ndo_set_vf_vlan = hns3_ndo_set_vf_vlan, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = hns3_rx_flow_steer, +#endif + }; bool hns3_is_phys_func(struct pci_dev *pdev) @@ -1802,8 +1846,7 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct hnae3_ae_dev *ae_dev; int ret; - ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev), - GFP_KERNEL); + ae_dev = devm_kzalloc(&pdev->dev, sizeof(*ae_dev), GFP_KERNEL); if (!ae_dev) { ret = -ENOMEM; return ret; @@ -1811,7 +1854,6 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ae_dev->pdev = pdev; ae_dev->flag = ent->driver_data; - ae_dev->dev_type = HNAE3_DEV_KNIC; ae_dev->reset_type = HNAE3_NONE_RESET; hns3_get_dev_capability(pdev, ae_dev); pci_set_drvdata(pdev, ae_dev); @@ -1895,9 +1937,9 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT; - if (!ae_dev) { + if (!ae_dev || !ae_dev->ops) { dev_err(&pdev->dev, - "Can't recover - error happened during device init\n"); + "Can't recover - error happened before device initialized\n"); return PCI_ERS_RESULT_NONE; } @@ -1912,14 +1954,23 @@ static pci_ers_result_t hns3_error_detected(struct pci_dev *pdev, static pci_ers_result_t hns3_slot_reset(struct pci_dev *pdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); + const struct hnae3_ae_ops *ops; + enum hnae3_reset_type reset_type; struct device *dev = &pdev->dev; - dev_info(dev, "requesting reset due to PCI error\n"); + if (!ae_dev || !ae_dev->ops) + return PCI_ERS_RESULT_NONE; + ops = ae_dev->ops; /* request the reset */ - if (ae_dev->ops->reset_event) { - if (!ae_dev->override_pci_need_reset) - ae_dev->ops->reset_event(pdev, NULL); + if (ops->reset_event) { + if (ae_dev->hw_err_reset_req) { + reset_type = ops->get_reset_level(ae_dev, + &ae_dev->hw_err_reset_req); + ops->set_default_reset_request(ae_dev, reset_type); + dev_info(dev, "requesting reset due to PCI error\n"); + ops->reset_event(pdev, NULL); + } return PCI_ERS_RESULT_RECOVERED; } @@ -2168,7 +2219,7 @@ out_buffer_fail: return ret; } -/* detach a in-used buffer and replace with a reserved one */ +/* detach a in-used buffer and replace with a reserved one */ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, struct hns3_desc_cb *res_cb) { @@ -2181,8 +2232,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) { ring->desc_cb[i].reuse_flag = 0; - ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma - + ring->desc_cb[i].page_offset); + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + + ring->desc_cb[i].page_offset); ring->desc[i].rx.bd_base_info = 0; } @@ -2284,8 +2335,8 @@ static int hns3_desc_unused(struct 
hns3_enet_ring *ring) return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu; } -static void -hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleand_count) +static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, + int cleand_count) { struct hns3_desc_cb *desc_cb; struct hns3_desc_cb res_cbs; @@ -2338,7 +2389,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, /* Avoid re-using remote pages, or the stack is still using the page * when page_offset rollback to zero, flag default unreuse */ - if (unlikely(page_to_nid(desc_cb->priv) != numa_node_id()) || + if (unlikely(page_to_nid(desc_cb->priv) != numa_mem_id()) || (!desc_cb->page_offset && page_count(desc_cb->priv) > 1)) return; @@ -2347,7 +2398,7 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, if (desc_cb->page_offset + truesize <= hnae3_page_size(ring)) { desc_cb->reuse_flag = 1; - /* Bump ref count on page before it is given*/ + /* Bump ref count on page before it is given */ get_page(desc_cb->priv); } else if (page_count(desc_cb->priv) == 1) { desc_cb->reuse_flag = 1; @@ -2356,13 +2407,13 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, } } -static int hns3_gro_complete(struct sk_buff *skb) +static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) { __be16 type = skb->protocol; struct tcphdr *th; int depth = 0; - while (type == htons(ETH_P_8021Q)) { + while (eth_type_vlan(type)) { struct vlan_hdr *vh; if ((depth + VLAN_HLEN) > skb_headlen(skb)) @@ -2373,10 +2424,24 @@ static int hns3_gro_complete(struct sk_buff *skb) depth += VLAN_HLEN; } + skb_set_network_header(skb, depth); + if (type == htons(ETH_P_IP)) { + const struct iphdr *iph = ip_hdr(skb); + depth += sizeof(struct iphdr); + skb_set_transport_header(skb, depth); + th = tcp_hdr(skb); + th->check = ~tcp_v4_check(skb->len - depth, iph->saddr, + iph->daddr, 0); } else if (type == htons(ETH_P_IPV6)) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + depth += sizeof(struct ipv6hdr); + skb_set_transport_header(skb, depth); + th = tcp_hdr(skb); + th->check = ~tcp_v6_check(skb->len - depth, &iph->saddr, + &iph->daddr, 0); } else { netdev_err(skb->dev, "Error: FW GRO supports only IPv4/IPv6, not 0x%04x, depth: %d\n", @@ -2384,13 +2449,16 @@ static int hns3_gro_complete(struct sk_buff *skb) return -EFAULT; } - th = (struct tcphdr *)(skb->data + depth); skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; if (th->cwr) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (l234info & BIT(HNS3_RXD_GRO_FIXID_B)) + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + skb->csum_start = (unsigned char *)th - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; return 0; } @@ -2508,7 +2576,7 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring, } } -static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length, +static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, unsigned char *va) { #define HNS3_NEED_ADD_FRAG 1 @@ -2537,7 +2605,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length, memcpy(__skb_put(skb, length), va, ALIGN(length, sizeof(long))); /* We can reuse buffer as-is, just make sure it is local */ - if (likely(page_to_nid(desc_cb->priv) == numa_node_id())) + if (likely(page_to_nid(desc_cb->priv) == numa_mem_id())) desc_cb->reuse_flag = 1; else /* This page cannot be reused so discard it */ put_page(desc_cb->priv); @@ -2574,7 +2642,7 @@ static int hns3_add_frag(struct hns3_enet_ring 
*ring, struct hns3_desc *desc, */ if (pending) { pre_bd = (ring->next_to_clean - 1 + ring->desc_num) % - ring->desc_num; + ring->desc_num; pre_desc = &ring->desc[pre_bd]; bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info); } else { @@ -2628,21 +2696,22 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, u32 l234info, u32 bd_base_info, u32 ol_info) { - u16 gro_count; u32 l3_type; - gro_count = hnae3_get_field(l234info, HNS3_RXD_GRO_COUNT_M, - HNS3_RXD_GRO_COUNT_S); + skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info, + HNS3_RXD_GRO_SIZE_M, + HNS3_RXD_GRO_SIZE_S); /* if there is no HW GRO, do not set gro params */ - if (!gro_count) { + if (!skb_shinfo(skb)->gso_size) { hns3_rx_checksum(ring, skb, l234info, bd_base_info, ol_info); return 0; } - NAPI_GRO_CB(skb)->count = gro_count; + NAPI_GRO_CB(skb)->count = hnae3_get_field(l234info, + HNS3_RXD_GRO_COUNT_M, + HNS3_RXD_GRO_COUNT_S); - l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, - HNS3_RXD_L3ID_S); + l3_type = hnae3_get_field(l234info, HNS3_RXD_L3ID_M, HNS3_RXD_L3ID_S); if (l3_type == HNS3_L3_TYPE_IPV4) skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; else if (l3_type == HNS3_L3_TYPE_IPV6) @@ -2650,11 +2719,7 @@ static int hns3_set_gro_and_checksum(struct hns3_enet_ring *ring, else return -EFAULT; - skb_shinfo(skb)->gso_size = hnae3_get_field(bd_base_info, - HNS3_RXD_GRO_SIZE_M, - HNS3_RXD_GRO_SIZE_S); - - return hns3_gro_complete(skb); + return hns3_gro_complete(skb, l234info); } static void hns3_set_rx_skb_rss_type(struct hns3_enet_ring *ring, @@ -2703,14 +2768,6 @@ static int hns3_handle_bdinfo(struct hns3_enet_ring *ring, struct sk_buff *skb) vlan_tag); } - if (unlikely(!(bd_base_info & BIT(HNS3_RXD_VLD_B)))) { - u64_stats_update_begin(&ring->syncp); - ring->stats.non_vld_descs++; - u64_stats_update_end(&ring->syncp); - - return -EINVAL; - } - if (unlikely(!desc->rx.pkt_len || (l234info & (BIT(HNS3_RXD_TRUNCAT_B) | BIT(HNS3_RXD_L2E_B))))) { u64_stats_update_begin(&ring->syncp); @@ -2762,8 +2819,8 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, struct sk_buff *skb = ring->skb; struct hns3_desc_cb *desc_cb; struct hns3_desc *desc; + unsigned int length; u32 bd_base_info; - int length; int ret; desc = &ring->desc[ring->next_to_clean]; @@ -2828,14 +2885,14 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring, return ret; } + skb_record_rx_queue(skb, ring->tqp->tqp_index); *out_skb = skb; return 0; } -int hns3_clean_rx_ring( - struct hns3_enet_ring *ring, int budget, - void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *)) +int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, + void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *)) { #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16 int recv_pkts, recv_bds, clean_count, err; @@ -2887,42 +2944,25 @@ int hns3_clean_rx_ring( out: /* Make all data has been write before submit */ if (clean_count + unused_count > 0) - hns3_nic_alloc_rx_buffers(ring, - clean_count + unused_count); + hns3_nic_alloc_rx_buffers(ring, clean_count + unused_count); return recv_pkts; } -static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) +static bool hns3_get_new_flow_lvl(struct hns3_enet_ring_group *ring_group) { - struct hns3_enet_tqp_vector *tqp_vector = - ring_group->ring->tqp_vector; +#define HNS3_RX_LOW_BYTE_RATE 10000 +#define HNS3_RX_MID_BYTE_RATE 20000 +#define HNS3_RX_ULTRA_PACKET_RATE 40 + enum hns3_flow_level_range new_flow_level; - int packets_per_msecs; - int bytes_per_msecs; + struct hns3_enet_tqp_vector 
*tqp_vector; + int packets_per_msecs, bytes_per_msecs; u32 time_passed_ms; - u16 new_int_gl; - - if (!tqp_vector->last_jiffies) - return false; - - if (ring_group->total_packets == 0) { - ring_group->coal.int_gl = HNS3_INT_GL_50K; - ring_group->coal.flow_level = HNS3_FLOW_LOW; - return true; - } - /* Simple throttlerate management - * 0-10MB/s lower (50000 ints/s) - * 10-20MB/s middle (20000 ints/s) - * 20-1249MB/s high (18000 ints/s) - * > 40000pps ultra (8000 ints/s) - */ - new_flow_level = ring_group->coal.flow_level; - new_int_gl = ring_group->coal.int_gl; + tqp_vector = ring_group->ring->tqp_vector; time_passed_ms = jiffies_to_msecs(jiffies - tqp_vector->last_jiffies); - if (!time_passed_ms) return false; @@ -2932,9 +2972,14 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) do_div(ring_group->total_bytes, time_passed_ms); bytes_per_msecs = ring_group->total_bytes; -#define HNS3_RX_LOW_BYTE_RATE 10000 -#define HNS3_RX_MID_BYTE_RATE 20000 + new_flow_level = ring_group->coal.flow_level; + /* Simple throttlerate management + * 0-10MB/s lower (50000 ints/s) + * 10-20MB/s middle (20000 ints/s) + * 20-1249MB/s high (18000 ints/s) + * > 40000pps ultra (8000 ints/s) + */ switch (new_flow_level) { case HNS3_FLOW_LOW: if (bytes_per_msecs > HNS3_RX_LOW_BYTE_RATE) @@ -2954,13 +2999,40 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) break; } -#define HNS3_RX_ULTRA_PACKET_RATE 40 - if (packets_per_msecs > HNS3_RX_ULTRA_PACKET_RATE && &tqp_vector->rx_group == ring_group) new_flow_level = HNS3_FLOW_ULTRA; - switch (new_flow_level) { + ring_group->total_bytes = 0; + ring_group->total_packets = 0; + ring_group->coal.flow_level = new_flow_level; + + return true; +} + +static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) +{ + struct hns3_enet_tqp_vector *tqp_vector; + u16 new_int_gl; + + if (!ring_group->ring) + return false; + + tqp_vector = ring_group->ring->tqp_vector; + if (!tqp_vector->last_jiffies) + return false; + + if (ring_group->total_packets == 0) { + ring_group->coal.int_gl = HNS3_INT_GL_50K; + ring_group->coal.flow_level = HNS3_FLOW_LOW; + return true; + } + + if (!hns3_get_new_flow_lvl(ring_group)) + return false; + + new_int_gl = ring_group->coal.int_gl; + switch (ring_group->coal.flow_level) { case HNS3_FLOW_LOW: new_int_gl = HNS3_INT_GL_50K; break; @@ -2977,9 +3049,6 @@ static bool hns3_get_new_int_gl(struct hns3_enet_ring_group *ring_group) break; } - ring_group->total_bytes = 0; - ring_group->total_packets = 0; - ring_group->coal.flow_level = new_flow_level; if (new_int_gl != ring_group->coal.int_gl) { ring_group->coal.int_gl = new_int_gl; return true; @@ -3280,6 +3349,7 @@ static int hns3_nic_alloc_vector_data(struct hns3_nic_priv *priv) if (!vector) return -ENOMEM; + /* save the actual available vector number */ vector_num = h->ae_algo->ops->get_vector(h, vector_num, vector); priv->vector_num = vector_num; @@ -3331,8 +3401,6 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv) hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain); if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) { - irq_set_affinity_notifier(tqp_vector->vector_irq, - NULL); irq_set_affinity_hint(tqp_vector->vector_irq, NULL); free_irq(tqp_vector->vector_irq, tqp_vector); tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED; @@ -3364,7 +3432,7 @@ static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv) } static int hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv, - int ring_type) + 
unsigned int ring_type) { struct hns3_nic_ring_data *ring_data = priv->ring_data; int queue_num = priv->ae_handle->kinfo.num_tqps; @@ -3550,8 +3618,7 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring) struct hnae3_queue *q = ring->tqp; if (!HNAE3_IS_TX_RING(ring)) { - hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG, - (u32)dma); + hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_L_REG, (u32)dma); hns3_write_dev(q, HNS3_RING_RX_RING_BASEADDR_H_REG, (u32)((dma >> 31) >> 1)); @@ -3851,6 +3918,8 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_client_stop(handle); + hns3_uninit_phy(netdev); + if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { netdev_warn(netdev, "already uninitialized\n"); goto out_netdev_free; @@ -3858,9 +3927,7 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset) hns3_del_all_fd_rules(netdev, true); - hns3_force_clear_all_rx_ring(handle); - - hns3_uninit_phy(netdev); + hns3_clear_all_ring(handle, true); hns3_nic_uninit_vector_data(priv); @@ -3997,8 +4064,7 @@ static int hns3_clear_rx_ring(struct hns3_enet_ring *ring) ret); return ret; } - hns3_replace_buffer(ring, ring->next_to_use, - &res_cbs); + hns3_replace_buffer(ring, ring->next_to_use, &res_cbs); } ring_ptr_move_fw(ring, next_to_use); } @@ -4030,40 +4096,26 @@ static void hns3_force_clear_rx_ring(struct hns3_enet_ring *ring) } } -static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h) +static void hns3_clear_all_ring(struct hnae3_handle *h, bool force) { struct net_device *ndev = h->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hns3_enet_ring *ring; u32 i; for (i = 0; i < h->kinfo.num_tqps; i++) { - ring = priv->ring_data[i + h->kinfo.num_tqps].ring; - hns3_force_clear_rx_ring(ring); - } -} - -static void hns3_clear_all_ring(struct hnae3_handle *h) -{ - struct net_device *ndev = h->kinfo.netdev; - struct hns3_nic_priv *priv = netdev_priv(ndev); - u32 i; - - for (i = 0; i < h->kinfo.num_tqps; i++) { - struct netdev_queue *dev_queue; struct hns3_enet_ring *ring; ring = priv->ring_data[i].ring; hns3_clear_tx_ring(ring); - dev_queue = netdev_get_tx_queue(ndev, - priv->ring_data[i].queue_index); - netdev_tx_reset_queue(dev_queue); ring = priv->ring_data[i + h->kinfo.num_tqps].ring; /* Continue to clear other rings even if clearing some * rings failed. */ - hns3_clear_rx_ring(ring); + if (force) + hns3_force_clear_rx_ring(ring); + else + hns3_clear_rx_ring(ring); } } @@ -4173,7 +4225,7 @@ static int hns3_reset_notify_up_enet(struct hnae3_handle *handle) if (ret) { set_bit(HNS3_NIC_STATE_RESETTING, &priv->state); netdev_err(kinfo->netdev, - "hns net up fail, ret=%d!\n", ret); + "net up fail, ret=%d!\n", ret); return ret; } } @@ -4251,12 +4303,8 @@ static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle) vlan_filter_enable = netdev->flags & IFF_PROMISC ? 
false : true; hns3_enable_vlan_filter(netdev, vlan_filter_enable); - /* Hardware table is only clear when pf resets */ - if (!(handle->flags & HNAE3_SUPPORT_VF)) { - ret = hns3_restore_vlan(netdev); - if (ret) - return ret; - } + if (handle->ae_algo->ops->restore_vlan_table) + handle->ae_algo->ops->restore_vlan_table(handle); return hns3_restore_fd_rules(netdev); } @@ -4272,7 +4320,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) return 0; } - hns3_force_clear_all_rx_ring(handle); + hns3_clear_all_ring(handle, true); + hns3_reset_tx_queue(priv->ae_handle); hns3_nic_uninit_vector_data(priv); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index c14480f9b625..848b866761df 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -145,7 +145,7 @@ enum hns3_nic_state { #define HNS3_RXD_TSIND_M (0x7 << HNS3_RXD_TSIND_S) #define HNS3_RXD_LKBK_B 15 #define HNS3_RXD_GRO_SIZE_S 16 -#define HNS3_RXD_GRO_SIZE_M (0x3ff << HNS3_RXD_GRO_SIZE_S) +#define HNS3_RXD_GRO_SIZE_M (0x3fff << HNS3_RXD_GRO_SIZE_S) #define HNS3_TXD_L3T_S 0 #define HNS3_TXD_L3T_M (0x3 << HNS3_TXD_L3T_S) @@ -384,7 +384,6 @@ struct ring_stats { u64 rx_err_cnt; u64 reuse_pg_cnt; u64 err_pkt_len; - u64 non_vld_descs; u64 err_bd_num; u64 l2_err; u64 l3l4_csum_err; @@ -417,7 +416,7 @@ struct hns3_enet_ring { */ int next_to_clean; - int pull_len; /* head length for current packet */ + u32 pull_len; /* head length for current packet */ u32 frag_num; unsigned char *va; /* first buffer address for current packet */ @@ -446,25 +445,6 @@ enum hns3_flow_level_range { HNS3_FLOW_ULTRA = 3, }; -enum hns3_link_mode_bits { - HNS3_LM_FIBRE_BIT = BIT(0), - HNS3_LM_AUTONEG_BIT = BIT(1), - HNS3_LM_TP_BIT = BIT(2), - HNS3_LM_PAUSE_BIT = BIT(3), - HNS3_LM_BACKPLANE_BIT = BIT(4), - HNS3_LM_10BASET_HALF_BIT = BIT(5), - HNS3_LM_10BASET_FULL_BIT = BIT(6), - HNS3_LM_100BASET_HALF_BIT = BIT(7), - HNS3_LM_100BASET_FULL_BIT = BIT(8), - HNS3_LM_1000BASET_FULL_BIT = BIT(9), - HNS3_LM_10000BASEKR_FULL_BIT = BIT(10), - HNS3_LM_25000BASEKR_FULL_BIT = BIT(11), - HNS3_LM_40000BASELR4_FULL_BIT = BIT(12), - HNS3_LM_50000BASEKR2_FULL_BIT = BIT(13), - HNS3_LM_100000BASEKR4_FULL_BIT = BIT(14), - HNS3_LM_COUNT = 15 -}; - #define HNS3_INT_GL_MAX 0x1FE0 #define HNS3_INT_GL_50K 0x0014 #define HNS3_INT_GL_20K 0x0032 @@ -550,7 +530,6 @@ struct hns3_nic_priv { struct notifier_block notifier_block; /* Vxlan/Geneve information */ struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX]; - unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct hns3_enet_coalesce tx_coal; struct hns3_enet_coalesce rx_coal; }; @@ -631,7 +610,7 @@ static inline bool hns3_nic_resetting(struct net_device *netdev) #define hnae3_buf_size(_ring) ((_ring)->buf_size) #define hnae3_page_order(_ring) (get_order(hnae3_buf_size(_ring))) -#define hnae3_page_size(_ring) (PAGE_SIZE << hnae3_page_order(_ring)) +#define hnae3_page_size(_ring) (PAGE_SIZE << (u32)hnae3_page_order(_ring)) /* iterator for handling rings in ring group */ #define hns3_for_each_ring(pos, head) \ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index d1588ea6132c..5bff98a9b0dc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -44,7 +44,6 @@ static const struct hns3_stats hns3_rxq_stats[] = { HNS3_TQP_STAT("errors", rx_err_cnt), HNS3_TQP_STAT("reuse_pg_cnt", 
reuse_pg_cnt), HNS3_TQP_STAT("err_pkt_len", err_pkt_len), - HNS3_TQP_STAT("non_vld_descs", non_vld_descs), HNS3_TQP_STAT("err_bd_num", err_bd_num), HNS3_TQP_STAT("l2_err", l2_err), HNS3_TQP_STAT("l3l4_csum_err", l3l4_csum_err), @@ -60,6 +59,7 @@ static const struct hns3_stats hns3_rxq_stats[] = { #define HNS3_NIC_LB_TEST_PKT_NUM 1 #define HNS3_NIC_LB_TEST_RING_ID 0 #define HNS3_NIC_LB_TEST_PACKET_SIZE 128 +#define HNS3_NIC_LB_SETUP_USEC 10000 /* Nic loopback test err */ #define HNS3_NIC_LB_TEST_NO_MEM_ERR 1 @@ -117,7 +117,7 @@ static int hns3_lp_up(struct net_device *ndev, enum hnae3_loop loop_mode) return ret; ret = hns3_lp_setup(ndev, loop_mode, true); - usleep_range(10000, 20000); + usleep_range(HNS3_NIC_LB_SETUP_USEC, HNS3_NIC_LB_SETUP_USEC * 2); return ret; } @@ -132,7 +132,7 @@ static int hns3_lp_down(struct net_device *ndev, enum hnae3_loop loop_mode) return ret; } - usleep_range(10000, 20000); + usleep_range(HNS3_NIC_LB_SETUP_USEC, HNS3_NIC_LB_SETUP_USEC * 2); return 0; } @@ -149,6 +149,12 @@ static void hns3_lp_setup_skb(struct sk_buff *skb) packet = skb_put(skb, HNS3_NIC_LB_TEST_PACKET_SIZE); memcpy(ethh->h_dest, ndev->dev_addr, ETH_ALEN); + + /* The dst mac addr of loopback packet is the same as the host' + * mac addr, the SSU component may loop back the packet to host + * before the packet reaches mac or serdes, which will defect + * the purpose of mac or serdes selftest. + */ ethh->h_dest[5] += 0x1f; eth_zero_addr(ethh->h_source); ethh->h_proto = htons(ETH_P_ARP); @@ -243,11 +249,13 @@ static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode) skb_get(skb); tx_ret = hns3_nic_net_xmit(skb, ndev); - if (tx_ret == NETDEV_TX_OK) + if (tx_ret == NETDEV_TX_OK) { good_cnt++; - else + } else { + kfree_skb(skb); netdev_err(ndev, "hns3_lb_run_test xmit failed: %d\n", tx_ret); + } } if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) { ret_val = HNS3_NIC_LB_TEST_TX_CNT_ERR; @@ -327,6 +335,13 @@ static void hns3_self_test(struct net_device *ndev, h->ae_algo->ops->enable_vlan_filter(h, false); #endif + /* Tell firmware to stop mac autoneg before loopback test start, + * otherwise loopback test may be failed when the port is still + * negotiating. 
+ */ + if (h->ae_algo->ops->halt_autoneg) + h->ae_algo->ops->halt_autoneg(h, true); + set_bit(HNS3_NIC_STATE_TESTING, &priv->state); for (i = 0; i < HNS3_SELF_TEST_TYPE_NUM; i++) { @@ -349,6 +364,9 @@ static void hns3_self_test(struct net_device *ndev, clear_bit(HNS3_NIC_STATE_TESTING, &priv->state); + if (h->ae_algo->ops->halt_autoneg) + h->ae_algo->ops->halt_autoneg(h, false); + #if IS_ENABLED(CONFIG_VLAN_8021Q) if (dis_vlan_filter) h->ae_algo->ops->enable_vlan_filter(h, true); @@ -435,7 +453,7 @@ static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data) switch (stringset) { case ETH_SS_STATS: buff = hns3_get_strings_tqps(h, buff); - h->ae_algo->ops->get_strings(h, stringset, (u8 *)buff); + ops->get_strings(h, stringset, (u8 *)buff); break; case ETH_SS_TEST: ops->get_strings(h, stringset, data); @@ -510,6 +528,11 @@ static void hns3_get_drvinfo(struct net_device *netdev, struct hns3_nic_priv *priv = netdev_priv(netdev); struct hnae3_handle *h = priv->ae_handle; + if (!h->ae_algo->ops->get_fw_version) { + netdev_err(netdev, "could not get fw version!\n"); + return; + } + strncpy(drvinfo->version, hns3_driver_version, sizeof(drvinfo->version)); drvinfo->version[sizeof(drvinfo->version) - 1] = '\0'; @@ -530,7 +553,7 @@ static u32 hns3_get_link(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); - if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status) + if (h->ae_algo->ops->get_status) return h->ae_algo->ops->get_status(h); else return 0; @@ -560,7 +583,7 @@ static void hns3_get_pauseparam(struct net_device *netdev, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam) + if (h->ae_algo->ops->get_pauseparam) h->ae_algo->ops->get_pauseparam(h, &param->autoneg, &param->rx_pause, &param->tx_pause); } @@ -610,9 +633,6 @@ static int hns3_get_link_ksettings(struct net_device *netdev, u8 media_type; u8 link_stat; - if (!h->ae_algo || !h->ae_algo->ops) - return -EOPNOTSUPP; - ops = h->ae_algo->ops; if (ops->get_media_type) ops->get_media_type(h, &media_type, &module_type); @@ -740,8 +760,7 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || - !h->ae_algo->ops->get_rss_key_size) + if (!h->ae_algo->ops->get_rss_key_size) return 0; return h->ae_algo->ops->get_rss_key_size(h); @@ -751,8 +770,7 @@ static u32 hns3_get_rss_indir_size(struct net_device *netdev) { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || - !h->ae_algo->ops->get_rss_indir_size) + if (!h->ae_algo->ops->get_rss_indir_size) return 0; return h->ae_algo->ops->get_rss_indir_size(h); @@ -763,7 +781,7 @@ static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss) + if (!h->ae_algo->ops->get_rss) return -EOPNOTSUPP; return h->ae_algo->ops->get_rss(h, indir, key, hfunc); @@ -774,7 +792,7 @@ static int hns3_set_rss(struct net_device *netdev, const u32 *indir, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss) + if (!h->ae_algo->ops->set_rss) return -EOPNOTSUPP; if ((h->pdev->revision == 0x20 && @@ -799,9 +817,6 @@ static int hns3_get_rxnfc(struct net_device *netdev, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops) - return -EOPNOTSUPP; - switch
(cmd->cmd) { case ETHTOOL_GRXRINGS: cmd->data = h->kinfo.num_tqps; @@ -915,9 +930,6 @@ static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops) - return -EOPNOTSUPP; - switch (cmd->cmd) { case ETHTOOL_SRXFH: if (h->ae_algo->ops->set_rss_tuple) @@ -1193,7 +1205,7 @@ static int hns3_set_phys_id(struct net_device *netdev, { struct hnae3_handle *h = hns3_get_handle(netdev); - if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_led_id) + if (!h->ae_algo->ops->set_led_id) return -EOPNOTSUPP; return h->ae_algo->ops->set_led_id(h, state); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index fbd904e3077c..22f6acd45d9a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -110,8 +110,7 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring) hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG, upper_32_bits(dma)); hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG, - (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) | - HCLGE_NIC_CMQ_ENABLE); + ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S); hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0); hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0); } else { @@ -120,8 +119,7 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring) hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG, upper_32_bits(dma)); hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG, - (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) | - HCLGE_NIC_CMQ_ENABLE); + ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S); hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0); hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0); } @@ -175,7 +173,11 @@ static bool hclge_is_special_opcode(u16 opcode) HCLGE_OPC_STATS_MAC, HCLGE_OPC_STATS_MAC_ALL, HCLGE_OPC_QUERY_32_BIT_REG, - HCLGE_OPC_QUERY_64_BIT_REG}; + HCLGE_OPC_QUERY_64_BIT_REG, + HCLGE_QUERY_CLEAR_MPF_RAS_INT, + HCLGE_QUERY_CLEAR_PF_RAS_INT, + HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, + HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT}; int i; for (i = 0; i < ARRAY_SIZE(spec_opcode); i++) { @@ -186,12 +188,43 @@ static bool hclge_is_special_opcode(u16 opcode) return false; } +static int hclge_cmd_convert_err_code(u16 desc_ret) +{ + switch (desc_ret) { + case HCLGE_CMD_EXEC_SUCCESS: + return 0; + case HCLGE_CMD_NO_AUTH: + return -EPERM; + case HCLGE_CMD_NOT_SUPPORTED: + return -EOPNOTSUPP; + case HCLGE_CMD_QUEUE_FULL: + return -EXFULL; + case HCLGE_CMD_NEXT_ERR: + return -ENOSR; + case HCLGE_CMD_UNEXE_ERR: + return -ENOTBLK; + case HCLGE_CMD_PARA_ERR: + return -EINVAL; + case HCLGE_CMD_RESULT_ERR: + return -ERANGE; + case HCLGE_CMD_TIMEOUT: + return -ETIME; + case HCLGE_CMD_HILINK_ERR: + return -ENOLINK; + case HCLGE_CMD_QUEUE_ILLEGAL: + return -ENXIO; + case HCLGE_CMD_INVALID: + return -EBADR; + default: + return -EIO; + } +} + static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, int num, int ntc) { u16 opcode, desc_ret; int handle; - int retval; opcode = le16_to_cpu(desc[0].opcode); for (handle = 0; handle < num; handle++) { @@ -205,17 +238,9 @@ static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, else desc_ret = le16_to_cpu(desc[0].retval); - if (desc_ret == HCLGE_CMD_EXEC_SUCCESS) - retval = 0; - else if (desc_ret == HCLGE_CMD_NO_AUTH) - retval = -EPERM; - else if (desc_ret == HCLGE_CMD_NOT_SUPPORTED) - retval = -EOPNOTSUPP; - else - retval = -EIO; hw->cmq.last_status = 
desc_ret; - return retval; + return hclge_cmd_convert_err_code(desc_ret); } /** @@ -230,6 +255,7 @@ static int hclge_cmd_check_retval(struct hclge_hw *hw, struct hclge_desc *desc, int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) { struct hclge_dev *hdev = container_of(hw, struct hclge_dev, hw); + struct hclge_cmq_ring *csq = &hw->cmq.csq; struct hclge_desc *desc_to_use; bool complete = false; u32 timeout = 0; @@ -239,8 +265,16 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) spin_lock_bh(&hw->cmq.csq.lock); - if (num > hclge_ring_space(&hw->cmq.csq) || - test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { + if (test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state)) { + spin_unlock_bh(&hw->cmq.csq.lock); + return -EBUSY; + } + + if (num > hclge_ring_space(&hw->cmq.csq)) { + /* If CMDQ ring is full, SW HEAD and HW HEAD may be different, + * need update the SW HEAD pointer csq->next_to_clean + */ + csq->next_to_clean = hclge_read_dev(hw, HCLGE_NIC_CSQ_HEAD_REG); spin_unlock_bh(&hw->cmq.csq.lock); return -EBUSY; } @@ -278,7 +312,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) } if (!complete) { - retval = -EAGAIN; + retval = -EBADE; } else { retval = hclge_cmd_check_retval(hw, desc, num, ntc); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index d79a209b80f6..96840d8f3e24 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -41,6 +41,14 @@ enum hclge_cmd_return_status { HCLGE_CMD_NO_AUTH = 1, HCLGE_CMD_NOT_SUPPORTED = 2, HCLGE_CMD_QUEUE_FULL = 3, + HCLGE_CMD_NEXT_ERR = 4, + HCLGE_CMD_UNEXE_ERR = 5, + HCLGE_CMD_PARA_ERR = 6, + HCLGE_CMD_RESULT_ERR = 7, + HCLGE_CMD_TIMEOUT = 8, + HCLGE_CMD_HILINK_ERR = 9, + HCLGE_CMD_QUEUE_ILLEGAL = 10, + HCLGE_CMD_INVALID = 11, }; enum hclge_cmd_status { @@ -180,6 +188,9 @@ enum hclge_opcode_type { HCLGE_OPC_CFG_COM_TQP_QUEUE = 0x0B20, HCLGE_OPC_RESET_TQP_QUEUE = 0x0B22, + /* PPU commands */ + HCLGE_OPC_PPU_PF_OTHER_INT_DFX = 0x0B4A, + /* TSO command */ HCLGE_OPC_TSO_GENERIC_CONFIG = 0x0C01, HCLGE_OPC_GRO_GENERIC_CONFIG = 0x0C10, @@ -243,6 +254,9 @@ enum hclge_opcode_type { /* NCL config command */ HCLGE_OPC_QUERY_NCL_CONFIG = 0x7011, + /* M7 stats command */ + HCLGE_OPC_M7_STATS_BD = 0x7012, + HCLGE_OPC_M7_STATS_INFO = 0x7013, /* SFP command */ HCLGE_OPC_GET_SFP_INFO = 0x7104, @@ -265,6 +279,8 @@ enum hclge_opcode_type { HCLGE_CONFIG_ROCEE_RAS_INT_EN = 0x1580, HCLGE_QUERY_CLEAR_ROCEE_RAS_INT = 0x1581, HCLGE_ROCEE_PF_RAS_INT_CMD = 0x1584, + HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD = 0x1585, + HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD = 0x1586, HCLGE_IGU_EGU_TNL_INT_EN = 0x1803, HCLGE_IGU_COMMON_INT_EN = 0x1806, HCLGE_TM_QCN_MEM_INT_CFG = 0x1A14, @@ -641,6 +657,11 @@ enum hclge_mac_vlan_tbl_opcode { HCLGE_MAC_VLAN_LKUP, /* Lookup a entry through mac_vlan key */ }; +enum hclge_mac_vlan_add_resp_code { + HCLGE_ADD_UC_OVERFLOW = 2, /* ADD failed for UC overflow */ + HCLGE_ADD_MC_OVERFLOW, /* ADD failed for MC overflow */ +}; + #define HCLGE_MAC_VLAN_BIT0_EN_B 0 #define HCLGE_MAC_VLAN_BIT1_EN_B 1 #define HCLGE_MAC_EPORT_SW_EN_B 12 @@ -674,7 +695,6 @@ struct hclge_umv_spc_alc_cmd { #define HCLGE_MAC_MGR_MASK_VLAN_B BIT(0) #define HCLGE_MAC_MGR_MASK_MAC_B BIT(1) #define HCLGE_MAC_MGR_MASK_ETHERTYPE_B BIT(2) -#define HCLGE_MAC_ETHERTYPE_LLDP 0x88cc struct hclge_mac_mgr_tbl_entry_cmd { u8 flags; @@ -872,7 +892,7 @@ struct hclge_serdes_lb_cmd { #define 
HCLGE_TOTAL_PKT_BUF 0x108000 /* 1.03125M bytes */ #define HCLGE_DEFAULT_DV 0xA000 /* 40k byte */ #define HCLGE_DEFAULT_NON_DCB_DV 0x7800 /* 30K byte */ -#define HCLGE_NON_DCB_ADDITIONAL_BUF 0x200 /* 512 byte */ +#define HCLGE_NON_DCB_ADDITIONAL_BUF 0x1400 /* 5120 byte */ #define HCLGE_TYPE_CRQ 0 #define HCLGE_TYPE_CSQ 1 @@ -970,6 +990,25 @@ struct hclge_fd_ad_config_cmd { u8 rsv2[8]; }; +struct hclge_get_m7_bd_cmd { + __le32 bd_num; + u8 rsv[20]; +}; + +struct hclge_query_ppu_pf_other_int_dfx_cmd { + __le16 over_8bd_no_fe_qid; + __le16 over_8bd_no_fe_vf_id; + __le16 tso_mss_cmp_min_err_qid; + __le16 tso_mss_cmp_min_err_vf_id; + __le16 tso_mss_cmp_max_err_qid; + __le16 tso_mss_cmp_max_err_vf_id; + __le16 tx_rd_fbd_poison_qid; + __le16 tx_rd_fbd_poison_vf_id; + __le16 rx_rd_fbd_poison_qid; + __le16 rx_rd_fbd_poison_vf_id; + u8 rsv[4]; +}; + int hclge_cmd_init(struct hclge_dev *hdev); static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 1161361a973b..bac4ce13f6ae 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -325,6 +325,8 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) hdev->tm_info.hw_pfc_map = pfc_map; hdev->tm_info.pfc_en = pfc->pfc_en; + hclge_tm_pfc_info_update(hdev); + return hclge_pause_setup_hw(hdev, false); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c index a9ffb57c4607..ab625c757a95 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c @@ -61,9 +61,11 @@ static int hclge_dbg_cmd_send(struct hclge_dev *hdev, static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev, struct hclge_dbg_dfx_message *dfx_message, - char *cmd_buf, int msg_num, int offset, - enum hclge_opcode_type cmd) + const char *cmd_buf, int msg_num, + int offset, enum hclge_opcode_type cmd) { +#define BD_DATA_NUM 6 + struct hclge_desc *desc_src; struct hclge_desc *desc; int bd_num, buf_len; @@ -92,14 +94,16 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev, return; } - max = (bd_num * 6) <= msg_num ? (bd_num * 6) : msg_num; + max = (bd_num * BD_DATA_NUM) <= msg_num ? + (bd_num * BD_DATA_NUM) : msg_num; desc = desc_src; for (i = 0; i < max; i++) { - (((i / 6) > 0) && ((i % 6) == 0)) ? desc++ : desc; + ((i > 0) && ((i % BD_DATA_NUM) == 0)) ? 
desc++ : desc; if (dfx_message->flag) dev_info(&hdev->pdev->dev, "%s: 0x%x\n", - dfx_message->message, desc->data[i % 6]); + dfx_message->message, + desc->data[i % BD_DATA_NUM]); dfx_message++; } @@ -107,7 +111,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev, kfree(desc_src); } -static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, char *cmd_buf) +static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, const char *cmd_buf) { struct device *dev = &hdev->pdev->dev; struct hclge_dbg_bitmap_cmd *bitmap; @@ -207,7 +211,7 @@ static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, char *cmd_buf) dev_info(dev, "IGU_TX_PRI_MAP_TC_CFG: 0x%x\n", desc[0].data[5]); } -static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, char *cmd_buf) +static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf) { int msg_num; @@ -395,7 +399,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) if (ret) goto err_tm_pg_cmd_send; - dev_info(&hdev->pdev->dev, "PRI_SCH pg_id: %u\n", desc.data[0]); + dev_info(&hdev->pdev->dev, "PRI_SCH pri_id: %u\n", desc.data[0]); cmd = HCLGE_OPC_TM_QS_SCH_MODE_CFG; hclge_cmd_setup_basic_desc(&desc, cmd, true); @@ -403,7 +407,7 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) if (ret) goto err_tm_pg_cmd_send; - dev_info(&hdev->pdev->dev, "QS_SCH pg_id: %u\n", desc.data[0]); + dev_info(&hdev->pdev->dev, "QS_SCH qs_id: %u\n", desc.data[0]); cmd = HCLGE_OPC_TM_BP_TO_QSET_MAPPING; hclge_cmd_setup_basic_desc(&desc, cmd, true); @@ -412,9 +416,9 @@ static void hclge_dbg_dump_tm_pg(struct hclge_dev *hdev) goto err_tm_pg_cmd_send; bp_to_qs_map_cmd = (struct hclge_bp_to_qs_map_cmd *)desc.data; - dev_info(&hdev->pdev->dev, "BP_TO_QSET pg_id: %u\n", + dev_info(&hdev->pdev->dev, "BP_TO_QSET tc_id: %u\n", bp_to_qs_map_cmd->tc_id); - dev_info(&hdev->pdev->dev, "BP_TO_QSET pg_shapping: 0x%x\n", + dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_group_id: 0x%x\n", bp_to_qs_map_cmd->qs_group_id); dev_info(&hdev->pdev->dev, "BP_TO_QSET qs_bit_map: 0x%x\n", bp_to_qs_map_cmd->qs_bit_map); @@ -473,7 +477,7 @@ static void hclge_dbg_dump_tm(struct hclge_dev *hdev) nq_to_qs_map = (struct hclge_nq_to_qs_link_cmd *)desc.data; dev_info(&hdev->pdev->dev, "NQ_TO_QS nq_id: %u\n", nq_to_qs_map->nq_id); - dev_info(&hdev->pdev->dev, "NQ_TO_QS qset_id: %u\n", + dev_info(&hdev->pdev->dev, "NQ_TO_QS qset_id: 0x%x\n", nq_to_qs_map->qset_id); cmd = HCLGE_OPC_TM_PG_WEIGHT; @@ -537,7 +541,8 @@ err_tm_cmd_send: cmd, ret); } -static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, char *cmd_buf) +static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev, + const char *cmd_buf) { struct hclge_bp_to_qs_map_cmd *bp_to_qs_map_cmd; struct hclge_nq_to_qs_link_cmd *nq_to_qs_map; @@ -921,11 +926,67 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev) hdev->rst_stats.reset_cnt); } +void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev) +{ + struct hclge_desc *desc_src, *desc_tmp; + struct hclge_get_m7_bd_cmd *req; + struct hclge_desc desc; + u32 bd_num, buf_len; + int ret, i; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_STATS_BD, true); + + req = (struct hclge_get_m7_bd_cmd *)desc.data; + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "get firmware statistics bd number failed, ret=%d\n", + ret); + return; + } + + bd_num = le32_to_cpu(req->bd_num); + + buf_len = sizeof(struct hclge_desc) * bd_num; + desc_src = kzalloc(buf_len, GFP_KERNEL); + if (!desc_src) { + dev_err(&hdev->pdev->dev, + "allocate desc for get_m7_stats failed\n"); 
+ return; + } + + desc_tmp = desc_src; + ret = hclge_dbg_cmd_send(hdev, desc_tmp, 0, bd_num, + HCLGE_OPC_M7_STATS_INFO); + if (ret) { + kfree(desc_src); + dev_err(&hdev->pdev->dev, + "get firmware statistics failed, ret=%d\n", ret); + return; + } + + for (i = 0; i < bd_num; i++) { + dev_info(&hdev->pdev->dev, "0x%08x 0x%08x 0x%08x\n", + le32_to_cpu(desc_tmp->data[0]), + le32_to_cpu(desc_tmp->data[1]), + le32_to_cpu(desc_tmp->data[2])); + dev_info(&hdev->pdev->dev, "0x%08x 0x%08x 0x%08x\n", + le32_to_cpu(desc_tmp->data[3]), + le32_to_cpu(desc_tmp->data[4]), + le32_to_cpu(desc_tmp->data[5])); + + desc_tmp++; + } + + kfree(desc_src); +} + /* hclge_dbg_dump_ncl_config: print specified range of NCL_CONFIG file * @hdev: pointer to struct hclge_dev * @cmd_buf: string that contains offset and length */ -static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev, char *cmd_buf) +static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev, + const char *cmd_buf) { #define HCLGE_MAX_NCL_CONFIG_OFFSET 4096 #define HCLGE_MAX_NCL_CONFIG_LENGTH (20 + 24 * 4) @@ -998,13 +1059,13 @@ static void hclge_dbg_dump_mac_tnl_status(struct hclge_dev *hdev) while (kfifo_get(&hdev->mac_tnl_log, &stats)) { rem_nsec = do_div(stats.time, HCLGE_BILLION_NANO_SECONDS); - dev_info(&hdev->pdev->dev, "[%07lu.%03lu]status = 0x%x\n", + dev_info(&hdev->pdev->dev, "[%07lu.%03lu] status = 0x%x\n", (unsigned long)stats.time, rem_nsec / 1000, stats.status); } } -int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf) +int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; @@ -1029,6 +1090,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf) hclge_dbg_dump_reg_cmd(hdev, cmd_buf); } else if (strncmp(cmd_buf, "dump reset info", 15) == 0) { hclge_dbg_dump_rst_info(hdev); + } else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) { + hclge_dbg_get_m7_stats_info(hdev); } else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) { hclge_dbg_dump_ncl_config(hdev, &cmd_buf[sizeof("dump ncl_config")]); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 4ac80634c984..0a7243825e7b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -87,25 +87,25 @@ static const struct hclge_hw_error hclge_msix_sram_ecc_int[] = { static const struct hclge_hw_error hclge_igu_int[] = { { .int_msk = BIT(0), .msg = "igu_rx_buf0_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(2), .msg = "igu_rx_buf1_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { /* sentinel */ } }; static const struct hclge_hw_error hclge_igu_egu_tnl_int[] = { { .int_msk = BIT(0), .msg = "rx_buf_overflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(1), .msg = "rx_stp_fifo_overflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(2), .msg = "rx_stp_fifo_undeflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(3), .msg = "tx_buf_overflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(4), .msg = "tx_buf_underrun", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(5), .msg = 
"rx_stp_buf_overflow", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { /* sentinel */ } }; @@ -413,13 +413,13 @@ static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st2[] = { static const struct hclge_hw_error hclge_ppu_mpf_abnormal_int_st3[] = { { .int_msk = BIT(4), .msg = "gro_bd_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(5), .msg = "gro_context_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(6), .msg = "rx_stash_cfg_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { .int_msk = BIT(7), .msg = "axi_rd_fbd_ecc_mbit_err", - .reset_level = HNAE3_CORE_RESET }, + .reset_level = HNAE3_GLOBAL_RESET }, { /* sentinel */ } }; @@ -631,29 +631,20 @@ static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = { { /* sentinel */ } }; -static enum hnae3_reset_type hclge_log_error(struct device *dev, char *reg, - const struct hclge_hw_error *err, - u32 err_sts) +static void hclge_log_error(struct device *dev, char *reg, + const struct hclge_hw_error *err, + u32 err_sts, unsigned long *reset_requests) { - enum hnae3_reset_type reset_level = HNAE3_FUNC_RESET; - bool need_reset = false; - while (err->msg) { if (err->int_msk & err_sts) { dev_warn(dev, "%s %s found [error status=0x%x]\n", reg, err->msg, err_sts); - if (err->reset_level != HNAE3_NONE_RESET && - err->reset_level >= reset_level) { - reset_level = err->reset_level; - need_reset = true; - } + if (err->reset_level && + err->reset_level != HNAE3_NONE_RESET) + set_bit(err->reset_level, reset_requests); } err++; } - if (need_reset) - return reset_level; - else - return HNAE3_NONE_RESET; } /* hclge_cmd_query_error: read the error information @@ -673,19 +664,19 @@ static int hclge_cmd_query_error(struct hclge_dev *hdev, enum hclge_err_int_type int_type) { struct device *dev = &hdev->pdev->dev; - int num = 1; + int desc_num = 1; int ret; hclge_cmd_setup_basic_desc(&desc[0], cmd, true); if (flag) { desc[0].flag |= cpu_to_le16(flag); hclge_cmd_setup_basic_desc(&desc[1], cmd, true); - num = 2; + desc_num = 2; } if (w_num) desc[0].data[w_num] = cpu_to_le32(int_type); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); if (ret) dev_err(dev, "query error cmd failed (%d)\n", ret); @@ -941,7 +932,7 @@ static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc[2]; - int num = 1; + int desc_num = 1; int ret; /* configure PPU error interrupts */ @@ -960,7 +951,7 @@ static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, desc[1].data[1] = HCLGE_PPU_MPF_ABNORMAL_INT1_EN_MASK; desc[1].data[2] = HCLGE_PPU_MPF_ABNORMAL_INT2_EN_MASK; desc[1].data[3] |= HCLGE_PPU_MPF_ABNORMAL_INT3_EN_MASK; - num = 2; + desc_num = 2; } else if (cmd == HCLGE_PPU_MPF_OTHER_INT_CMD) { hclge_cmd_setup_basic_desc(&desc[0], cmd, false); if (en) @@ -978,7 +969,7 @@ static int hclge_config_ppu_error_interrupts(struct hclge_dev *hdev, u32 cmd, return -EINVAL; } - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); + ret = hclge_cmd_send(&hdev->hw, &desc[0], desc_num); return ret; } @@ -1069,12 +1060,51 @@ static int hclge_config_ssu_hw_err_int(struct hclge_dev *hdev, bool en) return ret; } -#define HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type) \ - do { \ - if (ae_dev->ops->set_default_reset_request) \ - 
ae_dev->ops->set_default_reset_request(ae_dev, \ - reset_type); \ - } while (0) +/* hclge_query_bd_num: query number of buffer descriptors + * @hdev: pointer to struct hclge_dev + * @is_ras: true for ras, false for msix + * @mpf_bd_num: number of main PF interrupt buffer descriptors + * @pf_bd_num: number of not main PF interrupt buffer descriptors + * + * This function querys number of mpf and pf buffer descriptors. + */ +static int hclge_query_bd_num(struct hclge_dev *hdev, bool is_ras, + int *mpf_bd_num, int *pf_bd_num) +{ + struct device *dev = &hdev->pdev->dev; + u32 mpf_min_bd_num, pf_min_bd_num; + enum hclge_opcode_type opcode; + struct hclge_desc desc_bd; + int ret; + + if (is_ras) { + opcode = HCLGE_QUERY_RAS_INT_STS_BD_NUM; + mpf_min_bd_num = HCLGE_MPF_RAS_INT_MIN_BD_NUM; + pf_min_bd_num = HCLGE_PF_RAS_INT_MIN_BD_NUM; + } else { + opcode = HCLGE_QUERY_MSIX_INT_STS_BD_NUM; + mpf_min_bd_num = HCLGE_MPF_MSIX_INT_MIN_BD_NUM; + pf_min_bd_num = HCLGE_PF_MSIX_INT_MIN_BD_NUM; + } + + hclge_cmd_setup_basic_desc(&desc_bd, opcode, true); + ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); + if (ret) { + dev_err(dev, "fail(%d) to query msix int status bd num\n", + ret); + return ret; + } + + *mpf_bd_num = le32_to_cpu(desc_bd.data[0]); + *pf_bd_num = le32_to_cpu(desc_bd.data[1]); + if (*mpf_bd_num < mpf_min_bd_num || *pf_bd_num < pf_min_bd_num) { + dev_err(dev, "Invalid bd num: mpf(%d), pf(%d)\n", + *mpf_bd_num, *pf_bd_num); + return -EINVAL; + } + + return 0; +} /* hclge_handle_mpf_ras_error: handle all main PF RAS errors * @hdev: pointer to struct hclge_dev @@ -1089,7 +1119,6 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, int num) { struct hnae3_ae_dev *ae_dev = hdev->ae_dev; - enum hnae3_reset_type reset_level; struct device *dev = &hdev->pdev->dev; __le32 *desc_data; u32 status; @@ -1098,8 +1127,6 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, /* query all main PF RAS errors */ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_MPF_RAS_INT, true); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) { dev_err(dev, "query all mpf ras int cmd failed (%d)\n", ret); @@ -1108,95 +1135,74 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, /* log HNS common errors */ status = le32_to_cpu(desc[0].data[0]); - if (status) { - reset_level = hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", - &hclge_imp_tcm_ecc_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "IMP_TCM_ECC_INT_STS", + &hclge_imp_tcm_ecc_int[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(desc[0].data[1]); - if (status) { - reset_level = hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", - &hclge_cmdq_nic_mem_ecc_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "CMDQ_MEM_ECC_INT_STS", + &hclge_cmdq_nic_mem_ecc_int[0], status, + &ae_dev->hw_err_reset_req); - if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) { + if ((le32_to_cpu(desc[0].data[2])) & BIT(0)) dev_warn(dev, "imp_rd_data_poison_err found\n"); - HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_NONE_RESET); - } status = le32_to_cpu(desc[0].data[3]); - if (status) { - reset_level = hclge_log_error(dev, "TQP_INT_ECC_INT_STS", - &hclge_tqp_int_ecc_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "TQP_INT_ECC_INT_STS", + &hclge_tqp_int_ecc_int[0], status, + &ae_dev->hw_err_reset_req); status = 
le32_to_cpu(desc[0].data[4]); - if (status) { - reset_level = hclge_log_error(dev, "MSIX_ECC_INT_STS", - &hclge_msix_sram_ecc_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "MSIX_ECC_INT_STS", + &hclge_msix_sram_ecc_int[0], status, + &ae_dev->hw_err_reset_req); /* log SSU(Storage Switch Unit) errors */ desc_data = (__le32 *)&desc[2]; status = le32_to_cpu(*(desc_data + 2)); - if (status) { - reset_level = hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", - &hclge_ssu_mem_ecc_err_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_ECC_MULTI_BIT_INT_0", + &hclge_ssu_mem_ecc_err_int[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(*(desc_data + 3)) & BIT(0); if (status) { dev_warn(dev, "SSU_ECC_MULTI_BIT_INT_1 ssu_mem32_ecc_mbit_err found [error status=0x%x]\n", status); - HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); } status = le32_to_cpu(*(desc_data + 4)) & HCLGE_SSU_COMMON_ERR_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "SSU_COMMON_ERR_INT", - &hclge_ssu_com_err_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_COMMON_ERR_INT", + &hclge_ssu_com_err_int[0], status, + &ae_dev->hw_err_reset_req); /* log IGU(Ingress Unit) errors */ desc_data = (__le32 *)&desc[3]; status = le32_to_cpu(*desc_data) & HCLGE_IGU_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "IGU_INT_STS", - &hclge_igu_int[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "IGU_INT_STS", + &hclge_igu_int[0], status, + &ae_dev->hw_err_reset_req); /* log PPP(Programmable Packet Process) errors */ desc_data = (__le32 *)&desc[4]; status = le32_to_cpu(*(desc_data + 1)); - if (status) { - reset_level = - hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", - &hclge_ppp_mpf_abnormal_int_st1[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST1", + &hclge_ppp_mpf_abnormal_int_st1[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPP_MPF_INT_ST3_MASK; - if (status) { - reset_level = - hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", - &hclge_ppp_mpf_abnormal_int_st3[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPP_MPF_ABNORMAL_INT_ST3", + &hclge_ppp_mpf_abnormal_int_st3[0], status, + &ae_dev->hw_err_reset_req); /* log PPU(RCB) errors */ desc_data = (__le32 *)&desc[5]; @@ -1204,66 +1210,53 @@ static int hclge_handle_mpf_ras_error(struct hclge_dev *hdev, if (status) { dev_warn(dev, "PPU_MPF_ABNORMAL_INT_ST1 %s found\n", "rpu_rx_pkt_ecc_mbit_err"); - HCLGE_SET_DEFAULT_RESET_REQUEST(HNAE3_GLOBAL_RESET); + set_bit(HNAE3_GLOBAL_RESET, &ae_dev->hw_err_reset_req); } status = le32_to_cpu(*(desc_data + 2)); - if (status) { - reset_level = - hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", - &hclge_ppu_mpf_abnormal_int_st2[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", + &hclge_ppu_mpf_abnormal_int_st2[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(*(desc_data + 3)) & HCLGE_PPU_MPF_INT_ST3_MASK; - if (status) { - reset_level = - hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", - &hclge_ppu_mpf_abnormal_int_st3[0], - status); - 
HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST3", + &hclge_ppu_mpf_abnormal_int_st3[0], status, + &ae_dev->hw_err_reset_req); /* log TM(Traffic Manager) errors */ desc_data = (__le32 *)&desc[6]; status = le32_to_cpu(*desc_data); - if (status) { - reset_level = hclge_log_error(dev, "TM_SCH_RINT", - &hclge_tm_sch_rint[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "TM_SCH_RINT", + &hclge_tm_sch_rint[0], status, + &ae_dev->hw_err_reset_req); /* log QCN(Quantized Congestion Control) errors */ desc_data = (__le32 *)&desc[7]; status = le32_to_cpu(*desc_data) & HCLGE_QCN_FIFO_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "QCN_FIFO_RINT", - &hclge_qcn_fifo_rint[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "QCN_FIFO_RINT", + &hclge_qcn_fifo_rint[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(*(desc_data + 1)) & HCLGE_QCN_ECC_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "QCN_ECC_RINT", - &hclge_qcn_ecc_rint[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "QCN_ECC_RINT", + &hclge_qcn_ecc_rint[0], status, + &ae_dev->hw_err_reset_req); /* log NCSI errors */ desc_data = (__le32 *)&desc[9]; status = le32_to_cpu(*desc_data) & HCLGE_NCSI_ECC_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "NCSI_ECC_INT_RPT", - &hclge_ncsi_err_int[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "NCSI_ECC_INT_RPT", + &hclge_ncsi_err_int[0], status, + &ae_dev->hw_err_reset_req); /* clear all main PF RAS errors */ hclge_cmd_reuse_desc(&desc[0], false); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) dev_err(dev, "clear all mpf ras int cmd failed (%d)\n", ret); @@ -1285,7 +1278,6 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, { struct hnae3_ae_dev *ae_dev = hdev->ae_dev; struct device *dev = &hdev->pdev->dev; - enum hnae3_reset_type reset_level; __le32 *desc_data; u32 status; int ret; @@ -1293,8 +1285,6 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, /* query all PF RAS errors */ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_PF_RAS_INT, true); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) { dev_err(dev, "query all pf ras int cmd failed (%d)\n", ret); @@ -1303,53 +1293,41 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, /* log SSU(Storage Switch Unit) errors */ status = le32_to_cpu(desc[0].data[0]); - if (status) { - reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", - &hclge_ssu_port_based_err_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", + &hclge_ssu_port_based_err_int[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(desc[0].data[1]); - if (status) { - reset_level = hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", - &hclge_ssu_fifo_overflow_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_FIFO_OVERFLOW_INT", + &hclge_ssu_fifo_overflow_int[0], status, + &ae_dev->hw_err_reset_req); status = le32_to_cpu(desc[0].data[2]); - if (status) { - reset_level = hclge_log_error(dev, "SSU_ETS_TCG_INT", - &hclge_ssu_ets_tcg_int[0], - status); - 
HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "SSU_ETS_TCG_INT", + &hclge_ssu_ets_tcg_int[0], status, + &ae_dev->hw_err_reset_req); /* log IGU(Ingress Unit) EGU(Egress Unit) TNL errors */ desc_data = (__le32 *)&desc[1]; status = le32_to_cpu(*desc_data) & HCLGE_IGU_EGU_TNL_INT_MASK; - if (status) { - reset_level = hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", - &hclge_igu_egu_tnl_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "IGU_EGU_TNL_INT_STS", + &hclge_igu_egu_tnl_int[0], status, + &ae_dev->hw_err_reset_req); /* log PPU(RCB) errors */ desc_data = (__le32 *)&desc[3]; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_RAS_MASK; - if (status) { - reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", - &hclge_ppu_pf_abnormal_int[0], - status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); - } + if (status) + hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST0", + &hclge_ppu_pf_abnormal_int[0], status, + &ae_dev->hw_err_reset_req); /* clear all PF RAS errors */ hclge_cmd_reuse_desc(&desc[0], false); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], num); if (ret) dev_err(dev, "clear all pf ras int cmd failed (%d)\n", ret); @@ -1359,24 +1337,16 @@ static int hclge_handle_pf_ras_error(struct hclge_dev *hdev, static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) { - struct device *dev = &hdev->pdev->dev; u32 mpf_bd_num, pf_bd_num, bd_num; - struct hclge_desc desc_bd; struct hclge_desc *desc; int ret; /* query the number of registers in the RAS int status */ - hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_RAS_INT_STS_BD_NUM, - true); - ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1); - if (ret) { - dev_err(dev, "fail(%d) to query ras int status bd num\n", ret); + ret = hclge_query_bd_num(hdev, true, &mpf_bd_num, &pf_bd_num); + if (ret) return ret; - } - mpf_bd_num = le32_to_cpu(desc_bd.data[0]); - pf_bd_num = le32_to_cpu(desc_bd.data[1]); - bd_num = max_t(u32, mpf_bd_num, pf_bd_num); + bd_num = max_t(u32, mpf_bd_num, pf_bd_num); desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc) return -ENOMEM; @@ -1396,6 +1366,66 @@ static int hclge_handle_all_ras_errors(struct hclge_dev *hdev) return ret; } +static int hclge_log_rocee_axi_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[3]; + int ret; + + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + hclge_cmd_setup_basic_desc(&desc[1], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + hclge_cmd_setup_basic_desc(&desc[2], HCLGE_QUERY_ROCEE_AXI_RAS_INFO_CMD, + true); + desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + + ret = hclge_cmd_send(&hdev->hw, &desc[0], 3); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE AXI error sts\n", ret); + return ret; + } + + dev_info(dev, "AXI1: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), + le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), + le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); + dev_info(dev, "AXI2: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[1].data[0]), le32_to_cpu(desc[1].data[1]), + le32_to_cpu(desc[1].data[2]), le32_to_cpu(desc[1].data[3]), + le32_to_cpu(desc[1].data[4]), le32_to_cpu(desc[1].data[5])); + dev_info(dev, "AXI3: %08X %08X %08X %08X\n", + le32_to_cpu(desc[2].data[0]), 
le32_to_cpu(desc[2].data[1]), + le32_to_cpu(desc[2].data[2]), le32_to_cpu(desc[2].data[3])); + + return 0; +} + +static int hclge_log_rocee_ecc_error(struct hclge_dev *hdev) +{ + struct device *dev = &hdev->pdev->dev; + struct hclge_desc desc[2]; + int ret; + + ret = hclge_cmd_query_error(hdev, &desc[0], + HCLGE_QUERY_ROCEE_ECC_RAS_INFO_CMD, + HCLGE_CMD_FLAG_NEXT, 0, 0); + if (ret) { + dev_err(dev, "failed(%d) to query ROCEE ECC error sts\n", ret); + return ret; + } + + dev_info(dev, "ECC1: %08X %08X %08X %08X %08X %08X\n", + le32_to_cpu(desc[0].data[0]), le32_to_cpu(desc[0].data[1]), + le32_to_cpu(desc[0].data[2]), le32_to_cpu(desc[0].data[3]), + le32_to_cpu(desc[0].data[4]), le32_to_cpu(desc[0].data[5])); + dev_info(dev, "ECC2: %08X %08X %08X\n", le32_to_cpu(desc[1].data[0]), + le32_to_cpu(desc[1].data[1]), le32_to_cpu(desc[1].data[2])); + + return 0; +} + static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) { struct device *dev = &hdev->pdev->dev; @@ -1403,8 +1433,7 @@ static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) int ret; /* read overflow error status */ - ret = hclge_cmd_query_error(hdev, &desc[0], - HCLGE_ROCEE_PF_RAS_INT_CMD, + ret = hclge_cmd_query_error(hdev, &desc[0], HCLGE_ROCEE_PF_RAS_INT_CMD, 0, 0, 0); if (ret) { dev_err(dev, "failed(%d) to query ROCEE OVF error sts\n", ret); @@ -1464,19 +1493,27 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) status = le32_to_cpu(desc[0].data[0]); - if (status & HCLGE_ROCEE_RERR_INT_MASK) { - dev_warn(dev, "ROCEE RAS AXI rresp error\n"); - reset_type = HNAE3_FUNC_RESET; - } + if (status & HCLGE_ROCEE_AXI_ERR_INT_MASK) { + if (status & HCLGE_ROCEE_RERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI rresp error\n"); + + if (status & HCLGE_ROCEE_BERR_INT_MASK) + dev_warn(dev, "ROCEE RAS AXI bresp error\n"); - if (status & HCLGE_ROCEE_BERR_INT_MASK) { - dev_warn(dev, "ROCEE RAS AXI bresp error\n"); reset_type = HNAE3_FUNC_RESET; + + ret = hclge_log_rocee_axi_error(hdev); + if (ret) + return HNAE3_GLOBAL_RESET; } if (status & HCLGE_ROCEE_ECC_INT_MASK) { dev_warn(dev, "ROCEE RAS 2bit ECC error\n"); reset_type = HNAE3_GLOBAL_RESET; + + ret = hclge_log_rocee_ecc_error(hdev); + if (ret) + return HNAE3_GLOBAL_RESET; } if (status & HCLGE_ROCEE_OVF_INT_MASK) { @@ -1486,7 +1523,6 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) /* reset everything for now */ return HNAE3_GLOBAL_RESET; } - reset_type = HNAE3_FUNC_RESET; } /* clear error status */ @@ -1501,7 +1537,7 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) return reset_type; } -static int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) +int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc; @@ -1539,7 +1575,7 @@ static void hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev) reset_type = hclge_log_and_clear_rocee_ras_error(hdev); if (reset_type != HNAE3_NONE_RESET) - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_type); + set_bit(reset_type, &ae_dev->hw_err_reset_req); } static const struct hclge_hw_blk hw_blk[] = { @@ -1574,10 +1610,9 @@ static const struct hclge_hw_blk hw_blk[] = { { /* sentinel */ } }; -int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) +int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state) { const struct hclge_hw_blk *module = hw_blk; - struct device *dev = &hdev->pdev->dev; int ret = 0; while (module->name) { @@ -1589,10 +1624,6 @@ int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) 
 		module++;
 	}
 
-	ret = hclge_config_rocee_ras_interrupt(hdev, state);
-	if (ret)
-		dev_err(dev, "fail(%d) to configure ROCEE err int\n", ret);
-
 	return ret;
 }
 
@@ -1602,165 +1633,281 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev)
 	struct device *dev = &hdev->pdev->dev;
 	u32 status;
 
+	if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) {
+		dev_err(dev,
+			"Can't recover - RAS error reported during dev init\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+
 	status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
+	if (status & HCLGE_RAS_REG_NFE_MASK ||
+	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK)
+		ae_dev->hw_err_reset_req = 0;
+	else
+		goto out;
+
 	/* Handling Non-fatal HNS RAS errors */
 	if (status & HCLGE_RAS_REG_NFE_MASK) {
 		dev_warn(dev,
 			 "HNS Non-Fatal RAS error(status=0x%x) identified\n",
 			 status);
 		hclge_handle_all_ras_errors(hdev);
-	} else {
-		if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) ||
-		    hdev->pdev->revision < 0x21) {
-			ae_dev->override_pci_need_reset = 1;
-			return PCI_ERS_RESULT_RECOVERED;
-		}
 	}
 
-	if (status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
-		dev_warn(dev, "ROCEE uncorrected RAS error identified\n");
+	/* Handling Non-fatal Rocee RAS errors */
+	if (hdev->pdev->revision >= 0x21 &&
+	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
+		dev_warn(dev, "ROCEE Non-Fatal RAS error identified\n");
 		hclge_handle_rocee_ras_error(ae_dev);
 	}
 
-	if (status & HCLGE_RAS_REG_NFE_MASK ||
-	    status & HCLGE_RAS_REG_ROCEE_ERR_MASK) {
-		ae_dev->override_pci_need_reset = 0;
+	if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+		goto out;
+
+	if (ae_dev->hw_err_reset_req)
 		return PCI_ERS_RESULT_NEED_RESET;
-	}
 
-	ae_dev->override_pci_need_reset = 1;
+out:
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
-int hclge_handle_hw_msix_error(struct hclge_dev *hdev,
-			       unsigned long *reset_requests)
+static int hclge_clear_hw_msix_error(struct hclge_dev *hdev,
+				     struct hclge_desc *desc, bool is_mpf,
+				     u32 bd_num)
+{
+	if (is_mpf)
+		desc[0].opcode =
+			cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT);
+	else
+		desc[0].opcode = cpu_to_le16(HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT);
+
+	desc[0].flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);
+
+	return hclge_cmd_send(&hdev->hw, &desc[0], bd_num);
+}
+
+/* hclge_query_over_8bd_err_info: query information about over_8bd_nfe_err
+ * @hdev: pointer to struct hclge_dev
+ * @vf_id: Index of the virtual function with error
+ * @q_id: Physical index of the queue with error
+ *
+ * This function gets the specific index of the queue and of the function
+ * which caused over_8bd_nfe_err by using a query command. If vf_id is 0,
+ * it means the error is caused by the PF instead of a VF.
+ */
+static int hclge_query_over_8bd_err_info(struct hclge_dev *hdev, u16 *vf_id,
+					 u16 *q_id)
+{
+	struct hclge_query_ppu_pf_other_int_dfx_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_PPU_PF_OTHER_INT_DFX, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret)
+		return ret;
+
+	req = (struct hclge_query_ppu_pf_other_int_dfx_cmd *)desc.data;
+	*vf_id = le16_to_cpu(req->over_8bd_no_fe_vf_id);
+	*q_id = le16_to_cpu(req->over_8bd_no_fe_qid);
+
+	return 0;
+}
+
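The helper above only extracts the (vf_id, q_id) pair reported by the DFX query; the handler that follows turns it into a reset decision. That decision is small enough to model standalone; the bit values here are invented, not the driver's:

#include <stdio.h>

#define VF_RESET_BIT   (1UL << 0)
#define FUNC_RESET_BIT (1UL << 1)

/* Decide which reset to request for an over_8bd style error.
 * vf_id == 0 means the PF itself hit the error.
 */
static void handle_over_8bd(unsigned int vf_id, unsigned int num_vport,
			    unsigned long *reset_requests)
{
	if (!vf_id) {
		*reset_requests |= FUNC_RESET_BIT; /* PF caused it: PF reset */
		return;
	}
	if (vf_id >= num_vport)
		return;			/* bogus index, nothing to assert */
	if (*reset_requests)
		return;			/* a higher reset is already queued */
	*reset_requests |= VF_RESET_BIT;	/* reset only that VF */
}

int main(void)
{
	unsigned long req = 0;

	handle_over_8bd(3, 8, &req);
	printf("requests: 0x%lx\n", req);
	return 0;
}
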
+/* hclge_handle_over_8bd_err: handle MSI-X error named over_8bd_nfe_err
+ * @hdev: pointer to struct hclge_dev
+ * @reset_requests: reset level that we need to trigger later
+ *
+ * over_8bd_nfe_err is a special MSI-X error because it may be caused by a
+ * VF; in that case, we need to trigger a VF reset. Otherwise, a PF reset
+ * is needed.
+ */
+static void hclge_handle_over_8bd_err(struct hclge_dev *hdev,
+				      unsigned long *reset_requests)
 {
-	struct hclge_mac_tnl_stats mac_tnl_stats;
 	struct device *dev = &hdev->pdev->dev;
-	u32 mpf_bd_num, pf_bd_num, bd_num;
-	enum hnae3_reset_type reset_level;
-	struct hclge_desc desc_bd;
-	struct hclge_desc *desc;
-	__le32 *desc_data;
-	u32 status;
+	u16 vf_id;
+	u16 q_id;
 	int ret;
 
-	/* query the number of bds for the MSIx int status */
-	hclge_cmd_setup_basic_desc(&desc_bd, HCLGE_QUERY_MSIX_INT_STS_BD_NUM,
-				   true);
-	ret = hclge_cmd_send(&hdev->hw, &desc_bd, 1);
+	ret = hclge_query_over_8bd_err_info(hdev, &vf_id, &q_id);
 	if (ret) {
-		dev_err(dev, "fail(%d) to query msix int status bd num\n",
+		dev_err(dev, "fail(%d) to query over_8bd_no_fe info\n",
 			ret);
-		return ret;
+		return;
 	}
 
-	mpf_bd_num = le32_to_cpu(desc_bd.data[0]);
-	pf_bd_num = le32_to_cpu(desc_bd.data[1]);
-	bd_num = max_t(u32, mpf_bd_num, pf_bd_num);
+	dev_warn(dev, "PPU_PF_ABNORMAL_INT_ST over_8bd_no_fe found, vf_id(%d), queue_id(%d)\n",
+		 vf_id, q_id);
 
-	desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL);
-	if (!desc)
-		goto out;
+	if (vf_id) {
+		if (vf_id >= hdev->num_alloc_vport) {
+			dev_err(dev, "invalid vf id(%d)\n", vf_id);
+			return;
+		}
+
+		/* If another reset whose level is higher than
+		 * HNAE3_VF_FUNC_RESET has already been requested, there is
+		 * no need to trigger a VF reset here.
+		 */
+		if (*reset_requests != 0)
+			return;
+
+		ret = hclge_inform_reset_assert_to_vf(&hdev->vport[vf_id]);
+		if (ret)
+			dev_warn(dev, "inform reset to vf(%d) failed %d!\n",
+				 vf_id, ret);
+	} else {
+		set_bit(HNAE3_FUNC_RESET, reset_requests);
+	}
+}
+
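Both MSI-X handlers that follow receive one descriptor array sized for the larger of the two BD counts and read 32-bit status words out of fixed descriptor slots. A standalone model of that indexing; the descriptor layout is invented, not the firmware's:

#include <stdio.h>
#include <stdint.h>

#define DESC_DATA_WORDS 6

/* Simplified stand-in for struct hclge_desc: a small header plus
 * an array of 32-bit data words.
 */
struct desc {
	uint16_t opcode;
	uint16_t flag;
	uint32_t data[DESC_DATA_WORDS];
};

int main(void)
{
	struct desc desc[6] = { 0 };
	uint32_t status;

	/* pretend the firmware reported an error bit in BD 5, word 2 */
	desc[5].data[2] = 1U << 29;

	/* mirrors "desc_data = (__le32 *)&desc[5]; ... *(desc_data + 2)" */
	status = desc[5].data[2];
	if (status)
		printf("PPU MPF status word: 0x%x\n", status);
	return 0;
}
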
+/* hclge_handle_mpf_msix_error: handle all main PF MSI-X errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @mpf_bd_num: number of extended command structures
+ * @reset_requests: record of the reset level that we need
+ *
+ * This function handles all the main PF MSI-X errors in the hardware
+ * registers by using the query command.
+ */
+static int hclge_handle_mpf_msix_error(struct hclge_dev *hdev,
+				       struct hclge_desc *desc,
+				       int mpf_bd_num,
+				       unsigned long *reset_requests)
+{
+	struct device *dev = &hdev->pdev->dev;
+	__le32 *desc_data;
+	u32 status;
+	int ret;
 
 	/* query all main PF MSIx errors */
 	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT,
 				   true);
-	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
-
 	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
 	if (ret) {
-		dev_err(dev, "query all mpf msix int cmd failed (%d)\n",
-			ret);
-		goto msi_error;
+		dev_err(dev, "query all mpf msix int cmd failed (%d)\n", ret);
+		return ret;
 	}
 
 	/* log MAC errors */
 	desc_data = (__le32 *)&desc[1];
 	status = le32_to_cpu(*desc_data);
-	if (status) {
-		reset_level = hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
-					      &hclge_mac_afifo_tnl_int[0],
-					      status);
-		set_bit(reset_level, reset_requests);
-	}
+	if (status)
+		hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R",
+				&hclge_mac_afifo_tnl_int[0], status,
+				reset_requests);
 
 	/* log PPU(RCB) MPF errors */
 	desc_data = (__le32 *)&desc[5];
	status = le32_to_cpu(*(desc_data + 2)) &
			HCLGE_PPU_MPF_INT_ST2_MSIX_MASK;
-	if (status) {
-		reset_level =
-			hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2",
-					&hclge_ppu_mpf_abnormal_int_st2[0],
-					status);
-		set_bit(reset_level, reset_requests);
-	}
+	if (status)
+		dev_warn(dev, "PPU_MPF_ABNORMAL_INT_ST2 rx_q_search_miss found [dfx status=0x%x]\n",
+			 status);
 
 	/* clear all main PF MSIx errors */
-	hclge_cmd_reuse_desc(&desc[0], false);
-	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num);
+	if (ret)
+		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n", ret);
 
-	ret = hclge_cmd_send(&hdev->hw, &desc[0], mpf_bd_num);
-	if (ret) {
-		dev_err(dev, "clear all mpf msix int cmd failed (%d)\n",
-			ret);
-		goto msi_error;
-	}
+	return ret;
+}
+
+/* hclge_handle_pf_msix_error: handle all PF MSI-X errors
+ * @hdev: pointer to struct hclge_dev
+ * @desc: descriptor for describing the command
+ * @pf_bd_num: number of extended command structures
+ * @reset_requests: record of the reset level that we need
+ *
+ * This function handles all the PF MSI-X errors in the hardware registers
+ * by using the query command.
+ */ +static int hclge_handle_pf_msix_error(struct hclge_dev *hdev, + struct hclge_desc *desc, + int pf_bd_num, + unsigned long *reset_requests) +{ + struct device *dev = &hdev->pdev->dev; + __le32 *desc_data; + u32 status; + int ret; /* query all PF MSIx errors */ - memset(desc, 0, bd_num * sizeof(struct hclge_desc)); hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_PF_MSIX_INT, true); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); if (ret) { - dev_err(dev, "query all pf msix int cmd failed (%d)\n", - ret); - goto msi_error; + dev_err(dev, "query all pf msix int cmd failed (%d)\n", ret); + return ret; } /* log SSU PF errors */ status = le32_to_cpu(desc[0].data[0]) & HCLGE_SSU_PORT_INT_MSIX_MASK; - if (status) { - reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", - &hclge_ssu_port_based_pf_int[0], - status); - set_bit(reset_level, reset_requests); - } + if (status) + hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", + &hclge_ssu_port_based_pf_int[0], + status, reset_requests); /* read and log PPP PF errors */ desc_data = (__le32 *)&desc[2]; status = le32_to_cpu(*desc_data); - if (status) { - reset_level = hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", - &hclge_ppp_pf_abnormal_int[0], - status); - set_bit(reset_level, reset_requests); - } + if (status) + hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", + &hclge_ppp_pf_abnormal_int[0], + status, reset_requests); /* log PPU(RCB) PF errors */ desc_data = (__le32 *)&desc[3]; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; - if (status) { - reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", - &hclge_ppu_pf_abnormal_int[0], - status); - set_bit(reset_level, reset_requests); - } + if (status) + hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", + &hclge_ppu_pf_abnormal_int[0], + status, reset_requests); + + status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_OVER_8BD_ERR_MASK; + if (status) + hclge_handle_over_8bd_err(hdev, reset_requests); /* clear all PF MSIx errors */ - hclge_cmd_reuse_desc(&desc[0], false); - desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); + if (ret) + dev_err(dev, "clear all pf msix int cmd failed (%d)\n", ret); - ret = hclge_cmd_send(&hdev->hw, &desc[0], pf_bd_num); - if (ret) { - dev_err(dev, "clear all pf msix int cmd failed (%d)\n", - ret); + return ret; +} + +static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) +{ + struct hclge_mac_tnl_stats mac_tnl_stats; + struct device *dev = &hdev->pdev->dev; + u32 mpf_bd_num, pf_bd_num, bd_num; + struct hclge_desc *desc; + u32 status; + int ret; + + /* query the number of bds for the MSIx int status */ + ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); + if (ret) + goto out; + + bd_num = max_t(u32, mpf_bd_num, pf_bd_num); + desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) { + ret = -ENOMEM; + goto out; } + ret = hclge_handle_mpf_msix_error(hdev, desc, mpf_bd_num, + reset_requests); + if (ret) + goto msi_error; + + memset(desc, 0, bd_num * sizeof(struct hclge_desc)); + ret = hclge_handle_pf_msix_error(hdev, desc, pf_bd_num, reset_requests); + if (ret) + goto msi_error; + /* query and clear mac tnl interruptions */ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_MAC_TNL_INT, true); @@ -1783,7 +1930,6 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, ret = hclge_clear_mac_tnl_int(hdev); if (ret) dev_err(dev, "clear mac tnl 
int failed (%d)\n", ret); - set_bit(HNAE3_NONE_RESET, reset_requests); } msi_error: @@ -1791,3 +1937,70 @@ msi_error: out: return ret; } + +int hclge_handle_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) +{ + struct device *dev = &hdev->pdev->dev; + + if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { + dev_err(dev, + "Can't handle - MSIx error reported during dev init\n"); + return 0; + } + + return hclge_handle_all_hw_msix_error(hdev, reset_requests); +} + +void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev) +{ +#define HCLGE_DESC_NO_DATA_LEN 8 + + struct hclge_dev *hdev = ae_dev->priv; + struct device *dev = &hdev->pdev->dev; + u32 mpf_bd_num, pf_bd_num, bd_num; + struct hclge_desc *desc; + u32 status; + int ret; + + ae_dev->hw_err_reset_req = 0; + status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); + + /* query the number of bds for the MSIx int status */ + ret = hclge_query_bd_num(hdev, false, &mpf_bd_num, &pf_bd_num); + if (ret) + return; + + bd_num = max_t(u32, mpf_bd_num, pf_bd_num); + desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); + if (!desc) + return; + + /* Clear HNS hw errors reported through msix */ + memset(&desc[0].data[0], 0xFF, mpf_bd_num * sizeof(struct hclge_desc) - + HCLGE_DESC_NO_DATA_LEN); + ret = hclge_clear_hw_msix_error(hdev, desc, true, mpf_bd_num); + if (ret) { + dev_err(dev, "fail(%d) to clear mpf msix int during init\n", + ret); + goto msi_error; + } + + memset(&desc[0].data[0], 0xFF, pf_bd_num * sizeof(struct hclge_desc) - + HCLGE_DESC_NO_DATA_LEN); + ret = hclge_clear_hw_msix_error(hdev, desc, false, pf_bd_num); + if (ret) { + dev_err(dev, "fail(%d) to clear pf msix int during init\n", + ret); + goto msi_error; + } + + /* Handle Non-fatal HNS RAS errors */ + if (status & HCLGE_RAS_REG_NFE_MASK) { + dev_warn(dev, "HNS hw error(RAS) identified during init\n"); + hclge_handle_all_ras_errors(hdev); + } + +msi_error: + kfree(desc); +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 9645590c9294..7ea8bb28a0cb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -6,6 +6,11 @@ #include "hclge_main.h" +#define HCLGE_MPF_RAS_INT_MIN_BD_NUM 10 +#define HCLGE_PF_RAS_INT_MIN_BD_NUM 4 +#define HCLGE_MPF_MSIX_INT_MIN_BD_NUM 10 +#define HCLGE_PF_MSIX_INT_MIN_BD_NUM 4 + #define HCLGE_RAS_PF_OTHER_INT_STS_REG 0x20B00 #define HCLGE_RAS_REG_NFE_MASK 0xFF00 #define HCLGE_RAS_REG_ROCEE_ERR_MASK 0x3000000 @@ -47,9 +52,9 @@ #define HCLGE_NCSI_ERR_INT_TYPE 0x9 #define HCLGE_MAC_COMMON_ERR_INT_EN 0x107FF #define HCLGE_MAC_COMMON_ERR_INT_EN_MASK 0x107FF -#define HCLGE_MAC_TNL_INT_EN GENMASK(7, 0) -#define HCLGE_MAC_TNL_INT_EN_MASK GENMASK(7, 0) -#define HCLGE_MAC_TNL_INT_CLR GENMASK(7, 0) +#define HCLGE_MAC_TNL_INT_EN GENMASK(9, 0) +#define HCLGE_MAC_TNL_INT_EN_MASK GENMASK(9, 0) +#define HCLGE_MAC_TNL_INT_CLR GENMASK(9, 0) #define HCLGE_PPU_MPF_ABNORMAL_INT0_EN GENMASK(31, 0) #define HCLGE_PPU_MPF_ABNORMAL_INT0_EN_MASK GENMASK(31, 0) #define HCLGE_PPU_MPF_ABNORMAL_INT1_EN GENMASK(31, 0) @@ -81,9 +86,10 @@ #define HCLGE_IGU_EGU_TNL_INT_MASK GENMASK(5, 0) #define HCLGE_PPP_MPF_INT_ST3_MASK GENMASK(5, 0) #define HCLGE_PPU_MPF_INT_ST3_MASK GENMASK(7, 0) -#define HCLGE_PPU_MPF_INT_ST2_MSIX_MASK GENMASK(29, 28) +#define HCLGE_PPU_MPF_INT_ST2_MSIX_MASK BIT(29) #define HCLGE_PPU_PF_INT_RAS_MASK 0x18 -#define HCLGE_PPU_PF_INT_MSIX_MASK 0x27 +#define 
HCLGE_PPU_PF_INT_MSIX_MASK 0x26 +#define HCLGE_PPU_PF_OVER_8BD_ERR_MASK 0x01 #define HCLGE_QCN_FIFO_INT_MASK GENMASK(17, 0) #define HCLGE_QCN_ECC_INT_MASK GENMASK(21, 0) #define HCLGE_NCSI_ECC_INT_MASK GENMASK(1, 0) @@ -94,6 +100,7 @@ #define HCLGE_ROCEE_RAS_CE_INT_EN_MASK 0x1 #define HCLGE_ROCEE_RERR_INT_MASK BIT(0) #define HCLGE_ROCEE_BERR_INT_MASK BIT(1) +#define HCLGE_ROCEE_AXI_ERR_INT_MASK GENMASK(1, 0) #define HCLGE_ROCEE_ECC_INT_MASK BIT(2) #define HCLGE_ROCEE_OVF_INT_MASK BIT(3) #define HCLGE_ROCEE_OVF_ERR_INT_MASK 0x10000 @@ -119,7 +126,9 @@ struct hclge_hw_error { }; int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en); -int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state); +int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state); +int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en); +void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev); pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev); int hclge_handle_hw_msix_error(struct hclge_dev *hdev, unsigned long *reset_requests); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d3b1f8cb1155..3fde5471e1c0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -27,14 +27,26 @@ #define HCLGE_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset)))) #define HCLGE_MAC_STATS_FIELD_OFF(f) (offsetof(struct hclge_mac_stats, f)) -#define HCLGE_BUF_SIZE_UNIT 256 +#define HCLGE_BUF_SIZE_UNIT 256U +#define HCLGE_BUF_MUL_BY 2 +#define HCLGE_BUF_DIV_BY 2 +#define NEED_RESERVE_TC_NUM 2 +#define BUF_MAX_PERCENT 100 +#define BUF_RESERVE_PERCENT 90 + +#define HCLGE_RESET_MAX_FAIL_CNT 5 static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps); static int hclge_init_vlan_config(struct hclge_dev *hdev); +static void hclge_sync_vlan_filter(struct hclge_dev *hdev); static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle); static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size, u16 *allocated_size, bool is_alloc); +static void hclge_rfs_filter_expire(struct hclge_dev *hdev); +static void hclge_clear_arfs_rules(struct hnae3_handle *handle); +static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, + unsigned long *addr); static struct hnae3_ae_algo ae_algo; @@ -290,7 +302,7 @@ static const struct hclge_comm_stats_str g_mac_stats_string[] = { static const struct hclge_mac_mgr_tbl_entry_cmd hclge_mgr_table[] = { { .flags = HCLGE_MAC_MGR_MASK_VLAN_B, - .ethter_type = cpu_to_le16(HCLGE_MAC_ETHERTYPE_LLDP), + .ethter_type = cpu_to_le16(ETH_P_LLDP), .mac_addr_hi32 = cpu_to_le32(htonl(0x0180C200)), .mac_addr_lo16 = cpu_to_le16(htons(0x000E)), .i_port_bitmap = 0x1, @@ -437,8 +449,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) queue = handle->kinfo.tqp[i]; tqp = container_of(queue, struct hclge_tqp, q); /* command : HCLGE_OPC_QUERY_IGU_STAT */ - hclge_cmd_setup_basic_desc(&desc[0], - HCLGE_OPC_QUERY_RX_STATUS, + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_RX_STATUS, true); desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff)); @@ -446,7 +457,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle) if (ret) { dev_err(&hdev->pdev->dev, "Query tqp stat fail, status = %d,queue = %d\n", - ret, i); + ret, i); return ret; } tqp->tqp_stats.rcb_rx_ring_pktnum_rcd += @@ -500,6 +511,7 @@ 
static int hclge_tqps_get_sset_count(struct hnae3_handle *handle, int stringset) { struct hnae3_knic_private_info *kinfo = &handle->kinfo; + /* each tqp has TX & RX two queues */ return kinfo->num_tqps * (2); } @@ -528,7 +540,7 @@ static u8 *hclge_tqps_get_strings(struct hnae3_handle *handle, u8 *data) return buff; } -static u64 *hclge_comm_get_stats(void *comm_stats, +static u64 *hclge_comm_get_stats(const void *comm_stats, const struct hclge_comm_stats_str strs[], int size, u64 *data) { @@ -552,8 +564,7 @@ static u8 *hclge_comm_get_strings(u32 stringset, return buff; for (i = 0; i < size; i++) { - snprintf(buff, ETH_GSTRING_LEN, - strs[i].desc); + snprintf(buff, ETH_GSTRING_LEN, "%s", strs[i].desc); buff = buff + ETH_GSTRING_LEN; } @@ -644,8 +655,7 @@ static int hclge_get_sset_count(struct hnae3_handle *handle, int stringset) return count; } -static void hclge_get_strings(struct hnae3_handle *handle, - u32 stringset, +static void hclge_get_strings(struct hnae3_handle *handle, u32 stringset, u8 *data) { u8 *p = (char *)data; @@ -653,21 +663,17 @@ static void hclge_get_strings(struct hnae3_handle *handle, if (stringset == ETH_SS_STATS) { size = ARRAY_SIZE(g_mac_stats_string); - p = hclge_comm_get_strings(stringset, - g_mac_stats_string, - size, - p); + p = hclge_comm_get_strings(stringset, g_mac_stats_string, + size, p); p = hclge_tqps_get_strings(handle, p); } else if (stringset == ETH_SS_TEST) { if (handle->flags & HNAE3_SUPPORT_APP_LOOPBACK) { - memcpy(p, - hns3_nic_test_strs[HNAE3_LOOP_APP], + memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_APP], ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } if (handle->flags & HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK) { - memcpy(p, - hns3_nic_test_strs[HNAE3_LOOP_SERIAL_SERDES], + memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_SERIAL_SERDES], ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } @@ -678,8 +684,7 @@ static void hclge_get_strings(struct hnae3_handle *handle, p += ETH_GSTRING_LEN; } if (handle->flags & HNAE3_SUPPORT_PHY_LOOPBACK) { - memcpy(p, - hns3_nic_test_strs[HNAE3_LOOP_PHY], + memcpy(p, hns3_nic_test_strs[HNAE3_LOOP_PHY], ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } @@ -692,10 +697,8 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data) struct hclge_dev *hdev = vport->back; u64 *p; - p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, - g_mac_stats_string, - ARRAY_SIZE(g_mac_stats_string), - data); + p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, g_mac_stats_string, + ARRAY_SIZE(g_mac_stats_string), data); p = hclge_tqps_get_stats(handle, p); } @@ -726,6 +729,8 @@ static int hclge_parse_func_status(struct hclge_dev *hdev, static int hclge_query_function_status(struct hclge_dev *hdev) { +#define HCLGE_QUERY_MAX_CNT 5 + struct hclge_func_status_cmd *req; struct hclge_desc desc; int timeout = 0; @@ -738,9 +743,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev) ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, - "query function status failed %d.\n", - ret); - + "query function status failed %d.\n", ret); return ret; } @@ -748,7 +751,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev) if (req->pf_state) break; usleep_range(1000, 2000); - } while (timeout++ < 5); + } while (timeout++ < HCLGE_QUERY_MAX_CNT); ret = hclge_parse_func_status(hdev, req); @@ -800,7 +803,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev) /* PF should have NIC vectors and Roce vectors, * NIC vectors are queued before Roce vectors. 
*/ - hdev->num_msi = hdev->num_roce_msi + + hdev->num_msi = hdev->num_roce_msi + hdev->roce_base_msix_offset; } else { hdev->num_msi = @@ -1058,6 +1061,7 @@ static void hclge_parse_copper_link_mode(struct hclge_dev *hdev, linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported); linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported); linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported); + linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported); } static void hclge_parse_link_mode(struct hclge_dev *hdev, u8 speed_ability) @@ -1076,7 +1080,7 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc) struct hclge_cfg_param_cmd *req; u64 mac_addr_tmp_high; u64 mac_addr_tmp; - int i; + unsigned int i; req = (struct hclge_cfg_param_cmd *)desc[0].data; @@ -1138,7 +1142,8 @@ static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg) { struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM]; struct hclge_cfg_param_cmd *req; - int i, ret; + unsigned int i; + int ret; for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) { u32 offset = 0; @@ -1204,7 +1209,8 @@ static void hclge_init_kdump_kernel_config(struct hclge_dev *hdev) static int hclge_configure(struct hclge_dev *hdev) { struct hclge_cfg cfg; - int ret, i; + unsigned int i; + int ret; ret = hclge_get_cfg(hdev, &cfg); if (ret) { @@ -1226,8 +1232,10 @@ static int hclge_configure(struct hclge_dev *hdev) hdev->tm_info.hw_pfc_map = 0; hdev->wanted_umv_size = cfg.umv_space; - if (hnae3_dev_fd_supported(hdev)) + if (hnae3_dev_fd_supported(hdev)) { hdev->fd_en = true; + hdev->fd_active_type = HCLGE_FD_RULE_NONE; + } ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed); if (ret) { @@ -1265,8 +1273,8 @@ static int hclge_configure(struct hclge_dev *hdev) return ret; } -static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min, - int tso_mss_max) +static int hclge_config_tso(struct hclge_dev *hdev, unsigned int tso_mss_min, + unsigned int tso_mss_max) { struct hclge_cfg_tso_status_cmd *req; struct hclge_desc desc; @@ -1352,8 +1360,9 @@ static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id, req = (struct hclge_tqp_map_cmd *)desc.data; req->tqp_id = cpu_to_le16(tqp_pid); req->tqp_vf = func_id; - req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B | - 1 << HCLGE_TQP_MAP_EN_B; + req->tqp_flag = 1U << HCLGE_TQP_MAP_EN_B; + if (!is_pf) + req->tqp_flag |= 1U << HCLGE_TQP_MAP_TYPE_B; req->tqp_vid = cpu_to_le16(tqp_vid); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -1457,11 +1466,6 @@ static int hclge_map_tqp(struct hclge_dev *hdev) return 0; } -static void hclge_unic_setup(struct hclge_vport *vport, u16 num_tqps) -{ - /* this would be initialized later */ -} - static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps) { struct hnae3_handle *nic = &vport->nic; @@ -1472,20 +1476,12 @@ static int hclge_vport_setup(struct hclge_vport *vport, u16 num_tqps) nic->ae_algo = &ae_algo; nic->numa_node_mask = hdev->numa_node_mask; - if (hdev->ae_dev->dev_type == HNAE3_DEV_KNIC) { - ret = hclge_knic_setup(vport, num_tqps, - hdev->num_tx_desc, hdev->num_rx_desc); - - if (ret) { - dev_err(&hdev->pdev->dev, "knic setup failed %d\n", - ret); - return ret; - } - } else { - hclge_unic_setup(vport, num_tqps); - } + ret = hclge_knic_setup(vport, num_tqps, + hdev->num_tx_desc, hdev->num_rx_desc); + if (ret) + dev_err(&hdev->pdev->dev, "knic setup failed %d\n", ret); - return 0; + return ret; } static int hclge_alloc_vport(struct hclge_dev *hdev) @@ -1591,7 +1587,8 @@ static int hclge_tx_buffer_alloc(struct hclge_dev 
*hdev, static u32 hclge_get_tc_num(struct hclge_dev *hdev) { - int i, cnt = 0; + unsigned int i; + u32 cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) if (hdev->hw_tc_map & BIT(i)) @@ -1604,7 +1601,8 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; - int i, cnt = 0; + unsigned int i; + int cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { priv = &buf_alloc->priv_buf[i]; @@ -1621,7 +1619,8 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev, struct hclge_pkt_buf_alloc *buf_alloc) { struct hclge_priv_buf *priv; - int i, cnt = 0; + unsigned int i; + int cnt = 0; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { priv = &buf_alloc->priv_buf[i]; @@ -1671,7 +1670,8 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, aligned_mps = roundup(hdev->mps, HCLGE_BUF_SIZE_UNIT); if (hnae3_dev_dcb_supported(hdev)) - shared_buf_min = 2 * aligned_mps + hdev->dv_buf_size; + shared_buf_min = HCLGE_BUF_MUL_BY * aligned_mps + + hdev->dv_buf_size; else shared_buf_min = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF + hdev->dv_buf_size; @@ -1689,7 +1689,8 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, if (hnae3_dev_dcb_supported(hdev)) { buf_alloc->s_buf.self.high = shared_buf - hdev->dv_buf_size; buf_alloc->s_buf.self.low = buf_alloc->s_buf.self.high - - roundup(aligned_mps / 2, HCLGE_BUF_SIZE_UNIT); + - roundup(aligned_mps / HCLGE_BUF_DIV_BY, + HCLGE_BUF_SIZE_UNIT); } else { buf_alloc->s_buf.self.high = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF; @@ -1697,14 +1698,18 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, } if (hnae3_dev_dcb_supported(hdev)) { + hi_thrd = shared_buf - hdev->dv_buf_size; + + if (tc_num <= NEED_RESERVE_TC_NUM) + hi_thrd = hi_thrd * BUF_RESERVE_PERCENT + / BUF_MAX_PERCENT; + if (tc_num) - hi_thrd = (shared_buf - hdev->dv_buf_size) / tc_num; - else - hi_thrd = shared_buf - hdev->dv_buf_size; + hi_thrd = hi_thrd / tc_num; - hi_thrd = max_t(u32, hi_thrd, 2 * aligned_mps); + hi_thrd = max_t(u32, hi_thrd, HCLGE_BUF_MUL_BY * aligned_mps); hi_thrd = rounddown(hi_thrd, HCLGE_BUF_SIZE_UNIT); - lo_thrd = hi_thrd - aligned_mps / 2; + lo_thrd = hi_thrd - aligned_mps / HCLGE_BUF_DIV_BY; } else { hi_thrd = aligned_mps + HCLGE_NON_DCB_ADDITIONAL_BUF; lo_thrd = aligned_mps; @@ -1749,7 +1754,7 @@ static bool hclge_rx_buf_calc_all(struct hclge_dev *hdev, bool max, { u32 rx_all = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc); u32 aligned_mps = round_up(hdev->mps, HCLGE_BUF_SIZE_UNIT); - int i; + unsigned int i; for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; @@ -1765,12 +1770,13 @@ static bool hclge_rx_buf_calc_all(struct hclge_dev *hdev, bool max, priv->enable = 1; if (hdev->tm_info.hw_pfc_map & BIT(i)) { - priv->wl.low = max ? aligned_mps : 256; + priv->wl.low = max ? aligned_mps : HCLGE_BUF_SIZE_UNIT; priv->wl.high = roundup(priv->wl.low + aligned_mps, HCLGE_BUF_SIZE_UNIT); } else { priv->wl.low = 0; - priv->wl.high = max ? (aligned_mps * 2) : aligned_mps; + priv->wl.high = max ? 
(aligned_mps * HCLGE_BUF_MUL_BY) : + aligned_mps; } priv->buf_size = priv->wl.high + hdev->dv_buf_size; @@ -1789,9 +1795,10 @@ static bool hclge_drop_nopfc_buf_till_fit(struct hclge_dev *hdev, /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; + unsigned int mask = BIT((unsigned int)i); - if (hdev->hw_tc_map & BIT(i) && - !(hdev->tm_info.hw_pfc_map & BIT(i))) { + if (hdev->hw_tc_map & mask && + !(hdev->tm_info.hw_pfc_map & mask)) { /* Clear the no pfc TC private buffer */ priv->wl.low = 0; priv->wl.high = 0; @@ -1818,9 +1825,10 @@ static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev, /* let the last to be cleared first */ for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) { struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; + unsigned int mask = BIT((unsigned int)i); - if (hdev->hw_tc_map & BIT(i) && - hdev->tm_info.hw_pfc_map & BIT(i)) { + if (hdev->hw_tc_map & mask && + hdev->tm_info.hw_pfc_map & mask) { /* Reduce the number of pfc TC with private buffer */ priv->wl.low = 0; priv->enable = 0; @@ -1837,6 +1845,55 @@ static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev, return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all); } +static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) +{ +#define COMPENSATE_BUFFER 0x3C00 +#define COMPENSATE_HALF_MPS_NUM 5 +#define PRIV_WL_GAP 0x1800 + + u32 rx_priv = hdev->pkt_buf_size - hclge_get_tx_buff_alloced(buf_alloc); + u32 tc_num = hclge_get_tc_num(hdev); + u32 half_mps = hdev->mps >> 1; + u32 min_rx_priv; + unsigned int i; + + if (tc_num) + rx_priv = rx_priv / tc_num; + + if (tc_num <= NEED_RESERVE_TC_NUM) + rx_priv = rx_priv * BUF_RESERVE_PERCENT / BUF_MAX_PERCENT; + + min_rx_priv = hdev->dv_buf_size + COMPENSATE_BUFFER + + COMPENSATE_HALF_MPS_NUM * half_mps; + min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT); + rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT); + + if (rx_priv < min_rx_priv) + return false; + + for (i = 0; i < HCLGE_MAX_TC_NUM; i++) { + struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i]; + + priv->enable = 0; + priv->wl.low = 0; + priv->wl.high = 0; + priv->buf_size = 0; + + if (!(hdev->hw_tc_map & BIT(i))) + continue; + + priv->enable = 1; + priv->buf_size = rx_priv; + priv->wl.high = rx_priv - hdev->dv_buf_size; + priv->wl.low = priv->wl.high - PRIV_WL_GAP; + } + + buf_alloc->s_buf.buf_size = 0; + + return true; +} + /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs * @hdev: pointer to struct hclge_dev * @buf_alloc: pointer to buffer calculation data @@ -1856,6 +1913,9 @@ static int hclge_rx_buffer_calc(struct hclge_dev *hdev, return 0; } + if (hclge_only_alloc_priv_buff(hdev, buf_alloc)) + return 0; + if (hclge_rx_buf_calc_all(hdev, true, buf_alloc)) return 0; @@ -2153,7 +2213,6 @@ static int hclge_init_msi(struct hclge_dev *hdev) static u8 hclge_check_speed_dup(u8 duplex, int speed) { - if (!(speed == HCLGE_MAC_SPEED_10M || speed == HCLGE_MAC_SPEED_100M)) duplex = HCLGE_MAC_FULL; @@ -2171,7 +2230,8 @@ static int hclge_cfg_mac_speed_dup_hw(struct hclge_dev *hdev, int speed, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false); - hnae3_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, !!duplex); + if (duplex) + hnae3_set_bit(req->speed_dup, HCLGE_CFG_DUPLEX_B, 1); switch (speed) { case HCLGE_MAC_SPEED_10M: @@ -2261,7 +2321,8 @@ static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) hclge_cmd_setup_basic_desc(&desc, 
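
The private-buffer sizing in hclge_only_alloc_priv_buff() above is plain integer arithmetic, so it can be sanity-checked outside the driver. A worked standalone sketch; the percentage and compensation constants mirror the hunk, while the device sizes fed into main() are invented:

#include <stdio.h>
#include <stdint.h>

#define BUF_SIZE_UNIT           256u
#define BUF_MAX_PERCENT         100u
#define BUF_RESERVE_PERCENT     90u
#define NEED_RESERVE_TC_NUM     2u
#define COMPENSATE_BUFFER       0x3C00u
#define COMPENSATE_HALF_MPS_NUM 5u

static uint32_t rnd_dn(uint32_t v) { return v / BUF_SIZE_UNIT * BUF_SIZE_UNIT; }
static uint32_t rnd_up(uint32_t v) { return rnd_dn(v + BUF_SIZE_UNIT - 1); }

int main(void)
{
	uint32_t pkt_buf = 512 * 1024, tx_alloced = 128 * 1024; /* invented */
	uint32_t dv_buf = 0x8000, mps = 1536, tc_num = 2;       /* invented */
	uint32_t rx_priv = pkt_buf - tx_alloced;
	uint32_t min_rx_priv;

	if (tc_num)
		rx_priv /= tc_num;		/* split evenly per TC */
	if (tc_num <= NEED_RESERVE_TC_NUM)	/* keep 10% headroom */
		rx_priv = rx_priv * BUF_RESERVE_PERCENT / BUF_MAX_PERCENT;

	min_rx_priv = dv_buf + COMPENSATE_BUFFER +
		      COMPENSATE_HALF_MPS_NUM * (mps >> 1);
	min_rx_priv = rnd_up(min_rx_priv);
	rx_priv = rnd_dn(rx_priv);

	printf("per-TC rx_priv=%u, min=%u, ok=%d\n",
	       rx_priv, min_rx_priv, rx_priv >= min_rx_priv);
	return 0;
}
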
HCLGE_OPC_CONFIG_AN_MODE, false); req = (struct hclge_config_auto_neg_cmd *)desc.data; - hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable); + if (enable) + hnae3_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, 1U); req->cfg_an_cmd_flag = cpu_to_le32(flag); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -2316,6 +2377,17 @@ static int hclge_restart_autoneg(struct hnae3_handle *handle) return hclge_notify_client(hdev, HNAE3_UP_CLIENT); } +static int hclge_halt_autoneg(struct hnae3_handle *handle, bool halt) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + if (hdev->hw.mac.support_autoneg && hdev->hw.mac.autoneg) + return hclge_set_autoneg_en(hdev, !halt); + + return 0; +} + static int hclge_set_fec_hw(struct hclge_dev *hdev, u32 fec_mode) { struct hclge_config_fec_cmd *req; @@ -2389,6 +2461,15 @@ static int hclge_mac_init(struct hclge_dev *hdev) return ret; } + if (hdev->hw.mac.support_autoneg) { + ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); + if (ret) { + dev_err(&hdev->pdev->dev, + "Config mac autoneg fail ret=%d\n", ret); + return ret; + } + } + mac->link = 0; if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { @@ -2423,7 +2504,8 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev) static void hclge_reset_task_schedule(struct hclge_dev *hdev) { - if (!test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) + if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) && + !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state)) schedule_work(&hdev->rst_service_task); } @@ -2458,7 +2540,7 @@ static int hclge_get_mac_link_status(struct hclge_dev *hdev) static int hclge_get_mac_phy_link(struct hclge_dev *hdev) { - int mac_state; + unsigned int mac_state; int link_stat; if (test_bit(HCLGE_STATE_DOWN, &hdev->state)) @@ -2508,6 +2590,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev) static void hclge_update_port_capability(struct hclge_mac *mac) { + /* update fec ability by speed */ + hclge_convert_setting_fec(mac); + /* firmware can not identify back plane type, the media type * read from configuration can help deal it */ @@ -2529,7 +2614,7 @@ static void hclge_update_port_capability(struct hclge_mac *mac) static int hclge_get_sfp_speed(struct hclge_dev *hdev, u32 *speed) { - struct hclge_sfp_info_cmd *resp = NULL; + struct hclge_sfp_info_cmd *resp; struct hclge_desc desc; int ret; @@ -2580,6 +2665,11 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac) mac->speed_ability = le32_to_cpu(resp->speed_ability); mac->autoneg = resp->autoneg; mac->support_autoneg = resp->autoneg_ability; + mac->speed_type = QUERY_ACTIVE_SPEED; + if (!resp->active_fec) + mac->fec_mode = 0; + else + mac->fec_mode = BIT(resp->active_fec); } else { mac->speed_type = QUERY_SFP_SPEED; } @@ -2645,6 +2735,7 @@ static void hclge_service_timer(struct timer_list *t) mod_timer(&hdev->service_timer, jiffies + HZ); hdev->hw_stats.stats_timer++; + hdev->fd_arfs_expire_timer++; hclge_task_schedule(hdev); } @@ -2693,19 +2784,11 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) return HCLGE_VECTOR0_EVENT_RST; } - if (BIT(HCLGE_VECTOR0_CORERESET_INT_B) & rst_src_reg) { - dev_info(&hdev->pdev->dev, "core reset interrupt\n"); - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); - set_bit(HNAE3_CORE_RESET, &hdev->reset_pending); - *clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B); - hdev->rst_stats.core_rst_cnt++; - return HCLGE_VECTOR0_EVENT_RST; - } - /* check for vector0 msix event source */ if 
(msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) { - dev_dbg(&hdev->pdev->dev, "received event 0x%x\n", - msix_src_reg); + dev_info(&hdev->pdev->dev, "received event 0x%x\n", + msix_src_reg); + *clearval = msix_src_reg; return HCLGE_VECTOR0_EVENT_ERR; } @@ -2717,8 +2800,11 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval) } /* print other vector0 event source */ - dev_dbg(&hdev->pdev->dev, "cmdq_src_reg:0x%x, msix_src_reg:0x%x\n", - cmdq_src_reg, msix_src_reg); + dev_info(&hdev->pdev->dev, + "CMDQ INT status:0x%x, other INT status:0x%x\n", + cmdq_src_reg, msix_src_reg); + *clearval = msix_src_reg; + return HCLGE_VECTOR0_EVENT_OTHER; } @@ -2754,8 +2840,8 @@ static void hclge_enable_vector(struct hclge_misc_vector *vector, bool enable) static irqreturn_t hclge_misc_irq_handle(int irq, void *data) { struct hclge_dev *hdev = data; + u32 clearval = 0; u32 event_cause; - u32 clearval; hclge_enable_vector(&hdev->misc_vector, false); event_cause = hclge_check_event_cause(hdev, &clearval); @@ -2797,7 +2883,8 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data) } /* clear the source of interrupt if it is not cause by reset */ - if (event_cause == HCLGE_VECTOR0_EVENT_MBX) { + if (!clearval || + event_cause == HCLGE_VECTOR0_EVENT_MBX) { hclge_clear_event_cause(hdev, event_cause, clearval); hclge_enable_vector(&hdev->misc_vector, true); } @@ -2861,6 +2948,9 @@ int hclge_notify_client(struct hclge_dev *hdev, struct hnae3_client *client = hdev->nic_client; u16 i; + if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client) + return 0; + if (!client->ops->reset_notify) return -EOPNOTSUPP; @@ -2886,7 +2976,7 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev, int ret = 0; u16 i; - if (!client) + if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client) return 0; if (!client->ops->reset_notify) @@ -2923,10 +3013,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev) reg = HCLGE_GLOBAL_RESET_REG; reg_bit = HCLGE_GLOBAL_RESET_BIT; break; - case HNAE3_CORE_RESET: - reg = HCLGE_GLOBAL_RESET_REG; - reg_bit = HCLGE_CORE_RESET_BIT; - break; case HNAE3_FUNC_RESET: reg = HCLGE_FUN_RST_ING; reg_bit = HCLGE_FUN_RST_ING_B; @@ -3058,12 +3144,6 @@ static void hclge_do_reset(struct hclge_dev *hdev) hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val); dev_info(&pdev->dev, "Global Reset requested\n"); break; - case HNAE3_CORE_RESET: - val = hclge_read_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG); - hnae3_set_bit(val, HCLGE_CORE_RESET_BIT, 1); - hclge_write_dev(&hdev->hw, HCLGE_GLOBAL_RESET_REG, val); - dev_info(&pdev->dev, "Core Reset requested\n"); - break; case HNAE3_FUNC_RESET: dev_info(&pdev->dev, "PF Reset requested\n"); /* schedule again to check later */ @@ -3083,10 +3163,11 @@ static void hclge_do_reset(struct hclge_dev *hdev) } } -static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev, +static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, unsigned long *addr) { enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; + struct hclge_dev *hdev = ae_dev->priv; /* first, resolve any unknown reset type to the known type(s) */ if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { @@ -3110,16 +3191,10 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev, rst_level = HNAE3_IMP_RESET; clear_bit(HNAE3_IMP_RESET, addr); clear_bit(HNAE3_GLOBAL_RESET, addr); - clear_bit(HNAE3_CORE_RESET, addr); clear_bit(HNAE3_FUNC_RESET, addr); } else if (test_bit(HNAE3_GLOBAL_RESET, addr)) { rst_level = HNAE3_GLOBAL_RESET; 
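
The reset-level resolution being reworked in this hunk follows a strict priority: IMP covers GLOBAL, which covers FUNC, so resolving a higher level also clears the lower pending bits, since the bigger reset implicitly performs the smaller ones. A standalone sketch of that policy with illustrative enum values:

#include <stdio.h>

enum reset { NONE_RESET, FUNC_RESET, GLOBAL_RESET, IMP_RESET };

/* Resolve the highest-priority pending reset from a bitmap and clear
 * every bit that the chosen reset already covers.
 */
static enum reset get_reset_level(unsigned long *addr)
{
	if (*addr & (1UL << IMP_RESET)) {
		*addr = 0;			/* covers everything below */
		return IMP_RESET;
	}
	if (*addr & (1UL << GLOBAL_RESET)) {
		*addr &= ~((1UL << GLOBAL_RESET) | (1UL << FUNC_RESET));
		return GLOBAL_RESET;
	}
	if (*addr & (1UL << FUNC_RESET)) {
		*addr &= ~(1UL << FUNC_RESET);
		return FUNC_RESET;
	}
	return NONE_RESET;
}

int main(void)
{
	unsigned long pending = (1UL << FUNC_RESET) | (1UL << GLOBAL_RESET);

	printf("level=%d, left=0x%lx\n", get_reset_level(&pending), pending);
	return 0;
}
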
clear_bit(HNAE3_GLOBAL_RESET, addr); - clear_bit(HNAE3_CORE_RESET, addr); - clear_bit(HNAE3_FUNC_RESET, addr); - } else if (test_bit(HNAE3_CORE_RESET, addr)) { - rst_level = HNAE3_CORE_RESET; - clear_bit(HNAE3_CORE_RESET, addr); clear_bit(HNAE3_FUNC_RESET, addr); } else if (test_bit(HNAE3_FUNC_RESET, addr)) { rst_level = HNAE3_FUNC_RESET; @@ -3147,9 +3222,6 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev) case HNAE3_GLOBAL_RESET: clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B); break; - case HNAE3_CORE_RESET: - clearval = BIT(HCLGE_VECTOR0_CORERESET_INT_B); - break; default: break; } @@ -3180,6 +3252,8 @@ static int hclge_reset_prepare_down(struct hclge_dev *hdev) static int hclge_reset_prepare_wait(struct hclge_dev *hdev) { +#define HCLGE_RESET_SYNC_TIME 100 + u32 reg_val; int ret = 0; @@ -3188,7 +3262,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) /* There is no mechanism for PF to know if VF has stopped IO * for now, just wait 100 ms for VF to stop IO */ - msleep(100); + msleep(HCLGE_RESET_SYNC_TIME); ret = hclge_func_reset_cmd(hdev, 0); if (ret) { dev_err(&hdev->pdev->dev, @@ -3208,7 +3282,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) /* There is no mechanism for PF to know if VF has stopped IO * for now, just wait 100 ms for VF to stop IO */ - msleep(100); + msleep(HCLGE_RESET_SYNC_TIME); set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); set_bit(HNAE3_FLR_DOWN, &hdev->flr_state); hdev->rst_stats.flr_rst_cnt++; @@ -3222,6 +3296,10 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) break; } + /* inform hardware that preparatory work is done */ + msleep(HCLGE_RESET_SYNC_TIME); + hclge_write_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG, + HCLGE_NIC_CMQ_ENABLE); dev_info(&hdev->pdev->dev, "prepare wait ok\n"); return ret; @@ -3230,7 +3308,6 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev) static bool hclge_reset_err_handle(struct hclge_dev *hdev, bool is_timeout) { #define MAX_RESET_FAIL_CNT 5 -#define RESET_UPGRADE_DELAY_SEC 10 if (hdev->reset_pending) { dev_info(&hdev->pdev->dev, "Reset pending %lu\n", @@ -3254,8 +3331,9 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev, bool is_timeout) dev_info(&hdev->pdev->dev, "Upgrade reset level\n"); hclge_clear_reset_cause(hdev); + set_bit(HNAE3_GLOBAL_RESET, &hdev->default_reset_request); mod_timer(&hdev->reset_timer, - jiffies + RESET_UPGRADE_DELAY_SEC * HZ); + jiffies + HCLGE_RESET_INTERVAL); return false; } @@ -3282,6 +3360,25 @@ static int hclge_reset_prepare_up(struct hclge_dev *hdev) return ret; } +static int hclge_reset_stack(struct hclge_dev *hdev) +{ + int ret; + + ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); + if (ret) + return ret; + + ret = hclge_reset_ae_dev(hdev->ae_dev); + if (ret) + return ret; + + ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT); + if (ret) + return ret; + + return hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT); +} + static void hclge_reset(struct hclge_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); @@ -3325,19 +3422,8 @@ static void hclge_reset(struct hclge_dev *hdev) goto err_reset; rtnl_lock(); - ret = hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT); - if (ret) - goto err_reset_lock; - ret = hclge_reset_ae_dev(hdev->ae_dev); - if (ret) - goto err_reset_lock; - - ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT); - if (ret) - goto err_reset_lock; - - ret = hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT); + ret = hclge_reset_stack(hdev); if (ret) goto err_reset_lock; @@ -3347,16 +3433,23 @@ static 
void hclge_reset(struct hclge_dev *hdev) if (ret) goto err_reset_lock; + rtnl_unlock(); + + ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT); + /* ignore RoCE notify error if it fails HCLGE_RESET_MAX_FAIL_CNT - 1 + * times + */ + if (ret && hdev->reset_fail_cnt < HCLGE_RESET_MAX_FAIL_CNT - 1) + goto err_reset; + + rtnl_lock(); + ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT); if (ret) goto err_reset_lock; rtnl_unlock(); - ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT); - if (ret) - goto err_reset; - ret = hclge_notify_roce_client(hdev, HNAE3_UP_CLIENT); if (ret) goto err_reset; @@ -3399,11 +3492,12 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle) if (!handle) handle = &hdev->vport[0].nic; - if (time_before(jiffies, (hdev->last_reset_time + 3 * HZ))) + if (time_before(jiffies, (hdev->last_reset_time + + HCLGE_RESET_INTERVAL))) return; else if (hdev->default_reset_request) hdev->reset_level = - hclge_get_reset_level(hdev, + hclge_get_reset_level(ae_dev, &hdev->default_reset_request); else if (time_after(jiffies, (hdev->last_reset_time + 4 * 5 * HZ))) hdev->reset_level = HNAE3_FUNC_RESET; @@ -3432,13 +3526,14 @@ static void hclge_reset_timer(struct timer_list *t) struct hclge_dev *hdev = from_timer(hdev, t, reset_timer); dev_info(&hdev->pdev->dev, - "triggering global reset in reset timer\n"); - set_bit(HNAE3_GLOBAL_RESET, &hdev->default_reset_request); + "triggering reset in reset timer\n"); hclge_reset_event(hdev->pdev, NULL); } static void hclge_reset_subtask(struct hclge_dev *hdev) { + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + /* check if there is any ongoing reset in the hardware. This status can * be checked from reset_pending. If there is then, we need to wait for * hardware to complete reset. @@ -3449,12 +3544,12 @@ static void hclge_reset_subtask(struct hclge_dev *hdev) * now. 
*/ hdev->last_reset_time = jiffies; - hdev->reset_type = hclge_get_reset_level(hdev, &hdev->reset_pending); + hdev->reset_type = hclge_get_reset_level(ae_dev, &hdev->reset_pending); if (hdev->reset_type != HNAE3_NONE_RESET) hclge_reset(hdev); /* check if we got any *new* reset requests to be honored */ - hdev->reset_type = hclge_get_reset_level(hdev, &hdev->reset_request); + hdev->reset_type = hclge_get_reset_level(ae_dev, &hdev->reset_request); if (hdev->reset_type != HNAE3_NONE_RESET) hclge_do_reset(hdev); @@ -3521,6 +3616,11 @@ static void hclge_service_task(struct work_struct *work) hclge_update_port_info(hdev); hclge_update_link_status(hdev); hclge_update_vport_alive(hdev); + hclge_sync_vlan_filter(hdev); + if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) { + hclge_rfs_filter_expire(hdev); + hdev->fd_arfs_expire_timer = 0; + } hclge_service_complete(hdev); } @@ -3614,29 +3714,28 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev, const u8 hfunc, const u8 *key) { struct hclge_rss_config_cmd *req; + unsigned int key_offset = 0; struct hclge_desc desc; - int key_offset; + int key_counts; int key_size; int ret; + key_counts = HCLGE_RSS_KEY_SIZE; req = (struct hclge_rss_config_cmd *)desc.data; - for (key_offset = 0; key_offset < 3; key_offset++) { + while (key_counts) { hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG, false); req->hash_config |= (hfunc & HCLGE_RSS_HASH_ALGO_MASK); req->hash_config |= (key_offset << HCLGE_RSS_HASH_KEY_OFFSET_B); - if (key_offset == 2) - key_size = - HCLGE_RSS_KEY_SIZE - HCLGE_RSS_HASH_KEY_NUM * 2; - else - key_size = HCLGE_RSS_HASH_KEY_NUM; - + key_size = min(HCLGE_RSS_HASH_KEY_NUM, key_counts); memcpy(req->hash_key, key + key_offset * HCLGE_RSS_HASH_KEY_NUM, key_size); + key_counts -= key_size; + key_offset++; ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -3995,13 +4094,14 @@ int hclge_rss_init_hw(struct hclge_dev *hdev) struct hclge_vport *vport = hdev->vport; u8 *rss_indir = vport[0].rss_indirection_tbl; u16 rss_size = vport[0].alloc_rss_size; + u16 tc_offset[HCLGE_MAX_TC_NUM] = {0}; + u16 tc_size[HCLGE_MAX_TC_NUM] = {0}; u8 *key = vport[0].rss_hash_key; u8 hfunc = vport[0].rss_algo; - u16 tc_offset[HCLGE_MAX_TC_NUM]; u16 tc_valid[HCLGE_MAX_TC_NUM]; - u16 tc_size[HCLGE_MAX_TC_NUM]; u16 roundup_size; - int i, ret; + unsigned int i; + int ret; ret = hclge_set_rss_indir_table(hdev, rss_indir); if (ret) @@ -4156,8 +4256,7 @@ int hclge_bind_ring_with_vector(struct hclge_vport *vport, return 0; } -static int hclge_map_ring_to_vector(struct hnae3_handle *handle, - int vector, +static int hclge_map_ring_to_vector(struct hnae3_handle *handle, int vector, struct hnae3_ring_chain_node *ring_chain) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -4174,8 +4273,7 @@ static int hclge_map_ring_to_vector(struct hnae3_handle *handle, return hclge_bind_ring_with_vector(vport, vector_id, true, ring_chain); } -static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, - int vector, +static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, int vector, struct hnae3_ring_chain_node *ring_chain) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -4196,8 +4294,7 @@ static int hclge_unmap_ring_frm_vector(struct hnae3_handle *handle, if (ret) dev_err(&handle->pdev->dev, "Unmap ring from vector fail. 
vectorid=%d, ret =%d\n", - vector_id, - ret); + vector_id, ret); return ret; } @@ -4503,19 +4600,19 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, case 0: return false; case BIT(INNER_DST_MAC): - for (i = 0; i < 6; i++) { - calc_x(key_x[5 - i], rule->tuples.dst_mac[i], + for (i = 0; i < ETH_ALEN; i++) { + calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], rule->tuples_mask.dst_mac[i]); - calc_y(key_y[5 - i], rule->tuples.dst_mac[i], + calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], rule->tuples_mask.dst_mac[i]); } return true; case BIT(INNER_SRC_MAC): - for (i = 0; i < 6; i++) { - calc_x(key_x[5 - i], rule->tuples.src_mac[i], + for (i = 0; i < ETH_ALEN; i++) { + calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], rule->tuples.src_mac[i]); - calc_y(key_y[5 - i], rule->tuples.src_mac[i], + calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], rule->tuples.src_mac[i]); } @@ -4551,19 +4648,19 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, return true; case BIT(INNER_SRC_IP): - calc_x(tmp_x_l, rule->tuples.src_ip[3], - rule->tuples_mask.src_ip[3]); - calc_y(tmp_y_l, rule->tuples.src_ip[3], - rule->tuples_mask.src_ip[3]); + calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX], + rule->tuples_mask.src_ip[IPV4_INDEX]); + calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX], + rule->tuples_mask.src_ip[IPV4_INDEX]); *(__le32 *)key_x = cpu_to_le32(tmp_x_l); *(__le32 *)key_y = cpu_to_le32(tmp_y_l); return true; case BIT(INNER_DST_IP): - calc_x(tmp_x_l, rule->tuples.dst_ip[3], - rule->tuples_mask.dst_ip[3]); - calc_y(tmp_y_l, rule->tuples.dst_ip[3], - rule->tuples_mask.dst_ip[3]); + calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX], + rule->tuples_mask.dst_ip[IPV4_INDEX]); + calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX], + rule->tuples_mask.dst_ip[IPV4_INDEX]); *(__le32 *)key_x = cpu_to_le32(tmp_x_l); *(__le32 *)key_y = cpu_to_le32(tmp_y_l); @@ -4617,7 +4714,7 @@ static void hclge_fd_convert_meta_data(struct hclge_fd_key_cfg *key_cfg, { u32 tuple_bit, meta_data = 0, tmp_x, tmp_y, port_number; u8 cur_pos = 0, tuple_size, shift_bits; - int i; + unsigned int i; for (i = 0; i < MAX_META_DATA; i++) { tuple_size = meta_data_key_info[i].key_length; @@ -4659,7 +4756,8 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage, struct hclge_fd_key_cfg *key_cfg = &hdev->fd_cfg.key_cfg[stage]; u8 key_x[MAX_KEY_BYTES], key_y[MAX_KEY_BYTES]; u8 *cur_key_x, *cur_key_y; - int i, ret, tuple_size; + unsigned int i; + int ret, tuple_size; u8 meta_data_region; memset(key_x, 0, sizeof(key_x)); @@ -4812,6 +4910,7 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, *unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | BIT(INNER_IP_TOS); + /* check whether src/dst ip address used */ if (!tcp_ip6_spec->ip6src[0] && !tcp_ip6_spec->ip6src[1] && !tcp_ip6_spec->ip6src[2] && !tcp_ip6_spec->ip6src[3]) *unused |= BIT(INNER_SRC_IP); @@ -4836,6 +4935,7 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); + /* check whether src/dst ip address used */ if (!usr_ip6_spec->ip6src[0] && !usr_ip6_spec->ip6src[1] && !usr_ip6_spec->ip6src[2] && !usr_ip6_spec->ip6src[3]) *unused |= BIT(INNER_SRC_IP); @@ -4906,14 +5006,18 @@ static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location) struct hclge_fd_rule *rule = NULL; struct hlist_node *node2; + spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { if (rule->location >= location) break; } 
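
hclge_fd_rule_exist() above walks a list that is kept sorted by location, so it can stop at the first node whose location is >= the target; the spin_lock_bh() is presumably chosen because aRFS inserts rules from softirq context. The search logic itself, modeled standalone:

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct fd_rule {
	unsigned short location;
	struct fd_rule *next;	/* list kept sorted by location */
};

/* Stop at the first rule whose location is >= the target; because the
 * list is sorted, anything later cannot match either.
 */
static bool rule_exists(const struct fd_rule *head, unsigned short location)
{
	const struct fd_rule *r;

	for (r = head; r; r = r->next)
		if (r->location >= location)
			break;
	return r && r->location == location;
}

int main(void)
{
	struct fd_rule c = { 7, NULL }, b = { 4, &c }, a = { 1, &b };

	printf("%d %d\n", rule_exists(&a, 4), rule_exists(&a, 5));
	return 0;
}
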
+	spin_unlock_bh(&hdev->fd_rule_lock);
+
 	return rule && rule->location == location;
 }
 
+/* make sure to be called while holding fd_rule_lock */
 static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 				     struct hclge_fd_rule *new_rule,
 				     u16 location,
@@ -4937,9 +5041,13 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 		kfree(rule);
 		hdev->hclge_fd_rule_num--;
 
-		if (!is_add)
-			return 0;
+		if (!is_add) {
+			if (!hdev->hclge_fd_rule_num)
+				hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+			clear_bit(location, hdev->fd_bmap);
 
+			return 0;
+		}
 	} else if (!is_add) {
 		dev_err(&hdev->pdev->dev,
 			"delete fail, rule %d is inexistent\n",
@@ -4954,7 +5062,9 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 	else
 		hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list);
 
+	set_bit(location, hdev->fd_bmap);
 	hdev->hclge_fd_rule_num++;
+	hdev->fd_active_type = new_rule->rule_type;
 
 	return 0;
 }
@@ -4969,14 +5079,14 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 	case SCTP_V4_FLOW:
 	case TCP_V4_FLOW:
 	case UDP_V4_FLOW:
-		rule->tuples.src_ip[3] =
+		rule->tuples.src_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
-		rule->tuples_mask.src_ip[3] =
+		rule->tuples_mask.src_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
-		rule->tuples.dst_ip[3] =
+		rule->tuples.dst_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
-		rule->tuples_mask.dst_ip[3] =
+		rule->tuples_mask.dst_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
 
 		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
@@ -4995,14 +5105,14 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 		break;
 	case IP_USER_FLOW:
-		rule->tuples.src_ip[3] =
+		rule->tuples.src_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
-		rule->tuples_mask.src_ip[3] =
+		rule->tuples_mask.src_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
-		rule->tuples.dst_ip[3] =
+		rule->tuples.dst_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
-		rule->tuples_mask.dst_ip[3] =
+		rule->tuples_mask.dst_ip[IPV4_INDEX] =
 			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
 
 		rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
@@ -5019,14 +5129,14 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 	case TCP_V6_FLOW:
 	case UDP_V6_FLOW:
 		be32_to_cpu_array(rule->tuples.src_ip,
-				  fs->h_u.tcp_ip6_spec.ip6src, 4);
+				  fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples_mask.src_ip,
-				  fs->m_u.tcp_ip6_spec.ip6src, 4);
+				  fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples.dst_ip,
-				  fs->h_u.tcp_ip6_spec.ip6dst, 4);
+				  fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples_mask.dst_ip,
-				  fs->m_u.tcp_ip6_spec.ip6dst, 4);
+				  fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
 
 		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
 		rule->tuples_mask.src_port =
@@ -5042,14 +5152,14 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 		break;
 	case IPV6_USER_FLOW:
 		be32_to_cpu_array(rule->tuples.src_ip,
-				  fs->h_u.usr_ip6_spec.ip6src, 4);
+				  fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples_mask.src_ip,
-				  fs->m_u.usr_ip6_spec.ip6src, 4);
+				  fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples.dst_ip,
-				  fs->h_u.usr_ip6_spec.ip6dst, 4);
+				  fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
 		be32_to_cpu_array(rule->tuples_mask.dst_ip,
-				  fs->m_u.usr_ip6_spec.ip6dst, 4);
+				  fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
 
 		rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
 		rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
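
The rule-list update in the hunks above now keeps three pieces of state in sync: the fd_bmap location bitmap, the rule counter, and the active rule type, which falls back to "none" once the last rule is deleted. A standalone sketch of that bookkeeping; the names and types are illustrative:

#include <stdio.h>
#include <stdbool.h>

enum fd_rule_type { FD_RULE_NONE, FD_EP_ACTIVE, FD_ARFS_ACTIVE };

struct fd_state {
	unsigned long bmap;		/* one bit per rule location */
	unsigned int num;		/* number of installed rules */
	enum fd_rule_type active_type;
};

static void rule_update(struct fd_state *s, unsigned int loc, bool add,
			enum fd_rule_type type)
{
	if (add) {
		s->bmap |= 1UL << loc;
		s->num++;
		s->active_type = type;	/* the last inserted rule wins */
	} else {
		s->bmap &= ~(1UL << loc);
		if (s->num && !--s->num)
			s->active_type = FD_RULE_NONE;
	}
}

int main(void)
{
	struct fd_state s = { 0, 0, FD_RULE_NONE };

	rule_update(&s, 3, true, FD_EP_ACTIVE);
	rule_update(&s, 3, false, FD_RULE_NONE);
	printf("num=%u type=%d bmap=0x%lx\n", s.num, s.active_type, s.bmap);
	return 0;
}
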
int hclge_fd_get_tuple(struct hclge_dev *hdev, return 0; } +/* make sure fd_rule_lock is held when calling this function */ +static int hclge_fd_config_rule(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + int ret; + + if (!rule) { + dev_err(&hdev->pdev->dev, + "The flow director rule is NULL\n"); + return -EINVAL; + } + + /* it will never fail here, so there is no need to check the + * return value + */ + hclge_fd_update_rule_list(hdev, rule, rule->location, true); + + ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) + goto clear_rule; + + ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) + goto clear_rule; + + return 0; + +clear_rule: + hclge_fd_update_rule_list(hdev, rule, rule->location, false); + return ret; +} + static int hclge_add_fd_entry(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd) { @@ -5174,8 +5314,10 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, return -ENOMEM; ret = hclge_fd_get_tuple(hdev, fs, rule); - if (ret) - goto free_rule; + if (ret) { + kfree(rule); + return ret; + } rule->flow_type = fs->flow_type; @@ -5184,24 +5326,19 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, rule->vf_id = dst_vport_id; rule->queue_id = q_index; rule->action = action; + rule->rule_type = HCLGE_FD_EP_ACTIVE; - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) - goto free_rule; + /* to avoid rule conflicts, we need to clear all arfs rules + * when the user configures a rule via ethtool + */ + hclge_clear_arfs_rules(handle); - ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) - goto free_rule; + spin_lock_bh(&hdev->fd_rule_lock); + ret = hclge_fd_config_rule(hdev, rule); - ret = hclge_fd_update_rule_list(hdev, rule, fs->location, true); - if (ret) - goto free_rule; + spin_unlock_bh(&hdev->fd_rule_lock); return ret; - -free_rule: - kfree(rule); - return ret; } static int hclge_del_fd_entry(struct hnae3_handle *handle, @@ -5222,18 +5359,21 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle, if (!hclge_fd_rule_exist(hdev, fs->location)) { dev_err(&hdev->pdev->dev, - "Delete fail, rule %d is inexistent\n", - fs->location); + "Delete fail, rule %d is inexistent\n", fs->location); return -ENOENT; } - ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, - fs->location, NULL, false); + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location, + NULL, false); if (ret) return ret; - return hclge_fd_update_rule_list(hdev, NULL, fs->location, - false); + spin_lock_bh(&hdev->fd_rule_lock); + ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false); + + spin_unlock_bh(&hdev->fd_rule_lock); + + return ret; } static void hclge_del_all_fd_entries(struct hnae3_handle *handle, @@ -5243,25 +5383,30 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; + u16 location; if (!hnae3_dev_fd_supported(hdev)) return; + spin_lock_bh(&hdev->fd_rule_lock); + for_each_set_bit(location, hdev->fd_bmap, + hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) + hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location, + NULL, false); + if (clear_list) { hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { - hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, - rule->location, NULL, false); hlist_del(&rule->rule_node); kfree(rule); - hdev->hclge_fd_rule_num--; } - } else { - hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, - rule_node) - hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, -
rule->location, NULL, false); + hdev->fd_active_type = HCLGE_FD_RULE_NONE; + hdev->hclge_fd_rule_num = 0; + bitmap_zero(hdev->fd_bmap, + hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]); } + + spin_unlock_bh(&hdev->fd_rule_lock); } static int hclge_restore_fd_entries(struct hnae3_handle *handle) @@ -5283,6 +5428,7 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle) if (!hdev->fd_en) return 0; + spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); if (!ret) @@ -5292,11 +5438,18 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle) dev_warn(&hdev->pdev->dev, "Restore rule %d failed, remove it\n", rule->location); + clear_bit(rule->location, hdev->fd_bmap); hlist_del(&rule->rule_node); kfree(rule); hdev->hclge_fd_rule_num--; } } + + if (hdev->hclge_fd_rule_num) + hdev->fd_active_type = HCLGE_FD_EP_ACTIVE; + + spin_unlock_bh(&hdev->fd_rule_lock); + return 0; } @@ -5329,13 +5482,18 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, fs = (struct ethtool_rx_flow_spec *)&cmd->fs; + spin_lock_bh(&hdev->fd_rule_lock); + hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { if (rule->location >= fs->location) break; } - if (!rule || fs->location != rule->location) + if (!rule || fs->location != rule->location) { + spin_unlock_bh(&hdev->fd_rule_lock); + return -ENOENT; + } fs->flow_type = rule->flow_type; switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { @@ -5343,16 +5501,16 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, case TCP_V4_FLOW: case UDP_V4_FLOW: fs->h_u.tcp_ip4_spec.ip4src = - cpu_to_be32(rule->tuples.src_ip[3]); + cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]); fs->m_u.tcp_ip4_spec.ip4src = - rule->unused_tuple & BIT(INNER_SRC_IP) ? - 0 : cpu_to_be32(rule->tuples_mask.src_ip[3]); + rule->unused_tuple & BIT(INNER_SRC_IP) ? + 0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]); fs->h_u.tcp_ip4_spec.ip4dst = - cpu_to_be32(rule->tuples.dst_ip[3]); + cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]); fs->m_u.tcp_ip4_spec.ip4dst = - rule->unused_tuple & BIT(INNER_DST_IP) ? - 0 : cpu_to_be32(rule->tuples_mask.dst_ip[3]); + rule->unused_tuple & BIT(INNER_DST_IP) ? + 0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]); fs->h_u.tcp_ip4_spec.psrc = cpu_to_be16(rule->tuples.src_port); fs->m_u.tcp_ip4_spec.psrc = @@ -5372,16 +5530,16 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, break; case IP_USER_FLOW: fs->h_u.usr_ip4_spec.ip4src = - cpu_to_be32(rule->tuples.src_ip[3]); + cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]); fs->m_u.tcp_ip4_spec.ip4src = - rule->unused_tuple & BIT(INNER_SRC_IP) ? - 0 : cpu_to_be32(rule->tuples_mask.src_ip[3]); + rule->unused_tuple & BIT(INNER_SRC_IP) ? + 0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]); fs->h_u.usr_ip4_spec.ip4dst = - cpu_to_be32(rule->tuples.dst_ip[3]); + cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]); fs->m_u.usr_ip4_spec.ip4dst = - rule->unused_tuple & BIT(INNER_DST_IP) ? - 0 : cpu_to_be32(rule->tuples_mask.dst_ip[3]); + rule->unused_tuple & BIT(INNER_DST_IP) ? 
+ 0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]); fs->h_u.usr_ip4_spec.tos = rule->tuples.ip_tos; fs->m_u.usr_ip4_spec.tos = @@ -5400,20 +5558,22 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, case TCP_V6_FLOW: case UDP_V6_FLOW: cpu_to_be32_array(fs->h_u.tcp_ip6_spec.ip6src, - rule->tuples.src_ip, 4); + rule->tuples.src_ip, IPV6_SIZE); if (rule->unused_tuple & BIT(INNER_SRC_IP)) - memset(fs->m_u.tcp_ip6_spec.ip6src, 0, sizeof(int) * 4); + memset(fs->m_u.tcp_ip6_spec.ip6src, 0, + sizeof(int) * IPV6_SIZE); else cpu_to_be32_array(fs->m_u.tcp_ip6_spec.ip6src, - rule->tuples_mask.src_ip, 4); + rule->tuples_mask.src_ip, IPV6_SIZE); cpu_to_be32_array(fs->h_u.tcp_ip6_spec.ip6dst, - rule->tuples.dst_ip, 4); + rule->tuples.dst_ip, IPV6_SIZE); if (rule->unused_tuple & BIT(INNER_DST_IP)) - memset(fs->m_u.tcp_ip6_spec.ip6dst, 0, sizeof(int) * 4); + memset(fs->m_u.tcp_ip6_spec.ip6dst, 0, + sizeof(int) * IPV6_SIZE); else cpu_to_be32_array(fs->m_u.tcp_ip6_spec.ip6dst, - rule->tuples_mask.dst_ip, 4); + rule->tuples_mask.dst_ip, IPV6_SIZE); fs->h_u.tcp_ip6_spec.psrc = cpu_to_be16(rule->tuples.src_port); fs->m_u.tcp_ip6_spec.psrc = @@ -5428,20 +5588,22 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, break; case IPV6_USER_FLOW: cpu_to_be32_array(fs->h_u.usr_ip6_spec.ip6src, - rule->tuples.src_ip, 4); + rule->tuples.src_ip, IPV6_SIZE); if (rule->unused_tuple & BIT(INNER_SRC_IP)) - memset(fs->m_u.usr_ip6_spec.ip6src, 0, sizeof(int) * 4); + memset(fs->m_u.usr_ip6_spec.ip6src, 0, + sizeof(int) * IPV6_SIZE); else cpu_to_be32_array(fs->m_u.usr_ip6_spec.ip6src, - rule->tuples_mask.src_ip, 4); + rule->tuples_mask.src_ip, IPV6_SIZE); cpu_to_be32_array(fs->h_u.usr_ip6_spec.ip6dst, - rule->tuples.dst_ip, 4); + rule->tuples.dst_ip, IPV6_SIZE); if (rule->unused_tuple & BIT(INNER_DST_IP)) - memset(fs->m_u.usr_ip6_spec.ip6dst, 0, sizeof(int) * 4); + memset(fs->m_u.usr_ip6_spec.ip6dst, 0, + sizeof(int) * IPV6_SIZE); else cpu_to_be32_array(fs->m_u.usr_ip6_spec.ip6dst, - rule->tuples_mask.dst_ip, 4); + rule->tuples_mask.dst_ip, IPV6_SIZE); fs->h_u.usr_ip6_spec.l4_proto = rule->tuples.ip_proto; fs->m_u.usr_ip6_spec.l4_proto = @@ -5474,6 +5636,7 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, break; default: + spin_unlock_bh(&hdev->fd_rule_lock); return -EOPNOTSUPP; } @@ -5505,6 +5668,8 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle, fs->ring_cookie |= vf_id; } + spin_unlock_bh(&hdev->fd_rule_lock); + return 0; } @@ -5522,20 +5687,208 @@ static int hclge_get_all_rules(struct hnae3_handle *handle, cmd->data = hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]; + spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { - if (cnt == cmd->rule_cnt) + if (cnt == cmd->rule_cnt) { + spin_unlock_bh(&hdev->fd_rule_lock); return -EMSGSIZE; + } rule_locs[cnt] = rule->location; cnt++; } + spin_unlock_bh(&hdev->fd_rule_lock); + cmd->rule_cnt = cnt; return 0; } +static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys, + struct hclge_fd_rule_tuples *tuples) +{ + tuples->ether_proto = be16_to_cpu(fkeys->basic.n_proto); + tuples->ip_proto = fkeys->basic.ip_proto; + tuples->dst_port = be16_to_cpu(fkeys->ports.dst); + + if (fkeys->basic.n_proto == htons(ETH_P_IP)) { + tuples->src_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.src); + tuples->dst_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.dst); + } else { + memcpy(tuples->src_ip, + fkeys->addrs.v6addrs.src.in6_u.u6_addr32, + sizeof(tuples->src_ip)); + memcpy(tuples->dst_ip, + 
fkeys->addrs.v6addrs.dst.in6_u.u6_addr32, + sizeof(tuples->dst_ip)); + } +} + +/* traverse all rules, check whether an existing rule has the same tuples */ +static struct hclge_fd_rule * +hclge_fd_search_flow_keys(struct hclge_dev *hdev, + const struct hclge_fd_rule_tuples *tuples) +{ + struct hclge_fd_rule *rule = NULL; + struct hlist_node *node; + + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (!memcmp(tuples, &rule->tuples, sizeof(*tuples))) + return rule; + } + + return NULL; +} + +static void hclge_fd_build_arfs_rule(const struct hclge_fd_rule_tuples *tuples, + struct hclge_fd_rule *rule) +{ + rule->unused_tuple = BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | + BIT(INNER_VLAN_TAG_FST) | BIT(INNER_IP_TOS) | + BIT(INNER_SRC_PORT); + rule->action = 0; + rule->vf_id = 0; + rule->rule_type = HCLGE_FD_ARFS_ACTIVE; + if (tuples->ether_proto == ETH_P_IP) { + if (tuples->ip_proto == IPPROTO_TCP) + rule->flow_type = TCP_V4_FLOW; + else + rule->flow_type = UDP_V4_FLOW; + } else { + if (tuples->ip_proto == IPPROTO_TCP) + rule->flow_type = TCP_V6_FLOW; + else + rule->flow_type = UDP_V6_FLOW; + } + memcpy(&rule->tuples, tuples, sizeof(rule->tuples)); + memset(&rule->tuples_mask, 0xFF, sizeof(rule->tuples_mask)); +} + +static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, + u16 flow_id, struct flow_keys *fkeys) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_fd_rule_tuples new_tuples; + struct hclge_dev *hdev = vport->back; + struct hclge_fd_rule *rule; + u16 tmp_queue_id; + u16 bit_id; + int ret; + + if (!hnae3_dev_fd_supported(hdev)) + return -EOPNOTSUPP; + + memset(&new_tuples, 0, sizeof(new_tuples)); + hclge_fd_get_flow_tuples(fkeys, &new_tuples); + + spin_lock_bh(&hdev->fd_rule_lock); + + /* when there are already fd rules added by the user, + * arfs should not take effect + */ + if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) { + spin_unlock_bh(&hdev->fd_rule_lock); + + return -EOPNOTSUPP; + } + + /* check whether a flow director filter already exists for this flow: + * if not, create a new filter for it; + * if a filter exists with a different queue id, modify the filter; + * if a filter exists with the same queue id, do nothing + */ + rule = hclge_fd_search_flow_keys(hdev, &new_tuples); + if (!rule) { + bit_id = find_first_zero_bit(hdev->fd_bmap, MAX_FD_FILTER_NUM); + if (bit_id >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) { + spin_unlock_bh(&hdev->fd_rule_lock); + + return -ENOSPC; + } + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) { + spin_unlock_bh(&hdev->fd_rule_lock); + + return -ENOMEM; + } + + set_bit(bit_id, hdev->fd_bmap); + rule->location = bit_id; + rule->flow_id = flow_id; + rule->queue_id = queue_id; + hclge_fd_build_arfs_rule(&new_tuples, rule); + ret = hclge_fd_config_rule(hdev, rule); + + spin_unlock_bh(&hdev->fd_rule_lock); + + if (ret) + return ret; + + return rule->location; + } + + spin_unlock_bh(&hdev->fd_rule_lock); + + if (rule->queue_id == queue_id) + return rule->location; + + tmp_queue_id = rule->queue_id; + rule->queue_id = queue_id; + ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) { + rule->queue_id = tmp_queue_id; + return ret; + } + + return rule->location; +} + +static void hclge_rfs_filter_expire(struct hclge_dev *hdev) +{ +#ifdef CONFIG_RFS_ACCEL + struct hnae3_handle *handle = &hdev->vport[0].nic; + struct hclge_fd_rule *rule; + struct hlist_node *node; + HLIST_HEAD(del_list); + + spin_lock_bh(&hdev->fd_rule_lock); + if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) { +
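/* Editor's aside: hclge_add_fd_entry_by_arfs() above follows a common
 * pattern -- look up a rule by its tuples, otherwise claim the first free
 * TCAM location from a bitmap (find_first_zero_bit() in the driver). A
 * self-contained userspace sketch of that pattern; all names here are
 * hypothetical, not driver symbols:
 */
#include <stdint.h>
#include <stdio.h>

#define RULE_NUM 64	/* stand-in for the stage-1 rule budget */

struct tuples { uint32_t saddr, daddr; uint16_t dport; };
struct rule { int used; struct tuples t; uint16_t queue; };

static struct rule table[RULE_NUM];
static uint64_t bmap;	/* one bit per TCAM location */

/* return the location of an existing rule for @t, or claim a free one */
static int rule_get(const struct tuples *t, uint16_t queue)
{
	int loc;

	for (loc = 0; loc < RULE_NUM; loc++)
		if (table[loc].used && table[loc].t.saddr == t->saddr &&
		    table[loc].t.daddr == t->daddr &&
		    table[loc].t.dport == t->dport)
			return loc;	/* already programmed */

	for (loc = 0; loc < RULE_NUM && (bmap & (1ULL << loc)); loc++)
		;
	if (loc == RULE_NUM)
		return -1;	/* table full, like the -ENOSPC path above */

	bmap |= 1ULL << loc;
	table[loc].used = 1;
	table[loc].t = *t;
	table[loc].queue = queue;
	return loc;
}

int main(void)
{
	struct tuples t = { 0x0a000001, 0x0a000002, 80 };

	printf("first add: loc %d\n", rule_get(&t, 3));	/* 0: new rule */
	printf("re-add:    loc %d\n", rule_get(&t, 3));	/* 0: found again */
	return 0;
}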
spin_unlock_bh(&hdev->fd_rule_lock); + return; + } + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (rps_may_expire_flow(handle->netdev, rule->queue_id, + rule->flow_id, rule->location)) { + hlist_del_init(&rule->rule_node); + hlist_add_head(&rule->rule_node, &del_list); + hdev->hclge_fd_rule_num--; + clear_bit(rule->location, hdev->fd_bmap); + } + } + spin_unlock_bh(&hdev->fd_rule_lock); + + hlist_for_each_entry_safe(rule, node, &del_list, rule_node) { + hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + kfree(rule); + } +#endif +} + +static void hclge_clear_arfs_rules(struct hnae3_handle *handle) +{ +#ifdef CONFIG_RFS_ACCEL + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE) + hclge_del_all_fd_entries(handle, true); +#endif +} + static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -5565,10 +5918,12 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + bool clear; hdev->fd_en = enable; + clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE ? true : false; if (!enable) - hclge_del_all_fd_entries(handle, false); + hclge_del_all_fd_entries(handle, clear); else hclge_restore_fd_entries(handle); } @@ -5582,20 +5937,20 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) int ret; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false); - hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0); - hnae3_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0); - hnae3_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0); - hnae3_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0); - hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable); - hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable); + + if (enable) { + hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, 1U); + hnae3_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, 1U); + } + req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -5726,7 +6081,7 @@ static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en, return -EBUSY; } -static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id, +static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id, int stream_id, bool enable) { struct hclge_desc desc; @@ -5737,7 +6092,8 @@ static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id, hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false); req->tqp_id = 
cpu_to_le16(tqp_id & HCLGE_RING_ID_MASK); req->stream_id = cpu_to_le16(stream_id); - req->enable |= enable << HCLGE_TQP_ENABLE_B; + if (enable) + req->enable |= 1U << HCLGE_TQP_ENABLE_B; ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) @@ -5838,6 +6194,8 @@ static void hclge_ae_stop(struct hnae3_handle *handle) set_bit(HCLGE_STATE_DOWN, &hdev->state); + hclge_clear_arfs_rules(handle); + /* If it is not PF reset, the firmware will disable the MAC, * so it only need to stop phy here. */ @@ -5903,11 +6261,11 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport, if (op == HCLGE_MAC_VLAN_ADD) { if ((!resp_code) || (resp_code == 1)) { return_status = 0; - } else if (resp_code == 2) { + } else if (resp_code == HCLGE_ADD_UC_OVERFLOW) { return_status = -ENOSPC; dev_err(&hdev->pdev->dev, "add mac addr failed for uc_overflow.\n"); - } else if (resp_code == 3) { + } else if (resp_code == HCLGE_ADD_MC_OVERFLOW) { return_status = -ENOSPC; dev_err(&hdev->pdev->dev, "add mac addr failed for mc_overflow.\n"); @@ -5952,13 +6310,15 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport, static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr) { - int word_num; - int bit_num; +#define HCLGE_VF_NUM_IN_FIRST_DESC 192 + + unsigned int word_num; + unsigned int bit_num; if (vfid > 255 || vfid < 0) return -EIO; - if (vfid >= 0 && vfid <= 191) { + if (vfid >= 0 && vfid < HCLGE_VF_NUM_IN_FIRST_DESC) { word_num = vfid / 32; bit_num = vfid % 32; if (clr) @@ -5966,7 +6326,7 @@ static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr) else desc[1].data[word_num] |= cpu_to_le32(1 << bit_num); } else { - word_num = (vfid - 192) / 32; + word_num = (vfid - HCLGE_VF_NUM_IN_FIRST_DESC) / 32; bit_num = vfid % 32; if (clr) desc[2].data[word_num] &= cpu_to_le32(~(1 << bit_num)); @@ -6149,6 +6509,10 @@ static int hclge_init_umv_space(struct hclge_dev *hdev) mutex_init(&hdev->umv_mutex); hdev->max_umv_size = allocated_size; + /* divide max_umv_size by (hdev->num_req_vfs + 2), in order to + * preserve some unicast mac vlan table entries shared by pf + * and its vfs. + */ hdev->priv_umv_size = hdev->max_umv_size / (hdev->num_req_vfs + 2); hdev->share_umv_size = hdev->priv_umv_size + hdev->max_umv_size % (hdev->num_req_vfs + 2); @@ -6181,7 +6545,9 @@ static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size, req = (struct hclge_umv_spc_alc_cmd *)desc.data; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_ALLOCATE, false); - hnae3_set_bit(req->allocate, HCLGE_UMV_SPC_ALC_B, !is_alloc); + if (!is_alloc) + hnae3_set_bit(req->allocate, HCLGE_UMV_SPC_ALC_B, 1); + req->space_size = cpu_to_le32(space_size); ret = hclge_cmd_send(&hdev->hw, &desc, 1); @@ -6270,8 +6636,7 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, is_multicast_ether_addr(addr)) { dev_err(&hdev->pdev->dev, "Set_uc mac err! invalid mac:%pM. is_zero:%d,is_br=%d,is_mul=%d\n", - addr, - is_zero_ether_addr(addr), + addr, is_zero_ether_addr(addr), is_broadcast_ether_addr(addr), is_multicast_ether_addr(addr)); return -EINVAL; @@ -6338,9 +6703,8 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport, if (is_zero_ether_addr(addr) || is_broadcast_ether_addr(addr) || is_multicast_ether_addr(addr)) { - dev_dbg(&hdev->pdev->dev, - "Remove mac err! invalid mac:%pM.\n", - addr); + dev_dbg(&hdev->pdev->dev, "Remove mac err! 
invalid mac:%pM.\n", + addr); return -EINVAL; } @@ -6381,18 +6745,16 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport, hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0); hclge_prepare_mac_addr(&req, addr, true); status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true); - if (!status) { - /* This mac addr exist, update VFID for it */ - hclge_update_desc_vfid(desc, vport->vport_id, false); - status = hclge_add_mac_vlan_tbl(vport, &req, desc); - } else { + if (status) { /* This mac addr do not exist, add new entry for it */ memset(desc[0].data, 0, sizeof(desc[0].data)); memset(desc[1].data, 0, sizeof(desc[0].data)); memset(desc[2].data, 0, sizeof(desc[0].data)); - hclge_update_desc_vfid(desc, vport->vport_id, false); - status = hclge_add_mac_vlan_tbl(vport, &req, desc); } + status = hclge_update_desc_vfid(desc, vport->vport_id, false); + if (status) + return status; + status = hclge_add_mac_vlan_tbl(vport, &req, desc); if (status == -ENOSPC) dev_err(&hdev->pdev->dev, "mc mac vlan table is full\n"); @@ -6430,7 +6792,9 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport, status = hclge_lookup_mac_vlan_tbl(vport, &req, desc, true); if (!status) { /* This mac addr exist, remove this handle's VFID for it */ - hclge_update_desc_vfid(desc, vport->vport_id, true); + status = hclge_update_desc_vfid(desc, vport->vport_id, true); + if (status) + return status; if (hclge_is_all_function_id_zero(desc)) /* All the vfid is zero, so need to delete this entry */ @@ -6759,7 +7123,7 @@ static void hclge_enable_vlan_filter(struct hnae3_handle *handle, bool enable) handle->netdev_flags &= ~HNAE3_VLAN_FLTR; } -static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, +static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid, bool is_kill, u16 vlan, u8 qos, __be16 proto) { @@ -6771,6 +7135,12 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, u8 vf_byte_off; int ret; + /* if the vf vlan table is full, firmware will disable the vf vlan + * filter, so it is useless and unnecessary to add a new vlan id to + * the vf vlan filter + */ + if (test_bit(vfid, hdev->vf_vlan_full) && !is_kill) + return 0; + hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_VLAN_FILTER_VF_CFG, false); hclge_cmd_setup_basic_desc(&desc[1], @@ -6806,6 +7176,7 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, return 0; if (req0->resp_code == HCLGE_VF_VLAN_NO_ENTRY) { + set_bit(vfid, hdev->vf_vlan_full); dev_warn(&hdev->pdev->dev, "vf vlan table is full, vf vlan filter is disabled\n"); return 0; @@ -6819,12 +7190,13 @@ static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid, if (!req0->resp_code) return 0; - if (req0->resp_code == HCLGE_VF_VLAN_DEL_NO_FOUND) { - dev_warn(&hdev->pdev->dev, - "vlan %d filter is not in vf vlan table\n", - vlan); + /* the vf vlan filter is disabled once the vf vlan table is full, + * so a new vlan id will not be added into the vf vlan table. + * Just return 0 without warning, to avoid massive verbose + * logs at unload time.
+ */ + if (req0->resp_code == HCLGE_VF_VLAN_DEL_NO_FOUND) return 0; - } dev_err(&hdev->pdev->dev, "Kill vf vlan filter fail, ret =%d.\n", @@ -7140,10 +7512,6 @@ static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, { struct hclge_vport_vlan_cfg *vlan; - /* vlan 0 is reserved */ - if (!vlan_id) - return; - vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); if (!vlan) return; @@ -7238,6 +7606,43 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev) mutex_unlock(&hdev->vport_cfg_mutex); } +static void hclge_restore_vlan_table(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_vport_vlan_cfg *vlan, *tmp; + struct hclge_dev *hdev = vport->back; + u16 vlan_proto, qos; + u16 state, vlan_id; + int i; + + mutex_lock(&hdev->vport_cfg_mutex); + for (i = 0; i < hdev->num_alloc_vport; i++) { + vport = &hdev->vport[i]; + vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto; + vlan_id = vport->port_base_vlan_cfg.vlan_info.vlan_tag; + qos = vport->port_base_vlan_cfg.vlan_info.qos; + state = vport->port_base_vlan_cfg.state; + + if (state != HNAE3_PORT_BASE_VLAN_DISABLE) { + hclge_set_vlan_filter_hw(hdev, htons(vlan_proto), + vport->vport_id, vlan_id, qos, + false); + continue; + } + + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) { + if (vlan->hd_tbl_status) + hclge_set_vlan_filter_hw(hdev, + htons(ETH_P_8021Q), + vport->vport_id, + vlan->vlan_id, 0, + false); + } + } + + mutex_unlock(&hdev->vport_cfg_mutex); +} + int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); @@ -7415,11 +7820,20 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, bool writen_to_tbl = false; int ret = 0; - /* when port based VLAN enabled, we use port based VLAN as the VLAN - * filter entry. In this case, we don't update VLAN filter table - * when user add new VLAN or remove exist VLAN, just update the vport - * VLAN list. The VLAN id in VLAN list won't be writen in VLAN filter - * table until port based VLAN disabled + /* When the device is resetting, firmware is unable to handle the + * mailbox. Just record the vlan id, and remove it after the + * reset has finished. + */ + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) && is_kill) { + set_bit(vlan_id, vport->vlan_del_fail_bmap); + return -EBUSY; + } + + /* When port base vlan is enabled, we use port base vlan as the vlan + * filter entry. In this case, we don't update the vlan filter table + * when the user adds a new vlan or removes an existing one, just + * update the vport + * vlan list.
The vlan id in the vlan list won't be written into the vlan filter + * table until port base vlan is disabled */ if (handle->port_base_vlan_state == HNAE3_PORT_BASE_VLAN_DISABLE) { ret = hclge_set_vlan_filter_hw(hdev, proto, vport->vport_id, @@ -7427,16 +7841,53 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto, writen_to_tbl = true; } - if (ret) - return ret; + if (!ret) { + if (is_kill) + hclge_rm_vport_vlan_table(vport, vlan_id, false); + else + hclge_add_vport_vlan_table(vport, vlan_id, + writen_to_tbl); + } else if (is_kill) { + /* When removing the hw vlan filter failed, record the vlan id, + * and try to remove it from hw later, to be consistent + * with the stack + */ + set_bit(vlan_id, vport->vlan_del_fail_bmap); + } + return ret; +} - if (is_kill) - hclge_rm_vport_vlan_table(vport, vlan_id, false); - else - hclge_add_vport_vlan_table(vport, vlan_id, - writen_to_tbl); +static void hclge_sync_vlan_filter(struct hclge_dev *hdev) +{ +#define HCLGE_MAX_SYNC_COUNT 60 - return 0; + int i, ret, sync_cnt = 0; + u16 vlan_id; + + /* start from vport 1 for PF is always alive */ + for (i = 0; i < hdev->num_alloc_vport; i++) { + struct hclge_vport *vport = &hdev->vport[i]; + + vlan_id = find_first_bit(vport->vlan_del_fail_bmap, + VLAN_N_VID); + while (vlan_id != VLAN_N_VID) { + ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q), + vport->vport_id, vlan_id, + 0, true); + if (ret && ret != -EINVAL) + return; + + clear_bit(vlan_id, vport->vlan_del_fail_bmap); + hclge_rm_vport_vlan_table(vport, vlan_id, false); + + sync_cnt++; + if (sync_cnt >= HCLGE_MAX_SYNC_COUNT) + return; + + vlan_id = find_first_bit(vport->vlan_del_fail_bmap, + VLAN_N_VID); + } + } } static int hclge_set_mac_mtu(struct hclge_dev *hdev, int new_mps) @@ -7463,7 +7914,7 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu) int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu) { struct hclge_dev *hdev = vport->back; - int i, max_frm_size, ret = 0; + int i, max_frm_size, ret; max_frm_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + 2 * VLAN_HLEN; if (max_frm_size < HCLGE_MAC_MIN_FRAME || @@ -7523,7 +7974,8 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id, req = (struct hclge_reset_tqp_queue_cmd *)desc.data; req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK); - hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable); + if (enable) + hnae3_set_bit(req->reset_req, HCLGE_TQP_RESET_B, 1U); ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) { @@ -7574,7 +8026,7 @@ int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) int reset_try_times = 0; int reset_status; u16 queue_gid; - int ret = 0; + int ret; queue_gid = hclge_covert_handle_qid_global(handle, queue_id); @@ -7591,7 +8043,6 @@ int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id) return ret; } - reset_try_times = 0; while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { /* Wait for tqp hw reset */ msleep(20); @@ -7630,7 +8081,6 @@ void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id) return; } - reset_try_times = 0; while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) { /* Wait for tqp hw reset */ msleep(20); @@ -7700,7 +8150,7 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev) { struct phy_device *phydev = hdev->hw.mac.phydev; u16 remote_advertising = 0; - u16 local_advertising = 0; + u16 local_advertising; u32 rx_pause, tx_pause; u8 flowctl; @@ -7733,8 +8183,9 @@ static void hclge_get_pauseparam(struct hnae3_handle *handle, u32 *auto_neg, { struct hclge_vport *vport = hclge_get_vport(handle);
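/* Editor's aside: vlan_del_fail_bmap and hclge_sync_vlan_filter() above
 * implement a bounded retry loop -- failed deletions are parked in a bitmap
 * and drained later, at most HCLGE_MAX_SYNC_COUNT per pass. A userspace
 * sketch of the same pattern (first_set() mimics find_first_bit(), which
 * returns the bitmap size when no bit is set; names are hypothetical):
 */
#include <stdio.h>

#define N_VID		4096
#define WORD_BITS	(8 * sizeof(unsigned long))
#define MAX_SYNC	60

static unsigned long fail_bmap[N_VID / WORD_BITS];

static int first_set(void)
{
	int vid;

	for (vid = 0; vid < N_VID; vid++)
		if (fail_bmap[vid / WORD_BITS] & (1UL << (vid % WORD_BITS)))
			return vid;
	return N_VID;	/* no failed deletion pending */
}

static int hw_del_vlan(int vid)
{
	(void)vid;	/* pretend the firmware accepts it this time */
	return 0;
}

int main(void)
{
	int vid, sync_cnt = 0;

	fail_bmap[10 / WORD_BITS] |= 1UL << (10 % WORD_BITS);
	fail_bmap[67 / WORD_BITS] |= 1UL << (67 % WORD_BITS);

	for (vid = first_set(); vid != N_VID; vid = first_set()) {
		if (hw_del_vlan(vid))
			break;	/* keep it in the bitmap, retry next pass */
		fail_bmap[vid / WORD_BITS] &= ~(1UL << (vid % WORD_BITS));
		printf("retried and removed vlan %d\n", vid);
		if (++sync_cnt >= MAX_SYNC)
			break;	/* bound the work done in one pass */
	}
	return 0;
}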
struct hclge_dev *hdev = vport->back; + struct phy_device *phydev = hdev->hw.mac.phydev; - *auto_neg = hclge_get_autoneg(handle); + *auto_neg = phydev ? hclge_get_autoneg(handle) : 0; if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) { *rx_en = 0; @@ -7765,11 +8216,13 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, struct phy_device *phydev = hdev->hw.mac.phydev; u32 fc_autoneg; - fc_autoneg = hclge_get_autoneg(handle); - if (auto_neg != fc_autoneg) { - dev_info(&hdev->pdev->dev, - "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n"); - return -EOPNOTSUPP; + if (phydev) { + fc_autoneg = hclge_get_autoneg(handle); + if (auto_neg != fc_autoneg) { + dev_info(&hdev->pdev->dev, + "To change autoneg please use: ethtool -s <dev> autoneg <on|off>\n"); + return -EOPNOTSUPP; + } } if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) { @@ -7780,16 +8233,13 @@ static int hclge_set_pauseparam(struct hnae3_handle *handle, u32 auto_neg, hclge_set_flowctrl_adv(hdev, rx_en, tx_en); - if (!fc_autoneg) + if (!auto_neg) return hclge_cfg_pauseparam(hdev, rx_en, tx_en); if (phydev) return phy_start_aneg(phydev); - if (hdev->pdev->revision == 0x20) - return -EOPNOTSUPP; - - return hclge_restart_autoneg(handle); + return -EOPNOTSUPP; } static void hclge_get_ksettings_an_result(struct hnae3_handle *handle, @@ -7825,7 +8275,8 @@ static void hclge_get_mdix_mode(struct hnae3_handle *handle, struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; struct phy_device *phydev = hdev->hw.mac.phydev; - int mdix_ctrl, mdix, retval, is_resolved; + int mdix_ctrl, mdix, is_resolved; + unsigned int retval; if (!phydev) { *tp_mdix_ctrl = ETH_TP_MDI_INVALID; @@ -7894,6 +8345,102 @@ static void hclge_info_show(struct hclge_dev *hdev) dev_info(dev, "PF info end.\n"); } +static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev, + struct hclge_vport *vport) +{ + struct hnae3_client *client = vport->nic.client; + struct hclge_dev *hdev = ae_dev->priv; + int rst_cnt; + int ret; + + rst_cnt = hdev->rst_stats.reset_cnt; + ret = client->ops->init_instance(&vport->nic); + if (ret) + return ret; + + set_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + if (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + rst_cnt != hdev->rst_stats.reset_cnt) { + ret = -EBUSY; + goto init_nic_err; + } + + /* Enable nic hw error interrupts */ + ret = hclge_config_nic_hw_error(hdev, true); + if (ret) { + dev_err(&ae_dev->pdev->dev, + "fail(%d) to enable hw error interrupts\n", ret); + goto init_nic_err; + } + + hnae3_set_client_init_flag(client, ae_dev, 1); + + if (netif_msg_drv(&hdev->vport->nic)) + hclge_info_show(hdev); + + return ret; + +init_nic_err: + clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + client->ops->uninit_instance(&vport->nic, 0); + + return ret; +} + +static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, + struct hclge_vport *vport) +{ + struct hnae3_client *client = vport->roce.client; + struct hclge_dev *hdev = ae_dev->priv; + int rst_cnt; + int ret; + + if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client || + !hdev->nic_client) + return 0; + + client = hdev->roce_client; + ret = hclge_init_roce_base_info(vport); + if (ret) + return ret; + + rst_cnt = hdev->rst_stats.reset_cnt; + ret = client->ops->init_instance(&vport->roce); + if (ret) + return ret; + + set_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + if 
(test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + rst_cnt != hdev->rst_stats.reset_cnt) { + ret = -EBUSY; + goto init_roce_err; + } + + /* Enable roce ras interrupts */ + ret = hclge_config_rocee_ras_interrupt(hdev, true); + if (ret) { + dev_err(&ae_dev->pdev->dev, + "fail(%d) to enable roce ras interrupts\n", ret); + goto init_roce_err; + } + + hnae3_set_client_init_flag(client, ae_dev, 1); + + return 0; + +init_roce_err: + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); + + return ret; +} + static int hclge_init_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { @@ -7909,41 +8456,13 @@ static int hclge_init_client_instance(struct hnae3_client *client, hdev->nic_client = client; vport->nic.client = client; - ret = client->ops->init_instance(&vport->nic); + ret = hclge_init_nic_client_instance(ae_dev, vport); if (ret) goto clear_nic; - hnae3_set_client_init_flag(client, ae_dev, 1); - - if (netif_msg_drv(&hdev->vport->nic)) - hclge_info_show(hdev); - - if (hdev->roce_client && - hnae3_dev_roce_supported(hdev)) { - struct hnae3_client *rc = hdev->roce_client; - - ret = hclge_init_roce_base_info(vport); - if (ret) - goto clear_roce; - - ret = rc->ops->init_instance(&vport->roce); - if (ret) - goto clear_roce; - - hnae3_set_client_init_flag(hdev->roce_client, - ae_dev, 1); - } - - break; - case HNAE3_CLIENT_UNIC: - hdev->nic_client = client; - vport->nic.client = client; - - ret = client->ops->init_instance(&vport->nic); + ret = hclge_init_roce_client_instance(ae_dev, vport); if (ret) - goto clear_nic; - - hnae3_set_client_init_flag(client, ae_dev, 1); + goto clear_roce; break; case HNAE3_CLIENT_ROCE: @@ -7952,17 +8471,9 @@ static int hclge_init_client_instance(struct hnae3_client *client, vport->roce.client = client; } - if (hdev->roce_client && hdev->nic_client) { - ret = hclge_init_roce_base_info(vport); - if (ret) - goto clear_roce; - - ret = client->ops->init_instance(&vport->roce); - if (ret) - goto clear_roce; - - hnae3_set_client_init_flag(client, ae_dev, 1); - } + ret = hclge_init_roce_client_instance(ae_dev, vport); + if (ret) + goto clear_roce; break; default: @@ -7970,7 +8481,7 @@ static int hclge_init_client_instance(struct hnae3_client *client, } } - return 0; + return ret; clear_nic: hdev->nic_client = NULL; @@ -7992,6 +8503,10 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, for (i = 0; i < hdev->num_vmdq_vport + 1; i++) { vport = &hdev->vport[i]; if (hdev->roce_client) { + clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + hdev->roce_client->ops->uninit_instance(&vport->roce, 0); hdev->roce_client = NULL; @@ -8000,6 +8515,10 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, if (client->type == HNAE3_CLIENT_ROCE) return; if (hdev->nic_client && client->ops->uninit_instance) { + clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state); + while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGE_WAIT_RESET_DONE); + client->ops->uninit_instance(&vport->nic, 0); hdev->nic_client = NULL; vport->nic.client = NULL; @@ -8081,6 +8600,7 @@ static void hclge_state_init(struct hclge_dev *hdev) static void hclge_state_uninit(struct hclge_dev *hdev) { set_bit(HCLGE_STATE_DOWN, &hdev->state); + set_bit(HCLGE_STATE_REMOVING, &hdev->state); if 
(hdev->service_timer.function) del_timer_sync(&hdev->service_timer); @@ -8122,6 +8642,23 @@ static void hclge_flr_done(struct hnae3_ae_dev *ae_dev) set_bit(HNAE3_FLR_DONE, &hdev->flr_state); } +static void hclge_clear_resetting_state(struct hclge_dev *hdev) +{ + u16 i; + + for (i = 0; i < hdev->num_alloc_vport; i++) { + struct hclge_vport *vport = &hdev->vport[i]; + int ret; + + /* Send cmd to clear VF's FUNC_RST_ING */ + ret = hclge_set_vf_rst(hdev, vport->vport_id, false); + if (ret) + dev_warn(&hdev->pdev->dev, + "clear vf(%d) rst failed %d!\n", + vport->vport_id, ret); + } +} + static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; @@ -8143,6 +8680,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) mutex_init(&hdev->vport_lock); mutex_init(&hdev->vport_cfg_mutex); + spin_lock_init(&hdev->fd_rule_lock); ret = hclge_pci_init(hdev); if (ret) { @@ -8270,13 +8808,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_mdiobus_unreg; } - ret = hclge_hw_error_set_state(hdev, true); - if (ret) { - dev_err(&pdev->dev, - "fail(%d) to enable hw error interrupts\n", ret); - goto err_mdiobus_unreg; - } - INIT_KFIFO(hdev->mac_tnl_log); hclge_dcb_ops_set(hdev); @@ -8288,6 +8819,22 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task); hclge_clear_all_event_cause(hdev); + hclge_clear_resetting_state(hdev); + + /* Log and clear the hw errors that have already occurred */ + hclge_handle_all_hns_hw_errors(ae_dev); + + /* request a delayed reset for the error recovery, because an immediate + * global reset on one PF would affect the pending initialization of + * the other PFs + */ + if (ae_dev->hw_err_reset_req) { + enum hnae3_reset_type reset_level; + + reset_level = hclge_get_reset_level(ae_dev, + &ae_dev->hw_err_reset_req); + hclge_set_def_reset_request(ae_dev, reset_level); + mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL); + } /* Enable MISC vector(vector0) */ hclge_enable_vector(&hdev->misc_vector, true); @@ -8342,6 +8889,7 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_stats_clear(hdev); memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table)); + memset(hdev->vf_vlan_full, 0, sizeof(hdev->vf_vlan_full)); ret = hclge_cmd_init(hdev); if (ret) { @@ -8393,21 +8941,31 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) ret = hclge_init_fd_config(hdev); if (ret) { - dev_err(&pdev->dev, - "fd table init fail, ret=%d\n", ret); + dev_err(&pdev->dev, "fd table init fail, ret=%d\n", ret); return ret; } /* Re-enable the hw error interrupts because - * the interrupts get disabled on core/global reset. + * the interrupts get disabled on global reset.
*/ - ret = hclge_hw_error_set_state(hdev, true); + ret = hclge_config_nic_hw_error(hdev, true); if (ret) { dev_err(&pdev->dev, - "fail(%d) to re-enable HNS hw error interrupts\n", ret); + "fail(%d) to re-enable NIC hw error interrupts\n", + ret); return ret; } + if (hdev->roce_client) { + ret = hclge_config_rocee_ras_interrupt(hdev, true); + if (ret) { + dev_err(&pdev->dev, + "fail(%d) to re-enable roce ras interrupts\n", + ret); + return ret; + } + } + hclge_reset_vport_state(hdev); dev_info(&pdev->dev, "Reset done, %s driver initialization finished.\n", @@ -8432,8 +8990,11 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_enable_vector(&hdev->misc_vector, false); synchronize_irq(hdev->misc_vector.vector_irq); + /* Disable all hw interrupts */ hclge_config_mac_tnl_int(hdev, false); - hclge_hw_error_set_state(hdev, false); + hclge_config_nic_hw_error(hdev, false); + hclge_config_rocee_ras_interrupt(hdev, false); + hclge_cmd_uninit(hdev); hclge_misc_irq_uninit(hdev); hclge_pci_uninit(hdev); @@ -8478,15 +9039,16 @@ static int hclge_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, { struct hclge_vport *vport = hclge_get_vport(handle); struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; + u16 tc_offset[HCLGE_MAX_TC_NUM] = {0}; struct hclge_dev *hdev = vport->back; + u16 tc_size[HCLGE_MAX_TC_NUM] = {0}; int cur_rss_size = kinfo->rss_size; int cur_tqps = kinfo->num_tqps; - u16 tc_offset[HCLGE_MAX_TC_NUM]; u16 tc_valid[HCLGE_MAX_TC_NUM]; - u16 tc_size[HCLGE_MAX_TC_NUM]; u16 roundup_size; u32 *rss_indir; - int ret, i; + unsigned int i; + int ret; kinfo->req_rss_size = new_tqps_num; @@ -8571,10 +9133,12 @@ static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, void *data) { #define HCLGE_32_BIT_REG_RTN_DATANUM 8 +#define HCLGE_32_BIT_DESC_NODATA_LEN 2 struct hclge_desc *desc; u32 *reg_val = data; __le32 *desc_data; + int nodata_num; int cmd_num; int i, k, n; int ret; @@ -8582,7 +9146,9 @@ static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, if (regs_num == 0) return 0; - cmd_num = DIV_ROUND_UP(regs_num + 2, HCLGE_32_BIT_REG_RTN_DATANUM); + nodata_num = HCLGE_32_BIT_DESC_NODATA_LEN; + cmd_num = DIV_ROUND_UP(regs_num + nodata_num, + HCLGE_32_BIT_REG_RTN_DATANUM); desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc) return -ENOMEM; @@ -8599,7 +9165,7 @@ static int hclge_get_32_bit_regs(struct hclge_dev *hdev, u32 regs_num, for (i = 0; i < cmd_num; i++) { if (i == 0) { desc_data = (__le32 *)(&desc[i].data[0]); - n = HCLGE_32_BIT_REG_RTN_DATANUM - 2; + n = HCLGE_32_BIT_REG_RTN_DATANUM - nodata_num; } else { desc_data = (__le32 *)(&desc[i]); n = HCLGE_32_BIT_REG_RTN_DATANUM; @@ -8621,10 +9187,12 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, void *data) { #define HCLGE_64_BIT_REG_RTN_DATANUM 4 +#define HCLGE_64_BIT_DESC_NODATA_LEN 1 struct hclge_desc *desc; u64 *reg_val = data; __le64 *desc_data; + int nodata_len; int cmd_num; int i, k, n; int ret; @@ -8632,7 +9200,9 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, if (regs_num == 0) return 0; - cmd_num = DIV_ROUND_UP(regs_num + 1, HCLGE_64_BIT_REG_RTN_DATANUM); + nodata_len = HCLGE_64_BIT_DESC_NODATA_LEN; + cmd_num = DIV_ROUND_UP(regs_num + nodata_len, + HCLGE_64_BIT_REG_RTN_DATANUM); desc = kcalloc(cmd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc) return -ENOMEM; @@ -8649,7 +9219,7 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num, for (i = 0; i < cmd_num; i++) { if (i == 0) { 
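/* Editor's aside: a quick check of the descriptor math in the two
 * register-dump helpers above. Each descriptor carries DATANUM words, but
 * the first descriptor loses NODATA words (per the nodata_num/nodata_len
 * handling): 8/2 for the 32-bit dump, 4/1 for the 64-bit dump, hence
 * cmd_num = DIV_ROUND_UP(regs_num + NODATA, DATANUM):
 */
#include <stdio.h>

static unsigned int cmd_num(unsigned int regs, unsigned int datanum,
			    unsigned int nodata)
{
	/* open-coded DIV_ROUND_UP(regs + nodata, datanum) */
	return (regs + nodata + datanum - 1) / datanum;
}

int main(void)
{
	unsigned int n;

	n = cmd_num(21, 8, 2);	/* 32-bit dump: (21 + 2 + 7) / 8 = 3 */
	printf("21 x u32 -> %u descs, capacity %u regs\n", n, n * 8 - 2);

	n = cmd_num(21, 4, 1);	/* 64-bit dump: (21 + 1 + 3) / 4 = 6 */
	printf("21 x u64 -> %u descs, capacity %u regs\n", n, n * 4 - 1);
	return 0;
}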
desc_data = (__le64 *)(&desc[i].data[0]); - n = HCLGE_64_BIT_REG_RTN_DATANUM - 1; + n = HCLGE_64_BIT_REG_RTN_DATANUM - nodata_len; } else { desc_data = (__le64 *)(&desc[i]); n = HCLGE_64_BIT_REG_RTN_DATANUM; @@ -8876,6 +9446,7 @@ static const struct hnae3_ae_ops hclge_ops = { .set_autoneg = hclge_set_autoneg, .get_autoneg = hclge_get_autoneg, .restart_autoneg = hclge_restart_autoneg, + .halt_autoneg = hclge_halt_autoneg, .get_pauseparam = hclge_get_pauseparam, .set_pauseparam = hclge_set_pauseparam, .set_mtu = hclge_set_mtu, @@ -8892,6 +9463,7 @@ static const struct hnae3_ae_ops hclge_ops = { .set_vf_vlan_filter = hclge_set_vf_vlan_filter, .enable_hw_strip_rxvtag = hclge_en_hw_strip_rxvtag, .reset_event = hclge_reset_event, + .get_reset_level = hclge_get_reset_level, .set_default_reset_request = hclge_set_def_reset_request, .get_tqps_and_rss_info = hclge_get_tqps_and_rss_info, .set_channels = hclge_set_channels, @@ -8908,6 +9480,7 @@ static const struct hnae3_ae_ops hclge_ops = { .get_fd_all_rules = hclge_get_all_rules, .restore_fd_rules = hclge_restore_fd_entries, .enable_fd = hclge_enable_fd, + .add_arfs_entry = hclge_add_fd_entry_by_arfs, .dbg_run_cmd = hclge_dbg_run_cmd, .handle_hw_ras_error = hclge_handle_hw_ras_error, .get_hw_reset_stat = hclge_get_hw_reset_stat, @@ -8918,6 +9491,7 @@ static const struct hnae3_ae_ops hclge_ops = { .set_timer_task = hclge_set_timer_task, .mac_connect_phy = hclge_mac_connect_phy, .mac_disconnect_phy = hclge_mac_disconnect_phy, + .restore_vlan_table = hclge_restore_vlan_table, }; static struct hnae3_ae_algo ae_algo = { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index dd06b11187b0..6a12285f4c76 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -201,6 +201,8 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_DOWN, HCLGE_STATE_DISABLED, HCLGE_STATE_REMOVING, + HCLGE_STATE_NIC_REGISTERED, + HCLGE_STATE_ROCE_REGISTERED, HCLGE_STATE_SERVICE_INITED, HCLGE_STATE_SERVICE_SCHED, HCLGE_STATE_RST_SERVICE_SCHED, @@ -472,6 +474,7 @@ enum HCLGE_FD_KEY_TYPE { enum HCLGE_FD_STAGE { HCLGE_FD_STAGE_1, HCLGE_FD_STAGE_2, + MAX_STAGE_NUM, }; /* OUTER_XXX indicates tuples in tunnel header of tunnel packet @@ -526,7 +529,7 @@ enum HCLGE_FD_META_DATA { struct key_info { u8 key_type; - u8 key_length; + u8 key_length; /* use bit as unit */ }; static const struct key_info meta_data_key_info[] = { @@ -578,6 +581,16 @@ static const struct key_info tuple_key_info[] = { #define MAX_KEY_BYTES (MAX_KEY_DWORDS * 4) #define MAX_META_DATA_LENGTH 32 +/* assigned by firmware, the real filter number for each pf may be less */ +#define MAX_FD_FILTER_NUM 4096 +#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL 5 + +enum HCLGE_FD_ACTIVE_RULE_TYPE { + HCLGE_FD_RULE_NONE, + HCLGE_FD_ARFS_ACTIVE, + HCLGE_FD_EP_ACTIVE, +}; + enum HCLGE_FD_PACKET_TYPE { NIC_PACKET, ROCE_PACKET, @@ -600,18 +613,23 @@ struct hclge_fd_key_cfg { struct hclge_fd_cfg { u8 fd_mode; - u16 max_key_length; + u16 max_key_length; /* use bit as unit */ u32 proto_support; - u32 rule_num[2]; /* rule entry number */ - u16 cnt_num[2]; /* rule hit counter number */ - struct hclge_fd_key_cfg key_cfg[2]; + u32 rule_num[MAX_STAGE_NUM]; /* rule entry number */ + u16 cnt_num[MAX_STAGE_NUM]; /* rule hit counter number */ + struct hclge_fd_key_cfg key_cfg[MAX_STAGE_NUM]; }; +#define IPV4_INDEX 3 +#define IPV6_SIZE 4 struct hclge_fd_rule_tuples { - u8 src_mac[6]; - u8 dst_mac[6]; - u32 src_ip[4]; 
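/* Editor's aside: IPV4_INDEX and IPV6_SIZE above encode one convention --
 * a single u32[IPV6_SIZE] array holds either a whole IPv6 address or, for
 * IPv4, only the last word (index IPV4_INDEX == 3), with the other words
 * left zero, as the struct comment below states. A minimal illustration:
 */
#include <stdint.h>
#include <stdio.h>

#define IPV4_INDEX	3
#define IPV6_SIZE	4

int main(void)
{
	uint32_t src_ip[IPV6_SIZE] = { 0 };

	src_ip[IPV4_INDEX] = 0x0a000001;	/* 10.0.0.1 in host order */
	printf("v4 word %08x, unused words %x %x %x\n",
	       src_ip[IPV4_INDEX], src_ip[0], src_ip[1], src_ip[2]);
	return 0;
}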
- u32 dst_ip[4]; + u8 src_mac[ETH_ALEN]; + u8 dst_mac[ETH_ALEN]; + /* Be compatible for ip address of both ipv4 and ipv6. + * For ipv4 address, we store it in src/dst_ip[3]. + */ + u32 src_ip[IPV6_SIZE]; + u32 dst_ip[IPV6_SIZE]; u16 src_port; u16 dst_port; u16 vlan_tag1; @@ -630,6 +648,8 @@ struct hclge_fd_rule { u16 vf_id; u16 queue_id; u16 location; + u16 flow_id; /* only used for arfs */ + enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type; }; struct hclge_fd_ad_data { @@ -679,6 +699,20 @@ struct hclge_mac_tnl_stats { u32 status; }; +#define HCLGE_RESET_INTERVAL (10 * HZ) +#define HCLGE_WAIT_RESET_DONE 100 + +#pragma pack(1) +struct hclge_vf_vlan_cfg { + u8 mbx_cmd; + u8 subcode; + u8 is_kill; + u16 vlan; + u16 proto; +}; + +#pragma pack() + /* For each bit of TCAM entry, it uses a pair of 'x' and * 'y' to indicate which value to match, like below: * ---------------------------------- @@ -806,10 +840,15 @@ struct hclge_dev { struct hclge_vlan_type_cfg vlan_type_cfg; unsigned long vlan_table[VLAN_N_VID][BITS_TO_LONGS(HCLGE_VPORT_NUM)]; + unsigned long vf_vlan_full[BITS_TO_LONGS(HCLGE_VPORT_NUM)]; struct hclge_fd_cfg fd_cfg; struct hlist_head fd_rule_list; + spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */ u16 hclge_fd_rule_num; + u16 fd_arfs_expire_timer; + unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)]; + enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type; u8 fd_en; u16 wanted_umv_size; @@ -891,13 +930,14 @@ struct hclge_vport { u32 bw_limit; /* VSI BW Limit (0 = disabled) */ u8 dwrr; + unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; struct hclge_port_base_vlan_config port_base_vlan_cfg; struct hclge_tx_vtag_cfg txvlan_cfg; struct hclge_rx_vtag_cfg rxvlan_cfg; u16 used_umv_num; - int vport_id; + u16 vport_id; struct hclge_dev *back; /* Back reference to associated dev */ struct hnae3_handle nic; struct hnae3_handle roce; @@ -959,7 +999,7 @@ int hclge_func_reset_cmd(struct hclge_dev *hdev, int func_id); int hclge_vport_start(struct hclge_vport *vport); void hclge_vport_stop(struct hclge_vport *vport); int hclge_set_vport_mtu(struct hclge_vport *vport, int new_mtu); -int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf); +int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf); u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id); int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index 0e04e63f2a94..a38ac7cfe16b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -29,6 +29,10 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport, "PF fail to gen resp to VF len %d exceeds max len %d\n", resp_data_len, HCLGE_MBX_MAX_RESP_DATA_SIZE); + /* If resp_data_len is too long, set the value to max length + * and return the msg to VF + */ + resp_data_len = HCLGE_MBX_MAX_RESP_DATA_SIZE; } hclge_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_PF_TO_VF, false); @@ -93,7 +97,7 @@ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) else if (hdev->reset_type == HNAE3_FLR_RESET) reset_type = HNAE3_VF_FULL_RESET; else - return -EINVAL; + reset_type = HNAE3_VF_FUNC_RESET; memcpy(&msg_data[0], &reset_type, sizeof(u16)); @@ -192,12 +196,10 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en, return ret; ret = hclge_bind_ring_with_vector(vport, vector_id, en, 
&ring_chain); - if (ret) - return ret; hclge_free_vector_ring_chain(&ring_chain); - return 0; + return ret; } static int hclge_set_vf_promisc_mode(struct hclge_vport *vport, @@ -308,21 +310,23 @@ int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid, static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport, struct hclge_mbx_vf_to_pf_cmd *mbx_req) { + struct hclge_vf_vlan_cfg *msg_cmd; int status = 0; - if (mbx_req->msg[1] == HCLGE_MBX_VLAN_FILTER) { + msg_cmd = (struct hclge_vf_vlan_cfg *)mbx_req->msg; + if (msg_cmd->subcode == HCLGE_MBX_VLAN_FILTER) { struct hnae3_handle *handle = &vport->nic; u16 vlan, proto; bool is_kill; - is_kill = !!mbx_req->msg[2]; - memcpy(&vlan, &mbx_req->msg[3], sizeof(vlan)); - memcpy(&proto, &mbx_req->msg[5], sizeof(proto)); + is_kill = !!msg_cmd->is_kill; + vlan = msg_cmd->vlan; + proto = msg_cmd->proto; status = hclge_set_vlan_filter(handle, cpu_to_be16(proto), vlan, is_kill); - } else if (mbx_req->msg[1] == HCLGE_MBX_VLAN_RX_OFF_CFG) { + } else if (msg_cmd->subcode == HCLGE_MBX_VLAN_RX_OFF_CFG) { struct hnae3_handle *handle = &vport->nic; - bool en = mbx_req->msg[2] ? true : false; + bool en = msg_cmd->is_kill ? true : false; status = hclge_en_hw_strip_rxvtag(handle, en); } else if (mbx_req->msg[1] == HCLGE_MBX_PORT_BASE_VLAN_CFG) { @@ -365,13 +369,14 @@ static int hclge_get_vf_tcinfo(struct hclge_vport *vport, { struct hnae3_knic_private_info *kinfo = &vport->nic.kinfo; u8 vf_tc_map = 0; - int i, ret; + unsigned int i; + int ret; for (i = 0; i < kinfo->num_tc; i++) vf_tc_map |= BIT(i); ret = hclge_gen_resp_to_vf(vport, mbx_req, 0, &vf_tc_map, - sizeof(u8)); + sizeof(vf_tc_map)); return ret; } @@ -553,7 +558,8 @@ void hclge_mbx_handler(struct hclge_dev *hdev) struct hclge_mbx_vf_to_pf_cmd *req; struct hclge_vport *vport; struct hclge_desc *desc; - int ret, flag; + unsigned int flag; + int ret; /* handle all the mailbox requests in the queue */ while (!hclge_cmd_crq_empty(&hdev->hw)) { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 1e8134892d77..abb1b438564e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -55,9 +55,9 @@ static int hclge_mdio_write(struct mii_bus *bus, int phyid, int regnum, mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data; hnae3_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M, - HCLGE_MDIO_PHYID_S, phyid); + HCLGE_MDIO_PHYID_S, (u32)phyid); hnae3_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M, - HCLGE_MDIO_PHYREG_S, regnum); + HCLGE_MDIO_PHYREG_S, (u32)regnum); hnae3_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1); hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M, @@ -93,9 +93,9 @@ static int hclge_mdio_read(struct mii_bus *bus, int phyid, int regnum) mdio_cmd = (struct hclge_mdio_cfg_cmd *)desc.data; hnae3_set_field(mdio_cmd->phyid, HCLGE_MDIO_PHYID_M, - HCLGE_MDIO_PHYID_S, phyid); + HCLGE_MDIO_PHYID_S, (u32)phyid); hnae3_set_field(mdio_cmd->phyad, HCLGE_MDIO_PHYREG_M, - HCLGE_MDIO_PHYREG_S, regnum); + HCLGE_MDIO_PHYREG_S, (u32)regnum); hnae3_set_bit(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_START_B, 1); hnae3_set_field(mdio_cmd->ctrl_bit, HCLGE_MDIO_CTRL_ST_M, @@ -224,6 +224,13 @@ int hclge_mac_connect_phy(struct hnae3_handle *handle) linkmode_and(phydev->supported, phydev->supported, mask); linkmode_copy(phydev->advertising, phydev->supported); + /* supported flag is Pause and Asym Pause, but default advertising + * should be rx on, tx on, 
so we need to clear Asym Pause from the advertising + * flags + */ + linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, + phydev->advertising); + return 0; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index a7bbb6d3091a..3f41fa2bc414 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -43,18 +43,23 @@ enum hclge_shaper_level { static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, u8 *ir_b, u8 *ir_u, u8 *ir_s) { +#define DIVISOR_CLK (1000 * 8) +#define DIVISOR_IR_B_126 (126 * DIVISOR_CLK) + const u16 tick_array[HCLGE_SHAPER_LVL_CNT] = { 6 * 256, /* Priority level */ 6 * 32, /* Priority group level */ 6 * 8, /* Port level */ 6 * 256 /* Qset level */ }; - u8 ir_u_calc = 0, ir_s_calc = 0; + u8 ir_u_calc = 0; + u8 ir_s_calc = 0; u32 ir_calc; u32 tick; /* Calc tick */ - if (shaper_level >= HCLGE_SHAPER_LVL_CNT) + if (shaper_level >= HCLGE_SHAPER_LVL_CNT || + ir > HCLGE_ETHER_MAX_RATE) return -EINVAL; tick = tick_array[shaper_level]; @@ -66,7 +71,7 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, * ir_calc = ---------------- * 1000 * tick * 1 */ - ir_calc = (1008000 + (tick >> 1) - 1) / tick; + ir_calc = (DIVISOR_IR_B_126 + (tick >> 1) - 1) / tick; if (ir_calc == ir) { *ir_b = 126; @@ -78,27 +83,28 @@ static int hclge_shaper_para_calc(u32 ir, u8 shaper_level, /* Increasing the denominator to select ir_s value */ while (ir_calc > ir) { ir_s_calc++; - ir_calc = 1008000 / (tick * (1 << ir_s_calc)); + ir_calc = DIVISOR_IR_B_126 / (tick * (1 << ir_s_calc)); } if (ir_calc == ir) *ir_b = 126; else - *ir_b = (ir * tick * (1 << ir_s_calc) + 4000) / 8000; + *ir_b = (ir * tick * (1 << ir_s_calc) + + (DIVISOR_CLK >> 1)) / DIVISOR_CLK; } else { /* Increasing the numerator to select ir_u value */ u32 numerator; while (ir_calc < ir) { ir_u_calc++; - numerator = 1008000 * (1 << ir_u_calc); + numerator = DIVISOR_IR_B_126 * (1 << ir_u_calc); ir_calc = (numerator + (tick >> 1)) / tick; } if (ir_calc == ir) { *ir_b = 126; } else { - u32 denominator = (8000 * (1 << --ir_u_calc)); + u32 denominator = (DIVISOR_CLK * (1 << --ir_u_calc)); *ir_b = (ir * tick + (denominator >> 1)) / denominator; } } @@ -119,14 +125,13 @@ static int hclge_pfc_stats_get(struct hclge_dev *hdev, opcode == HCLGE_OPC_QUERY_PFC_TX_PKT_CNT)) return -EINVAL; - for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM; i++) { + for (i = 0; i < HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1; i++) { hclge_cmd_setup_basic_desc(&desc[i], opcode, true); - if (i != (HCLGE_TM_PFC_PKT_GET_CMD_NUM - 1)) - desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT); + desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT); } + hclge_cmd_setup_basic_desc(&desc[i], opcode, true); + ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_TM_PFC_PKT_GET_CMD_NUM); if (ret) return ret; @@ -219,8 +224,7 @@ int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr) trans_gap = pause_param->pause_trans_gap; trans_time = le16_to_cpu(pause_param->pause_trans_time); - return hclge_pause_param_cfg(hdev, mac_addr, trans_gap, - trans_time); + return hclge_pause_param_cfg(hdev, mac_addr, trans_gap, trans_time); } static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id) @@ -361,29 +365,36 @@ static int hclge_tm_qs_weight_cfg(struct hclge_dev *hdev, u16 qs_id, return hclge_cmd_send(&hdev->hw, &desc, 1); } +static u32 hclge_tm_get_shapping_para(u8 ir_b, u8 ir_u, u8 ir_s, + u8 bs_b, u8 
bs_s) +{ + u32 shapping_para = 0; + + hclge_tm_set_field(shapping_para, IR_B, ir_b); + hclge_tm_set_field(shapping_para, IR_U, ir_u); + hclge_tm_set_field(shapping_para, IR_S, ir_s); + hclge_tm_set_field(shapping_para, BS_B, bs_b); + hclge_tm_set_field(shapping_para, BS_S, bs_s); + + return shapping_para; +} + static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev, enum hclge_shap_bucket bucket, u8 pg_id, - u8 ir_b, u8 ir_u, u8 ir_s, u8 bs_b, u8 bs_s) + u32 shapping_para) { struct hclge_pg_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; struct hclge_desc desc; - u32 shapping_para = 0; opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING : - HCLGE_OPC_TM_PG_C_SHAPPING; + HCLGE_OPC_TM_PG_C_SHAPPING; hclge_cmd_setup_basic_desc(&desc, opcode, false); shap_cfg_cmd = (struct hclge_pg_shapping_cmd *)desc.data; shap_cfg_cmd->pg_id = pg_id; - hclge_tm_set_field(shapping_para, IR_B, ir_b); - hclge_tm_set_field(shapping_para, IR_U, ir_u); - hclge_tm_set_field(shapping_para, IR_S, ir_s); - hclge_tm_set_field(shapping_para, BS_B, bs_b); - hclge_tm_set_field(shapping_para, BS_S, bs_s); - shap_cfg_cmd->pg_shapping_para = cpu_to_le32(shapping_para); return hclge_cmd_send(&hdev->hw, &desc, 1); @@ -397,7 +408,7 @@ static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) u8 ir_u, ir_b, ir_s; int ret; - ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE, + ret = hclge_shaper_para_calc(hdev->hw.mac.speed, HCLGE_SHAPER_LVL_PORT, &ir_b, &ir_u, &ir_s); if (ret) @@ -406,11 +417,9 @@ static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false); shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data; - hclge_tm_set_field(shapping_para, IR_B, ir_b); - hclge_tm_set_field(shapping_para, IR_U, ir_u); - hclge_tm_set_field(shapping_para, IR_S, ir_s); - hclge_tm_set_field(shapping_para, BS_B, HCLGE_SHAPER_BS_U_DEF); - hclge_tm_set_field(shapping_para, BS_S, HCLGE_SHAPER_BS_S_DEF); + shapping_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para); @@ -419,16 +428,14 @@ static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, enum hclge_shap_bucket bucket, u8 pri_id, - u8 ir_b, u8 ir_u, u8 ir_s, - u8 bs_b, u8 bs_s) + u32 shapping_para) { struct hclge_pri_shapping_cmd *shap_cfg_cmd; enum hclge_opcode_type opcode; struct hclge_desc desc; - u32 shapping_para = 0; opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING : - HCLGE_OPC_TM_PRI_C_SHAPPING; + HCLGE_OPC_TM_PRI_C_SHAPPING; hclge_cmd_setup_basic_desc(&desc, opcode, false); @@ -436,12 +443,6 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev, shap_cfg_cmd->pri_id = pri_id; - hclge_tm_set_field(shapping_para, IR_B, ir_b); - hclge_tm_set_field(shapping_para, IR_U, ir_u); - hclge_tm_set_field(shapping_para, IR_S, ir_s); - hclge_tm_set_field(shapping_para, BS_B, bs_b); - hclge_tm_set_field(shapping_para, BS_S, bs_s); - shap_cfg_cmd->pri_shapping_para = cpu_to_le32(shapping_para); return hclge_cmd_send(&hdev->hw, &desc, 1); @@ -531,6 +532,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) max_rss_size = min_t(u16, hdev->rss_size_max, vport->alloc_tqps / kinfo->num_tc); + /* Set to user value, no larger than max_rss_size. 
*/ if (kinfo->req_rss_size != kinfo->rss_size && kinfo->req_rss_size && kinfo->req_rss_size <= max_rss_size) { dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", @@ -538,6 +540,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) kinfo->rss_size = kinfo->req_rss_size; } else if (kinfo->rss_size > max_rss_size || (!kinfo->req_rss_size && kinfo->rss_size < max_rss_size)) { + /* Set to the maximum specification value (max_rss_size). */ dev_info(&hdev->pdev->dev, "rss changes from %d to %d\n", kinfo->rss_size, max_rss_size); kinfo->rss_size = max_rss_size; @@ -595,8 +598,10 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) hdev->tm_info.prio_tc[i] = (i >= hdev->tm_info.num_tc) ? 0 : i; - /* DCB is enabled if we have more than 1 TC */ - if (hdev->tm_info.num_tc > 1) + /* DCB is enabled if we have more than 1 TC or pfc_en is + * non-zero. + */ + if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en) hdev->flag |= HCLGE_FLAG_DCB_ENABLE; else hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; @@ -604,12 +609,14 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) static void hclge_tm_pg_info_init(struct hclge_dev *hdev) { +#define BW_PERCENT 100 + u8 i; for (i = 0; i < hdev->tm_info.num_pg; i++) { int k; - hdev->tm_info.pg_dwrr[i] = i ? 0 : 100; + hdev->tm_info.pg_dwrr[i] = i ? 0 : BW_PERCENT; hdev->tm_info.pg_info[i].pg_id = i; hdev->tm_info.pg_info[i].pg_sch_mode = HCLGE_SCH_MODE_DWRR; @@ -621,7 +628,7 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) hdev->tm_info.pg_info[i].tc_bit_map = hdev->hw_tc_map; for (k = 0; k < hdev->tm_info.num_tc; k++) - hdev->tm_info.pg_info[i].tc_dwrr[k] = 100; + hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT; } } @@ -682,6 +689,7 @@ static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev) static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) { u8 ir_u, ir_b, ir_s; + u32 shaper_para; int ret; u32 i; @@ -699,18 +707,21 @@ static int hclge_tm_pg_shaper_cfg(struct hclge_dev *hdev) if (ret) return ret; + shaper_para = hclge_tm_get_shapping_para(0, 0, 0, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pg_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i, - 0, 0, 0, HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + shaper_para); if (ret) return ret; + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pg_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i, - ir_b, ir_u, ir_s, - HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + shaper_para); if (ret) return ret; } @@ -730,8 +741,7 @@ static int hclge_tm_pg_dwrr_cfg(struct hclge_dev *hdev) /* pg to prio */ for (i = 0; i < hdev->tm_info.num_pg; i++) { /* Cfg dwrr */ - ret = hclge_tm_pg_weight_cfg(hdev, i, - hdev->tm_info.pg_dwrr[i]); + ret = hclge_tm_pg_weight_cfg(hdev, i, hdev->tm_info.pg_dwrr[i]); if (ret) return ret; } @@ -811,6 +821,7 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev) static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) { u8 ir_u, ir_b, ir_s; + u32 shaper_para; int ret; u32 i; @@ -822,17 +833,19 @@ static int hclge_tm_pri_tc_base_shaper_cfg(struct hclge_dev *hdev) if (ret) return ret; - ret = hclge_tm_pri_shapping_cfg( - hdev, HCLGE_TM_SHAP_C_BUCKET, i, - 0, 0, 0, HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + shaper_para = hclge_tm_get_shapping_para(0, 0, 0, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); + ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i, + shaper_para); if (ret) return ret; - ret = hclge_tm_pri_shapping_cfg( 
- hdev, HCLGE_TM_SHAP_P_BUCKET, i, - ir_b, ir_u, ir_s, HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); + ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i, + shaper_para); if (ret) return ret; } @@ -844,6 +857,7 @@ static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; u8 ir_u, ir_b, ir_s; + u32 shaper_para; int ret; ret = hclge_shaper_para_calc(vport->bw_limit, HCLGE_SHAPER_LVL_VF, @@ -851,18 +865,19 @@ static int hclge_tm_pri_vnet_base_shaper_pri_cfg(struct hclge_vport *vport) if (ret) return ret; + shaper_para = hclge_tm_get_shapping_para(0, 0, 0, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, - vport->vport_id, - 0, 0, 0, HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + vport->vport_id, shaper_para); if (ret) return ret; + shaper_para = hclge_tm_get_shapping_para(ir_b, ir_u, ir_s, + HCLGE_SHAPER_BS_U_DEF, + HCLGE_SHAPER_BS_S_DEF); ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, - vport->vport_id, - ir_b, ir_u, ir_s, - HCLGE_SHAPER_BS_U_DEF, - HCLGE_SHAPER_BS_S_DEF); + vport->vport_id, shaper_para); if (ret) return ret; @@ -964,7 +979,7 @@ static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) struct hclge_ets_tc_weight_cmd *ets_weight; struct hclge_desc desc; - int i; + unsigned int i; hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ETS_TC_WEIGHT, false); ets_weight = (struct hclge_ets_tc_weight_cmd *)desc.data; @@ -1124,6 +1139,9 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport) int ret; u8 i; + if (vport->vport_id >= HNAE3_MAX_TC) + return -EINVAL; + ret = hclge_tm_pri_schd_mode_cfg(hdev, vport->vport_id); if (ret) return ret; @@ -1212,8 +1230,8 @@ static int hclge_pause_param_setup_hw(struct hclge_dev *hdev) struct hclge_mac *mac = &hdev->hw.mac; return hclge_pause_param_cfg(hdev, mac->mac_addr, - HCLGE_DEFAULT_PAUSE_TRANS_GAP, - HCLGE_DEFAULT_PAUSE_TRANS_TIME); + HCLGE_DEFAULT_PAUSE_TRANS_GAP, + HCLGE_DEFAULT_PAUSE_TRANS_TIME); } static int hclge_pfc_setup_hw(struct hclge_dev *hdev) @@ -1358,7 +1376,8 @@ void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc) void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) { - u8 i, bit_map = 0; + u8 bit_map = 0; + u8 i; hdev->tm_info.num_tc = num_tc; @@ -1375,6 +1394,19 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) hclge_tm_schd_info_init(hdev); } +void hclge_tm_pfc_info_update(struct hclge_dev *hdev) +{ + /* DCB is enabled if we have more than 1 TC or pfc_en is + * non-zero. 
+ */ + if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en) + hdev->flag |= HCLGE_FLAG_DCB_ENABLE; + else + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + + hclge_pfc_info_init(hdev); +} + int hclge_tm_init_hw(struct hclge_dev *hdev, bool init) { int ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h index f60e540c7a62..818610988d34 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h @@ -12,7 +12,7 @@ #define HCLGE_TM_PORT_BASE_MODE_MSK BIT(0) -#define HCLGE_DEFAULT_PAUSE_TRANS_GAP 0xFF +#define HCLGE_DEFAULT_PAUSE_TRANS_GAP 0x7F #define HCLGE_DEFAULT_PAUSE_TRANS_TIME 0xFFFF /* SP or DWRR */ @@ -147,6 +147,7 @@ int hclge_pause_setup_hw(struct hclge_dev *hdev, bool init); int hclge_tm_schd_setup_hw(struct hclge_dev *hdev); void hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc); void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc); +void hclge_tm_pfc_info_update(struct hclge_dev *hdev); int hclge_tm_dwrr_cfg(struct hclge_dev *hdev); int hclge_tm_init_hw(struct hclge_dev *hdev, bool init); int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile index 6193f8fa7cf3..53804d95ea90 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile @@ -6,4 +6,4 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o -hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o
\ No newline at end of file +hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 71f356fc2446..652b796044e3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -98,7 +98,6 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring) hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val); reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S); - reg_val |= HCLGEVF_NIC_CMQ_ENABLE; hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, reg_val); hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0); @@ -110,7 +109,6 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring) hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val); reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S); - reg_val |= HCLGEVF_NIC_CMQ_ENABLE; hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, reg_val); hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0); @@ -179,6 +177,38 @@ void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc, desc->flag &= cpu_to_le16(~HCLGEVF_CMD_FLAG_WR); } +static int hclgevf_cmd_convert_err_code(u16 desc_ret) +{ + switch (desc_ret) { + case HCLGEVF_CMD_EXEC_SUCCESS: + return 0; + case HCLGEVF_CMD_NO_AUTH: + return -EPERM; + case HCLGEVF_CMD_NOT_SUPPORTED: + return -EOPNOTSUPP; + case HCLGEVF_CMD_QUEUE_FULL: + return -EXFULL; + case HCLGEVF_CMD_NEXT_ERR: + return -ENOSR; + case HCLGEVF_CMD_UNEXE_ERR: + return -ENOTBLK; + case HCLGEVF_CMD_PARA_ERR: + return -EINVAL; + case HCLGEVF_CMD_RESULT_ERR: + return -ERANGE; + case HCLGEVF_CMD_TIMEOUT: + return -ETIME; + case HCLGEVF_CMD_HILINK_ERR: + return -ENOLINK; + case HCLGEVF_CMD_QUEUE_ILLEGAL: + return -ENXIO; + case HCLGEVF_CMD_INVALID: + return -EBADR; + default: + return -EIO; + } +} + /* hclgevf_cmd_send - send command to command queue * @hw: pointer to the hw struct * @desc: prefilled descriptor for describing the command @@ -190,6 +220,7 @@ void hclgevf_cmd_setup_basic_desc(struct hclgevf_desc *desc, int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) { struct hclgevf_dev *hdev = (struct hclgevf_dev *)hw->hdev; + struct hclgevf_cmq_ring *csq = &hw->cmq.csq; struct hclgevf_desc *desc_to_use; bool complete = false; u32 timeout = 0; @@ -201,8 +232,17 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) spin_lock_bh(&hw->cmq.csq.lock); - if (num > hclgevf_ring_space(&hw->cmq.csq) || - test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) { + if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state)) { + spin_unlock_bh(&hw->cmq.csq.lock); + return -EBUSY; + } + + if (num > hclgevf_ring_space(&hw->cmq.csq)) { + /* If CMDQ ring is full, SW HEAD and HW HEAD may be different, + * need update the SW HEAD pointer csq->next_to_clean + */ + csq->next_to_clean = hclgevf_read_dev(hw, + HCLGEVF_NIC_CSQ_HEAD_REG); spin_unlock_bh(&hw->cmq.csq.lock); return -EBUSY; } @@ -251,11 +291,7 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) else retval = le16_to_cpu(desc[0].retval); - if ((enum hclgevf_cmd_return_status)retval == - HCLGEVF_CMD_EXEC_SUCCESS) - status = 0; - else - status = -EIO; + status = hclgevf_cmd_convert_err_code(retval); hw->cmq.last_status = (enum hclgevf_cmd_status)retval; ntc++; handle++; @@ -265,14 +301,13 @@ int hclgevf_cmd_send(struct hclgevf_hw *hw, struct hclgevf_desc *desc, int num) } if (!complete) 
- status = -EAGAIN; + status = -EBADE; /* Clean the command send queue */ handle = hclgevf_cmd_csq_clean(hw); - if (handle != num) { + if (handle != num) dev_warn(&hdev->pdev->dev, "cleaned %d, need to clean %d\n", handle, num); - } spin_unlock_bh(&hw->cmq.csq.lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 47030b42341f..127a434a56f3 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -46,9 +46,17 @@ struct hclgevf_cmq_ring { enum hclgevf_cmd_return_status { HCLGEVF_CMD_EXEC_SUCCESS = 0, - HCLGEVF_CMD_NO_AUTH = 1, - HCLGEVF_CMD_NOT_EXEC = 2, - HCLGEVF_CMD_QUEUE_FULL = 3, + HCLGEVF_CMD_NO_AUTH = 1, + HCLGEVF_CMD_NOT_SUPPORTED = 2, + HCLGEVF_CMD_QUEUE_FULL = 3, + HCLGEVF_CMD_NEXT_ERR = 4, + HCLGEVF_CMD_UNEXE_ERR = 5, + HCLGEVF_CMD_PARA_ERR = 6, + HCLGEVF_CMD_RESULT_ERR = 7, + HCLGEVF_CMD_TIMEOUT = 8, + HCLGEVF_CMD_HILINK_ERR = 9, + HCLGEVF_CMD_QUEUE_ILLEGAL = 10, + HCLGEVF_CMD_INVALID = 11, }; enum hclgevf_cmd_status { diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 5d53467ee2d2..a13a0e101c3b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -11,6 +11,8 @@ #define HCLGEVF_NAME "hclgevf" +#define HCLGEVF_RESET_MAX_FAIL_CNT 5 + static int hclgevf_reset_hdev(struct hclgevf_dev *hdev); static struct hnae3_ae_algo ae_algovf; @@ -83,8 +85,7 @@ static const u32 tqp_intr_reg_addr_list[] = {HCLGEVF_TQP_INTR_CTRL_REG, HCLGEVF_TQP_INTR_GL2_REG, HCLGEVF_TQP_INTR_RL_REG}; -static inline struct hclgevf_dev *hclgevf_ae_get_hdev( - struct hnae3_handle *handle) +static struct hclgevf_dev *hclgevf_ae_get_hdev(struct hnae3_handle *handle) { if (!handle->client) return container_of(handle, struct hclgevf_dev, nic); @@ -232,7 +233,7 @@ static int hclgevf_get_tc_info(struct hclgevf_dev *hdev) int status; status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_TCINFO, 0, NULL, 0, - true, &resp_msg, sizeof(u8)); + true, &resp_msg, sizeof(resp_msg)); if (status) { dev_err(&hdev->pdev->dev, "VF request to get TC info from PF failed %d", @@ -321,7 +322,8 @@ static u16 hclgevf_get_qid_global(struct hnae3_handle *handle, u16 queue_id) memcpy(&msg_data[0], &queue_id, sizeof(queue_id)); ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_QID_IN_PF, 0, msg_data, - 2, true, resp_data, 2); + sizeof(msg_data), true, resp_data, + sizeof(resp_data)); if (!ret) qid_in_pf = *(u16 *)resp_data; @@ -382,7 +384,7 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev) struct hnae3_handle *nic = &hdev->nic; struct hnae3_knic_private_info *kinfo; u16 new_tqps = hdev->num_tqps; - int i; + unsigned int i; kinfo = &nic->kinfo; kinfo->num_tc = 0; @@ -418,7 +420,7 @@ static void hclgevf_request_link_info(struct hclgevf_dev *hdev) u8 resp_msg; status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_STATUS, 0, NULL, - 0, false, &resp_msg, sizeof(u8)); + 0, false, &resp_msg, sizeof(resp_msg)); if (status) dev_err(&hdev->pdev->dev, "VF failed to fetch link status(%d) from PF", status); @@ -453,11 +455,13 @@ static void hclgevf_update_link_mode(struct hclgevf_dev *hdev) u8 resp_msg; send_msg = HCLGEVF_ADVERTISING; - hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, &send_msg, - sizeof(u8), false, &resp_msg, sizeof(u8)); + hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, + &send_msg, 
sizeof(send_msg), false, + &resp_msg, sizeof(resp_msg)); send_msg = HCLGEVF_SUPPORTED; - hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, &send_msg, - sizeof(u8), false, &resp_msg, sizeof(u8)); + hclgevf_send_mbx_msg(hdev, HCLGE_MBX_GET_LINK_MODE, 0, + &send_msg, sizeof(send_msg), false, + &resp_msg, sizeof(resp_msg)); } static int hclgevf_set_handle_info(struct hclgevf_dev *hdev) @@ -470,12 +474,6 @@ static int hclgevf_set_handle_info(struct hclgevf_dev *hdev) nic->numa_node_mask = hdev->numa_node_mask; nic->flags |= HNAE3_SUPPORT_VF; - if (hdev->ae_dev->dev_type != HNAE3_DEV_KNIC) { - dev_err(&hdev->pdev->dev, "unsupported device type %d\n", - hdev->ae_dev->dev_type); - return -EINVAL; - } - ret = hclgevf_knic_setup(hdev); if (ret) dev_err(&hdev->pdev->dev, "VF knic setup failed %d\n", @@ -544,14 +542,16 @@ static int hclgevf_set_rss_algo_key(struct hclgevf_dev *hdev, const u8 hfunc, const u8 *key) { struct hclgevf_rss_config_cmd *req; + unsigned int key_offset = 0; struct hclgevf_desc desc; - int key_offset; + int key_counts; int key_size; int ret; + key_counts = HCLGEVF_RSS_KEY_SIZE; req = (struct hclgevf_rss_config_cmd *)desc.data; - for (key_offset = 0; key_offset < 3; key_offset++) { + while (key_counts) { hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_GENERIC_CONFIG, false); @@ -560,15 +560,12 @@ static int hclgevf_set_rss_algo_key(struct hclgevf_dev *hdev, req->hash_config |= (key_offset << HCLGEVF_RSS_HASH_KEY_OFFSET_B); - if (key_offset == 2) - key_size = - HCLGEVF_RSS_KEY_SIZE - HCLGEVF_RSS_HASH_KEY_NUM * 2; - else - key_size = HCLGEVF_RSS_HASH_KEY_NUM; - + key_size = min(HCLGEVF_RSS_HASH_KEY_NUM, key_counts); memcpy(req->hash_key, key + key_offset * HCLGEVF_RSS_HASH_KEY_NUM, key_size); + key_counts -= key_size; + key_offset++; ret = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (ret) { dev_err(&hdev->pdev->dev, @@ -631,7 +628,7 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) struct hclgevf_desc desc; u16 roundup_size; int status; - int i; + unsigned int i; req = (struct hclgevf_rss_tc_mode_cmd *)desc.data; @@ -997,6 +994,8 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en, u8 type; req = (struct hclge_mbx_vf_to_pf_cmd *)desc.data; + type = en ? HCLGE_MBX_MAP_RING_TO_VECTOR : + HCLGE_MBX_UNMAP_RING_TO_VECTOR; for (node = ring_chain; node; node = node->next) { int idx_offset = HCLGE_MBX_RING_MAP_BASIC_MSG_NUM + @@ -1006,9 +1005,6 @@ static int hclgevf_bind_ring_to_vector(struct hnae3_handle *handle, bool en, hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_MBX_VF_TO_PF, false); - type = en ? 
- HCLGE_MBX_MAP_RING_TO_VECTOR :
- HCLGE_MBX_UNMAP_RING_TO_VECTOR;
 req->msg[0] = type;
 req->msg[1] = vector_id;
 }
@@ -1134,7 +1130,7 @@ static int hclgevf_set_promisc_mode(struct hclgevf_dev *hdev, bool en_bc_pmc)
 return hclgevf_cmd_set_promisc_mode(hdev, en_bc_pmc);
 }

-static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
+static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
 int stream_id, bool enable)
 {
 struct hclgevf_cfg_com_tqp_queue_cmd *req;
@@ -1147,7 +1143,8 @@ static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, int tqp_id,
 false);
 req->tqp_id = cpu_to_le16(tqp_id & HCLGEVF_RING_ID_MASK);
 req->stream_id = cpu_to_le16(stream_id);
- req->enable |= enable << HCLGEVF_TQP_ENABLE_B;
+ if (enable)
+ req->enable |= 1U << HCLGEVF_TQP_ENABLE_B;

 status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
 if (status)
@@ -1193,7 +1190,7 @@ static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
 HCLGE_MBX_MAC_VLAN_UC_MODIFY;

 status = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_UNICAST,
- subcode, msg_data, ETH_ALEN * 2,
+ subcode, msg_data, sizeof(msg_data),
 true, NULL, 0);
 if (!status)
 ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
@@ -1248,19 +1245,61 @@ static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
 #define HCLGEVF_VLAN_MBX_MSG_LEN 5
 struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 u8 msg_data[HCLGEVF_VLAN_MBX_MSG_LEN];
+ int ret;

- if (vlan_id > 4095)
+ if (vlan_id > HCLGEVF_MAX_VLAN_ID)
 return -EINVAL;

 if (proto != htons(ETH_P_8021Q))
 return -EPROTONOSUPPORT;

+ /* When the device is resetting, the firmware cannot handle the
+ * mailbox. Just record the vlan id and remove it once the
+ * reset has finished.
+ */
+ if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) && is_kill) {
+ set_bit(vlan_id, hdev->vlan_del_fail_bmap);
+ return -EBUSY;
+ }
+
 msg_data[0] = is_kill;
 memcpy(&msg_data[1], &vlan_id, sizeof(vlan_id));
 memcpy(&msg_data[3], &proto, sizeof(proto));
- return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
- HCLGE_MBX_VLAN_FILTER, msg_data,
- HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+ ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_SET_VLAN,
+ HCLGE_MBX_VLAN_FILTER, msg_data,
+ HCLGEVF_VLAN_MBX_MSG_LEN, false, NULL, 0);
+
+ /* When removing the hw vlan filter fails, record the vlan id
+ * and try to remove it from hw later, to stay consistent
+ * with the stack.
+ */ + if (is_kill && ret) + set_bit(vlan_id, hdev->vlan_del_fail_bmap); + + return ret; +} + +static void hclgevf_sync_vlan_filter(struct hclgevf_dev *hdev) +{ +#define HCLGEVF_MAX_SYNC_COUNT 60 + struct hnae3_handle *handle = &hdev->nic; + int ret, sync_cnt = 0; + u16 vlan_id; + + vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); + while (vlan_id != VLAN_N_VID) { + ret = hclgevf_set_vlan_filter(handle, htons(ETH_P_8021Q), + vlan_id, true); + if (ret) + return; + + clear_bit(vlan_id, hdev->vlan_del_fail_bmap); + sync_cnt++; + if (sync_cnt >= HCLGEVF_MAX_SYNC_COUNT) + return; + + vlan_id = find_first_bit(hdev->vlan_del_fail_bmap, VLAN_N_VID); + } } static int hclgevf_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable) @@ -1280,7 +1319,7 @@ static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) u8 msg_data[2]; int ret; - memcpy(&msg_data[0], &queue_id, sizeof(queue_id)); + memcpy(msg_data, &queue_id, sizeof(queue_id)); /* disable vf queue before send queue reset msg to PF */ ret = hclgevf_tqp_enable(hdev, queue_id, 0, false); @@ -1288,7 +1327,7 @@ static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id) return ret; return hclgevf_send_mbx_msg(hdev, HCLGE_MBX_QUEUE_RESET, 0, msg_data, - 2, true, NULL, 0); + sizeof(msg_data), true, NULL, 0); } static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu) @@ -1306,6 +1345,10 @@ static int hclgevf_notify_client(struct hclgevf_dev *hdev, struct hnae3_handle *handle = &hdev->nic; int ret; + if (!test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state) || + !client) + return 0; + if (!client->ops->reset_notify) return -EOPNOTSUPP; @@ -1410,6 +1453,8 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev) static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) { +#define HCLGEVF_RESET_SYNC_TIME 100 + int ret = 0; switch (hdev->reset_type) { @@ -1427,13 +1472,34 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev) } set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); - + /* inform hardware that preparatory work is done */ + msleep(HCLGEVF_RESET_SYNC_TIME); + hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG, + HCLGEVF_NIC_CMQ_ENABLE); dev_info(&hdev->pdev->dev, "prepare reset(%d) wait done, ret:%d\n", hdev->reset_type, ret); return ret; } +static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev) +{ + hdev->rst_stats.rst_fail_cnt++; + dev_err(&hdev->pdev->dev, "failed to reset VF(%d)\n", + hdev->rst_stats.rst_fail_cnt); + + if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT) + set_bit(hdev->reset_type, &hdev->reset_pending); + + if (hclgevf_is_reset_pending(hdev)) { + set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state); + hclgevf_reset_task_schedule(hdev); + } else { + hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG, + HCLGEVF_NIC_CMQ_ENABLE); + } +} + static int hclgevf_reset(struct hclgevf_dev *hdev) { struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); @@ -1490,19 +1556,13 @@ static int hclgevf_reset(struct hclgevf_dev *hdev) hdev->last_reset_time = jiffies; ae_dev->reset_type = HNAE3_NONE_RESET; hdev->rst_stats.rst_done_cnt++; + hdev->rst_stats.rst_fail_cnt = 0; return ret; err_reset_lock: rtnl_unlock(); err_reset: - /* When VF reset failed, only the higher level reset asserted by PF - * can restore it, so re-initialize the command queue to receive - * this higher reset event. 
- */ - hclgevf_cmd_init(hdev); - dev_err(&hdev->pdev->dev, "failed to reset VF\n"); - if (hclgevf_is_reset_pending(hdev)) - hclgevf_reset_task_schedule(hdev); + hclgevf_reset_err_handle(hdev); return ret; } @@ -1612,7 +1672,8 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev) void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev) { - if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) { + if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) && + !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) { set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state); schedule_work(&hdev->rst_service_task); } @@ -1648,7 +1709,8 @@ static void hclgevf_service_timer(struct timer_list *t) { struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer); - mod_timer(&hdev->service_timer, jiffies + 5 * HZ); + mod_timer(&hdev->service_timer, jiffies + + HCLGEVF_GENERAL_TASK_INTERVAL * HZ); hdev->stats_timer++; hclgevf_task_schedule(hdev); @@ -1668,9 +1730,9 @@ static void hclgevf_reset_service_task(struct work_struct *work) if (test_and_clear_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state)) { /* PF has initmated that it is about to reset the hardware. - * We now have to poll & check if harware has actually completed - * the reset sequence. On hardware reset completion, VF needs to - * reset the client and ae device. + * We now have to poll & check if hardware has actually + * completed the reset sequence. On hardware reset completion, + * VF needs to reset the client and ae device. */ hdev->reset_attempts = 0; @@ -1686,7 +1748,7 @@ static void hclgevf_reset_service_task(struct work_struct *work) } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state)) { /* we could be here when either of below happens: - * 1. reset was initiated due to watchdog timeout due to + * 1. reset was initiated due to watchdog timeout caused by * a. IMP was earlier reset and our TX got choked down and * which resulted in watchdog reacting and inducing VF * reset. This also means our cmdq would be unreliable. 
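The reworked VF reset error path above replaces the old "re-init the command queue and hope" fallback: hclgevf_reset_err_handle() counts consecutive failures in rst_stats.rst_fail_cnt, re-queues the failed reset type only while the count stays below HCLGEVF_RESET_MAX_FAIL_CNT, and otherwise re-enables the command queue so that the higher-level reset asserted by the PF can still reach the function. A minimal, self-contained sketch of that retry policy follows; apart from the HCLGEVF_RESET_MAX_FAIL_CNT cap and the rst_fail_cnt counter taken from the patch, every name here (struct rst_ctx, reset_once, the messages) is a simplified stand-in, not driver code.

#include <stdbool.h>
#include <stdio.h>

#define HCLGEVF_RESET_MAX_FAIL_CNT 5 /* retry cap, as in the patch */

struct rst_ctx {
	unsigned int rst_fail_cnt;   /* consecutive failed VF resets */
	unsigned long reset_pending; /* bitmap of reset types to retry */
};

/* Stand-in for hclgevf_reset(); always fails so the cap is reached. */
static bool reset_once(struct rst_ctx *ctx)
{
	(void)ctx;
	return false;
}

static void reset_err_handle(struct rst_ctx *ctx, unsigned int type)
{
	ctx->rst_fail_cnt++;

	/* Re-queue the same reset type only while under the cap. */
	if (ctx->rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT)
		ctx->reset_pending |= 1UL << type;

	if (ctx->reset_pending)
		printf("attempt %u failed, rescheduling reset task\n",
		       ctx->rst_fail_cnt);
	else
		printf("attempt %u failed, giving up; re-enable cmdq\n",
		       ctx->rst_fail_cnt);
}

int main(void)
{
	struct rst_ctx ctx = { 0, 0 };
	unsigned int type = 1; /* some VF-level reset type */

	/* Drive the loop the way the reset service task would. */
	while (!reset_once(&ctx)) {
		reset_err_handle(&ctx, type);
		if (!(ctx.reset_pending & (1UL << type)))
			break; /* cap reached: nothing was re-queued */
		ctx.reset_pending &= ~(1UL << type); /* task consumes it */
	}
	return 0;
}

The property the patch encodes is that a VF which keeps failing must eventually stop retrying on its own and leave its command queue enabled, since from that point only a PF-initiated reset can recover it.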
@@ -1748,7 +1810,8 @@ static void hclgevf_keep_alive_timer(struct timer_list *t) struct hclgevf_dev *hdev = from_timer(hdev, t, keep_alive_timer); schedule_work(&hdev->keep_alive_task); - mod_timer(&hdev->keep_alive_timer, jiffies + 2 * HZ); + mod_timer(&hdev->keep_alive_timer, jiffies + + HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); } static void hclgevf_keep_alive_task(struct work_struct *work) @@ -1763,7 +1826,7 @@ static void hclgevf_keep_alive_task(struct work_struct *work) return; ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_KEEP_ALIVE, 0, NULL, - 0, false, &respmsg, sizeof(u8)); + 0, false, &respmsg, sizeof(respmsg)); if (ret) dev_err(&hdev->pdev->dev, "VF sends keep alive cmd failed(=%d)\n", ret); @@ -1789,6 +1852,8 @@ static void hclgevf_service_task(struct work_struct *work) hclgevf_update_link_mode(hdev); + hclgevf_sync_vlan_filter(hdev); + hclgevf_deferred_task_schedule(hdev); clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state); @@ -1995,7 +2060,7 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) } - /* Initialize RSS indirect table for each vport */ + /* Initialize RSS indirect table */ for (i = 0; i < HCLGEVF_RSS_IND_TBL_SIZE; i++) rss_cfg->rss_indirection_tbl[i] = i % hdev->rss_size_max; @@ -2008,9 +2073,6 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev) static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) { - /* other vlan config(like, VLAN TX/RX offload) would also be added - * here later - */ return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0, false); } @@ -2032,7 +2094,6 @@ static int hclgevf_ae_start(struct hnae3_handle *handle) { struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle); - /* reset tqp stats */ hclgevf_reset_tqp_stats(handle); hclgevf_request_link_info(hdev); @@ -2056,7 +2117,6 @@ static void hclgevf_ae_stop(struct hnae3_handle *handle) if (hclgevf_reset_tqp(handle, i)) break; - /* reset tqp stats */ hclgevf_reset_tqp_stats(handle); hclgevf_update_link_status(hdev, 0); } @@ -2080,7 +2140,8 @@ static int hclgevf_client_start(struct hnae3_handle *handle) if (ret) return ret; - mod_timer(&hdev->keep_alive_timer, jiffies + 2 * HZ); + mod_timer(&hdev->keep_alive_timer, jiffies + + HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ); return 0; } @@ -2123,6 +2184,7 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev) static void hclgevf_state_uninit(struct hclgevf_dev *hdev) { set_bit(HCLGEVF_STATE_DOWN, &hdev->state); + set_bit(HCLGEVF_STATE_REMOVING, &hdev->state); if (hdev->keep_alive_timer.function) del_timer_sync(&hdev->keep_alive_timer); @@ -2249,49 +2311,68 @@ static void hclgevf_info_show(struct hclgevf_dev *hdev) dev_info(dev, "VF info end.\n"); } -static int hclgevf_init_client_instance(struct hnae3_client *client, - struct hnae3_ae_dev *ae_dev) +static int hclgevf_init_nic_client_instance(struct hnae3_ae_dev *ae_dev, + struct hnae3_client *client) { struct hclgevf_dev *hdev = ae_dev->priv; int ret; - switch (client->type) { - case HNAE3_CLIENT_KNIC: - hdev->nic_client = client; - hdev->nic.client = client; + ret = client->ops->init_instance(&hdev->nic); + if (ret) + return ret; - ret = client->ops->init_instance(&hdev->nic); - if (ret) - goto clear_nic; + set_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); + hnae3_set_client_init_flag(client, ae_dev, 1); - hnae3_set_client_init_flag(client, ae_dev, 1); + if (netif_msg_drv(&hdev->nic)) + hclgevf_info_show(hdev); - if (netif_msg_drv(&hdev->nic)) - hclgevf_info_show(hdev); + return 0; +} - if (hdev->roce_client && hnae3_dev_roce_supported(hdev)) { - struct hnae3_client 
*rc = hdev->roce_client; +static int hclgevf_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, + struct hnae3_client *client) +{ + struct hclgevf_dev *hdev = ae_dev->priv; + int ret; - ret = hclgevf_init_roce_base_info(hdev); - if (ret) - goto clear_roce; - ret = rc->ops->init_instance(&hdev->roce); - if (ret) - goto clear_roce; + if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client || + !hdev->nic_client) + return 0; - hnae3_set_client_init_flag(hdev->roce_client, ae_dev, - 1); - } - break; - case HNAE3_CLIENT_UNIC: + ret = hclgevf_init_roce_base_info(hdev); + if (ret) + return ret; + + ret = client->ops->init_instance(&hdev->roce); + if (ret) + return ret; + + hnae3_set_client_init_flag(client, ae_dev, 1); + + return 0; +} + +static int hclgevf_init_client_instance(struct hnae3_client *client, + struct hnae3_ae_dev *ae_dev) +{ + struct hclgevf_dev *hdev = ae_dev->priv; + int ret; + + switch (client->type) { + case HNAE3_CLIENT_KNIC: hdev->nic_client = client; hdev->nic.client = client; - ret = client->ops->init_instance(&hdev->nic); + ret = hclgevf_init_nic_client_instance(ae_dev, client); if (ret) goto clear_nic; - hnae3_set_client_init_flag(client, ae_dev, 1); + ret = hclgevf_init_roce_client_instance(ae_dev, + hdev->roce_client); + if (ret) + goto clear_roce; + break; case HNAE3_CLIENT_ROCE: if (hnae3_dev_roce_supported(hdev)) { @@ -2299,17 +2380,10 @@ static int hclgevf_init_client_instance(struct hnae3_client *client, hdev->roce.client = client; } - if (hdev->roce_client && hdev->nic_client) { - ret = hclgevf_init_roce_base_info(hdev); - if (ret) - goto clear_roce; - - ret = client->ops->init_instance(&hdev->roce); - if (ret) - goto clear_roce; - } + ret = hclgevf_init_roce_client_instance(ae_dev, client); + if (ret) + goto clear_roce; - hnae3_set_client_init_flag(client, ae_dev, 1); break; default: return -EINVAL; @@ -2342,6 +2416,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, /* un-init nic/unic, if this was not called by roce client */ if (client->ops->uninit_instance && hdev->nic_client && client->type != HNAE3_CLIENT_ROCE) { + clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); + client->ops->uninit_instance(&hdev->nic, 0); hdev->nic_client = NULL; hdev->nic.client = NULL; @@ -2512,6 +2588,12 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev) return ret; } + if (pdev->revision >= 0x21) { + ret = hclgevf_set_promisc_mode(hdev, true); + if (ret) + return ret; + } + dev_info(&hdev->pdev->dev, "Reset done\n"); return 0; @@ -2591,9 +2673,11 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev) * firmware makes sure broadcast packets can be accepted. * For revision 0x21, default to enable broadcast promisc mode. 
*/ - ret = hclgevf_set_promisc_mode(hdev, true); - if (ret) - goto err_config; + if (pdev->revision >= 0x21) { + ret = hclgevf_set_promisc_mode(hdev, true); + if (ret) + goto err_config; + } /* Initialize RSS for this VF */ ret = hclgevf_rss_init_hw(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index cc52f54f8c08..5a9e30998a8f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -4,6 +4,7 @@ #ifndef __HCLGEVF_MAIN_H #define __HCLGEVF_MAIN_H #include <linux/fs.h> +#include <linux/if_vlan.h> #include <linux/types.h> #include "hclge_mbx.h" #include "hclgevf_cmd.h" @@ -12,9 +13,12 @@ #define HCLGEVF_MOD_VERSION "1.0" #define HCLGEVF_DRIVER_NAME "hclgevf" +#define HCLGEVF_MAX_VLAN_ID 4095 #define HCLGEVF_MISC_VECTOR_NUM 0 #define HCLGEVF_INVALID_VPORT 0xffff +#define HCLGEVF_GENERAL_TASK_INTERVAL 5 +#define HCLGEVF_KEEP_ALIVE_TASK_INTERVAL 2 /* This number in actual depends upon the total number of VFs * created by physical function. But the maximum number of @@ -130,6 +134,8 @@ enum hclgevf_states { HCLGEVF_STATE_DOWN, HCLGEVF_STATE_DISABLED, HCLGEVF_STATE_IRQ_INITED, + HCLGEVF_STATE_REMOVING, + HCLGEVF_STATE_NIC_REGISTERED, /* task states */ HCLGEVF_STATE_SERVICE_SCHED, HCLGEVF_STATE_RST_SERVICE_SCHED, @@ -220,6 +226,7 @@ struct hclgevf_rst_stats { u32 vf_rst_cnt; /* the number of VF reset */ u32 rst_done_cnt; /* the number of reset completed */ u32 hw_rst_done_cnt; /* the number of HW reset completed */ + u32 rst_fail_cnt; /* the number of VF reset fail */ }; struct hclgevf_dev { @@ -265,6 +272,8 @@ struct hclgevf_dev { u16 *vector_status; int *vector_irq; + unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)]; + bool mbx_event_pending; struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */ struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 30f2e9352cf3..f60b80bd605e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -102,7 +102,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, u16 code, u16 subcode, ~HCLGE_MBX_NEED_RESP_BIT; req->msg[0] = code; req->msg[1] = subcode; - memcpy(&req->msg[2], msg_data, msg_len); + if (msg_data) + memcpy(&req->msg[2], msg_data, msg_len); /* synchronous send */ if (need_resp) { diff --git a/drivers/net/ethernet/huawei/hinic/Makefile b/drivers/net/ethernet/huawei/hinic/Makefile index 99de5b6607d5..fe88ab88cacc 100644 --- a/drivers/net/ethernet/huawei/hinic/Makefile +++ b/drivers/net/ethernet/huawei/hinic/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_HINIC) += hinic.o hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \ hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \ hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \ - hinic_common.o + hinic_common.o hinic_ethtool.o diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h index 353276fdcaed..a209b14160cc 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h @@ -22,6 +22,7 @@ enum hinic_flags { HINIC_LINK_UP = BIT(0), HINIC_INTF_UP = BIT(1), + HINIC_RSS_ENABLE = BIT(2), }; struct hinic_rx_mode_work { @@ -29,6 +30,23 @@ struct hinic_rx_mode_work { 
u32 rx_mode; }; +struct hinic_rss_type { + u8 tcp_ipv6_ext; + u8 ipv6_ext; + u8 tcp_ipv6; + u8 ipv6; + u8 tcp_ipv4; + u8 ipv4; + u8 udp_ipv6; + u8 udp_ipv4; +}; + +enum hinic_rss_hash_type { + HINIC_RSS_HASH_ENGINE_TYPE_XOR, + HINIC_RSS_HASH_ENGINE_TYPE_TOEP, + HINIC_RSS_HASH_ENGINE_TYPE_MAX, +}; + struct hinic_dev { struct net_device *netdev; struct hinic_hwdev *hwdev; @@ -36,6 +54,8 @@ struct hinic_dev { u32 msg_enable; unsigned int tx_weight; unsigned int rx_weight; + u16 num_qps; + u16 max_qps; unsigned int flags; @@ -50,6 +70,14 @@ struct hinic_dev { struct hinic_txq_stats tx_stats; struct hinic_rxq_stats rx_stats; + + u8 rss_tmpl_idx; + u8 rss_hash_engine; + u16 num_rss; + u16 rss_limit; + struct hinic_rss_type rss_type; + u8 *rss_hkey_user; + s32 *rss_indir_user; }; #endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c new file mode 100644 index 000000000000..60ec48fe4144 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c @@ -0,0 +1,762 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Huawei HiNIC PCI Express Linux driver + * Copyright(c) 2017 Huawei Technologies Co., Ltd + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/etherdevice.h> +#include <linux/netdevice.h> +#include <linux/if_vlan.h> +#include <linux/ethtool.h> +#include <linux/vmalloc.h> + +#include "hinic_hw_qp.h" +#include "hinic_hw_dev.h" +#include "hinic_port.h" +#include "hinic_tx.h" +#include "hinic_rx.h" +#include "hinic_dev.h" + +static void set_link_speed(struct ethtool_link_ksettings *link_ksettings, + enum hinic_speed speed) +{ + switch (speed) { + case HINIC_SPEED_10MB_LINK: + link_ksettings->base.speed = SPEED_10; + break; + + case HINIC_SPEED_100MB_LINK: + link_ksettings->base.speed = SPEED_100; + break; + + case HINIC_SPEED_1000MB_LINK: + link_ksettings->base.speed = SPEED_1000; + break; + + case HINIC_SPEED_10GB_LINK: + link_ksettings->base.speed = SPEED_10000; + break; + + case HINIC_SPEED_25GB_LINK: + link_ksettings->base.speed = SPEED_25000; + break; + + case HINIC_SPEED_40GB_LINK: + link_ksettings->base.speed = SPEED_40000; + break; + + case HINIC_SPEED_100GB_LINK: + link_ksettings->base.speed = SPEED_100000; + break; + + default: + link_ksettings->base.speed = SPEED_UNKNOWN; + break; + } +} + +static int hinic_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings + *link_ksettings) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + enum hinic_port_link_state link_state; + struct hinic_port_cap port_cap; + int err; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Autoneg); + + link_ksettings->base.speed = SPEED_UNKNOWN; + link_ksettings->base.autoneg = AUTONEG_DISABLE; + link_ksettings->base.duplex = DUPLEX_UNKNOWN; + + err = hinic_port_get_cap(nic_dev, &port_cap); + if (err) + return err; + + 
err = hinic_port_link_state(nic_dev, &link_state); + if (err) + return err; + + if (link_state != HINIC_LINK_STATE_UP) + return err; + + set_link_speed(link_ksettings, port_cap.speed); + + if (!!(port_cap.autoneg_cap & HINIC_AUTONEG_SUPPORTED)) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); + + if (port_cap.autoneg_state == HINIC_AUTONEG_ACTIVE) + link_ksettings->base.autoneg = AUTONEG_ENABLE; + + link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ? + DUPLEX_FULL : DUPLEX_HALF; + return 0; +} + +static void hinic_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *info) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + u8 mgmt_ver[HINIC_MGMT_VERSION_MAX_LEN] = {0}; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + int err; + + strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver)); + strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info)); + + err = hinic_get_mgmt_version(nic_dev, mgmt_ver); + if (err) + return; + + snprintf(info->fw_version, sizeof(info->fw_version), "%s", mgmt_ver); +} + +static void hinic_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + ring->rx_max_pending = HINIC_RQ_DEPTH; + ring->tx_max_pending = HINIC_SQ_DEPTH; + ring->rx_pending = HINIC_RQ_DEPTH; + ring->tx_pending = HINIC_SQ_DEPTH; +} + +static void hinic_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + struct hinic_hwdev *hwdev = nic_dev->hwdev; + + channels->max_rx = hwdev->nic_cap.max_qps; + channels->max_tx = hwdev->nic_cap.max_qps; + channels->max_other = 0; + channels->max_combined = 0; + channels->rx_count = hinic_hwdev_num_qps(hwdev); + channels->tx_count = hinic_hwdev_num_qps(hwdev); + channels->other_count = 0; + channels->combined_count = 0; +} + +static int hinic_get_rss_hash_opts(struct hinic_dev *nic_dev, + struct ethtool_rxnfc *cmd) +{ + struct hinic_rss_type rss_type = { 0 }; + int err; + + cmd->data = 0; + + if (!(nic_dev->flags & HINIC_RSS_ENABLE)) + return 0; + + err = hinic_get_rss_type(nic_dev, nic_dev->rss_tmpl_idx, + &rss_type); + if (err) + return err; + + cmd->data = RXH_IP_SRC | RXH_IP_DST; + switch (cmd->flow_type) { + case TCP_V4_FLOW: + if (rss_type.tcp_ipv4) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case TCP_V6_FLOW: + if (rss_type.tcp_ipv6) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case UDP_V4_FLOW: + if (rss_type.udp_ipv4) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case UDP_V6_FLOW: + if (rss_type.udp_ipv6) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case IPV4_FLOW: + case IPV6_FLOW: + break; + default: + cmd->data = 0; + return -EINVAL; + } + + return 0; +} + +static int set_l4_rss_hash_ops(struct ethtool_rxnfc *cmd, + struct hinic_rss_type *rss_type) +{ + u8 rss_l4_en = 0; + + switch (cmd->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_l4_en = 0; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_l4_en = 1; + break; + default: + return -EINVAL; + } + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + rss_type->tcp_ipv4 = rss_l4_en; + break; + case TCP_V6_FLOW: + rss_type->tcp_ipv6 = rss_l4_en; + break; + case UDP_V4_FLOW: + rss_type->udp_ipv4 = rss_l4_en; + break; + case UDP_V6_FLOW: + rss_type->udp_ipv6 = rss_l4_en; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int hinic_set_rss_hash_opts(struct hinic_dev *nic_dev, + struct ethtool_rxnfc *cmd) +{ + struct 
hinic_rss_type *rss_type = &nic_dev->rss_type; + int err; + + if (!(nic_dev->flags & HINIC_RSS_ENABLE)) { + cmd->data = 0; + return -EOPNOTSUPP; + } + + /* RSS does not support anything other than hashing + * to queues on src and dst IPs and ports + */ + if (cmd->data & ~(RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | + RXH_L4_B_2_3)) + return -EINVAL; + + /* We need at least the IP SRC and DEST fields for hashing */ + if (!(cmd->data & RXH_IP_SRC) || !(cmd->data & RXH_IP_DST)) + return -EINVAL; + + err = hinic_get_rss_type(nic_dev, + nic_dev->rss_tmpl_idx, rss_type); + if (err) + return -EFAULT; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V4_FLOW: + case UDP_V6_FLOW: + err = set_l4_rss_hash_ops(cmd, rss_type); + if (err) + return err; + break; + case IPV4_FLOW: + rss_type->ipv4 = 1; + break; + case IPV6_FLOW: + rss_type->ipv6 = 1; + break; + default: + return -EINVAL; + } + + err = hinic_set_rss_type(nic_dev, nic_dev->rss_tmpl_idx, + *rss_type); + if (err) + return -EFAULT; + + return 0; +} + +static int __set_rss_rxfh(struct net_device *netdev, + const u32 *indir, const u8 *key) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int err; + + if (indir) { + if (!nic_dev->rss_indir_user) { + nic_dev->rss_indir_user = + kzalloc(sizeof(u32) * HINIC_RSS_INDIR_SIZE, + GFP_KERNEL); + if (!nic_dev->rss_indir_user) + return -ENOMEM; + } + + memcpy(nic_dev->rss_indir_user, indir, + sizeof(u32) * HINIC_RSS_INDIR_SIZE); + + err = hinic_rss_set_indir_tbl(nic_dev, + nic_dev->rss_tmpl_idx, indir); + if (err) + return -EFAULT; + } + + if (key) { + if (!nic_dev->rss_hkey_user) { + nic_dev->rss_hkey_user = + kzalloc(HINIC_RSS_KEY_SIZE * 2, GFP_KERNEL); + + if (!nic_dev->rss_hkey_user) + return -ENOMEM; + } + + memcpy(nic_dev->rss_hkey_user, key, HINIC_RSS_KEY_SIZE); + + err = hinic_rss_set_template_tbl(nic_dev, + nic_dev->rss_tmpl_idx, key); + if (err) + return -EFAULT; + } + + return 0; +} + +static int hinic_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = nic_dev->num_qps; + break; + case ETHTOOL_GRXFH: + err = hinic_get_rss_hash_opts(nic_dev, cmd); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int hinic_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int err = 0; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + err = hinic_set_rss_hash_opts(nic_dev, cmd); + break; + default: + err = -EOPNOTSUPP; + break; + } + + return err; +} + +static int hinic_get_rxfh(struct net_device *netdev, + u32 *indir, u8 *key, u8 *hfunc) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + u8 hash_engine_type = 0; + int err = 0; + + if (!(nic_dev->flags & HINIC_RSS_ENABLE)) + return -EOPNOTSUPP; + + if (hfunc) { + err = hinic_rss_get_hash_engine(nic_dev, + nic_dev->rss_tmpl_idx, + &hash_engine_type); + if (err) + return -EFAULT; + + *hfunc = hash_engine_type ? 
ETH_RSS_HASH_TOP : ETH_RSS_HASH_XOR; + } + + if (indir) { + err = hinic_rss_get_indir_tbl(nic_dev, + nic_dev->rss_tmpl_idx, indir); + if (err) + return -EFAULT; + } + + if (key) + err = hinic_rss_get_template_tbl(nic_dev, + nic_dev->rss_tmpl_idx, key); + + return err; +} + +static int hinic_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int err = 0; + + if (!(nic_dev->flags & HINIC_RSS_ENABLE)) + return -EOPNOTSUPP; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE) { + if (hfunc != ETH_RSS_HASH_TOP && hfunc != ETH_RSS_HASH_XOR) + return -EOPNOTSUPP; + + nic_dev->rss_hash_engine = (hfunc == ETH_RSS_HASH_XOR) ? + HINIC_RSS_HASH_ENGINE_TYPE_XOR : + HINIC_RSS_HASH_ENGINE_TYPE_TOEP; + err = hinic_rss_set_hash_engine + (nic_dev, nic_dev->rss_tmpl_idx, + nic_dev->rss_hash_engine); + if (err) + return -EFAULT; + } + + err = __set_rss_rxfh(netdev, indir, key); + + return err; +} + +static u32 hinic_get_rxfh_key_size(struct net_device *netdev) +{ + return HINIC_RSS_KEY_SIZE; +} + +static u32 hinic_get_rxfh_indir_size(struct net_device *netdev) +{ + return HINIC_RSS_INDIR_SIZE; +} + +#define ARRAY_LEN(arr) ((int)((int)sizeof(arr) / (int)sizeof(arr[0]))) + +#define HINIC_FUNC_STAT(_stat_item) { \ + .name = #_stat_item, \ + .size = FIELD_SIZEOF(struct hinic_vport_stats, _stat_item), \ + .offset = offsetof(struct hinic_vport_stats, _stat_item) \ +} + +static struct hinic_stats hinic_function_stats[] = { + HINIC_FUNC_STAT(tx_unicast_pkts_vport), + HINIC_FUNC_STAT(tx_unicast_bytes_vport), + HINIC_FUNC_STAT(tx_multicast_pkts_vport), + HINIC_FUNC_STAT(tx_multicast_bytes_vport), + HINIC_FUNC_STAT(tx_broadcast_pkts_vport), + HINIC_FUNC_STAT(tx_broadcast_bytes_vport), + + HINIC_FUNC_STAT(rx_unicast_pkts_vport), + HINIC_FUNC_STAT(rx_unicast_bytes_vport), + HINIC_FUNC_STAT(rx_multicast_pkts_vport), + HINIC_FUNC_STAT(rx_multicast_bytes_vport), + HINIC_FUNC_STAT(rx_broadcast_pkts_vport), + HINIC_FUNC_STAT(rx_broadcast_bytes_vport), + + HINIC_FUNC_STAT(tx_discard_vport), + HINIC_FUNC_STAT(rx_discard_vport), + HINIC_FUNC_STAT(tx_err_vport), + HINIC_FUNC_STAT(rx_err_vport), +}; + +#define HINIC_PORT_STAT(_stat_item) { \ + .name = #_stat_item, \ + .size = FIELD_SIZEOF(struct hinic_phy_port_stats, _stat_item), \ + .offset = offsetof(struct hinic_phy_port_stats, _stat_item) \ +} + +static struct hinic_stats hinic_port_stats[] = { + HINIC_PORT_STAT(mac_rx_total_pkt_num), + HINIC_PORT_STAT(mac_rx_total_oct_num), + HINIC_PORT_STAT(mac_rx_bad_pkt_num), + HINIC_PORT_STAT(mac_rx_bad_oct_num), + HINIC_PORT_STAT(mac_rx_good_pkt_num), + HINIC_PORT_STAT(mac_rx_good_oct_num), + HINIC_PORT_STAT(mac_rx_uni_pkt_num), + HINIC_PORT_STAT(mac_rx_multi_pkt_num), + HINIC_PORT_STAT(mac_rx_broad_pkt_num), + HINIC_PORT_STAT(mac_tx_total_pkt_num), + HINIC_PORT_STAT(mac_tx_total_oct_num), + HINIC_PORT_STAT(mac_tx_bad_pkt_num), + HINIC_PORT_STAT(mac_tx_bad_oct_num), + HINIC_PORT_STAT(mac_tx_good_pkt_num), + HINIC_PORT_STAT(mac_tx_good_oct_num), + HINIC_PORT_STAT(mac_tx_uni_pkt_num), + HINIC_PORT_STAT(mac_tx_multi_pkt_num), + HINIC_PORT_STAT(mac_tx_broad_pkt_num), + HINIC_PORT_STAT(mac_rx_fragment_pkt_num), + HINIC_PORT_STAT(mac_rx_undersize_pkt_num), + HINIC_PORT_STAT(mac_rx_undermin_pkt_num), + HINIC_PORT_STAT(mac_rx_64_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_65_127_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_128_255_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_256_511_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_512_1023_oct_pkt_num), + 
HINIC_PORT_STAT(mac_rx_1024_1518_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_1519_2047_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_2048_4095_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_4096_8191_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_8192_9216_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_9217_12287_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_12288_16383_oct_pkt_num), + HINIC_PORT_STAT(mac_rx_1519_max_good_pkt_num), + HINIC_PORT_STAT(mac_rx_1519_max_bad_pkt_num), + HINIC_PORT_STAT(mac_rx_oversize_pkt_num), + HINIC_PORT_STAT(mac_rx_jabber_pkt_num), + HINIC_PORT_STAT(mac_rx_pause_num), + HINIC_PORT_STAT(mac_rx_pfc_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri0_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri1_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri2_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri3_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri4_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri5_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri6_pkt_num), + HINIC_PORT_STAT(mac_rx_pfc_pri7_pkt_num), + HINIC_PORT_STAT(mac_rx_control_pkt_num), + HINIC_PORT_STAT(mac_rx_sym_err_pkt_num), + HINIC_PORT_STAT(mac_rx_fcs_err_pkt_num), + HINIC_PORT_STAT(mac_rx_send_app_good_pkt_num), + HINIC_PORT_STAT(mac_rx_send_app_bad_pkt_num), + HINIC_PORT_STAT(mac_tx_fragment_pkt_num), + HINIC_PORT_STAT(mac_tx_undersize_pkt_num), + HINIC_PORT_STAT(mac_tx_undermin_pkt_num), + HINIC_PORT_STAT(mac_tx_64_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_65_127_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_128_255_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_256_511_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_512_1023_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_1024_1518_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_1519_2047_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_2048_4095_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_4096_8191_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_8192_9216_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_9217_12287_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_12288_16383_oct_pkt_num), + HINIC_PORT_STAT(mac_tx_1519_max_good_pkt_num), + HINIC_PORT_STAT(mac_tx_1519_max_bad_pkt_num), + HINIC_PORT_STAT(mac_tx_oversize_pkt_num), + HINIC_PORT_STAT(mac_tx_jabber_pkt_num), + HINIC_PORT_STAT(mac_tx_pause_num), + HINIC_PORT_STAT(mac_tx_pfc_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri0_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri1_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri2_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri3_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri4_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri5_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri6_pkt_num), + HINIC_PORT_STAT(mac_tx_pfc_pri7_pkt_num), + HINIC_PORT_STAT(mac_tx_control_pkt_num), + HINIC_PORT_STAT(mac_tx_err_all_pkt_num), + HINIC_PORT_STAT(mac_tx_from_app_good_pkt_num), + HINIC_PORT_STAT(mac_tx_from_app_bad_pkt_num), +}; + +#define HINIC_TXQ_STAT(_stat_item) { \ + .name = "txq%d_"#_stat_item, \ + .size = FIELD_SIZEOF(struct hinic_txq_stats, _stat_item), \ + .offset = offsetof(struct hinic_txq_stats, _stat_item) \ +} + +static struct hinic_stats hinic_tx_queue_stats[] = { + HINIC_TXQ_STAT(pkts), + HINIC_TXQ_STAT(bytes), + HINIC_TXQ_STAT(tx_busy), + HINIC_TXQ_STAT(tx_wake), + HINIC_TXQ_STAT(tx_dropped), + HINIC_TXQ_STAT(big_frags_pkts), +}; + +#define HINIC_RXQ_STAT(_stat_item) { \ + .name = "rxq%d_"#_stat_item, \ + .size = FIELD_SIZEOF(struct hinic_rxq_stats, _stat_item), \ + .offset = offsetof(struct hinic_rxq_stats, _stat_item) \ +} + +static struct hinic_stats hinic_rx_queue_stats[] = { + HINIC_RXQ_STAT(pkts), + HINIC_RXQ_STAT(bytes), + HINIC_RXQ_STAT(errors), + HINIC_RXQ_STAT(csum_errors), + HINIC_RXQ_STAT(other_errors), +}; + +static void get_drv_queue_stats(struct 
hinic_dev *nic_dev, u64 *data) +{ + struct hinic_txq_stats txq_stats; + struct hinic_rxq_stats rxq_stats; + u16 i = 0, j = 0, qid = 0; + char *p; + + for (qid = 0; qid < nic_dev->num_qps; qid++) { + if (!nic_dev->txqs) + break; + + hinic_txq_get_stats(&nic_dev->txqs[qid], &txq_stats); + for (j = 0; j < ARRAY_LEN(hinic_tx_queue_stats); j++, i++) { + p = (char *)&txq_stats + + hinic_tx_queue_stats[j].offset; + data[i] = (hinic_tx_queue_stats[j].size == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + } + + for (qid = 0; qid < nic_dev->num_qps; qid++) { + if (!nic_dev->rxqs) + break; + + hinic_rxq_get_stats(&nic_dev->rxqs[qid], &rxq_stats); + for (j = 0; j < ARRAY_LEN(hinic_rx_queue_stats); j++, i++) { + p = (char *)&rxq_stats + + hinic_rx_queue_stats[j].offset; + data[i] = (hinic_rx_queue_stats[j].size == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + } +} + +static void hinic_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + struct hinic_vport_stats vport_stats = {0}; + struct hinic_phy_port_stats *port_stats; + u16 i = 0, j = 0; + char *p; + int err; + + err = hinic_get_vport_stats(nic_dev, &vport_stats); + if (err) + netif_err(nic_dev, drv, netdev, + "Failed to get vport stats from firmware\n"); + + for (j = 0; j < ARRAY_LEN(hinic_function_stats); j++, i++) { + p = (char *)&vport_stats + hinic_function_stats[j].offset; + data[i] = (hinic_function_stats[j].size == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + + port_stats = kzalloc(sizeof(*port_stats), GFP_KERNEL); + if (!port_stats) { + memset(&data[i], 0, + ARRAY_LEN(hinic_port_stats) * sizeof(*data)); + i += ARRAY_LEN(hinic_port_stats); + goto get_drv_stats; + } + + err = hinic_get_phy_port_stats(nic_dev, port_stats); + if (err) + netif_err(nic_dev, drv, netdev, + "Failed to get port stats from firmware\n"); + + for (j = 0; j < ARRAY_LEN(hinic_port_stats); j++, i++) { + p = (char *)port_stats + hinic_port_stats[j].offset; + data[i] = (hinic_port_stats[j].size == + sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; + } + + kfree(port_stats); + +get_drv_stats: + get_drv_queue_stats(nic_dev, data + i); +} + +static int hinic_get_sset_count(struct net_device *netdev, int sset) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + int count, q_num; + + switch (sset) { + case ETH_SS_STATS: + q_num = nic_dev->num_qps; + count = ARRAY_LEN(hinic_function_stats) + + (ARRAY_LEN(hinic_tx_queue_stats) + + ARRAY_LEN(hinic_rx_queue_stats)) * q_num; + + count += ARRAY_LEN(hinic_port_stats); + + return count; + default: + return -EOPNOTSUPP; + } +} + +static void hinic_get_strings(struct net_device *netdev, + u32 stringset, u8 *data) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + char *p = (char *)data; + u16 i, j; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_LEN(hinic_function_stats); i++) { + memcpy(p, hinic_function_stats[i].name, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < ARRAY_LEN(hinic_port_stats); i++) { + memcpy(p, hinic_port_stats[i].name, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + + for (i = 0; i < nic_dev->num_qps; i++) { + for (j = 0; j < ARRAY_LEN(hinic_tx_queue_stats); j++) { + sprintf(p, hinic_tx_queue_stats[j].name, i); + p += ETH_GSTRING_LEN; + } + } + + for (i = 0; i < nic_dev->num_qps; i++) { + for (j = 0; j < ARRAY_LEN(hinic_rx_queue_stats); j++) { + sprintf(p, hinic_rx_queue_stats[j].name, i); + p += ETH_GSTRING_LEN; + } + } + + return; + default: + return; + } +} + +static const struct ethtool_ops hinic_ethtool_ops = { + .get_link_ksettings = hinic_get_link_ksettings, + .get_drvinfo = hinic_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ringparam = hinic_get_ringparam, + .get_channels = hinic_get_channels, + .get_rxnfc = hinic_get_rxnfc, + .set_rxnfc = hinic_set_rxnfc, + .get_rxfh_key_size = hinic_get_rxfh_key_size, + .get_rxfh_indir_size = hinic_get_rxfh_indir_size, + .get_rxfh = hinic_get_rxfh, + .set_rxfh = hinic_set_rxfh, + .get_sset_count = hinic_get_sset_count, + .get_ethtool_stats = hinic_get_ethtool_stats, + .get_strings = hinic_get_strings, +}; + +void hinic_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &hinic_ethtool_ops; +} diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 408705687de6..6f2cf569a283 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -89,9 +89,6 @@ static int get_capability(struct hinic_hwdev *hwdev, if (nic_cap->num_qps > HINIC_Q_CTXT_MAX) nic_cap->num_qps = HINIC_Q_CTXT_MAX; - /* num_qps must be power of 2 */ - nic_cap->num_qps = BIT(fls(nic_cap->num_qps) - 1); - nic_cap->max_qps = dev_cap->max_sqs + 1; if (nic_cap->max_qps != (dev_cap->max_rqs + 1)) return -EFAULT; @@ -304,6 +301,8 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; + hw_ioctxt.lro_en = 1; + hw_ioctxt.rq_depth = ilog2(rq_depth); hw_ioctxt.rx_buf_sz_idx = HINIC_RX_BUF_SZ_IDX; @@ -872,6 +871,13 @@ void hinic_free_hwdev(struct hinic_hwdev *hwdev) hinic_free_hwif(hwdev->hwif); } +int hinic_hwdev_max_num_qps(struct hinic_hwdev *hwdev) +{ + struct hinic_cap *nic_cap = &hwdev->nic_cap; + + return nic_cap->max_qps; +} + /** * hinic_hwdev_num_qps - return the number QPs available for use * @hwdev: the NIC HW device diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 
a0a5b7434ad7..b069045de416 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -41,21 +41,73 @@ enum hinic_port_cmd { HINIC_PORT_CMD_GET_LINK_STATE = 24, + HINIC_PORT_CMD_SET_LRO = 25, + HINIC_PORT_CMD_SET_RX_CSUM = 26, + HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD = 27, + + HINIC_PORT_CMD_GET_PORT_STATISTICS = 28, + + HINIC_PORT_CMD_CLEAR_PORT_STATISTICS = 29, + + HINIC_PORT_CMD_GET_VPORT_STAT = 30, + + HINIC_PORT_CMD_CLEAN_VPORT_STAT = 31, + + HINIC_PORT_CMD_GET_RSS_TEMPLATE_INDIR_TBL = 37, + HINIC_PORT_CMD_SET_PORT_STATE = 41, + HINIC_PORT_CMD_SET_RSS_TEMPLATE_TBL = 43, + + HINIC_PORT_CMD_GET_RSS_TEMPLATE_TBL = 44, + + HINIC_PORT_CMD_SET_RSS_HASH_ENGINE = 45, + + HINIC_PORT_CMD_GET_RSS_HASH_ENGINE = 46, + + HINIC_PORT_CMD_GET_RSS_CTX_TBL = 47, + + HINIC_PORT_CMD_SET_RSS_CTX_TBL = 48, + + HINIC_PORT_CMD_RSS_TEMP_MGR = 49, + + HINIC_PORT_CMD_RSS_CFG = 66, + HINIC_PORT_CMD_FWCTXT_INIT = 69, + HINIC_PORT_CMD_GET_MGMT_VERSION = 88, + HINIC_PORT_CMD_SET_FUNC_STATE = 93, HINIC_PORT_CMD_GET_GLOBAL_QPN = 102, HINIC_PORT_CMD_SET_TSO = 112, + HINIC_PORT_CMD_SET_RQ_IQ_MAP = 115, + HINIC_PORT_CMD_GET_CAP = 170, + + HINIC_PORT_CMD_SET_LRO_TIMER = 244, }; +enum hinic_ucode_cmd { + HINIC_UCODE_CMD_MODIFY_QUEUE_CONTEXT = 0, + HINIC_UCODE_CMD_CLEAN_QUEUE_CONTEXT, + HINIC_UCODE_CMD_ARM_SQ, + HINIC_UCODE_CMD_ARM_RQ, + HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE, + HINIC_UCODE_CMD_SET_RSS_CONTEXT_TABLE, + HINIC_UCODE_CMD_GET_RSS_INDIR_TABLE, + HINIC_UCODE_CMD_GET_RSS_CONTEXT_TABLE, + HINIC_UCODE_CMD_SET_IQ_ENABLE, + HINIC_UCODE_CMD_SET_RQ_FLUSH = 10 +}; + +#define NIC_RSS_CMD_TEMP_ALLOC 0x01 +#define NIC_RSS_CMD_TEMP_FREE 0x02 + enum hinic_mgmt_msg_cmd { HINIC_MGMT_MSG_CMD_BASE = 160, @@ -97,7 +149,7 @@ struct hinic_cmd_hw_ioctxt { u8 set_cmdq_depth; u8 cmdq_depth; - u8 rsvd2; + u8 lro_en; u8 rsvd3; u8 rsvd4; u8 rsvd5; @@ -215,6 +267,8 @@ struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev); void hinic_free_hwdev(struct hinic_hwdev *hwdev); +int hinic_hwdev_max_num_qps(struct hinic_hwdev *hwdev); + int hinic_hwdev_num_qps(struct hinic_hwdev *hwdev); struct hinic_sq *hinic_hwdev_get_sq(struct hinic_hwdev *hwdev, int i); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c index 2d07bdd17432..d66f86fa3f46 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c @@ -36,6 +36,7 @@ enum io_cmd { IO_CMD_MODIFY_QUEUE_CTXT = 0, + IO_CMD_CLEAN_QUEUE_CTXT, }; static void init_db_area_idx(struct hinic_free_db_area *free_db_area) @@ -201,6 +202,59 @@ static int write_qp_ctxts(struct hinic_func_to_io *func_to_io, u16 base_qpn, write_rq_ctxts(func_to_io, base_qpn, num_qps)); } +static int hinic_clean_queue_offload_ctxt(struct hinic_func_to_io *func_to_io, + enum hinic_qp_ctxt_type ctxt_type) +{ + struct hinic_hwif *hwif = func_to_io->hwif; + struct hinic_clean_queue_ctxt *ctxt_block; + struct pci_dev *pdev = hwif->pdev; + struct hinic_cmdq_buf cmdq_buf; + u64 out_param = 0; + int err; + + err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf); + if (err) { + dev_err(&pdev->dev, "Failed to allocate cmdq buf\n"); + return err; + } + + ctxt_block = cmdq_buf.buf; + ctxt_block->cmdq_hdr.num_queues = func_to_io->max_qps; + ctxt_block->cmdq_hdr.queue_type = ctxt_type; + ctxt_block->cmdq_hdr.addr_offset = 0; + + /* TSO/LRO ctxt size: 0x0:0B; 0x1:160B; 0x2:200B; 0x3:240B */ + ctxt_block->ctxt_size = 0x3; + + hinic_cpu_to_be32(ctxt_block, 
sizeof(*ctxt_block)); + + cmdq_buf.size = sizeof(*ctxt_block); + + err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC, + IO_CMD_CLEAN_QUEUE_CTXT, + &cmdq_buf, &out_param); + + if (err || out_param) { + dev_err(&pdev->dev, "Failed to clean offload ctxts, err: %d, out_param: 0x%llx\n", + err, out_param); + + err = -EFAULT; + } + + hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmdq_buf); + + return err; +} + +static int hinic_clean_qp_offload_ctxt(struct hinic_func_to_io *func_to_io) +{ + /* clean LRO/TSO context space */ + return (hinic_clean_queue_offload_ctxt(func_to_io, + HINIC_QP_CTXT_TYPE_SQ) || + hinic_clean_queue_offload_ctxt(func_to_io, + HINIC_QP_CTXT_TYPE_RQ)); +} + /** * init_qp - Initialize a Queue Pair * @func_to_io: func to io channel that holds the IO components @@ -372,6 +426,12 @@ int hinic_io_create_qps(struct hinic_func_to_io *func_to_io, goto err_write_qp_ctxts; } + err = hinic_clean_qp_offload_ctxt(func_to_io); + if (err) { + dev_err(&pdev->dev, "Failed to clean QP contexts space\n"); + goto err_write_qp_ctxts; + } + return 0; err_write_qp_ctxts: diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h index 1856fdcc1e32..00900a6640ad 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp_ctxt.h @@ -192,6 +192,11 @@ struct hinic_rq_ctxt { u32 wq_block_lo_pfn; }; +struct hinic_clean_queue_ctxt { + struct hinic_qp_ctxt_header cmdq_hdr; + u32 ctxt_size; +}; + struct hinic_sq_ctxt_block { struct hinic_qp_ctxt_header hdr; struct hinic_sq_ctxt sq_ctxt[HINIC_Q_CTXT_MAX]; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h index 8991c9a5ef04..f4b6d2c1061f 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h @@ -210,6 +210,57 @@ #define HINIC_MSS_DEFAULT 0x3E00 #define HINIC_MSS_MIN 0x50 +#define RQ_CQE_STATUS_NUM_LRO_SHIFT 16 +#define RQ_CQE_STATUS_NUM_LRO_MASK 0xFFU + +#define RQ_CQE_STATUS_GET(val, member) (((val) >> \ + RQ_CQE_STATUS_##member##_SHIFT) & \ + RQ_CQE_STATUS_##member##_MASK) + +#define HINIC_GET_RX_NUM_LRO(status) \ + RQ_CQE_STATUS_GET(status, NUM_LRO) + +#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_SHIFT 0 +#define RQ_CQE_OFFOLAD_TYPE_PKT_TYPE_MASK 0xFFFU +#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_SHIFT 21 +#define RQ_CQE_OFFOLAD_TYPE_VLAN_EN_MASK 0x1U + +#define RQ_CQE_OFFOLAD_TYPE_GET(val, member) (((val) >> \ + RQ_CQE_OFFOLAD_TYPE_##member##_SHIFT) & \ + RQ_CQE_OFFOLAD_TYPE_##member##_MASK) + +#define HINIC_GET_RX_PKT_TYPE(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, PKT_TYPE) + +#define HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type) \ + RQ_CQE_OFFOLAD_TYPE_GET(offload_type, VLAN_EN) + +#define RQ_CQE_SGE_VLAN_MASK 0xFFFFU +#define RQ_CQE_SGE_VLAN_SHIFT 0 + +#define RQ_CQE_SGE_GET(val, member) (((val) >> \ + RQ_CQE_SGE_##member##_SHIFT) & \ + RQ_CQE_SGE_##member##_MASK) + +#define HINIC_GET_RX_VLAN_TAG(vlan_len) \ + RQ_CQE_SGE_GET(vlan_len, VLAN) + +#define HINIC_RSS_TYPE_VALID_SHIFT 23 +#define HINIC_RSS_TYPE_TCP_IPV6_EXT_SHIFT 24 +#define HINIC_RSS_TYPE_IPV6_EXT_SHIFT 25 +#define HINIC_RSS_TYPE_TCP_IPV6_SHIFT 26 +#define HINIC_RSS_TYPE_IPV6_SHIFT 27 +#define HINIC_RSS_TYPE_TCP_IPV4_SHIFT 28 +#define HINIC_RSS_TYPE_IPV4_SHIFT 29 +#define HINIC_RSS_TYPE_UDP_IPV6_SHIFT 30 +#define HINIC_RSS_TYPE_UDP_IPV4_SHIFT 31 + +#define HINIC_RSS_TYPE_SET(val, member) \ + (((u32)(val) & 0x1) << 
HINIC_RSS_TYPE_##member##_SHIFT) + +#define HINIC_RSS_TYPE_GET(val, member) \ + (((u32)(val) >> HINIC_RSS_TYPE_##member##_SHIFT) & 0x1) + enum hinic_l4offload_type { HINIC_L4_OFF_DISABLE = 0, HINIC_TCP_OFFLOAD_ENABLE = 1, @@ -363,7 +414,7 @@ struct hinic_rq_cqe { u32 status; u32 len; - u32 rsvd2; + u32 offload_type; u32 rsvd3; u32 rsvd4; u32 rsvd5; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index b695d29d364c..2411ad270c98 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -53,6 +53,10 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)"); NETIF_MSG_IFUP | \ NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) +#define HINIC_LRO_MAX_WQE_NUM_DEFAULT 8 + +#define HINIC_LRO_RX_TIMER_DEFAULT 16 + #define VLAN_BITMAP_SIZE(nic_dev) (ALIGN(VLAN_N_VID, 8) / 8) #define work_to_rx_mode_work(work) \ @@ -63,137 +67,9 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)"); static int change_mac_addr(struct net_device *netdev, const u8 *addr); -static void set_link_speed(struct ethtool_link_ksettings *link_ksettings, - enum hinic_speed speed) -{ - switch (speed) { - case HINIC_SPEED_10MB_LINK: - link_ksettings->base.speed = SPEED_10; - break; - - case HINIC_SPEED_100MB_LINK: - link_ksettings->base.speed = SPEED_100; - break; - - case HINIC_SPEED_1000MB_LINK: - link_ksettings->base.speed = SPEED_1000; - break; - - case HINIC_SPEED_10GB_LINK: - link_ksettings->base.speed = SPEED_10000; - break; - - case HINIC_SPEED_25GB_LINK: - link_ksettings->base.speed = SPEED_25000; - break; - - case HINIC_SPEED_40GB_LINK: - link_ksettings->base.speed = SPEED_40000; - break; - - case HINIC_SPEED_100GB_LINK: - link_ksettings->base.speed = SPEED_100000; - break; - - default: - link_ksettings->base.speed = SPEED_UNKNOWN; - break; - } -} - -static int hinic_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings - *link_ksettings) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - enum hinic_port_link_state link_state; - struct hinic_port_cap port_cap; - int err; - - ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - ethtool_link_ksettings_add_link_mode(link_ksettings, supported, - Autoneg); - - link_ksettings->base.speed = SPEED_UNKNOWN; - link_ksettings->base.autoneg = AUTONEG_DISABLE; - link_ksettings->base.duplex = DUPLEX_UNKNOWN; - - err = hinic_port_get_cap(nic_dev, &port_cap); - if (err) { - netif_err(nic_dev, drv, netdev, - "Failed to get port capabilities\n"); - return err; - } - - err = hinic_port_link_state(nic_dev, &link_state); - if (err) { - netif_err(nic_dev, drv, netdev, - "Failed to get port link state\n"); - return err; - } - - if (link_state != HINIC_LINK_STATE_UP) { - netif_info(nic_dev, drv, netdev, "No link\n"); - return err; - } - - set_link_speed(link_ksettings, port_cap.speed); - - if (!!(port_cap.autoneg_cap & HINIC_AUTONEG_SUPPORTED)) - ethtool_link_ksettings_add_link_mode(link_ksettings, - advertising, Autoneg); - - if (port_cap.autoneg_state == HINIC_AUTONEG_ACTIVE) - link_ksettings->base.autoneg = AUTONEG_ENABLE; - - link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ? 
- DUPLEX_FULL : DUPLEX_HALF; - return 0; -} - -static void hinic_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *info) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - struct hinic_hwdev *hwdev = nic_dev->hwdev; - struct hinic_hwif *hwif = hwdev->hwif; - - strlcpy(info->driver, HINIC_DRV_NAME, sizeof(info->driver)); - strlcpy(info->bus_info, pci_name(hwif->pdev), sizeof(info->bus_info)); -} - -static void hinic_get_ringparam(struct net_device *netdev, - struct ethtool_ringparam *ring) -{ - ring->rx_max_pending = HINIC_RQ_DEPTH; - ring->tx_max_pending = HINIC_SQ_DEPTH; - ring->rx_pending = HINIC_RQ_DEPTH; - ring->tx_pending = HINIC_SQ_DEPTH; -} - -static void hinic_get_channels(struct net_device *netdev, - struct ethtool_channels *channels) -{ - struct hinic_dev *nic_dev = netdev_priv(netdev); - struct hinic_hwdev *hwdev = nic_dev->hwdev; - - channels->max_rx = hwdev->nic_cap.max_qps; - channels->max_tx = hwdev->nic_cap.max_qps; - channels->max_other = 0; - channels->max_combined = 0; - channels->rx_count = hinic_hwdev_num_qps(hwdev); - channels->tx_count = hinic_hwdev_num_qps(hwdev); - channels->other_count = 0; - channels->combined_count = 0; -} - -static const struct ethtool_ops hinic_ethtool_ops = { - .get_link_ksettings = hinic_get_link_ksettings, - .get_drvinfo = hinic_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_ringparam = hinic_get_ringparam, - .get_channels = hinic_get_channels, -}; +static int set_features(struct hinic_dev *nic_dev, + netdev_features_t pre_features, + netdev_features_t features, bool force_change); static void update_rx_stats(struct hinic_dev *nic_dev, struct hinic_rxq *rxq) { @@ -207,6 +83,9 @@ static void update_rx_stats(struct hinic_dev *nic_dev, struct hinic_rxq *rxq) u64_stats_update_begin(&nic_rx_stats->syncp); nic_rx_stats->bytes += rx_stats.bytes; nic_rx_stats->pkts += rx_stats.pkts; + nic_rx_stats->errors += rx_stats.errors; + nic_rx_stats->csum_errors += rx_stats.csum_errors; + nic_rx_stats->other_errors += rx_stats.other_errors; u64_stats_update_end(&nic_rx_stats->syncp); hinic_rxq_clean_stats(rxq); @@ -227,6 +106,7 @@ static void update_tx_stats(struct hinic_dev *nic_dev, struct hinic_txq *txq) nic_tx_stats->tx_busy += tx_stats.tx_busy; nic_tx_stats->tx_wake += tx_stats.tx_wake; nic_tx_stats->tx_dropped += tx_stats.tx_dropped; + nic_tx_stats->big_frags_pkts += tx_stats.big_frags_pkts; u64_stats_update_end(&nic_tx_stats->syncp); hinic_txq_clean_stats(txq); @@ -363,11 +243,135 @@ static void free_rxqs(struct hinic_dev *nic_dev) nic_dev->rxqs = NULL; } +static int hinic_configure_max_qnum(struct hinic_dev *nic_dev) +{ + int err; + + err = hinic_set_max_qnum(nic_dev, nic_dev->hwdev->nic_cap.max_qps); + if (err) + return err; + + return 0; +} + +static int hinic_rss_init(struct hinic_dev *nic_dev) +{ + u8 default_rss_key[HINIC_RSS_KEY_SIZE]; + u8 tmpl_idx = nic_dev->rss_tmpl_idx; + u32 *indir_tbl; + int err, i; + + indir_tbl = kcalloc(HINIC_RSS_INDIR_SIZE, sizeof(u32), GFP_KERNEL); + if (!indir_tbl) + return -ENOMEM; + + netdev_rss_key_fill(default_rss_key, sizeof(default_rss_key)); + for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++) + indir_tbl[i] = ethtool_rxfh_indir_default(i, nic_dev->num_rss); + + err = hinic_rss_set_template_tbl(nic_dev, tmpl_idx, default_rss_key); + if (err) + goto out; + + err = hinic_rss_set_indir_tbl(nic_dev, tmpl_idx, indir_tbl); + if (err) + goto out; + + err = hinic_set_rss_type(nic_dev, tmpl_idx, nic_dev->rss_type); + if (err) + goto out; + + err = hinic_rss_set_hash_engine(nic_dev, tmpl_idx, + 
nic_dev->rss_hash_engine); + if (err) + goto out; + + err = hinic_rss_cfg(nic_dev, 1, tmpl_idx); + if (err) + goto out; + +out: + kfree(indir_tbl); + return err; +} + +static void hinic_rss_deinit(struct hinic_dev *nic_dev) +{ + hinic_rss_cfg(nic_dev, 0, nic_dev->rss_tmpl_idx); +} + +static void hinic_init_rss_parameters(struct hinic_dev *nic_dev) +{ + nic_dev->rss_hash_engine = HINIC_RSS_HASH_ENGINE_TYPE_XOR; + nic_dev->rss_type.tcp_ipv6_ext = 1; + nic_dev->rss_type.ipv6_ext = 1; + nic_dev->rss_type.tcp_ipv6 = 1; + nic_dev->rss_type.ipv6 = 1; + nic_dev->rss_type.tcp_ipv4 = 1; + nic_dev->rss_type.ipv4 = 1; + nic_dev->rss_type.udp_ipv6 = 1; + nic_dev->rss_type.udp_ipv4 = 1; +} + +static void hinic_enable_rss(struct hinic_dev *nic_dev) +{ + struct net_device *netdev = nic_dev->netdev; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + int i, node, err = 0; + u16 num_cpus = 0; + + nic_dev->max_qps = hinic_hwdev_max_num_qps(hwdev); + if (nic_dev->max_qps <= 1) { + nic_dev->flags &= ~HINIC_RSS_ENABLE; + nic_dev->rss_limit = nic_dev->max_qps; + nic_dev->num_qps = nic_dev->max_qps; + nic_dev->num_rss = nic_dev->max_qps; + + return; + } + + err = hinic_rss_template_alloc(nic_dev, &nic_dev->rss_tmpl_idx); + if (err) { + netif_err(nic_dev, drv, netdev, + "Failed to alloc tmpl_idx for rss, can't enable rss for this function\n"); + nic_dev->flags &= ~HINIC_RSS_ENABLE; + nic_dev->max_qps = 1; + nic_dev->rss_limit = nic_dev->max_qps; + nic_dev->num_qps = nic_dev->max_qps; + nic_dev->num_rss = nic_dev->max_qps; + + return; + } + + nic_dev->flags |= HINIC_RSS_ENABLE; + + for (i = 0; i < num_online_cpus(); i++) { + node = cpu_to_node(i); + if (node == dev_to_node(&pdev->dev)) + num_cpus++; + } + + if (!num_cpus) + num_cpus = num_online_cpus(); + + nic_dev->num_qps = min_t(u16, nic_dev->max_qps, num_cpus); + + nic_dev->rss_limit = nic_dev->num_qps; + nic_dev->num_rss = nic_dev->num_qps; + + hinic_init_rss_parameters(nic_dev); + err = hinic_rss_init(nic_dev); + if (err) + netif_err(nic_dev, drv, netdev, "Failed to init rss\n"); +} + static int hinic_open(struct net_device *netdev) { struct hinic_dev *nic_dev = netdev_priv(netdev); enum hinic_port_link_state link_state; - int err, ret, num_qps; + int err, ret; if (!(nic_dev->flags & HINIC_INTF_UP)) { err = hinic_hwdev_ifup(nic_dev->hwdev); @@ -392,9 +396,17 @@ static int hinic_open(struct net_device *netdev) goto err_create_rxqs; } - num_qps = hinic_hwdev_num_qps(nic_dev->hwdev); - netif_set_real_num_tx_queues(netdev, num_qps); - netif_set_real_num_rx_queues(netdev, num_qps); + hinic_enable_rss(nic_dev); + + err = hinic_configure_max_qnum(nic_dev); + if (err) { + netif_err(nic_dev, drv, nic_dev->netdev, + "Failed to configure the maximum number of queues\n"); + goto err_port_state; + } + + netif_set_real_num_tx_queues(netdev, nic_dev->num_qps); + netif_set_real_num_rx_queues(netdev, nic_dev->num_qps); err = hinic_port_set_state(nic_dev, HINIC_PORT_ENABLE); if (err) { @@ -450,9 +462,12 @@ err_func_port_state: if (ret) netif_warn(nic_dev, drv, netdev, "Failed to revert port state\n"); - err_port_state: free_rxqs(nic_dev); + if (nic_dev->flags & HINIC_RSS_ENABLE) { + hinic_rss_deinit(nic_dev); + hinic_rss_template_free(nic_dev, nic_dev->rss_tmpl_idx); + } err_create_rxqs: free_txqs(nic_dev); @@ -496,6 +511,11 @@ static int hinic_close(struct net_device *netdev) return err; } + if (nic_dev->flags & HINIC_RSS_ENABLE) { + hinic_rss_deinit(nic_dev); + hinic_rss_template_free(nic_dev, 
nic_dev->rss_tmpl_idx); + } + free_rxqs(nic_dev); free_txqs(nic_dev); @@ -715,7 +735,6 @@ static void set_rx_mode(struct work_struct *work) { struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work); struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work); - struct netdev_hw_addr *ha; netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n"); @@ -723,9 +742,6 @@ static void set_rx_mode(struct work_struct *work) __dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr); __dev_mc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr); - - netdev_for_each_mc_addr(ha, nic_dev->netdev) - add_mac_addr(nic_dev->netdev, ha->addr); } static void hinic_set_rx_mode(struct net_device *netdev) @@ -776,12 +792,36 @@ static void hinic_get_stats64(struct net_device *netdev, stats->rx_bytes = nic_rx_stats->bytes; stats->rx_packets = nic_rx_stats->pkts; + stats->rx_errors = nic_rx_stats->errors; stats->tx_bytes = nic_tx_stats->bytes; stats->tx_packets = nic_tx_stats->pkts; stats->tx_errors = nic_tx_stats->tx_dropped; } +static int hinic_set_features(struct net_device *netdev, + netdev_features_t features) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + + return set_features(nic_dev, nic_dev->netdev->features, + features, false); +} + +static netdev_features_t hinic_fix_features(struct net_device *netdev, + netdev_features_t features) +{ + struct hinic_dev *nic_dev = netdev_priv(netdev); + + /* If Rx checksum is disabled, then LRO should also be disabled */ + if (!(features & NETIF_F_RXCSUM)) { + netif_info(nic_dev, drv, netdev, "disabling LRO as RXCSUM is off\n"); + features &= ~NETIF_F_LRO; + } + + return features; +} + static const struct net_device_ops hinic_netdev_ops = { .ndo_open = hinic_open, .ndo_stop = hinic_close, @@ -794,13 +834,16 @@ static const struct net_device_ops hinic_netdev_ops = { .ndo_start_xmit = hinic_xmit_frame, .ndo_tx_timeout = hinic_tx_timeout, .ndo_get_stats64 = hinic_get_stats64, + .ndo_fix_features = hinic_fix_features, + .ndo_set_features = hinic_set_features, }; static void netdev_features_init(struct net_device *netdev) { netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_RXCSUM; + NETIF_F_RXCSUM | NETIF_F_LRO | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; netdev->vlan_features = netdev->hw_features; @@ -873,6 +916,18 @@ static int set_features(struct hinic_dev *nic_dev, if (changed & NETIF_F_RXCSUM) err = hinic_set_rx_csum_offload(nic_dev, csum_en); + if (changed & NETIF_F_LRO) { + err = hinic_set_rx_lro_state(nic_dev, + !!(features & NETIF_F_LRO), + HINIC_LRO_RX_TIMER_DEFAULT, + HINIC_LRO_MAX_WQE_NUM_DEFAULT); + } + + if (changed & NETIF_F_HW_VLAN_CTAG_RX) + err = hinic_set_rx_vlan_offload(nic_dev, + !!(features & + NETIF_F_HW_VLAN_CTAG_RX)); + return err; } @@ -912,8 +967,8 @@ static int nic_dev_init(struct pci_dev *pdev) goto err_alloc_etherdev; } + hinic_set_ethtool_ops(netdev); netdev->netdev_ops = &hinic_netdev_ops; - netdev->ethtool_ops = &hinic_ethtool_ops; netdev->max_mtu = ETH_MAX_MTU; nic_dev = netdev_priv(netdev); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index 4b3b7d39e437..1e389a004e50 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -430,3 +430,641 @@ int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en) return 0; } + +int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en) +{ + struct 
hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_vlan_cfg vlan_cfg; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size; + int err; + + if (!hwdev) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + vlan_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + vlan_cfg.vlan_rx_offload = en; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_VLAN_OFFLOAD, + &vlan_cfg, sizeof(vlan_cfg), + &vlan_cfg, &out_size); + if (err || !out_size || vlan_cfg.status) { + dev_err(&pdev->dev, + "Failed to set rx vlan offload, err: %d, status: 0x%x, out size: 0x%x\n", + err, vlan_cfg.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + struct hinic_rq_num rq_num = { 0 }; + u16 out_size = sizeof(rq_num); + int err; + + rq_num.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rq_num.num_rqs = num_rqs; + rq_num.rq_depth = ilog2(HINIC_SQ_DEPTH); + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RQ_IQ_MAP, + &rq_num, sizeof(rq_num), + &rq_num, &out_size); + if (err || !out_size || rq_num.status) { + dev_err(&pdev->dev, + "Failed to set rxq number, err: %d, status: 0x%x, out size: 0x%x\n", + err, rq_num.status, out_size); + return -EINVAL; + } + + return 0; +} + +static int hinic_set_rx_lro(struct hinic_dev *nic_dev, u8 ipv4_en, u8 ipv6_en, + u8 max_wqe_num) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct hinic_lro_config lro_cfg = { 0 }; + struct pci_dev *pdev = hwif->pdev; + u16 out_size = sizeof(lro_cfg); + int err; + + lro_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + lro_cfg.lro_ipv4_en = ipv4_en; + lro_cfg.lro_ipv6_en = ipv6_en; + lro_cfg.lro_max_wqe_num = max_wqe_num; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_LRO, + &lro_cfg, sizeof(lro_cfg), + &lro_cfg, &out_size); + if (err || !out_size || lro_cfg.status) { + dev_err(&pdev->dev, + "Failed to set lro offload, err: %d, status: 0x%x, out size: 0x%x\n", + err, lro_cfg.status, out_size); + return -EINVAL; + } + + return 0; +} + +static int hinic_set_rx_lro_timer(struct hinic_dev *nic_dev, u32 timer_value) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_lro_timer lro_timer = { 0 }; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size = sizeof(lro_timer); + int err; + + lro_timer.status = 0; + lro_timer.type = 0; + lro_timer.enable = 1; + lro_timer.timer = timer_value; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_LRO_TIMER, + &lro_timer, sizeof(lro_timer), + &lro_timer, &out_size); + if (lro_timer.status == 0xFF) { + /* A status of 0xFF means the firmware does not support this command; treat it as success */ + lro_timer.status = 0; + dev_dbg(&pdev->dev, + "Setting the lro timer is not supported by the current FW version, defaulting to 1ms\n"); + } + + if (err || !out_size || lro_timer.status) { + dev_err(&pdev->dev, + "Failed to set lro timer, err: %d, status: 0x%x, out size: 0x%x\n", + err, lro_timer.status, out_size); + + return -EINVAL; + } + + return 0; +} + +int hinic_set_rx_lro_state(struct hinic_dev *nic_dev, u8 lro_en, + u32 lro_timer, u32 wqe_num) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + u8 ipv4_en; + u8 ipv6_en; + int err; + + if (!hwdev) + return -EINVAL; + + ipv4_en = lro_en ? 1 : 0; + ipv6_en = lro_en ? 
1 : 0; + + err = hinic_set_rx_lro(nic_dev, ipv4_en, ipv6_en, (u8)wqe_num); + if (err) + return err; + + err = hinic_set_rx_lro_timer(nic_dev, lro_timer); + if (err) + return err; + + return 0; +} + +int hinic_rss_set_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + const u32 *indir_table) +{ + struct hinic_rss_indirect_tbl *indir_tbl; + struct hinic_func_to_io *func_to_io; + struct hinic_cmdq_buf cmd_buf; + struct hinic_hwdev *hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u32 indir_size; + u64 out_param; + int err, i; + u32 *temp; + + hwdev = nic_dev->hwdev; + func_to_io = &hwdev->func_to_io; + hwif = hwdev->hwif; + pdev = hwif->pdev; + + err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmd_buf); + if (err) { + dev_err(&pdev->dev, "Failed to allocate cmdq buf\n"); + return err; + } + + cmd_buf.size = sizeof(*indir_tbl); + + indir_tbl = cmd_buf.buf; + indir_tbl->group_index = cpu_to_be32(tmpl_idx); + + for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++) { + indir_tbl->entry[i] = indir_table[i]; + + if (0x3 == (i & 0x3)) { + temp = (u32 *)&indir_tbl->entry[i - 3]; + *temp = cpu_to_be32(*temp); + } + } + + /* cfg the rss indirect table by command queue */ + indir_size = HINIC_RSS_INDIR_SIZE / 2; + indir_tbl->offset = 0; + indir_tbl->size = cpu_to_be32(indir_size); + + err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC, + HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE, + &cmd_buf, &out_param); + if (err || out_param != 0) { + dev_err(&pdev->dev, "Failed to set rss indir table\n"); + err = -EFAULT; + goto free_buf; + } + + indir_tbl->offset = cpu_to_be32(indir_size); + indir_tbl->size = cpu_to_be32(indir_size); + memcpy(&indir_tbl->entry[0], &indir_tbl->entry[indir_size], indir_size); + + err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC, + HINIC_UCODE_CMD_SET_RSS_INDIR_TABLE, + &cmd_buf, &out_param); + if (err || out_param != 0) { + dev_err(&pdev->dev, "Failed to set rss indir table\n"); + err = -EFAULT; + } + +free_buf: + hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmd_buf); + + return err; +} + +int hinic_rss_get_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + u32 *indir_table) +{ + struct hinic_rss_indir_table rss_cfg = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size = sizeof(rss_cfg); + int err = 0, i; + + rss_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rss_cfg.template_id = tmpl_idx; + + err = hinic_port_msg_cmd(hwdev, + HINIC_PORT_CMD_GET_RSS_TEMPLATE_INDIR_TBL, + &rss_cfg, sizeof(rss_cfg), &rss_cfg, + &out_size); + if (err || !out_size || rss_cfg.status) { + dev_err(&pdev->dev, "Failed to get indir table, err: %d, status: 0x%x, out size: 0x%x\n", + err, rss_cfg.status, out_size); + return -EINVAL; + } + + hinic_be32_to_cpu(rss_cfg.indir, HINIC_RSS_INDIR_SIZE); + for (i = 0; i < HINIC_RSS_INDIR_SIZE; i++) + indir_table[i] = rss_cfg.indir[i]; + + return 0; +} + +int hinic_set_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx, + struct hinic_rss_type rss_type) +{ + struct hinic_rss_context_tbl *ctx_tbl; + struct hinic_func_to_io *func_to_io; + struct hinic_cmdq_buf cmd_buf; + struct hinic_hwdev *hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u64 out_param; + u32 ctx = 0; + int err; + + hwdev = nic_dev->hwdev; + func_to_io = &hwdev->func_to_io; + hwif = hwdev->hwif; + pdev = hwif->pdev; + + err = hinic_alloc_cmdq_buf(&func_to_io->cmdqs, &cmd_buf); + if (err) { + dev_err(&pdev->dev, "Failed to allocate cmd buf\n"); + return -ENOMEM; + } + + ctx |= 
HINIC_RSS_TYPE_SET(1, VALID) | + HINIC_RSS_TYPE_SET(rss_type.ipv4, IPV4) | + HINIC_RSS_TYPE_SET(rss_type.ipv6, IPV6) | + HINIC_RSS_TYPE_SET(rss_type.ipv6_ext, IPV6_EXT) | + HINIC_RSS_TYPE_SET(rss_type.tcp_ipv4, TCP_IPV4) | + HINIC_RSS_TYPE_SET(rss_type.tcp_ipv6, TCP_IPV6) | + HINIC_RSS_TYPE_SET(rss_type.tcp_ipv6_ext, TCP_IPV6_EXT) | + HINIC_RSS_TYPE_SET(rss_type.udp_ipv4, UDP_IPV4) | + HINIC_RSS_TYPE_SET(rss_type.udp_ipv6, UDP_IPV6); + + cmd_buf.size = sizeof(struct hinic_rss_context_tbl); + + ctx_tbl = (struct hinic_rss_context_tbl *)cmd_buf.buf; + ctx_tbl->group_index = cpu_to_be32(tmpl_idx); + ctx_tbl->offset = 0; + ctx_tbl->size = sizeof(u32); + ctx_tbl->size = cpu_to_be32(ctx_tbl->size); + ctx_tbl->rsvd = 0; + ctx_tbl->ctx = cpu_to_be32(ctx); + + /* cfg the rss context table by command queue */ + err = hinic_cmdq_direct_resp(&func_to_io->cmdqs, HINIC_MOD_L2NIC, + HINIC_UCODE_CMD_SET_RSS_CONTEXT_TABLE, + &cmd_buf, &out_param); + + hinic_free_cmdq_buf(&func_to_io->cmdqs, &cmd_buf); + + if (err || out_param != 0) { + dev_err(&pdev->dev, "Failed to set rss context table, err: %d\n", + err); + return -EFAULT; + } + + return 0; +} + +int hinic_get_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx, + struct hinic_rss_type *rss_type) +{ + struct hinic_rss_context_table ctx_tbl = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size = sizeof(ctx_tbl); + int err; + + if (!hwdev || !rss_type) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + + ctx_tbl.func_id = HINIC_HWIF_FUNC_IDX(hwif); + ctx_tbl.template_id = tmpl_idx; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_CTX_TBL, + &ctx_tbl, sizeof(ctx_tbl), + &ctx_tbl, &out_size); + if (err || !out_size || ctx_tbl.status) { + dev_err(&pdev->dev, "Failed to get hash type, err: %d, status: 0x%x, out size: 0x%x\n", + err, ctx_tbl.status, out_size); + return -EINVAL; + } + + rss_type->ipv4 = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV4); + rss_type->ipv6 = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV6); + rss_type->ipv6_ext = HINIC_RSS_TYPE_GET(ctx_tbl.context, IPV6_EXT); + rss_type->tcp_ipv4 = HINIC_RSS_TYPE_GET(ctx_tbl.context, TCP_IPV4); + rss_type->tcp_ipv6 = HINIC_RSS_TYPE_GET(ctx_tbl.context, TCP_IPV6); + rss_type->tcp_ipv6_ext = HINIC_RSS_TYPE_GET(ctx_tbl.context, + TCP_IPV6_EXT); + rss_type->udp_ipv4 = HINIC_RSS_TYPE_GET(ctx_tbl.context, UDP_IPV4); + rss_type->udp_ipv6 = HINIC_RSS_TYPE_GET(ctx_tbl.context, UDP_IPV6); + + return 0; +} + +int hinic_rss_set_template_tbl(struct hinic_dev *nic_dev, u32 template_id, + const u8 *temp) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct hinic_rss_key rss_key = { 0 }; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + rss_key.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rss_key.template_id = template_id; + memcpy(rss_key.key, temp, HINIC_RSS_KEY_SIZE); + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RSS_TEMPLATE_TBL, + &rss_key, sizeof(rss_key), + &rss_key, &out_size); + if (err || !out_size || rss_key.status) { + dev_err(&pdev->dev, + "Failed to set rss hash key, err: %d, status: 0x%x, out size: 0x%x\n", + err, rss_key.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_rss_get_template_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + u8 *temp) +{ + struct hinic_rss_template_key temp_key = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size = sizeof(temp_key); + int err; + + 
if (!hwdev || !temp) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + + temp_key.func_id = HINIC_HWIF_FUNC_IDX(hwif); + temp_key.template_id = tmpl_idx; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_TEMPLATE_TBL, + &temp_key, sizeof(temp_key), + &temp_key, &out_size); + if (err || !out_size || temp_key.status) { + dev_err(&pdev->dev, "Failed to get hash key, err: %d, status: 0x%x, out size: 0x%x\n", + err, temp_key.status, out_size); + return -EINVAL; + } + + memcpy(temp, temp_key.key, HINIC_RSS_KEY_SIZE); + + return 0; +} + +int hinic_rss_set_hash_engine(struct hinic_dev *nic_dev, u8 template_id, + u8 type) +{ + struct hinic_rss_engine_type rss_engine = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + rss_engine.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rss_engine.hash_engine = type; + rss_engine.template_id = template_id; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RSS_HASH_ENGINE, + &rss_engine, sizeof(rss_engine), + &rss_engine, &out_size); + if (err || !out_size || rss_engine.status) { + dev_err(&pdev->dev, + "Failed to set hash engine, err: %d, status: 0x%x, out size: 0x%x\n", + err, rss_engine.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_rss_get_hash_engine(struct hinic_dev *nic_dev, u8 tmpl_idx, u8 *type) +{ + struct hinic_rss_engine_type hash_type = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size = sizeof(hash_type); + int err; + + if (!hwdev || !type) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + + hash_type.func_id = HINIC_HWIF_FUNC_IDX(hwif); + hash_type.template_id = tmpl_idx; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_RSS_HASH_ENGINE, + &hash_type, sizeof(hash_type), + &hash_type, &out_size); + if (err || !out_size || hash_type.status) { + dev_err(&pdev->dev, "Failed to get hash engine, err: %d, status: 0x%x, out size: 0x%x\n", + err, hash_type.status, out_size); + return -EINVAL; + } + + *type = hash_type.hash_engine; + return 0; +} + +int hinic_rss_cfg(struct hinic_dev *nic_dev, u8 rss_en, u8 template_id) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_rss_config rss_cfg = { 0 }; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + rss_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif); + rss_cfg.rss_en = rss_en; + rss_cfg.template_id = template_id; + rss_cfg.rq_priority_number = 0; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_CFG, + &rss_cfg, sizeof(rss_cfg), + &rss_cfg, &out_size); + if (err || !out_size || rss_cfg.status) { + dev_err(&pdev->dev, + "Failed to set rss cfg, err: %d, status: 0x%x, out size: 0x%x\n", + err, rss_cfg.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_rss_template_alloc(struct hinic_dev *nic_dev, u8 *tmpl_idx) +{ + struct hinic_rss_template_mgmt template_mgmt = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + template_mgmt.func_id = HINIC_HWIF_FUNC_IDX(hwif); + template_mgmt.cmd = NIC_RSS_CMD_TEMP_ALLOC; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_TEMP_MGR, + &template_mgmt, sizeof(template_mgmt), + &template_mgmt, &out_size); + if (err || !out_size || template_mgmt.status) { + dev_err(&pdev->dev, "Failed to alloc rss template, err: %d, status: 0x%x, out size: 
0x%x\n", + err, template_mgmt.status, out_size); + return -EINVAL; + } + + *tmpl_idx = template_mgmt.template_id; + + return 0; +} + +int hinic_rss_template_free(struct hinic_dev *nic_dev, u8 tmpl_idx) +{ + struct hinic_rss_template_mgmt template_mgmt = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct pci_dev *pdev = hwif->pdev; + u16 out_size; + int err; + + template_mgmt.func_id = HINIC_HWIF_FUNC_IDX(hwif); + template_mgmt.template_id = tmpl_idx; + template_mgmt.cmd = NIC_RSS_CMD_TEMP_FREE; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_RSS_TEMP_MGR, + &template_mgmt, sizeof(template_mgmt), + &template_mgmt, &out_size); + if (err || !out_size || template_mgmt.status) { + dev_err(&pdev->dev, "Failed to free rss template, err: %d, status: 0x%x, out size: 0x%x\n", + err, template_mgmt.status, out_size); + return -EINVAL; + } + + return 0; +} + +int hinic_get_vport_stats(struct hinic_dev *nic_dev, + struct hinic_vport_stats *stats) +{ + struct hinic_cmd_vport_stats vport_stats = { 0 }; + struct hinic_port_stats_info stats_info = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + u16 out_size = sizeof(vport_stats); + struct pci_dev *pdev = hwif->pdev; + int err; + + stats_info.stats_version = HINIC_PORT_STATS_VERSION; + stats_info.func_id = HINIC_HWIF_FUNC_IDX(hwif); + stats_info.stats_size = sizeof(vport_stats); + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_VPORT_STAT, + &stats_info, sizeof(stats_info), + &vport_stats, &out_size); + if (err || !out_size || vport_stats.status) { + dev_err(&pdev->dev, + "Failed to get function statistics, err: %d, status: 0x%x, out size: 0x%x\n", + err, vport_stats.status, out_size); + return -EFAULT; + } + + memcpy(stats, &vport_stats.stats, sizeof(*stats)); + return 0; +} + +int hinic_get_phy_port_stats(struct hinic_dev *nic_dev, + struct hinic_phy_port_stats *stats) +{ + struct hinic_port_stats_info stats_info = { 0 }; + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_hwif *hwif = hwdev->hwif; + struct hinic_port_stats *port_stats; + u16 out_size = sizeof(*port_stats); + struct pci_dev *pdev = hwif->pdev; + int err; + + port_stats = kzalloc(sizeof(*port_stats), GFP_KERNEL); + if (!port_stats) + return -ENOMEM; + + stats_info.stats_version = HINIC_PORT_STATS_VERSION; + stats_info.stats_size = sizeof(*port_stats); + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_PORT_STATISTICS, + &stats_info, sizeof(stats_info), + port_stats, &out_size); + if (err || !out_size || port_stats->status) { + dev_err(&pdev->dev, + "Failed to get port statistics, err: %d, status: 0x%x, out size: 0x%x\n", + err, port_stats->status, out_size); + err = -EINVAL; + goto out; + } + + memcpy(stats, &port_stats->stats, sizeof(*stats)); + +out: + kfree(port_stats); + + return err; +} + +int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver) +{ + struct hinic_hwdev *hwdev = nic_dev->hwdev; + struct hinic_version_info up_ver = {0}; + struct hinic_hwif *hwif; + struct pci_dev *pdev; + u16 out_size; + int err; + + if (!hwdev) + return -EINVAL; + + hwif = hwdev->hwif; + pdev = hwif->pdev; + + err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_MGMT_VERSION, + &up_ver, sizeof(up_ver), &up_ver, + &out_size); + if (err || !out_size || up_ver.status) { + dev_err(&pdev->dev, + "Failed to get mgmt version, err: %d, status: 0x%x, out size: 0x%x\n", + err, up_ver.status, out_size); + return -EINVAL; + } + + snprintf(mgmt_ver, HINIC_MGMT_VERSION_MAX_LEN, "%s", 
up_ver.ver); + + return 0; +} diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h index c562afd206be..44772fd47fc1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h @@ -13,6 +13,22 @@ #include "hinic_dev.h" +#define HINIC_RSS_KEY_SIZE 40 +#define HINIC_RSS_INDIR_SIZE 256 +#define HINIC_PORT_STATS_VERSION 0 +#define HINIC_FW_VERSION_NAME 16 +#define HINIC_COMPILE_TIME_LEN 20 +#define HINIC_MGMT_VERSION_MAX_LEN 32 + +struct hinic_version_info { + u8 status; + u8 version; + u8 rsvd[6]; + + u8 ver[HINIC_FW_VERSION_NAME]; + u8 time[HINIC_COMPILE_TIME_LEN]; +}; + enum hinic_rx_mode { HINIC_RX_MODE_UC = BIT(0), HINIC_RX_MODE_MC = BIT(1), @@ -183,6 +199,313 @@ struct hinic_checksum_offload { u16 rsvd1; u32 rx_csum_offload; }; + +struct hinic_rq_num { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u16 rsvd1[33]; + u32 num_rqs; + u32 rq_depth; +}; + +struct hinic_lro_config { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u16 rsvd1; + u8 lro_ipv4_en; + u8 lro_ipv6_en; + u8 lro_max_wqe_num; + u8 resv2[13]; +}; + +struct hinic_lro_timer { + u8 status; + u8 version; + u8 rsvd0[6]; + + u8 type; /* 0: set timer value, 1: get timer value */ + u8 enable; /* when set lro time, enable should be 1 */ + u16 rsvd1; + u32 timer; +}; + +struct hinic_vlan_cfg { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 vlan_rx_offload; + u8 rsvd1[5]; +}; + +struct hinic_rss_template_mgmt { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 cmd; + u8 template_id; + u8 rsvd1[4]; +}; + +struct hinic_rss_template_key { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 rsvd1; + u8 key[HINIC_RSS_KEY_SIZE]; +}; + +struct hinic_rss_context_tbl { + u32 group_index; + u32 offset; + u32 size; + u32 rsvd; + u32 ctx; +}; + +struct hinic_rss_context_table { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 rsvd1; + u32 context; +}; + +struct hinic_rss_indirect_tbl { + u32 group_index; + u32 offset; + u32 size; + u32 rsvd; + u8 entry[HINIC_RSS_INDIR_SIZE]; +}; + +struct hinic_rss_indir_table { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 rsvd1; + u8 indir[HINIC_RSS_INDIR_SIZE]; +}; + +struct hinic_rss_key { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 rsvd1; + u8 key[HINIC_RSS_KEY_SIZE]; +}; + +struct hinic_rss_engine_type { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 template_id; + u8 hash_engine; + u8 rsvd1[4]; +}; + +struct hinic_rss_config { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u8 rss_en; + u8 template_id; + u8 rq_priority_number; + u8 rsvd1[11]; +}; + +struct hinic_stats { + char name[ETH_GSTRING_LEN]; + u32 size; + int offset; +}; + +struct hinic_vport_stats { + u64 tx_unicast_pkts_vport; + u64 tx_unicast_bytes_vport; + u64 tx_multicast_pkts_vport; + u64 tx_multicast_bytes_vport; + u64 tx_broadcast_pkts_vport; + u64 tx_broadcast_bytes_vport; + + u64 rx_unicast_pkts_vport; + u64 rx_unicast_bytes_vport; + u64 rx_multicast_pkts_vport; + u64 rx_multicast_bytes_vport; + u64 rx_broadcast_pkts_vport; + u64 rx_broadcast_bytes_vport; + + u64 tx_discard_vport; + u64 rx_discard_vport; + u64 tx_err_vport; + u64 rx_err_vport; +}; + +struct hinic_phy_port_stats { + u64 mac_rx_total_pkt_num; + u64 mac_rx_total_oct_num; + u64 mac_rx_bad_pkt_num; + u64 mac_rx_bad_oct_num; + u64 mac_rx_good_pkt_num; 
+ u64 mac_rx_good_oct_num; + u64 mac_rx_uni_pkt_num; + u64 mac_rx_multi_pkt_num; + u64 mac_rx_broad_pkt_num; + + u64 mac_tx_total_pkt_num; + u64 mac_tx_total_oct_num; + u64 mac_tx_bad_pkt_num; + u64 mac_tx_bad_oct_num; + u64 mac_tx_good_pkt_num; + u64 mac_tx_good_oct_num; + u64 mac_tx_uni_pkt_num; + u64 mac_tx_multi_pkt_num; + u64 mac_tx_broad_pkt_num; + + u64 mac_rx_fragment_pkt_num; + u64 mac_rx_undersize_pkt_num; + u64 mac_rx_undermin_pkt_num; + u64 mac_rx_64_oct_pkt_num; + u64 mac_rx_65_127_oct_pkt_num; + u64 mac_rx_128_255_oct_pkt_num; + u64 mac_rx_256_511_oct_pkt_num; + u64 mac_rx_512_1023_oct_pkt_num; + u64 mac_rx_1024_1518_oct_pkt_num; + u64 mac_rx_1519_2047_oct_pkt_num; + u64 mac_rx_2048_4095_oct_pkt_num; + u64 mac_rx_4096_8191_oct_pkt_num; + u64 mac_rx_8192_9216_oct_pkt_num; + u64 mac_rx_9217_12287_oct_pkt_num; + u64 mac_rx_12288_16383_oct_pkt_num; + u64 mac_rx_1519_max_bad_pkt_num; + u64 mac_rx_1519_max_good_pkt_num; + u64 mac_rx_oversize_pkt_num; + u64 mac_rx_jabber_pkt_num; + + u64 mac_rx_pause_num; + u64 mac_rx_pfc_pkt_num; + u64 mac_rx_pfc_pri0_pkt_num; + u64 mac_rx_pfc_pri1_pkt_num; + u64 mac_rx_pfc_pri2_pkt_num; + u64 mac_rx_pfc_pri3_pkt_num; + u64 mac_rx_pfc_pri4_pkt_num; + u64 mac_rx_pfc_pri5_pkt_num; + u64 mac_rx_pfc_pri6_pkt_num; + u64 mac_rx_pfc_pri7_pkt_num; + u64 mac_rx_control_pkt_num; + u64 mac_rx_y1731_pkt_num; + u64 mac_rx_sym_err_pkt_num; + u64 mac_rx_fcs_err_pkt_num; + u64 mac_rx_send_app_good_pkt_num; + u64 mac_rx_send_app_bad_pkt_num; + + u64 mac_tx_fragment_pkt_num; + u64 mac_tx_undersize_pkt_num; + u64 mac_tx_undermin_pkt_num; + u64 mac_tx_64_oct_pkt_num; + u64 mac_tx_65_127_oct_pkt_num; + u64 mac_tx_128_255_oct_pkt_num; + u64 mac_tx_256_511_oct_pkt_num; + u64 mac_tx_512_1023_oct_pkt_num; + u64 mac_tx_1024_1518_oct_pkt_num; + u64 mac_tx_1519_2047_oct_pkt_num; + u64 mac_tx_2048_4095_oct_pkt_num; + u64 mac_tx_4096_8191_oct_pkt_num; + u64 mac_tx_8192_9216_oct_pkt_num; + u64 mac_tx_9217_12287_oct_pkt_num; + u64 mac_tx_12288_16383_oct_pkt_num; + u64 mac_tx_1519_max_bad_pkt_num; + u64 mac_tx_1519_max_good_pkt_num; + u64 mac_tx_oversize_pkt_num; + u64 mac_tx_jabber_pkt_num; + + u64 mac_tx_pause_num; + u64 mac_tx_pfc_pkt_num; + u64 mac_tx_pfc_pri0_pkt_num; + u64 mac_tx_pfc_pri1_pkt_num; + u64 mac_tx_pfc_pri2_pkt_num; + u64 mac_tx_pfc_pri3_pkt_num; + u64 mac_tx_pfc_pri4_pkt_num; + u64 mac_tx_pfc_pri5_pkt_num; + u64 mac_tx_pfc_pri6_pkt_num; + u64 mac_tx_pfc_pri7_pkt_num; + u64 mac_tx_control_pkt_num; + u64 mac_tx_y1731_pkt_num; + u64 mac_tx_1588_pkt_num; + u64 mac_tx_err_all_pkt_num; + u64 mac_tx_from_app_good_pkt_num; + u64 mac_tx_from_app_bad_pkt_num; + + u64 mac_rx_higig2_ext_pkt_num; + u64 mac_rx_higig2_message_pkt_num; + u64 mac_rx_higig2_error_pkt_num; + u64 mac_rx_higig2_cpu_ctrl_pkt_num; + u64 mac_rx_higig2_unicast_pkt_num; + u64 mac_rx_higig2_broadcast_pkt_num; + u64 mac_rx_higig2_l2_multicast_pkt_num; + u64 mac_rx_higig2_l3_multicast_pkt_num; + + u64 mac_tx_higig2_message_pkt_num; + u64 mac_tx_higig2_ext_pkt_num; + u64 mac_tx_higig2_cpu_ctrl_pkt_num; + u64 mac_tx_higig2_unicast_pkt_num; + u64 mac_tx_higig2_broadcast_pkt_num; + u64 mac_tx_higig2_l2_multicast_pkt_num; + u64 mac_tx_higig2_l3_multicast_pkt_num; +}; + +struct hinic_port_stats_info { + u8 status; + u8 version; + u8 rsvd0[6]; + + u16 func_id; + u16 rsvd1; + u32 stats_version; + u32 stats_size; +}; + +struct hinic_port_stats { + u8 status; + u8 version; + u8 rsvd[6]; + + struct hinic_phy_port_stats stats; +}; + +struct hinic_cmd_vport_stats { + u8 status; + u8 version; + u8 rsvd0[6]; + + struct 
hinic_vport_stats stats; +}; + int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr, u16 vlan_id); @@ -211,7 +534,55 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev, int hinic_port_get_cap(struct hinic_dev *nic_dev, struct hinic_port_cap *port_cap); +int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs); + int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state); int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en); + +int hinic_set_rx_lro_state(struct hinic_dev *nic_dev, u8 lro_en, + u32 lro_timer, u32 wqe_num); + +int hinic_set_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx, + struct hinic_rss_type rss_type); + +int hinic_rss_set_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + const u32 *indir_table); + +int hinic_rss_set_template_tbl(struct hinic_dev *nic_dev, u32 template_id, + const u8 *temp); + +int hinic_rss_set_hash_engine(struct hinic_dev *nic_dev, u8 template_id, + u8 type); + +int hinic_rss_cfg(struct hinic_dev *nic_dev, u8 rss_en, u8 template_id); + +int hinic_rss_template_alloc(struct hinic_dev *nic_dev, u8 *tmpl_idx); + +int hinic_rss_template_free(struct hinic_dev *nic_dev, u8 tmpl_idx); + +void hinic_set_ethtool_ops(struct net_device *netdev); + +int hinic_get_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx, + struct hinic_rss_type *rss_type); + +int hinic_rss_get_indir_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + u32 *indir_table); + +int hinic_rss_get_template_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx, + u8 *temp); + +int hinic_rss_get_hash_engine(struct hinic_dev *nic_dev, u8 tmpl_idx, + u8 *type); + +int hinic_get_phy_port_stats(struct hinic_dev *nic_dev, + struct hinic_phy_port_stats *stats); + +int hinic_get_vport_stats(struct hinic_dev *nic_dev, + struct hinic_vport_stats *stats); + +int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en); + +int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver); + #endif diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 0850ea83d6c1..56ea6d692f1c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -18,6 +18,7 @@ #include <linux/dma-mapping.h> #include <linux/prefetch.h> #include <linux/cpumask.h> +#include <linux/if_vlan.h> #include <asm/barrier.h> #include "hinic_common.h" @@ -36,6 +37,15 @@ #define RX_IRQ_NO_RESEND_TIMER 0 #define HINIC_RX_BUFFER_WRITE 16 +#define HINIC_RX_IPV6_PKT 7 +#define LRO_PKT_HDR_LEN_IPV4 66 +#define LRO_PKT_HDR_LEN_IPV6 86 +#define LRO_REPLENISH_THLD 256 + +#define LRO_PKT_HDR_LEN(cqe) \ + (HINIC_GET_RX_PKT_TYPE(be32_to_cpu((cqe)->offload_type)) == \ + HINIC_RX_IPV6_PKT ? 
LRO_PKT_HDR_LEN_IPV6 : LRO_PKT_HDR_LEN_IPV4) + /** * hinic_rxq_clean_stats - Clean the statistics of specific queue * @rxq: Logical Rx Queue @@ -47,6 +57,9 @@ void hinic_rxq_clean_stats(struct hinic_rxq *rxq) u64_stats_update_begin(&rxq_stats->syncp); rxq_stats->pkts = 0; rxq_stats->bytes = 0; + rxq_stats->errors = 0; + rxq_stats->csum_errors = 0; + rxq_stats->other_errors = 0; u64_stats_update_end(&rxq_stats->syncp); } @@ -65,6 +78,10 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats) start = u64_stats_fetch_begin(&rxq_stats->syncp); stats->pkts = rxq_stats->pkts; stats->bytes = rxq_stats->bytes; + stats->errors = rxq_stats->csum_errors + + rxq_stats->other_errors; + stats->csum_errors = rxq_stats->csum_errors; + stats->other_errors = rxq_stats->other_errors; } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); u64_stats_update_end(&stats->syncp); } @@ -81,27 +98,25 @@ static void rxq_stats_init(struct hinic_rxq *rxq) hinic_rxq_clean_stats(rxq); } -static void rx_csum(struct hinic_rxq *rxq, u16 cons_idx, +static void rx_csum(struct hinic_rxq *rxq, u32 status, struct sk_buff *skb) { struct net_device *netdev = rxq->netdev; - struct hinic_rq_cqe *cqe; - struct hinic_rq *rq; u32 csum_err; - u32 status; - rq = rxq->rq; - cqe = rq->cqe[cons_idx]; - status = be32_to_cpu(cqe->status); csum_err = HINIC_RQ_CQE_STATUS_GET(status, CSUM_ERR); if (!(netdev->features & NETIF_F_RXCSUM)) return; - if (!csum_err) + if (!csum_err) { skb->ip_summed = CHECKSUM_UNNECESSARY; - else + } else { + if (!(csum_err & (HINIC_RX_CSUM_HW_CHECK_NONE | + HINIC_RX_CSUM_IPSU_OTHER_ERR))) + rxq->rxq_stats.csum_errors++; skb->ip_summed = CHECKSUM_NONE; + } } /** * rx_alloc_skb - allocate skb and map it to dma address @@ -311,13 +326,21 @@ static int rx_recv_jumbo_pkt(struct hinic_rxq *rxq, struct sk_buff *head_skb, static int rxq_recv(struct hinic_rxq *rxq, int budget) { struct hinic_qp *qp = container_of(rxq->rq, struct hinic_qp, rq); + struct net_device *netdev = rxq->netdev; u64 pkt_len = 0, rx_bytes = 0; + struct hinic_rq *rq = rxq->rq; struct hinic_rq_wqe *rq_wqe; unsigned int free_wqebbs; + struct hinic_rq_cqe *cqe; int num_wqes, pkts = 0; struct hinic_sge sge; + unsigned int status; struct sk_buff *skb; - u16 ci; + u32 offload_type; + u16 ci, num_lro; + u16 num_wqe = 0; + u32 vlan_len; + u16 vid; while (pkts < budget) { num_wqes = 0; @@ -327,11 +350,13 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) if (!rq_wqe) break; + cqe = rq->cqe[ci]; + status = be32_to_cpu(cqe->status); hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge); rx_unmap_skb(rxq, hinic_sge_to_dma(&sge)); - rx_csum(rxq, ci, skb); + rx_csum(rxq, status, skb); prefetch(skb->data); @@ -345,9 +370,17 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) HINIC_RX_BUF_SZ, ci); } - hinic_rq_put_wqe(rxq->rq, ci, + hinic_rq_put_wqe(rq, ci, (num_wqes + 1) * HINIC_RQ_WQE_SIZE); + offload_type = be32_to_cpu(cqe->offload_type); + vlan_len = be32_to_cpu(cqe->len); + if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && + HINIC_GET_RX_VLAN_OFFLOAD_EN(offload_type)) { + vid = HINIC_GET_RX_VLAN_TAG(vlan_len); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } + skb_record_rx_queue(skb, qp->q_id); skb->protocol = eth_type_trans(skb, rxq->netdev); @@ -355,6 +388,21 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) pkts++; rx_bytes += pkt_len; + + num_lro = HINIC_GET_RX_NUM_LRO(status); + if (num_lro) { + rx_bytes += ((num_lro - 1) * + LRO_PKT_HDR_LEN(cqe)); + + num_wqe += + (u16)(pkt_len >> rxq->rx_buff_shift) + + 
((pkt_len & (rxq->buf_len - 1)) ? 1 : 0); + } + + cqe->status = 0; + + if (num_wqe >= LRO_REPLENISH_THLD) + break; } free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq); @@ -469,20 +517,20 @@ int hinic_init_rxq(struct hinic_rxq *rxq, struct hinic_rq *rq, struct net_device *netdev) { struct hinic_qp *qp = container_of(rq, struct hinic_qp, rq); - int err, pkts, irqname_len; + int err, pkts; rxq->netdev = netdev; rxq->rq = rq; + rxq->buf_len = HINIC_RX_BUF_SZ; + rxq->rx_buff_shift = ilog2(HINIC_RX_BUF_SZ); rxq_stats_init(rxq); - irqname_len = snprintf(NULL, 0, "hinic_rxq%d", qp->q_id) + 1; - rxq->irq_name = devm_kzalloc(&netdev->dev, irqname_len, GFP_KERNEL); + rxq->irq_name = devm_kasprintf(&netdev->dev, GFP_KERNEL, + "hinic_rxq%d", qp->q_id); if (!rxq->irq_name) return -ENOMEM; - sprintf(rxq->irq_name, "hinic_rxq%d", qp->q_id); - pkts = rx_alloc_pkts(rxq); if (!pkts) { err = -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.h b/drivers/net/ethernet/huawei/hinic/hinic_rx.h index bc797498a87f..507dcbae9085 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.h @@ -21,7 +21,10 @@ struct hinic_rxq_stats { u64 pkts; u64 bytes; - + u64 errors; + u64 csum_errors; + u64 other_errors; + u64 alloc_skb_err; struct u64_stats_sync syncp; }; @@ -32,6 +35,8 @@ struct hinic_rxq { struct hinic_rxq_stats rxq_stats; char *irq_name; + u16 buf_len; + u32 rx_buff_shift; struct napi_struct napi; }; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index b9fd8d720349..9c78251f9c39 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -83,6 +83,7 @@ void hinic_txq_clean_stats(struct hinic_txq *txq) txq_stats->tx_busy = 0; txq_stats->tx_wake = 0; txq_stats->tx_dropped = 0; + txq_stats->big_frags_pkts = 0; u64_stats_update_end(&txq_stats->syncp); } @@ -104,6 +105,7 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats) stats->tx_busy = txq_stats->tx_busy; stats->tx_wake = txq_stats->tx_wake; stats->tx_dropped = txq_stats->tx_dropped; + stats->big_frags_pkts = txq_stats->big_frags_pkts; } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); u64_stats_update_end(&stats->syncp); } @@ -405,10 +407,20 @@ static int offload_csum(struct hinic_sq_task *task, u32 *queue_info, return 1; } +static void offload_vlan(struct hinic_sq_task *task, u32 *queue_info, + u16 vlan_tag, u16 vlan_pri) +{ + task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(vlan_tag, VLAN_TAG) | + HINIC_SQ_TASK_INFO0_SET(1U, VLAN_OFFLOAD); + + *queue_info |= HINIC_SQ_CTRL_SET(vlan_pri, QUEUE_INFO_PRI); +} + static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task, u32 *queue_info) { enum hinic_offload_type offload = 0; + u16 vlan_tag; int enabled; enabled = offload_tso(task, queue_info, skb); @@ -422,6 +434,13 @@ static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task, return -EPROTONOSUPPORT; } + if (unlikely(skb_vlan_tag_present(skb))) { + vlan_tag = skb_vlan_tag_get(skb); + offload_vlan(task, queue_info, vlan_tag, + vlan_tag >> VLAN_PRIO_SHIFT); + offload |= TX_OFFLOAD_VLAN; + } + if (offload) hinic_task_set_l2hdr(task, skb_network_offset(skb)); @@ -464,6 +483,12 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } nr_sges = skb_shinfo(skb)->nr_frags + 1; + if (nr_sges > 17) { + u64_stats_update_begin(&txq->txq_stats.syncp); + txq->txq_stats.big_frags_pkts++; + 
u64_stats_update_end(&txq->txq_stats.syncp); + } + if (nr_sges > txq->max_sges) { netdev_err(netdev, "Too many Tx sges\n"); goto skb_error; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.h b/drivers/net/ethernet/huawei/hinic/hinic_tx.h index ca5f537fc383..f158b7db7fb8 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.h @@ -21,6 +21,7 @@ struct hinic_txq_stats { u64 tx_busy; u64 tx_wake; u64 tx_dropped; + u64 big_frags_pkts; struct u64_stats_sync syncp; }; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 551de8c2fef2..f703fa58458e 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3019,7 +3019,7 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); tx_ring->next_to_use = i; } @@ -4540,7 +4540,7 @@ e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); writel(i, adapter->hw.hw_addr + rx_ring->rdt); } } @@ -4655,7 +4655,7 @@ static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter, * applicable for weak-ordered memory model archs, * such as IA-64). */ - wmb(); + dma_wmb(); writel(i, hw->hw_addr + rx_ring->rdt); } } diff --git a/drivers/net/ethernet/intel/e1000e/80003es2lan.c b/drivers/net/ethernet/intel/e1000e/80003es2lan.c index f86d55657959..4b103cca8a39 100644 --- a/drivers/net/ethernet/intel/e1000e/80003es2lan.c +++ b/drivers/net/ethernet/intel/e1000e/80003es2lan.c @@ -680,7 +680,7 @@ static s32 e1000_reset_hw_80003es2lan(struct e1000_hw *hw) ew32(TCTL, E1000_TCTL_PSP); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); ctrl = er32(CTRL); diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index b9309302c29e..2c1bab377b2a 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -959,7 +959,7 @@ static s32 e1000_reset_hw_82571(struct e1000_hw *hw) ew32(TCTL, tctl); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Must acquire the MDIO ownership before MAC reset. * Ownership defaults to firmware after a reset. 
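The hinic_init_rxq() hunk earlier in this series collapses a three-step idiom, a snprintf(NULL, 0, ...) length probe, a devm_kzalloc(), and a trailing sprintf(), into one devm_kasprintf() call that sizes, allocates, and formats in a single step and ties the buffer's lifetime to the device. A minimal before/after sketch of that conversion; the function names here are illustrative, only the devm_* calls and the format string come from the driver:

#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/slab.h>

/* Before: probe the length, allocate, then format. Three steps, and
 * the size accounting is easy to get wrong.
 */
static char *irq_name_old(struct device *dev, int qid)
{
        int len = snprintf(NULL, 0, "hinic_rxq%d", qid) + 1;
        char *name = devm_kzalloc(dev, len, GFP_KERNEL);

        if (name)
                sprintf(name, "hinic_rxq%d", qid);
        return name;
}

/* After: one call; returns NULL on allocation failure, and the buffer
 * is freed automatically when the device is unbound.
 */
static char *irq_name_new(struct device *dev, int qid)
{
        return devm_kasprintf(dev, GFP_KERNEL, "hinic_rxq%d", qid);
}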
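The hinic rxq_recv() changes also add receive VLAN offload: when the hardware has stripped the tag into the completion descriptor, the driver hands it to the stack with __vlan_hwaccel_put_tag(), gated on NETIF_F_HW_VLAN_CTAG_RX. A generic sketch of that receive-side pattern, assuming plain parameters in place of hinic's CQE field accessors:

#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/netdevice.h>

static void my_rx_finish(struct net_device *netdev, struct sk_buff *skb,
                         bool vlan_stripped, u16 vlan_tci)
{
        /* Hand the stripped tag to the stack only while the feature is
         * enabled; otherwise the tag must remain in the frame itself.
         */
        if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && vlan_stripped)
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);

        skb->protocol = eth_type_trans(skb, netdev);
        netif_receive_skb(skb);
}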
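The e1000 hunks above relax wmb() to dma_wmb() ahead of the tail-register writes. Both barriers order the CPU's descriptor stores before the doorbell that lets the NIC fetch them, but dma_wmb() only orders stores to coherent DMA memory and is cheaper than a full wmb() on weakly ordered architectures such as the IA-64 case the driver comments cite. A hedged sketch of the publish pattern, with the ring layout and bit values invented for illustration, not taken from e1000:

#include <linux/io.h>
#include <linux/types.h>
#include <asm/barrier.h>
#include <asm/byteorder.h>

struct my_desc {
        __le64 buf_addr;
        __le32 cmd;
        __le32 status;
};

struct my_ring {
        struct my_desc *desc;   /* coherent DMA memory */
        void __iomem *tail;     /* device tail/doorbell register */
        u16 next_to_use;
        u16 count;
};

static void my_ring_publish(struct my_ring *ring)
{
        struct my_desc *d = &ring->desc[ring->next_to_use];

        /* CPU fills the descriptor in coherent memory. */
        d->cmd |= cpu_to_le32(0x01000000);      /* e.g. end-of-packet bit */

        /* Make the descriptor visible to the device before the tail
         * write hands it over. dma_wmb() suffices for coherent memory;
         * a full wmb() is only needed against non-coherent stores.
         */
        dma_wmb();

        ring->next_to_use = (ring->next_to_use + 1) % ring->count;
        writel(ring->next_to_use, ring->tail);
}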
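The long run of e1000e hunks that follows narrows every usleep_range(min, max) window, for example (10000, 20000) becomes (10000, 11000). usleep_range() arms an hrtimer, and the span is slack the scheduler may use to coalesce wakeups, so a 2x window lets each pass of a polling loop take up to twice its nominal period. A sketch of the bounded-poll idiom these call sites share; the register and bit are plain parameters rather than e1000e symbols:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/io.h>

/* Poll a ready bit roughly every 10 ms, for at most 30 tries. */
static int my_wait_for_bit(void __iomem *status_reg, u32 ready_bit)
{
        int i;

        for (i = 0; i < 30; i++) {
                if (readl(status_reg) & ready_bit)
                        return 0;
                /* At most ~1 ms of scheduler slack per iteration, so
                 * the worst case stays near 30 * 11 ms instead of the
                 * 30 * 20 ms a (10000, 20000) range would allow.
                 */
                usleep_range(10000, 11000);
        }
        return -ETIMEDOUT;
}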
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h index fd550dee4982..63c3c79380a1 100644 --- a/drivers/net/ethernet/intel/e1000e/defines.h +++ b/drivers/net/ethernet/intel/e1000e/defines.h @@ -222,6 +222,9 @@ #define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ #define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master Req status */ +/* PCIm function state */ +#define E1000_STATUS_PCIM_STATE 0x40000000 + #define HALF_DUPLEX 1 #define FULL_DUPLEX 2 diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index be13227f1697..34cd67951aec 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -186,12 +186,13 @@ struct e1000_phy_regs { /* board specific private data structure */ struct e1000_adapter { - struct timer_list watchdog_timer; struct timer_list phy_info_timer; struct timer_list blink_timer; struct work_struct reset_task; - struct work_struct watchdog_task; + struct delayed_work watchdog_task; + + struct workqueue_struct *e1000_workqueue; const struct e1000_info *ei; diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index 02ebf208f48b..08342698386d 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -1014,7 +1014,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Test each interrupt */ for (i = 0; i < 10; i++) { @@ -1046,7 +1046,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMC, mask); ew32(ICS, mask); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->test_icr & mask) { *data = 3; @@ -1064,7 +1064,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMS, mask); ew32(ICS, mask); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (!(adapter->test_icr & mask)) { *data = 4; @@ -1082,7 +1082,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) ew32(IMC, ~mask & 0x00007FFF); ew32(ICS, ~mask & 0x00007FFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->test_icr) { *data = 5; @@ -1094,7 +1094,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data) /* Disable all the interrupts */ ew32(IMC, 0xFFFFFFFF); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Unhook test interrupt handler */ free_irq(irq, netdev); @@ -1470,7 +1470,7 @@ static int e1000_set_82571_fiber_loopback(struct e1000_adapter *adapter) */ ew32(SCTL, E1000_SCTL_ENABLE_SERDES_LOOPBACK); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); return 0; } @@ -1584,7 +1584,7 @@ static void e1000_loopback_cleanup(struct e1000_adapter *adapter) hw->phy.media_type == e1000_media_type_internal_serdes) { ew32(SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); break; } /* Fall Through */ diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index cdae0efde8e6..395b05701480 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -271,7 +271,7 @@ static void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) u16 count = 20; 
do { - usleep_range(5000, 10000); + usleep_range(5000, 6000); } while (!(er32(CTRL_EXT) & E1000_CTRL_EXT_LPCD) && count--); msleep(30); @@ -405,7 +405,7 @@ out: /* Ungate automatic PHY configuration on non-managed 82579 */ if ((hw->mac.type == e1000_pch2lan) && !(fwsm & E1000_ICH_FWSM_FW_VALID)) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_gate_hw_phy_config_ich8lan(hw, false); } @@ -531,7 +531,7 @@ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw) phy->id = 0; while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) && (i++ < 100)) { - usleep_range(1000, 2000); + usleep_range(1000, 1100); ret_val = e1000e_get_phy_id(hw); if (ret_val) return ret_val; @@ -1244,7 +1244,7 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) goto out; } - usleep_range(10000, 20000); + usleep_range(10000, 11000); } e_dbg("ULP_CONFIG_DONE cleared after %dmsec\n", i * 10); @@ -1999,7 +1999,7 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) && (i++ < 30)) - usleep_range(10000, 20000); + usleep_range(10000, 11000); return blocked ? E1000_BLK_PHY_RESET : 0; } @@ -2818,7 +2818,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) return 0; /* Allow time for h/w to get to quiescent state after reset */ - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Perform any necessary post-reset workarounds */ switch (hw->mac.type) { @@ -2854,7 +2854,7 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) if (hw->mac.type == e1000_pch2lan) { /* Ungate automatic PHY configuration on non-managed 82579 */ if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_gate_hw_phy_config_ich8lan(hw, false); } @@ -3875,7 +3875,7 @@ release: */ if (!ret_val) { nvm->ops.reload(hw); - usleep_range(10000, 20000); + usleep_range(10000, 11000); } out: @@ -4026,7 +4026,7 @@ release: */ if (!ret_val) { nvm->ops.reload(hw); - usleep_range(10000, 20000); + usleep_range(10000, 11000); } out: @@ -4650,7 +4650,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) ew32(TCTL, E1000_TCTL_PSP); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); /* Workaround for ICH8 bit corruption issue in FIFO memory */ if (hw->mac.type == e1000_ich8lan) { diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c index 4abd55d646c5..e531976f8a67 100644 --- a/drivers/net/ethernet/intel/e1000e/mac.c +++ b/drivers/net/ethernet/intel/e1000e/mac.c @@ -797,7 +797,7 @@ static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) * milliseconds even if the other end is doing it in SW). 
*/ for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { - usleep_range(10000, 20000); + usleep_range(10000, 11000); status = er32(STATUS); if (status & E1000_STATUS_LU) break; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 0e09bede42a2..e4baa13b3cda 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -1780,7 +1780,8 @@ static irqreturn_t e1000_intr_msi(int __always_unused irq, void *data) } /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } /* Reset on uncorrectable ECC error */ @@ -1860,7 +1861,8 @@ static irqreturn_t e1000_intr(int __always_unused irq, void *data) } /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } /* Reset on uncorrectable ECC error */ @@ -1905,7 +1907,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data) hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, 1); } if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -3208,7 +3211,7 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) if (!(adapter->flags2 & FLAG2_NO_DISABLE_RX)) ew32(RCTL, rctl & ~E1000_RCTL_EN); e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); if (adapter->flags2 & FLAG2_DMA_BURST) { /* set the writeback threshold (only takes effect if the RDTR @@ -4046,12 +4049,12 @@ void e1000e_reset(struct e1000_adapter *adapter) case e1000_pch_lpt: case e1000_pch_spt: case e1000_pch_cnp: - fc->refresh_time = 0x0400; + fc->refresh_time = 0xFFFF; + fc->pause_time = 0xFFFF; if (adapter->netdev->mtu <= ETH_DATA_LEN) { fc->high_water = 0x05C20; fc->low_water = 0x05048; - fc->pause_time = 0x0650; break; } @@ -4208,7 +4211,7 @@ void e1000e_up(struct e1000_adapter *adapter) e1000_configure_msix(adapter); e1000_irq_enable(adapter); - netif_start_queue(adapter->netdev); + /* Tx queue started by watchdog timer when link is up */ e1000e_trigger_lsc(adapter); } @@ -4272,13 +4275,12 @@ void e1000e_down(struct e1000_adapter *adapter, bool reset) /* flush both disables and wait for them to finish */ e1e_flush(); - usleep_range(10000, 20000); + usleep_range(10000, 11000); e1000_irq_disable(adapter); napi_synchronize(&adapter->napi); - del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); spin_lock(&adapter->stats64_lock); @@ -4310,7 +4312,7 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter) { might_sleep(); while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) - usleep_range(1000, 2000); + usleep_range(1000, 1100); e1000e_down(adapter, true); e1000e_up(adapter); clear_bit(__E1000_RESETTING, &adapter->state); @@ -4606,6 +4608,7 @@ int e1000e_open(struct net_device *netdev) pm_runtime_get_sync(&pdev->dev); netif_carrier_off(netdev); + netif_stop_queue(netdev); /* allocate transmit descriptors */ err = e1000e_setup_tx_resources(adapter->tx_ring); @@ -4666,7 +4669,6 @@ int e1000e_open(struct net_device *netdev) e1000_irq_enable(adapter); adapter->tx_hang_recheck = false; - 
netif_start_queue(netdev); hw->mac.get_link_status = true; pm_runtime_put(&pdev->dev); @@ -4707,7 +4709,7 @@ int e1000e_close(struct net_device *netdev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -5150,31 +5152,18 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter) } } -/** - * e1000_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long - **/ -static void e1000_watchdog(struct timer_list *t) -{ - struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer); - - /* Do the rest outside of interrupt context */ - schedule_work(&adapter->watchdog_task); - - /* TODO: make this use queue_delayed_work() */ -} - static void e1000_watchdog_task(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, - watchdog_task); + watchdog_task.work); struct net_device *netdev = adapter->netdev; struct e1000_mac_info *mac = &adapter->hw.mac; struct e1000_phy_info *phy = &adapter->hw.phy; struct e1000_ring *tx_ring = adapter->tx_ring; + u32 dmoff_exit_timeout = 100, tries = 0; struct e1000_hw *hw = &adapter->hw; - u32 link, tctl; + u32 link, tctl, pcim_state; if (test_bit(__E1000_DOWN, &adapter->state)) return; @@ -5199,6 +5188,21 @@ static void e1000_watchdog_task(struct work_struct *work) /* Cancel scheduled suspend requests. */ pm_runtime_resume(netdev->dev.parent); + /* Checking if MAC is in DMoff state*/ + pcim_state = er32(STATUS); + while (pcim_state & E1000_STATUS_PCIM_STATE) { + if (tries++ == dmoff_exit_timeout) { + e_dbg("Error in exiting dmoff\n"); + break; + } + usleep_range(10000, 20000); + pcim_state = er32(STATUS); + + /* Checking if MAC exited DMoff state */ + if (!(pcim_state & E1000_STATUS_PCIM_STATE)) + e1000_phy_hw_reset(&adapter->hw); + } + /* update snapshot of PHY registers on LSC */ e1000_phy_read_status(adapter); mac->ops.get_link_up_info(&adapter->hw, @@ -5288,6 +5292,7 @@ static void e1000_watchdog_task(struct work_struct *work) if (phy->ops.cfg_on_link_up) phy->ops.cfg_on_link_up(hw); + netif_wake_queue(netdev); netif_carrier_on(netdev); if (!test_bit(__E1000_DOWN, &adapter->state)) @@ -5301,6 +5306,7 @@ static void e1000_watchdog_task(struct work_struct *work) /* Link status message must follow this format */ pr_info("%s NIC Link is Down\n", adapter->netdev->name); netif_carrier_off(netdev); + netif_stop_queue(netdev); if (!test_bit(__E1000_DOWN, &adapter->state)) mod_timer(&adapter->phy_info_timer, round_jiffies(jiffies + 2 * HZ)); @@ -5308,13 +5314,8 @@ static void e1000_watchdog_task(struct work_struct *work) /* 8000ES2LAN requires a Rx packet buffer work-around * on link down event; reset the controller to flush * the Rx packet buffer. - * - * If the link is lost the controller stops DMA, but - * if there is queued Tx work it cannot be done. So - * reset the controller to flush the Tx packet buffers. */ - if ((adapter->flags & FLAG_RX_NEEDS_RESTART) || - e1000_desc_unused(tx_ring) + 1 < tx_ring->count) + if (adapter->flags & FLAG_RX_NEEDS_RESTART) adapter->flags |= FLAG_RESTART_NOW; else pm_schedule_suspend(netdev->dev.parent, @@ -5337,6 +5338,14 @@ link_up: adapter->gotc_old = adapter->stats.gotc; spin_unlock(&adapter->stats64_lock); + /* If the link is lost the controller stops DMA, but + * if there is queued Tx work it cannot be done. 
So + * reset the controller to flush the Tx packet buffers. + */ + if (!netif_carrier_ok(netdev) && + (e1000_desc_unused(tx_ring) + 1 < tx_ring->count)) + adapter->flags |= FLAG_RESTART_NOW; + /* If reset is necessary, do it outside of interrupt context. */ if (adapter->flags & FLAG_RESTART_NOW) { schedule_work(&adapter->reset_task); @@ -5395,8 +5404,9 @@ link_up: /* Reset the timer */ if (!test_bit(__E1000_DOWN, &adapter->state)) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 2 * HZ)); + queue_delayed_work(adapter->e1000_workqueue, + &adapter->watchdog_task, + round_jiffies(2 * HZ)); } #define E1000_TX_FLAGS_CSUM 0x00000001 @@ -6016,7 +6026,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) } while (test_and_set_bit(__E1000_RESETTING, &adapter->state)) - usleep_range(1000, 2000); + usleep_range(1000, 1100); /* e1000e_down -> e1000e_reset dependent on max_frame_size & mtu */ adapter->max_frame_size = max_frame; e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu); @@ -6296,7 +6306,7 @@ static int e1000e_pm_freeze(struct device *dev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -6711,7 +6721,7 @@ static int e1000e_pm_runtime_suspend(struct device *dev) int count = E1000_CHECK_RESET_COUNT; while (test_bit(__E1000_RESETTING, &adapter->state) && count--) - usleep_range(10000, 20000); + usleep_range(10000, 11000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->state)); @@ -7251,11 +7261,21 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0); + adapter->e1000_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, + e1000e_driver_name); + + if (!adapter->e1000_workqueue) { + err = -ENOMEM; + goto err_workqueue; + } + + INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog_task); + queue_delayed_work(adapter->e1000_workqueue, &adapter->watchdog_task, + 0); + timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0); INIT_WORK(&adapter->reset_task, e1000_reset_task); - INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task); INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround); INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task); INIT_WORK(&adapter->print_hang_task, e1000_print_hw_hang); @@ -7349,6 +7369,9 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_register: + flush_workqueue(adapter->e1000_workqueue); + destroy_workqueue(adapter->e1000_workqueue); +err_workqueue: if (!(adapter->flags & FLAG_HAS_AMT)) e1000e_release_hw_control(adapter); err_eeprom: @@ -7395,15 +7418,17 @@ static void e1000_remove(struct pci_dev *pdev) */ if (!down) set_bit(__E1000_DOWN, &adapter->state); - del_timer_sync(&adapter->watchdog_timer); del_timer_sync(&adapter->phy_info_timer); cancel_work_sync(&adapter->reset_task); - cancel_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->downshift_task); cancel_work_sync(&adapter->update_phy_task); cancel_work_sync(&adapter->print_hang_task); + cancel_delayed_work(&adapter->watchdog_task); + flush_workqueue(adapter->e1000_workqueue); + destroy_workqueue(adapter->e1000_workqueue); + if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) { cancel_work_sync(&adapter->tx_hwtstamp_work); if (adapter->tx_hwtstamp_skb) { diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c 
b/drivers/net/ethernet/intel/e1000e/nvm.c index 937f9af22d26..e609f4df86f4 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -392,7 +392,7 @@ s32 e1000e_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) break; } } - usleep_range(10000, 20000); + usleep_range(10000, 11000); nvm->ops.release(hw); } diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 7ce42040b851..84bd06901014 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -27,6 +27,7 @@ #include <net/ip6_checksum.h> #include <linux/ethtool.h> #include <linux/if_vlan.h> +#include <linux/if_macvlan.h> #include <linux/if_bridge.h> #include <linux/clocksource.h> #include <linux/net_tstamp.h> @@ -295,8 +296,6 @@ struct i40e_cloud_filter { u8 tunnel_type; }; -#define I40E_ETH_P_LLDP 0x88cc - #define I40E_DCB_PRIO_TYPE_STRICT 0 #define I40E_DCB_PRIO_TYPE_ETS 1 #define I40E_DCB_STRICT_PRIO_CREDITS 127 @@ -414,6 +413,11 @@ struct i40e_flex_pit { u8 pit_index; }; +struct i40e_fwd_adapter { + struct net_device *netdev; + int bit_no; +}; + struct i40e_channel { struct list_head list; bool initialized; @@ -428,11 +432,25 @@ struct i40e_channel { struct i40e_aqc_vsi_properties_data info; u64 max_tx_rate; + struct i40e_fwd_adapter *fwd; /* track this channel belongs to which VSI */ struct i40e_vsi *parent_vsi; }; +static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch) +{ + return !!ch->fwd; +} + +static inline u8 *i40e_channel_mac(struct i40e_channel *ch) +{ + if (i40e_is_channel_macvlan(ch)) + return ch->fwd->netdev->dev_addr; + else + return NULL; +} + /* struct that defines the Ethernet device */ struct i40e_pf { struct pci_dev *pdev; @@ -777,7 +795,8 @@ struct i40e_vsi { u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */ u16 req_queue_pairs; /* User requested queue pairs */ u16 num_queue_pairs; /* Used tx and rx pairs */ - u16 num_desc; + u16 num_tx_desc; + u16 num_rx_desc; enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */ s16 vf_id; /* Virtual function ID for SRIOV VSIs */ @@ -814,6 +833,13 @@ struct i40e_vsi { struct list_head ch_list; u16 tc_seid_map[I40E_MAX_TRAFFIC_CLASS]; + /* macvlan fields */ +#define I40E_MAX_MACVLANS 128 /* Max HW vectors - 1 on FVL */ +#define I40E_MIN_MACVLAN_VECTORS 2 /* Min vectors to enable macvlans */ + DECLARE_BITMAP(fwd_bitmask, I40E_MAX_MACVLANS); + struct list_head macvlan_list; + int macvlan_cnt; + void *priv; /* client driver data reference. 
*/ /* VSI specific handlers */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index 243dcd4bec19..814acbe79ffd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -675,7 +675,7 @@ static u16 i40e_clean_asq(struct i40e_hw *hw) desc = I40E_ADMINQ_DESC(*asq, ntc); details = I40E_ADMINQ_DETAILS(*asq, ntc); while (rd32(hw, hw->aq.asq.head) != ntc) { - i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head)); if (details->callback) { @@ -835,7 +835,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, } /* bump the tail */ - i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQTX: desc and buffer:\n"); + i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQTX: desc and buffer:\n"); i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc_on_ring, buff, buff_size); (hw->aq.asq.next_to_use)++; @@ -886,7 +886,7 @@ i40e_status i40e_asq_send_command(struct i40e_hw *hw, hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval; } - i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, + i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQTX: desc and buffer writeback:\n"); i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, buff, buff_size); @@ -995,7 +995,7 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw, memcpy(e->msg_buf, hw->aq.arq.r.arq_bi[desc_idx].va, e->msg_len); - i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: desc and buffer:\n"); + i40e_debug(hw, I40E_DEBUG_AQ_COMMAND, "AQRX: desc and buffer:\n"); i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc, e->msg_buf, hw->aq.arq_buf_size); diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index ecb1adaa54ec..906cf68d3453 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -281,47 +281,49 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, void *buffer, u16 buf_len) { struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; + u32 effective_mask = hw->debug_mask & mask; + char prefix[27]; u16 len; u8 *buf = (u8 *)buffer; - if ((!(mask & hw->debug_mask)) || (desc == NULL)) + if (!effective_mask || !desc) return; len = le16_to_cpu(aq_desc->datalen); - i40e_debug(hw, mask, + i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR, "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n", le16_to_cpu(aq_desc->opcode), le16_to_cpu(aq_desc->flags), le16_to_cpu(aq_desc->datalen), le16_to_cpu(aq_desc->retval)); - i40e_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n", + i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR, + "\tcookie (h,l) 0x%08X 0x%08X\n", le32_to_cpu(aq_desc->cookie_high), le32_to_cpu(aq_desc->cookie_low)); - i40e_debug(hw, mask, "\tparam (0,1) 0x%08X 0x%08X\n", + i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR, + "\tparam (0,1) 0x%08X 0x%08X\n", le32_to_cpu(aq_desc->params.internal.param0), le32_to_cpu(aq_desc->params.internal.param1)); - i40e_debug(hw, mask, "\taddr (h,l) 0x%08X 0x%08X\n", + i40e_debug(hw, mask & I40E_DEBUG_AQ_DESCRIPTOR, + "\taddr (h,l) 0x%08X 0x%08X\n", le32_to_cpu(aq_desc->params.external.addr_high), le32_to_cpu(aq_desc->params.external.addr_low)); - if ((buffer != NULL) && (aq_desc->datalen != 0)) { + if (buffer && buf_len != 0 && len != 0 && + (effective_mask & I40E_DEBUG_AQ_DESC_BUFFER)) { i40e_debug(hw, mask, "AQ CMD Buffer:\n"); if (buf_len < len) len = buf_len; - /* write the full 16-byte chunks */ - if 
(hw->debug_mask & mask) { - char prefix[27]; - - snprintf(prefix, sizeof(prefix), - "i40e %02x:%02x.%x: \t0x", - hw->bus.bus_id, - hw->bus.device, - hw->bus.func); - - print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, - 16, 1, buf, len, false); - } + + snprintf(prefix, sizeof(prefix), + "i40e %02x:%02x.%x: \t0x", + hw->bus.bus_id, + hw->bus.device, + hw->bus.func); + + print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); } } @@ -1859,8 +1861,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw, hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; - if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && - hw->aq.api_min_ver >= 7) { + if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) { __le32 tmp; memcpy(&tmp, resp->link_type, sizeof(tmp)); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 7ea4f09229e4..55d20acfcf70 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -333,8 +333,9 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) " seid = %d, id = %d, uplink_seid = %d\n", vsi->seid, vsi->id, vsi->uplink_seid); dev_info(&pf->pdev->dev, - " base_queue = %d, num_queue_pairs = %d, num_desc = %d\n", - vsi->base_queue, vsi->num_queue_pairs, vsi->num_desc); + " base_queue = %d, num_queue_pairs = %d, num_tx_desc = %d, num_rx_desc = %d\n", + vsi->base_queue, vsi->num_queue_pairs, vsi->num_tx_desc, + vsi->num_rx_desc); dev_info(&pf->pdev->dev, " type = %i\n", vsi->type); if (vsi->type == I40E_VSI_SRIOV) dev_info(&pf->pdev->dev, " VF ID = %i\n", vsi->vf_id); @@ -1330,7 +1331,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, } ret = i40e_aq_add_rem_control_packet_filter(&pf->hw, pf->hw.mac.addr, - I40E_ETH_P_LLDP, 0, + ETH_P_LLDP, 0, pf->vsi[pf->lan_vsi]->seid, 0, true, NULL, NULL); if (ret) { @@ -1348,7 +1349,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, ret = i40e_aq_add_rem_control_packet_filter(&pf->hw, pf->hw.mac.addr, - I40E_ETH_P_LLDP, 0, + ETH_P_LLDP, 0, pf->vsi[pf->lan_vsi]->seid, 0, false, NULL, NULL); if (ret) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 7545b21bee3c..527eb52c5401 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1982,6 +1982,8 @@ static int i40e_set_ringparam(struct net_device *netdev, if (i40e_enabled_xdp_vsi(vsi)) vsi->xdp_rings[i]->count = new_tx_count; } + vsi->num_tx_desc = new_tx_count; + vsi->num_rx_desc = new_rx_count; goto done; } @@ -2118,6 +2120,8 @@ rx_unwind: rx_rings = NULL; } + vsi->num_tx_desc = new_tx_count; + vsi->num_rx_desc = new_rx_count; i40e_up(vsi); free_tx: @@ -4852,9 +4856,12 @@ static u32 i40e_get_priv_flags(struct net_device *dev) static int i40e_set_priv_flags(struct net_device *dev, u32 flags) { struct i40e_netdev_priv *np = netdev_priv(dev); + u64 orig_flags, new_flags, changed_flags; + enum i40e_admin_queue_err adq_err; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - u64 orig_flags, new_flags, changed_flags; + bool is_reset_needed; + i40e_status status; u32 i, j; orig_flags = READ_ONCE(pf->flags); @@ -4898,6 +4905,10 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) flags_complete: changed_flags = orig_flags ^ new_flags; + is_reset_needed = !!(changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | + 
I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED | + I40E_FLAG_DISABLE_FW_LLDP)); + /* Before we finalize any flag changes, we need to perform some * checks to ensure that the changes are supported and safe. */ @@ -4932,13 +4943,6 @@ flags_complete: return -EOPNOTSUPP; } - /* Now that we've checked to ensure that the new flags are valid, load - * them into place. Since we only modify flags either (a) during - * initialization or (b) while holding the RTNL lock, we don't need - * anything fancy here. - */ - pf->flags = new_flags; - /* Process any additional changes needed as a result of flag changes. * The changed_flags value reflects the list of bits that were * changed in the code above. @@ -4946,7 +4950,7 @@ flags_complete: /* Flush current ATR settings if ATR was disabled */ if ((changed_flags & I40E_FLAG_FD_ATR_ENABLED) && - !(pf->flags & I40E_FLAG_FD_ATR_ENABLED)) { + !(new_flags & I40E_FLAG_FD_ATR_ENABLED)) { set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); } @@ -4955,7 +4959,7 @@ flags_complete: u16 sw_flags = 0, valid_flags = 0; int ret; - if (!(pf->flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) + if (!(new_flags & I40E_FLAG_TRUE_PROMISC_SUPPORT)) sw_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC; ret = i40e_aq_set_switch_config(&pf->hw, sw_flags, valid_flags, @@ -4974,13 +4978,13 @@ flags_complete: (changed_flags & I40E_FLAG_BASE_R_FEC)) { u8 fec_cfg = 0; - if (pf->flags & I40E_FLAG_RS_FEC && - pf->flags & I40E_FLAG_BASE_R_FEC) { + if (new_flags & I40E_FLAG_RS_FEC && + new_flags & I40E_FLAG_BASE_R_FEC) { fec_cfg = I40E_AQ_SET_FEC_AUTO; - } else if (pf->flags & I40E_FLAG_RS_FEC) { + } else if (new_flags & I40E_FLAG_RS_FEC) { fec_cfg = (I40E_AQ_SET_FEC_REQUEST_RS | I40E_AQ_SET_FEC_ABILITY_RS); - } else if (pf->flags & I40E_FLAG_BASE_R_FEC) { + } else if (new_flags & I40E_FLAG_BASE_R_FEC) { fec_cfg = (I40E_AQ_SET_FEC_REQUEST_KR | I40E_AQ_SET_FEC_ABILITY_KR); } @@ -4988,14 +4992,14 @@ flags_complete: dev_warn(&pf->pdev->dev, "Cannot change FEC config\n"); } - if ((changed_flags & pf->flags & + if ((changed_flags & new_flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED) && - (pf->flags & I40E_FLAG_MFP_ENABLED)) + (new_flags & I40E_FLAG_MFP_ENABLED)) dev_warn(&pf->pdev->dev, "Turning on link-down-on-close flag may affect other partitions\n"); if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) { - if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP) { + if (new_flags & I40E_FLAG_DISABLE_FW_LLDP) { struct i40e_dcbx_config *dcbcfg; i40e_aq_stop_lldp(&pf->hw, true, false, NULL); @@ -5013,17 +5017,43 @@ flags_complete: dcbcfg->pfc.willing = 1; dcbcfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; } else { - i40e_aq_start_lldp(&pf->hw, false, NULL); + status = i40e_aq_start_lldp(&pf->hw, false, NULL); + if (status) { + adq_err = pf->hw.aq.asq_last_status; + switch (adq_err) { + case I40E_AQ_RC_EEXIST: + dev_warn(&pf->pdev->dev, + "FW LLDP agent is already running\n"); + is_reset_needed = false; + break; + case I40E_AQ_RC_EPERM: + dev_warn(&pf->pdev->dev, + "Device configuration forbids SW from starting the LLDP agent.\n"); + return -EINVAL; + default: + dev_warn(&pf->pdev->dev, + "Starting FW LLDP agent failed: error: %s, %s\n", + i40e_stat_str(&pf->hw, + status), + i40e_aq_str(&pf->hw, + adq_err)); + return -EINVAL; + } + } } } + /* Now that we've checked to ensure that the new flags are valid, load + * them into place. 
Since we only modify flags either (a) during + * initialization or (b) while holding the RTNL lock, we don't need + * anything fancy here. + */ + pf->flags = new_flags; + /* Issue reset to cause things to take effect, as additional bits * are added we will need to create a mask of bits requiring reset */ - if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | - I40E_FLAG_LEGACY_RX | - I40E_FLAG_SOURCE_PRUNING_DISABLED | - I40E_FLAG_DISABLE_FW_LLDP)) + if (is_reset_needed) i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); return 0; @@ -5181,6 +5211,16 @@ static int i40e_get_module_eeprom(struct net_device *netdev, return 0; } +static int i40e_get_eee(struct net_device *netdev, struct ethtool_eee *edata) +{ + return -EOPNOTSUPP; +} + +static int i40e_set_eee(struct net_device *netdev, struct ethtool_eee *edata) +{ + return -EOPNOTSUPP; +} + static const struct ethtool_ops i40e_ethtool_recovery_mode_ops = { .set_eeprom = i40e_set_eeprom, .get_eeprom_len = i40e_get_eeprom_len, @@ -5208,6 +5248,8 @@ static const struct ethtool_ops i40e_ethtool_ops = { .set_rxnfc = i40e_set_rxnfc, .self_test = i40e_diag_test, .get_strings = i40e_get_strings, + .get_eee = i40e_get_eee, + .set_eee = i40e_set_eee, .set_phys_id = i40e_set_phys_id, .get_sset_count = i40e_get_sset_count, .get_ethtool_stats = i40e_get_ethtool_stats, diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 320562b39686..9ebbe3da61bb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -32,7 +32,7 @@ static const char i40e_driver_string[] = __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN const char i40e_driver_version_str[] = DRV_VERSION; -static const char i40e_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporation."; +static const char i40e_copyright[] = "Copyright (c) 2013 - 2019 Intel Corporation."; /* a bit of forward declarations */ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi); @@ -636,9 +636,6 @@ void i40e_update_eth_stats(struct i40e_vsi *vsi) i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unknown_protocol, &es->rx_unknown_protocol); - i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx), - vsi->stat_offsets_loaded, - &oes->tx_errors, &es->tx_errors); i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), @@ -5864,8 +5861,10 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, return -ENOENT; } - /* Success, update channel */ - ch->enabled_tc = enabled_tc; + /* Success, update channel, set enabled_tc only if the channel + * is not a macvlan + */ + ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc; ch->seid = ctxt.seid; ch->vsi_number = ctxt.vsi_number; ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx); @@ -6413,6 +6412,50 @@ static int i40e_resume_port_tx(struct i40e_pf *pf) } /** + * i40e_update_dcb_config + * @hw: pointer to the HW struct + * @enable_mib_change: enable MIB change event + * + * Update DCB configuration from the firmware + **/ +static enum i40e_status_code +i40e_update_dcb_config(struct i40e_hw *hw, bool enable_mib_change) +{ + struct i40e_lldp_variables lldp_cfg; + i40e_status ret; + + if (!hw->func_caps.dcb) + return I40E_NOT_SUPPORTED; + + /* Read LLDP NVM area */ + ret = i40e_read_lldp_cfg(hw, &lldp_cfg); + if (ret) + return I40E_ERR_NOT_READY; + + /* Get DCBX status */ + ret = i40e_get_dcbx_status(hw, &hw->dcbx_status); + if (ret) + return ret; + + /* Check the DCBX 
Status */ + if (hw->dcbx_status == I40E_DCBX_STATUS_DONE || + hw->dcbx_status == I40E_DCBX_STATUS_IN_PROGRESS) { + /* Get current DCBX configuration */ + ret = i40e_get_dcb_config(hw); + if (ret) + return ret; + } else if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) { + return I40E_ERR_NOT_READY; + } + + /* Configure the LLDP MIB change event */ + if (enable_mib_change) + ret = i40e_aq_cfg_lldp_mib_change_event(hw, true, NULL); + + return ret; +} + +/** * i40e_init_pf_dcb - Initialize DCB configuration * @pf: PF being configured * @@ -6428,11 +6471,13 @@ static int i40e_init_pf_dcb(struct i40e_pf *pf) * Also do not enable DCBx if FW LLDP agent is disabled */ if ((pf->hw_features & I40E_HW_NO_DCB_SUPPORT) || - (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) + (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)) { + dev_info(&pf->pdev->dev, "DCB is not supported or FW LLDP is disabled\n"); + err = I40E_NOT_SUPPORTED; goto out; + } - /* Get the initial DCB configuration */ - err = i40e_init_dcb(hw, true); + err = i40e_update_dcb_config(hw, true); if (!err) { /* Device/Function is not DCBX capable */ if ((!hw->func_caps.dcb) || @@ -6869,6 +6914,489 @@ static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi) } /** + * i40e_del_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function deletes a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. + **/ +static i40e_status i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_remove_macvlan_element_data element; + i40e_status status; + + memset(&element, 0, sizeof(element)); + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; + status = i40e_aq_remove_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_add_macvlan_filter + * @hw: pointer to the HW structure + * @seid: seid of the channel VSI + * @macaddr: the mac address to apply as a filter + * @aq_err: store the admin Q error + * + * This function adds a mac filter on the channel VSI which serves as the + * macvlan. Returns 0 on success. 
+ **/ +static i40e_status i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid, + const u8 *macaddr, int *aq_err) +{ + struct i40e_aqc_add_macvlan_element_data element; + i40e_status status; + u16 cmd_flags = 0; + + ether_addr_copy(element.mac_addr, macaddr); + element.vlan_tag = 0; + element.queue_number = 0; + element.match_method = I40E_AQC_MM_ERR_NO_RES; + cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; + element.flags = cpu_to_le16(cmd_flags); + status = i40e_aq_add_macvlan(hw, seid, &element, 1, NULL); + *aq_err = hw->aq.asq_last_status; + + return status; +} + +/** + * i40e_reset_ch_rings - Reset the queue contexts in a channel + * @vsi: the VSI we want to access + * @ch: the channel we want to access + */ +static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch) +{ + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + int i; + + for (i = 0; i < ch->num_queue_pairs; i++) { + pf_q = ch->base_queue + i; + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = NULL; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = NULL; + } +} + +/** + * i40e_free_macvlan_channels + * @vsi: the VSI we want to access + * + * This function frees the Qs of the channel VSI from + * the stack and also deletes the channel VSIs which + * serve as macvlans. + */ +static void i40e_free_macvlan_channels(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + int ret; + + if (list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + struct i40e_vsi *parent_vsi; + + if (i40e_is_channel_macvlan(ch)) { + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + + list_del(&ch->list); + parent_vsi = ch->parent_vsi; + if (!parent_vsi || !ch->initialized) { + kfree(ch); + continue; + } + + /* remove the VSI */ + ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid, + NULL); + if (ret) + dev_err(&vsi->back->pdev->dev, + "unable to remove channel (%d) for parent VSI(%d)\n", + ch->seid, parent_vsi->seid); + kfree(ch); + } + vsi->macvlan_cnt = 0; +} + +/** + * i40e_fwd_ring_up - bring the macvlan device up + * @vsi: the VSI we want to access + * @vdev: macvlan netdevice + * @fwd: the private fwd structure + */ +static int i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, + struct i40e_fwd_adapter *fwd) +{ + int ret = 0, num_tc = 1, i, aq_err; + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + + if (list_empty(&vsi->macvlan_list)) + return -EINVAL; + + /* Go through the list and find an available channel */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (!i40e_is_channel_macvlan(ch)) { + ch->fwd = fwd; + /* record configuration for macvlan interface in vdev */ + for (i = 0; i < num_tc; i++) + netdev_bind_sb_channel_queue(vsi->netdev, vdev, + i, + ch->num_queue_pairs, + ch->base_queue); + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *tx_ring, *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + + /* Get to TX ring ptr */ + tx_ring = vsi->tx_rings[pf_q]; + tx_ring->ch = ch; + + /* Get the RX ring ptr */ + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->ch = ch; + } + break; + } + } + + /* Guarantee all rings are updated before we update the + * MAC address filter. 
+ */ + wmb(); + + /* Add a mac filter */ + ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err); + if (ret) { + /* if we cannot add the MAC rule then disable the offload */ + macvlan_release_l2fw_offload(vdev); + for (i = 0; i < ch->num_queue_pairs; i++) { + struct i40e_ring *rx_ring; + u16 pf_q; + + pf_q = ch->base_queue + i; + rx_ring = vsi->rx_rings[pf_q]; + rx_ring->netdev = NULL; + } + dev_info(&pf->pdev->dev, + "Error adding mac filter on macvlan err %s, aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, aq_err)); + netdev_err(vdev, "L2fwd offload disabled to L2 filter error\n"); + } + + return ret; +} + +/** + * i40e_setup_macvlans - create the channels which will be macvlans + * @vsi: the VSI we want to access + * @macvlan_cnt: no. of macvlans to be setup + * @qcnt: no. of Qs per macvlan + * @vdev: macvlan netdevice + */ +static int i40e_setup_macvlans(struct i40e_vsi *vsi, u16 macvlan_cnt, u16 qcnt, + struct net_device *vdev) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + struct i40e_vsi_context ctxt; + u16 sections, qmap, num_qps; + struct i40e_channel *ch; + int i, pow, ret = 0; + u8 offset = 0; + + if (vsi->type != I40E_VSI_MAIN || !macvlan_cnt) + return -EINVAL; + + num_qps = vsi->num_queue_pairs - (macvlan_cnt * qcnt); + + /* find the next higher power-of-2 of num queue pairs */ + pow = fls(roundup_pow_of_two(num_qps) - 1); + + qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) | + (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT); + + /* Setup context bits for the main VSI */ + sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; + sections |= I40E_AQ_VSI_PROP_SCHED_VALID; + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.seid = vsi->seid; + ctxt.pf_num = vsi->back->hw.pf_id; + ctxt.vf_num = 0; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.info = vsi->info; + ctxt.info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt.info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG); + ctxt.info.queue_mapping[0] = cpu_to_le16(vsi->base_queue); + ctxt.info.valid_sections |= cpu_to_le16(sections); + + /* Reconfigure RSS for main VSI with new max queue count */ + vsi->rss_size = max_t(u16, num_qps, qcnt); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, + "Failed to reconfig RSS for num_queues (%u)\n", + vsi->rss_size); + return ret; + } + vsi->reconfig_rss = true; + dev_dbg(&vsi->back->pdev->dev, + "Reconfigured RSS with num_queues (%u)\n", vsi->rss_size); + vsi->next_base_queue = num_qps; + vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps; + + /* Update the VSI after updating the VSI queue-mapping + * information + */ + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, + "Update vsi tc config failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + /* Create channels for macvlans */ + INIT_LIST_HEAD(&vsi->macvlan_list); + for (i = 0; i < macvlan_cnt; i++) { + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + INIT_LIST_HEAD(&ch->list); + ch->num_queue_pairs = qcnt; + if (!i40e_setup_channel(pf, vsi, ch)) { + ret = -EINVAL; + goto err_free; + } + ch->parent_vsi = vsi; + vsi->cnt_q_avail -= ch->num_queue_pairs; + vsi->macvlan_cnt++; + list_add_tail(&ch->list, &vsi->macvlan_list); + } + + return ret; + +err_free: + dev_info(&pf->pdev->dev, "Failed to setup macvlans\n"); + 
i40e_free_macvlan_channels(vsi); + + return ret; +} + +/** + * i40e_fwd_add - configure macvlans + * @netdev: net device to configure + * @vdev: macvlan netdevice + **/ +static void *i40e_fwd_add(struct net_device *netdev, struct net_device *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + u16 q_per_macvlan = 0, macvlan_cnt = 0, vectors; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_fwd_adapter *fwd; + int avail_macvlan, ret; + + if ((pf->flags & I40E_FLAG_DCB_ENABLED)) { + netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); + return ERR_PTR(-EINVAL); + } + if ((pf->flags & I40E_FLAG_TC_MQPRIO)) { + netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); + return ERR_PTR(-EINVAL); + } + if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) { + netdev_info(netdev, "Not enough vectors available to support macvlans\n"); + return ERR_PTR(-EINVAL); + } + + /* The macvlan device has to be a single Q device so that the + * tc_to_txq field can be reused to pick the tx queue. + */ + if (netif_is_multiqueue(vdev)) + return ERR_PTR(-ERANGE); + + if (!vsi->macvlan_cnt) { + /* reserve bit 0 for the pf device */ + set_bit(0, vsi->fwd_bitmask); + + /* Try to reserve as many queues as possible for macvlans. First + * reserve 3/4th of max vectors, then half, then quarter and + * calculate Qs per macvlan as you go + */ + vectors = pf->num_lan_msix; + if (vectors <= I40E_MAX_MACVLANS && vectors > 64) { + /* allocate 4 Qs per macvlan and 32 Qs to the PF*/ + q_per_macvlan = 4; + macvlan_cnt = (vectors - 32) / 4; + } else if (vectors <= 64 && vectors > 32) { + /* allocate 2 Qs per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 2; + macvlan_cnt = (vectors - 16) / 2; + } else if (vectors <= 32 && vectors > 16) { + /* allocate 1 Q per macvlan and 16 Qs to the PF*/ + q_per_macvlan = 1; + macvlan_cnt = vectors - 16; + } else if (vectors <= 16 && vectors > 8) { + /* allocate 1 Q per macvlan and 8 Qs to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 8; + } else { + /* allocate 1 Q per macvlan and 1 Q to the PF */ + q_per_macvlan = 1; + macvlan_cnt = vectors - 1; + } + + if (macvlan_cnt == 0) + return ERR_PTR(-EBUSY); + + /* Quiesce VSI queues */ + i40e_quiesce_vsi(vsi); + + /* sets up the macvlans but does not "enable" them */ + ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan, + vdev); + if (ret) + return ERR_PTR(ret); + + /* Unquiesce VSI */ + i40e_unquiesce_vsi(vsi); + } + avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask, + vsi->macvlan_cnt); + if (avail_macvlan >= I40E_MAX_MACVLANS) + return ERR_PTR(-EBUSY); + + /* create the fwd struct */ + fwd = kzalloc(sizeof(*fwd), GFP_KERNEL); + if (!fwd) + return ERR_PTR(-ENOMEM); + + set_bit(avail_macvlan, vsi->fwd_bitmask); + fwd->bit_no = avail_macvlan; + netdev_set_sb_channel(vdev, avail_macvlan); + fwd->netdev = vdev; + + if (!netif_running(netdev)) + return fwd; + + /* Set fwd ring up */ + ret = i40e_fwd_ring_up(vsi, vdev, fwd); + if (ret) { + /* unbind the queues and drop the subordinate channel config */ + netdev_unbind_sb_channel(netdev, vdev); + netdev_set_sb_channel(vdev, 0); + + kfree(fwd); + return ERR_PTR(-EINVAL); + } + + return fwd; +} + +/** + * i40e_del_all_macvlans - Delete all the mac filters on the channels + * @vsi: the VSI we want to access + */ +static void i40e_del_all_macvlans(struct i40e_vsi *vsi) +{ + struct i40e_channel *ch, *ch_tmp; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + if 
(list_empty(&vsi->macvlan_list)) + return; + + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(vsi->netdev, + ch->fwd->netdev); + netdev_set_sb_channel(ch->fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } + } + } +} + +/** + * i40e_fwd_del - delete macvlan interfaces + * @netdev: net device to configure + * @vdev: macvlan netdevice + */ +static void i40e_fwd_del(struct net_device *netdev, void *vdev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_fwd_adapter *fwd = vdev; + struct i40e_channel *ch, *ch_tmp; + struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + int aq_err, ret = 0; + + /* Find the channel associated with the macvlan and del mac filter */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->macvlan_list, list) { + if (i40e_is_channel_macvlan(ch) && + ether_addr_equal(i40e_channel_mac(ch), + fwd->netdev->dev_addr)) { + ret = i40e_del_macvlan_filter(hw, ch->seid, + i40e_channel_mac(ch), + &aq_err); + if (!ret) { + /* Reset queue contexts */ + i40e_reset_ch_rings(vsi, ch); + clear_bit(ch->fwd->bit_no, vsi->fwd_bitmask); + netdev_unbind_sb_channel(netdev, fwd->netdev); + netdev_set_sb_channel(fwd->netdev, 0); + kfree(ch->fwd); + ch->fwd = NULL; + } else { + dev_info(&pf->pdev->dev, + "Error deleting mac filter on macvlan err %s, aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, aq_err)); + } + break; + } + } +} + +/** * i40e_setup_tc - configure multiple traffic classes * @netdev: net device to configure * @type_data: tc offload data @@ -6963,6 +7491,10 @@ config_tc: vsi->seid); need_reset = true; goto exit; + } else { + dev_info(&vsi->back->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + vsi->seid, vsi->tc_config.tc_info[0].qcount); } if (pf->flags & I40E_FLAG_TC_MQPRIO) { @@ -7227,15 +7759,15 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, /** * i40e_parse_cls_flower - Parse tc flower filters provided by kernel * @vsi: Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload * @filter: Pointer to cloud filter structure * **/ static int i40e_parse_cls_flower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct i40e_cloud_filter *filter) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; struct i40e_pf *pf = vsi->back; @@ -7469,11 +8001,11 @@ static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc, /** * i40e_configure_clsflower - Configure tc flower filters * @vsi: Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload * **/ static int i40e_configure_clsflower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); struct i40e_cloud_filter *filter = NULL; @@ -7565,11 +8097,11 @@ static struct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi, /** * i40e_delete_clsflower - Remove tc flower filters * @vsi: 
Pointer to VSI - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload * **/ static int i40e_delete_clsflower(struct i40e_vsi *vsi, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct i40e_cloud_filter *filter = NULL; struct i40e_pf *pf = vsi->back; @@ -7612,16 +8144,16 @@ static int i40e_delete_clsflower(struct i40e_vsi *vsi, * @type_data: offload data **/ static int i40e_setup_tc_cls_flower(struct i40e_netdev_priv *np, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct i40e_vsi *vsi = np->vsi; switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return i40e_configure_clsflower(vsi, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return i40e_delete_clsflower(vsi, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; @@ -7645,34 +8177,21 @@ static int i40e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int i40e_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct i40e_netdev_priv *np = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, i40e_setup_tc_block_cb, - np, np, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, i40e_setup_tc_block_cb, np); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(i40e_block_cb_list); static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { + struct i40e_netdev_priv *np = netdev_priv(netdev); + switch (type) { case TC_SETUP_QDISC_MQPRIO: return i40e_setup_tc(netdev, type_data); case TC_SETUP_BLOCK: - return i40e_setup_tc_block(netdev, type_data); + return flow_block_cb_setup_simple(type_data, + &i40e_block_cb_list, + i40e_setup_tc_block_cb, + np, np, true); default: return -EOPNOTSUPP; } @@ -8570,7 +9089,7 @@ static void i40e_link_event(struct i40e_pf *pf) /* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified. 
*/ - if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb]) + if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb]) i40e_veb_link_event(pf->veb[pf->lan_veb], new_link); else i40e_vsi_link_event(vsi, new_link); @@ -10031,8 +10550,12 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) switch (vsi->type) { case I40E_VSI_MAIN: vsi->alloc_queue_pairs = pf->num_lan_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); if (pf->flags & I40E_FLAG_MSIX_ENABLED) vsi->num_q_vectors = pf->num_lan_msix; else @@ -10042,22 +10565,32 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi) case I40E_VSI_FDIR: vsi->alloc_queue_pairs = 1; - vsi->num_desc = ALIGN(I40E_FDIR_RING_COUNT, - I40E_REQ_DESCRIPTOR_MULTIPLE); + vsi->num_tx_desc = ALIGN(I40E_FDIR_RING_COUNT, + I40E_REQ_DESCRIPTOR_MULTIPLE); + vsi->num_rx_desc = ALIGN(I40E_FDIR_RING_COUNT, + I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_fdsb_msix; break; case I40E_VSI_VMDQ2: vsi->alloc_queue_pairs = pf->num_vmdq_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); vsi->num_q_vectors = pf->num_vmdq_msix; break; case I40E_VSI_SRIOV: vsi->alloc_queue_pairs = pf->num_vf_qps; - vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, - I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_tx_desc) + vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); + if (!vsi->num_rx_desc) + vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS, + I40E_REQ_DESCRIPTOR_MULTIPLE); break; default: @@ -10333,7 +10866,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) @@ -10350,7 +10883,7 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi) ring->vsi = vsi; ring->netdev = NULL; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_tx_desc; ring->size = 0; ring->dcb_tc = 0; if (vsi->back->hw_features & I40E_HW_WB_ON_ITR_CAPABLE) @@ -10366,7 +10899,7 @@ setup_rx: ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = &pf->pdev->dev; - ring->count = vsi->num_desc; + ring->count = vsi->num_rx_desc; ring->size = 0; ring->dcb_tc = 0; ring->itr_setting = pf->rx_itr_default; @@ -11604,6 +12137,9 @@ static int i40e_set_features(struct net_device *netdev, return -EINVAL; } + if (!(features & NETIF_F_HW_L2FW_DOFFLOAD) && vsi->macvlan_cnt) + i40e_del_all_macvlans(vsi); + need_reset = i40e_set_ntuple(pf, features); if (need_reset) @@ -12348,6 +12884,8 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_bpf = i40e_xdp, .ndo_xdp_xmit = i40e_xdp_xmit, .ndo_xsk_async_xmit = i40e_xsk_async_xmit, + .ndo_dfwd_add_station = i40e_fwd_add, + .ndo_dfwd_del_station = i40e_fwd_del, }; /** @@ -12407,6 +12945,9 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) /* record features VLANs can make use of */ netdev->vlan_features |= hw_enc_features | 
@@ -12407,6 +12945,9 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	/* record features VLANs can make use of */
 	netdev->vlan_features |= hw_enc_features |
 				 NETIF_F_TSO_MANGLEID;
 
+	/* enable macvlan offloads */
+	netdev->hw_features |= NETIF_F_HW_L2FW_DOFFLOAD;
+
 	hw_features = hw_enc_features		|
 		      NETIF_F_HW_VLAN_CTAG_TX	|
 		      NETIF_F_HW_VLAN_CTAG_RX;
@@ -12519,7 +13060,7 @@ int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
 	struct i40e_pf *pf = vsi->back;
 
 	/* Uplink is not a bridge so default to VEB */
-	if (vsi->veb_idx == I40E_NO_VEB)
+	if (vsi->veb_idx >= I40E_MAX_VEB)
 		return 1;
 
 	veb = pf->veb[vsi->veb_idx];
@@ -13577,7 +14118,7 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
 		/* Main VEB? */
 		if (uplink_seid != pf->mac_seid)
 			break;
-		if (pf->lan_veb == I40E_NO_VEB) {
+		if (pf->lan_veb >= I40E_MAX_VEB) {
 			int v;
 
 			/* find existing or else empty VEB */
@@ -13587,13 +14128,15 @@ static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
 					break;
 				}
 			}
-			if (pf->lan_veb == I40E_NO_VEB) {
+			if (pf->lan_veb >= I40E_MAX_VEB) {
 				v = i40e_veb_mem_alloc(pf);
 				if (v < 0)
 					break;
 				pf->lan_veb = v;
 			}
 		}
+		if (pf->lan_veb >= I40E_MAX_VEB)
+			break;
 
 		pf->veb[pf->lan_veb]->seid = seid;
 		pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
@@ -13747,7 +14290,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
 	/* Set up the PF VSI associated with the PF's main VSI
 	 * that is already in the HW switch
 	 */
-	if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb])
+	if (pf->lan_veb < I40E_MAX_VEB && pf->veb[pf->lan_veb])
 		uplink_seid = pf->veb[pf->lan_veb]->seid;
 	else
 		uplink_seid = pf->mac_seid;
@@ -14203,7 +14746,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
 				I40E_MAX_CSR_SPACE);
-
+	/* We believe that the highest register to read is
+	 * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size
+	 * is not less than that before mapping to prevent a
+	 * kernel panic.
+	 */
+	if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) {
+		dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n",
+			pf->ioremap_len);
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
 	hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len);
 	if (!hw->hw_addr) {
 		err = -EIO;
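
The probe hunk above guards ioremap() with a BAR-size check so that a malformed or misbehaving device (for example, a passthrough device exposing an undersized BAR0) cannot make the driver read past the end of the mapping. The generic shape of that guard, with FOO_MAX_REG_OFFSET as a hypothetical placeholder for the highest register offset a driver dereferences:

	#include <linux/pci.h>
	#include <linux/io.h>

	#define FOO_MAX_REG_OFFSET	0x2c02c	/* hypothetical highest CSR */

	static void __iomem *foo_map_bar0(struct pci_dev *pdev)
	{
		resource_size_t len = pci_resource_len(pdev, 0);

		/* the last readl()/writel() touches 4 bytes at the offset */
		if (len < FOO_MAX_REG_OFFSET + sizeof(u32))
			return NULL;	/* undersized BAR, refuse to map */

		return ioremap(pci_resource_start(pdev, 0), len);
	}
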
+ "FW LLDP is disabled\n" : + "FW LLDP is enabled\n"); + /* Enable FW to write default DCB config on link-up */ i40e_aq_set_dcb_parameters(hw, true, NULL); diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 882627073dce..eac88bcc6c06 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -350,6 +350,10 @@ i40e_virtchnl_link_speed(enum i40e_aq_link_speed link_speed) return VIRTCHNL_LINK_SPEED_100MB; case I40E_LINK_SPEED_1GB: return VIRTCHNL_LINK_SPEED_1GB; + case I40E_LINK_SPEED_2_5GB: + return VIRTCHNL_LINK_SPEED_2_5GB; + case I40E_LINK_SPEED_5GB: + return VIRTCHNL_LINK_SPEED_5GB; case I40E_LINK_SPEED_10GB: return VIRTCHNL_LINK_SPEED_10GB; case I40E_LINK_SPEED_40GB: diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index 439c35f0c581..11394a52e21c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -140,8 +140,7 @@ static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) * @ptp: The PTP clock structure * @delta: Offset in nanoseconds to adjust the PHC time by * - * Adjust the frequency of the PHC by the indicated parts per billion from the - * base frequency. + * Adjust the current clock time by a delta specified in nanoseconds. **/ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 20a283702c9f..2a2fe3ec7926 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -774,7 +774,7 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, int napi_budget) { - u16 i = tx_ring->next_to_clean; + int i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; struct i40e_tx_desc *tx_head; struct i40e_tx_desc *tx_desc; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 479bc60c8f71..02b09a8ad54c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -440,7 +440,7 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, struct virtchnl_iwarp_qv_info *qv_info; u32 v_idx, i, reg_idx, reg; u32 next_q_idx, next_q_type; - u32 msix_vf, size; + u32 msix_vf; int ret = 0; msix_vf = pf->hw.func_caps.num_msix_vectors_vf; @@ -454,11 +454,10 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, goto err_out; } - size = sizeof(struct virtchnl_iwarp_qvlist_info) + - (sizeof(struct virtchnl_iwarp_qv_info) * - (qvlist_info->num_vectors - 1)); kfree(vf->qvlist_info); - vf->qvlist_info = kzalloc(size, GFP_KERNEL); + vf->qvlist_info = kzalloc(struct_size(vf->qvlist_info, qv_info, + qvlist_info->num_vectors - 1), + GFP_KERNEL); if (!vf->qvlist_info) { ret = -ENOMEM; goto err_out; @@ -470,14 +469,15 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf, qv_info = &qvlist_info->qv_info[i]; if (!qv_info) continue; - v_idx = qv_info->v_idx; /* Validate vector id belongs to this vf */ - if (!i40e_vc_isvalid_vector_id(vf, v_idx)) { + if (!i40e_vc_isvalid_vector_id(vf, qv_info->v_idx)) { ret = -EINVAL; goto err_free; } + v_idx = qv_info->v_idx; + vf->qvlist_info->qv_info[i] = *qv_info; reg_idx = ((msix_vf - 1) * vf->vf_id) + (v_idx - 1); @@ -1845,7 +1845,7 @@ static int 
@@ -1845,7 +1845,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 	i40e_status aq_ret = 0;
 	struct i40e_vsi *vsi;
 	int num_vsis = 1;
-	int len = 0;
+	size_t len = 0;
 	int ret;
 
 	if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) {
@@ -1853,9 +1853,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
 		goto err;
 	}
 
-	len = (sizeof(struct virtchnl_vf_resource) +
-	       sizeof(struct virtchnl_vsi_resource) * num_vsis);
-
+	len = struct_size(vfres, vsi_res, num_vsis);
 	vfres = kzalloc(len, GFP_KERNEL);
 	if (!vfres) {
 		aq_ret = I40E_ERR_NO_MEMORY;
@@ -2135,8 +2133,13 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
 			}
 		}
 
-		if (vf->adq_enabled)
+		if (vf->adq_enabled) {
+			if (idx >= ARRAY_SIZE(vf->ch)) {
+				aq_ret = I40E_ERR_NO_AVAILABLE_VSI;
+				goto error_param;
+			}
 			vsi_id = vf->ch[idx].vsi_id;
+		}
 
 		if (i40e_config_vsi_rx_queue(vf, vsi_id, vsi_queue_id,
 					     &qpi->rxq) ||
@@ -2152,6 +2155,10 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
 		 * to its appropriate VSIs based on TC mapping
 		 **/
 		if (vf->adq_enabled) {
+			if (idx >= ARRAY_SIZE(vf->ch)) {
+				aq_ret = I40E_ERR_NO_AVAILABLE_VSI;
+				goto error_param;
+			}
 			if (j == (vf->ch[idx].num_qps - 1)) {
 				idx++;
 				j = 0; /* resetting the queue count */
@@ -2318,7 +2325,6 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg)
 	struct virtchnl_queue_select *vqs =
 	    (struct virtchnl_queue_select *)msg;
 	struct i40e_pf *pf = vf->pf;
-	u16 vsi_id = vqs->vsi_id;
 	i40e_status aq_ret = 0;
 	int i;
 
@@ -2327,7 +2333,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg)
 		goto error_param;
 	}
 
-	if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+	if (!i40e_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
 		aq_ret = I40E_ERR_PARAM;
 		goto error_param;
 	}
@@ -2427,18 +2433,14 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
 {
 	struct virtchnl_vf_res_request *vfres =
 		(struct virtchnl_vf_res_request *)msg;
-	int req_pairs = vfres->num_queue_pairs;
-	int cur_pairs = vf->num_queue_pairs;
+	u16 req_pairs = vfres->num_queue_pairs;
+	u8 cur_pairs = vf->num_queue_pairs;
 	struct i40e_pf *pf = vf->pf;
 
 	if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
 		return -EINVAL;
 
-	if (req_pairs <= 0) {
-		dev_err(&pf->pdev->dev,
-			"VF %d tried to request %d queues. 
Ignoring.\n", - vf->vf_id, req_pairs); - } else if (req_pairs > I40E_MAX_VF_QUEUES) { + if (req_pairs > I40E_MAX_VF_QUEUES) { dev_err(&pf->pdev->dev, "VF %d tried to request more than %d queues.\n", vf->vf_id, @@ -2509,7 +2511,7 @@ error_param: * MAC filters: 16 for multicast, 1 for MAC, 1 for broadcast */ #define I40E_VC_MAX_MAC_ADDR_PER_VF (16 + 1 + 1) -#define I40E_VC_MAX_VLAN_PER_VF 8 +#define I40E_VC_MAX_VLAN_PER_VF 16 /** * i40e_check_vf_permission @@ -2587,12 +2589,11 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_ether_addr_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = al->vsi_id; i40e_status ret = 0; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; } @@ -2657,12 +2658,11 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_ether_addr_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = al->vsi_id; i40e_status ret = 0; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; } @@ -2726,7 +2726,6 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_vlan_filter_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = vfl->vsi_id; i40e_status aq_ret = 0; int i; @@ -2737,7 +2736,7 @@ static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg) goto error_param; } if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2798,12 +2797,11 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_vlan_filter_list *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = vfl->vsi_id; i40e_status aq_ret = 0; int i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2920,11 +2918,10 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) (struct virtchnl_rss_key *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = vrk->vsi_id; i40e_status aq_ret = 0; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id) || + !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) || (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) { aq_ret = I40E_ERR_PARAM; goto err; @@ -2951,16 +2948,22 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) (struct virtchnl_rss_lut *)msg; struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = NULL; - u16 vsi_id = vrl->vsi_id; i40e_status aq_ret = 0; + u16 i; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, vsi_id) || + !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) || (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) { aq_ret = I40E_ERR_PARAM; goto err; } + for (i = 0; i < vrl->lut_entries; i++) + if (vrl->lut[i] >= vf->num_queue_pairs) { + aq_ret = I40E_ERR_PARAM; + goto err; + } + vsi = pf->vsi[vf->lan_vsi_idx]; aq_ret = i40e_config_rss(vsi, NULL, vrl->lut, I40E_VF_HLUT_ARRAY_SIZE); /* send the response to the VF */ @@ -3041,14 +3044,15 @@ err: **/ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 
*msg) { - struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_status aq_ret = 0; + struct i40e_vsi *vsi; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto err; } + vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_vlan_stripping_enable(vsi); /* send the response to the VF */ @@ -3066,14 +3070,15 @@ err: **/ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) { - struct i40e_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_status aq_ret = 0; + struct i40e_vsi *vsi; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; goto err; } + vsi = vf->pf->vsi[vf->lan_vsi_idx]; i40e_vlan_stripping_disable(vsi); /* send the response to the VF */ @@ -3531,8 +3536,9 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) (struct virtchnl_tc_info *)msg; struct i40e_pf *pf = vf->pf; struct i40e_link_status *ls = &pf->hw.phy.link_info; - int i, adq_request_qps = 0, speed = 0; + int i, adq_request_qps = 0; i40e_status aq_ret = 0; + u64 speed = 0; if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { aq_ret = I40E_ERR_PARAM; @@ -3558,8 +3564,8 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) /* max number of traffic classes for VF currently capped at 4 */ if (!tci->num_tc || tci->num_tc > I40E_MAX_VF_VSI) { dev_err(&pf->pdev->dev, - "VF %d trying to set %u TCs, valid range 1-4 TCs per VF\n", - vf->vf_id, tci->num_tc); + "VF %d trying to set %u TCs, valid range 1-%u TCs per VF\n", + vf->vf_id, tci->num_tc, I40E_MAX_VF_VSI); aq_ret = I40E_ERR_PARAM; goto err; } @@ -3569,8 +3575,9 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) if (!tci->list[i].count || tci->list[i].count > I40E_DEFAULT_QUEUES_PER_VF) { dev_err(&pf->pdev->dev, - "VF %d: TC %d trying to set %u queues, valid range 1-4 queues per TC\n", - vf->vf_id, i, tci->list[i].count); + "VF %d: TC %d trying to set %u queues, valid range 1-%u queues per TC\n", + vf->vf_id, i, tci->list[i].count, + I40E_DEFAULT_QUEUES_PER_VF); aq_ret = I40E_ERR_PARAM; goto err; } @@ -3730,19 +3737,6 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, /* perform basic checks on the msg */ ret = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen); - /* perform additional checks specific to this driver */ - if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_KEY) { - struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; - - if (vrk->key_len != I40E_HKEY_ARRAY_SIZE) - ret = -EINVAL; - } else if (v_opcode == VIRTCHNL_OP_CONFIG_RSS_LUT) { - struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; - - if (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) - ret = -EINVAL; - } - if (ret) { i40e_vc_send_resp_to_vf(vf, v_opcode, I40E_ERR_PARAM); dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n", @@ -3943,6 +3937,11 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) int bkt; u8 i; + if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) { + dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n"); + return -EAGAIN; + } + /* validate the request */ ret = i40e_validate_vf(pf, vf_id); if (ret) @@ -3967,11 +3966,6 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) goto error_param; } - if (test_and_set_bit(__I40E_VIRTCHNL_OP_PENDING, pf->state)) { - dev_warn(&pf->pdev->dev, "Unable to configure VFs, other operation is pending.\n"); - return -EAGAIN; - } - if (is_multicast_ether_addr(mac)) { dev_err(&pf->pdev->dev, "Invalid Ethernet address %pM for VF 
%d\n", mac, vf_id); @@ -4302,10 +4296,8 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, vf = &pf->vf[vf_id]; /* first vsi is always the LAN vsi */ vsi = pf->vsi[vf->lan_vsi_idx]; - if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { - dev_err(&pf->pdev->dev, "VF %d still in reset. Try again.\n", - vf_id); - ret = -EAGAIN; + if (!vsi) { + ret = -ENOENT; goto error_param; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 1b17486543ac..32bad014d76c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -215,6 +215,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp) break; default: bpf_warn_invalid_xdp_action(act); + /* fall through */ case XDP_ABORTED: trace_xdp_exception(rx_ring->netdev, xdp_prog, act); /* fallthrough -- handle aborts by dropping packet */ @@ -640,8 +641,8 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) struct i40e_tx_desc *tx_desc = NULL; struct i40e_tx_buffer *tx_bi; bool work_done = true; + struct xdp_desc desc; dma_addr_t dma; - u32 len; while (budget-- > 0) { if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) { @@ -650,21 +651,23 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) break; } - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len)) + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) break; - dma_sync_single_for_device(xdp_ring->dev, dma, len, + dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr); + + dma_sync_single_for_device(xdp_ring->dev, dma, desc.len, DMA_BIDIRECTIONAL); tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use]; - tx_bi->bytecount = len; + tx_bi->bytecount = desc.len; tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use); tx_desc->buffer_addr = cpu_to_le64(dma); tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC | I40E_TX_DESC_CMD_EOP, - 0, len, 0); + 0, desc.len, 0); xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) diff --git a/drivers/net/ethernet/intel/iavf/Makefile b/drivers/net/ethernet/intel/iavf/Makefile index 9cbb5743ed12..c997063ed728 100644 --- a/drivers/net/ethernet/intel/iavf/Makefile +++ b/drivers/net/ethernet/intel/iavf/Makefile @@ -12,4 +12,4 @@ subdir-ccflags-y += -I$(src) obj-$(CONFIG_IAVF) += iavf.o iavf-objs := iavf_main.o iavf_ethtool.o iavf_virtchnl.o \ - iavf_txrx.o iavf_common.o i40e_adminq.o iavf_client.o + iavf_txrx.o iavf_common.o iavf_adminq.o iavf_client.o diff --git a/drivers/net/ethernet/intel/iavf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/iavf/i40e_adminq_cmd.h deleted file mode 100644 index e5ae4a1c0cff..000000000000 --- a/drivers/net/ethernet/intel/iavf/i40e_adminq_cmd.h +++ /dev/null @@ -1,530 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 2013 - 2018 Intel Corporation. */ - -#ifndef _I40E_ADMINQ_CMD_H_ -#define _I40E_ADMINQ_CMD_H_ - -/* This header file defines the i40e Admin Queue commands and is shared between - * i40e Firmware and Software. Do not change the names in this file to IAVF - * because this file should be diff-able against the i40e version, even - * though many parts have been removed in this VF version. - * - * This file needs to comply with the Linux Kernel coding style. - */ - -#define I40E_FW_API_VERSION_MAJOR 0x0001 -#define I40E_FW_API_VERSION_MINOR_X722 0x0005 -#define I40E_FW_API_VERSION_MINOR_X710 0x0008 - -#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? 
\ - I40E_FW_API_VERSION_MINOR_X710 : \ - I40E_FW_API_VERSION_MINOR_X722) - -/* API version 1.7 implements additional link and PHY-specific APIs */ -#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007 - -struct i40e_aq_desc { - __le16 flags; - __le16 opcode; - __le16 datalen; - __le16 retval; - __le32 cookie_high; - __le32 cookie_low; - union { - struct { - __le32 param0; - __le32 param1; - __le32 param2; - __le32 param3; - } internal; - struct { - __le32 param0; - __le32 param1; - __le32 addr_high; - __le32 addr_low; - } external; - u8 raw[16]; - } params; -}; - -/* Flags sub-structure - * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | - * |DD |CMP|ERR|VFE| * * RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE | - */ - -/* command flags and offsets*/ -#define I40E_AQ_FLAG_DD_SHIFT 0 -#define I40E_AQ_FLAG_CMP_SHIFT 1 -#define I40E_AQ_FLAG_ERR_SHIFT 2 -#define I40E_AQ_FLAG_VFE_SHIFT 3 -#define I40E_AQ_FLAG_LB_SHIFT 9 -#define I40E_AQ_FLAG_RD_SHIFT 10 -#define I40E_AQ_FLAG_VFC_SHIFT 11 -#define I40E_AQ_FLAG_BUF_SHIFT 12 -#define I40E_AQ_FLAG_SI_SHIFT 13 -#define I40E_AQ_FLAG_EI_SHIFT 14 -#define I40E_AQ_FLAG_FE_SHIFT 15 - -#define I40E_AQ_FLAG_DD BIT(I40E_AQ_FLAG_DD_SHIFT) /* 0x1 */ -#define I40E_AQ_FLAG_CMP BIT(I40E_AQ_FLAG_CMP_SHIFT) /* 0x2 */ -#define I40E_AQ_FLAG_ERR BIT(I40E_AQ_FLAG_ERR_SHIFT) /* 0x4 */ -#define I40E_AQ_FLAG_VFE BIT(I40E_AQ_FLAG_VFE_SHIFT) /* 0x8 */ -#define I40E_AQ_FLAG_LB BIT(I40E_AQ_FLAG_LB_SHIFT) /* 0x200 */ -#define I40E_AQ_FLAG_RD BIT(I40E_AQ_FLAG_RD_SHIFT) /* 0x400 */ -#define I40E_AQ_FLAG_VFC BIT(I40E_AQ_FLAG_VFC_SHIFT) /* 0x800 */ -#define I40E_AQ_FLAG_BUF BIT(I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */ -#define I40E_AQ_FLAG_SI BIT(I40E_AQ_FLAG_SI_SHIFT) /* 0x2000 */ -#define I40E_AQ_FLAG_EI BIT(I40E_AQ_FLAG_EI_SHIFT) /* 0x4000 */ -#define I40E_AQ_FLAG_FE BIT(I40E_AQ_FLAG_FE_SHIFT) /* 0x8000 */ - -/* error codes */ -enum i40e_admin_queue_err { - I40E_AQ_RC_OK = 0, /* success */ - I40E_AQ_RC_EPERM = 1, /* Operation not permitted */ - I40E_AQ_RC_ENOENT = 2, /* No such element */ - I40E_AQ_RC_ESRCH = 3, /* Bad opcode */ - I40E_AQ_RC_EINTR = 4, /* operation interrupted */ - I40E_AQ_RC_EIO = 5, /* I/O error */ - I40E_AQ_RC_ENXIO = 6, /* No such resource */ - I40E_AQ_RC_E2BIG = 7, /* Arg too long */ - I40E_AQ_RC_EAGAIN = 8, /* Try again */ - I40E_AQ_RC_ENOMEM = 9, /* Out of memory */ - I40E_AQ_RC_EACCES = 10, /* Permission denied */ - I40E_AQ_RC_EFAULT = 11, /* Bad address */ - I40E_AQ_RC_EBUSY = 12, /* Device or resource busy */ - I40E_AQ_RC_EEXIST = 13, /* object already exists */ - I40E_AQ_RC_EINVAL = 14, /* Invalid argument */ - I40E_AQ_RC_ENOTTY = 15, /* Not a typewriter */ - I40E_AQ_RC_ENOSPC = 16, /* No space left or alloc failure */ - I40E_AQ_RC_ENOSYS = 17, /* Function not implemented */ - I40E_AQ_RC_ERANGE = 18, /* Parameter out of range */ - I40E_AQ_RC_EFLUSHED = 19, /* Cmd flushed due to prev cmd error */ - I40E_AQ_RC_BAD_ADDR = 20, /* Descriptor contains a bad pointer */ - I40E_AQ_RC_EMODE = 21, /* Op not allowed in current dev mode */ - I40E_AQ_RC_EFBIG = 22, /* File too large */ -}; - -/* Admin Queue command opcodes */ -enum i40e_admin_queue_opc { - /* aq commands */ - i40e_aqc_opc_get_version = 0x0001, - i40e_aqc_opc_driver_version = 0x0002, - i40e_aqc_opc_queue_shutdown = 0x0003, - i40e_aqc_opc_set_pf_context = 0x0004, - - /* resource ownership */ - i40e_aqc_opc_request_resource = 0x0008, - i40e_aqc_opc_release_resource = 0x0009, - - i40e_aqc_opc_list_func_capabilities = 0x000A, - i40e_aqc_opc_list_dev_capabilities = 0x000B, - - /* Proxy commands */ - 
i40e_aqc_opc_set_proxy_config = 0x0104, - i40e_aqc_opc_set_ns_proxy_table_entry = 0x0105, - - /* LAA */ - i40e_aqc_opc_mac_address_read = 0x0107, - i40e_aqc_opc_mac_address_write = 0x0108, - - /* PXE */ - i40e_aqc_opc_clear_pxe_mode = 0x0110, - - /* WoL commands */ - i40e_aqc_opc_set_wol_filter = 0x0120, - i40e_aqc_opc_get_wake_reason = 0x0121, - - /* internal switch commands */ - i40e_aqc_opc_get_switch_config = 0x0200, - i40e_aqc_opc_add_statistics = 0x0201, - i40e_aqc_opc_remove_statistics = 0x0202, - i40e_aqc_opc_set_port_parameters = 0x0203, - i40e_aqc_opc_get_switch_resource_alloc = 0x0204, - i40e_aqc_opc_set_switch_config = 0x0205, - i40e_aqc_opc_rx_ctl_reg_read = 0x0206, - i40e_aqc_opc_rx_ctl_reg_write = 0x0207, - - i40e_aqc_opc_add_vsi = 0x0210, - i40e_aqc_opc_update_vsi_parameters = 0x0211, - i40e_aqc_opc_get_vsi_parameters = 0x0212, - - i40e_aqc_opc_add_pv = 0x0220, - i40e_aqc_opc_update_pv_parameters = 0x0221, - i40e_aqc_opc_get_pv_parameters = 0x0222, - - i40e_aqc_opc_add_veb = 0x0230, - i40e_aqc_opc_update_veb_parameters = 0x0231, - i40e_aqc_opc_get_veb_parameters = 0x0232, - - i40e_aqc_opc_delete_element = 0x0243, - - i40e_aqc_opc_add_macvlan = 0x0250, - i40e_aqc_opc_remove_macvlan = 0x0251, - i40e_aqc_opc_add_vlan = 0x0252, - i40e_aqc_opc_remove_vlan = 0x0253, - i40e_aqc_opc_set_vsi_promiscuous_modes = 0x0254, - i40e_aqc_opc_add_tag = 0x0255, - i40e_aqc_opc_remove_tag = 0x0256, - i40e_aqc_opc_add_multicast_etag = 0x0257, - i40e_aqc_opc_remove_multicast_etag = 0x0258, - i40e_aqc_opc_update_tag = 0x0259, - i40e_aqc_opc_add_control_packet_filter = 0x025A, - i40e_aqc_opc_remove_control_packet_filter = 0x025B, - i40e_aqc_opc_add_cloud_filters = 0x025C, - i40e_aqc_opc_remove_cloud_filters = 0x025D, - i40e_aqc_opc_clear_wol_switch_filters = 0x025E, - - i40e_aqc_opc_add_mirror_rule = 0x0260, - i40e_aqc_opc_delete_mirror_rule = 0x0261, - - /* Dynamic Device Personalization */ - i40e_aqc_opc_write_personalization_profile = 0x0270, - i40e_aqc_opc_get_personalization_profile_list = 0x0271, - - /* DCB commands */ - i40e_aqc_opc_dcb_ignore_pfc = 0x0301, - i40e_aqc_opc_dcb_updated = 0x0302, - i40e_aqc_opc_set_dcb_parameters = 0x0303, - - /* TX scheduler */ - i40e_aqc_opc_configure_vsi_bw_limit = 0x0400, - i40e_aqc_opc_configure_vsi_ets_sla_bw_limit = 0x0406, - i40e_aqc_opc_configure_vsi_tc_bw = 0x0407, - i40e_aqc_opc_query_vsi_bw_config = 0x0408, - i40e_aqc_opc_query_vsi_ets_sla_config = 0x040A, - i40e_aqc_opc_configure_switching_comp_bw_limit = 0x0410, - - i40e_aqc_opc_enable_switching_comp_ets = 0x0413, - i40e_aqc_opc_modify_switching_comp_ets = 0x0414, - i40e_aqc_opc_disable_switching_comp_ets = 0x0415, - i40e_aqc_opc_configure_switching_comp_ets_bw_limit = 0x0416, - i40e_aqc_opc_configure_switching_comp_bw_config = 0x0417, - i40e_aqc_opc_query_switching_comp_ets_config = 0x0418, - i40e_aqc_opc_query_port_ets_config = 0x0419, - i40e_aqc_opc_query_switching_comp_bw_config = 0x041A, - i40e_aqc_opc_suspend_port_tx = 0x041B, - i40e_aqc_opc_resume_port_tx = 0x041C, - i40e_aqc_opc_configure_partition_bw = 0x041D, - /* hmc */ - i40e_aqc_opc_query_hmc_resource_profile = 0x0500, - i40e_aqc_opc_set_hmc_resource_profile = 0x0501, - - /* phy commands*/ - i40e_aqc_opc_get_phy_abilities = 0x0600, - i40e_aqc_opc_set_phy_config = 0x0601, - i40e_aqc_opc_set_mac_config = 0x0603, - i40e_aqc_opc_set_link_restart_an = 0x0605, - i40e_aqc_opc_get_link_status = 0x0607, - i40e_aqc_opc_set_phy_int_mask = 0x0613, - i40e_aqc_opc_get_local_advt_reg = 0x0614, - i40e_aqc_opc_set_local_advt_reg = 0x0615, - 
i40e_aqc_opc_get_partner_advt = 0x0616, - i40e_aqc_opc_set_lb_modes = 0x0618, - i40e_aqc_opc_get_phy_wol_caps = 0x0621, - i40e_aqc_opc_set_phy_debug = 0x0622, - i40e_aqc_opc_upload_ext_phy_fm = 0x0625, - i40e_aqc_opc_run_phy_activity = 0x0626, - i40e_aqc_opc_set_phy_register = 0x0628, - i40e_aqc_opc_get_phy_register = 0x0629, - - /* NVM commands */ - i40e_aqc_opc_nvm_read = 0x0701, - i40e_aqc_opc_nvm_erase = 0x0702, - i40e_aqc_opc_nvm_update = 0x0703, - i40e_aqc_opc_nvm_config_read = 0x0704, - i40e_aqc_opc_nvm_config_write = 0x0705, - i40e_aqc_opc_oem_post_update = 0x0720, - i40e_aqc_opc_thermal_sensor = 0x0721, - - /* virtualization commands */ - i40e_aqc_opc_send_msg_to_pf = 0x0801, - i40e_aqc_opc_send_msg_to_vf = 0x0802, - i40e_aqc_opc_send_msg_to_peer = 0x0803, - - /* alternate structure */ - i40e_aqc_opc_alternate_write = 0x0900, - i40e_aqc_opc_alternate_write_indirect = 0x0901, - i40e_aqc_opc_alternate_read = 0x0902, - i40e_aqc_opc_alternate_read_indirect = 0x0903, - i40e_aqc_opc_alternate_write_done = 0x0904, - i40e_aqc_opc_alternate_set_mode = 0x0905, - i40e_aqc_opc_alternate_clear_port = 0x0906, - - /* LLDP commands */ - i40e_aqc_opc_lldp_get_mib = 0x0A00, - i40e_aqc_opc_lldp_update_mib = 0x0A01, - i40e_aqc_opc_lldp_add_tlv = 0x0A02, - i40e_aqc_opc_lldp_update_tlv = 0x0A03, - i40e_aqc_opc_lldp_delete_tlv = 0x0A04, - i40e_aqc_opc_lldp_stop = 0x0A05, - i40e_aqc_opc_lldp_start = 0x0A06, - - /* Tunnel commands */ - i40e_aqc_opc_add_udp_tunnel = 0x0B00, - i40e_aqc_opc_del_udp_tunnel = 0x0B01, - i40e_aqc_opc_set_rss_key = 0x0B02, - i40e_aqc_opc_set_rss_lut = 0x0B03, - i40e_aqc_opc_get_rss_key = 0x0B04, - i40e_aqc_opc_get_rss_lut = 0x0B05, - - /* Async Events */ - i40e_aqc_opc_event_lan_overflow = 0x1001, - - /* OEM commands */ - i40e_aqc_opc_oem_parameter_change = 0xFE00, - i40e_aqc_opc_oem_device_status_change = 0xFE01, - i40e_aqc_opc_oem_ocsd_initialize = 0xFE02, - i40e_aqc_opc_oem_ocbb_initialize = 0xFE03, - - /* debug commands */ - i40e_aqc_opc_debug_read_reg = 0xFF03, - i40e_aqc_opc_debug_write_reg = 0xFF04, - i40e_aqc_opc_debug_modify_reg = 0xFF07, - i40e_aqc_opc_debug_dump_internals = 0xFF08, -}; - -/* command structures and indirect data structures */ - -/* Structure naming conventions: - * - no suffix for direct command descriptor structures - * - _data for indirect sent data - * - _resp for indirect return data (data which is both will use _data) - * - _completion for direct return data - * - _element_ for repeated elements (may also be _data or _resp) - * - * Command structures are expected to overlay the params.raw member of the basic - * descriptor, and as such cannot exceed 16 bytes in length. - */ - -/* This macro is used to generate a compilation error if a structure - * is not exactly the correct length. It gives a divide by zero error if the - * structure is not of the correct size, otherwise it creates an enum that is - * never used. - */ -#define I40E_CHECK_STRUCT_LEN(n, X) enum i40e_static_assert_enum_##X \ - { i40e_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) } - -/* This macro is used extensively to ensure that command structures are 16 - * bytes in length as they have to map to the raw array of that size. 
- */ -#define I40E_CHECK_CMD_LENGTH(X) I40E_CHECK_STRUCT_LEN(16, X) - -/* Queue Shutdown (direct 0x0003) */ -struct i40e_aqc_queue_shutdown { - __le32 driver_unloading; -#define I40E_AQ_DRIVER_UNLOADING 0x1 - u8 reserved[12]; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_queue_shutdown); - -struct i40e_aqc_vsi_properties_data { - /* first 96 byte are written by SW */ - __le16 valid_sections; -#define I40E_AQ_VSI_PROP_SWITCH_VALID 0x0001 -#define I40E_AQ_VSI_PROP_SECURITY_VALID 0x0002 -#define I40E_AQ_VSI_PROP_VLAN_VALID 0x0004 -#define I40E_AQ_VSI_PROP_CAS_PV_VALID 0x0008 -#define I40E_AQ_VSI_PROP_INGRESS_UP_VALID 0x0010 -#define I40E_AQ_VSI_PROP_EGRESS_UP_VALID 0x0020 -#define I40E_AQ_VSI_PROP_QUEUE_MAP_VALID 0x0040 -#define I40E_AQ_VSI_PROP_QUEUE_OPT_VALID 0x0080 -#define I40E_AQ_VSI_PROP_OUTER_UP_VALID 0x0100 -#define I40E_AQ_VSI_PROP_SCHED_VALID 0x0200 - /* switch section */ - __le16 switch_id; /* 12bit id combined with flags below */ -#define I40E_AQ_VSI_SW_ID_SHIFT 0x0000 -#define I40E_AQ_VSI_SW_ID_MASK (0xFFF << I40E_AQ_VSI_SW_ID_SHIFT) -#define I40E_AQ_VSI_SW_ID_FLAG_NOT_STAG 0x1000 -#define I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB 0x2000 -#define I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB 0x4000 - u8 sw_reserved[2]; - /* security section */ - u8 sec_flags; -#define I40E_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD 0x01 -#define I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK 0x02 -#define I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK 0x04 - u8 sec_reserved; - /* VLAN section */ - __le16 pvid; /* VLANS include priority bits */ - __le16 fcoe_pvid; - u8 port_vlan_flags; -#define I40E_AQ_VSI_PVLAN_MODE_SHIFT 0x00 -#define I40E_AQ_VSI_PVLAN_MODE_MASK (0x03 << \ - I40E_AQ_VSI_PVLAN_MODE_SHIFT) -#define I40E_AQ_VSI_PVLAN_MODE_TAGGED 0x01 -#define I40E_AQ_VSI_PVLAN_MODE_UNTAGGED 0x02 -#define I40E_AQ_VSI_PVLAN_MODE_ALL 0x03 -#define I40E_AQ_VSI_PVLAN_INSERT_PVID 0x04 -#define I40E_AQ_VSI_PVLAN_EMOD_SHIFT 0x03 -#define I40E_AQ_VSI_PVLAN_EMOD_MASK (0x3 << \ - I40E_AQ_VSI_PVLAN_EMOD_SHIFT) -#define I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH 0x0 -#define I40E_AQ_VSI_PVLAN_EMOD_STR_UP 0x08 -#define I40E_AQ_VSI_PVLAN_EMOD_STR 0x10 -#define I40E_AQ_VSI_PVLAN_EMOD_NOTHING 0x18 - u8 pvlan_reserved[3]; - /* ingress egress up sections */ - __le32 ingress_table; /* bitmap, 3 bits per up */ -#define I40E_AQ_VSI_UP_TABLE_UP0_SHIFT 0 -#define I40E_AQ_VSI_UP_TABLE_UP0_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP0_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP1_SHIFT 3 -#define I40E_AQ_VSI_UP_TABLE_UP1_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP1_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP2_SHIFT 6 -#define I40E_AQ_VSI_UP_TABLE_UP2_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP2_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP3_SHIFT 9 -#define I40E_AQ_VSI_UP_TABLE_UP3_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP3_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP4_SHIFT 12 -#define I40E_AQ_VSI_UP_TABLE_UP4_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP4_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP5_SHIFT 15 -#define I40E_AQ_VSI_UP_TABLE_UP5_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP5_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP6_SHIFT 18 -#define I40E_AQ_VSI_UP_TABLE_UP6_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP6_SHIFT) -#define I40E_AQ_VSI_UP_TABLE_UP7_SHIFT 21 -#define I40E_AQ_VSI_UP_TABLE_UP7_MASK (0x7 << \ - I40E_AQ_VSI_UP_TABLE_UP7_SHIFT) - __le32 egress_table; /* same defines as for ingress table */ - /* cascaded PV section */ - __le16 cas_pv_tag; - u8 cas_pv_flags; -#define I40E_AQ_VSI_CAS_PV_TAGX_SHIFT 0x00 -#define I40E_AQ_VSI_CAS_PV_TAGX_MASK (0x03 << \ - I40E_AQ_VSI_CAS_PV_TAGX_SHIFT) -#define I40E_AQ_VSI_CAS_PV_TAGX_LEAVE 0x00 
-#define I40E_AQ_VSI_CAS_PV_TAGX_REMOVE 0x01 -#define I40E_AQ_VSI_CAS_PV_TAGX_COPY 0x02 -#define I40E_AQ_VSI_CAS_PV_INSERT_TAG 0x10 -#define I40E_AQ_VSI_CAS_PV_ETAG_PRUNE 0x20 -#define I40E_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG 0x40 - u8 cas_pv_reserved; - /* queue mapping section */ - __le16 mapping_flags; -#define I40E_AQ_VSI_QUE_MAP_CONTIG 0x0 -#define I40E_AQ_VSI_QUE_MAP_NONCONTIG 0x1 - __le16 queue_mapping[16]; -#define I40E_AQ_VSI_QUEUE_SHIFT 0x0 -#define I40E_AQ_VSI_QUEUE_MASK (0x7FF << I40E_AQ_VSI_QUEUE_SHIFT) - __le16 tc_mapping[8]; -#define I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT 0 -#define I40E_AQ_VSI_TC_QUE_OFFSET_MASK (0x1FF << \ - I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) -#define I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT 9 -#define I40E_AQ_VSI_TC_QUE_NUMBER_MASK (0x7 << \ - I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) - /* queueing option section */ - u8 queueing_opt_flags; -#define I40E_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA 0x04 -#define I40E_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA 0x08 -#define I40E_AQ_VSI_QUE_OPT_TCP_ENA 0x10 -#define I40E_AQ_VSI_QUE_OPT_FCOE_ENA 0x20 -#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00 -#define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI 0x40 - u8 queueing_opt_reserved[3]; - /* scheduler section */ - u8 up_enable_bits; - u8 sched_reserved; - /* outer up section */ - __le32 outer_up_table; /* same structure and defines as ingress tbl */ - u8 cmd_reserved[8]; - /* last 32 bytes are written by FW */ - __le16 qs_handle[8]; -#define I40E_AQ_VSI_QS_HANDLE_INVALID 0xFFFF - __le16 stat_counter_idx; - __le16 sched_id; - u8 resp_reserved[12]; -}; - -I40E_CHECK_STRUCT_LEN(128, i40e_aqc_vsi_properties_data); - -/* Get VEB Parameters (direct 0x0232) - * uses i40e_aqc_switch_seid for the descriptor - */ -struct i40e_aqc_get_veb_parameters_completion { - __le16 seid; - __le16 switch_id; - __le16 veb_flags; /* only the first/last flags from 0x0230 is valid */ - __le16 statistic_index; - __le16 vebs_used; - __le16 vebs_free; - u8 reserved[4]; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_get_veb_parameters_completion); - -#define I40E_LINK_SPEED_100MB_SHIFT 0x1 -#define I40E_LINK_SPEED_1000MB_SHIFT 0x2 -#define I40E_LINK_SPEED_10GB_SHIFT 0x3 -#define I40E_LINK_SPEED_40GB_SHIFT 0x4 -#define I40E_LINK_SPEED_20GB_SHIFT 0x5 -#define I40E_LINK_SPEED_25GB_SHIFT 0x6 - -enum i40e_aq_link_speed { - I40E_LINK_SPEED_UNKNOWN = 0, - I40E_LINK_SPEED_100MB = BIT(I40E_LINK_SPEED_100MB_SHIFT), - I40E_LINK_SPEED_1GB = BIT(I40E_LINK_SPEED_1000MB_SHIFT), - I40E_LINK_SPEED_10GB = BIT(I40E_LINK_SPEED_10GB_SHIFT), - I40E_LINK_SPEED_40GB = BIT(I40E_LINK_SPEED_40GB_SHIFT), - I40E_LINK_SPEED_20GB = BIT(I40E_LINK_SPEED_20GB_SHIFT), - I40E_LINK_SPEED_25GB = BIT(I40E_LINK_SPEED_25GB_SHIFT), -}; - -/* Send to PF command (indirect 0x0801) id is only used by PF - * Send to VF command (indirect 0x0802) id is only used by PF - * Send to Peer PF command (indirect 0x0803) - */ -struct i40e_aqc_pf_vf_message { - __le32 id; - u8 reserved[4]; - __le32 addr_high; - __le32 addr_low; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_pf_vf_message); - -struct i40e_aqc_get_set_rss_key { -#define I40E_AQC_SET_RSS_KEY_VSI_VALID BIT(15) -#define I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT 0 -#define I40E_AQC_SET_RSS_KEY_VSI_ID_MASK (0x3FF << \ - I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) - __le16 vsi_id; - u8 reserved[6]; - __le32 addr_high; - __le32 addr_low; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_key); - -struct i40e_aqc_get_set_rss_key_data { - u8 standard_rss_key[0x28]; - u8 extended_hash_key[0xc]; -}; - -I40E_CHECK_STRUCT_LEN(0x34, i40e_aqc_get_set_rss_key_data); - -struct i40e_aqc_get_set_rss_lut { 
-#define I40E_AQC_SET_RSS_LUT_VSI_VALID BIT(15) -#define I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT 0 -#define I40E_AQC_SET_RSS_LUT_VSI_ID_MASK (0x3FF << \ - I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) - __le16 vsi_id; -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT 0 -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK \ - BIT(I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) - -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI 0 -#define I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF 1 - __le16 flags; - u8 reserved[4]; - __le32 addr_high; - __le32 addr_low; -}; - -I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut); -#endif /* _I40E_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 272d76b733aa..9fc635d816d2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -109,7 +109,7 @@ struct iavf_q_vector { /* Helper macros to switch between ints/sec and what the register uses. * And yes, it's the same math going both ways. The lowest value - * supported by all of the i40e hardware is 8. + * supported by all of the iavf hardware is 8. */ #define EITR_INTS_PER_SEC_TO_REG(_eitr) \ ((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8) @@ -171,6 +171,7 @@ enum iavf_state_t { __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ __IAVF_INIT_SW, /* got resources, setting up structs */ __IAVF_RESETTING, /* in reset */ + __IAVF_COMM_FAILED, /* communication with PF failed */ /* Below here, watchdog is running */ __IAVF_DOWN, /* ready, can be opened */ __IAVF_DOWN_PENDING, /* descending, waiting for watchdog */ @@ -216,7 +217,6 @@ struct iavf_cloud_filter { /* board specific private data structure */ struct iavf_adapter { - struct timer_list watchdog_timer; struct work_struct reset_task; struct work_struct adminq_task; struct delayed_work client_task; @@ -244,7 +244,7 @@ struct iavf_adapter { int num_iwarp_msix; int iwarp_base_vector; u32 client_pending; - struct i40e_client_instance *cinst; + struct iavf_client_instance *cinst; struct msix_entry *msix_entries; u32 flags; @@ -303,7 +303,7 @@ struct iavf_adapter { enum iavf_state_t state; unsigned long crit_section; - struct work_struct watchdog_task; + struct delayed_work watchdog_task; bool netdev_registered; bool link_up; enum virtchnl_link_speed link_speed; @@ -351,7 +351,7 @@ struct iavf_adapter { /* Ethtool Private Flags */ /* lan device, used by client interface */ -struct i40e_device { +struct iavf_device { struct list_head list; struct iavf_adapter *vf; }; @@ -359,6 +359,7 @@ struct i40e_device { /* needed by iavf_ethtool.c */ extern char iavf_driver_name[]; extern const char iavf_driver_version[]; +extern struct workqueue_struct *iavf_wq; int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); @@ -402,7 +403,7 @@ void iavf_enable_vlan_stripping(struct iavf_adapter *adapter); void iavf_disable_vlan_stripping(struct iavf_adapter *adapter); void iavf_virtchnl_completion(struct iavf_adapter *adapter, enum virtchnl_ops v_opcode, - iavf_status v_retval, u8 *msg, u16 msglen); + enum iavf_status v_retval, u8 *msg, u16 msglen); int iavf_config_rss(struct iavf_adapter *adapter); int iavf_lan_add_device(struct iavf_adapter *adapter); int iavf_lan_del_device(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/i40e_adminq.c b/drivers/net/ethernet/intel/iavf/iavf_adminq.c index fca1ecfd9f71..9fa3fa99b4c2 100644 --- a/drivers/net/ethernet/intel/iavf/i40e_adminq.c +++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.c @@ -4,16 +4,16 @@ #include 
"iavf_status.h" #include "iavf_type.h" #include "iavf_register.h" -#include "i40e_adminq.h" +#include "iavf_adminq.h" #include "iavf_prototype.h" /** - * i40e_adminq_init_regs - Initialize AdminQ registers + * iavf_adminq_init_regs - Initialize AdminQ registers * @hw: pointer to the hardware structure * * This assumes the alloc_asq and alloc_arq functions have already been called **/ -static void i40e_adminq_init_regs(struct iavf_hw *hw) +static void iavf_adminq_init_regs(struct iavf_hw *hw) { /* set head and tail registers in our local struct */ hw->aq.asq.tail = IAVF_VF_ATQT1; @@ -29,24 +29,24 @@ static void i40e_adminq_init_regs(struct iavf_hw *hw) } /** - * i40e_alloc_adminq_asq_ring - Allocate Admin Queue send rings + * iavf_alloc_adminq_asq_ring - Allocate Admin Queue send rings * @hw: pointer to the hardware structure **/ -static iavf_status i40e_alloc_adminq_asq_ring(struct iavf_hw *hw) +static enum iavf_status iavf_alloc_adminq_asq_ring(struct iavf_hw *hw) { - iavf_status ret_code; + enum iavf_status ret_code; ret_code = iavf_allocate_dma_mem(hw, &hw->aq.asq.desc_buf, - i40e_mem_atq_ring, + iavf_mem_atq_ring, (hw->aq.num_asq_entries * - sizeof(struct i40e_aq_desc)), + sizeof(struct iavf_aq_desc)), IAVF_ADMINQ_DESC_ALIGNMENT); if (ret_code) return ret_code; ret_code = iavf_allocate_virt_mem(hw, &hw->aq.asq.cmd_buf, (hw->aq.num_asq_entries * - sizeof(struct i40e_asq_cmd_details))); + sizeof(struct iavf_asq_cmd_details))); if (ret_code) { iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf); return ret_code; @@ -56,55 +56,55 @@ static iavf_status i40e_alloc_adminq_asq_ring(struct iavf_hw *hw) } /** - * i40e_alloc_adminq_arq_ring - Allocate Admin Queue receive rings + * iavf_alloc_adminq_arq_ring - Allocate Admin Queue receive rings * @hw: pointer to the hardware structure **/ -static iavf_status i40e_alloc_adminq_arq_ring(struct iavf_hw *hw) +static enum iavf_status iavf_alloc_adminq_arq_ring(struct iavf_hw *hw) { - iavf_status ret_code; + enum iavf_status ret_code; ret_code = iavf_allocate_dma_mem(hw, &hw->aq.arq.desc_buf, - i40e_mem_arq_ring, + iavf_mem_arq_ring, (hw->aq.num_arq_entries * - sizeof(struct i40e_aq_desc)), + sizeof(struct iavf_aq_desc)), IAVF_ADMINQ_DESC_ALIGNMENT); return ret_code; } /** - * i40e_free_adminq_asq - Free Admin Queue send rings + * iavf_free_adminq_asq - Free Admin Queue send rings * @hw: pointer to the hardware structure * * This assumes the posted send buffers have already been cleaned * and de-allocated **/ -static void i40e_free_adminq_asq(struct iavf_hw *hw) +static void iavf_free_adminq_asq(struct iavf_hw *hw) { iavf_free_dma_mem(hw, &hw->aq.asq.desc_buf); } /** - * i40e_free_adminq_arq - Free Admin Queue receive rings + * iavf_free_adminq_arq - Free Admin Queue receive rings * @hw: pointer to the hardware structure * * This assumes the posted receive buffers have already been cleaned * and de-allocated **/ -static void i40e_free_adminq_arq(struct iavf_hw *hw) +static void iavf_free_adminq_arq(struct iavf_hw *hw) { iavf_free_dma_mem(hw, &hw->aq.arq.desc_buf); } /** - * i40e_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue + * iavf_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue * @hw: pointer to the hardware structure **/ -static iavf_status i40e_alloc_arq_bufs(struct iavf_hw *hw) +static enum iavf_status iavf_alloc_arq_bufs(struct iavf_hw *hw) { - struct i40e_aq_desc *desc; + struct iavf_aq_desc *desc; struct iavf_dma_mem *bi; - iavf_status ret_code; + enum iavf_status ret_code; int i; /* We'll be allocating the buffer 
info memory first, then we can @@ -123,7 +123,7 @@ static iavf_status i40e_alloc_arq_bufs(struct iavf_hw *hw) for (i = 0; i < hw->aq.num_arq_entries; i++) { bi = &hw->aq.arq.r.arq_bi[i]; ret_code = iavf_allocate_dma_mem(hw, bi, - i40e_mem_arq_buf, + iavf_mem_arq_buf, hw->aq.arq_buf_size, IAVF_ADMINQ_DESC_ALIGNMENT); if (ret_code) @@ -132,9 +132,9 @@ static iavf_status i40e_alloc_arq_bufs(struct iavf_hw *hw) /* now configure the descriptors for use */ desc = IAVF_ADMINQ_DESC(hw->aq.arq, i); - desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF); - if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF) - desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB); + desc->flags = cpu_to_le16(IAVF_AQ_FLAG_BUF); + if (hw->aq.arq_buf_size > IAVF_AQ_LARGE_BUF) + desc->flags |= cpu_to_le16(IAVF_AQ_FLAG_LB); desc->opcode = 0; /* This is in accordance with Admin queue design, there is no * register for buffer size configuration @@ -165,13 +165,13 @@ unwind_alloc_arq_bufs: } /** - * i40e_alloc_asq_bufs - Allocate empty buffer structs for the send queue + * iavf_alloc_asq_bufs - Allocate empty buffer structs for the send queue * @hw: pointer to the hardware structure **/ -static iavf_status i40e_alloc_asq_bufs(struct iavf_hw *hw) +static enum iavf_status iavf_alloc_asq_bufs(struct iavf_hw *hw) { struct iavf_dma_mem *bi; - iavf_status ret_code; + enum iavf_status ret_code; int i; /* No mapped memory needed yet, just the buffer info structures */ @@ -186,7 +186,7 @@ static iavf_status i40e_alloc_asq_bufs(struct iavf_hw *hw) for (i = 0; i < hw->aq.num_asq_entries; i++) { bi = &hw->aq.asq.r.asq_bi[i]; ret_code = iavf_allocate_dma_mem(hw, bi, - i40e_mem_asq_buf, + iavf_mem_asq_buf, hw->aq.asq_buf_size, IAVF_ADMINQ_DESC_ALIGNMENT); if (ret_code) @@ -206,10 +206,10 @@ unwind_alloc_asq_bufs: } /** - * i40e_free_arq_bufs - Free receive queue buffer info elements + * iavf_free_arq_bufs - Free receive queue buffer info elements * @hw: pointer to the hardware structure **/ -static void i40e_free_arq_bufs(struct iavf_hw *hw) +static void iavf_free_arq_bufs(struct iavf_hw *hw) { int i; @@ -225,10 +225,10 @@ static void i40e_free_arq_bufs(struct iavf_hw *hw) } /** - * i40e_free_asq_bufs - Free send queue buffer info elements + * iavf_free_asq_bufs - Free send queue buffer info elements * @hw: pointer to the hardware structure **/ -static void i40e_free_asq_bufs(struct iavf_hw *hw) +static void iavf_free_asq_bufs(struct iavf_hw *hw) { int i; @@ -248,14 +248,14 @@ static void i40e_free_asq_bufs(struct iavf_hw *hw) } /** - * i40e_config_asq_regs - configure ASQ registers + * iavf_config_asq_regs - configure ASQ registers * @hw: pointer to the hardware structure * * Configure base address and length registers for the transmit queue **/ -static iavf_status i40e_config_asq_regs(struct iavf_hw *hw) +static enum iavf_status iavf_config_asq_regs(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; u32 reg = 0; /* Clear Head and Tail */ @@ -271,20 +271,20 @@ static iavf_status i40e_config_asq_regs(struct iavf_hw *hw) /* Check one register to verify that config was applied */ reg = rd32(hw, hw->aq.asq.bal); if (reg != lower_32_bits(hw->aq.asq.desc_buf.pa)) - ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; + ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR; return ret_code; } /** - * i40e_config_arq_regs - ARQ register configuration + * iavf_config_arq_regs - ARQ register configuration * @hw: pointer to the hardware structure * * Configure base address and length registers for the receive (event queue) **/ -static iavf_status 
i40e_config_arq_regs(struct iavf_hw *hw) +static enum iavf_status iavf_config_arq_regs(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; u32 reg = 0; /* Clear Head and Tail */ @@ -303,13 +303,13 @@ static iavf_status i40e_config_arq_regs(struct iavf_hw *hw) /* Check one register to verify that config was applied */ reg = rd32(hw, hw->aq.arq.bal); if (reg != lower_32_bits(hw->aq.arq.desc_buf.pa)) - ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; + ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR; return ret_code; } /** - * i40e_init_asq - main initialization routine for ASQ + * iavf_init_asq - main initialization routine for ASQ * @hw: pointer to the hardware structure * * This is the main initialization routine for the Admin Send Queue @@ -321,20 +321,20 @@ static iavf_status i40e_config_arq_regs(struct iavf_hw *hw) * Do *NOT* hold the lock when calling this as the memory allocation routines * called are not going to be atomic context safe **/ -static iavf_status i40e_init_asq(struct iavf_hw *hw) +static enum iavf_status iavf_init_asq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; if (hw->aq.asq.count > 0) { /* queue already initialized */ - ret_code = I40E_ERR_NOT_READY; + ret_code = IAVF_ERR_NOT_READY; goto init_adminq_exit; } /* verify input for valid configuration */ if ((hw->aq.num_asq_entries == 0) || (hw->aq.asq_buf_size == 0)) { - ret_code = I40E_ERR_CONFIG; + ret_code = IAVF_ERR_CONFIG; goto init_adminq_exit; } @@ -342,17 +342,17 @@ static iavf_status i40e_init_asq(struct iavf_hw *hw) hw->aq.asq.next_to_clean = 0; /* allocate the ring memory */ - ret_code = i40e_alloc_adminq_asq_ring(hw); + ret_code = iavf_alloc_adminq_asq_ring(hw); if (ret_code) goto init_adminq_exit; /* allocate buffers in the rings */ - ret_code = i40e_alloc_asq_bufs(hw); + ret_code = iavf_alloc_asq_bufs(hw); if (ret_code) goto init_adminq_free_rings; /* initialize base registers */ - ret_code = i40e_config_asq_regs(hw); + ret_code = iavf_config_asq_regs(hw); if (ret_code) goto init_adminq_free_rings; @@ -361,14 +361,14 @@ static iavf_status i40e_init_asq(struct iavf_hw *hw) goto init_adminq_exit; init_adminq_free_rings: - i40e_free_adminq_asq(hw); + iavf_free_adminq_asq(hw); init_adminq_exit: return ret_code; } /** - * i40e_init_arq - initialize ARQ + * iavf_init_arq - initialize ARQ * @hw: pointer to the hardware structure * * The main initialization routine for the Admin Receive (Event) Queue. 
@@ -380,20 +380,20 @@ init_adminq_exit: * Do *NOT* hold the lock when calling this as the memory allocation routines * called are not going to be atomic context safe **/ -static iavf_status i40e_init_arq(struct iavf_hw *hw) +static enum iavf_status iavf_init_arq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; if (hw->aq.arq.count > 0) { /* queue already initialized */ - ret_code = I40E_ERR_NOT_READY; + ret_code = IAVF_ERR_NOT_READY; goto init_adminq_exit; } /* verify input for valid configuration */ if ((hw->aq.num_arq_entries == 0) || (hw->aq.arq_buf_size == 0)) { - ret_code = I40E_ERR_CONFIG; + ret_code = IAVF_ERR_CONFIG; goto init_adminq_exit; } @@ -401,17 +401,17 @@ static iavf_status i40e_init_arq(struct iavf_hw *hw) hw->aq.arq.next_to_clean = 0; /* allocate the ring memory */ - ret_code = i40e_alloc_adminq_arq_ring(hw); + ret_code = iavf_alloc_adminq_arq_ring(hw); if (ret_code) goto init_adminq_exit; /* allocate buffers in the rings */ - ret_code = i40e_alloc_arq_bufs(hw); + ret_code = iavf_alloc_arq_bufs(hw); if (ret_code) goto init_adminq_free_rings; /* initialize base registers */ - ret_code = i40e_config_arq_regs(hw); + ret_code = iavf_config_arq_regs(hw); if (ret_code) goto init_adminq_free_rings; @@ -420,26 +420,26 @@ static iavf_status i40e_init_arq(struct iavf_hw *hw) goto init_adminq_exit; init_adminq_free_rings: - i40e_free_adminq_arq(hw); + iavf_free_adminq_arq(hw); init_adminq_exit: return ret_code; } /** - * i40e_shutdown_asq - shutdown the ASQ + * iavf_shutdown_asq - shutdown the ASQ * @hw: pointer to the hardware structure * * The main shutdown routine for the Admin Send Queue **/ -static iavf_status i40e_shutdown_asq(struct iavf_hw *hw) +static enum iavf_status iavf_shutdown_asq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; mutex_lock(&hw->aq.asq_mutex); if (hw->aq.asq.count == 0) { - ret_code = I40E_ERR_NOT_READY; + ret_code = IAVF_ERR_NOT_READY; goto shutdown_asq_out; } @@ -453,7 +453,7 @@ static iavf_status i40e_shutdown_asq(struct iavf_hw *hw) hw->aq.asq.count = 0; /* to indicate uninitialized queue */ /* free ring buffers */ - i40e_free_asq_bufs(hw); + iavf_free_asq_bufs(hw); shutdown_asq_out: mutex_unlock(&hw->aq.asq_mutex); @@ -461,19 +461,19 @@ shutdown_asq_out: } /** - * i40e_shutdown_arq - shutdown ARQ + * iavf_shutdown_arq - shutdown ARQ * @hw: pointer to the hardware structure * * The main shutdown routine for the Admin Receive Queue **/ -static iavf_status i40e_shutdown_arq(struct iavf_hw *hw) +static enum iavf_status iavf_shutdown_arq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; mutex_lock(&hw->aq.arq_mutex); if (hw->aq.arq.count == 0) { - ret_code = I40E_ERR_NOT_READY; + ret_code = IAVF_ERR_NOT_READY; goto shutdown_arq_out; } @@ -487,7 +487,7 @@ static iavf_status i40e_shutdown_arq(struct iavf_hw *hw) hw->aq.arq.count = 0; /* to indicate uninitialized queue */ /* free ring buffers */ - i40e_free_arq_bufs(hw); + iavf_free_arq_bufs(hw); shutdown_arq_out: mutex_unlock(&hw->aq.arq_mutex); @@ -505,32 +505,32 @@ shutdown_arq_out: * - hw->aq.arq_buf_size * - hw->aq.asq_buf_size **/ -iavf_status iavf_init_adminq(struct iavf_hw *hw) +enum iavf_status iavf_init_adminq(struct iavf_hw *hw) { - iavf_status ret_code; + enum iavf_status ret_code; /* verify input for valid configuration */ if ((hw->aq.num_arq_entries == 0) || (hw->aq.num_asq_entries == 0) || (hw->aq.arq_buf_size == 0) || (hw->aq.asq_buf_size == 0)) { - ret_code = I40E_ERR_CONFIG; + ret_code = 
IAVF_ERR_CONFIG; goto init_adminq_exit; } /* Set up register offsets */ - i40e_adminq_init_regs(hw); + iavf_adminq_init_regs(hw); /* setup ASQ command write back timeout */ - hw->aq.asq_cmd_timeout = I40E_ASQ_CMD_TIMEOUT; + hw->aq.asq_cmd_timeout = IAVF_ASQ_CMD_TIMEOUT; /* allocate the ASQ */ - ret_code = i40e_init_asq(hw); + ret_code = iavf_init_asq(hw); if (ret_code) goto init_adminq_destroy_locks; /* allocate the ARQ */ - ret_code = i40e_init_arq(hw); + ret_code = iavf_init_arq(hw); if (ret_code) goto init_adminq_free_asq; @@ -538,7 +538,7 @@ iavf_status iavf_init_adminq(struct iavf_hw *hw) goto init_adminq_exit; init_adminq_free_asq: - i40e_shutdown_asq(hw); + iavf_shutdown_asq(hw); init_adminq_destroy_locks: init_adminq_exit: @@ -549,53 +549,53 @@ init_adminq_exit: * iavf_shutdown_adminq - shutdown routine for the Admin Queue * @hw: pointer to the hardware structure **/ -iavf_status iavf_shutdown_adminq(struct iavf_hw *hw) +enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw) { - iavf_status ret_code = 0; + enum iavf_status ret_code = 0; if (iavf_check_asq_alive(hw)) iavf_aq_queue_shutdown(hw, true); - i40e_shutdown_asq(hw); - i40e_shutdown_arq(hw); + iavf_shutdown_asq(hw); + iavf_shutdown_arq(hw); return ret_code; } /** - * i40e_clean_asq - cleans Admin send queue + * iavf_clean_asq - cleans Admin send queue * @hw: pointer to the hardware structure * * returns the number of free desc **/ -static u16 i40e_clean_asq(struct iavf_hw *hw) +static u16 iavf_clean_asq(struct iavf_hw *hw) { struct iavf_adminq_ring *asq = &hw->aq.asq; - struct i40e_asq_cmd_details *details; + struct iavf_asq_cmd_details *details; u16 ntc = asq->next_to_clean; - struct i40e_aq_desc desc_cb; - struct i40e_aq_desc *desc; + struct iavf_aq_desc desc_cb; + struct iavf_aq_desc *desc; desc = IAVF_ADMINQ_DESC(*asq, ntc); - details = I40E_ADMINQ_DETAILS(*asq, ntc); + details = IAVF_ADMINQ_DETAILS(*asq, ntc); while (rd32(hw, hw->aq.asq.head) != ntc) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "ntc %d head %d.\n", ntc, rd32(hw, hw->aq.asq.head)); if (details->callback) { - I40E_ADMINQ_CALLBACK cb_func = - (I40E_ADMINQ_CALLBACK)details->callback; + IAVF_ADMINQ_CALLBACK cb_func = + (IAVF_ADMINQ_CALLBACK)details->callback; desc_cb = *desc; cb_func(hw, &desc_cb); } - memset((void *)desc, 0, sizeof(struct i40e_aq_desc)); + memset((void *)desc, 0, sizeof(struct iavf_aq_desc)); memset((void *)details, 0, - sizeof(struct i40e_asq_cmd_details)); + sizeof(struct iavf_asq_cmd_details)); ntc++; if (ntc == asq->count) ntc = 0; desc = IAVF_ADMINQ_DESC(*asq, ntc); - details = I40E_ADMINQ_DETAILS(*asq, ntc); + details = IAVF_ADMINQ_DETAILS(*asq, ntc); } asq->next_to_clean = ntc; @@ -629,16 +629,17 @@ bool iavf_asq_done(struct iavf_hw *hw) * This is the main send command driver routine for the Admin Queue send * queue. 
It runs the queue, cleans the queue, etc **/ -iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ - u16 buff_size, - struct i40e_asq_cmd_details *cmd_details) +enum iavf_status iavf_asq_send_command(struct iavf_hw *hw, + struct iavf_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct iavf_asq_cmd_details *cmd_details) { struct iavf_dma_mem *dma_buff = NULL; - struct i40e_asq_cmd_details *details; - struct i40e_aq_desc *desc_on_ring; + struct iavf_asq_cmd_details *details; + struct iavf_aq_desc *desc_on_ring; bool cmd_completed = false; - iavf_status status = 0; + enum iavf_status status = 0; u16 retval = 0; u32 val = 0; @@ -647,21 +648,21 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, if (hw->aq.asq.count == 0) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Admin queue not initialized.\n"); - status = I40E_ERR_QUEUE_EMPTY; + status = IAVF_ERR_QUEUE_EMPTY; goto asq_send_command_error; } - hw->aq.asq_last_status = I40E_AQ_RC_OK; + hw->aq.asq_last_status = IAVF_AQ_RC_OK; val = rd32(hw, hw->aq.asq.head); if (val >= hw->aq.num_asq_entries) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: head overrun at %d\n", val); - status = I40E_ERR_QUEUE_EMPTY; + status = IAVF_ERR_QUEUE_EMPTY; goto asq_send_command_error; } - details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use); + details = IAVF_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use); if (cmd_details) { *details = *cmd_details; @@ -676,7 +677,7 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, cpu_to_le32(lower_32_bits(details->cookie)); } } else { - memset(details, 0, sizeof(struct i40e_asq_cmd_details)); + memset(details, 0, sizeof(struct iavf_asq_cmd_details)); } /* clear requested flags and then set additional flags if defined */ @@ -688,7 +689,7 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Invalid buffer size: %d.\n", buff_size); - status = I40E_ERR_INVALID_SIZE; + status = IAVF_ERR_INVALID_SIZE; goto asq_send_command_error; } @@ -696,7 +697,7 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Async flag not set along with postpone flag"); - status = I40E_ERR_PARAM; + status = IAVF_ERR_PARAM; goto asq_send_command_error; } @@ -707,11 +708,11 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, /* the clean function called here could be called in a separate thread * in case of asynchronous completions */ - if (i40e_clean_asq(hw) == 0) { + if (iavf_clean_asq(hw) == 0) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Error queue is full.\n"); - status = I40E_ERR_ADMIN_QUEUE_FULL; + status = IAVF_ERR_ADMIN_QUEUE_FULL; goto asq_send_command_error; } @@ -780,13 +781,13 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, retval &= 0xff; } cmd_completed = true; - if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_OK) + if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_OK) status = 0; - else if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_EBUSY) - status = I40E_ERR_NOT_READY; + else if ((enum iavf_admin_queue_err)retval == IAVF_AQ_RC_EBUSY) + status = IAVF_ERR_NOT_READY; else - status = I40E_ERR_ADMIN_QUEUE_ERROR; - hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval; + status = IAVF_ERR_ADMIN_QUEUE_ERROR; + hw->aq.asq_last_status = (enum iavf_admin_queue_err)retval; } 
iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, @@ -803,11 +804,11 @@ iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, if (rd32(hw, hw->aq.asq.len) & IAVF_VF_ATQLEN1_ATQCRIT_MASK) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: AQ Critical error.\n"); - status = I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR; + status = IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR; } else { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQTX: Writeback timeout.\n"); - status = I40E_ERR_ADMIN_QUEUE_TIMEOUT; + status = IAVF_ERR_ADMIN_QUEUE_TIMEOUT; } } @@ -823,12 +824,12 @@ asq_send_command_error: * * Fill the desc with default values **/ -void iavf_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc, u16 opcode) +void iavf_fill_default_direct_cmd_desc(struct iavf_aq_desc *desc, u16 opcode) { /* zero out the desc */ - memset((void *)desc, 0, sizeof(struct i40e_aq_desc)); + memset((void *)desc, 0, sizeof(struct iavf_aq_desc)); desc->opcode = cpu_to_le16(opcode); - desc->flags = cpu_to_le16(I40E_AQ_FLAG_SI); + desc->flags = cpu_to_le16(IAVF_AQ_FLAG_SI); } /** @@ -841,13 +842,13 @@ void iavf_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc, u16 opcode) * the contents through e. It can also return how many events are * left to process through 'pending' **/ -iavf_status iavf_clean_arq_element(struct iavf_hw *hw, - struct i40e_arq_event_info *e, - u16 *pending) +enum iavf_status iavf_clean_arq_element(struct iavf_hw *hw, + struct iavf_arq_event_info *e, + u16 *pending) { u16 ntc = hw->aq.arq.next_to_clean; - struct i40e_aq_desc *desc; - iavf_status ret_code = 0; + struct iavf_aq_desc *desc; + enum iavf_status ret_code = 0; struct iavf_dma_mem *bi; u16 desc_idx; u16 datalen; @@ -863,7 +864,7 @@ iavf_status iavf_clean_arq_element(struct iavf_hw *hw, if (hw->aq.arq.count == 0) { iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQRX: Admin queue not initialized.\n"); - ret_code = I40E_ERR_QUEUE_EMPTY; + ret_code = IAVF_ERR_QUEUE_EMPTY; goto clean_arq_element_err; } @@ -871,7 +872,7 @@ iavf_status iavf_clean_arq_element(struct iavf_hw *hw, ntu = rd32(hw, hw->aq.arq.head) & IAVF_VF_ARQH1_ARQH_MASK; if (ntu == ntc) { /* nothing to do - shouldn't need to update ring's values */ - ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK; + ret_code = IAVF_ERR_ADMIN_QUEUE_NO_WORK; goto clean_arq_element_out; } @@ -880,10 +881,10 @@ iavf_status iavf_clean_arq_element(struct iavf_hw *hw, desc_idx = ntc; hw->aq.arq_last_status = - (enum i40e_admin_queue_err)le16_to_cpu(desc->retval); + (enum iavf_admin_queue_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); - if (flags & I40E_AQ_FLAG_ERR) { - ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; + if (flags & IAVF_AQ_FLAG_ERR) { + ret_code = IAVF_ERR_ADMIN_QUEUE_ERROR; iavf_debug(hw, IAVF_DEBUG_AQ_MESSAGE, "AQRX: Event received with error 0x%X.\n", @@ -906,11 +907,11 @@ iavf_status iavf_clean_arq_element(struct iavf_hw *hw, * size */ bi = &hw->aq.arq.r.arq_bi[ntc]; - memset((void *)desc, 0, sizeof(struct i40e_aq_desc)); + memset((void *)desc, 0, sizeof(struct iavf_aq_desc)); - desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF); - if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF) - desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB); + desc->flags = cpu_to_le16(IAVF_AQ_FLAG_BUF); + if (hw->aq.arq_buf_size > IAVF_AQ_LARGE_BUF) + desc->flags |= cpu_to_le16(IAVF_AQ_FLAG_LB); desc->datalen = cpu_to_le16((u16)bi->size); desc->params.external.addr_high = cpu_to_le32(upper_32_bits(bi->pa)); desc->params.external.addr_low = cpu_to_le32(lower_32_bits(bi->pa)); diff --git a/drivers/net/ethernet/intel/iavf/i40e_adminq.h 
b/drivers/net/ethernet/intel/iavf/iavf_adminq.h index ee983889eab0..baf2fe26f302 100644 --- a/drivers/net/ethernet/intel/iavf/i40e_adminq.h +++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.h @@ -6,10 +6,10 @@ #include "iavf_osdep.h" #include "iavf_status.h" -#include "i40e_adminq_cmd.h" +#include "iavf_adminq_cmd.h" #define IAVF_ADMINQ_DESC(R, i) \ - (&(((struct i40e_aq_desc *)((R).desc_buf.va))[i])) + (&(((struct iavf_aq_desc *)((R).desc_buf.va))[i])) #define IAVF_ADMINQ_DESC_ALIGNMENT 4096 @@ -39,22 +39,22 @@ struct iavf_adminq_ring { }; /* ASQ transaction details */ -struct i40e_asq_cmd_details { - void *callback; /* cast from type I40E_ADMINQ_CALLBACK */ +struct iavf_asq_cmd_details { + void *callback; /* cast from type IAVF_ADMINQ_CALLBACK */ u64 cookie; u16 flags_ena; u16 flags_dis; bool async; bool postpone; - struct i40e_aq_desc *wb_desc; + struct iavf_aq_desc *wb_desc; }; -#define I40E_ADMINQ_DETAILS(R, i) \ - (&(((struct i40e_asq_cmd_details *)((R).cmd_buf.va))[i])) +#define IAVF_ADMINQ_DETAILS(R, i) \ + (&(((struct iavf_asq_cmd_details *)((R).cmd_buf.va))[i])) /* ARQ event information */ -struct i40e_arq_event_info { - struct i40e_aq_desc desc; +struct iavf_arq_event_info { + struct iavf_aq_desc desc; u16 msg_len; u16 buf_len; u8 *msg_buf; @@ -79,45 +79,45 @@ struct iavf_adminq_info { struct mutex arq_mutex; /* Receive queue lock */ /* last status values on send and receive queues */ - enum i40e_admin_queue_err asq_last_status; - enum i40e_admin_queue_err arq_last_status; + enum iavf_admin_queue_err asq_last_status; + enum iavf_admin_queue_err arq_last_status; }; /** - * i40e_aq_rc_to_posix - convert errors to user-land codes + * iavf_aq_rc_to_posix - convert errors to user-land codes * aq_ret: AdminQ handler error code can override aq_rc * aq_rc: AdminQ firmware error code to convert **/ -static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc) +static inline int iavf_aq_rc_to_posix(int aq_ret, int aq_rc) { int aq_to_posix[] = { - 0, /* I40E_AQ_RC_OK */ - -EPERM, /* I40E_AQ_RC_EPERM */ - -ENOENT, /* I40E_AQ_RC_ENOENT */ - -ESRCH, /* I40E_AQ_RC_ESRCH */ - -EINTR, /* I40E_AQ_RC_EINTR */ - -EIO, /* I40E_AQ_RC_EIO */ - -ENXIO, /* I40E_AQ_RC_ENXIO */ - -E2BIG, /* I40E_AQ_RC_E2BIG */ - -EAGAIN, /* I40E_AQ_RC_EAGAIN */ - -ENOMEM, /* I40E_AQ_RC_ENOMEM */ - -EACCES, /* I40E_AQ_RC_EACCES */ - -EFAULT, /* I40E_AQ_RC_EFAULT */ - -EBUSY, /* I40E_AQ_RC_EBUSY */ - -EEXIST, /* I40E_AQ_RC_EEXIST */ - -EINVAL, /* I40E_AQ_RC_EINVAL */ - -ENOTTY, /* I40E_AQ_RC_ENOTTY */ - -ENOSPC, /* I40E_AQ_RC_ENOSPC */ - -ENOSYS, /* I40E_AQ_RC_ENOSYS */ - -ERANGE, /* I40E_AQ_RC_ERANGE */ - -EPIPE, /* I40E_AQ_RC_EFLUSHED */ - -ESPIPE, /* I40E_AQ_RC_BAD_ADDR */ - -EROFS, /* I40E_AQ_RC_EMODE */ - -EFBIG, /* I40E_AQ_RC_EFBIG */ + 0, /* IAVF_AQ_RC_OK */ + -EPERM, /* IAVF_AQ_RC_EPERM */ + -ENOENT, /* IAVF_AQ_RC_ENOENT */ + -ESRCH, /* IAVF_AQ_RC_ESRCH */ + -EINTR, /* IAVF_AQ_RC_EINTR */ + -EIO, /* IAVF_AQ_RC_EIO */ + -ENXIO, /* IAVF_AQ_RC_ENXIO */ + -E2BIG, /* IAVF_AQ_RC_E2BIG */ + -EAGAIN, /* IAVF_AQ_RC_EAGAIN */ + -ENOMEM, /* IAVF_AQ_RC_ENOMEM */ + -EACCES, /* IAVF_AQ_RC_EACCES */ + -EFAULT, /* IAVF_AQ_RC_EFAULT */ + -EBUSY, /* IAVF_AQ_RC_EBUSY */ + -EEXIST, /* IAVF_AQ_RC_EEXIST */ + -EINVAL, /* IAVF_AQ_RC_EINVAL */ + -ENOTTY, /* IAVF_AQ_RC_ENOTTY */ + -ENOSPC, /* IAVF_AQ_RC_ENOSPC */ + -ENOSYS, /* IAVF_AQ_RC_ENOSYS */ + -ERANGE, /* IAVF_AQ_RC_ERANGE */ + -EPIPE, /* IAVF_AQ_RC_EFLUSHED */ + -ESPIPE, /* IAVF_AQ_RC_BAD_ADDR */ + -EROFS, /* IAVF_AQ_RC_EMODE */ + -EFBIG, /* IAVF_AQ_RC_EFBIG */ }; /* aq_rc is invalid 
if AQ timed out */ - if (aq_ret == I40E_ERR_ADMIN_QUEUE_TIMEOUT) + if (aq_ret == IAVF_ERR_ADMIN_QUEUE_TIMEOUT) return -EAGAIN; if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0])))) @@ -127,9 +127,9 @@ static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc) } /* general information */ -#define I40E_AQ_LARGE_BUF 512 -#define I40E_ASQ_CMD_TIMEOUT 250000 /* usecs */ +#define IAVF_AQ_LARGE_BUF 512 +#define IAVF_ASQ_CMD_TIMEOUT 250000 /* usecs */ -void iavf_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc, u16 opcode); +void iavf_fill_default_direct_cmd_desc(struct iavf_aq_desc *desc, u16 opcode); #endif /* _IAVF_ADMINQ_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h b/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h new file mode 100644 index 000000000000..bc512308557b --- /dev/null +++ b/drivers/net/ethernet/intel/iavf/iavf_adminq_cmd.h @@ -0,0 +1,528 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2013 - 2018 Intel Corporation. */ + +#ifndef _IAVF_ADMINQ_CMD_H_ +#define _IAVF_ADMINQ_CMD_H_ + +/* This header file defines the iavf Admin Queue commands and is shared between + * iavf Firmware and Software. + * + * This file needs to comply with the Linux Kernel coding style. + */ + +#define IAVF_FW_API_VERSION_MAJOR 0x0001 +#define IAVF_FW_API_VERSION_MINOR_X722 0x0005 +#define IAVF_FW_API_VERSION_MINOR_X710 0x0008 + +#define IAVF_FW_MINOR_VERSION(_h) ((_h)->mac.type == IAVF_MAC_XL710 ? \ + IAVF_FW_API_VERSION_MINOR_X710 : \ + IAVF_FW_API_VERSION_MINOR_X722) + +/* API version 1.7 implements additional link and PHY-specific APIs */ +#define IAVF_MINOR_VER_GET_LINK_INFO_XL710 0x0007 + +struct iavf_aq_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 retval; + __le32 cookie_high; + __le32 cookie_low; + union { + struct { + __le32 param0; + __le32 param1; + __le32 param2; + __le32 param3; + } internal; + struct { + __le32 param0; + __le32 param1; + __le32 addr_high; + __le32 addr_low; + } external; + u8 raw[16]; + } params; +}; + +/* Flags sub-structure + * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | + * |DD |CMP|ERR|VFE| * * RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE | + */ + +/* command flags and offsets*/ +#define IAVF_AQ_FLAG_DD_SHIFT 0 +#define IAVF_AQ_FLAG_CMP_SHIFT 1 +#define IAVF_AQ_FLAG_ERR_SHIFT 2 +#define IAVF_AQ_FLAG_VFE_SHIFT 3 +#define IAVF_AQ_FLAG_LB_SHIFT 9 +#define IAVF_AQ_FLAG_RD_SHIFT 10 +#define IAVF_AQ_FLAG_VFC_SHIFT 11 +#define IAVF_AQ_FLAG_BUF_SHIFT 12 +#define IAVF_AQ_FLAG_SI_SHIFT 13 +#define IAVF_AQ_FLAG_EI_SHIFT 14 +#define IAVF_AQ_FLAG_FE_SHIFT 15 + +#define IAVF_AQ_FLAG_DD BIT(IAVF_AQ_FLAG_DD_SHIFT) /* 0x1 */ +#define IAVF_AQ_FLAG_CMP BIT(IAVF_AQ_FLAG_CMP_SHIFT) /* 0x2 */ +#define IAVF_AQ_FLAG_ERR BIT(IAVF_AQ_FLAG_ERR_SHIFT) /* 0x4 */ +#define IAVF_AQ_FLAG_VFE BIT(IAVF_AQ_FLAG_VFE_SHIFT) /* 0x8 */ +#define IAVF_AQ_FLAG_LB BIT(IAVF_AQ_FLAG_LB_SHIFT) /* 0x200 */ +#define IAVF_AQ_FLAG_RD BIT(IAVF_AQ_FLAG_RD_SHIFT) /* 0x400 */ +#define IAVF_AQ_FLAG_VFC BIT(IAVF_AQ_FLAG_VFC_SHIFT) /* 0x800 */ +#define IAVF_AQ_FLAG_BUF BIT(IAVF_AQ_FLAG_BUF_SHIFT) /* 0x1000 */ +#define IAVF_AQ_FLAG_SI BIT(IAVF_AQ_FLAG_SI_SHIFT) /* 0x2000 */ +#define IAVF_AQ_FLAG_EI BIT(IAVF_AQ_FLAG_EI_SHIFT) /* 0x4000 */ +#define IAVF_AQ_FLAG_FE BIT(IAVF_AQ_FLAG_FE_SHIFT) /* 0x8000 */ + +/* error codes */ +enum iavf_admin_queue_err { + IAVF_AQ_RC_OK = 0, /* success */ + IAVF_AQ_RC_EPERM = 1, /* Operation not permitted */ + IAVF_AQ_RC_ENOENT = 2, /* No such element */ + IAVF_AQ_RC_ESRCH = 3, /* Bad opcode */ + IAVF_AQ_RC_EINTR 
= 4, /* operation interrupted */ + IAVF_AQ_RC_EIO = 5, /* I/O error */ + IAVF_AQ_RC_ENXIO = 6, /* No such resource */ + IAVF_AQ_RC_E2BIG = 7, /* Arg too long */ + IAVF_AQ_RC_EAGAIN = 8, /* Try again */ + IAVF_AQ_RC_ENOMEM = 9, /* Out of memory */ + IAVF_AQ_RC_EACCES = 10, /* Permission denied */ + IAVF_AQ_RC_EFAULT = 11, /* Bad address */ + IAVF_AQ_RC_EBUSY = 12, /* Device or resource busy */ + IAVF_AQ_RC_EEXIST = 13, /* object already exists */ + IAVF_AQ_RC_EINVAL = 14, /* Invalid argument */ + IAVF_AQ_RC_ENOTTY = 15, /* Not a typewriter */ + IAVF_AQ_RC_ENOSPC = 16, /* No space left or alloc failure */ + IAVF_AQ_RC_ENOSYS = 17, /* Function not implemented */ + IAVF_AQ_RC_ERANGE = 18, /* Parameter out of range */ + IAVF_AQ_RC_EFLUSHED = 19, /* Cmd flushed due to prev cmd error */ + IAVF_AQ_RC_BAD_ADDR = 20, /* Descriptor contains a bad pointer */ + IAVF_AQ_RC_EMODE = 21, /* Op not allowed in current dev mode */ + IAVF_AQ_RC_EFBIG = 22, /* File too large */ +}; + +/* Admin Queue command opcodes */ +enum iavf_admin_queue_opc { + /* aq commands */ + iavf_aqc_opc_get_version = 0x0001, + iavf_aqc_opc_driver_version = 0x0002, + iavf_aqc_opc_queue_shutdown = 0x0003, + iavf_aqc_opc_set_pf_context = 0x0004, + + /* resource ownership */ + iavf_aqc_opc_request_resource = 0x0008, + iavf_aqc_opc_release_resource = 0x0009, + + iavf_aqc_opc_list_func_capabilities = 0x000A, + iavf_aqc_opc_list_dev_capabilities = 0x000B, + + /* Proxy commands */ + iavf_aqc_opc_set_proxy_config = 0x0104, + iavf_aqc_opc_set_ns_proxy_table_entry = 0x0105, + + /* LAA */ + iavf_aqc_opc_mac_address_read = 0x0107, + iavf_aqc_opc_mac_address_write = 0x0108, + + /* PXE */ + iavf_aqc_opc_clear_pxe_mode = 0x0110, + + /* WoL commands */ + iavf_aqc_opc_set_wol_filter = 0x0120, + iavf_aqc_opc_get_wake_reason = 0x0121, + + /* internal switch commands */ + iavf_aqc_opc_get_switch_config = 0x0200, + iavf_aqc_opc_add_statistics = 0x0201, + iavf_aqc_opc_remove_statistics = 0x0202, + iavf_aqc_opc_set_port_parameters = 0x0203, + iavf_aqc_opc_get_switch_resource_alloc = 0x0204, + iavf_aqc_opc_set_switch_config = 0x0205, + iavf_aqc_opc_rx_ctl_reg_read = 0x0206, + iavf_aqc_opc_rx_ctl_reg_write = 0x0207, + + iavf_aqc_opc_add_vsi = 0x0210, + iavf_aqc_opc_update_vsi_parameters = 0x0211, + iavf_aqc_opc_get_vsi_parameters = 0x0212, + + iavf_aqc_opc_add_pv = 0x0220, + iavf_aqc_opc_update_pv_parameters = 0x0221, + iavf_aqc_opc_get_pv_parameters = 0x0222, + + iavf_aqc_opc_add_veb = 0x0230, + iavf_aqc_opc_update_veb_parameters = 0x0231, + iavf_aqc_opc_get_veb_parameters = 0x0232, + + iavf_aqc_opc_delete_element = 0x0243, + + iavf_aqc_opc_add_macvlan = 0x0250, + iavf_aqc_opc_remove_macvlan = 0x0251, + iavf_aqc_opc_add_vlan = 0x0252, + iavf_aqc_opc_remove_vlan = 0x0253, + iavf_aqc_opc_set_vsi_promiscuous_modes = 0x0254, + iavf_aqc_opc_add_tag = 0x0255, + iavf_aqc_opc_remove_tag = 0x0256, + iavf_aqc_opc_add_multicast_etag = 0x0257, + iavf_aqc_opc_remove_multicast_etag = 0x0258, + iavf_aqc_opc_update_tag = 0x0259, + iavf_aqc_opc_add_control_packet_filter = 0x025A, + iavf_aqc_opc_remove_control_packet_filter = 0x025B, + iavf_aqc_opc_add_cloud_filters = 0x025C, + iavf_aqc_opc_remove_cloud_filters = 0x025D, + iavf_aqc_opc_clear_wol_switch_filters = 0x025E, + + iavf_aqc_opc_add_mirror_rule = 0x0260, + iavf_aqc_opc_delete_mirror_rule = 0x0261, + + /* Dynamic Device Personalization */ + iavf_aqc_opc_write_personalization_profile = 0x0270, + iavf_aqc_opc_get_personalization_profile_list = 0x0271, + + /* DCB commands */ + iavf_aqc_opc_dcb_ignore_pfc = 0x0301, + 
iavf_aqc_opc_dcb_updated = 0x0302, + iavf_aqc_opc_set_dcb_parameters = 0x0303, + + /* TX scheduler */ + iavf_aqc_opc_configure_vsi_bw_limit = 0x0400, + iavf_aqc_opc_configure_vsi_ets_sla_bw_limit = 0x0406, + iavf_aqc_opc_configure_vsi_tc_bw = 0x0407, + iavf_aqc_opc_query_vsi_bw_config = 0x0408, + iavf_aqc_opc_query_vsi_ets_sla_config = 0x040A, + iavf_aqc_opc_configure_switching_comp_bw_limit = 0x0410, + + iavf_aqc_opc_enable_switching_comp_ets = 0x0413, + iavf_aqc_opc_modify_switching_comp_ets = 0x0414, + iavf_aqc_opc_disable_switching_comp_ets = 0x0415, + iavf_aqc_opc_configure_switching_comp_ets_bw_limit = 0x0416, + iavf_aqc_opc_configure_switching_comp_bw_config = 0x0417, + iavf_aqc_opc_query_switching_comp_ets_config = 0x0418, + iavf_aqc_opc_query_port_ets_config = 0x0419, + iavf_aqc_opc_query_switching_comp_bw_config = 0x041A, + iavf_aqc_opc_suspend_port_tx = 0x041B, + iavf_aqc_opc_resume_port_tx = 0x041C, + iavf_aqc_opc_configure_partition_bw = 0x041D, + /* hmc */ + iavf_aqc_opc_query_hmc_resource_profile = 0x0500, + iavf_aqc_opc_set_hmc_resource_profile = 0x0501, + + /* phy commands*/ + iavf_aqc_opc_get_phy_abilities = 0x0600, + iavf_aqc_opc_set_phy_config = 0x0601, + iavf_aqc_opc_set_mac_config = 0x0603, + iavf_aqc_opc_set_link_restart_an = 0x0605, + iavf_aqc_opc_get_link_status = 0x0607, + iavf_aqc_opc_set_phy_int_mask = 0x0613, + iavf_aqc_opc_get_local_advt_reg = 0x0614, + iavf_aqc_opc_set_local_advt_reg = 0x0615, + iavf_aqc_opc_get_partner_advt = 0x0616, + iavf_aqc_opc_set_lb_modes = 0x0618, + iavf_aqc_opc_get_phy_wol_caps = 0x0621, + iavf_aqc_opc_set_phy_debug = 0x0622, + iavf_aqc_opc_upload_ext_phy_fm = 0x0625, + iavf_aqc_opc_run_phy_activity = 0x0626, + iavf_aqc_opc_set_phy_register = 0x0628, + iavf_aqc_opc_get_phy_register = 0x0629, + + /* NVM commands */ + iavf_aqc_opc_nvm_read = 0x0701, + iavf_aqc_opc_nvm_erase = 0x0702, + iavf_aqc_opc_nvm_update = 0x0703, + iavf_aqc_opc_nvm_config_read = 0x0704, + iavf_aqc_opc_nvm_config_write = 0x0705, + iavf_aqc_opc_oem_post_update = 0x0720, + iavf_aqc_opc_thermal_sensor = 0x0721, + + /* virtualization commands */ + iavf_aqc_opc_send_msg_to_pf = 0x0801, + iavf_aqc_opc_send_msg_to_vf = 0x0802, + iavf_aqc_opc_send_msg_to_peer = 0x0803, + + /* alternate structure */ + iavf_aqc_opc_alternate_write = 0x0900, + iavf_aqc_opc_alternate_write_indirect = 0x0901, + iavf_aqc_opc_alternate_read = 0x0902, + iavf_aqc_opc_alternate_read_indirect = 0x0903, + iavf_aqc_opc_alternate_write_done = 0x0904, + iavf_aqc_opc_alternate_set_mode = 0x0905, + iavf_aqc_opc_alternate_clear_port = 0x0906, + + /* LLDP commands */ + iavf_aqc_opc_lldp_get_mib = 0x0A00, + iavf_aqc_opc_lldp_update_mib = 0x0A01, + iavf_aqc_opc_lldp_add_tlv = 0x0A02, + iavf_aqc_opc_lldp_update_tlv = 0x0A03, + iavf_aqc_opc_lldp_delete_tlv = 0x0A04, + iavf_aqc_opc_lldp_stop = 0x0A05, + iavf_aqc_opc_lldp_start = 0x0A06, + + /* Tunnel commands */ + iavf_aqc_opc_add_udp_tunnel = 0x0B00, + iavf_aqc_opc_del_udp_tunnel = 0x0B01, + iavf_aqc_opc_set_rss_key = 0x0B02, + iavf_aqc_opc_set_rss_lut = 0x0B03, + iavf_aqc_opc_get_rss_key = 0x0B04, + iavf_aqc_opc_get_rss_lut = 0x0B05, + + /* Async Events */ + iavf_aqc_opc_event_lan_overflow = 0x1001, + + /* OEM commands */ + iavf_aqc_opc_oem_parameter_change = 0xFE00, + iavf_aqc_opc_oem_device_status_change = 0xFE01, + iavf_aqc_opc_oem_ocsd_initialize = 0xFE02, + iavf_aqc_opc_oem_ocbb_initialize = 0xFE03, + + /* debug commands */ + iavf_aqc_opc_debug_read_reg = 0xFF03, + iavf_aqc_opc_debug_write_reg = 0xFF04, + iavf_aqc_opc_debug_modify_reg = 0xFF07, + 
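/*
 * How these opcodes are consumed (condensed from iavf_aq_queue_shutdown()
 * later in this patch, not new driver code): a direct command zeroes an
 * iavf_aq_desc, stamps the opcode, and overlays its 16-byte command
 * structure on params.raw:
 *
 *	struct iavf_aq_desc desc;
 *	struct iavf_aqc_queue_shutdown *cmd =
 *		(struct iavf_aqc_queue_shutdown *)&desc.params.raw;
 *	enum iavf_status status;
 *
 *	iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_queue_shutdown);
 *	cmd->driver_unloading = cpu_to_le32(IAVF_AQ_DRIVER_UNLOADING);
 *	status = iavf_asq_send_command(hw, &desc, NULL, 0, NULL);
 */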
iavf_aqc_opc_debug_dump_internals = 0xFF08, +}; + +/* command structures and indirect data structures */ + +/* Structure naming conventions: + * - no suffix for direct command descriptor structures + * - _data for indirect sent data + * - _resp for indirect return data (data which is both will use _data) + * - _completion for direct return data + * - _element_ for repeated elements (may also be _data or _resp) + * + * Command structures are expected to overlay the params.raw member of the basic + * descriptor, and as such cannot exceed 16 bytes in length. + */ + +/* This macro is used to generate a compilation error if a structure + * is not exactly the correct length. It gives a divide by zero error if the + * structure is not of the correct size, otherwise it creates an enum that is + * never used. + */ +#define IAVF_CHECK_STRUCT_LEN(n, X) enum iavf_static_assert_enum_##X \ + { iavf_static_assert_##X = (n) / ((sizeof(struct X) == (n)) ? 1 : 0) } + +/* This macro is used extensively to ensure that command structures are 16 + * bytes in length as they have to map to the raw array of that size. + */ +#define IAVF_CHECK_CMD_LENGTH(X) IAVF_CHECK_STRUCT_LEN(16, X) + +/* Queue Shutdown (direct 0x0003) */ +struct iavf_aqc_queue_shutdown { + __le32 driver_unloading; +#define IAVF_AQ_DRIVER_UNLOADING 0x1 + u8 reserved[12]; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_queue_shutdown); + +struct iavf_aqc_vsi_properties_data { + /* first 96 byte are written by SW */ + __le16 valid_sections; +#define IAVF_AQ_VSI_PROP_SWITCH_VALID 0x0001 +#define IAVF_AQ_VSI_PROP_SECURITY_VALID 0x0002 +#define IAVF_AQ_VSI_PROP_VLAN_VALID 0x0004 +#define IAVF_AQ_VSI_PROP_CAS_PV_VALID 0x0008 +#define IAVF_AQ_VSI_PROP_INGRESS_UP_VALID 0x0010 +#define IAVF_AQ_VSI_PROP_EGRESS_UP_VALID 0x0020 +#define IAVF_AQ_VSI_PROP_QUEUE_MAP_VALID 0x0040 +#define IAVF_AQ_VSI_PROP_QUEUE_OPT_VALID 0x0080 +#define IAVF_AQ_VSI_PROP_OUTER_UP_VALID 0x0100 +#define IAVF_AQ_VSI_PROP_SCHED_VALID 0x0200 + /* switch section */ + __le16 switch_id; /* 12bit id combined with flags below */ +#define IAVF_AQ_VSI_SW_ID_SHIFT 0x0000 +#define IAVF_AQ_VSI_SW_ID_MASK (0xFFF << IAVF_AQ_VSI_SW_ID_SHIFT) +#define IAVF_AQ_VSI_SW_ID_FLAG_NOT_STAG 0x1000 +#define IAVF_AQ_VSI_SW_ID_FLAG_ALLOW_LB 0x2000 +#define IAVF_AQ_VSI_SW_ID_FLAG_LOCAL_LB 0x4000 + u8 sw_reserved[2]; + /* security section */ + u8 sec_flags; +#define IAVF_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD 0x01 +#define IAVF_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK 0x02 +#define IAVF_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK 0x04 + u8 sec_reserved; + /* VLAN section */ + __le16 pvid; /* VLANS include priority bits */ + __le16 fcoe_pvid; + u8 port_vlan_flags; +#define IAVF_AQ_VSI_PVLAN_MODE_SHIFT 0x00 +#define IAVF_AQ_VSI_PVLAN_MODE_MASK (0x03 << \ + IAVF_AQ_VSI_PVLAN_MODE_SHIFT) +#define IAVF_AQ_VSI_PVLAN_MODE_TAGGED 0x01 +#define IAVF_AQ_VSI_PVLAN_MODE_UNTAGGED 0x02 +#define IAVF_AQ_VSI_PVLAN_MODE_ALL 0x03 +#define IAVF_AQ_VSI_PVLAN_INSERT_PVID 0x04 +#define IAVF_AQ_VSI_PVLAN_EMOD_SHIFT 0x03 +#define IAVF_AQ_VSI_PVLAN_EMOD_MASK (0x3 << \ + IAVF_AQ_VSI_PVLAN_EMOD_SHIFT) +#define IAVF_AQ_VSI_PVLAN_EMOD_STR_BOTH 0x0 +#define IAVF_AQ_VSI_PVLAN_EMOD_STR_UP 0x08 +#define IAVF_AQ_VSI_PVLAN_EMOD_STR 0x10 +#define IAVF_AQ_VSI_PVLAN_EMOD_NOTHING 0x18 + u8 pvlan_reserved[3]; + /* ingress egress up sections */ + __le32 ingress_table; /* bitmap, 3 bits per up */ +#define IAVF_AQ_VSI_UP_TABLE_UP0_SHIFT 0 +#define IAVF_AQ_VSI_UP_TABLE_UP0_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP0_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP1_SHIFT 3 +#define 
IAVF_AQ_VSI_UP_TABLE_UP1_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP1_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP2_SHIFT 6 +#define IAVF_AQ_VSI_UP_TABLE_UP2_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP2_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP3_SHIFT 9 +#define IAVF_AQ_VSI_UP_TABLE_UP3_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP3_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP4_SHIFT 12 +#define IAVF_AQ_VSI_UP_TABLE_UP4_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP4_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP5_SHIFT 15 +#define IAVF_AQ_VSI_UP_TABLE_UP5_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP5_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP6_SHIFT 18 +#define IAVF_AQ_VSI_UP_TABLE_UP6_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP6_SHIFT) +#define IAVF_AQ_VSI_UP_TABLE_UP7_SHIFT 21 +#define IAVF_AQ_VSI_UP_TABLE_UP7_MASK (0x7 << \ + IAVF_AQ_VSI_UP_TABLE_UP7_SHIFT) + __le32 egress_table; /* same defines as for ingress table */ + /* cascaded PV section */ + __le16 cas_pv_tag; + u8 cas_pv_flags; +#define IAVF_AQ_VSI_CAS_PV_TAGX_SHIFT 0x00 +#define IAVF_AQ_VSI_CAS_PV_TAGX_MASK (0x03 << \ + IAVF_AQ_VSI_CAS_PV_TAGX_SHIFT) +#define IAVF_AQ_VSI_CAS_PV_TAGX_LEAVE 0x00 +#define IAVF_AQ_VSI_CAS_PV_TAGX_REMOVE 0x01 +#define IAVF_AQ_VSI_CAS_PV_TAGX_COPY 0x02 +#define IAVF_AQ_VSI_CAS_PV_INSERT_TAG 0x10 +#define IAVF_AQ_VSI_CAS_PV_ETAG_PRUNE 0x20 +#define IAVF_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG 0x40 + u8 cas_pv_reserved; + /* queue mapping section */ + __le16 mapping_flags; +#define IAVF_AQ_VSI_QUE_MAP_CONTIG 0x0 +#define IAVF_AQ_VSI_QUE_MAP_NONCONTIG 0x1 + __le16 queue_mapping[16]; +#define IAVF_AQ_VSI_QUEUE_SHIFT 0x0 +#define IAVF_AQ_VSI_QUEUE_MASK (0x7FF << IAVF_AQ_VSI_QUEUE_SHIFT) + __le16 tc_mapping[8]; +#define IAVF_AQ_VSI_TC_QUE_OFFSET_SHIFT 0 +#define IAVF_AQ_VSI_TC_QUE_OFFSET_MASK (0x1FF << \ + IAVF_AQ_VSI_TC_QUE_OFFSET_SHIFT) +#define IAVF_AQ_VSI_TC_QUE_NUMBER_SHIFT 9 +#define IAVF_AQ_VSI_TC_QUE_NUMBER_MASK (0x7 << \ + IAVF_AQ_VSI_TC_QUE_NUMBER_SHIFT) + /* queueing option section */ + u8 queueing_opt_flags; +#define IAVF_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA 0x04 +#define IAVF_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA 0x08 +#define IAVF_AQ_VSI_QUE_OPT_TCP_ENA 0x10 +#define IAVF_AQ_VSI_QUE_OPT_FCOE_ENA 0x20 +#define IAVF_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00 +#define IAVF_AQ_VSI_QUE_OPT_RSS_LUT_VSI 0x40 + u8 queueing_opt_reserved[3]; + /* scheduler section */ + u8 up_enable_bits; + u8 sched_reserved; + /* outer up section */ + __le32 outer_up_table; /* same structure and defines as ingress tbl */ + u8 cmd_reserved[8]; + /* last 32 bytes are written by FW */ + __le16 qs_handle[8]; +#define IAVF_AQ_VSI_QS_HANDLE_INVALID 0xFFFF + __le16 stat_counter_idx; + __le16 sched_id; + u8 resp_reserved[12]; +}; + +IAVF_CHECK_STRUCT_LEN(128, iavf_aqc_vsi_properties_data); + +/* Get VEB Parameters (direct 0x0232) + * uses iavf_aqc_switch_seid for the descriptor + */ +struct iavf_aqc_get_veb_parameters_completion { + __le16 seid; + __le16 switch_id; + __le16 veb_flags; /* only the first/last flags from 0x0230 is valid */ + __le16 statistic_index; + __le16 vebs_used; + __le16 vebs_free; + u8 reserved[4]; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_veb_parameters_completion); + +#define IAVF_LINK_SPEED_100MB_SHIFT 0x1 +#define IAVF_LINK_SPEED_1000MB_SHIFT 0x2 +#define IAVF_LINK_SPEED_10GB_SHIFT 0x3 +#define IAVF_LINK_SPEED_40GB_SHIFT 0x4 +#define IAVF_LINK_SPEED_20GB_SHIFT 0x5 +#define IAVF_LINK_SPEED_25GB_SHIFT 0x6 + +enum iavf_aq_link_speed { + IAVF_LINK_SPEED_UNKNOWN = 0, + IAVF_LINK_SPEED_100MB = BIT(IAVF_LINK_SPEED_100MB_SHIFT), + IAVF_LINK_SPEED_1GB = BIT(IAVF_LINK_SPEED_1000MB_SHIFT), 
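/*
 * These speed values are one-hot bit positions rather than a dense
 * enumeration, and the bit order does not track the speed order (the
 * 40GB bit sits below the 20GB and 25GB bits), so numeric comparison is
 * misleading. A hypothetical decode helper (illustration only, not part
 * of this patch):
 *
 *	static u32 example_link_speed_mbps(enum iavf_aq_link_speed speed)
 *	{
 *		switch (speed) {
 *		case IAVF_LINK_SPEED_100MB:	return 100;
 *		case IAVF_LINK_SPEED_1GB:	return 1000;
 *		case IAVF_LINK_SPEED_10GB:	return 10000;
 *		case IAVF_LINK_SPEED_20GB:	return 20000;
 *		case IAVF_LINK_SPEED_25GB:	return 25000;
 *		case IAVF_LINK_SPEED_40GB:	return 40000;
 *		default:			return 0;
 *		}
 *	}
 */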
+ IAVF_LINK_SPEED_10GB = BIT(IAVF_LINK_SPEED_10GB_SHIFT), + IAVF_LINK_SPEED_40GB = BIT(IAVF_LINK_SPEED_40GB_SHIFT), + IAVF_LINK_SPEED_20GB = BIT(IAVF_LINK_SPEED_20GB_SHIFT), + IAVF_LINK_SPEED_25GB = BIT(IAVF_LINK_SPEED_25GB_SHIFT), +}; + +/* Send to PF command (indirect 0x0801) id is only used by PF + * Send to VF command (indirect 0x0802) id is only used by PF + * Send to Peer PF command (indirect 0x0803) + */ +struct iavf_aqc_pf_vf_message { + __le32 id; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_pf_vf_message); + +struct iavf_aqc_get_set_rss_key { +#define IAVF_AQC_SET_RSS_KEY_VSI_VALID BIT(15) +#define IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT 0 +#define IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK (0x3FF << \ + IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT) + __le16 vsi_id; + u8 reserved[6]; + __le32 addr_high; + __le32 addr_low; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_set_rss_key); + +struct iavf_aqc_get_set_rss_key_data { + u8 standard_rss_key[0x28]; + u8 extended_hash_key[0xc]; +}; + +IAVF_CHECK_STRUCT_LEN(0x34, iavf_aqc_get_set_rss_key_data); + +struct iavf_aqc_get_set_rss_lut { +#define IAVF_AQC_SET_RSS_LUT_VSI_VALID BIT(15) +#define IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT 0 +#define IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK (0x3FF << \ + IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT) + __le16 vsi_id; +#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT 0 +#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK \ + BIT(IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) + +#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI 0 +#define IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF 1 + __le16 flags; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +IAVF_CHECK_CMD_LENGTH(iavf_aqc_get_set_rss_lut); +#endif /* _IAVF_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_alloc.h b/drivers/net/ethernet/intel/iavf/iavf_alloc.h index bf2753146f30..2711573c14ec 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_alloc.h +++ b/drivers/net/ethernet/intel/iavf/iavf_alloc.h @@ -20,12 +20,15 @@ enum iavf_memory_type { }; /* prototype for functions used for dynamic memory allocation */ -iavf_status iavf_allocate_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem, - enum iavf_memory_type type, - u64 size, u32 alignment); -iavf_status iavf_free_dma_mem(struct iavf_hw *hw, struct iavf_dma_mem *mem); -iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, - struct iavf_virt_mem *mem, u32 size); -iavf_status iavf_free_virt_mem(struct iavf_hw *hw, struct iavf_virt_mem *mem); +enum iavf_status iavf_allocate_dma_mem(struct iavf_hw *hw, + struct iavf_dma_mem *mem, + enum iavf_memory_type type, + u64 size, u32 alignment); +enum iavf_status iavf_free_dma_mem(struct iavf_hw *hw, + struct iavf_dma_mem *mem); +enum iavf_status iavf_allocate_virt_mem(struct iavf_hw *hw, + struct iavf_virt_mem *mem, u32 size); +enum iavf_status iavf_free_virt_mem(struct iavf_hw *hw, + struct iavf_virt_mem *mem); #endif /* _IAVF_ALLOC_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.c b/drivers/net/ethernet/intel/iavf/iavf_client.c index aea45364fd1c..0c77e4171808 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_client.c +++ b/drivers/net/ethernet/intel/iavf/iavf_client.c @@ -10,19 +10,19 @@ static const char iavf_client_interface_version_str[] = IAVF_CLIENT_VERSION_STR; -static struct i40e_client *vf_registered_client; -static LIST_HEAD(i40e_devices); +static struct iavf_client *vf_registered_client; +static LIST_HEAD(iavf_devices); static DEFINE_MUTEX(iavf_device_mutex); -static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, - 
struct i40e_client *client, +static u32 iavf_client_virtchnl_send(struct iavf_info *ldev, + struct iavf_client *client, u8 *msg, u16 len); -static int iavf_client_setup_qvlist(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_qvlist_info *qvlist_info); +static int iavf_client_setup_qvlist(struct iavf_info *ldev, + struct iavf_client *client, + struct iavf_qvlist_info *qvlist_info); -static struct i40e_ops iavf_lan_ops = { +static struct iavf_ops iavf_lan_ops = { .virtchnl_send = iavf_client_virtchnl_send, .setup_qvlist = iavf_client_setup_qvlist, }; @@ -33,11 +33,11 @@ static struct i40e_ops iavf_lan_ops = { * @params: client param struct **/ static -void iavf_client_get_params(struct iavf_vsi *vsi, struct i40e_params *params) +void iavf_client_get_params(struct iavf_vsi *vsi, struct iavf_params *params) { int i; - memset(params, 0, sizeof(struct i40e_params)); + memset(params, 0, sizeof(struct iavf_params)); params->mtu = vsi->netdev->mtu; params->link_up = vsi->back->link_up; @@ -57,7 +57,7 @@ void iavf_client_get_params(struct iavf_vsi *vsi, struct i40e_params *params) **/ void iavf_notify_client_message(struct iavf_vsi *vsi, u8 *msg, u16 len) { - struct i40e_client_instance *cinst; + struct iavf_client_instance *cinst; if (!vsi) return; @@ -81,8 +81,8 @@ void iavf_notify_client_message(struct iavf_vsi *vsi, u8 *msg, u16 len) **/ void iavf_notify_client_l2_params(struct iavf_vsi *vsi) { - struct i40e_client_instance *cinst; - struct i40e_params params; + struct iavf_client_instance *cinst; + struct iavf_params params; if (!vsi) return; @@ -110,7 +110,7 @@ void iavf_notify_client_l2_params(struct iavf_vsi *vsi) void iavf_notify_client_open(struct iavf_vsi *vsi) { struct iavf_adapter *adapter = vsi->back; - struct i40e_client_instance *cinst = adapter->cinst; + struct iavf_client_instance *cinst = adapter->cinst; int ret; if (!cinst || !cinst->client || !cinst->client->ops || @@ -119,10 +119,10 @@ void iavf_notify_client_open(struct iavf_vsi *vsi) "Cannot locate client instance open function\n"); return; } - if (!(test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state))) { + if (!(test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state))) { ret = cinst->client->ops->open(&cinst->lan_info, cinst->client); if (!ret) - set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); + set_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state); } } @@ -132,17 +132,17 @@ void iavf_notify_client_open(struct iavf_vsi *vsi) * * Return 0 on success or < 0 on error **/ -static int iavf_client_release_qvlist(struct i40e_info *ldev) +static int iavf_client_release_qvlist(struct iavf_info *ldev) { struct iavf_adapter *adapter = ldev->vf; - iavf_status err; + enum iavf_status err; if (adapter->aq_required) return -EAGAIN; err = iavf_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, - I40E_SUCCESS, NULL, 0, NULL); + IAVF_SUCCESS, NULL, 0, NULL); if (err) dev_err(&adapter->pdev->dev, @@ -162,7 +162,7 @@ static int iavf_client_release_qvlist(struct i40e_info *ldev) void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset) { struct iavf_adapter *adapter = vsi->back; - struct i40e_client_instance *cinst = adapter->cinst; + struct iavf_client_instance *cinst = adapter->cinst; if (!cinst || !cinst->client || !cinst->client->ops || !cinst->client->ops->close) { @@ -172,7 +172,7 @@ void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset) } cinst->client->ops->close(&cinst->lan_info, cinst->client, reset); iavf_client_release_qvlist(&cinst->lan_info); - 
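/*
 * The client notifications here are gated by one atomic state bit:
 * __IAVF_CLIENT_INSTANCE_OPENED is set only after a successful
 * client->ops->open() and cleared again on close, so the open and close
 * paths in this file reduce to the usual kernel bitop pattern
 * (condensed illustration, error handling elided):
 *
 *	if (!test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) {
 *		if (!cinst->client->ops->open(&cinst->lan_info,
 *					      cinst->client))
 *			set_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
 *	}
 *	...
 *	cinst->client->ops->close(&cinst->lan_info, cinst->client, reset);
 *	clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state);
 */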
clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); + clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state); } /** @@ -181,13 +181,13 @@ void iavf_notify_client_close(struct iavf_vsi *vsi, bool reset) * * Returns cinst ptr on success, NULL on failure **/ -static struct i40e_client_instance * +static struct iavf_client_instance * iavf_client_add_instance(struct iavf_adapter *adapter) { - struct i40e_client_instance *cinst = NULL; + struct iavf_client_instance *cinst = NULL; struct iavf_vsi *vsi = &adapter->vsi; struct netdev_hw_addr *mac = NULL; - struct i40e_params params; + struct iavf_params params; if (!vf_registered_client) goto out; @@ -205,7 +205,7 @@ iavf_client_add_instance(struct iavf_adapter *adapter) cinst->lan_info.netdev = vsi->netdev; cinst->lan_info.pcidev = adapter->pdev; cinst->lan_info.fid = 0; - cinst->lan_info.ftype = I40E_CLIENT_FTYPE_VF; + cinst->lan_info.ftype = IAVF_CLIENT_FTYPE_VF; cinst->lan_info.hw_addr = adapter->hw.hw_addr; cinst->lan_info.ops = &iavf_lan_ops; cinst->lan_info.version.major = IAVF_CLIENT_VERSION_MAJOR; @@ -213,7 +213,7 @@ iavf_client_add_instance(struct iavf_adapter *adapter) cinst->lan_info.version.build = IAVF_CLIENT_VERSION_BUILD; iavf_client_get_params(vsi, ¶ms); cinst->lan_info.params = params; - set_bit(__I40E_CLIENT_INSTANCE_NONE, &cinst->state); + set_bit(__IAVF_CLIENT_INSTANCE_NONE, &cinst->state); cinst->lan_info.msix_count = adapter->num_iwarp_msix; cinst->lan_info.msix_entries = @@ -250,8 +250,8 @@ void iavf_client_del_instance(struct iavf_adapter *adapter) **/ void iavf_client_subtask(struct iavf_adapter *adapter) { - struct i40e_client *client = vf_registered_client; - struct i40e_client_instance *cinst; + struct iavf_client *client = vf_registered_client; + struct iavf_client_instance *cinst; int ret = 0; if (adapter->state < __IAVF_DOWN) @@ -269,13 +269,13 @@ void iavf_client_subtask(struct iavf_adapter *adapter) dev_info(&adapter->pdev->dev, "Added instance of Client %s\n", client->name); - if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) { + if (!test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) { /* Send an Open request to the client */ if (client->ops && client->ops->open) ret = client->ops->open(&cinst->lan_info, client); if (!ret) - set_bit(__I40E_CLIENT_INSTANCE_OPENED, + set_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state); else /* remove client instance */ @@ -291,11 +291,11 @@ void iavf_client_subtask(struct iavf_adapter *adapter) **/ int iavf_lan_add_device(struct iavf_adapter *adapter) { - struct i40e_device *ldev; + struct iavf_device *ldev; int ret = 0; mutex_lock(&iavf_device_mutex); - list_for_each_entry(ldev, &i40e_devices, list) { + list_for_each_entry(ldev, &iavf_devices, list) { if (ldev->vf == adapter) { ret = -EEXIST; goto out; @@ -308,7 +308,7 @@ int iavf_lan_add_device(struct iavf_adapter *adapter) } ldev->vf = adapter; INIT_LIST_HEAD(&ldev->list); - list_add(&ldev->list, &i40e_devices); + list_add(&ldev->list, &iavf_devices); dev_info(&adapter->pdev->dev, "Added LAN device bus=0x%02x dev=0x%02x func=0x%02x\n", adapter->hw.bus.bus_id, adapter->hw.bus.device, adapter->hw.bus.func); @@ -331,11 +331,11 @@ out: **/ int iavf_lan_del_device(struct iavf_adapter *adapter) { - struct i40e_device *ldev, *tmp; + struct iavf_device *ldev, *tmp; int ret = -ENODEV; mutex_lock(&iavf_device_mutex); - list_for_each_entry_safe(ldev, tmp, &i40e_devices, list) { + list_for_each_entry_safe(ldev, tmp, &iavf_devices, list) { if (ldev->vf == adapter) { dev_info(&adapter->pdev->dev, "Deleted LAN device 
bus=0x%02x dev=0x%02x func=0x%02x\n", @@ -357,24 +357,24 @@ int iavf_lan_del_device(struct iavf_adapter *adapter) * @client: pointer to the registered client * **/ -static void iavf_client_release(struct i40e_client *client) +static void iavf_client_release(struct iavf_client *client) { - struct i40e_client_instance *cinst; - struct i40e_device *ldev; + struct iavf_client_instance *cinst; + struct iavf_device *ldev; struct iavf_adapter *adapter; mutex_lock(&iavf_device_mutex); - list_for_each_entry(ldev, &i40e_devices, list) { + list_for_each_entry(ldev, &iavf_devices, list) { adapter = ldev->vf; cinst = adapter->cinst; if (!cinst) continue; - if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state)) { + if (test_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state)) { if (client->ops && client->ops->close) client->ops->close(&cinst->lan_info, client, false); iavf_client_release_qvlist(&cinst->lan_info); - clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cinst->state); + clear_bit(__IAVF_CLIENT_INSTANCE_OPENED, &cinst->state); dev_warn(&adapter->pdev->dev, "Client %s instance closed\n", client->name); @@ -392,13 +392,13 @@ static void iavf_client_release(struct i40e_client *client) * @client: pointer to the registered client * **/ -static void iavf_client_prepare(struct i40e_client *client) +static void iavf_client_prepare(struct iavf_client *client) { - struct i40e_device *ldev; + struct iavf_device *ldev; struct iavf_adapter *adapter; mutex_lock(&iavf_device_mutex); - list_for_each_entry(ldev, &i40e_devices, list) { + list_for_each_entry(ldev, &iavf_devices, list) { adapter = ldev->vf; /* Signal the watchdog to service the client */ adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; @@ -415,18 +415,18 @@ static void iavf_client_prepare(struct i40e_client *client) * * Return 0 on success or < 0 on error **/ -static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, - struct i40e_client *client, +static u32 iavf_client_virtchnl_send(struct iavf_info *ldev, + struct iavf_client *client, u8 *msg, u16 len) { struct iavf_adapter *adapter = ldev->vf; - iavf_status err; + enum iavf_status err; if (adapter->aq_required) return -EAGAIN; err = iavf_aq_send_msg_to_pf(&adapter->hw, VIRTCHNL_OP_IWARP, - I40E_SUCCESS, msg, len, NULL); + IAVF_SUCCESS, msg, len, NULL); if (err) dev_err(&adapter->pdev->dev, "Unable to send iWarp message to PF, error %d, aq status %d\n", err, adapter->hw.aq.asq_last_status); @@ -442,16 +442,16 @@ static u32 iavf_client_virtchnl_send(struct i40e_info *ldev, * * Return 0 on success or < 0 on error **/ -static int iavf_client_setup_qvlist(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_qvlist_info *qvlist_info) +static int iavf_client_setup_qvlist(struct iavf_info *ldev, + struct iavf_client *client, + struct iavf_qvlist_info *qvlist_info) { struct virtchnl_iwarp_qvlist_info *v_qvlist_info; struct iavf_adapter *adapter = ldev->vf; - struct i40e_qv_info *qv_info; - iavf_status err; + struct iavf_qv_info *qv_info; + enum iavf_status err; u32 v_idx, i; - u32 msg_size; + size_t msg_size; if (adapter->aq_required) return -EAGAIN; @@ -469,13 +469,12 @@ static int iavf_client_setup_qvlist(struct i40e_info *ldev, } v_qvlist_info = (struct virtchnl_iwarp_qvlist_info *)qvlist_info; - msg_size = sizeof(struct virtchnl_iwarp_qvlist_info) + - (sizeof(struct virtchnl_iwarp_qv_info) * - (v_qvlist_info->num_vectors - 1)); + msg_size = struct_size(v_qvlist_info, qv_info, + v_qvlist_info->num_vectors - 1); adapter->client_pending |= BIT(VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP); err = 
iavf_aq_send_msg_to_pf(&adapter->hw, - VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, I40E_SUCCESS, + VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, IAVF_SUCCESS, (u8 *)v_qvlist_info, msg_size, NULL); if (err) { @@ -499,12 +498,12 @@ out: } /** - * iavf_register_client - Register a i40e client driver with the L2 driver - * @client: pointer to the i40e_client struct + * iavf_register_client - Register an iavf client driver with the L2 driver + * @client: pointer to the iavf_client struct * * Returns 0 on success or non-0 on error **/ -int iavf_register_client(struct i40e_client *client) +int iavf_register_client(struct iavf_client *client) { int ret = 0; @@ -550,12 +549,12 @@ out: EXPORT_SYMBOL(iavf_register_client); /** - * iavf_unregister_client - Unregister a i40e client driver with the L2 driver - * @client: pointer to the i40e_client struct + * iavf_unregister_client - Unregister an iavf client driver with the L2 driver + * @client: pointer to the iavf_client struct * * Returns 0 on success or non-0 on error **/ -int iavf_unregister_client(struct i40e_client *client) +int iavf_unregister_client(struct iavf_client *client) { int ret = 0; diff --git a/drivers/net/ethernet/intel/iavf/iavf_client.h b/drivers/net/ethernet/intel/iavf/iavf_client.h index e216fc9dfd81..9a7cf39ea75a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_client.h +++ b/drivers/net/ethernet/intel/iavf/iavf_client.h @@ -17,86 +17,86 @@ __stringify(IAVF_CLIENT_VERSION_MINOR) "." \ __stringify(IAVF_CLIENT_VERSION_BUILD) -struct i40e_client_version { +struct iavf_client_version { u8 major; u8 minor; u8 build; u8 rsvd; }; -enum i40e_client_state { - __I40E_CLIENT_NULL, - __I40E_CLIENT_REGISTERED +enum iavf_client_state { + __IAVF_CLIENT_NULL, + __IAVF_CLIENT_REGISTERED }; -enum i40e_client_instance_state { - __I40E_CLIENT_INSTANCE_NONE, - __I40E_CLIENT_INSTANCE_OPENED, +enum iavf_client_instance_state { + __IAVF_CLIENT_INSTANCE_NONE, + __IAVF_CLIENT_INSTANCE_OPENED, }; -struct i40e_ops; -struct i40e_client; +struct iavf_ops; +struct iavf_client; /* HW does not define a type value for AEQ; only for RX/TX and CEQ. * In order for us to keep the interface simple, SW will define a * unique type value for AEQ. 
*/ -#define I40E_QUEUE_TYPE_PE_AEQ 0x80 -#define I40E_QUEUE_INVALID_IDX 0xFFFF +#define IAVF_QUEUE_TYPE_PE_AEQ 0x80 +#define IAVF_QUEUE_INVALID_IDX 0xFFFF -struct i40e_qv_info { +struct iavf_qv_info { u32 v_idx; /* msix_vector */ u16 ceq_idx; u16 aeq_idx; u8 itr_idx; }; -struct i40e_qvlist_info { +struct iavf_qvlist_info { u32 num_vectors; - struct i40e_qv_info qv_info[1]; + struct iavf_qv_info qv_info[1]; }; -#define I40E_CLIENT_MSIX_ALL 0xFFFFFFFF +#define IAVF_CLIENT_MSIX_ALL 0xFFFFFFFF /* set of LAN parameters useful for clients managed by LAN */ /* Struct to hold per priority info */ -struct i40e_prio_qos_params { +struct iavf_prio_qos_params { u16 qs_handle; /* qs handle for prio */ u8 tc; /* TC mapped to prio */ u8 reserved; }; -#define I40E_CLIENT_MAX_USER_PRIORITY 8 +#define IAVF_CLIENT_MAX_USER_PRIORITY 8 /* Struct to hold Client QoS */ -struct i40e_qos_params { - struct i40e_prio_qos_params prio_qos[I40E_CLIENT_MAX_USER_PRIORITY]; +struct iavf_qos_params { + struct iavf_prio_qos_params prio_qos[IAVF_CLIENT_MAX_USER_PRIORITY]; }; -struct i40e_params { - struct i40e_qos_params qos; +struct iavf_params { + struct iavf_qos_params qos; u16 mtu; u16 link_up; /* boolean */ }; /* Structure to hold LAN device info for a client device */ -struct i40e_info { - struct i40e_client_version version; +struct iavf_info { + struct iavf_client_version version; u8 lanmac[6]; struct net_device *netdev; struct pci_dev *pcidev; u8 __iomem *hw_addr; u8 fid; /* function id, PF id or VF id */ -#define I40E_CLIENT_FTYPE_PF 0 -#define I40E_CLIENT_FTYPE_VF 1 +#define IAVF_CLIENT_FTYPE_PF 0 +#define IAVF_CLIENT_FTYPE_VF 1 u8 ftype; /* function type, PF or VF */ void *vf; /* cast to iavf_adapter */ /* All L2 params that could change during the life span of the device * and needs to be communicated to the client when they change */ - struct i40e_params params; - struct i40e_ops *ops; + struct iavf_params params; + struct iavf_ops *ops; u16 msix_count; /* number of msix vectors*/ /* Array down below will be dynamically allocated based on msix_count */ @@ -104,66 +104,66 @@ struct i40e_info { u16 itr_index; /* Which ITR index the PE driver is supposed to use */ }; -struct i40e_ops { +struct iavf_ops { /* setup_q_vector_list enables queues with a particular vector */ - int (*setup_qvlist)(struct i40e_info *ldev, struct i40e_client *client, - struct i40e_qvlist_info *qv_info); + int (*setup_qvlist)(struct iavf_info *ldev, struct iavf_client *client, + struct iavf_qvlist_info *qv_info); - u32 (*virtchnl_send)(struct i40e_info *ldev, struct i40e_client *client, + u32 (*virtchnl_send)(struct iavf_info *ldev, struct iavf_client *client, u8 *msg, u16 len); /* If the PE Engine is unresponsive, RDMA driver can request a reset.*/ - void (*request_reset)(struct i40e_info *ldev, - struct i40e_client *client); + void (*request_reset)(struct iavf_info *ldev, + struct iavf_client *client); }; -struct i40e_client_ops { +struct iavf_client_ops { /* Should be called from register_client() or whenever the driver is * ready to create a specific client instance. */ - int (*open)(struct i40e_info *ldev, struct i40e_client *client); + int (*open)(struct iavf_info *ldev, struct iavf_client *client); /* Should be closed when netdev is unavailable or when unregister * call comes in. If the close happens due to a reset, set the reset * bit to true. 
*/ - void (*close)(struct i40e_info *ldev, struct i40e_client *client, + void (*close)(struct iavf_info *ldev, struct iavf_client *client, bool reset); /* called when some l2 managed parameters changes - mss */ - void (*l2_param_change)(struct i40e_info *ldev, - struct i40e_client *client, - struct i40e_params *params); + void (*l2_param_change)(struct iavf_info *ldev, + struct iavf_client *client, + struct iavf_params *params); /* called when a message is received from the PF */ - int (*virtchnl_receive)(struct i40e_info *ldev, - struct i40e_client *client, + int (*virtchnl_receive)(struct iavf_info *ldev, + struct iavf_client *client, u8 *msg, u16 len); }; /* Client device */ -struct i40e_client_instance { +struct iavf_client_instance { struct list_head list; - struct i40e_info lan_info; - struct i40e_client *client; + struct iavf_info lan_info; + struct iavf_client *client; unsigned long state; }; -struct i40e_client { +struct iavf_client { struct list_head list; /* list of registered clients */ char name[IAVF_CLIENT_STR_LENGTH]; - struct i40e_client_version version; + struct iavf_client_version version; unsigned long state; /* client state */ atomic_t ref_cnt; /* Count of all the client devices of this kind */ u32 flags; -#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) -#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) +#define IAVF_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0) +#define IAVF_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2) u8 type; -#define I40E_CLIENT_IWARP 0 - struct i40e_client_ops *ops; /* client ops provided by the client */ +#define IAVF_CLIENT_IWARP 0 + struct iavf_client_ops *ops; /* client ops provided by the client */ }; /* used by clients */ -int iavf_register_client(struct i40e_client *client); -int iavf_unregister_client(struct i40e_client *client); +int iavf_register_client(struct iavf_client *client); +int iavf_unregister_client(struct iavf_client *client); #endif /* _IAVF_CLIENT_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_common.c b/drivers/net/ethernet/intel/iavf/iavf_common.c index 768369c89e77..8547fc8fdfd6 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_common.c +++ b/drivers/net/ethernet/intel/iavf/iavf_common.c @@ -2,7 +2,7 @@ /* Copyright(c) 2013 - 2018 Intel Corporation. */ #include "iavf_type.h" -#include "i40e_adminq.h" +#include "iavf_adminq.h" #include "iavf_prototype.h" #include <linux/avf/virtchnl.h> @@ -13,9 +13,9 @@ * This function sets the mac type of the adapter based on the * vendor ID and device ID stored in the hw structure. 
**/ -iavf_status iavf_set_mac_type(struct iavf_hw *hw) +enum iavf_status iavf_set_mac_type(struct iavf_hw *hw) { - iavf_status status = 0; + enum iavf_status status = 0; if (hw->vendor_id == PCI_VENDOR_ID_INTEL) { switch (hw->device_id) { @@ -32,7 +32,7 @@ iavf_status iavf_set_mac_type(struct iavf_hw *hw) break; } } else { - status = I40E_ERR_DEVICE_NOT_SUPPORTED; + status = IAVF_ERR_DEVICE_NOT_SUPPORTED; } hw_dbg(hw, "found mac: %d, returns: %d\n", hw->mac.type, status); @@ -44,55 +44,55 @@ iavf_status iavf_set_mac_type(struct iavf_hw *hw) * @hw: pointer to the HW structure * @aq_err: the AQ error code to convert **/ -const char *iavf_aq_str(struct iavf_hw *hw, enum i40e_admin_queue_err aq_err) +const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err) { switch (aq_err) { - case I40E_AQ_RC_OK: + case IAVF_AQ_RC_OK: return "OK"; - case I40E_AQ_RC_EPERM: - return "I40E_AQ_RC_EPERM"; - case I40E_AQ_RC_ENOENT: - return "I40E_AQ_RC_ENOENT"; - case I40E_AQ_RC_ESRCH: - return "I40E_AQ_RC_ESRCH"; - case I40E_AQ_RC_EINTR: - return "I40E_AQ_RC_EINTR"; - case I40E_AQ_RC_EIO: - return "I40E_AQ_RC_EIO"; - case I40E_AQ_RC_ENXIO: - return "I40E_AQ_RC_ENXIO"; - case I40E_AQ_RC_E2BIG: - return "I40E_AQ_RC_E2BIG"; - case I40E_AQ_RC_EAGAIN: - return "I40E_AQ_RC_EAGAIN"; - case I40E_AQ_RC_ENOMEM: - return "I40E_AQ_RC_ENOMEM"; - case I40E_AQ_RC_EACCES: - return "I40E_AQ_RC_EACCES"; - case I40E_AQ_RC_EFAULT: - return "I40E_AQ_RC_EFAULT"; - case I40E_AQ_RC_EBUSY: - return "I40E_AQ_RC_EBUSY"; - case I40E_AQ_RC_EEXIST: - return "I40E_AQ_RC_EEXIST"; - case I40E_AQ_RC_EINVAL: - return "I40E_AQ_RC_EINVAL"; - case I40E_AQ_RC_ENOTTY: - return "I40E_AQ_RC_ENOTTY"; - case I40E_AQ_RC_ENOSPC: - return "I40E_AQ_RC_ENOSPC"; - case I40E_AQ_RC_ENOSYS: - return "I40E_AQ_RC_ENOSYS"; - case I40E_AQ_RC_ERANGE: - return "I40E_AQ_RC_ERANGE"; - case I40E_AQ_RC_EFLUSHED: - return "I40E_AQ_RC_EFLUSHED"; - case I40E_AQ_RC_BAD_ADDR: - return "I40E_AQ_RC_BAD_ADDR"; - case I40E_AQ_RC_EMODE: - return "I40E_AQ_RC_EMODE"; - case I40E_AQ_RC_EFBIG: - return "I40E_AQ_RC_EFBIG"; + case IAVF_AQ_RC_EPERM: + return "IAVF_AQ_RC_EPERM"; + case IAVF_AQ_RC_ENOENT: + return "IAVF_AQ_RC_ENOENT"; + case IAVF_AQ_RC_ESRCH: + return "IAVF_AQ_RC_ESRCH"; + case IAVF_AQ_RC_EINTR: + return "IAVF_AQ_RC_EINTR"; + case IAVF_AQ_RC_EIO: + return "IAVF_AQ_RC_EIO"; + case IAVF_AQ_RC_ENXIO: + return "IAVF_AQ_RC_ENXIO"; + case IAVF_AQ_RC_E2BIG: + return "IAVF_AQ_RC_E2BIG"; + case IAVF_AQ_RC_EAGAIN: + return "IAVF_AQ_RC_EAGAIN"; + case IAVF_AQ_RC_ENOMEM: + return "IAVF_AQ_RC_ENOMEM"; + case IAVF_AQ_RC_EACCES: + return "IAVF_AQ_RC_EACCES"; + case IAVF_AQ_RC_EFAULT: + return "IAVF_AQ_RC_EFAULT"; + case IAVF_AQ_RC_EBUSY: + return "IAVF_AQ_RC_EBUSY"; + case IAVF_AQ_RC_EEXIST: + return "IAVF_AQ_RC_EEXIST"; + case IAVF_AQ_RC_EINVAL: + return "IAVF_AQ_RC_EINVAL"; + case IAVF_AQ_RC_ENOTTY: + return "IAVF_AQ_RC_ENOTTY"; + case IAVF_AQ_RC_ENOSPC: + return "IAVF_AQ_RC_ENOSPC"; + case IAVF_AQ_RC_ENOSYS: + return "IAVF_AQ_RC_ENOSYS"; + case IAVF_AQ_RC_ERANGE: + return "IAVF_AQ_RC_ERANGE"; + case IAVF_AQ_RC_EFLUSHED: + return "IAVF_AQ_RC_EFLUSHED"; + case IAVF_AQ_RC_BAD_ADDR: + return "IAVF_AQ_RC_BAD_ADDR"; + case IAVF_AQ_RC_EMODE: + return "IAVF_AQ_RC_EMODE"; + case IAVF_AQ_RC_EFBIG: + return "IAVF_AQ_RC_EFBIG"; } snprintf(hw->err_str, sizeof(hw->err_str), "%d", aq_err); @@ -104,143 +104,143 @@ const char *iavf_aq_str(struct iavf_hw *hw, enum i40e_admin_queue_err aq_err) * @hw: pointer to the HW structure * @stat_err: the status error code to convert **/ -const 
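/*
 * Every case in the switch below simply returns the stringified
 * enumerator. A stringify macro is a common way to keep such a table
 * from drifting out of sync with the enum (shown for comparison only;
 * this driver keeps the explicit cases):
 *
 *	#define IAVF_STAT_STR_CASE(e)	case e: return #e
 *
 *	switch (stat_err) {
 *	IAVF_STAT_STR_CASE(IAVF_ERR_NVM);
 *	IAVF_STAT_STR_CASE(IAVF_ERR_PARAM);
 *	...
 *	}
 */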
char *iavf_stat_str(struct iavf_hw *hw, iavf_status stat_err) +const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err) { switch (stat_err) { case 0: return "OK"; - case I40E_ERR_NVM: - return "I40E_ERR_NVM"; - case I40E_ERR_NVM_CHECKSUM: - return "I40E_ERR_NVM_CHECKSUM"; - case I40E_ERR_PHY: - return "I40E_ERR_PHY"; - case I40E_ERR_CONFIG: - return "I40E_ERR_CONFIG"; - case I40E_ERR_PARAM: - return "I40E_ERR_PARAM"; - case I40E_ERR_MAC_TYPE: - return "I40E_ERR_MAC_TYPE"; - case I40E_ERR_UNKNOWN_PHY: - return "I40E_ERR_UNKNOWN_PHY"; - case I40E_ERR_LINK_SETUP: - return "I40E_ERR_LINK_SETUP"; - case I40E_ERR_ADAPTER_STOPPED: - return "I40E_ERR_ADAPTER_STOPPED"; - case I40E_ERR_INVALID_MAC_ADDR: - return "I40E_ERR_INVALID_MAC_ADDR"; - case I40E_ERR_DEVICE_NOT_SUPPORTED: - return "I40E_ERR_DEVICE_NOT_SUPPORTED"; - case I40E_ERR_MASTER_REQUESTS_PENDING: - return "I40E_ERR_MASTER_REQUESTS_PENDING"; - case I40E_ERR_INVALID_LINK_SETTINGS: - return "I40E_ERR_INVALID_LINK_SETTINGS"; - case I40E_ERR_AUTONEG_NOT_COMPLETE: - return "I40E_ERR_AUTONEG_NOT_COMPLETE"; - case I40E_ERR_RESET_FAILED: - return "I40E_ERR_RESET_FAILED"; - case I40E_ERR_SWFW_SYNC: - return "I40E_ERR_SWFW_SYNC"; - case I40E_ERR_NO_AVAILABLE_VSI: - return "I40E_ERR_NO_AVAILABLE_VSI"; - case I40E_ERR_NO_MEMORY: - return "I40E_ERR_NO_MEMORY"; - case I40E_ERR_BAD_PTR: - return "I40E_ERR_BAD_PTR"; - case I40E_ERR_RING_FULL: - return "I40E_ERR_RING_FULL"; - case I40E_ERR_INVALID_PD_ID: - return "I40E_ERR_INVALID_PD_ID"; - case I40E_ERR_INVALID_QP_ID: - return "I40E_ERR_INVALID_QP_ID"; - case I40E_ERR_INVALID_CQ_ID: - return "I40E_ERR_INVALID_CQ_ID"; - case I40E_ERR_INVALID_CEQ_ID: - return "I40E_ERR_INVALID_CEQ_ID"; - case I40E_ERR_INVALID_AEQ_ID: - return "I40E_ERR_INVALID_AEQ_ID"; - case I40E_ERR_INVALID_SIZE: - return "I40E_ERR_INVALID_SIZE"; - case I40E_ERR_INVALID_ARP_INDEX: - return "I40E_ERR_INVALID_ARP_INDEX"; - case I40E_ERR_INVALID_FPM_FUNC_ID: - return "I40E_ERR_INVALID_FPM_FUNC_ID"; - case I40E_ERR_QP_INVALID_MSG_SIZE: - return "I40E_ERR_QP_INVALID_MSG_SIZE"; - case I40E_ERR_QP_TOOMANY_WRS_POSTED: - return "I40E_ERR_QP_TOOMANY_WRS_POSTED"; - case I40E_ERR_INVALID_FRAG_COUNT: - return "I40E_ERR_INVALID_FRAG_COUNT"; - case I40E_ERR_QUEUE_EMPTY: - return "I40E_ERR_QUEUE_EMPTY"; - case I40E_ERR_INVALID_ALIGNMENT: - return "I40E_ERR_INVALID_ALIGNMENT"; - case I40E_ERR_FLUSHED_QUEUE: - return "I40E_ERR_FLUSHED_QUEUE"; - case I40E_ERR_INVALID_PUSH_PAGE_INDEX: - return "I40E_ERR_INVALID_PUSH_PAGE_INDEX"; - case I40E_ERR_INVALID_IMM_DATA_SIZE: - return "I40E_ERR_INVALID_IMM_DATA_SIZE"; - case I40E_ERR_TIMEOUT: - return "I40E_ERR_TIMEOUT"; - case I40E_ERR_OPCODE_MISMATCH: - return "I40E_ERR_OPCODE_MISMATCH"; - case I40E_ERR_CQP_COMPL_ERROR: - return "I40E_ERR_CQP_COMPL_ERROR"; - case I40E_ERR_INVALID_VF_ID: - return "I40E_ERR_INVALID_VF_ID"; - case I40E_ERR_INVALID_HMCFN_ID: - return "I40E_ERR_INVALID_HMCFN_ID"; - case I40E_ERR_BACKING_PAGE_ERROR: - return "I40E_ERR_BACKING_PAGE_ERROR"; - case I40E_ERR_NO_PBLCHUNKS_AVAILABLE: - return "I40E_ERR_NO_PBLCHUNKS_AVAILABLE"; - case I40E_ERR_INVALID_PBLE_INDEX: - return "I40E_ERR_INVALID_PBLE_INDEX"; - case I40E_ERR_INVALID_SD_INDEX: - return "I40E_ERR_INVALID_SD_INDEX"; - case I40E_ERR_INVALID_PAGE_DESC_INDEX: - return "I40E_ERR_INVALID_PAGE_DESC_INDEX"; - case I40E_ERR_INVALID_SD_TYPE: - return "I40E_ERR_INVALID_SD_TYPE"; - case I40E_ERR_MEMCPY_FAILED: - return "I40E_ERR_MEMCPY_FAILED"; - case I40E_ERR_INVALID_HMC_OBJ_INDEX: - return "I40E_ERR_INVALID_HMC_OBJ_INDEX"; - 
case I40E_ERR_INVALID_HMC_OBJ_COUNT: - return "I40E_ERR_INVALID_HMC_OBJ_COUNT"; - case I40E_ERR_INVALID_SRQ_ARM_LIMIT: - return "I40E_ERR_INVALID_SRQ_ARM_LIMIT"; - case I40E_ERR_SRQ_ENABLED: - return "I40E_ERR_SRQ_ENABLED"; - case I40E_ERR_ADMIN_QUEUE_ERROR: - return "I40E_ERR_ADMIN_QUEUE_ERROR"; - case I40E_ERR_ADMIN_QUEUE_TIMEOUT: - return "I40E_ERR_ADMIN_QUEUE_TIMEOUT"; - case I40E_ERR_BUF_TOO_SHORT: - return "I40E_ERR_BUF_TOO_SHORT"; - case I40E_ERR_ADMIN_QUEUE_FULL: - return "I40E_ERR_ADMIN_QUEUE_FULL"; - case I40E_ERR_ADMIN_QUEUE_NO_WORK: - return "I40E_ERR_ADMIN_QUEUE_NO_WORK"; - case I40E_ERR_BAD_IWARP_CQE: - return "I40E_ERR_BAD_IWARP_CQE"; - case I40E_ERR_NVM_BLANK_MODE: - return "I40E_ERR_NVM_BLANK_MODE"; - case I40E_ERR_NOT_IMPLEMENTED: - return "I40E_ERR_NOT_IMPLEMENTED"; - case I40E_ERR_PE_DOORBELL_NOT_ENABLED: - return "I40E_ERR_PE_DOORBELL_NOT_ENABLED"; - case I40E_ERR_DIAG_TEST_FAILED: - return "I40E_ERR_DIAG_TEST_FAILED"; - case I40E_ERR_NOT_READY: - return "I40E_ERR_NOT_READY"; - case I40E_NOT_SUPPORTED: - return "I40E_NOT_SUPPORTED"; - case I40E_ERR_FIRMWARE_API_VERSION: - return "I40E_ERR_FIRMWARE_API_VERSION"; - case I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR: - return "I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR"; + case IAVF_ERR_NVM: + return "IAVF_ERR_NVM"; + case IAVF_ERR_NVM_CHECKSUM: + return "IAVF_ERR_NVM_CHECKSUM"; + case IAVF_ERR_PHY: + return "IAVF_ERR_PHY"; + case IAVF_ERR_CONFIG: + return "IAVF_ERR_CONFIG"; + case IAVF_ERR_PARAM: + return "IAVF_ERR_PARAM"; + case IAVF_ERR_MAC_TYPE: + return "IAVF_ERR_MAC_TYPE"; + case IAVF_ERR_UNKNOWN_PHY: + return "IAVF_ERR_UNKNOWN_PHY"; + case IAVF_ERR_LINK_SETUP: + return "IAVF_ERR_LINK_SETUP"; + case IAVF_ERR_ADAPTER_STOPPED: + return "IAVF_ERR_ADAPTER_STOPPED"; + case IAVF_ERR_INVALID_MAC_ADDR: + return "IAVF_ERR_INVALID_MAC_ADDR"; + case IAVF_ERR_DEVICE_NOT_SUPPORTED: + return "IAVF_ERR_DEVICE_NOT_SUPPORTED"; + case IAVF_ERR_MASTER_REQUESTS_PENDING: + return "IAVF_ERR_MASTER_REQUESTS_PENDING"; + case IAVF_ERR_INVALID_LINK_SETTINGS: + return "IAVF_ERR_INVALID_LINK_SETTINGS"; + case IAVF_ERR_AUTONEG_NOT_COMPLETE: + return "IAVF_ERR_AUTONEG_NOT_COMPLETE"; + case IAVF_ERR_RESET_FAILED: + return "IAVF_ERR_RESET_FAILED"; + case IAVF_ERR_SWFW_SYNC: + return "IAVF_ERR_SWFW_SYNC"; + case IAVF_ERR_NO_AVAILABLE_VSI: + return "IAVF_ERR_NO_AVAILABLE_VSI"; + case IAVF_ERR_NO_MEMORY: + return "IAVF_ERR_NO_MEMORY"; + case IAVF_ERR_BAD_PTR: + return "IAVF_ERR_BAD_PTR"; + case IAVF_ERR_RING_FULL: + return "IAVF_ERR_RING_FULL"; + case IAVF_ERR_INVALID_PD_ID: + return "IAVF_ERR_INVALID_PD_ID"; + case IAVF_ERR_INVALID_QP_ID: + return "IAVF_ERR_INVALID_QP_ID"; + case IAVF_ERR_INVALID_CQ_ID: + return "IAVF_ERR_INVALID_CQ_ID"; + case IAVF_ERR_INVALID_CEQ_ID: + return "IAVF_ERR_INVALID_CEQ_ID"; + case IAVF_ERR_INVALID_AEQ_ID: + return "IAVF_ERR_INVALID_AEQ_ID"; + case IAVF_ERR_INVALID_SIZE: + return "IAVF_ERR_INVALID_SIZE"; + case IAVF_ERR_INVALID_ARP_INDEX: + return "IAVF_ERR_INVALID_ARP_INDEX"; + case IAVF_ERR_INVALID_FPM_FUNC_ID: + return "IAVF_ERR_INVALID_FPM_FUNC_ID"; + case IAVF_ERR_QP_INVALID_MSG_SIZE: + return "IAVF_ERR_QP_INVALID_MSG_SIZE"; + case IAVF_ERR_QP_TOOMANY_WRS_POSTED: + return "IAVF_ERR_QP_TOOMANY_WRS_POSTED"; + case IAVF_ERR_INVALID_FRAG_COUNT: + return "IAVF_ERR_INVALID_FRAG_COUNT"; + case IAVF_ERR_QUEUE_EMPTY: + return "IAVF_ERR_QUEUE_EMPTY"; + case IAVF_ERR_INVALID_ALIGNMENT: + return "IAVF_ERR_INVALID_ALIGNMENT"; + case IAVF_ERR_FLUSHED_QUEUE: + return "IAVF_ERR_FLUSHED_QUEUE"; + case IAVF_ERR_INVALID_PUSH_PAGE_INDEX: + return 
"IAVF_ERR_INVALID_PUSH_PAGE_INDEX"; + case IAVF_ERR_INVALID_IMM_DATA_SIZE: + return "IAVF_ERR_INVALID_IMM_DATA_SIZE"; + case IAVF_ERR_TIMEOUT: + return "IAVF_ERR_TIMEOUT"; + case IAVF_ERR_OPCODE_MISMATCH: + return "IAVF_ERR_OPCODE_MISMATCH"; + case IAVF_ERR_CQP_COMPL_ERROR: + return "IAVF_ERR_CQP_COMPL_ERROR"; + case IAVF_ERR_INVALID_VF_ID: + return "IAVF_ERR_INVALID_VF_ID"; + case IAVF_ERR_INVALID_HMCFN_ID: + return "IAVF_ERR_INVALID_HMCFN_ID"; + case IAVF_ERR_BACKING_PAGE_ERROR: + return "IAVF_ERR_BACKING_PAGE_ERROR"; + case IAVF_ERR_NO_PBLCHUNKS_AVAILABLE: + return "IAVF_ERR_NO_PBLCHUNKS_AVAILABLE"; + case IAVF_ERR_INVALID_PBLE_INDEX: + return "IAVF_ERR_INVALID_PBLE_INDEX"; + case IAVF_ERR_INVALID_SD_INDEX: + return "IAVF_ERR_INVALID_SD_INDEX"; + case IAVF_ERR_INVALID_PAGE_DESC_INDEX: + return "IAVF_ERR_INVALID_PAGE_DESC_INDEX"; + case IAVF_ERR_INVALID_SD_TYPE: + return "IAVF_ERR_INVALID_SD_TYPE"; + case IAVF_ERR_MEMCPY_FAILED: + return "IAVF_ERR_MEMCPY_FAILED"; + case IAVF_ERR_INVALID_HMC_OBJ_INDEX: + return "IAVF_ERR_INVALID_HMC_OBJ_INDEX"; + case IAVF_ERR_INVALID_HMC_OBJ_COUNT: + return "IAVF_ERR_INVALID_HMC_OBJ_COUNT"; + case IAVF_ERR_INVALID_SRQ_ARM_LIMIT: + return "IAVF_ERR_INVALID_SRQ_ARM_LIMIT"; + case IAVF_ERR_SRQ_ENABLED: + return "IAVF_ERR_SRQ_ENABLED"; + case IAVF_ERR_ADMIN_QUEUE_ERROR: + return "IAVF_ERR_ADMIN_QUEUE_ERROR"; + case IAVF_ERR_ADMIN_QUEUE_TIMEOUT: + return "IAVF_ERR_ADMIN_QUEUE_TIMEOUT"; + case IAVF_ERR_BUF_TOO_SHORT: + return "IAVF_ERR_BUF_TOO_SHORT"; + case IAVF_ERR_ADMIN_QUEUE_FULL: + return "IAVF_ERR_ADMIN_QUEUE_FULL"; + case IAVF_ERR_ADMIN_QUEUE_NO_WORK: + return "IAVF_ERR_ADMIN_QUEUE_NO_WORK"; + case IAVF_ERR_BAD_IWARP_CQE: + return "IAVF_ERR_BAD_IWARP_CQE"; + case IAVF_ERR_NVM_BLANK_MODE: + return "IAVF_ERR_NVM_BLANK_MODE"; + case IAVF_ERR_NOT_IMPLEMENTED: + return "IAVF_ERR_NOT_IMPLEMENTED"; + case IAVF_ERR_PE_DOORBELL_NOT_ENABLED: + return "IAVF_ERR_PE_DOORBELL_NOT_ENABLED"; + case IAVF_ERR_DIAG_TEST_FAILED: + return "IAVF_ERR_DIAG_TEST_FAILED"; + case IAVF_ERR_NOT_READY: + return "IAVF_ERR_NOT_READY"; + case IAVF_NOT_SUPPORTED: + return "IAVF_NOT_SUPPORTED"; + case IAVF_ERR_FIRMWARE_API_VERSION: + return "IAVF_ERR_FIRMWARE_API_VERSION"; + case IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR: + return "IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR"; } snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err); @@ -260,7 +260,7 @@ const char *iavf_stat_str(struct iavf_hw *hw, iavf_status stat_err) void iavf_debug_aq(struct iavf_hw *hw, enum iavf_debug_mask mask, void *desc, void *buffer, u16 buf_len) { - struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; + struct iavf_aq_desc *aq_desc = (struct iavf_aq_desc *)desc; u8 *buf = (u8 *)buffer; if ((!(mask & hw->debug_mask)) || !desc) @@ -327,17 +327,17 @@ bool iavf_check_asq_alive(struct iavf_hw *hw) * Tell the Firmware that we're shutting down the AdminQ and whether * or not the driver is unloading as well. 
**/ -iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading) +enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading) { - struct i40e_aq_desc desc; - struct i40e_aqc_queue_shutdown *cmd = - (struct i40e_aqc_queue_shutdown *)&desc.params.raw; - iavf_status status; + struct iavf_aq_desc desc; + struct iavf_aqc_queue_shutdown *cmd = + (struct iavf_aqc_queue_shutdown *)&desc.params.raw; + enum iavf_status status; - iavf_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_queue_shutdown); + iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_queue_shutdown); if (unloading) - cmd->driver_unloading = cpu_to_le32(I40E_AQ_DRIVER_UNLOADING); + cmd->driver_unloading = cpu_to_le32(IAVF_AQ_DRIVER_UNLOADING); status = iavf_asq_send_command(hw, &desc, NULL, 0, NULL); return status; @@ -354,43 +354,43 @@ iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading) * * Internal function to get or set RSS look up table **/ -static iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, - u16 vsi_id, bool pf_lut, - u8 *lut, u16 lut_size, - bool set) +static enum iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, + u16 vsi_id, bool pf_lut, + u8 *lut, u16 lut_size, + bool set) { - iavf_status status; - struct i40e_aq_desc desc; - struct i40e_aqc_get_set_rss_lut *cmd_resp = - (struct i40e_aqc_get_set_rss_lut *)&desc.params.raw; + enum iavf_status status; + struct iavf_aq_desc desc; + struct iavf_aqc_get_set_rss_lut *cmd_resp = + (struct iavf_aqc_get_set_rss_lut *)&desc.params.raw; if (set) iavf_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_set_rss_lut); + iavf_aqc_opc_set_rss_lut); else iavf_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_get_rss_lut); + iavf_aqc_opc_get_rss_lut); /* Indirect command */ - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD); cmd_resp->vsi_id = cpu_to_le16((u16)((vsi_id << - I40E_AQC_SET_RSS_LUT_VSI_ID_SHIFT) & - I40E_AQC_SET_RSS_LUT_VSI_ID_MASK)); - cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_LUT_VSI_VALID); + IAVF_AQC_SET_RSS_LUT_VSI_ID_SHIFT) & + IAVF_AQC_SET_RSS_LUT_VSI_ID_MASK)); + cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_LUT_VSI_VALID); if (pf_lut) cmd_resp->flags |= cpu_to_le16((u16) - ((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_PF << - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); + ((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_PF << + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); else cmd_resp->flags |= cpu_to_le16((u16) - ((I40E_AQC_SET_RSS_LUT_TABLE_TYPE_VSI << - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & - I40E_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); + ((IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_VSI << + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_SHIFT) & + IAVF_AQC_SET_RSS_LUT_TABLE_TYPE_MASK)); status = iavf_asq_send_command(hw, &desc, lut, lut_size, NULL); @@ -407,8 +407,8 @@ static iavf_status iavf_aq_get_set_rss_lut(struct iavf_hw *hw, * * get the RSS lookup table, PF or VSI type **/ -iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, - bool pf_lut, u8 *lut, u16 lut_size) +enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, + bool pf_lut, u8 *lut, u16 lut_size) { return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, false); @@ -424,8 +424,8 @@ iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 vsi_id, * * set the RSS lookup table, PF or VSI type **/ -iavf_status 
iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 vsi_id, - bool pf_lut, u8 *lut, u16 lut_size) +enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 vsi_id, + bool pf_lut, u8 *lut, u16 lut_size) { return iavf_aq_get_set_rss_lut(hw, vsi_id, pf_lut, lut, lut_size, true); } @@ -439,33 +439,33 @@ iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 vsi_id, * * get the RSS key per VSI **/ -static +static enum iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct i40e_aqc_get_set_rss_key_data *key, + struct iavf_aqc_get_set_rss_key_data *key, bool set) { - iavf_status status; - struct i40e_aq_desc desc; - struct i40e_aqc_get_set_rss_key *cmd_resp = - (struct i40e_aqc_get_set_rss_key *)&desc.params.raw; - u16 key_size = sizeof(struct i40e_aqc_get_set_rss_key_data); + enum iavf_status status; + struct iavf_aq_desc desc; + struct iavf_aqc_get_set_rss_key *cmd_resp = + (struct iavf_aqc_get_set_rss_key *)&desc.params.raw; + u16 key_size = sizeof(struct iavf_aqc_get_set_rss_key_data); if (set) iavf_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_set_rss_key); + iavf_aqc_opc_set_rss_key); else iavf_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_get_rss_key); + iavf_aqc_opc_get_rss_key); /* Indirect command */ - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF); - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_BUF); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_RD); cmd_resp->vsi_id = cpu_to_le16((u16)((vsi_id << - I40E_AQC_SET_RSS_KEY_VSI_ID_SHIFT) & - I40E_AQC_SET_RSS_KEY_VSI_ID_MASK)); - cmd_resp->vsi_id |= cpu_to_le16((u16)I40E_AQC_SET_RSS_KEY_VSI_VALID); + IAVF_AQC_SET_RSS_KEY_VSI_ID_SHIFT) & + IAVF_AQC_SET_RSS_KEY_VSI_ID_MASK)); + cmd_resp->vsi_id |= cpu_to_le16((u16)IAVF_AQC_SET_RSS_KEY_VSI_VALID); status = iavf_asq_send_command(hw, &desc, key, key_size, NULL); @@ -479,8 +479,8 @@ iavf_status iavf_aq_get_set_rss_key(struct iavf_hw *hw, u16 vsi_id, * @key: pointer to key info struct * **/ -iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct i40e_aqc_get_set_rss_key_data *key) +enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, + struct iavf_aqc_get_set_rss_key_data *key) { return iavf_aq_get_set_rss_key(hw, vsi_id, key, false); } @@ -493,8 +493,8 @@ iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 vsi_id, * * set the RSS key per VSI **/ -iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, - struct i40e_aqc_get_set_rss_key_data *key) +enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, + struct iavf_aqc_get_set_rss_key_data *key) { return iavf_aq_get_set_rss_key(hw, vsi_id, key, true); } @@ -515,7 +515,7 @@ iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 vsi_id, * IF NOT iavf_ptype_lookup[ptype].known * THEN * Packet is unknown - * ELSE IF iavf_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP + * ELSE IF iavf_ptype_lookup[ptype].outer_ip == IAVF_RX_PTYPE_OUTER_IP * Use the rest of the fields to look at the tunnels, inner protocols, etc * ELSE * Use the enum iavf_rx_l2_ptype to decode the packet type @@ -877,24 +877,25 @@ struct iavf_rx_ptype_decoded iavf_ptype_lookup[] = { * is sent asynchronously, i.e. iavf_asq_send_command() does not wait for * completion before returning. 
**/ -iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, - enum virtchnl_ops v_opcode, - iavf_status v_retval, u8 *msg, u16 msglen, - struct i40e_asq_cmd_details *cmd_details) +enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, + enum virtchnl_ops v_opcode, + enum iavf_status v_retval, + u8 *msg, u16 msglen, + struct iavf_asq_cmd_details *cmd_details) { - struct i40e_asq_cmd_details details; - struct i40e_aq_desc desc; - iavf_status status; + struct iavf_asq_cmd_details details; + struct iavf_aq_desc desc; + enum iavf_status status; - iavf_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_send_msg_to_pf); - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_SI); + iavf_fill_default_direct_cmd_desc(&desc, iavf_aqc_opc_send_msg_to_pf); + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_SI); desc.cookie_high = cpu_to_le32(v_opcode); desc.cookie_low = cpu_to_le32(v_retval); if (msglen) { - desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF - | I40E_AQ_FLAG_RD)); - if (msglen > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); + desc.flags |= cpu_to_le16((u16)(IAVF_AQ_FLAG_BUF + | IAVF_AQ_FLAG_RD)); + if (msglen > IAVF_AQ_LARGE_BUF) + desc.flags |= cpu_to_le16((u16)IAVF_AQ_FLAG_LB); desc.datalen = cpu_to_le16(msglen); } if (!cmd_details) { @@ -948,7 +949,7 @@ void iavf_vf_parse_hw_config(struct iavf_hw *hw, * as none will be forthcoming. Immediately after calling this function, * the admin queue should be shut down and (optionally) reinitialized. **/ -iavf_status iavf_vf_reset(struct iavf_hw *hw) +enum iavf_status iavf_vf_reset(struct iavf_hw *hw) { return iavf_aq_send_msg_to_pf(hw, VIRTCHNL_OP_RESET_VF, 0, NULL, 0, NULL); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index 9f87304109fe..dad3eec8ccd8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -280,10 +280,10 @@ static int iavf_get_link_ksettings(struct net_device *netdev, cmd->base.port = PORT_NONE; /* Set speed and duplex */ switch (adapter->link_speed) { - case I40E_LINK_SPEED_40GB: + case IAVF_LINK_SPEED_40GB: cmd->base.speed = SPEED_40000; break; - case I40E_LINK_SPEED_25GB: + case IAVF_LINK_SPEED_25GB: #ifdef SPEED_25000 cmd->base.speed = SPEED_25000; #else @@ -291,16 +291,16 @@ static int iavf_get_link_ksettings(struct net_device *netdev, "Speed is 25G, display not supported by this version of ethtool.\n"); #endif break; - case I40E_LINK_SPEED_20GB: + case IAVF_LINK_SPEED_20GB: cmd->base.speed = SPEED_20000; break; - case I40E_LINK_SPEED_10GB: + case IAVF_LINK_SPEED_10GB: cmd->base.speed = SPEED_10000; break; - case I40E_LINK_SPEED_1GB: + case IAVF_LINK_SPEED_1GB: cmd->base.speed = SPEED_1000; break; - case I40E_LINK_SPEED_100MB: + case IAVF_LINK_SPEED_100MB: cmd->base.speed = SPEED_100; break; default: @@ -510,7 +510,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) if (changed_flags & IAVF_FLAG_LEGACY_RX) { if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); } } @@ -622,7 +622,7 @@ static int iavf_set_ringparam(struct net_device *netdev, if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); } return 0; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 4569d69a2b55..9d2b50964a08 100644 --- 
a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -14,6 +14,8 @@ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter); static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter); static int iavf_close(struct net_device *netdev); +static int iavf_init_get_resources(struct iavf_adapter *adapter); +static int iavf_check_reset_complete(struct iavf_hw *hw); char iavf_driver_name[] = "iavf"; static const char iavf_driver_string[] = @@ -57,7 +59,8 @@ MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver") MODULE_LICENSE("GPL v2"); MODULE_VERSION(DRV_VERSION); -static struct workqueue_struct *iavf_wq; +static const struct net_device_ops iavf_netdev_ops; +struct workqueue_struct *iavf_wq; /** * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code @@ -66,14 +69,14 @@ static struct workqueue_struct *iavf_wq; * @size: size of memory requested * @alignment: what to align the allocation to **/ -iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, - struct iavf_dma_mem *mem, - u64 size, u32 alignment) +enum iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, + struct iavf_dma_mem *mem, + u64 size, u32 alignment) { struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; if (!mem) - return I40E_ERR_PARAM; + return IAVF_ERR_PARAM; mem->size = ALIGN(size, alignment); mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size, @@ -81,7 +84,7 @@ iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, if (mem->va) return 0; else - return I40E_ERR_NO_MEMORY; + return IAVF_ERR_NO_MEMORY; } /** @@ -89,12 +92,13 @@ iavf_status iavf_allocate_dma_mem_d(struct iavf_hw *hw, * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, struct iavf_dma_mem *mem) +enum iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, + struct iavf_dma_mem *mem) { struct iavf_adapter *adapter = (struct iavf_adapter *)hw->back; if (!mem || !mem->va) - return I40E_ERR_PARAM; + return IAVF_ERR_PARAM; dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, (dma_addr_t)mem->pa); return 0; @@ -106,11 +110,11 @@ iavf_status iavf_free_dma_mem_d(struct iavf_hw *hw, struct iavf_dma_mem *mem) * @mem: ptr to mem struct to fill out * @size: size of memory requested **/ -iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, - struct iavf_virt_mem *mem, u32 size) +enum iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, + struct iavf_virt_mem *mem, u32 size) { if (!mem) - return I40E_ERR_PARAM; + return IAVF_ERR_PARAM; mem->size = size; mem->va = kzalloc(size, GFP_KERNEL); @@ -118,7 +122,7 @@ iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, if (mem->va) return 0; else - return I40E_ERR_NO_MEMORY; + return IAVF_ERR_NO_MEMORY; } /** @@ -126,10 +130,11 @@ iavf_status iavf_allocate_virt_mem_d(struct iavf_hw *hw, * @hw: pointer to the HW structure * @mem: ptr to mem struct to free **/ -iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, struct iavf_virt_mem *mem) +enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw, + struct iavf_virt_mem *mem) { if (!mem) - return I40E_ERR_PARAM; + return IAVF_ERR_PARAM; /* it's ok to kfree a NULL pointer */ kfree(mem->va); @@ -168,7 +173,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) if (!(adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); } } @@ 
-287,7 +292,7 @@ static irqreturn_t iavf_msix_aq(int irq, void *data) rd32(hw, IAVF_VFINT_ICR0_ENA1); /* schedule work on the private workqueue */ - schedule_work(&adapter->adminq_task); + queue_work(iavf_wq, &adapter->adminq_task); return IRQ_HANDLED; } @@ -657,14 +662,13 @@ iavf_vlan_filter *iavf_add_vlan(struct iavf_adapter *adapter, u16 vlan) f = iavf_find_vlan(adapter, vlan); if (!f) { - f = kzalloc(sizeof(*f), GFP_KERNEL); + f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) goto clearout; f->vlan = vlan; - INIT_LIST_HEAD(&f->list); - list_add(&f->list, &adapter->vlan_filter_list); + list_add_tail(&f->list, &adapter->vlan_filter_list); f->add = true; adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER; } @@ -979,7 +983,7 @@ static void iavf_up_complete(struct iavf_adapter *adapter) adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_QUEUES; if (CLIENT_ENABLED(adapter)) adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_OPEN; - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); } /** @@ -1043,7 +1047,7 @@ void iavf_down(struct iavf_adapter *adapter) adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; } - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); + mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); } /** @@ -1227,8 +1231,8 @@ out: **/ static int iavf_config_rss_aq(struct iavf_adapter *adapter) { - struct i40e_aqc_get_set_rss_key_data *rss_key = - (struct i40e_aqc_get_set_rss_key_data *)adapter->rss_key; + struct iavf_aqc_get_set_rss_key_data *rss_key = + (struct iavf_aqc_get_set_rss_key_data *)adapter->rss_key; struct iavf_hw *hw = &adapter->hw; int ret = 0; @@ -1532,136 +1536,66 @@ err: } /** - * iavf_watchdog_timer - Periodic call-back timer - * @data: pointer to adapter disguised as unsigned long - **/ -static void iavf_watchdog_timer(struct timer_list *t) -{ - struct iavf_adapter *adapter = from_timer(adapter, t, - watchdog_timer); - - schedule_work(&adapter->watchdog_task); - /* timer will be rescheduled in watchdog task */ -} - -/** - * iavf_watchdog_task - Periodic call-back task - * @work: pointer to work_struct + * iavf_process_aq_command - process aq_required flags + * and sends aq command + * @adapter: pointer to iavf adapter structure + * + * Returns 0 on success + * Returns error code if no command was sent + * or error code if the command failed. **/ -static void iavf_watchdog_task(struct work_struct *work) +static int iavf_process_aq_command(struct iavf_adapter *adapter) { - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - watchdog_task); - struct iavf_hw *hw = &adapter->hw; - u32 reg_val; - - if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section)) - goto restart_watchdog; - - if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) { - reg_val = rd32(hw, IAVF_VFGEN_RSTAT) & - IAVF_VFGEN_RSTAT_VFR_STATE_MASK; - if ((reg_val == VIRTCHNL_VFR_VFACTIVE) || - (reg_val == VIRTCHNL_VFR_COMPLETED)) { - /* A chance for redemption! */ - dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n"); - adapter->state = __IAVF_STARTUP; - adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - schedule_delayed_work(&adapter->init_task, 10); - clear_bit(__IAVF_IN_CRITICAL_TASK, - &adapter->crit_section); - /* Don't reschedule the watchdog, since we've restarted - * the init task. When init_task contacts the PF and - * gets everything set up again, it'll restart the - * watchdog for us. Down, boy. Sit. Stay. Woof. 
- */ - return; - } - adapter->aq_required = 0; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - goto watchdog_done; - } - - if ((adapter->state < __IAVF_DOWN) || - (adapter->flags & IAVF_FLAG_RESET_PENDING)) - goto watchdog_done; - - /* check for reset */ - reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; - if (!(adapter->flags & IAVF_FLAG_RESET_PENDING) && !reg_val) { - adapter->state = __IAVF_RESETTING; - adapter->flags |= IAVF_FLAG_RESET_PENDING; - dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); - schedule_work(&adapter->reset_task); - adapter->aq_required = 0; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - goto watchdog_done; - } - - /* Process admin queue tasks. After init, everything gets done - * here so we don't race on the admin queue. - */ - if (adapter->current_op) { - if (!iavf_asq_done(hw)) { - dev_dbg(&adapter->pdev->dev, "Admin queue timeout\n"); - iavf_send_api_ver(adapter); - } - goto watchdog_done; - } - if (adapter->aq_required & IAVF_FLAG_AQ_GET_CONFIG) { - iavf_send_vf_config_msg(adapter); - goto watchdog_done; - } - + if (adapter->aq_required & IAVF_FLAG_AQ_GET_CONFIG) + return iavf_send_vf_config_msg(adapter); if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_QUEUES) { iavf_disable_queues(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_MAP_VECTORS) { iavf_map_queues(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ADD_MAC_FILTER) { iavf_add_ether_addrs(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ADD_VLAN_FILTER) { iavf_add_vlans(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_DEL_MAC_FILTER) { iavf_del_ether_addrs(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_DEL_VLAN_FILTER) { iavf_del_vlans(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_VLAN_STRIPPING) { iavf_enable_vlan_stripping(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_VLAN_STRIPPING) { iavf_disable_vlan_stripping(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_QUEUES) { iavf_configure_queues(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_QUEUES) { iavf_enable_queues(adapter); - goto watchdog_done; + return 0; } if (adapter->aq_required & IAVF_FLAG_AQ_CONFIGURE_RSS) { @@ -1669,81 +1603,414 @@ static void iavf_watchdog_task(struct work_struct *work) * PF, so we don't have to set current_op as we will * not get a response through the ARQ. 
 */
-		iavf_init_rss(adapter);
 		adapter->aq_required &= ~IAVF_FLAG_AQ_CONFIGURE_RSS;
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_GET_HENA) {
 		iavf_get_hena(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_SET_HENA) {
 		iavf_set_hena(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_KEY) {
 		iavf_set_rss_key(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_SET_RSS_LUT) {
 		iavf_set_rss_lut(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_PROMISC) {
 		iavf_set_promiscuous(adapter, FLAG_VF_UNICAST_PROMISC |
 				     FLAG_VF_MULTICAST_PROMISC);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_ALLMULTI) {
 		iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC);
-		goto watchdog_done;
+		return 0;
 	}
 	if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) &&
 	    (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) {
 		iavf_set_promiscuous(adapter, 0);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_ENABLE_CHANNELS) {
 		iavf_enable_channels(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_DISABLE_CHANNELS) {
 		iavf_disable_channels(adapter);
-		goto watchdog_done;
+		return 0;
 	}
-
 	if (adapter->aq_required & IAVF_FLAG_AQ_ADD_CLOUD_FILTER) {
 		iavf_add_cloud_filter(adapter);
-		goto watchdog_done;
+		return 0;
 	}
 	if (adapter->aq_required & IAVF_FLAG_AQ_DEL_CLOUD_FILTER) {
 		iavf_del_cloud_filter(adapter);
+		return 0;
+	}
+	return -EAGAIN;
+}
+
+/**
+ * iavf_startup - first step of driver startup
+ * @adapter: board private structure
+ *
+ * Function processes the __IAVF_STARTUP driver state.
+ * On success the state is changed to __IAVF_INIT_VERSION_CHECK;
+ * on failure it returns -EAGAIN.
+ **/
+static int iavf_startup(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err;
+
+	WARN_ON(adapter->state != __IAVF_STARTUP);
+
+	/* driver loaded, probe complete */
+	adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+	adapter->flags &= ~IAVF_FLAG_RESET_PENDING;
+	err = iavf_set_mac_type(hw);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", err);
+		goto err;
+	}
+
+	err = iavf_check_reset_complete(hw);
+	if (err) {
+		dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n",
+			 err);
+		goto err;
+	}
+	hw->aq.num_arq_entries = IAVF_AQ_LEN;
+	hw->aq.num_asq_entries = IAVF_AQ_LEN;
+	hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE;
+	hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE;
+
+	err = iavf_init_adminq(hw);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", err);
+		goto err;
+	}
+	err = iavf_send_api_ver(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err);
+		iavf_shutdown_adminq(hw);
+		goto err;
+	}
+	adapter->state = __IAVF_INIT_VERSION_CHECK;
+err:
+	return err;
+}
+
+/**
+ * iavf_init_version_check - second step of driver startup
+ * @adapter: board private structure
+ *
+ * Function processes the __IAVF_INIT_VERSION_CHECK driver state.
+ * On success the state is changed to __IAVF_INIT_GET_RESOURCES;
+ * on failure it returns -EAGAIN.
+ **/
+static int iavf_init_version_check(struct iavf_adapter *adapter)
+{
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err = -EAGAIN;
+
+	WARN_ON(adapter->state != __IAVF_INIT_VERSION_CHECK);
+
+	if (!iavf_asq_done(hw)) {
+		dev_err(&pdev->dev, "Admin queue command never completed\n");
+		iavf_shutdown_adminq(hw);
+		adapter->state = __IAVF_STARTUP;
+		goto err;
+	}
+
+	/* aq msg sent, awaiting reply */
+	err = iavf_verify_api_ver(adapter);
+	if (err) {
+		if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK)
+			err = iavf_send_api_ver(adapter);
+		else
+			dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n",
+				adapter->pf_version.major,
+				adapter->pf_version.minor,
+				VIRTCHNL_VERSION_MAJOR,
+				VIRTCHNL_VERSION_MINOR);
+		goto err;
+	}
+	err = iavf_send_vf_config_msg(adapter);
+	if (err) {
+		dev_err(&pdev->dev, "Unable to send config request (%d)\n",
+			err);
+		goto err;
+	}
+	adapter->state = __IAVF_INIT_GET_RESOURCES;
+
+err:
+	return err;
+}
+
+/**
+ * iavf_init_get_resources - third step of driver startup
+ * @adapter: board private structure
+ *
+ * Function processes the __IAVF_INIT_GET_RESOURCES driver state and
+ * finishes the driver initialization procedure.
+ * On success the state is changed to __IAVF_DOWN;
+ * on failure it returns -EAGAIN.
+ **/
+static int iavf_init_get_resources(struct iavf_adapter *adapter)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct pci_dev *pdev = adapter->pdev;
+	struct iavf_hw *hw = &adapter->hw;
+	int err = 0, bufsz;
+
+	WARN_ON(adapter->state != __IAVF_INIT_GET_RESOURCES);
+	/* aq msg sent, awaiting reply */
+	if (!adapter->vf_res) {
+		bufsz = sizeof(struct virtchnl_vf_resource) +
+			(IAVF_MAX_VF_VSI *
+			 sizeof(struct virtchnl_vsi_resource));
+		adapter->vf_res = kzalloc(bufsz, GFP_KERNEL);
+		if (!adapter->vf_res)
+			goto err;
+	}
+	err = iavf_get_vf_config(adapter);
+	if (err == IAVF_ERR_ADMIN_QUEUE_NO_WORK) {
+		err = iavf_send_vf_config_msg(adapter);
+		goto err;
+	} else if (err == IAVF_ERR_PARAM) {
+		/* We only get ERR_PARAM if the device is in a very bad
+		 * state or if we've been disabled for previous bad
+		 * behavior. Either way, we're done now.
+		 */
+		iavf_shutdown_adminq(hw);
+		dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n");
+		return 0;
+	}
+	if (err) {
+		dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err);
+		goto err_alloc;
+	}
+
+	if (iavf_process_config(adapter))
+		goto err_alloc;
+	adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+
+	adapter->flags |= IAVF_FLAG_RX_CSUM_ENABLED;
+
+	netdev->netdev_ops = &iavf_netdev_ops;
+	iavf_set_ethtool_ops(netdev);
+	netdev->watchdog_timeo = 5 * HZ;
+
+	/* MTU range: 68 - 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD;
+
+	if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
+		dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
+			 adapter->hw.mac.addr);
+		eth_hw_addr_random(netdev);
+		ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
+	} else {
+		adapter->flags |= IAVF_FLAG_ADDR_SET_BY_PF;
+		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
+	}
+
+	adapter->tx_desc_count = IAVF_DEFAULT_TXD;
+	adapter->rx_desc_count = IAVF_DEFAULT_RXD;
+	err = iavf_init_interrupt_scheme(adapter);
+	if (err)
+		goto err_sw_init;
+	iavf_map_rings_to_vectors(adapter);
+	if (adapter->vf_res->vf_cap_flags &
+	    VIRTCHNL_VF_OFFLOAD_WB_ON_ITR)
+		adapter->flags |= IAVF_FLAG_WB_ON_ITR_CAPABLE;
+
+	err = iavf_request_misc_irq(adapter);
+	if (err)
+		goto err_sw_init;
+
+	netif_carrier_off(netdev);
+	adapter->link_up = false;
+
+	/* set the semaphore to prevent any callbacks after device registration
+	 * until the driver state is set to __IAVF_DOWN
+	 */
+	rtnl_lock();
+	if (!adapter->netdev_registered) {
+		err = register_netdevice(netdev);
+		if (err) {
+			rtnl_unlock();
+			goto err_register;
+		}
+	}
+
+	adapter->netdev_registered = true;
+
+	netif_tx_stop_all_queues(netdev);
+	if (CLIENT_ALLOWED(adapter)) {
+		err = iavf_lan_add_device(adapter);
+		if (err) {
+			rtnl_unlock();
+			dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n",
+				 err);
+		}
+	}
+	dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr);
+	if (netdev->features & NETIF_F_GRO)
+		dev_info(&pdev->dev, "GRO is enabled\n");
+
+	adapter->state = __IAVF_DOWN;
+	set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+	rtnl_unlock();
+
+	iavf_misc_irq_enable(adapter);
+	wake_up(&adapter->down_waitqueue);
+
+	adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL);
+	adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL);
+	if (!adapter->rss_key || !adapter->rss_lut)
+		goto err_mem;
+	if (RSS_AQ(adapter))
+		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS;
+	else
+		iavf_init_rss(adapter);
+
+	return err;
+err_mem:
+	iavf_free_rss(adapter);
+err_register:
+	iavf_free_misc_irq(adapter);
+err_sw_init:
+	iavf_reset_interrupt_capability(adapter);
+err_alloc:
+	kfree(adapter->vf_res);
+	adapter->vf_res = NULL;
+err:
+	return err;
+}
+
+/**
+ * iavf_watchdog_task - Periodic call-back task
+ * @work: pointer to work_struct
+ **/
+static void iavf_watchdog_task(struct work_struct *work)
+{
+	struct iavf_adapter *adapter = container_of(work,
+						    struct iavf_adapter,
+						    watchdog_task.work);
+	struct iavf_hw *hw = &adapter->hw;
+	u32 reg_val;
+
+	if (test_and_set_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section))
+		goto restart_watchdog;
+
+	if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+		adapter->state = __IAVF_COMM_FAILED;
+
+	switch (adapter->state) {
+	case __IAVF_COMM_FAILED:
+		reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+			  IAVF_VFGEN_RSTAT_VFR_STATE_MASK;
+		if (reg_val == VIRTCHNL_VFR_VFACTIVE ||
+		    reg_val == VIRTCHNL_VFR_COMPLETED) {
+			/* A chance for redemption! */
+			dev_err(&adapter->pdev->dev,
+				"Hardware came out of reset. Attempting reinit.\n");
+			adapter->state = __IAVF_STARTUP;
+			adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+			queue_delayed_work(iavf_wq, &adapter->init_task, 10);
+			clear_bit(__IAVF_IN_CRITICAL_TASK,
+				  &adapter->crit_section);
+			/* Don't reschedule the watchdog, since we've restarted
+			 * the init task. When init_task contacts the PF and
+			 * gets everything set up again, it'll restart the
+			 * watchdog for us. Down, boy. Sit. Stay. Woof.
+			 */
+			return;
+		}
+		adapter->aq_required = 0;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		clear_bit(__IAVF_IN_CRITICAL_TASK,
+			  &adapter->crit_section);
+		queue_delayed_work(iavf_wq,
+				   &adapter->watchdog_task,
+				   msecs_to_jiffies(10));
 		goto watchdog_done;
+	case __IAVF_RESETTING:
+		clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+		return;
+	case __IAVF_DOWN:
+	case __IAVF_DOWN_PENDING:
+	case __IAVF_TESTING:
+	case __IAVF_RUNNING:
+		if (adapter->current_op) {
+			if (!iavf_asq_done(hw)) {
+				dev_dbg(&adapter->pdev->dev,
+					"Admin queue timeout\n");
+				iavf_send_api_ver(adapter);
+			}
+		} else {
+			if (!iavf_process_aq_command(adapter) &&
+			    adapter->state == __IAVF_RUNNING)
+				iavf_request_stats(adapter);
+		}
+		break;
+	case __IAVF_REMOVE:
+		clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
+		return;
+	default:
+		goto restart_watchdog;
 	}

-	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
+	/* check for hw reset */
+	reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+	if (!reg_val) {
+		adapter->state = __IAVF_RESETTING;
+		adapter->flags |= IAVF_FLAG_RESET_PENDING;
+		adapter->aq_required = 0;
+		adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+		dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+		queue_work(iavf_wq, &adapter->reset_task);
+		goto watchdog_done;
+	}

-	if (adapter->state == __IAVF_RUNNING)
-		iavf_request_stats(adapter);
+	schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5));
 watchdog_done:
-	if (adapter->state == __IAVF_RUNNING)
+	if (adapter->state == __IAVF_RUNNING ||
+	    adapter->state == __IAVF_COMM_FAILED)
 		iavf_detect_recover_hung(&adapter->vsi);
 	clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section);
 restart_watchdog:
-	if (adapter->state == __IAVF_REMOVE)
-		return;
 	if (adapter->aq_required)
-		mod_timer(&adapter->watchdog_timer,
-			  jiffies + msecs_to_jiffies(20));
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task,
+				   msecs_to_jiffies(20));
 	else
-		mod_timer(&adapter->watchdog_timer, jiffies + (HZ * 2));
-	schedule_work(&adapter->adminq_task);
+		queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2);
+	queue_work(iavf_wq, &adapter->adminq_task);
 }

 static void iavf_disable_vf(struct iavf_adapter *adapter)
@@ -1967,7 +2234,7 @@ continue_reset:
 		adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;

 	iavf_misc_irq_enable(adapter);
-	mod_timer(&adapter->watchdog_timer, jiffies + 2);
+	mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);

 	/* We were running when the reset started, so we need to restore some
 	 * state here.
 	 */
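The hunks above all apply one conversion: the legacy watchdog_timer plus work_struct pair becomes a single delayed work item queued on the driver's private workqueue, and the handler re-arms itself. A minimal sketch of that pattern, using hypothetical foo_* names rather than the actual iavf code:

#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct foo_adapter {
	struct delayed_work watchdog;	/* replaces timer + work_struct */
};

static struct workqueue_struct *foo_wq;

static void foo_watchdog_task(struct work_struct *work)
{
	struct foo_adapter *ad = container_of(work, struct foo_adapter,
					      watchdog.work);

	/* ... poll hardware/driver state for 'ad' here ... */

	/* Re-arm: the handler reschedules itself, so no timer is needed. */
	queue_delayed_work(foo_wq, &ad->watchdog, msecs_to_jiffies(2000));
}

int foo_start(struct foo_adapter *ad)
{
	/* WQ_MEM_RECLAIM guarantees forward progress under memory
	 * pressure, which matters for work a reset path may flush.
	 */
	foo_wq = alloc_workqueue("foo", WQ_MEM_RECLAIM, 0);
	if (!foo_wq)
		return -ENOMEM;
	INIT_DELAYED_WORK(&ad->watchdog, foo_watchdog_task);
	queue_delayed_work(foo_wq, &ad->watchdog, 0);
	return 0;
}

void foo_stop(struct foo_adapter *ad)
{
	/* One call replaces del_timer_sync() + cancel_work_sync(). */
	cancel_delayed_work_sync(&ad->watchdog);
	destroy_workqueue(foo_wq);
}

With this shape, teardown collapses to a single cancel_delayed_work_sync(), which is exactly what the iavf_remove() hunk later in this patch switches to.
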
@@ -2020,9 +2287,9 @@ static void iavf_adminq_task(struct work_struct *work) struct iavf_adapter *adapter = container_of(work, struct iavf_adapter, adminq_task); struct iavf_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; + struct iavf_arq_event_info event; enum virtchnl_ops v_op; - iavf_status ret, v_ret; + enum iavf_status ret, v_ret; u32 val, oldval; u16 pending; @@ -2037,7 +2304,7 @@ static void iavf_adminq_task(struct work_struct *work) do { ret = iavf_clean_arq_element(hw, &event, &pending); v_op = (enum virtchnl_ops)le32_to_cpu(event.desc.cookie_high); - v_ret = (iavf_status)le32_to_cpu(event.desc.cookie_low); + v_ret = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); if (ret || !v_op) break; /* No event to process or error cleaning ARQ */ @@ -2239,22 +2506,22 @@ static int iavf_validate_tx_bandwidth(struct iavf_adapter *adapter, int speed = 0, ret = 0; switch (adapter->link_speed) { - case I40E_LINK_SPEED_40GB: + case IAVF_LINK_SPEED_40GB: speed = 40000; break; - case I40E_LINK_SPEED_25GB: + case IAVF_LINK_SPEED_25GB: speed = 25000; break; - case I40E_LINK_SPEED_20GB: + case IAVF_LINK_SPEED_20GB: speed = 20000; break; - case I40E_LINK_SPEED_10GB: + case IAVF_LINK_SPEED_10GB: speed = 10000; break; - case I40E_LINK_SPEED_1GB: + case IAVF_LINK_SPEED_1GB: speed = 1000; break; - case I40E_LINK_SPEED_100MB: + case IAVF_LINK_SPEED_100MB: speed = 100; break; default: @@ -2432,14 +2699,14 @@ exit: /** * iavf_parse_cls_flower - Parse tc flower filters provided by kernel * @adapter: board private structure - * @cls_flower: pointer to struct tc_cls_flower_offload + * @cls_flower: pointer to struct flow_cls_offload * @filter: pointer to cloud filter structure */ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct iavf_cloud_filter *filter) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0; u16 n_proto_key = 0; @@ -2508,7 +2775,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad ether dest mask %pM\n", match.mask->dst); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } @@ -2518,7 +2785,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad ether src mask %pM\n", match.mask->src); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } @@ -2553,7 +2820,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad vlan mask %u\n", match.mask->vlan_id); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } vf->mask.tcp_spec.vlan_id |= cpu_to_be16(0xffff); @@ -2577,7 +2844,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad ip dst mask 0x%08x\n", be32_to_cpu(match.mask->dst)); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } @@ -2587,13 +2854,13 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", be32_to_cpu(match.mask->dst)); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } if (field_flags & IAVF_CLOUD_FIELD_TEN_ID) { dev_info(&adapter->pdev->dev, "Tenant id not allowed for ip filter\n"); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } if (match.key->dst) { vf->mask.tcp_spec.dst_ip[0] |= cpu_to_be32(0xffffffff); @@ -2614,7 
+2881,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, if (ipv6_addr_any(&match.mask->dst)) { dev_err(&adapter->pdev->dev, "Bad ipv6 dst mask 0x%02x\n", IPV6_ADDR_ANY); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } /* src and dest IPv6 address should not be LOOPBACK @@ -2624,7 +2891,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, ipv6_addr_loopback(&match.key->src)) { dev_err(&adapter->pdev->dev, "ipv6 addr should not be loopback\n"); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } if (!ipv6_addr_any(&match.mask->dst) || !ipv6_addr_any(&match.mask->src)) @@ -2649,7 +2916,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad src port mask %u\n", be16_to_cpu(match.mask->src)); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } @@ -2659,7 +2926,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, } else { dev_err(&adapter->pdev->dev, "Bad dst port mask %u\n", be16_to_cpu(match.mask->dst)); - return I40E_ERR_CONFIG; + return IAVF_ERR_CONFIG; } } if (match.key->dst) { @@ -2704,10 +2971,10 @@ static int iavf_handle_tclass(struct iavf_adapter *adapter, u32 tc, /** * iavf_configure_clsflower - Add tc flower filters * @adapter: board private structure - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload */ static int iavf_configure_clsflower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { int tc = tc_classid_to_hwtc(adapter->netdev, cls_flower->classid); struct iavf_cloud_filter *filter = NULL; @@ -2783,10 +3050,10 @@ static struct iavf_cloud_filter *iavf_find_cf(struct iavf_adapter *adapter, /** * iavf_delete_clsflower - Remove tc flower filters * @adapter: board private structure - * @cls_flower: Pointer to struct tc_cls_flower_offload + * @cls_flower: Pointer to struct flow_cls_offload */ static int iavf_delete_clsflower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct iavf_cloud_filter *filter = NULL; int err = 0; @@ -2810,17 +3077,17 @@ static int iavf_delete_clsflower(struct iavf_adapter *adapter, * @type_data: offload data */ static int iavf_setup_tc_cls_flower(struct iavf_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { if (cls_flower->common.chain_index) return -EOPNOTSUPP; switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return iavf_configure_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return iavf_delete_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; @@ -2846,34 +3113,7 @@ static int iavf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -/** - * iavf_setup_tc_block - register callbacks for tc - * @netdev: network interface device structure - * @f: tc offload data - * - * This function registers block callbacks for tc - * offloads - **/ -static int iavf_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct iavf_adapter *adapter = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, iavf_setup_tc_block_cb, - adapter, adapter, f->extack); - case TC_BLOCK_UNBIND: - 
tcf_block_cb_unregister(f->block, iavf_setup_tc_block_cb, - adapter); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(iavf_block_cb_list); /** * iavf_setup_tc - configure multiple traffic classes @@ -2889,11 +3129,16 @@ static int iavf_setup_tc_block(struct net_device *dev, static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, void *type_data) { + struct iavf_adapter *adapter = netdev_priv(netdev); + switch (type) { case TC_SETUP_QDISC_MQPRIO: return __iavf_setup_tc(netdev, type_data); case TC_SETUP_BLOCK: - return iavf_setup_tc_block(netdev, type_data); + return flow_block_cb_setup_simple(type_data, + &iavf_block_cb_list, + iavf_setup_tc_block_cb, + adapter, adapter, true); default: return -EOPNOTSUPP; } @@ -2908,7 +3153,7 @@ static int iavf_setup_tc(struct net_device *netdev, enum tc_setup_type type, * The open entry point is called when a network interface is made * active by the system (IFF_UP). At this point all resources needed * for transmit and receive operations are allocated, the interrupt - * handler is registered with the OS, the watchdog timer is started, + * handler is registered with the OS, the watchdog is started, * and the stack is notified that the interface is ready. **/ static int iavf_open(struct net_device *netdev) @@ -3020,7 +3265,7 @@ static int iavf_close(struct net_device *netdev) status = wait_event_timeout(adapter->down_waitqueue, adapter->state == __IAVF_DOWN, - msecs_to_jiffies(200)); + msecs_to_jiffies(500)); if (!status) netdev_warn(netdev, "Device resources not yet released\n"); return 0; @@ -3043,7 +3288,7 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; } adapter->flags |= IAVF_FLAG_RESET_NEEDED; - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); return 0; } @@ -3348,217 +3593,41 @@ int iavf_process_config(struct iavf_adapter *adapter) static void iavf_init_task(struct work_struct *work) { struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - init_task.work); - struct net_device *netdev = adapter->netdev; + struct iavf_adapter, + init_task.work); struct iavf_hw *hw = &adapter->hw; - struct pci_dev *pdev = adapter->pdev; - int err, bufsz; switch (adapter->state) { case __IAVF_STARTUP: - /* driver loaded, probe complete */ - adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - err = iavf_set_mac_type(hw); - if (err) { - dev_err(&pdev->dev, "Failed to set MAC type (%d)\n", - err); - goto err; - } - err = iavf_check_reset_complete(hw); - if (err) { - dev_info(&pdev->dev, "Device is still in reset (%d), retrying\n", - err); - goto err; - } - hw->aq.num_arq_entries = IAVF_AQ_LEN; - hw->aq.num_asq_entries = IAVF_AQ_LEN; - hw->aq.arq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - hw->aq.asq_buf_size = IAVF_MAX_AQ_BUF_SIZE; - - err = iavf_init_adminq(hw); - if (err) { - dev_err(&pdev->dev, "Failed to init Admin Queue (%d)\n", - err); - goto err; - } - err = iavf_send_api_ver(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to send to PF (%d)\n", err); - iavf_shutdown_adminq(hw); - goto err; - } - adapter->state = __IAVF_INIT_VERSION_CHECK; - goto restart; + if (iavf_startup(adapter) < 0) + goto init_failed; + break; case __IAVF_INIT_VERSION_CHECK: - if (!iavf_asq_done(hw)) { - dev_err(&pdev->dev, "Admin queue command never completed\n"); - iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; - goto err; - } - - /* aq msg sent, awaiting reply */ - err = 
iavf_verify_api_ver(adapter); - if (err) { - if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) - err = iavf_send_api_ver(adapter); - else - dev_err(&pdev->dev, "Unsupported PF API version %d.%d, expected %d.%d\n", - adapter->pf_version.major, - adapter->pf_version.minor, - VIRTCHNL_VERSION_MAJOR, - VIRTCHNL_VERSION_MINOR); - goto err; - } - err = iavf_send_vf_config_msg(adapter); - if (err) { - dev_err(&pdev->dev, "Unable to send config request (%d)\n", - err); - goto err; - } - adapter->state = __IAVF_INIT_GET_RESOURCES; - goto restart; - case __IAVF_INIT_GET_RESOURCES: - /* aq msg sent, awaiting reply */ - if (!adapter->vf_res) { - bufsz = sizeof(struct virtchnl_vf_resource) + - (IAVF_MAX_VF_VSI * - sizeof(struct virtchnl_vsi_resource)); - adapter->vf_res = kzalloc(bufsz, GFP_KERNEL); - if (!adapter->vf_res) - goto err; - } - err = iavf_get_vf_config(adapter); - if (err == I40E_ERR_ADMIN_QUEUE_NO_WORK) { - err = iavf_send_vf_config_msg(adapter); - goto err; - } else if (err == I40E_ERR_PARAM) { - /* We only get ERR_PARAM if the device is in a very bad - * state or if we've been disabled for previous bad - * behavior. Either way, we're done now. - */ - iavf_shutdown_adminq(hw); - dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); - return; - } - if (err) { - dev_err(&pdev->dev, "Unable to get VF config (%d)\n", - err); - goto err_alloc; - } - adapter->state = __IAVF_INIT_SW; + if (iavf_init_version_check(adapter) < 0) + goto init_failed; break; + case __IAVF_INIT_GET_RESOURCES: + if (iavf_init_get_resources(adapter) < 0) + goto init_failed; + return; default: - goto err_alloc; - } - - if (iavf_process_config(adapter)) - goto err_alloc; - adapter->current_op = VIRTCHNL_OP_UNKNOWN; - - adapter->flags |= IAVF_FLAG_RX_CSUM_ENABLED; - - netdev->netdev_ops = &iavf_netdev_ops; - iavf_set_ethtool_ops(netdev); - netdev->watchdog_timeo = 5 * HZ; - - /* MTU range: 68 - 9710 */ - netdev->min_mtu = ETH_MIN_MTU; - netdev->max_mtu = IAVF_MAX_RXBUFFER - IAVF_PACKET_HDR_PAD; - - if (!is_valid_ether_addr(adapter->hw.mac.addr)) { - dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n", - adapter->hw.mac.addr); - eth_hw_addr_random(netdev); - ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); - } else { - adapter->flags |= IAVF_FLAG_ADDR_SET_BY_PF; - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); - ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); - } - - timer_setup(&adapter->watchdog_timer, iavf_watchdog_timer, 0); - mod_timer(&adapter->watchdog_timer, jiffies + 1); - - adapter->tx_desc_count = IAVF_DEFAULT_TXD; - adapter->rx_desc_count = IAVF_DEFAULT_RXD; - err = iavf_init_interrupt_scheme(adapter); - if (err) - goto err_sw_init; - iavf_map_rings_to_vectors(adapter); - if (adapter->vf_res->vf_cap_flags & - VIRTCHNL_VF_OFFLOAD_WB_ON_ITR) - adapter->flags |= IAVF_FLAG_WB_ON_ITR_CAPABLE; - - err = iavf_request_misc_irq(adapter); - if (err) - goto err_sw_init; - - netif_carrier_off(netdev); - adapter->link_up = false; - - if (!adapter->netdev_registered) { - err = register_netdev(netdev); - if (err) - goto err_register; - } - - adapter->netdev_registered = true; - - netif_tx_stop_all_queues(netdev); - if (CLIENT_ALLOWED(adapter)) { - err = iavf_lan_add_device(adapter); - if (err) - dev_info(&pdev->dev, "Failed to add VF to client API service list: %d\n", - err); + goto init_failed; } - dev_info(&pdev->dev, "MAC address: %pM\n", adapter->hw.mac.addr); - if (netdev->features & NETIF_F_GRO) - dev_info(&pdev->dev, "GRO is enabled\n"); - - 
adapter->state = __IAVF_DOWN; - set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); - iavf_misc_irq_enable(adapter); - wake_up(&adapter->down_waitqueue); - - adapter->rss_key = kzalloc(adapter->rss_key_size, GFP_KERNEL); - adapter->rss_lut = kzalloc(adapter->rss_lut_size, GFP_KERNEL); - if (!adapter->rss_key || !adapter->rss_lut) - goto err_mem; - - if (RSS_AQ(adapter)) { - adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; - mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); - } else { - iavf_init_rss(adapter); - } - return; -restart: - schedule_delayed_work(&adapter->init_task, msecs_to_jiffies(30)); + queue_delayed_work(iavf_wq, &adapter->init_task, + msecs_to_jiffies(30)); return; -err_mem: - iavf_free_rss(adapter); -err_register: - iavf_free_misc_irq(adapter); -err_sw_init: - iavf_reset_interrupt_capability(adapter); -err_alloc: - kfree(adapter->vf_res); - adapter->vf_res = NULL; -err: - /* Things went into the weeds, so try again later */ +init_failed: if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { - dev_err(&pdev->dev, "Failed to communicate with PF; waiting before retry\n"); + dev_err(&adapter->pdev->dev, + "Failed to communicate with PF; waiting before retry\n"); adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; iavf_shutdown_adminq(hw); adapter->state = __IAVF_STARTUP; - schedule_delayed_work(&adapter->init_task, HZ * 5); + queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); return; } - schedule_delayed_work(&adapter->init_task, HZ); + queue_delayed_work(iavf_wq, &adapter->init_task, HZ); } /** @@ -3683,11 +3752,11 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->reset_task, iavf_reset_task); INIT_WORK(&adapter->adminq_task, iavf_adminq_task); - INIT_WORK(&adapter->watchdog_task, iavf_watchdog_task); + INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); INIT_DELAYED_WORK(&adapter->init_task, iavf_init_task); - schedule_delayed_work(&adapter->init_task, - msecs_to_jiffies(5 * (pdev->devfn & 0x07))); + queue_delayed_work(iavf_wq, &adapter->init_task, + msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Setup the wait queue for indicating transition to down status */ init_waitqueue_head(&adapter->down_waitqueue); @@ -3783,7 +3852,7 @@ static int iavf_resume(struct pci_dev *pdev) return err; } - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); netif_device_attach(netdev); @@ -3843,8 +3912,7 @@ static void iavf_remove(struct pci_dev *pdev) iavf_reset_interrupt_capability(adapter); iavf_free_q_vectors(adapter); - if (adapter->watchdog_timer.function) - del_timer_sync(&adapter->watchdog_timer); + cancel_delayed_work_sync(&adapter->watchdog_task); cancel_work_sync(&adapter->adminq_task); diff --git a/drivers/net/ethernet/intel/iavf/iavf_osdep.h b/drivers/net/ethernet/intel/iavf/iavf_osdep.h index e6e0b0328706..a452ce90679a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_osdep.h +++ b/drivers/net/ethernet/intel/iavf/iavf_osdep.h @@ -44,9 +44,12 @@ struct iavf_virt_mem { #define iavf_allocate_virt_mem(h, m, s) iavf_allocate_virt_mem_d(h, m, s) #define iavf_free_virt_mem(h, m) iavf_free_virt_mem_d(h, m) -#define iavf_debug(h, m, s, ...) iavf_debug_d(h, m, s, ##__VA_ARGS__) -extern void iavf_debug_d(void *hw, u32 mask, char *fmt_str, ...) - __attribute__ ((format(gnu_printf, 3, 4))); +#define iavf_debug(h, m, s, ...) 
\ +do { \ + if (((m) & (h)->debug_mask)) \ + pr_info("iavf %02x:%02x.%x " s, \ + (h)->bus.bus_id, (h)->bus.device, \ + (h)->bus.func, ##__VA_ARGS__); \ +} while (0) -typedef enum iavf_status_code iavf_status; #endif /* _IAVF_OSDEP_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_prototype.h b/drivers/net/ethernet/intel/iavf/iavf_prototype.h index d6685103af39..edebfbbcffdc 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_prototype.h +++ b/drivers/net/ethernet/intel/iavf/iavf_prototype.h @@ -16,39 +16,40 @@ */ /* adminq functions */ -iavf_status iavf_init_adminq(struct iavf_hw *hw); -iavf_status iavf_shutdown_adminq(struct iavf_hw *hw); -void i40e_adminq_init_ring_data(struct iavf_hw *hw); -iavf_status iavf_clean_arq_element(struct iavf_hw *hw, - struct i40e_arq_event_info *e, - u16 *events_pending); -iavf_status iavf_asq_send_command(struct iavf_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ - u16 buff_size, - struct i40e_asq_cmd_details *cmd_details); +enum iavf_status iavf_init_adminq(struct iavf_hw *hw); +enum iavf_status iavf_shutdown_adminq(struct iavf_hw *hw); +void iavf_adminq_init_ring_data(struct iavf_hw *hw); +enum iavf_status iavf_clean_arq_element(struct iavf_hw *hw, + struct iavf_arq_event_info *e, + u16 *events_pending); +enum iavf_status iavf_asq_send_command(struct iavf_hw *hw, + struct iavf_aq_desc *desc, + void *buff, /* can be NULL */ + u16 buff_size, + struct iavf_asq_cmd_details *cmd_details); bool iavf_asq_done(struct iavf_hw *hw); /* debug function for adminq */ void iavf_debug_aq(struct iavf_hw *hw, enum iavf_debug_mask mask, void *desc, void *buffer, u16 buf_len); -void i40e_idle_aq(struct iavf_hw *hw); +void iavf_idle_aq(struct iavf_hw *hw); void iavf_resume_aq(struct iavf_hw *hw); bool iavf_check_asq_alive(struct iavf_hw *hw); -iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading); -const char *iavf_aq_str(struct iavf_hw *hw, enum i40e_admin_queue_err aq_err); -const char *iavf_stat_str(struct iavf_hw *hw, iavf_status stat_err); +enum iavf_status iavf_aq_queue_shutdown(struct iavf_hw *hw, bool unloading); +const char *iavf_aq_str(struct iavf_hw *hw, enum iavf_admin_queue_err aq_err); +const char *iavf_stat_str(struct iavf_hw *hw, enum iavf_status stat_err); -iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 seid, - bool pf_lut, u8 *lut, u16 lut_size); -iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, - bool pf_lut, u8 *lut, u16 lut_size); -iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 seid, - struct i40e_aqc_get_set_rss_key_data *key); -iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, - struct i40e_aqc_get_set_rss_key_data *key); +enum iavf_status iavf_aq_get_rss_lut(struct iavf_hw *hw, u16 seid, + bool pf_lut, u8 *lut, u16 lut_size); +enum iavf_status iavf_aq_set_rss_lut(struct iavf_hw *hw, u16 seid, + bool pf_lut, u8 *lut, u16 lut_size); +enum iavf_status iavf_aq_get_rss_key(struct iavf_hw *hw, u16 seid, + struct iavf_aqc_get_set_rss_key_data *key); +enum iavf_status iavf_aq_set_rss_key(struct iavf_hw *hw, u16 seid, + struct iavf_aqc_get_set_rss_key_data *key); -iavf_status iavf_set_mac_type(struct iavf_hw *hw); +enum iavf_status iavf_set_mac_type(struct iavf_hw *hw); extern struct iavf_rx_ptype_decoded iavf_ptype_lookup[]; @@ -59,9 +60,10 @@ static inline struct iavf_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype) void iavf_vf_parse_hw_config(struct iavf_hw *hw, struct virtchnl_vf_resource *msg); -iavf_status iavf_vf_reset(struct iavf_hw *hw); -iavf_status 
iavf_aq_send_msg_to_pf(struct iavf_hw *hw, - enum virtchnl_ops v_opcode, - iavf_status v_retval, u8 *msg, u16 msglen, - struct i40e_asq_cmd_details *cmd_details); +enum iavf_status iavf_vf_reset(struct iavf_hw *hw); +enum iavf_status iavf_aq_send_msg_to_pf(struct iavf_hw *hw, + enum virtchnl_ops v_opcode, + enum iavf_status v_retval, + u8 *msg, u16 msglen, + struct iavf_asq_cmd_details *cmd_details); #endif /* _IAVF_PROTOTYPE_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_status.h b/drivers/net/ethernet/intel/iavf/iavf_status.h index 46742fab7b8c..46e3d1f6b604 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_status.h +++ b/drivers/net/ethernet/intel/iavf/iavf_status.h @@ -5,74 +5,74 @@ #define _IAVF_STATUS_H_ /* Error Codes */ -enum iavf_status_code { - I40E_SUCCESS = 0, - I40E_ERR_NVM = -1, - I40E_ERR_NVM_CHECKSUM = -2, - I40E_ERR_PHY = -3, - I40E_ERR_CONFIG = -4, - I40E_ERR_PARAM = -5, - I40E_ERR_MAC_TYPE = -6, - I40E_ERR_UNKNOWN_PHY = -7, - I40E_ERR_LINK_SETUP = -8, - I40E_ERR_ADAPTER_STOPPED = -9, - I40E_ERR_INVALID_MAC_ADDR = -10, - I40E_ERR_DEVICE_NOT_SUPPORTED = -11, - I40E_ERR_MASTER_REQUESTS_PENDING = -12, - I40E_ERR_INVALID_LINK_SETTINGS = -13, - I40E_ERR_AUTONEG_NOT_COMPLETE = -14, - I40E_ERR_RESET_FAILED = -15, - I40E_ERR_SWFW_SYNC = -16, - I40E_ERR_NO_AVAILABLE_VSI = -17, - I40E_ERR_NO_MEMORY = -18, - I40E_ERR_BAD_PTR = -19, - I40E_ERR_RING_FULL = -20, - I40E_ERR_INVALID_PD_ID = -21, - I40E_ERR_INVALID_QP_ID = -22, - I40E_ERR_INVALID_CQ_ID = -23, - I40E_ERR_INVALID_CEQ_ID = -24, - I40E_ERR_INVALID_AEQ_ID = -25, - I40E_ERR_INVALID_SIZE = -26, - I40E_ERR_INVALID_ARP_INDEX = -27, - I40E_ERR_INVALID_FPM_FUNC_ID = -28, - I40E_ERR_QP_INVALID_MSG_SIZE = -29, - I40E_ERR_QP_TOOMANY_WRS_POSTED = -30, - I40E_ERR_INVALID_FRAG_COUNT = -31, - I40E_ERR_QUEUE_EMPTY = -32, - I40E_ERR_INVALID_ALIGNMENT = -33, - I40E_ERR_FLUSHED_QUEUE = -34, - I40E_ERR_INVALID_PUSH_PAGE_INDEX = -35, - I40E_ERR_INVALID_IMM_DATA_SIZE = -36, - I40E_ERR_TIMEOUT = -37, - I40E_ERR_OPCODE_MISMATCH = -38, - I40E_ERR_CQP_COMPL_ERROR = -39, - I40E_ERR_INVALID_VF_ID = -40, - I40E_ERR_INVALID_HMCFN_ID = -41, - I40E_ERR_BACKING_PAGE_ERROR = -42, - I40E_ERR_NO_PBLCHUNKS_AVAILABLE = -43, - I40E_ERR_INVALID_PBLE_INDEX = -44, - I40E_ERR_INVALID_SD_INDEX = -45, - I40E_ERR_INVALID_PAGE_DESC_INDEX = -46, - I40E_ERR_INVALID_SD_TYPE = -47, - I40E_ERR_MEMCPY_FAILED = -48, - I40E_ERR_INVALID_HMC_OBJ_INDEX = -49, - I40E_ERR_INVALID_HMC_OBJ_COUNT = -50, - I40E_ERR_INVALID_SRQ_ARM_LIMIT = -51, - I40E_ERR_SRQ_ENABLED = -52, - I40E_ERR_ADMIN_QUEUE_ERROR = -53, - I40E_ERR_ADMIN_QUEUE_TIMEOUT = -54, - I40E_ERR_BUF_TOO_SHORT = -55, - I40E_ERR_ADMIN_QUEUE_FULL = -56, - I40E_ERR_ADMIN_QUEUE_NO_WORK = -57, - I40E_ERR_BAD_IWARP_CQE = -58, - I40E_ERR_NVM_BLANK_MODE = -59, - I40E_ERR_NOT_IMPLEMENTED = -60, - I40E_ERR_PE_DOORBELL_NOT_ENABLED = -61, - I40E_ERR_DIAG_TEST_FAILED = -62, - I40E_ERR_NOT_READY = -63, - I40E_NOT_SUPPORTED = -64, - I40E_ERR_FIRMWARE_API_VERSION = -65, - I40E_ERR_ADMIN_QUEUE_CRITICAL_ERROR = -66, +enum iavf_status { + IAVF_SUCCESS = 0, + IAVF_ERR_NVM = -1, + IAVF_ERR_NVM_CHECKSUM = -2, + IAVF_ERR_PHY = -3, + IAVF_ERR_CONFIG = -4, + IAVF_ERR_PARAM = -5, + IAVF_ERR_MAC_TYPE = -6, + IAVF_ERR_UNKNOWN_PHY = -7, + IAVF_ERR_LINK_SETUP = -8, + IAVF_ERR_ADAPTER_STOPPED = -9, + IAVF_ERR_INVALID_MAC_ADDR = -10, + IAVF_ERR_DEVICE_NOT_SUPPORTED = -11, + IAVF_ERR_MASTER_REQUESTS_PENDING = -12, + IAVF_ERR_INVALID_LINK_SETTINGS = -13, + IAVF_ERR_AUTONEG_NOT_COMPLETE = -14, + IAVF_ERR_RESET_FAILED = -15, + IAVF_ERR_SWFW_SYNC = -16, + 
IAVF_ERR_NO_AVAILABLE_VSI = -17, + IAVF_ERR_NO_MEMORY = -18, + IAVF_ERR_BAD_PTR = -19, + IAVF_ERR_RING_FULL = -20, + IAVF_ERR_INVALID_PD_ID = -21, + IAVF_ERR_INVALID_QP_ID = -22, + IAVF_ERR_INVALID_CQ_ID = -23, + IAVF_ERR_INVALID_CEQ_ID = -24, + IAVF_ERR_INVALID_AEQ_ID = -25, + IAVF_ERR_INVALID_SIZE = -26, + IAVF_ERR_INVALID_ARP_INDEX = -27, + IAVF_ERR_INVALID_FPM_FUNC_ID = -28, + IAVF_ERR_QP_INVALID_MSG_SIZE = -29, + IAVF_ERR_QP_TOOMANY_WRS_POSTED = -30, + IAVF_ERR_INVALID_FRAG_COUNT = -31, + IAVF_ERR_QUEUE_EMPTY = -32, + IAVF_ERR_INVALID_ALIGNMENT = -33, + IAVF_ERR_FLUSHED_QUEUE = -34, + IAVF_ERR_INVALID_PUSH_PAGE_INDEX = -35, + IAVF_ERR_INVALID_IMM_DATA_SIZE = -36, + IAVF_ERR_TIMEOUT = -37, + IAVF_ERR_OPCODE_MISMATCH = -38, + IAVF_ERR_CQP_COMPL_ERROR = -39, + IAVF_ERR_INVALID_VF_ID = -40, + IAVF_ERR_INVALID_HMCFN_ID = -41, + IAVF_ERR_BACKING_PAGE_ERROR = -42, + IAVF_ERR_NO_PBLCHUNKS_AVAILABLE = -43, + IAVF_ERR_INVALID_PBLE_INDEX = -44, + IAVF_ERR_INVALID_SD_INDEX = -45, + IAVF_ERR_INVALID_PAGE_DESC_INDEX = -46, + IAVF_ERR_INVALID_SD_TYPE = -47, + IAVF_ERR_MEMCPY_FAILED = -48, + IAVF_ERR_INVALID_HMC_OBJ_INDEX = -49, + IAVF_ERR_INVALID_HMC_OBJ_COUNT = -50, + IAVF_ERR_INVALID_SRQ_ARM_LIMIT = -51, + IAVF_ERR_SRQ_ENABLED = -52, + IAVF_ERR_ADMIN_QUEUE_ERROR = -53, + IAVF_ERR_ADMIN_QUEUE_TIMEOUT = -54, + IAVF_ERR_BUF_TOO_SHORT = -55, + IAVF_ERR_ADMIN_QUEUE_FULL = -56, + IAVF_ERR_ADMIN_QUEUE_NO_WORK = -57, + IAVF_ERR_BAD_IWARP_CQE = -58, + IAVF_ERR_NVM_BLANK_MODE = -59, + IAVF_ERR_NOT_IMPLEMENTED = -60, + IAVF_ERR_PE_DOORBELL_NOT_ENABLED = -61, + IAVF_ERR_DIAG_TEST_FAILED = -62, + IAVF_ERR_NOT_READY = -63, + IAVF_NOT_SUPPORTED = -64, + IAVF_ERR_FIRMWARE_API_VERSION = -65, + IAVF_ERR_ADMIN_QUEUE_CRITICAL_ERROR = -66, }; #endif /* _IAVF_STATUS_H_ */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_trace.h b/drivers/net/ethernet/intel/iavf/iavf_trace.h index 1474f5539751..1058e68a02b4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_trace.h +++ b/drivers/net/ethernet/intel/iavf/iavf_trace.h @@ -17,8 +17,8 @@ /* See trace-events-sample.h for a detailed description of why this * guard clause is different from most normal include files. 
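(Stepping back to the iavf_status.h hunk just above: the move out of the i40e namespace is purely mechanical, and every numeric value is preserved, so status codes exchanged with the PF over virtchnl decode exactly as before.) As a hedged illustration only — no such helper exists in this patch — the renamed identifiers would surface in diagnostics like so:

/* Hypothetical logging helper; the enumerators are from the hunk
 * above, the function itself is illustrative.
 */
static const char *iavf_stat_str(enum iavf_status status)
{
	switch (status) {
	case IAVF_SUCCESS:
		return "success";
	case IAVF_ERR_ADMIN_QUEUE_NO_WORK:
		return "admin queue empty";
	case IAVF_ERR_ADMIN_QUEUE_TIMEOUT:
		return "admin queue timeout";
	default:
		return "unhandled iavf_status";
	}
}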
*/ -#if !defined(_I40E_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) -#define _I40E_TRACE_H_ +#if !defined(_IAVF_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _IAVF_TRACE_H_ #include <linux/tracepoint.h> diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index 06d1509d57f7..0cca1b589b56 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -190,7 +190,7 @@ void iavf_detect_recover_hung(struct iavf_vsi *vsi) static bool iavf_clean_tx_irq(struct iavf_vsi *vsi, struct iavf_ring *tx_ring, int napi_budget) { - u16 i = tx_ring->next_to_clean; + int i = tx_ring->next_to_clean; struct iavf_tx_buffer *tx_buf; struct iavf_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; @@ -379,19 +379,19 @@ static inline unsigned int iavf_itr_divisor(struct iavf_q_vector *q_vector) unsigned int divisor; switch (q_vector->adapter->link_speed) { - case I40E_LINK_SPEED_40GB: + case IAVF_LINK_SPEED_40GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 1024; break; - case I40E_LINK_SPEED_25GB: - case I40E_LINK_SPEED_20GB: + case IAVF_LINK_SPEED_25GB: + case IAVF_LINK_SPEED_20GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 512; break; default: - case I40E_LINK_SPEED_10GB: + case IAVF_LINK_SPEED_10GB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 256; break; - case I40E_LINK_SPEED_1GB: - case I40E_LINK_SPEED_100MB: + case IAVF_LINK_SPEED_1GB: + case IAVF_LINK_SPEED_100MB: divisor = IAVF_ITR_ADAPTIVE_MIN_INC * 32; break; } @@ -1236,6 +1236,9 @@ static void iavf_add_rx_frag(struct iavf_ring *rx_ring, unsigned int truesize = SKB_DATA_ALIGN(size + iavf_rx_offset(rx_ring)); #endif + if (!size) + return; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); @@ -1260,6 +1263,9 @@ static struct iavf_rx_buffer *iavf_get_rx_buffer(struct iavf_ring *rx_ring, { struct iavf_rx_buffer *rx_buffer; + if (!size) + return NULL; + rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; prefetchw(rx_buffer->page); @@ -1290,7 +1296,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer, unsigned int size) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + void *va; #if (PAGE_SIZE < 8192) unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; #else @@ -1299,7 +1305,10 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, unsigned int headlen; struct sk_buff *skb; + if (!rx_buffer) + return NULL; /* prefetch first cache line of first page */ + va = page_address(rx_buffer->page) + rx_buffer->page_offset; prefetch(va); #if L1_CACHE_BYTES < 128 prefetch(va + L1_CACHE_BYTES); @@ -1354,7 +1363,7 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer, unsigned int size) { - void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; + void *va; #if (PAGE_SIZE < 8192) unsigned int truesize = iavf_rx_pg_size(rx_ring) / 2; #else @@ -1363,7 +1372,10 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, #endif struct sk_buff *skb; + if (!rx_buffer) + return NULL; /* prefetch first cache line of first page */ + va = page_address(rx_buffer->page) + rx_buffer->page_offset; prefetch(va); #if L1_CACHE_BYTES < 128 prefetch(va + L1_CACHE_BYTES); @@ -1398,6 +1410,9 @@ static struct sk_buff *iavf_build_skb(struct iavf_ring *rx_ring, static void iavf_put_rx_buffer(struct iavf_ring *rx_ring, struct iavf_rx_buffer *rx_buffer) { + if (!rx_buffer) + 
return; + if (iavf_can_reuse_rx_page(rx_buffer)) { /* hand second half of page back to the ring */ iavf_reuse_rx_page(rx_ring, rx_buffer); @@ -1496,11 +1511,12 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) * verified the descriptor has been written back. */ dma_rmb(); +#define IAVF_RXD_DD BIT(IAVF_RX_DESC_STATUS_DD_SHIFT) + if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD)) + break; size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >> IAVF_RXD_QW1_LENGTH_PBUF_SHIFT; - if (!size) - break; iavf_trace(clean_rx_irq, rx_ring, rx_desc, skb); rx_buffer = iavf_get_rx_buffer(rx_ring, size); @@ -1516,7 +1532,8 @@ static int iavf_clean_rx_irq(struct iavf_ring *rx_ring, int budget) /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->rx_stats.alloc_buff_failed++; - rx_buffer->pagecnt_bias++; + if (rx_buffer) + rx_buffer->pagecnt_bias++; break; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_type.h b/drivers/net/ethernet/intel/iavf/iavf_type.h index ca89583613fb..7190a40c540c 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_type.h +++ b/drivers/net/ethernet/intel/iavf/iavf_type.h @@ -7,7 +7,7 @@ #include "iavf_status.h" #include "iavf_osdep.h" #include "iavf_register.h" -#include "i40e_adminq.h" +#include "iavf_adminq.h" #include "iavf_devids.h" #define IAVF_RXQ_CTX_DBUFF_SHIFT 7 @@ -21,7 +21,7 @@ /* forward declaration */ struct iavf_hw; -typedef void (*I40E_ADMINQ_CALLBACK)(struct iavf_hw *, struct i40e_aq_desc *); +typedef void (*IAVF_ADMINQ_CALLBACK)(struct iavf_hw *, struct iavf_aq_desc *); /* Data type manipulation macros. */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index e64751da0921..d49d58a6de80 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -22,7 +22,7 @@ static int iavf_send_pf_msg(struct iavf_adapter *adapter, enum virtchnl_ops op, u8 *msg, u16 len) { struct iavf_hw *hw = &adapter->hw; - iavf_status err; + enum iavf_status err; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) return 0; /* nothing to see here, move along */ @@ -41,7 +41,7 @@ static int iavf_send_pf_msg(struct iavf_adapter *adapter, * * Send API version admin queue message to the PF. The reply is not checked * in this function. Returns 0 if the message was successfully - * sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not. + * sent, or one of the IAVF_ADMIN_QUEUE_ERROR_ statuses if not. **/ int iavf_send_api_ver(struct iavf_adapter *adapter) { @@ -60,16 +60,16 @@ int iavf_send_api_ver(struct iavf_adapter *adapter) * * Compare API versions with the PF. Must be called after admin queue is * initialized. Returns 0 if API versions match, -EIO if they do not, - * I40E_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors + * IAVF_ERR_ADMIN_QUEUE_NO_WORK if the admin queue is empty, and any errors * from the firmware are propagated. 
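(A note on the iavf_txrx.c hunks above, before the virtchnl changes continue: the RX cleanup loop previously bailed out on a zero-length descriptor, which wrongly ended the poll for legitimate zero-sized completions. It now terminates only when the descriptor-done bit is clear, and the buffer helpers are taught to tolerate size == 0 and a NULL rx_buffer.) A condensed sketch of the resulting flow, abbreviated from the hunks above:

	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
	dma_rmb();	/* no other descriptor fields read before DD check */
	if (!iavf_test_staterr(rx_desc, IAVF_RXD_DD))
		break;	/* descriptor not written back yet */

	size = (qword & IAVF_RXD_QW1_LENGTH_PBUF_MASK) >>
	       IAVF_RXD_QW1_LENGTH_PBUF_SHIFT;	/* may legitimately be 0 */

	rx_buffer = iavf_get_rx_buffer(rx_ring, size);	/* NULL when !size */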
**/ int iavf_verify_api_ver(struct iavf_adapter *adapter) { struct virtchnl_version_info *pf_vvi; struct iavf_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; + struct iavf_arq_event_info event; enum virtchnl_ops op; - iavf_status err; + enum iavf_status err; event.buf_len = IAVF_MAX_AQ_BUF_SIZE; event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); @@ -92,7 +92,7 @@ int iavf_verify_api_ver(struct iavf_adapter *adapter) } - err = (iavf_status)le32_to_cpu(event.desc.cookie_low); + err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); if (err) goto out_alloc; @@ -123,7 +123,7 @@ out: * * Send VF configuration request admin queue message to the PF. The reply * is not checked in this function. Returns 0 if the message was - * successfully sent, or one of the I40E_ADMIN_QUEUE_ERROR_ statuses if not. + * successfully sent, or one of the IAVF_ADMIN_QUEUE_ERROR_ statuses if not. **/ int iavf_send_vf_config_msg(struct iavf_adapter *adapter) { @@ -189,9 +189,9 @@ static void iavf_validate_num_queues(struct iavf_adapter *adapter) int iavf_get_vf_config(struct iavf_adapter *adapter) { struct iavf_hw *hw = &adapter->hw; - struct i40e_arq_event_info event; + struct iavf_arq_event_info event; enum virtchnl_ops op; - iavf_status err; + enum iavf_status err; u16 len; len = sizeof(struct virtchnl_vf_resource) + @@ -216,7 +216,7 @@ int iavf_get_vf_config(struct iavf_adapter *adapter) break; } - err = (iavf_status)le32_to_cpu(event.desc.cookie_low); + err = (enum iavf_status)le32_to_cpu(event.desc.cookie_low); memcpy(adapter->vf_res, event.msg_buf, min(event.msg_len, len)); /* some PFs send more queues than we should have so validate that @@ -242,7 +242,8 @@ void iavf_configure_queues(struct iavf_adapter *adapter) struct virtchnl_vsi_queue_config_info *vqci; struct virtchnl_queue_pair_info *vqpi; int pairs = adapter->num_active_queues; - int i, len, max_frame = IAVF_MAX_RXBUFFER; + int i, max_frame = IAVF_MAX_RXBUFFER; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -251,8 +252,7 @@ void iavf_configure_queues(struct iavf_adapter *adapter) return; } adapter->current_op = VIRTCHNL_OP_CONFIG_VSI_QUEUES; - len = sizeof(struct virtchnl_vsi_queue_config_info) + - (sizeof(struct virtchnl_queue_pair_info) * pairs); + len = struct_size(vqci, qpair, pairs); vqci = kzalloc(len, GFP_KERNEL); if (!vqci) return; @@ -351,8 +351,9 @@ void iavf_map_queues(struct iavf_adapter *adapter) { struct virtchnl_irq_map_info *vimi; struct virtchnl_vector_map *vecmap; - int v_idx, q_vectors, len; struct iavf_q_vector *q_vector; + int v_idx, q_vectors; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -364,9 +365,7 @@ void iavf_map_queues(struct iavf_adapter *adapter) q_vectors = adapter->num_msix_vectors - NONQ_VECS; - len = sizeof(struct virtchnl_irq_map_info) + - (adapter->num_msix_vectors * - sizeof(struct virtchnl_vector_map)); + len = struct_size(vimi, vecmap, adapter->num_msix_vectors); vimi = kzalloc(len, GFP_KERNEL); if (!vimi) return; @@ -416,7 +415,7 @@ int iavf_request_queues(struct iavf_adapter *adapter, int num) return -EBUSY; } - vfres.num_queue_pairs = num; + vfres.num_queue_pairs = min_t(int, num, num_online_cpus()); adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES; adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; @@ -433,9 +432,10 @@ int iavf_request_queues(struct iavf_adapter *adapter, int num) void iavf_add_ether_addrs(struct iavf_adapter *adapter) { struct 
virtchnl_ether_addr_list *veal; - int len, i = 0, count = 0; struct iavf_mac_filter *f; + int i = 0, count = 0; bool more = false; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -457,15 +457,13 @@ void iavf_add_ether_addrs(struct iavf_adapter *adapter) } adapter->current_op = VIRTCHNL_OP_ADD_ETH_ADDR; - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); if (len > IAVF_MAX_AQ_BUF_SIZE) { dev_warn(&adapter->pdev->dev, "Too many add MAC changes in one request\n"); count = (IAVF_MAX_AQ_BUF_SIZE - sizeof(struct virtchnl_ether_addr_list)) / sizeof(struct virtchnl_ether_addr); - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); more = true; } @@ -505,8 +503,9 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) { struct virtchnl_ether_addr_list *veal; struct iavf_mac_filter *f, *ftmp; - int len, i = 0, count = 0; + int i = 0, count = 0; bool more = false; + size_t len; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { /* bail because we already have a command pending */ @@ -528,15 +527,13 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) } adapter->current_op = VIRTCHNL_OP_DEL_ETH_ADDR; - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); if (len > IAVF_MAX_AQ_BUF_SIZE) { dev_warn(&adapter->pdev->dev, "Too many delete MAC changes in one request\n"); count = (IAVF_MAX_AQ_BUF_SIZE - sizeof(struct virtchnl_ether_addr_list)) / sizeof(struct virtchnl_ether_addr); - len = sizeof(struct virtchnl_ether_addr_list) + - (count * sizeof(struct virtchnl_ether_addr)); + len = struct_size(veal, list, count); more = true; } veal = kzalloc(len, GFP_ATOMIC); @@ -938,22 +935,22 @@ static void iavf_print_link_message(struct iavf_adapter *adapter) } switch (adapter->link_speed) { - case I40E_LINK_SPEED_40GB: + case IAVF_LINK_SPEED_40GB: speed = "40 G"; break; - case I40E_LINK_SPEED_25GB: + case IAVF_LINK_SPEED_25GB: speed = "25 G"; break; - case I40E_LINK_SPEED_20GB: + case IAVF_LINK_SPEED_20GB: speed = "20 G"; break; - case I40E_LINK_SPEED_10GB: + case IAVF_LINK_SPEED_10GB: speed = "10 G"; break; - case I40E_LINK_SPEED_1GB: + case IAVF_LINK_SPEED_1GB: speed = "1000 M"; break; - case I40E_LINK_SPEED_100MB: + case IAVF_LINK_SPEED_100MB: speed = "100 M"; break; default: @@ -973,7 +970,7 @@ static void iavf_print_link_message(struct iavf_adapter *adapter) void iavf_enable_channels(struct iavf_adapter *adapter) { struct virtchnl_tc_info *vti = NULL; - u16 len; + size_t len; int i; if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) { @@ -983,9 +980,7 @@ void iavf_enable_channels(struct iavf_adapter *adapter) return; } - len = (adapter->num_tc * sizeof(struct virtchnl_channel_info)) + - sizeof(struct virtchnl_tc_info); - + len = struct_size(vti, list, adapter->num_tc - 1); vti = kzalloc(len, GFP_KERNEL); if (!vti) return; @@ -1184,8 +1179,8 @@ void iavf_request_reset(struct iavf_adapter *adapter) * This function handles the reply messages. 
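(Summarizing the allocation-size conversions above: the open-coded sizeof(header) + count * sizeof(element) arithmetic is replaced by struct_size() from <linux/overflow.h>, which computes the same value with overflow checking, saturating to SIZE_MAX instead of wrapping, and the result now lands in a size_t rather than an int. One subtlety in iavf_enable_channels(): virtchnl_tc_info declares a one-element trailing array rather than a true flexible array member, hence the element count of num_tc - 1.) A compilable toy with made-up names showing the idiom:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct demo_list {
	u16 count;
	u32 items[];			/* flexible array member */
};

static struct demo_list *demo_alloc(u16 n)
{
	/* same as sizeof(*d) + n * sizeof(u32), but saturates on overflow */
	struct demo_list *d = kzalloc(struct_size(d, items, n), GFP_KERNEL);

	if (d)
		d->count = n;
	return d;
}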
**/ void iavf_virtchnl_completion(struct iavf_adapter *adapter, - enum virtchnl_ops v_opcode, iavf_status v_retval, - u8 *msg, u16 msglen) + enum virtchnl_ops v_opcode, + enum iavf_status v_retval, u8 *msg, u16 msglen) { struct net_device *netdev = adapter->netdev; @@ -1238,7 +1233,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) { adapter->flags |= IAVF_FLAG_RESET_PENDING; dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); - schedule_work(&adapter->reset_task); + queue_work(iavf_wq, &adapter->reset_task); } break; default: diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 792e6e42030e..9ee6b55553c0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -44,15 +44,22 @@ extern const char ice_drv_ver[]; #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 -#define ICE_MIN_NUM_DESC ICE_REQ_DESC_MULTIPLE +#define ICE_MIN_NUM_DESC 64 #define ICE_MAX_NUM_DESC 8160 -/* set default number of Rx/Tx descriptors to the minimum between - * ICE_MAX_NUM_DESC and the number of descriptors to fill up an entire page +#define ICE_DFLT_MIN_RX_DESC 512 +/* if the default number of Rx descriptors between ICE_MAX_NUM_DESC and the + * number of descriptors to fill up an entire page is greater than or equal to + * ICE_DFLT_MIN_RX_DESC set it based on page size, otherwise set it to + * ICE_DFLT_MIN_RX_DESC + */ +#define ICE_DFLT_NUM_RX_DESC \ + min_t(u16, ICE_MAX_NUM_DESC, \ + max_t(u16, ALIGN(PAGE_SIZE / sizeof(union ice_32byte_rx_desc), \ + ICE_REQ_DESC_MULTIPLE), \ + ICE_DFLT_MIN_RX_DESC)) +/* set default number of Tx descriptors to the minimum between ICE_MAX_NUM_DESC + * and the number of descriptors to fill up an entire page */ -#define ICE_DFLT_NUM_RX_DESC min_t(u16, ICE_MAX_NUM_DESC, \ - ALIGN(PAGE_SIZE / \ - sizeof(union ice_32byte_rx_desc), \ - ICE_REQ_DESC_MULTIPLE)) #define ICE_DFLT_NUM_TX_DESC min_t(u16, ICE_MAX_NUM_DESC, \ ALIGN(PAGE_SIZE / \ sizeof(struct ice_tx_desc), \ @@ -160,7 +167,7 @@ struct ice_tc_cfg { struct ice_res_tracker { u16 num_entries; - u16 search_hint; + u16 end; u16 list[1]; }; @@ -182,6 +189,7 @@ struct ice_sw { }; enum ice_state { + __ICE_TESTING, __ICE_DOWN, __ICE_NEEDS_RESTART, __ICE_PREPARED_FOR_RESET, /* set by driver when prepared */ @@ -244,8 +252,7 @@ struct ice_vsi { u32 rx_buf_failed; u32 rx_page_failed; int num_q_vectors; - int sw_base_vector; /* Irq base for OS reserved vectors */ - int hw_base_vector; /* HW (absolute) index of a vector */ + int base_vector; /* IRQ base for OS reserved vectors */ enum ice_vsi_type type; u16 vsi_num; /* HW (absolute) index of this VSI */ u16 idx; /* software index in pf->vsi[] */ @@ -277,10 +284,10 @@ struct ice_vsi { struct list_head tmp_sync_list; /* MAC filters to be synced */ struct list_head tmp_unsync_list; /* MAC filters to be unsynced */ - u8 irqs_ready; - u8 current_isup; /* Sync 'link up' logging */ - u8 stat_offsets_loaded; - u8 vlan_ena; + u8 irqs_ready:1; + u8 current_isup:1; /* Sync 'link up' logging */ + u8 stat_offsets_loaded:1; + u8 vlan_ena:1; /* queue information */ u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -330,7 +337,7 @@ enum ice_pf_flags { ICE_FLAG_DCB_CAPABLE, ICE_FLAG_DCB_ENA, ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, - ICE_FLAG_DISABLE_FW_LLDP, + ICE_FLAG_ENABLE_FW_LLDP, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -340,10 +347,12 @@ struct ice_pf { /* OS reserved IRQ details */ struct 
msix_entry *msix_entries; - struct ice_res_tracker *sw_irq_tracker; - - /* HW reserved Interrupts for this PF */ - struct ice_res_tracker *hw_irq_tracker; + struct ice_res_tracker *irq_tracker; + /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the + * number of MSIX vectors needed for all SR-IOV VFs from the number of + * MSIX vectors allowed on this PF. + */ + u16 sriov_base_vector; struct ice_vsi **vsi; /* VSIs created by the driver */ struct ice_sw *first_sw; /* first switch created by firmware */ @@ -365,10 +374,8 @@ struct ice_pf { struct mutex sw_mutex; /* lock for protecting VSI alloc flow */ u32 msg_enable; u32 hw_csum_rx_error; - u32 sw_oicr_idx; /* Other interrupt cause SW vector index */ + u32 oicr_idx; /* Other interrupt cause MSIX vector index */ u32 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */ - u32 hw_oicr_idx; /* Other interrupt cause vector HW index */ - u32 num_avail_hw_msix; /* remaining HW MSIX vectors left unclaimed */ u32 num_lan_msix; /* Total MSIX vectors for base driver */ u16 num_lan_tx; /* num LAN Tx queues setup */ u16 num_lan_rx; /* num LAN Rx queues setup */ @@ -384,7 +391,7 @@ struct ice_pf { struct ice_hw_port_stats stats; struct ice_hw_port_stats stats_prev; struct ice_hw hw; - u8 stat_prev_loaded; /* has previous stats been loaded */ + u8 stat_prev_loaded:1; /* has previous stats been loaded */ #ifdef CONFIG_DCB u16 dcbx_cap; #endif /* CONFIG_DCB */ @@ -392,6 +399,7 @@ struct ice_pf { unsigned long tx_timeout_last_recovery; u32 tx_timeout_recovery_level; char int_name[ICE_INT_NAME_STR_LEN]; + u32 sw_int_count; }; struct ice_netdev_priv { @@ -409,7 +417,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi, struct ice_q_vector *q_vector) { u32 vector = (vsi && q_vector) ? 
q_vector->reg_idx : - ((struct ice_pf *)hw->back)->hw_oicr_idx; + ((struct ice_pf *)hw->back)->oicr_idx; int itr = ICE_ITR_NONE; u32 val; @@ -444,17 +452,22 @@ ice_find_vsi_by_type(struct ice_pf *pf, enum ice_vsi_type type) return NULL; } +int ice_vsi_setup_tx_rings(struct ice_vsi *vsi); +int ice_vsi_setup_rx_rings(struct ice_vsi *vsi); void ice_set_ethtool_ops(struct net_device *netdev); int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); +int ice_vsi_cfg(struct ice_vsi *vsi); +struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size); void ice_print_link_msg(struct ice_vsi *vsi, bool isup); -void ice_napi_del(struct ice_vsi *vsi); #ifdef CONFIG_DCB int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked); void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked); #endif /* CONFIG_DCB */ +int ice_open(struct net_device *netdev); +int ice_stop(struct net_device *netdev); #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 6ef083002f5b..765e3c2ed045 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -35,8 +35,8 @@ struct ice_aqc_get_ver { /* Queue Shutdown (direct 0x0003) */ struct ice_aqc_q_shutdown { -#define ICE_AQC_DRIVER_UNLOADING BIT(0) __le32 driver_unloading; +#define ICE_AQC_DRIVER_UNLOADING BIT(0) u8 reserved[12]; }; @@ -120,11 +120,9 @@ struct ice_aqc_manage_mac_read { #define ICE_AQC_MAN_MAC_WOL_ADDR_VALID BIT(7) #define ICE_AQC_MAN_MAC_READ_S 4 #define ICE_AQC_MAN_MAC_READ_M (0xF << ICE_AQC_MAN_MAC_READ_S) - u8 lport_num; - u8 lport_num_valid; -#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID BIT(0) + u8 rsvd[2]; u8 num_addr; /* Used in response */ - u8 reserved[3]; + u8 rsvd1[3]; __le32 addr_high; __le32 addr_low; }; @@ -140,7 +138,7 @@ struct ice_aqc_manage_mac_read_resp { /* Manage MAC address, write command - direct (0x0108) */ struct ice_aqc_manage_mac_write { - u8 port_num; + u8 rsvd; u8 flags; #define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0) #define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1) @@ -920,6 +918,8 @@ struct ice_aqc_get_phy_caps_data { #define ICE_AQC_PHY_EN_LINK BIT(3) #define ICE_AQC_PHY_AN_MODE BIT(4) #define ICE_AQC_GET_PHY_EN_MOD_QUAL BIT(5) +#define ICE_AQC_PHY_EN_AUTO_FEC BIT(7) +#define ICE_AQC_PHY_CAPS_MASK ICE_M(0xff, 0) u8 low_power_ctrl; #define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0) __le16 eee_cap; @@ -932,6 +932,7 @@ struct ice_aqc_get_phy_caps_data { #define ICE_AQC_PHY_EEE_EN_40GBASE_KR4 BIT(6) __le16 eeer_value; u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */ + u8 phy_fw_ver[8]; u8 link_fec_options; #define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN BIT(0) #define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1) @@ -940,6 +941,8 @@ struct ice_aqc_get_phy_caps_data { #define ICE_AQC_PHY_FEC_25G_RS_544_REQ BIT(4) #define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6) #define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7) +#define ICE_AQC_PHY_FEC_MASK ICE_M(0xdf, 0) + u8 rsvd1; /* Byte 35 reserved */ u8 extended_compliance_code; #define ICE_MODULE_TYPE_TOTAL_BYTE 3 u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE]; @@ -954,13 +957,14 @@ struct ice_aqc_get_phy_caps_data { #define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS 0xA0 #define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS 0x86 u8 qualified_module_count; 
+ u8 rsvd2[7]; /* Bytes 47:41 reserved */ #define ICE_AQC_QUAL_MOD_COUNT_MAX 16 struct { u8 v_oui[3]; - u8 rsvd1; + u8 rsvd3; u8 v_part[16]; __le32 v_rev; - __le64 rsvd8; + __le64 rsvd4; } qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX]; }; @@ -1062,6 +1066,7 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_LINK_25G_KR_FEC_EN BIT(0) #define ICE_AQ_LINK_25G_RS_528_FEC_EN BIT(1) #define ICE_AQ_LINK_25G_RS_544_FEC_EN BIT(2) +#define ICE_AQ_FEC_MASK ICE_M(0x7, 0) /* Pacing Config */ #define ICE_AQ_CFG_PACING_S 3 #define ICE_AQ_CFG_PACING_M (0xF << ICE_AQ_CFG_PACING_S) @@ -1112,6 +1117,14 @@ struct ice_aqc_set_event_mask { u8 reserved1[6]; }; +/* Set MAC Loopback command (direct 0x0620) */ +struct ice_aqc_set_mac_lb { + u8 lb_mode; +#define ICE_AQ_MAC_LB_EN BIT(0) +#define ICE_AQ_MAC_LB_OSC_CLK BIT(1) + u8 reserved[15]; +}; + /* Set Port Identification LED (direct, 0x06E9) */ struct ice_aqc_set_port_id_led { u8 lport_num; @@ -1145,6 +1158,17 @@ struct ice_aqc_nvm { __le32 addr_low; }; +/* NVM Checksum Command (direct, 0x0706) */ +struct ice_aqc_nvm_checksum { + u8 flags; +#define ICE_AQC_NVM_CHECKSUM_VERIFY BIT(0) +#define ICE_AQC_NVM_CHECKSUM_RECALC BIT(1) + u8 rsvd; + __le16 checksum; /* Used only by response */ +#define ICE_AQC_NVM_CHECKSUM_CORRECT 0xBABA + u8 rsvd2[12]; +}; + /** * Send to PF command (indirect 0x0801) ID is only used by PF * @@ -1249,7 +1273,7 @@ struct ice_aqc_get_cee_dcb_cfg_resp { }; /* Set Local LLDP MIB (indirect 0x0A08) - * Used to replace the local MIB of a given LLDP agent. e.g. DCBx + * Used to replace the local MIB of a given LLDP agent. e.g. DCBX */ struct ice_aqc_lldp_set_local_mib { u8 type; @@ -1266,7 +1290,7 @@ struct ice_aqc_lldp_set_local_mib { }; /* Stop/Start LLDP Agent (direct 0x0A09) - * Used for stopping/starting specific LLDP agent. e.g. DCBx. + * Used for stopping/starting specific LLDP agent. e.g. DCBX. * The same structure is used for the response, with the command field * being used as the status field. 
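(On the two admin queue commands introduced above, Set MAC Loopback 0x0620 and NVM Checksum 0x0706: both are "direct" commands, meaning the whole payload travels in the descriptor's 16-byte params area, which is why each struct pads out with reserved bytes to exactly 16.) Purely as an illustration of that invariant — the driver does not necessarily express it this way — a build-time check would read:

/* Illustrative only: direct AQ command payloads must fill exactly the
 * 16-byte params area of struct ice_aq_desc.
 */
static_assert(sizeof(struct ice_aqc_set_mac_lb) == 16,
	      "direct AQ command must be 16 bytes");
static_assert(sizeof(struct ice_aqc_nvm_checksum) == 16,
	      "direct AQ command must be 16 bytes");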
*/ @@ -1539,6 +1563,7 @@ struct ice_aq_desc { struct ice_aqc_query_txsched_res query_sched_res; struct ice_aqc_query_port_ets port_ets; struct ice_aqc_nvm nvm; + struct ice_aqc_nvm_checksum nvm_checksum; struct ice_aqc_pf_vf_msg virt; struct ice_aqc_lldp_get_mib lldp_get_mib; struct ice_aqc_lldp_set_mib_change lldp_set_event; @@ -1554,6 +1579,7 @@ struct ice_aq_desc { struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res; struct ice_aqc_fw_logging fw_logging; struct ice_aqc_get_clear_fw_log get_clear_fw_log; + struct ice_aqc_set_mac_lb set_mac_lb; struct ice_aqc_alloc_free_res_cmd sw_res_ctrl; struct ice_aqc_set_event_mask set_event_mask; struct ice_aqc_get_link_status get_link_status; @@ -1642,10 +1668,12 @@ enum ice_adminq_opc { ice_aqc_opc_restart_an = 0x0605, ice_aqc_opc_get_link_status = 0x0607, ice_aqc_opc_set_event_mask = 0x0613, + ice_aqc_opc_set_mac_lb = 0x0620, ice_aqc_opc_set_port_id_led = 0x06E9, /* NVM commands */ ice_aqc_opc_nvm_read = 0x0701, + ice_aqc_opc_nvm_checksum = 0x0706, /* PF/VF mailbox commands */ ice_mbx_opc_send_msg_to_pf = 0x0801, @@ -1671,6 +1699,7 @@ enum ice_adminq_opc { /* debug commands */ ice_aqc_opc_fw_logging = 0xFF09, + ice_aqc_opc_fw_logging_info = 0xFF10, }; #endif /* _ICE_ADMINQ_CMD_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index da7878529929..2e0731c1e1a3 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -51,9 +51,6 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) */ void ice_dev_onetime_setup(struct ice_hw *hw) { - /* configure Rx - set non pxe mode */ - wr32(hw, GLLAN_RCTL_0, 0x1); - #define MBX_PF_VT_PFALLOC 0x00231E80 /* set VFs per PF */ wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF)); @@ -307,6 +304,8 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse, hw_link_info->an_info = link_data.an_info; hw_link_info->ext_info = link_data.ext_info; hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size); + hw_link_info->fec_info = link_data.cfg & ICE_AQ_FEC_MASK; + hw_link_info->topo_media_conflict = link_data.topo_media_conflict; hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M; /* update fc info */ @@ -476,6 +475,49 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) ICE_FW_LOG_DESC_SIZE(ICE_AQC_FW_LOG_ID_MAX) /** + * ice_get_fw_log_cfg - get FW logging configuration + * @hw: pointer to the HW struct + */ +static enum ice_status ice_get_fw_log_cfg(struct ice_hw *hw) +{ + struct ice_aqc_fw_logging_data *config; + struct ice_aq_desc desc; + enum ice_status status; + u16 size; + + size = ICE_FW_LOG_DESC_SIZE_MAX; + config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL); + if (!config) + return ICE_ERR_NO_MEMORY; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging_info); + + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_BUF); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + status = ice_aq_send_cmd(hw, &desc, config, size, NULL); + if (!status) { + u16 i; + + /* Save FW logging information into the HW structure */ + for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) { + u16 v, m, flgs; + + v = le16_to_cpu(config->entry[i]); + m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S; + flgs = (v & ICE_AQC_FW_LOG_EN_M) >> ICE_AQC_FW_LOG_EN_S; + + if (m < ICE_AQC_FW_LOG_ID_MAX) + hw->fw_log.evnts[m].cur = flgs; + } + } + + devm_kfree(ice_hw_to_dev(hw), config); + + return status; +} + +/** * ice_cfg_fw_log - configure FW logging * @hw: pointer to the HW struct 
* @enable: enable certain FW logging events if true, disable all if false @@ -529,6 +571,11 @@ static enum ice_status ice_cfg_fw_log(struct ice_hw *hw, bool enable) (!hw->fw_log.actv_evnts || !ice_check_sq_alive(hw, &hw->adminq))) return 0; + /* Get current FW log settings */ + status = ice_get_fw_log_cfg(hw); + if (status) + return status; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging); cmd = &desc.params.fw_logging; @@ -634,17 +681,17 @@ out: */ void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf) { - ice_debug(hw, ICE_DBG_AQ_MSG, "[ FW Log Msg Start ]\n"); - ice_debug_array(hw, ICE_DBG_AQ_MSG, 16, 1, (u8 *)buf, + ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg Start ]\n"); + ice_debug_array(hw, ICE_DBG_FW_LOG, 16, 1, (u8 *)buf, le16_to_cpu(desc->datalen)); - ice_debug(hw, ICE_DBG_AQ_MSG, "[ FW Log Msg End ]\n"); + ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg End ]\n"); } /** * ice_get_itr_intrl_gran - determine int/intrl granularity * @hw: pointer to the HW struct * - * Determines the itr/intrl granularities based on the maximum aggregate + * Determines the ITR/intrl granularities based on the maximum aggregate * bandwidth according to the device's configuration during power-on. */ static void ice_get_itr_intrl_gran(struct ice_hw *hw) @@ -815,6 +862,10 @@ err_unroll_cqinit: /** * ice_deinit_hw - unroll initialization operations done by ice_init_hw * @hw: pointer to the hardware structure + * + * This should be called only during nominal operation, not as a result of + * ice_init_hw() failing since ice_init_hw() will take care of unrolling + * applicable initializations if it fails for any reason. */ void ice_deinit_hw(struct ice_hw *hw) { @@ -1447,6 +1498,7 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, struct ice_hw_func_caps *func_p = NULL; struct ice_hw_dev_caps *dev_p = NULL; struct ice_hw_common_caps *caps; + char const *prefix; u32 i; if (!buf) @@ -1457,9 +1509,11 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, if (opc == ice_aqc_opc_list_dev_caps) { dev_p = &hw->dev_caps; caps = &dev_p->common_cap; + prefix = "dev cap"; } else if (opc == ice_aqc_opc_list_func_caps) { func_p = &hw->func_caps; caps = &func_p->common_cap; + prefix = "func cap"; } else { ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n"); return; @@ -1475,28 +1529,29 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, case ICE_AQC_CAPS_VALID_FUNCTIONS: caps->valid_functions = number; ice_debug(hw, ICE_DBG_INIT, - "HW caps: Valid Functions = %d\n", + "%s: valid functions = %d\n", prefix, caps->valid_functions); break; case ICE_AQC_CAPS_SRIOV: caps->sr_iov_1_1 = (number == 1); ice_debug(hw, ICE_DBG_INIT, - "HW caps: SR-IOV = %d\n", caps->sr_iov_1_1); + "%s: SR-IOV = %d\n", prefix, + caps->sr_iov_1_1); break; case ICE_AQC_CAPS_VF: if (dev_p) { dev_p->num_vfs_exposed = number; ice_debug(hw, ICE_DBG_INIT, - "HW caps: VFs exposed = %d\n", + "%s: VFs exposed = %d\n", prefix, dev_p->num_vfs_exposed); } else if (func_p) { func_p->num_allocd_vfs = number; func_p->vf_base_id = logical_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: VFs allocated = %d\n", + "%s: VFs allocated = %d\n", prefix, func_p->num_allocd_vfs); ice_debug(hw, ICE_DBG_INIT, - "HW caps: VF base_id = %d\n", + "%s: VF base_id = %d\n", prefix, func_p->vf_base_id); } break; @@ -1504,69 +1559,69 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count, if (dev_p) { dev_p->num_vsi_allocd_to_host = number; ice_debug(hw, ICE_DBG_INIT, - "HW caps: Dev.VSI cnt = %d\n", + "%s: num VSI alloc to 
host = %d\n", + prefix, dev_p->num_vsi_allocd_to_host); } else if (func_p) { func_p->guar_num_vsi = ice_get_num_per_func(hw, ICE_MAX_VSI); ice_debug(hw, ICE_DBG_INIT, - "HW caps: Func.VSI cnt = %d\n", - number); + "%s: num guaranteed VSI (fw) = %d\n", + prefix, number); + ice_debug(hw, ICE_DBG_INIT, + "%s: num guaranteed VSI = %d\n", + prefix, func_p->guar_num_vsi); } break; case ICE_AQC_CAPS_RSS: caps->rss_table_size = number; caps->rss_table_entry_width = logical_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: RSS table size = %d\n", + "%s: RSS table size = %d\n", prefix, caps->rss_table_size); ice_debug(hw, ICE_DBG_INIT, - "HW caps: RSS table width = %d\n", + "%s: RSS table width = %d\n", prefix, caps->rss_table_entry_width); break; case ICE_AQC_CAPS_RXQS: caps->num_rxq = number; caps->rxq_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: Num Rx Qs = %d\n", caps->num_rxq); + "%s: num Rx queues = %d\n", prefix, + caps->num_rxq); ice_debug(hw, ICE_DBG_INIT, - "HW caps: Rx first queue ID = %d\n", + "%s: Rx first queue ID = %d\n", prefix, caps->rxq_first_id); break; case ICE_AQC_CAPS_TXQS: caps->num_txq = number; caps->txq_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: Num Tx Qs = %d\n", caps->num_txq); + "%s: num Tx queues = %d\n", prefix, + caps->num_txq); ice_debug(hw, ICE_DBG_INIT, - "HW caps: Tx first queue ID = %d\n", + "%s: Tx first queue ID = %d\n", prefix, caps->txq_first_id); break; case ICE_AQC_CAPS_MSIX: caps->num_msix_vectors = number; caps->msix_vector_first_id = phys_id; ice_debug(hw, ICE_DBG_INIT, - "HW caps: MSIX vector count = %d\n", + "%s: MSIX vector count = %d\n", prefix, caps->num_msix_vectors); ice_debug(hw, ICE_DBG_INIT, - "HW caps: MSIX first vector index = %d\n", + "%s: MSIX first vector index = %d\n", prefix, caps->msix_vector_first_id); break; case ICE_AQC_CAPS_MAX_MTU: caps->max_mtu = number; - if (dev_p) - ice_debug(hw, ICE_DBG_INIT, - "HW caps: Dev.MaxMTU = %d\n", - caps->max_mtu); - else if (func_p) - ice_debug(hw, ICE_DBG_INIT, - "HW caps: func.MaxMTU = %d\n", - caps->max_mtu); + ice_debug(hw, ICE_DBG_INIT, "%s: max MTU = %d\n", + prefix, caps->max_mtu); break; default: ice_debug(hw, ICE_DBG_INIT, - "HW caps: Unknown capability[%d]: 0x%x\n", i, - cap); + "%s: unknown capability[%d]: 0x%x\n", prefix, + i, cap); break; } } @@ -1947,36 +2002,37 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport, */ enum ice_status ice_update_link_info(struct ice_port_info *pi) { - struct ice_aqc_get_phy_caps_data *pcaps; - struct ice_phy_info *phy_info; + struct ice_link_status *li; enum ice_status status; - struct ice_hw *hw; if (!pi) return ICE_ERR_PARAM; - hw = pi->hw; - - pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL); - if (!pcaps) - return ICE_ERR_NO_MEMORY; + li = &pi->phy.link_info; - phy_info = &pi->phy; status = ice_aq_get_link_info(pi, true, NULL, NULL); if (status) - goto out; + return status; + + if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) { + struct ice_aqc_get_phy_caps_data *pcaps; + struct ice_hw *hw; + + hw = pi->hw; + pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), + GFP_KERNEL); + if (!pcaps) + return ICE_ERR_NO_MEMORY; - if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, NULL); - if (status) - goto out; + if (!status) + memcpy(li->module_type, &pcaps->module_type, + sizeof(li->module_type)); - memcpy(phy_info->link_info.module_type, &pcaps->module_type, - sizeof(phy_info->link_info.module_type)); + devm_kfree(ice_hw_to_dev(hw), 
pcaps); } -out: - devm_kfree(ice_hw_to_dev(hw), pcaps); + return status; } @@ -2081,6 +2137,74 @@ out: } /** + * ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data + * @caps: PHY ability structure to copy date from + * @cfg: PHY configuration structure to copy data to + * + * Helper function to copy AQC PHY get ability data to PHY set configuration + * data structure + */ +void +ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps, + struct ice_aqc_set_phy_cfg_data *cfg) +{ + if (!caps || !cfg) + return; + + cfg->phy_type_low = caps->phy_type_low; + cfg->phy_type_high = caps->phy_type_high; + cfg->caps = caps->caps; + cfg->low_power_ctrl = caps->low_power_ctrl; + cfg->eee_cap = caps->eee_cap; + cfg->eeer_value = caps->eeer_value; + cfg->link_fec_opt = caps->link_fec_options; +} + +/** + * ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode + * @cfg: PHY configuration data to set FEC mode + * @fec: FEC mode to configure + * + * Caller should copy ice_aqc_get_phy_caps_data.caps ICE_AQC_PHY_EN_AUTO_FEC + * (bit 7) and ice_aqc_get_phy_caps_data.link_fec_options to cfg.caps + * ICE_AQ_PHY_ENA_AUTO_FEC (bit 7) and cfg.link_fec_options before calling. + */ +void +ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec) +{ + switch (fec) { + case ICE_FEC_BASER: + /* Clear auto FEC and RS bits, and AND BASE-R ability + * bits and OR request bits. + */ + cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; + cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN | + ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN; + cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ | + ICE_AQC_PHY_FEC_25G_KR_REQ; + break; + case ICE_FEC_RS: + /* Clear auto FEC and BASE-R bits, and AND RS ability + * bits and OR request bits. + */ + cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; + cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN; + cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ | + ICE_AQC_PHY_FEC_25G_RS_544_REQ; + break; + case ICE_FEC_NONE: + /* Clear auto FEC and all FEC option bits. */ + cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC; + cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK; + break; + case ICE_FEC_AUTO: + /* AND auto FEC bit, and all caps bits. 
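(Pausing the switch above at its AUTO case: ice_cfg_phy_fec() edits a set-config structure that the caller is expected to have pre-seeded from get-capabilities data, which is exactly the job of the new ice_copy_phy_caps_to_cfg() helper.) A sketch of the assumed call sequence, with error handling elided — not a verbatim driver excerpt:

/* Assumed usage: read abilities, seed the config from them, then
 * apply the requested FEC mode.
 */
ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, NULL);
ice_copy_phy_caps_to_cfg(pcaps, &cfg);
ice_cfg_phy_fec(&cfg, ICE_FEC_RS);	/* request Reed-Solomon FEC */
ice_aq_set_phy_cfg(hw, pi->lport, &cfg, NULL);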
*/ + cfg->caps &= ICE_AQC_PHY_CAPS_MASK; + break; + } +} + +/** * ice_get_link_status - get status of the HW network link * @pi: port information structure * @link_up: pointer to bool (true/false = linkup/linkdown) @@ -2169,6 +2293,29 @@ ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, } /** + * ice_aq_set_mac_loopback + * @hw: pointer to the HW struct + * @ena_lpbk: Enable or Disable loopback + * @cd: pointer to command details structure or NULL + * + * Enable/disable loopback on a given port + */ +enum ice_status +ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd) +{ + struct ice_aqc_set_mac_lb *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.set_mac_lb; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_lb); + if (ena_lpbk) + cmd->lb_mode = ICE_AQ_MAC_LB_EN; + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** * ice_aq_set_port_id_led * @pi: pointer to the port information * @is_orig_mode: is this LED set to original mode (by the net-list) @@ -2552,7 +2699,7 @@ do_aq: ice_debug(hw, ICE_DBG_SCHED, "VM%d disable failed %d\n", vmvf_num, hw->adminq.sq_last_status); else - ice_debug(hw, ICE_DBG_SCHED, "disable Q %d failed %d\n", + ice_debug(hw, ICE_DBG_SCHED, "disable queue %d failed %d\n", le16_to_cpu(qg_list[0].q_id[0]), hw->adminq.sq_last_status); } @@ -2924,7 +3071,6 @@ ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues, if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY) return ICE_ERR_CFG; - if (!num_queues) { /* if queue is disabled already yet the disable queue command * has to be sent to complete the VF reset, then call diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index f1ddebf45231..d1f8353fe6bb 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -9,6 +9,8 @@ #include "ice_switch.h" #include <linux/avf/virtchnl.h> +enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw); + void ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len); enum ice_status ice_init_hw(struct ice_hw *hw); @@ -84,7 +86,11 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport, enum ice_status ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update); - +void +ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec); +void +ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps, + struct ice_aqc_set_phy_cfg_data *cfg); enum ice_status ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link, struct ice_sq_cd *cd); @@ -95,6 +101,9 @@ enum ice_status ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask, struct ice_sq_cd *cd); enum ice_status +ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd); + +enum ice_status ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode, struct ice_sq_cd *cd); diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index cc8cb5fdcdc1..e91ac4df0242 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -439,7 +439,7 @@ do { \ /* free the buffer info list */ \ if ((qi)->ring.cmd_buf) \ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \ - /* free dma head */ \ + /* free DMA head */ \ devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \ } while (0) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h 
b/drivers/net/ethernet/intel/ice/ice_controlq.h index e0585394d984..44945c2165d8 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -35,7 +35,7 @@ enum ice_ctl_q { #define ICE_CTL_Q_SQ_CMD_TIMEOUT 250 /* msecs */ struct ice_ctl_q_ring { - void *dma_head; /* Virtual address to dma head */ + void *dma_head; /* Virtual address to DMA head */ struct ice_dma_mem desc_buf; /* descriptor ring memory */ void *cmd_buf; /* command buffer memory */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 8bbf48e04a1c..c2002ded65f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -82,12 +82,14 @@ ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update, * @hw: pointer to the HW struct * @shutdown_lldp_agent: True if LLDP Agent needs to be Shutdown * False if LLDP Agent needs to be Stopped + * @persist: True if Stop/Shutdown of LLDP Agent needs to be persistent across + * reboots * @cd: pointer to command details structure or NULL * * Stop or Shutdown the embedded LLDP Agent (0x0A05) */ enum ice_status -ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, +ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist, struct ice_sq_cd *cd) { struct ice_aqc_lldp_stop *cmd; @@ -100,17 +102,22 @@ ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, if (shutdown_lldp_agent) cmd->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN; + if (persist) + cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_DIS; + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } /** * ice_aq_start_lldp * @hw: pointer to the HW struct + * @persist: True if Start of LLDP Agent needs to be persistent across reboots * @cd: pointer to command details structure or NULL * * Start the embedded LLDP Agent on all ports. 
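(The new persist parameter above ORs ICE_AQ_LLDP_AGENT_PERSIST_DIS into the stop command so the agent stays down across reboots; the start path gains the symmetric ICE_AQ_LLDP_AGENT_PERSIST_ENA below.) For illustration, shutting the FW agent down persistently would be:

/* Assumed usage: shut down (not just stop) the FW LLDP agent and make
 * the change persist across reboots; NULL means no command details.
 */
status = ice_aq_stop_lldp(hw, true, true, NULL);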
(0x0A06) */ -enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd) +enum ice_status +ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd) { struct ice_aqc_lldp_start *cmd; struct ice_aq_desc desc; @@ -121,6 +128,9 @@ enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd) cmd->command = ICE_AQ_LLDP_AGENT_START; + if (persist) + cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_ENA; + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); } @@ -163,7 +173,7 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, * * Get the DCBX status from the Firmware */ -u8 ice_get_dcbx_status(struct ice_hw *hw) +static u8 ice_get_dcbx_status(struct ice_hw *hw) { u32 reg; @@ -614,7 +624,8 @@ ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) * * Parse DCB configuration from the LLDPDU */ -enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg) +static enum ice_status +ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg) { struct ice_lldp_org_tlv *tlv; enum ice_status ret = 0; @@ -658,13 +669,13 @@ enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg) /** * ice_aq_get_dcb_cfg * @hw: pointer to the HW struct - * @mib_type: mib type for the query + * @mib_type: MIB type for the query * @bridgetype: bridge type for the query (remote) * @dcbcfg: store for LLDPDU data * * Query DCB configuration from the firmware */ -static enum ice_status +enum ice_status ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype, struct ice_dcbx_cfg *dcbcfg) { @@ -689,13 +700,13 @@ ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype, } /** - * ice_aq_start_stop_dcbx - Start/Stop DCBx service in FW + * ice_aq_start_stop_dcbx - Start/Stop DCBX service in FW * @hw: pointer to the HW struct - * @start_dcbx_agent: True if DCBx Agent needs to be started - * False if DCBx Agent needs to be stopped - * @dcbx_agent_status: FW indicates back the DCBx agent status - * True if DCBx Agent is active - * False if DCBx Agent is stopped + * @start_dcbx_agent: True if DCBX Agent needs to be started + * False if DCBX Agent needs to be stopped + * @dcbx_agent_status: FW indicates back the DCBX agent status + * True if DCBX Agent is active + * False if DCBX Agent is stopped * @cd: pointer to command details structure or NULL * * Start/Stop the embedded dcbx Agent. 
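(Note the visibility shuffle in ice_dcb.c above: ice_get_dcbx_status() and ice_lldp_to_dcb_cfg() become static, while ice_aq_get_dcb_cfg() loses its static so the DCB library code can query MIBs directly.) The MIB-change handler later in this diff calls it like this:

/* Refresh the cached remote DCBX configuration from the nearest
 * bridge (taken from the handler later in this diff).
 */
ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
			 ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
			 &pi->remote_dcbx_cfg);
if (ret)
	dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n");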
In case that this wrapper function diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h index e7d4416e3a66..522e1452abe2 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb.h @@ -120,8 +120,9 @@ struct ice_cee_app_prio { u8 prio_map; } __packed; -u8 ice_get_dcbx_status(struct ice_hw *hw); -enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg); +enum ice_status +ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype, + struct ice_dcbx_cfg *dcbcfg); enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi); enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi); enum ice_status ice_init_dcb(struct ice_hw *hw); @@ -131,9 +132,10 @@ ice_query_port_ets(struct ice_port_info *pi, struct ice_sq_cd *cmd_details); #ifdef CONFIG_DCB enum ice_status -ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, +ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist, struct ice_sq_cd *cd); -enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd); +enum ice_status +ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd); enum ice_status ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent, bool *dcbx_agent_status, struct ice_sq_cd *cd); @@ -144,6 +146,7 @@ ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update, static inline enum ice_status ice_aq_stop_lldp(struct ice_hw __always_unused *hw, bool __always_unused shutdown_lldp_agent, + bool __always_unused persist, struct ice_sq_cd __always_unused *cd) { return 0; @@ -151,6 +154,7 @@ ice_aq_stop_lldp(struct ice_hw __always_unused *hw, static inline enum ice_status ice_aq_start_lldp(struct ice_hw __always_unused *hw, + bool __always_unused persist, struct ice_sq_cd __always_unused *cd) { return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 3e81af1884fc..fe88b127ca42 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -120,12 +120,14 @@ static void ice_pf_dcb_recfg(struct ice_pf *pf) tc_map = ICE_DFLT_TRAFFIC_CLASS; ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map); - if (ret) + if (ret) { dev_err(&pf->pdev->dev, "Failed to config TC for VSI index: %d\n", pf->vsi[v]->idx); - else - ice_vsi_map_rings_to_vectors(pf->vsi[v]); + continue; + } + + ice_vsi_map_rings_to_vectors(pf->vsi[v]); } } @@ -133,8 +135,10 @@ static void ice_pf_dcb_recfg(struct ice_pf *pf) * ice_pf_dcb_cfg - Apply new DCB configuration * @pf: pointer to the PF struct * @new_cfg: DCBX config to apply + * @locked: is the RTNL held */ -static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg) +static +int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) { struct ice_dcbx_cfg *old_cfg, *curr_cfg; struct ice_aqc_port_ets_elem buf = { 0 }; @@ -163,7 +167,8 @@ static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg) /* avoid race conditions by holding the lock while disabling and * re-enabling the VSI */ - rtnl_lock(); + if (!locked) + rtnl_lock(); ice_pf_dis_all_vsi(pf, true); memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg)); @@ -192,7 +197,8 @@ static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg) out: ice_pf_ena_all_vsi(pf, true); - rtnl_unlock(); + if (!locked) + rtnl_unlock(); devm_kfree(&pf->pdev->dev, old_cfg); return ret; } @@ -271,15 +277,16 @@ dcb_error: prev_cfg->etscfg.tcbwtable[0] = 
ICE_TC_MAX_BW; prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec)); - ice_pf_dcb_cfg(pf, prev_cfg); + ice_pf_dcb_cfg(pf, prev_cfg, false); devm_kfree(&pf->pdev->dev, prev_cfg); } /** * ice_dcb_init_cfg - set the initial DCB config in SW - * @pf: pf to apply config to + * @pf: PF to apply config to + * @locked: Is the RTNL held */ -static int ice_dcb_init_cfg(struct ice_pf *pf) +static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked) { struct ice_dcbx_cfg *newcfg; struct ice_port_info *pi; @@ -294,7 +301,7 @@ static int ice_dcb_init_cfg(struct ice_pf *pf) memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg)); dev_info(&pf->pdev->dev, "Configuring initial DCB values\n"); - if (ice_pf_dcb_cfg(pf, newcfg)) + if (ice_pf_dcb_cfg(pf, newcfg, locked)) ret = -EINVAL; devm_kfree(&pf->pdev->dev, newcfg); @@ -304,9 +311,10 @@ static int ice_dcb_init_cfg(struct ice_pf *pf) /** * ice_dcb_sw_default_config - Apply a default DCB config - * @pf: pf to apply config to + * @pf: PF to apply config to + * @locked: was this function called with RTNL held */ -static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf) +static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked) { struct ice_aqc_port_ets_elem buf = { 0 }; struct ice_dcbx_cfg *dcbcfg; @@ -338,7 +346,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf) dcbcfg->app[0].priority = 3; dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE; - ret = ice_pf_dcb_cfg(pf, dcbcfg); + ret = ice_pf_dcb_cfg(pf, dcbcfg, locked); devm_kfree(&pf->pdev->dev, dcbcfg); if (ret) return ret; @@ -348,9 +356,10 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf) /** * ice_init_pf_dcb - initialize DCB for a PF - * @pf: pf to initiialize DCB for + * @pf: PF to initialize DCB for + * @locked: Was function called with RTNL held */ -int ice_init_pf_dcb(struct ice_pf *pf) +int ice_init_pf_dcb(struct ice_pf *pf, bool locked) { struct device *dev = &pf->pdev->dev; struct ice_port_info *port_info; @@ -360,33 +369,10 @@ int ice_init_pf_dcb(struct ice_pf *pf) port_info = hw->port_info; - /* check if device is DCB capable */ - if (!hw->func_caps.common_cap.dcb) { - dev_dbg(dev, "DCB not supported\n"); - return -EOPNOTSUPP; - } - - /* Best effort to put DCBx and LLDP into a good state */ - port_info->dcbx_status = ice_get_dcbx_status(hw); - if (port_info->dcbx_status != ICE_DCBX_STATUS_DONE && - port_info->dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) { - bool dcbx_status; - - /* Attempt to start LLDP engine. Ignore errors - * as this will error if it is already started - */ - ice_aq_start_lldp(hw, NULL); - - /* Attempt to start DCBX. 
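(The block being removed above, which nudged LLDP and DCBX into a usable state from ice_init_pf_dcb(), is superseded by ice_init_dcb(); what the rework threads through every DCB (re)configuration path instead is the locked parameter.) The resulting conditional-locking idiom, as seen in ice_pf_dcb_cfg() earlier in this diff:

/* Callers already holding the RTNL pass locked = true so the DCB
 * paths do not take it a second time and deadlock.
 */
if (!locked)
	rtnl_lock();
ice_pf_dis_all_vsi(pf, true);
/* ... apply the new DCB configuration ... */
ice_pf_ena_all_vsi(pf, true);
if (!locked)
	rtnl_unlock();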
Ignore errors as this - * will error if it is already started - */ - ice_aq_start_stop_dcbx(hw, true, &dcbx_status, NULL); - } - err = ice_init_dcb(hw); if (err) { - /* FW LLDP not in usable state, default to SW DCBx/LLDP */ - dev_info(&pf->pdev->dev, "FW LLDP not in usable state\n"); + /* FW LLDP is not active, default to SW DCBX/LLDP */ + dev_info(&pf->pdev->dev, "FW LLDP is not active\n"); hw->port_info->dcbx_status = ICE_DCBX_STATUS_NOT_STARTED; hw->port_info->is_sw_lldp = true; } @@ -398,15 +384,16 @@ int ice_init_pf_dcb(struct ice_pf *pf) if (port_info->is_sw_lldp) { sw_default = 1; dev_info(&pf->pdev->dev, "DCBx/LLDP in SW mode.\n"); + clear_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags); + } else { + set_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags); } - if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) { - sw_default = 1; + if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) dev_info(&pf->pdev->dev, "DCBX not started\n"); - } if (sw_default) { - err = ice_dcb_sw_dflt_cfg(pf); + err = ice_dcb_sw_dflt_cfg(pf, locked); if (err) { dev_err(&pf->pdev->dev, "Failed to set local DCB config %d\n", err); @@ -425,7 +412,7 @@ int ice_init_pf_dcb(struct ice_pf *pf) set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); - err = ice_dcb_init_cfg(pf); + err = ice_dcb_init_cfg(pf, locked); if (err) goto dcb_init_err; @@ -515,6 +502,55 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, } /** + * ice_dcb_need_recfg - Check if DCB needs reconfig + * @pf: board private structure + * @old_cfg: current DCB config + * @new_cfg: new DCB config + */ +static bool ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg, + struct ice_dcbx_cfg *new_cfg) +{ + bool need_reconfig = false; + + /* Check if ETS configuration has changed */ + if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg, + sizeof(new_cfg->etscfg))) { + /* If Priority Table has changed reconfig is needed */ + if (memcmp(&new_cfg->etscfg.prio_table, + &old_cfg->etscfg.prio_table, + sizeof(new_cfg->etscfg.prio_table))) { + need_reconfig = true; + dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n"); + } + + if (memcmp(&new_cfg->etscfg.tcbwtable, + &old_cfg->etscfg.tcbwtable, + sizeof(new_cfg->etscfg.tcbwtable))) + dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n"); + + if (memcmp(&new_cfg->etscfg.tsatable, + &old_cfg->etscfg.tsatable, + sizeof(new_cfg->etscfg.tsatable))) + dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n"); + } + + /* Check if PFC configuration has changed */ + if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) { + need_reconfig = true; + dev_dbg(&pf->pdev->dev, "PFC config change detected.\n"); + } + + /* Check if APP Table has changed */ + if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) { + need_reconfig = true; + dev_dbg(&pf->pdev->dev, "APP Table change detected.\n"); + } + + dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig); + return need_reconfig; +} + +/** * ice_dcb_process_lldp_set_mib_change - Process MIB change * @pf: ptr to ice_pf * @event: pointer to the admin queue receive event @@ -523,29 +559,95 @@ void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { - if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) { - struct ice_dcbx_cfg *dcbcfg, *prev_cfg; - int err; - - prev_cfg = &pf->hw.port_info->local_dcbx_cfg; - dcbcfg = devm_kmemdup(&pf->pdev->dev, prev_cfg, - sizeof(*dcbcfg), GFP_KERNEL); - if (!dcbcfg) + struct ice_aqc_port_ets_elem buf = { 0 }; + struct ice_aqc_lldp_get_mib *mib; + struct ice_dcbx_cfg tmp_dcbx_cfg; + bool need_reconfig 
= false; + struct ice_port_info *pi; + u8 type; + int ret; + + /* Not DCB capable or capability disabled */ + if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))) + return; + + if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) { + dev_dbg(&pf->pdev->dev, + "MIB Change Event in HOST mode\n"); + return; + } + + pi = pf->hw.port_info; + mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw; + /* Ignore if event is not for Nearest Bridge */ + type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) & + ICE_AQ_LLDP_BRID_TYPE_M); + dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type); + if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID) + return; + + /* Check MIB Type and return if event for Remote MIB update */ + type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M; + dev_dbg(&pf->pdev->dev, + "LLDP event mib type %s\n", type ? "remote" : "local"); + if (type == ICE_AQ_LLDP_MIB_REMOTE) { + /* Update the remote cached instance and return */ + ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE, + ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID, + &pi->remote_dcbx_cfg); + if (ret) { + dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n"); return; + } + } - err = ice_lldp_to_dcb_cfg(event->msg_buf, dcbcfg); - if (!err) - ice_pf_dcb_cfg(pf, dcbcfg); + /* store the old configuration */ + tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg; - devm_kfree(&pf->pdev->dev, dcbcfg); + /* Reset the old DCBX configuration data */ + memset(&pi->local_dcbx_cfg, 0, sizeof(pi->local_dcbx_cfg)); - /* Get updated DCBx data from firmware */ - err = ice_get_dcb_cfg(pf->hw.port_info); - if (err) - dev_err(&pf->pdev->dev, - "Failed to get DCB config\n"); - } else { + /* Get updated DCBX data from firmware */ + ret = ice_get_dcb_cfg(pf->hw.port_info); + if (ret) { + dev_err(&pf->pdev->dev, "Failed to get DCB config\n"); + return; + } + + /* No change detected in DCBX configs */ + if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) { dev_dbg(&pf->pdev->dev, - "MIB Change Event in HOST mode\n"); + "No change detected in DCBX configuration.\n"); + return; + } + + need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg, + &pi->local_dcbx_cfg); + if (!need_reconfig) + return; + + /* Enable DCB tagging only when more than one TC */ + if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) { + dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n"); + set_bit(ICE_FLAG_DCB_ENA, pf->flags); + } else { + dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n"); + clear_bit(ICE_FLAG_DCB_ENA, pf->flags); } + + rtnl_lock(); + ice_pf_dis_all_vsi(pf, true); + + ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); + if (ret) { + dev_err(&pf->pdev->dev, "Query Port ETS failed\n"); + rtnl_unlock(); + return; + } + + /* changes in configuration update VSI */ + ice_pf_dcb_recfg(pf); + + ice_pf_ena_all_vsi(pf, true); + rtnl_unlock(); } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index ca7b76faa03c..819081053ff5 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -14,7 +14,7 @@ void ice_dcb_rebuild(struct ice_pf *pf); u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg); u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi); -int ice_init_pf_dcb(struct ice_pf *pf); +int ice_init_pf_dcb(struct ice_pf *pf, bool locked); void ice_update_dcb_stats(struct ice_pf *pf); int ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, @@ -40,7 +40,8 @@ static inline u8 
ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg) return 1; } -static inline int ice_init_pf_dcb(struct ice_pf *pf) +static inline int +ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked) { dev_dbg(&pf->pdev->dev, "DCB not supported\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 1341fde8d53f..52083a63dee6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -45,22 +45,40 @@ static int ice_q_stats_len(struct net_device *netdev) ICE_VSI_STATS_LEN + ice_q_stats_len(n)) static const struct ice_stats ice_gstrings_vsi_stats[] = { - ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast), ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast), - ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast), + ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast), ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast), - ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), + ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast), ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast), - ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes), + ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast), ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes), - ICE_VSI_STAT("rx_discards", eth_stats.rx_discards), - ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), - ICE_VSI_STAT("tx_linearize", tx_linearize), + ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes), + ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards), ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed), ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), + ICE_VSI_STAT("tx_errors", eth_stats.tx_errors), + ICE_VSI_STAT("tx_linearize", tx_linearize), +}; + +enum ice_ethtool_test_id { + ICE_ETH_TEST_REG = 0, + ICE_ETH_TEST_EEPROM, + ICE_ETH_TEST_INTR, + ICE_ETH_TEST_LOOP, + ICE_ETH_TEST_LINK, }; +static const char ice_gstrings_test[][ETH_GSTRING_LEN] = { + "Register test (offline)", + "EEPROM test (offline)", + "Interrupt test (offline)", + "Loopback test (offline)", + "Link test (on/offline)", +}; + +#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN) + /* These PF_STATs might look like duplicates of some NETDEV_STATs, * but they aren't. This device is capable of supporting multiple * VSIs/netdevs on a single PF. The NETDEV_STATs are for individual @@ -71,45 +89,45 @@ static const struct ice_stats ice_gstrings_vsi_stats[] = { * is queried on the base PF netdev. 
*/ static const struct ice_stats ice_gstrings_pf_stats[] = { - ICE_PF_STAT("port.tx_bytes", stats.eth.tx_bytes), - ICE_PF_STAT("port.rx_bytes", stats.eth.rx_bytes), - ICE_PF_STAT("port.tx_unicast", stats.eth.tx_unicast), - ICE_PF_STAT("port.rx_unicast", stats.eth.rx_unicast), - ICE_PF_STAT("port.tx_multicast", stats.eth.tx_multicast), - ICE_PF_STAT("port.rx_multicast", stats.eth.rx_multicast), - ICE_PF_STAT("port.tx_broadcast", stats.eth.tx_broadcast), - ICE_PF_STAT("port.rx_broadcast", stats.eth.rx_broadcast), - ICE_PF_STAT("port.tx_errors", stats.eth.tx_errors), - ICE_PF_STAT("port.tx_size_64", stats.tx_size_64), - ICE_PF_STAT("port.rx_size_64", stats.rx_size_64), - ICE_PF_STAT("port.tx_size_127", stats.tx_size_127), - ICE_PF_STAT("port.rx_size_127", stats.rx_size_127), - ICE_PF_STAT("port.tx_size_255", stats.tx_size_255), - ICE_PF_STAT("port.rx_size_255", stats.rx_size_255), - ICE_PF_STAT("port.tx_size_511", stats.tx_size_511), - ICE_PF_STAT("port.rx_size_511", stats.rx_size_511), - ICE_PF_STAT("port.tx_size_1023", stats.tx_size_1023), - ICE_PF_STAT("port.rx_size_1023", stats.rx_size_1023), - ICE_PF_STAT("port.tx_size_1522", stats.tx_size_1522), - ICE_PF_STAT("port.rx_size_1522", stats.rx_size_1522), - ICE_PF_STAT("port.tx_size_big", stats.tx_size_big), - ICE_PF_STAT("port.rx_size_big", stats.rx_size_big), - ICE_PF_STAT("port.link_xon_tx", stats.link_xon_tx), - ICE_PF_STAT("port.link_xon_rx", stats.link_xon_rx), - ICE_PF_STAT("port.link_xoff_tx", stats.link_xoff_tx), - ICE_PF_STAT("port.link_xoff_rx", stats.link_xoff_rx), - ICE_PF_STAT("port.tx_dropped_link_down", stats.tx_dropped_link_down), - ICE_PF_STAT("port.rx_undersize", stats.rx_undersize), - ICE_PF_STAT("port.rx_fragments", stats.rx_fragments), - ICE_PF_STAT("port.rx_oversize", stats.rx_oversize), - ICE_PF_STAT("port.rx_jabber", stats.rx_jabber), - ICE_PF_STAT("port.rx_csum_bad", hw_csum_rx_error), - ICE_PF_STAT("port.rx_length_errors", stats.rx_len_errors), - ICE_PF_STAT("port.rx_dropped", stats.eth.rx_discards), - ICE_PF_STAT("port.rx_crc_errors", stats.crc_errors), - ICE_PF_STAT("port.illegal_bytes", stats.illegal_bytes), - ICE_PF_STAT("port.mac_local_faults", stats.mac_local_faults), - ICE_PF_STAT("port.mac_remote_faults", stats.mac_remote_faults), + ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes), + ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes), + ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast), + ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast), + ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast), + ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast), + ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast), + ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast), + ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors), + ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64), + ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64), + ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127), + ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127), + ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255), + ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255), + ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511), + ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511), + ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023), + ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023), + ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522), + ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522), + ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big), + ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big), + ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx), + 
ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx), + ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx), + ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx), + ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down), + ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize), + ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments), + ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize), + ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber), + ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error), + ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors), + ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards), + ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors), + ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes), + ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults), + ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults), }; static const u32 ice_regs_dump_list[] = { @@ -120,6 +138,9 @@ static const u32 ice_regs_dump_list[] = { QINT_RQCTL(0), PFINT_OICR_ENA, QRX_ITR(0), + PF0INT_ITR_0(0), + PF0INT_ITR_1(0), + PF0INT_ITR_2(0), }; struct ice_priv_flag { @@ -134,7 +155,7 @@ struct ice_priv_flag { static const struct ice_priv_flag ice_gstrings_priv_flags[] = { ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA), - ICE_PRIV_FLAG("disable-fw-lldp", ICE_FLAG_DISABLE_FW_LLDP), + ICE_PRIV_FLAG("enable-fw-lldp", ICE_FLAG_ENABLE_FW_LLDP), }; #define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) @@ -278,6 +299,571 @@ out: return ret; } +/** + * ice_active_vfs - check if there are any active VFs + * @pf: board private structure + * + * Returns true if an active VF is found, otherwise returns false + */ +static bool ice_active_vfs(struct ice_pf *pf) +{ + struct ice_vf *vf = pf->vf; + int i; + + for (i = 0; i < pf->num_alloc_vfs; i++, vf++) + if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + return true; + return false; +} + +/** + * ice_link_test - perform a link test on a given net_device + * @netdev: network interface device structure + * + * This function performs one of the self-tests required by ethtool. + * Returns 0 on success, non-zero on failure. + */ +static u64 ice_link_test(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + enum ice_status status; + bool link_up = false; + + netdev_info(netdev, "link test\n"); + status = ice_get_link_status(np->vsi->port_info, &link_up); + if (status) { + netdev_err(netdev, "link query error, status = %d\n", status); + return 1; + } + + if (!link_up) + return 2; + + return 0; +} + +/** + * ice_eeprom_test - perform an EEPROM test on a given net_device + * @netdev: network interface device structure + * + * This function performs one of the self-tests required by ethtool. + * Returns 0 on success, non-zero on failure. 
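For orientation, the u64 result slots that these ice_*_test() handlers fill in are reported to userspace through the standard ETHTOOL_TEST ioctl. The standalone sketch below is not part of this patch; it assumes only the stock ethtool uapi headers and the five-entry result array this driver registers (error handling abbreviated):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <linux/ethtool.h>
    #include <linux/sockios.h>

    int main(int argc, char **argv)
    {
            struct ethtool_test *test;
            struct ifreq ifr = { 0 };
            int fd, i, n = 5;       /* ice registers 5 self-tests */

            if (argc < 2)
                    return 1;
            fd = socket(AF_INET, SOCK_DGRAM, 0);
            test = calloc(1, sizeof(*test) + n * sizeof(test->data[0]));
            if (fd < 0 || !test)
                    return 1;
            test->cmd = ETHTOOL_TEST;
            test->flags = ETH_TEST_FL_OFFLINE;      /* request offline tests */
            strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);
            ifr.ifr_data = (char *)test;
            if (ioctl(fd, SIOCETHTOOL, &ifr)) {
                    perror("ETHTOOL_TEST");
                    return 1;
            }
            for (i = 0; i < n; i++)     /* 0 means the test passed */
                    printf("test %d: %llu\n", i,
                           (unsigned long long)test->data[i]);
            return 0;
    }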
+ */
+static u64 ice_eeprom_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	netdev_info(netdev, "EEPROM test\n");
+	return !!(ice_nvm_validate_checksum(&pf->hw));
+}
+
+/**
+ * ice_reg_pattern_test
+ * @hw: pointer to the HW struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ */
+static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
+{
+	struct ice_pf *pf = (struct ice_pf *)hw->back;
+	static const u32 patterns[] = {
+		0x5A5A5A5A, 0xA5A5A5A5,
+		0x00000000, 0xFFFFFFFF
+	};
+	u32 val, orig_val;
+	int i;
+
+	orig_val = rd32(hw, reg);
+	for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
+		u32 pattern = patterns[i] & mask;
+
+		wr32(hw, reg, pattern);
+		val = rd32(hw, reg);
+		if (val == pattern)
+			continue;
+		dev_err(&pf->pdev->dev,
+			"%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
+			, __func__, reg, pattern, val);
+		return 1;
+	}
+
+	wr32(hw, reg, orig_val);
+	val = rd32(hw, reg);
+	if (val != orig_val) {
+		dev_err(&pf->pdev->dev,
+			"%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
+			, __func__, reg, orig_val, val);
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_reg_test - perform a register test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_reg_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_hw *hw = np->vsi->port_info->hw;
+	u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
+		hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
+	struct ice_diag_reg_test_info {
+		u32 address;
+		u32 mask;
+		u32 elem_num;
+		u32 elem_size;
+	} ice_reg_list[] = {
+		{GLINT_ITR(0, 0), 0x00000fff, int_elements,
+		 GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
+		{GLINT_ITR(1, 0), 0x00000fff, int_elements,
+		 GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
+		{GLINT_ITR(2, 0), 0x00000fff, int_elements,
+		 GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
+		{GLINT_CTL, 0xffff0001, 1, 0}
+	};
+	int i;
+
+	netdev_dbg(netdev, "Register test\n");
+	for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
+		u32 j;
+
+		for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
+			u32 mask = ice_reg_list[i].mask;
+			u32 reg = ice_reg_list[i].address +
+				  (j * ice_reg_list[i].elem_size);
+
+			/* bail on failure (non-zero return) */
+			if (ice_reg_pattern_test(hw, reg, mask))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * ice_lbtest_prepare_rings - configure Tx/Rx test rings
+ * @vsi: pointer to the VSI structure
+ *
+ * Function configures rings of a VSI for loopback test without
+ * enabling interrupts or informing the kernel about new queues.
+ *
+ * Returns 0 on success, negative on failure.
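The write-pattern/read-back/restore idea in ice_reg_pattern_test() generalizes to any read-write register. A minimal self-contained sketch, with a plain variable standing in for the register (rd32()/wr32() replaced by direct access, purely illustrative):

    #include <stdint.h>
    #include <stdio.h>

    /* Simulated 32-bit register; a real test would go through MMIO. */
    static uint32_t fake_reg;

    static int reg_pattern_test(uint32_t *reg, uint32_t mask)
    {
            static const uint32_t patterns[] = {
                    0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
            };
            uint32_t orig = *reg;
            unsigned int i;

            for (i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++) {
                    uint32_t pat = patterns[i] & mask;

                    *reg = pat;
                    if (*reg != pat)
                            return 1;       /* stuck or shorted bit */
            }
            *reg = orig;                    /* restore and re-check */
            return *reg != orig;
    }

    int main(void)
    {
            printf("pattern test: %s\n",
                   reg_pattern_test(&fake_reg, 0x00000fff) ? "FAIL" : "PASS");
            return 0;
    }

Writing only bits under the mask keeps reserved or read-only bits untouched, which is why each pattern is ANDed with the writable mask before the write.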
+ */ +static int ice_lbtest_prepare_rings(struct ice_vsi *vsi) +{ + int status; + + status = ice_vsi_setup_tx_rings(vsi); + if (status) + goto err_setup_tx_ring; + + status = ice_vsi_setup_rx_rings(vsi); + if (status) + goto err_setup_rx_ring; + + status = ice_vsi_cfg(vsi); + if (status) + goto err_setup_rx_ring; + + status = ice_vsi_start_rx_rings(vsi); + if (status) + goto err_start_rx_ring; + + return status; + +err_start_rx_ring: + ice_vsi_free_rx_rings(vsi); +err_setup_rx_ring: + ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0); +err_setup_tx_ring: + ice_vsi_free_tx_rings(vsi); + + return status; +} + +/** + * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test + * @vsi: pointer to the VSI structure + * + * Function stops and frees VSI rings after a loopback test. + * Returns 0 on success, negative on failure. + */ +static int ice_lbtest_disable_rings(struct ice_vsi *vsi) +{ + int status; + + status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0); + if (status) + netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n", + vsi->vsi_num, status); + + status = ice_vsi_stop_rx_rings(vsi); + if (status) + netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n", + vsi->vsi_num, status); + + ice_vsi_free_tx_rings(vsi); + ice_vsi_free_rx_rings(vsi); + + return status; +} + +/** + * ice_lbtest_create_frame - create test packet + * @pf: pointer to the PF structure + * @ret_data: allocated frame buffer + * @size: size of the packet data + * + * Function allocates a frame with a test pattern on specific offsets. + * Returns 0 on success, non-zero on failure. + */ +static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size) +{ + u8 *data; + + if (!pf) + return -EINVAL; + + data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL); + if (!data) + return -ENOMEM; + + /* Since the ethernet test frame should always be at least + * 64 bytes long, fill some octets in the payload with test data. + */ + memset(data, 0xFF, size); + data[32] = 0xDE; + data[42] = 0xAD; + data[44] = 0xBE; + data[46] = 0xEF; + + *ret_data = data; + + return 0; +} + +/** + * ice_lbtest_check_frame - verify received loopback frame + * @frame: pointer to the raw packet data + * + * Function verifies received test frame with a pattern. + * Returns true if frame matches the pattern, false otherwise. + */ +static bool ice_lbtest_check_frame(u8 *frame) +{ + /* Validate bytes of a frame under offsets chosen earlier */ + if (frame[32] == 0xDE && + frame[42] == 0xAD && + frame[44] == 0xBE && + frame[46] == 0xEF && + frame[48] == 0xFF) + return true; + + return false; +} + +/** + * ice_diag_send - send test frames to the test ring + * @tx_ring: pointer to the transmit ring + * @data: pointer to the raw packet data + * @size: size of the packet to send + * + * Function sends loopback packets on a test Tx ring. 
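The create/check pair above recognizes a looped-back copy by marker bytes planted at fixed offsets in an all-0xFF payload. A standalone sketch of the same pairing (same offsets and values as the driver code, shown outside the kernel for clarity):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define LB_FRAME_SIZE 64

    /* Fill with 0xFF, then plant marker bytes at fixed offsets. */
    static void lb_create_frame(uint8_t *data, size_t size)
    {
            memset(data, 0xFF, size);
            data[32] = 0xDE;
            data[42] = 0xAD;
            data[44] = 0xBE;
            data[46] = 0xEF;
    }

    static bool lb_check_frame(const uint8_t *frame)
    {
            return frame[32] == 0xDE && frame[42] == 0xAD &&
                   frame[44] == 0xBE && frame[46] == 0xEF &&
                   frame[48] == 0xFF;   /* untouched filler must survive */
    }

    int main(void)
    {
            uint8_t frame[LB_FRAME_SIZE];

            lb_create_frame(frame, sizeof(frame));
            printf("frame %s\n", lb_check_frame(frame) ? "matches" : "corrupt");
            return 0;
    }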
+ */
+static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size)
+{
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+	dma_addr_t dma;
+	u64 td_cmd;
+
+	tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
+	tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
+
+	dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(tx_ring->dev, dma))
+		return -EINVAL;
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+
+	/* These flags are required for a descriptor to be pushed out */
+	td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+	tx_desc->cmd_type_offset_bsz =
+		cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+			    (td_cmd << ICE_TXD_QW1_CMD_S) |
+			    ((u64)0 << ICE_TXD_QW1_OFFSET_S) |
+			    ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+			    ((u64)0 << ICE_TXD_QW1_L2TAG1_S));
+
+	tx_buf->next_to_watch = tx_desc;
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	tx_ring->next_to_use++;
+	if (tx_ring->next_to_use >= tx_ring->count)
+		tx_ring->next_to_use = 0;
+
+	writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+	/* Wait until the packets get transmitted to the receive queue. */
+	usleep_range(1000, 2000);
+	dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
+
+	return 0;
+}
+
+#define ICE_LB_FRAME_SIZE 64
+/**
+ * ice_lbtest_receive_frames - receive and verify test frames
+ * @rx_ring: pointer to the receive ring
+ *
+ * Function receives loopback packets and verifies their correctness.
+ * Returns number of received valid frames.
+ */
+static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+{
+	struct ice_rx_buf *rx_buf;
+	int valid_frames, i;
+	u8 *received_buf;
+
+	valid_frames = 0;
+
+	for (i = 0; i < rx_ring->count; i++) {
+		union ice_32b_rx_flex_desc *rx_desc;
+
+		rx_desc = ICE_RX_DESC(rx_ring, i);
+
+		if (!(rx_desc->wb.status_error0 &
+		    cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S) |
+				BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S))))
+			continue;
+
+		rx_buf = &rx_ring->rx_buf[i];
+		received_buf = page_address(rx_buf->page);
+
+		if (ice_lbtest_check_frame(received_buf))
+			valid_frames++;
+	}
+
+	return valid_frames;
+}
+
+/**
+ * ice_loopback_test - perform a loopback test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
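The loopback test below returns a distinct small code for each failed stage and unwinds only the stages that already succeeded, the classic goto-cleanup shape. A compact model of that structure (the setup and teardown helpers are placeholders, not driver functions):

    #include <stdio.h>

    static int setup_rings(void)        { return 0; }
    static int alloc_bufs(void)         { return 0; }
    static int enable_loopback(void)    { return 0; }
    static void disable_loopback(void)  { }
    static void free_bufs(void)         { }
    static void free_rings(void)        { }

    /* Staged bring-up with numbered failure codes and goto unwinding,
     * the same shape as ice_loopback_test(); every stage that succeeded
     * is torn down in reverse order on any later failure.
     */
    static unsigned long long loopback_test(void)
    {
            unsigned long long ret = 0;

            if (setup_rings())     { ret = 2; goto out; }
            if (alloc_bufs())      { ret = 3; goto undo_rings; }
            if (enable_loopback()) { ret = 4; goto undo_bufs; }

            /* send and verify frames here, setting ret = 8..10 on error */

            disable_loopback();
    undo_bufs:
            free_bufs();
    undo_rings:
            free_rings();
    out:
            return ret;
    }

    int main(void)
    {
            printf("loopback test: %llu\n", loopback_test());
            return 0;
    }

The numbered codes make a failing run self-describing: the ethtool result word directly identifies which stage broke.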
+ */
+static u64 ice_loopback_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
+	struct ice_pf *pf = orig_vsi->back;
+	struct ice_ring *tx_ring, *rx_ring;
+	u8 broadcast[ETH_ALEN], ret = 0;
+	int num_frames, valid_frames;
+	LIST_HEAD(tmp_list);
+	u8 *tx_frame;
+	int i;
+
+	netdev_info(netdev, "loopback test\n");
+
+	test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
+	if (!test_vsi) {
+		netdev_err(netdev, "Failed to create a VSI for the loopback test\n");
+		return 1;
+	}
+
+	test_vsi->netdev = netdev;
+	tx_ring = test_vsi->tx_rings[0];
+	rx_ring = test_vsi->rx_rings[0];
+
+	if (ice_lbtest_prepare_rings(test_vsi)) {
+		ret = 2;
+		goto lbtest_vsi_close;
+	}
+
+	if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
+		ret = 3;
+		goto lbtest_rings_dis;
+	}
+
+	/* Enable MAC loopback in firmware */
+	if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
+		ret = 4;
+		goto lbtest_mac_dis;
+	}
+
+	/* Test VSI needs to receive broadcast packets */
+	eth_broadcast_addr(broadcast);
+	if (ice_add_mac_to_list(test_vsi, &tmp_list, broadcast)) {
+		ret = 5;
+		goto lbtest_mac_dis;
+	}
+
+	if (ice_add_mac(&pf->hw, &tmp_list)) {
+		ret = 6;
+		goto free_mac_list;
+	}
+
+	if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
+		ret = 7;
+		goto remove_mac_filters;
+	}
+
+	num_frames = min_t(int, tx_ring->count, 32);
+	for (i = 0; i < num_frames; i++) {
+		if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
+			ret = 8;
+			goto lbtest_free_frame;
+		}
+	}
+
+	valid_frames = ice_lbtest_receive_frames(rx_ring);
+	if (!valid_frames)
+		ret = 9;
+	else if (valid_frames != num_frames)
+		ret = 10;
+
+lbtest_free_frame:
+	devm_kfree(&pf->pdev->dev, tx_frame);
+remove_mac_filters:
+	if (ice_remove_mac(&pf->hw, &tmp_list))
+		netdev_err(netdev, "Could not remove MAC filter for the test VSI\n");
+free_mac_list:
+	ice_free_fltr_list(&pf->pdev->dev, &tmp_list);
+lbtest_mac_dis:
+	/* Disable MAC loopback after the test is completed. */
+	if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
+		netdev_err(netdev, "Could not disable MAC loopback\n");
+lbtest_rings_dis:
+	if (ice_lbtest_disable_rings(test_vsi))
+		netdev_err(netdev, "Could not disable test rings\n");
+lbtest_vsi_close:
+	test_vsi->netdev = NULL;
+	if (ice_vsi_release(test_vsi))
+		netdev_err(netdev, "Failed to remove the test VSI\n");
+
+	return ret;
+}
+
+/**
+ * ice_intr_test - perform an interrupt test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_intr_test(struct net_device *netdev)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_pf *pf = np->vsi->back;
+	u16 swic_old = pf->sw_int_count;
+
+	netdev_info(netdev, "interrupt test\n");
+
+	wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
+	     GLINT_DYN_CTL_SW_ITR_INDX_M |
+	     GLINT_DYN_CTL_INTENA_MSK_M |
+	     GLINT_DYN_CTL_SWINT_TRIG_M);
+
+	usleep_range(1000, 2000);
+	return (swic_old == pf->sw_int_count);
+}
+
+/**
+ * ice_self_test - handler function for performing a self-test by ethtool
+ * @netdev: network interface device structure
+ * @eth_test: ethtool_test structure
+ * @data: required by ethtool.self_test
+ *
+ * This function is called after invoking 'ethtool -t devname' command where
+ * devname is the name of the network device on which ethtool should operate.
+ * It performs a set of self-tests to check if a device works properly.
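The interrupt test above is a snapshot-fire-compare pattern: record the software-interrupt counter, trigger a software interrupt via GLINT_DYN_CTL, give the IRQ time to land, and fail if the counter did not advance. A generic sketch of that pattern (the trigger and handler here are stand-ins, not driver code):

    #include <stdio.h>

    static volatile unsigned int sw_int_count;

    /* Stand-in for the OICR interrupt handler bumping its counter. */
    static void fake_irq_handler(void) { sw_int_count++; }

    /* Stand-in for the GLINT_DYN_CTL software-interrupt trigger write. */
    static void trigger_sw_interrupt(void) { fake_irq_handler(); }

    /* Same pass/fail rule as ice_intr_test(): snapshot the counter,
     * fire, wait, and return non-zero if the count did not move.
     */
    static unsigned long long intr_test(void)
    {
            unsigned int old = sw_int_count;

            trigger_sw_interrupt();
            /* a real driver sleeps here (usleep_range) to let the IRQ land */
            return old == sw_int_count;
    }

    int main(void)
    {
            printf("interrupt test: %llu\n", intr_test());
            return 0;
    }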
+ */
+static void
+ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
+	      u64 *data)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+	struct ice_pf *pf = np->vsi->back;
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		netdev_info(netdev, "offline testing starting\n");
+
+		set_bit(__ICE_TESTING, pf->state);
+
+		if (ice_active_vfs(pf)) {
+			dev_warn(&pf->pdev->dev,
+				 "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+			data[ICE_ETH_TEST_REG] = 1;
+			data[ICE_ETH_TEST_EEPROM] = 1;
+			data[ICE_ETH_TEST_INTR] = 1;
+			data[ICE_ETH_TEST_LOOP] = 1;
+			data[ICE_ETH_TEST_LINK] = 1;
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+			clear_bit(__ICE_TESTING, pf->state);
+			goto skip_ol_tests;
+		}
+		/* If the device is online then take it offline */
+		if (if_running)
+			/* indicate we're in test mode */
+			ice_stop(netdev);
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
+		data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
+		data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
+		data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
+
+		if (data[ICE_ETH_TEST_LINK] ||
+		    data[ICE_ETH_TEST_EEPROM] ||
+		    data[ICE_ETH_TEST_LOOP] ||
+		    data[ICE_ETH_TEST_INTR] ||
+		    data[ICE_ETH_TEST_REG])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		clear_bit(__ICE_TESTING, pf->state);
+
+		if (if_running) {
+			int status = ice_open(netdev);
+
+			if (status) {
+				dev_err(&pf->pdev->dev,
+					"Could not open device %s, err %d\n",
+					pf->int_name, status);
+			}
+		}
+	} else {
+		/* Online tests */
+		netdev_info(netdev, "online testing starting\n");
+
+		data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+		if (data[ICE_ETH_TEST_LINK])
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		/* Offline only tests, not run in online; pass by default */
+		data[ICE_ETH_TEST_REG] = 0;
+		data[ICE_ETH_TEST_EEPROM] = 0;
+		data[ICE_ETH_TEST_INTR] = 0;
+		data[ICE_ETH_TEST_LOOP] = 0;
+	}
+
+skip_ol_tests:
+	netdev_info(netdev, "testing finished\n");
+}
+
 static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
@@ -295,17 +881,17 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
 		ice_for_each_alloc_txq(vsi, i) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "tx-queue-%u.tx_packets", i);
+				 "tx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 
 		ice_for_each_alloc_rxq(vsi, i) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "rx-queue-%u.rx_packets", i);
+				 "rx_queue_%u_packets", i);
 			p += ETH_GSTRING_LEN;
-			snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i);
+			snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
 			p += ETH_GSTRING_LEN;
 		}
 
@@ -320,21 +906,24 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx-priority-%u-xon", i);
+				 "tx_priority_%u_xon.nic", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.tx-priority-%u-xoff", i);
+				 "tx_priority_%u_xoff.nic", i);
 			p += ETH_GSTRING_LEN;
 		}
 
 		for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx-priority-%u-xon", i);
+				 "rx_priority_%u_xon.nic", i);
 			p += ETH_GSTRING_LEN;
 			snprintf(p, ETH_GSTRING_LEN,
-				 "port.rx-priority-%u-xoff", i);
+				 "rx_priority_%u_xoff.nic", i);
 			p += ETH_GSTRING_LEN;
 		}
 		break;
+	case ETH_SS_TEST:
+		memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
+		break;
 	case ETH_SS_PRIV_FLAGS:
 		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
 			snprintf(p, ETH_GSTRING_LEN, "%s",
@@ -371,6 +960,185 @@ ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
 }
 
 /**
+ * ice_set_fec_cfg - Set link FEC options
+ * @netdev: network interface device structure
+ * @req_fec: FEC mode to configure
+ */
+static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_aqc_set_phy_cfg_data config = { 0 };
+	struct ice_aqc_get_phy_caps_data *caps;
+	struct ice_vsi *vsi = np->vsi;
+	u8 sw_cfg_caps, sw_cfg_fec;
+	struct ice_port_info *pi;
+	enum ice_status status;
+	int err = 0;
+
+	pi = vsi->port_info;
+	if (!pi)
+		return -EOPNOTSUPP;
+
+	/* Changing the FEC parameters is not supported if not the PF VSI */
+	if (vsi->type != ICE_VSI_PF) {
+		netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Get last SW configuration */
+	caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+	if (!caps)
+		return -ENOMEM;
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	/* Copy SW configuration returned from PHY caps to PHY config */
+	ice_copy_phy_caps_to_cfg(caps, &config);
+	sw_cfg_caps = caps->caps;
+	sw_cfg_fec = caps->link_fec_options;
+
+	/* Get topology caps, then copy PHY FEC topology caps to PHY config */
+	memset(caps, 0, sizeof(*caps));
+
+	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+				     caps, NULL);
+	if (status) {
+		err = -EAGAIN;
+		goto done;
+	}
+
+	config.caps |= (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC);
+	config.link_fec_opt = caps->link_fec_options;
+
+	ice_cfg_phy_fec(&config, req_fec);
+
+	/* If FEC mode has changed, then set PHY configuration and enable AN.
*/ + if ((config.caps & ICE_AQ_PHY_ENA_AUTO_FEC) != + (sw_cfg_caps & ICE_AQC_PHY_EN_AUTO_FEC) || + config.link_fec_opt != sw_cfg_fec) { + if (caps->caps & ICE_AQC_PHY_AN_MODE) + config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; + + status = ice_aq_set_phy_cfg(pi->hw, pi->lport, &config, NULL); + + if (status) + err = -EAGAIN; + } + +done: + devm_kfree(&vsi->back->pdev->dev, caps); + return err; +} + +/** + * ice_set_fecparam - Set FEC link options + * @netdev: network interface device structure + * @fecparam: Ethtool structure to retrieve FEC parameters + */ +static int +ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + enum ice_fec_mode fec; + + switch (fecparam->fec) { + case ETHTOOL_FEC_AUTO: + fec = ICE_FEC_AUTO; + break; + case ETHTOOL_FEC_RS: + fec = ICE_FEC_RS; + break; + case ETHTOOL_FEC_BASER: + fec = ICE_FEC_BASER; + break; + case ETHTOOL_FEC_OFF: + case ETHTOOL_FEC_NONE: + fec = ICE_FEC_NONE; + break; + default: + dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n", + fecparam->fec); + return -EINVAL; + } + + return ice_set_fec_cfg(netdev, fec); +} + +/** + * ice_get_fecparam - Get link FEC options + * @netdev: network interface device structure + * @fecparam: Ethtool structure to retrieve FEC parameters + */ +static int +ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_aqc_get_phy_caps_data *caps; + struct ice_link_status *link_info; + struct ice_vsi *vsi = np->vsi; + struct ice_port_info *pi; + enum ice_status status; + int err = 0; + + pi = vsi->port_info; + + if (!pi) + return -EOPNOTSUPP; + link_info = &pi->phy.link_info; + + /* Set FEC mode based on negotiated link info */ + switch (link_info->fec_info) { + case ICE_AQ_LINK_25G_KR_FEC_EN: + fecparam->active_fec = ETHTOOL_FEC_BASER; + break; + case ICE_AQ_LINK_25G_RS_528_FEC_EN: + /* fall through */ + case ICE_AQ_LINK_25G_RS_544_FEC_EN: + fecparam->active_fec = ETHTOOL_FEC_RS; + break; + default: + fecparam->active_fec = ETHTOOL_FEC_OFF; + break; + } + + caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + if (!caps) + return -ENOMEM; + + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + caps, NULL); + if (status) { + err = -EAGAIN; + goto done; + } + + /* Set supported/configured FEC modes based on PHY capability */ + if (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC) + fecparam->fec |= ETHTOOL_FEC_AUTO; + if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN || + caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ) + fecparam->fec |= ETHTOOL_FEC_BASER; + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN) + fecparam->fec |= ETHTOOL_FEC_RS; + if (caps->link_fec_options == 0) + fecparam->fec |= ETHTOOL_FEC_OFF; + +done: + devm_kfree(&vsi->back->pdev->dev, caps); + return err; +} + +/** * ice_get_priv_flags - report device private flags * @netdev: network interface device structure * @@ -433,10 +1201,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS); - if (test_bit(ICE_FLAG_DISABLE_FW_LLDP, change_flags)) { - if 
(test_bit(ICE_FLAG_DISABLE_FW_LLDP, pf->flags)) { + if (test_bit(ICE_FLAG_ENABLE_FW_LLDP, change_flags)) { + if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) { enum ice_status status; + /* Disable FW LLDP engine */ status = ice_aq_cfg_lldp_mib_change(&pf->hw, false, NULL); /* If unregistering for LLDP events fails, this is @@ -450,7 +1219,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) /* The AQ call to stop the FW LLDP agent will generate * an error if the agent is already stopped. */ - status = ice_aq_stop_lldp(&pf->hw, true, NULL); + status = ice_aq_stop_lldp(&pf->hw, true, true, NULL); if (status) dev_warn(&pf->pdev->dev, "Fail to stop LLDP agent\n"); @@ -458,9 +1227,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) * will likely not need DCB, so failure to init is * not a concern of ethtool */ - status = ice_init_pf_dcb(pf); + status = ice_init_pf_dcb(pf, true); if (status) dev_warn(&pf->pdev->dev, "Fail to init DCB\n"); + + /* Forward LLDP packets to default VSI so that they + * are passed up the stack + */ + ice_cfg_sw_lldp(vsi, false, true); } else { enum ice_status status; bool dcbx_agent_status; @@ -468,12 +1242,12 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) /* AQ command to start FW LLDP agent will return an * error if the agent is already started */ - status = ice_aq_start_lldp(&pf->hw, NULL); + status = ice_aq_start_lldp(&pf->hw, true, NULL); if (status) dev_warn(&pf->pdev->dev, "Fail to start LLDP Agent\n"); - /* AQ command to start FW DCBx agent will fail if + /* AQ command to start FW DCBX agent will fail if * the agent is already started */ status = ice_aq_start_stop_dcbx(&pf->hw, true, @@ -491,15 +1265,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) * registration/init failed but do not return error * state to ethtool */ - status = ice_aq_cfg_lldp_mib_change(&pf->hw, false, - NULL); - if (status) - dev_dbg(&pf->pdev->dev, - "Fail to reg for MIB change\n"); - - status = ice_init_pf_dcb(pf); + status = ice_init_pf_dcb(pf, true); if (status) dev_dbg(&pf->pdev->dev, "Fail to init DCB\n"); + + /* Remove rule to direct LLDP packets to default VSI. + * The FW LLDP engine will now be consuming them. + */ + ice_cfg_sw_lldp(vsi, false, false); } } clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags); @@ -529,6 +1302,8 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) * not safe. 
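The flag handling above rests on the bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS) call: XOR of the old and new flag bitmaps yields exactly the bits that toggled. A minimal sketch of that change-detection idiom using plain words (the flag names here are illustrative):

    #include <stdio.h>

    /* Private-flag change detection in miniature: XOR of old and new
     * flag words yields exactly the bits that toggled.
     */
    enum { FLAG_LINK_DOWN_ON_CLOSE, FLAG_ENABLE_FW_LLDP, NUM_FLAGS };

    int main(void)
    {
            unsigned long orig  = 1UL << FLAG_LINK_DOWN_ON_CLOSE;
            unsigned long flags = orig | (1UL << FLAG_ENABLE_FW_LLDP);
            unsigned long changed = flags ^ orig;

            if (changed & (1UL << FLAG_ENABLE_FW_LLDP))
                    printf("fw-lldp flag toggled to %s\n",
                           (flags & (1UL << FLAG_ENABLE_FW_LLDP)) ? "on" : "off");
            return 0;
    }

Testing the XOR result first means untouched flags cost nothing; only the toggled bit drives the LLDP start/stop paths that follow.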
*/ return ICE_ALL_STATS_LEN(netdev); + case ETH_SS_TEST: + return ICE_TEST_LEN; case ETH_SS_PRIV_FLAGS: return ICE_PRIV_FLAG_ARRAY_SIZE; default: @@ -628,7 +1403,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_100M_SGMII) { ethtool_link_ksettings_add_link_mode(ks, supported, 100baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 100baseT_Full); } @@ -636,14 +1412,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_1G_SGMII) { ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseT_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX) { ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseKX_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseKX_Full); } @@ -651,14 +1429,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_LX) { ethtool_link_ksettings_add_link_mode(ks, supported, 1000baseX_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 1000baseX_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T) { ethtool_link_ksettings_add_link_mode(ks, supported, 2500baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 2500baseT_Full); } @@ -666,7 +1446,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX) { ethtool_link_ksettings_add_link_mode(ks, supported, 2500baseX_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB) ethtool_link_ksettings_add_link_mode(ks, advertising, 2500baseX_Full); } @@ -674,7 +1455,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR) { ethtool_link_ksettings_add_link_mode(ks, supported, 5000baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 5000baseT_Full); } @@ -684,28 +1466,32 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_C2C) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseT_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseT_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseKR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & 
ICE_AQ_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseKR_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_SR) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseSR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseSR_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_LR) { ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseLR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 10000baseLR_Full); } @@ -717,7 +1503,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_C2C) { ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseCR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseCR_Full); } @@ -725,7 +1512,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_LR) { ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseSR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseSR_Full); } @@ -734,14 +1522,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1) { ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseKR_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 25000baseKR_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) { ethtool_link_ksettings_add_link_mode(ks, supported, 40000baseKR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseKR4_Full); } @@ -750,21 +1540,24 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI) { ethtool_link_ksettings_add_link_mode(ks, supported, 40000baseCR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseCR4_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_SR4) { ethtool_link_ksettings_add_link_mode(ks, supported, 40000baseSR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseSR4_Full); } if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_LR4) { ethtool_link_ksettings_add_link_mode(ks, supported, 40000baseLR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 40000baseLR4_Full); } @@ -779,7 +1572,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & 
ICE_PHY_TYPE_LOW_50G_AUI1) { ethtool_link_ksettings_add_link_mode(ks, supported, 50000baseCR2_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 50000baseCR2_Full); } @@ -787,7 +1581,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) { ethtool_link_ksettings_add_link_mode(ks, supported, 50000baseKR2_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 50000baseKR2_Full); } @@ -797,7 +1592,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) { ethtool_link_ksettings_add_link_mode(ks, supported, 50000baseSR2_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB) ethtool_link_ksettings_add_link_mode(ks, advertising, 50000baseSR2_Full); } @@ -814,7 +1610,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) { ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseCR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) need_add_adv_mode = true; } if (need_add_adv_mode) { @@ -826,7 +1623,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) { ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseSR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) need_add_adv_mode = true; } if (need_add_adv_mode) { @@ -838,7 +1636,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) { ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseLR4_ER4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) need_add_adv_mode = true; } if (need_add_adv_mode) { @@ -851,7 +1650,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev, phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) { ethtool_link_ksettings_add_link_mode(ks, supported, 100000baseKR4_Full); - if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) + if (!hw_link_info->req_speeds || + hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB) need_add_adv_mode = true; } if (need_add_adv_mode) @@ -1275,6 +2075,7 @@ ice_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *ks) { struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_aqc_get_phy_caps_data *caps; struct ice_link_status *hw_link_info; struct ice_vsi *vsi = np->vsi; @@ -1345,6 +2146,40 @@ ice_get_link_ksettings(struct net_device *netdev, break; } + caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + if (!caps) + goto done; + + if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_TOPO_CAP, + caps, NULL)) + netdev_info(netdev, "Get phy capability failed.\n"); + + /* Set supported FEC modes based on PHY capability */ + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE); + + if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN || + caps->link_fec_options & 
ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN) + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER); + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN) + ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS); + + if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_SW_CFG, + caps, NULL)) + netdev_info(netdev, "Get phy capability failed.\n"); + + /* Set advertised FEC modes based on PHY capability */ + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE); + + if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ) + ethtool_link_ksettings_add_link_mode(ks, advertising, + FEC_BASER); + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ) + ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); + +done: + devm_kfree(&vsi->back->pdev->dev, caps); return 0; } @@ -2371,8 +3206,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - wr32(&pf->hw, GLINT_RATE(vsi->hw_base_vector + - rc->ring->q_vector->v_idx), + wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx), ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high, pf->hw.intrl_gran)); } @@ -2533,6 +3367,7 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_regs = ice_get_regs, .get_msglevel = ice_get_msglevel, .set_msglevel = ice_set_msglevel, + .self_test = ice_self_test, .get_link = ethtool_op_get_link, .get_eeprom_len = ice_get_eeprom_len, .get_eeprom = ice_get_eeprom, @@ -2557,6 +3392,8 @@ static const struct ethtool_ops ice_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, .get_per_queue_coalesce = ice_get_per_q_coalesce, .set_per_queue_coalesce = ice_set_per_q_coalesce, + .get_fecparam = ice_get_fecparam, + .set_fecparam = ice_set_fecparam, }; /** diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index ec25f26069b0..6c5ce05742b1 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -6,6 +6,9 @@ #ifndef _ICE_HW_AUTOGEN_H_ #define _ICE_HW_AUTOGEN_H_ +#define PF0INT_ITR_0(_i) (0x03000004 + ((_i) * 4096)) +#define PF0INT_ITR_1(_i) (0x03000008 + ((_i) * 4096)) +#define PF0INT_ITR_2(_i) (0x0300000C + ((_i) * 4096)) #define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4)) #define QTX_COMM_HEAD(_DBQM) (0x000E0000 + ((_DBQM) * 4)) #define QTX_COMM_HEAD_HEAD_S 0 @@ -155,6 +158,7 @@ #define PFINT_OICR_HMC_ERR_M BIT(26) #define PFINT_OICR_PE_CRITERR_M BIT(28) #define PFINT_OICR_VFLR_M BIT(29) +#define PFINT_OICR_SWINT_M BIT(31) #define PFINT_OICR_CTL 0x0016CA80 #define PFINT_OICR_CTL_MSIX_INDX_M ICE_M(0x7FF, 0) #define PFINT_OICR_CTL_ITR_INDX_S 11 diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index fbf1eba0cc2a..a19f5920733b 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -137,6 +137,8 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) * for PF or EMP this field should be set to zero */ switch (vsi->type) { + case ICE_VSI_LB: + /* fall through */ case ICE_VSI_PF: tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; break; @@ -251,6 +253,10 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) if (!vsi->rx_rings) goto err_rxrings; + /* There is no 
need to allocate q_vectors for a loopback VSI. */ + if (vsi->type == ICE_VSI_LB) + return 0; + /* allocate memory for q_vector pointers */ vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors, sizeof(*vsi->q_vectors), GFP_KERNEL); @@ -275,6 +281,8 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) { switch (vsi->type) { case ICE_VSI_PF: + /* fall through */ + case ICE_VSI_LB: vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC; vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC; break; @@ -313,10 +321,14 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = vf->num_vf_qs; /* pf->num_vf_msix includes (VF miscellaneous vector + * data queue interrupts). Since vsi->num_q_vectors is number - * of queues vectors, subtract 1 from the original vector - * count + * of queues vectors, subtract 1 (ICE_NONQ_VECS_VF) from the + * original vector count */ - vsi->num_q_vectors = pf->num_vf_msix - 1; + vsi->num_q_vectors = pf->num_vf_msix - ICE_NONQ_VECS_VF; + break; + case ICE_VSI_LB: + vsi->alloc_txq = 1; + vsi->alloc_rxq = 1; break; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); @@ -516,6 +528,10 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id) if (ice_vsi_alloc_arrays(vsi)) goto err_rings; break; + case ICE_VSI_LB: + if (ice_vsi_alloc_arrays(vsi)) + goto err_rings; + break; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); goto unlock_pf; @@ -732,6 +748,8 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) BIT(cap->rss_table_entry_width)); vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI; break; + case ICE_VSI_LB: + break; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); @@ -924,6 +942,9 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ; break; + case ICE_VSI_LB: + dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type); + return; default: dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); return; @@ -955,6 +976,8 @@ static int ice_vsi_init(struct ice_vsi *vsi) ctxt->info = vsi->info; switch (vsi->type) { + case ICE_VSI_LB: + /* fall through */ case ICE_VSI_PF: ctxt->flags = ICE_AQ_VSI_TYPE_PF; break; @@ -1145,61 +1168,32 @@ err_out: static int ice_vsi_setup_vector_base(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; - int num_q_vectors = 0; + u16 num_q_vectors; + + /* SRIOV doesn't grab irq_tracker entries for each VSI */ + if (vsi->type == ICE_VSI_VF) + return 0; - if (vsi->sw_base_vector || vsi->hw_base_vector) { - dev_dbg(&pf->pdev->dev, "VSI %d has non-zero HW base vector %d or SW base vector %d\n", - vsi->vsi_num, vsi->hw_base_vector, vsi->sw_base_vector); + if (vsi->base_vector) { + dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n", + vsi->vsi_num, vsi->base_vector); return -EEXIST; } if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) return -ENOENT; - switch (vsi->type) { - case ICE_VSI_PF: - num_q_vectors = vsi->num_q_vectors; - /* reserve slots from OS requested IRQs */ - vsi->sw_base_vector = ice_get_res(pf, pf->sw_irq_tracker, - num_q_vectors, vsi->idx); - if (vsi->sw_base_vector < 0) { - dev_err(&pf->pdev->dev, - "Failed to get tracking for %d SW vectors for VSI %d, err=%d\n", - num_q_vectors, vsi->vsi_num, - vsi->sw_base_vector); - return -ENOENT; - } - pf->num_avail_sw_msix -= num_q_vectors; - - /* reserve slots from HW interrupts */ - vsi->hw_base_vector = ice_get_res(pf, pf->hw_irq_tracker, - num_q_vectors, vsi->idx); 
- break; - case ICE_VSI_VF: - /* take VF misc vector and data vectors into account */ - num_q_vectors = pf->num_vf_msix; - /* For VF VSI, reserve slots only from HW interrupts */ - vsi->hw_base_vector = ice_get_res(pf, pf->hw_irq_tracker, - num_q_vectors, vsi->idx); - break; - default: - dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type); - break; - } - - if (vsi->hw_base_vector < 0) { + num_q_vectors = vsi->num_q_vectors; + /* reserve slots from OS requested IRQs */ + vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors, + vsi->idx); + if (vsi->base_vector < 0) { dev_err(&pf->pdev->dev, - "Failed to get tracking for %d HW vectors for VSI %d, err=%d\n", - num_q_vectors, vsi->vsi_num, vsi->hw_base_vector); - if (vsi->type != ICE_VSI_VF) { - ice_free_res(pf->sw_irq_tracker, - vsi->sw_base_vector, vsi->idx); - pf->num_avail_sw_msix += num_q_vectors; - } + "Failed to get tracking for %d vectors for VSI %d, err=%d\n", + num_q_vectors, vsi->vsi_num, vsi->base_vector); return -ENOENT; } - - pf->num_avail_hw_msix -= num_q_vectors; + pf->num_avail_sw_msix -= num_q_vectors; return 0; } @@ -1842,8 +1836,73 @@ ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector) } /** + * ice_cfg_txq_interrupt - configure interrupt on Tx queue + * @vsi: the VSI being configured + * @txq: Tx queue being mapped to MSI-X vector + * @msix_idx: MSI-X vector index within the function + * @itr_idx: ITR index of the interrupt cause + * + * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector + * within the function space. + */ +#ifdef CONFIG_PCI_IOV +void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) +#else +static void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx) +#endif /* CONFIG_PCI_IOV */ +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + + itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M; + + val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | + ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M); + + wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); +} + +/** + * ice_cfg_rxq_interrupt - configure interrupt on Rx queue + * @vsi: the VSI being configured + * @rxq: Rx queue being mapped to MSI-X vector + * @msix_idx: MSI-X vector index within the function + * @itr_idx: ITR index of the interrupt cause + * + * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector + * within the function space. + */ +#ifdef CONFIG_PCI_IOV +void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) +#else +static void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx) +#endif /* CONFIG_PCI_IOV */ +{ + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 val; + + itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M; + + val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | + ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M); + + wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + + ice_flush(hw); +} + +/** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured + * + * This configures MSIX mode interrupts for the PF VSI, and should not be used + * for the VF VSI. */ void ice_vsi_cfg_msix(struct ice_vsi *vsi) { @@ -1873,43 +1932,17 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) * tracked for this PF. 
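The two helpers above build a QINT_TQCTL/QINT_RQCTL value by ORing a cause-enable bit with the shifted-and-masked MSI-X index and ITR index. The sketch below shows only the packing idiom; the shift and mask values are placeholders, not the real ice_hw_autogen.h definitions:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative field layout only; the real QINT_TQCTL_* shifts and
     * masks live in ice_hw_autogen.h, these numbers are placeholders.
     */
    #define QINT_CAUSE_ENA   (1u << 30)
    #define QINT_ITR_INDX_S  11
    #define QINT_ITR_INDX_M  (0x3u << QINT_ITR_INDX_S)
    #define QINT_MSIX_INDX_S 0
    #define QINT_MSIX_INDX_M (0x7FFu << QINT_MSIX_INDX_S)

    /* Same packing as ice_cfg_txq_interrupt(): enable the cause and
     * encode which vector and which ITR bank service this queue.
     */
    static uint32_t pack_qint(uint16_t msix_idx, uint16_t itr_idx)
    {
            uint32_t itr = ((uint32_t)itr_idx << QINT_ITR_INDX_S) &
                           QINT_ITR_INDX_M;
            uint32_t msix = ((uint32_t)msix_idx << QINT_MSIX_INDX_S) &
                            QINT_MSIX_INDX_M;

            return QINT_CAUSE_ENA | itr | msix;
    }

    int main(void)
    {
            printf("QINT value: 0x%08x\n", pack_qint(5, 1));
            return 0;
    }

Masking after the shift keeps an out-of-range index from corrupting neighboring fields, which is why both helpers apply the mask even for values the caller already validated.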
*/ for (q = 0; q < q_vector->num_ring_tx; q++) { - int itr_idx = (q_vector->tx.itr_idx << - QINT_TQCTL_ITR_INDX_S) & - QINT_TQCTL_ITR_INDX_M; - u32 val; - - if (vsi->type == ICE_VSI_VF) - val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | - (((i + 1) << QINT_TQCTL_MSIX_INDX_S) & - QINT_TQCTL_MSIX_INDX_M); - else - val = QINT_TQCTL_CAUSE_ENA_M | itr_idx | - ((reg_idx << QINT_TQCTL_MSIX_INDX_S) & - QINT_TQCTL_MSIX_INDX_M); - wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val); + ice_cfg_txq_interrupt(vsi, txq, reg_idx, + q_vector->tx.itr_idx); txq++; } for (q = 0; q < q_vector->num_ring_rx; q++) { - int itr_idx = (q_vector->rx.itr_idx << - QINT_RQCTL_ITR_INDX_S) & - QINT_RQCTL_ITR_INDX_M; - u32 val; - - if (vsi->type == ICE_VSI_VF) - val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | - (((i + 1) << QINT_RQCTL_MSIX_INDX_S) & - QINT_RQCTL_MSIX_INDX_M); - else - val = QINT_RQCTL_CAUSE_ENA_M | itr_idx | - ((reg_idx << QINT_RQCTL_MSIX_INDX_S) & - QINT_RQCTL_MSIX_INDX_M); - wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val); + ice_cfg_rxq_interrupt(vsi, rxq, reg_idx, + q_vector->rx.itr_idx); rxq++; } } - - ice_flush(hw); } /** @@ -2024,6 +2057,19 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi) } /** + * ice_trigger_sw_intr - trigger a software interrupt + * @hw: pointer to the HW structure + * @q_vector: interrupt vector to trigger the software interrupt for + */ +void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) +{ + wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx), + (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) | + GLINT_DYN_CTL_SWINT_TRIG_M | + GLINT_DYN_CTL_INTENA_M); +} + +/** * ice_vsi_stop_tx_rings - Disable Tx rings * @vsi: the VSI being configured * @rst_src: reset source @@ -2070,8 +2116,9 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, break; for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) { - if (!rings || !rings[q_idx] || - !rings[q_idx]->q_vector) { + struct ice_q_vector *q_vector; + + if (!rings || !rings[q_idx]) { err = -EINVAL; goto err_out; } @@ -2091,9 +2138,10 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, /* trigger a software interrupt for the vector * associated to the queue to schedule NAPI handler */ - wr32(hw, GLINT_DYN_CTL(rings[i]->q_vector->reg_idx), - GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_INTENA_MSK_M); + q_vector = rings[i]->q_vector; + if (q_vector) + ice_trigger_sw_intr(hw, q_vector); + q_idx++; } status = ice_dis_vsi_txq(vsi->port_info, vsi->idx, tc, @@ -2234,7 +2282,14 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi) goto clear_reg_idx; } - q_vector->reg_idx = q_vector->v_idx + vsi->hw_base_vector; + if (vsi->type == ICE_VSI_VF) { + struct ice_vf *vf = &vsi->back->vf[vsi->vf_id]; + + q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector); + } else { + q_vector->reg_idx = + q_vector->v_idx + vsi->base_vector; + } } return 0; @@ -2291,6 +2346,54 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule) } /** + * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling + * @vsi: the VSI being configured + * @tx: bool to determine Tx or Rx rule + * @create: bool to determine create or remove Rule + */ +void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create) +{ + struct ice_fltr_list_entry *list; + struct ice_pf *pf = vsi->back; + LIST_HEAD(tmp_add_list); + enum ice_status status; + + list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL); + if (!list) + return; + + list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE; + list->fltr_info.vsi_handle = vsi->idx; + 
list->fltr_info.l_data.ethertype_mac.ethertype = ETH_P_LLDP; + + if (tx) { + list->fltr_info.fltr_act = ICE_DROP_PACKET; + list->fltr_info.flag = ICE_FLTR_TX; + list->fltr_info.src_id = ICE_SRC_ID_VSI; + } else { + list->fltr_info.fltr_act = ICE_FWD_TO_VSI; + list->fltr_info.flag = ICE_FLTR_RX; + list->fltr_info.src_id = ICE_SRC_ID_LPORT; + } + + INIT_LIST_HEAD(&list->list_entry); + list_add(&list->list_entry, &tmp_add_list); + + if (create) + status = ice_add_eth_mac(&pf->hw, &tmp_add_list); + else + status = ice_remove_eth_mac(&pf->hw, &tmp_add_list); + + if (status) + dev_err(&pf->pdev->dev, + "Fail %s %s LLDP rule on VSI %i error: %d\n", + create ? "adding" : "removing", tx ? "TX" : "RX", + vsi->vsi_num, status); + + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); +} + +/** * ice_vsi_setup - Set up a VSI by a given type * @pf: board private structure * @pi: pointer to the port_info instance @@ -2310,6 +2413,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct device *dev = &pf->pdev->dev; + enum ice_status status; struct ice_vsi *vsi; int ret, i; @@ -2389,23 +2493,24 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_alloc_q_vector; - /* Setup Vector base only during VF init phase or when VF asks - * for more vectors than assigned number. In all other cases, - * assign hw_base_vector to the value given earlier. - */ - if (test_bit(ICE_VF_STATE_CFG_INTR, pf->vf[vf_id].vf_states)) { - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto unroll_vector_base; - } else { - vsi->hw_base_vector = pf->vf[vf_id].first_vector_idx; - } ret = ice_vsi_set_q_vectors_reg_idx(vsi); if (ret) goto unroll_vector_base; pf->q_left_tx -= vsi->alloc_txq; pf->q_left_rx -= vsi->alloc_rxq; + + /* Do not exit if configuring RSS had an issue, at least + * receive traffic on first queue. Hence no need to capture + * return value + */ + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) + ice_vsi_cfg_rss_lut_key(vsi); + break; + case ICE_VSI_LB: + ret = ice_vsi_alloc_rings(vsi); + if (ret) + goto unroll_vsi_init; break; default: /* clean up the resources and exit */ @@ -2416,12 +2521,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, for (i = 0; i < vsi->tc_cfg.numtc; i++) max_txqs[i] = pf->num_lan_tx; - ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); - if (ret) { + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_txqs); + if (status) { dev_err(&pf->pdev->dev, "VSI %d failed lan queue config, error %d\n", - vsi->vsi_num, ret); + vsi->vsi_num, status); goto unroll_vector_base; } @@ -2430,19 +2535,28 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, * out PAUSE or PFC frames. If enabled, FW can still send FC frames. * The rule is added once for PF VSI in order to create appropriate * recipe, since VSI/VSI list is ignored with drop action... + * Also add rules to handle LLDP Tx and Rx packets. Tx LLDP packets + * need to be dropped so that VFs cannot send LLDP packets to reconfig + * DCB settings in the HW. Also, if the FW DCBX engine is not running + * then Rx LLDP packets need to be redirected up the stack. 
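The Tx and Rx rules built by ice_cfg_sw_lldp() both key on the LLDP ethertype: Tx-sourced 0x88CC frames are dropped (so VFs cannot push DCB changes), Rx ones are forwarded to the default VSI when the FW agent is not consuming them. A software equivalent of that classification, assuming an untagged Ethernet header (0x88CC is the standard ETH_P_LLDP value):

    #include <stdint.h>
    #include <stdio.h>

    #define ETH_P_LLDP 0x88CC       /* standard LLDP ethertype */

    /* The switch rules key on this ethertype; in software it is just
     * bytes 12-13 of the Ethernet header (no VLAN tag assumed here).
     */
    static int is_lldp_frame(const uint8_t *frame)
    {
            uint16_t ethertype = (uint16_t)(frame[12] << 8 | frame[13]);

            return ethertype == ETH_P_LLDP;
    }

    int main(void)
    {
            uint8_t frame[64] = { 0 };

            frame[12] = 0x88;
            frame[13] = 0xCC;
            printf("LLDP: %s\n", is_lldp_frame(frame) ? "yes" : "no");
            return 0;
    }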
*/ - if (vsi->type == ICE_VSI_PF) + if (vsi->type == ICE_VSI_PF) { ice_vsi_add_rem_eth_mac(vsi, true); + /* Tx LLDP packets */ + ice_cfg_sw_lldp(vsi, true, true); + + /* Rx LLDP packets */ + if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) + ice_cfg_sw_lldp(vsi, false, true); + } + return vsi; unroll_vector_base: /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx); + ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; - /* reclaim HW interrupt back to the common pool */ - ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, vsi->idx); - pf->num_avail_hw_msix += vsi->num_q_vectors; unroll_alloc_q_vector: ice_vsi_free_q_vectors(vsi); unroll_vsi_init: @@ -2463,17 +2577,17 @@ unroll_get_qs: static void ice_vsi_release_msix(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; - u16 vector = vsi->hw_base_vector; struct ice_hw *hw = &pf->hw; u32 txq = 0; u32 rxq = 0; int i, q; - for (i = 0; i < vsi->num_q_vectors; i++, vector++) { + for (i = 0; i < vsi->num_q_vectors; i++) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; + u16 reg_idx = q_vector->reg_idx; - wr32(hw, GLINT_ITR(ICE_IDX_ITR0, vector), 0); - wr32(hw, GLINT_ITR(ICE_IDX_ITR1, vector), 0); + wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0); + wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0); for (q = 0; q < q_vector->num_ring_tx; q++) { wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0); txq++; @@ -2495,7 +2609,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) void ice_vsi_free_irq(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; - int base = vsi->sw_base_vector; + int base = vsi->base_vector; if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) { int i; @@ -2591,11 +2705,11 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) int count = 0; int i; - if (!res || index >= res->num_entries) + if (!res || index >= res->end) return -EINVAL; id |= ICE_RES_VALID_BIT; - for (i = index; i < res->num_entries && res->list[i] == id; i++) { + for (i = index; i < res->end && res->list[i] == id; i++) { res->list[i] = 0; count++; } @@ -2613,10 +2727,9 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id) */ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) { - int start = res->search_hint; - int end = start; + int start = 0, end = 0; - if ((start + needed) > res->num_entries) + if (needed > res->end) return -ENOMEM; id |= ICE_RES_VALID_BIT; @@ -2625,7 +2738,7 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) /* skip already allocated entries */ if (res->list[end++] & ICE_RES_VALID_BIT) { start = end; - if ((start + needed) > res->num_entries) + if ((start + needed) > res->end) break; } @@ -2636,13 +2749,9 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) while (i != end) res->list[i++] = id; - if (end == res->num_entries) - end = 0; - - res->search_hint = end; return start; } - } while (1); + } while (end < res->end); return -ENOMEM; } @@ -2654,16 +2763,11 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id) * @needed: size of the block needed * @id: identifier to track owner * - * Returns the base item index of the block, or -ENOMEM for error - * The search_hint trick and lack of advanced fit-finding only works - * because we're highly likely to have all the same sized requests. - * Linear search time and any fragmentation should be minimal. 
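With the search hint gone, the reworked ice_search_res() above is a plain bounded first-fit over the tracker list. A compile-standalone approximation with my own names and a simplified error convention (-1 instead of -ENOMEM):

#include <stdint.h>
#include <stdio.h>

#define RES_VALID_BIT 0x8000    /* stand-in for ICE_RES_VALID_BIT */

static int search_res(uint16_t *list, uint16_t end, uint16_t needed, uint16_t id)
{
        uint16_t start = 0;

        if (!needed || needed > end)
                return -1;

        id |= RES_VALID_BIT;

        while (start + needed <= end) {
                uint16_t i;

                /* scan one candidate window; stop at the first used entry */
                for (i = start; i < start + needed; i++)
                        if (list[i] & RES_VALID_BIT)
                                break;

                if (i == start + needed) {
                        /* hole is big enough: claim it for this owner */
                        for (i = start; i < start + needed; i++)
                                list[i] = id;
                        return start;
                }
                start = i + 1;  /* restart just past the used entry */
        }
        return -1;      /* no contiguous block of 'needed' free entries */
}

int main(void)
{
        uint16_t list[8] = { RES_VALID_BIT | 1, 0, 0, 0, 0, 0, 0, 0 };

        printf("allocated at index %d\n", search_res(list, 8, 3, 2));
        return 0;
}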
+ * Returns the base item index of the block, or negative for error */ int ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) { - int ret; - if (!res || !pf) return -EINVAL; @@ -2674,16 +2778,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) return -EINVAL; } - /* search based on search_hint */ - ret = ice_search_res(res, needed, id); - - if (ret < 0) { - /* previous search failed. Reset search hint and try again */ - res->search_hint = 0; - ret = ice_search_res(res, needed, id); - } - - return ret; + return ice_search_res(res, needed, id); } /** @@ -2692,7 +2787,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id) */ void ice_vsi_dis_irq(struct ice_vsi *vsi) { - int base = vsi->sw_base_vector; + int base = vsi->base_vector; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; u32 val; @@ -2738,6 +2833,21 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) } /** + * ice_napi_del - Remove NAPI handler for the VSI + * @vsi: VSI for which NAPI handler is to be removed + */ +void ice_napi_del(struct ice_vsi *vsi) +{ + int v_idx; + + if (!vsi->netdev) + return; + + ice_for_each_q_vector(vsi, v_idx) + netif_napi_del(&vsi->q_vectors[v_idx]->napi); +} + +/** * ice_vsi_release - Delete a VSI and free its resources * @vsi: the VSI being removed * @@ -2745,60 +2855,61 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi) */ int ice_vsi_release(struct ice_vsi *vsi) { - struct ice_vf *vf = NULL; struct ice_pf *pf; if (!vsi->back) return -ENODEV; pf = vsi->back; - if (vsi->type == ICE_VSI_VF) - vf = &pf->vf[vsi->vf_id]; - /* do not unregister and free netdevs while driver is in the reset - * recovery pending state. Since reset/rebuild happens through PF - * service task workqueue, its not a good idea to unregister netdev - * that is associated to the PF that is running the work queue items - * currently. This is done to avoid check_flush_dependency() warning - * on this wq + /* do not unregister while driver is in the reset recovery pending + * state. Since reset/rebuild happens through PF service task workqueue, + * it's not a good idea to unregister netdev that is associated to the + * PF that is running the work queue items currently. This is done to + * avoid check_flush_dependency() warning on this wq */ - if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) { - ice_napi_del(vsi); + if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) unregister_netdev(vsi->netdev); - free_netdev(vsi->netdev); - vsi->netdev = NULL; - } if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); /* Disable VSI and free resources */ - ice_vsi_dis_irq(vsi); + if (vsi->type != ICE_VSI_LB) + ice_vsi_dis_irq(vsi); ice_vsi_close(vsi); - /* reclaim interrupt vectors back to PF */ + /* SR-IOV determines needed MSIX resources all at once instead of per + * VSI since when VFs are spawned we know how many VFs there are and how + * many interrupts each VF needs. SR-IOV MSIX resources are also + * cleared in the same manner. 
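For reference, the doorbell value ice_trigger_sw_intr() writes above is just an OR of an ITR index and two control bits. A sketch with representative shift/mask values; the real GLINT_DYN_CTL bit layout lives in the hardware register headers, so treat these constants as placeholders:

#include <stdint.h>
#include <stdio.h>

#define ITR_NONE                3               /* "no ITR interval" index */
#define DYN_CTL_INTENA          (1u << 0)       /* keep the vector enabled */
#define DYN_CTL_SWINT_TRIG      (1u << 2)       /* fire a software interrupt */
#define DYN_CTL_ITR_INDX_S      3

static uint32_t sw_intr_doorbell(void)
{
        return (ITR_NONE << DYN_CTL_ITR_INDX_S) |
               DYN_CTL_SWINT_TRIG |
               DYN_CTL_INTENA;
}

int main(void)
{
        printf("GLINT_DYN_CTL value: 0x%08x\n", sw_intr_doorbell());
        return 0;
}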
+ */ if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx); + ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; - /* reclaim HW interrupts back to the common pool */ - ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, vsi->idx); - pf->num_avail_hw_msix += vsi->num_q_vectors; - } else if (test_bit(ICE_VF_STATE_CFG_INTR, vf->vf_states)) { - /* Reclaim VF resources back only while freeing all VFs or - * vector reassignment is requested - */ - ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx, - vsi->idx); - pf->num_avail_hw_msix += pf->num_vf_msix; } - if (vsi->type == ICE_VSI_PF) + if (vsi->type == ICE_VSI_PF) { ice_vsi_add_rem_eth_mac(vsi, false); + ice_cfg_sw_lldp(vsi, true, false); + /* The Rx rule will only exist to remove if the LLDP FW + * engine is currently stopped + */ + if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) + ice_cfg_sw_lldp(vsi, false, false); + } ice_remove_vsi_fltr(&pf->hw, vsi->idx); ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_delete(vsi); ice_vsi_free_q_vectors(vsi); + + /* make sure unregister_netdev() was called by checking __ICE_DOWN */ + if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) { + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } + ice_vsi_clear_rings(vsi); ice_vsi_put_qs(vsi); @@ -2825,6 +2936,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_vf *vf = NULL; + enum ice_status status; struct ice_pf *pf; int ret, i; @@ -2838,24 +2950,17 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ice_vsi_free_q_vectors(vsi); + /* SR-IOV determines needed MSIX resources all at once instead of per + * VSI since when VFs are spawned we know how many VFs there are and how + * many interrupts each VF needs. SR-IOV MSIX resources are also + * cleared in the same manner. 
+ */ if (vsi->type != ICE_VSI_VF) { /* reclaim SW interrupts back to the common pool */ - ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx); + ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx); pf->num_avail_sw_msix += vsi->num_q_vectors; - vsi->sw_base_vector = 0; - /* reclaim HW interrupts back to the common pool */ - ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, - vsi->idx); - pf->num_avail_hw_msix += vsi->num_q_vectors; - } else { - /* Reclaim VF resources back to the common pool for reset and - * and rebuild, with vector reassignment - */ - ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx, - vsi->idx); - pf->num_avail_hw_msix += pf->num_vf_msix; + vsi->base_vector = 0; } - vsi->hw_base_vector = 0; ice_vsi_clear_rings(vsi); ice_vsi_free_arrays(vsi); @@ -2881,10 +2986,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) if (ret) goto err_rings; - ret = ice_vsi_setup_vector_base(vsi); - if (ret) - goto err_vectors; - ret = ice_vsi_set_q_vectors_reg_idx(vsi); if (ret) goto err_vectors; @@ -2929,12 +3030,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi) for (i = 0; i < vsi->tc_cfg.numtc; i++) max_txqs[i] = pf->num_lan_tx; - ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); - if (ret) { + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, + max_txqs); + if (status) { dev_err(&pf->pdev->dev, "VSI %d failed lan queue config, error %d\n", - vsi->vsi_num, ret); + vsi->vsi_num, status); goto err_vectors; } return 0; @@ -2956,7 +3057,7 @@ err_vsi: /** * ice_is_reset_in_progress - check for a reset in progress - * @state: pf state field + * @state: PF state field */ bool ice_is_reset_in_progress(unsigned long *state) { diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index a91d3553cc89..6e43ef03bfc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -19,6 +19,14 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi); void ice_vsi_cfg_msix(struct ice_vsi *vsi); +#ifdef CONFIG_PCI_IOV +void +ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx); + +void +ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); +#endif /* CONFIG_PCI_IOV */ + int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid); int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid); @@ -37,6 +45,8 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); +void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); + void ice_vsi_delete(struct ice_vsi *vsi); int ice_vsi_clear(struct ice_vsi *vsi); @@ -49,6 +59,8 @@ struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, enum ice_vsi_type type, u16 vf_id); +void ice_napi_del(struct ice_vsi *vsi); + int ice_vsi_release(struct ice_vsi *vsi); void ice_vsi_close(struct ice_vsi *vsi); @@ -64,6 +76,8 @@ bool ice_is_reset_in_progress(unsigned long *state); void ice_vsi_free_q_vectors(struct ice_vsi *vsi); +void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); + void ice_vsi_put_qs(struct ice_vsi *vsi); #ifdef CONFIG_DCB diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 7843abf4d44d..28ec0d57941d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -61,9 +61,10 @@ static u32 ice_get_tx_pending(struct ice_ring *ring) static void 
ice_check_for_hang_subtask(struct ice_pf *pf) { struct ice_vsi *vsi = NULL; + struct ice_hw *hw; unsigned int i; - u32 v, v_idx; int packets; + u32 v; ice_for_each_vsi(pf, v) if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) { @@ -77,12 +78,12 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) if (!(vsi->netdev && netif_carrier_ok(vsi->netdev))) return; + hw = &vsi->back->hw; + for (i = 0; i < vsi->num_txq; i++) { struct ice_ring *tx_ring = vsi->tx_rings[i]; if (tx_ring && tx_ring->desc) { - int itr = ICE_ITR_NONE; - /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this * queue. @@ -93,12 +94,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) packets = tx_ring->stats.pkts & INT_MAX; if (tx_ring->tx_stats.prev_pkt == packets) { /* Trigger sw interrupt to revive the queue */ - v_idx = tx_ring->q_vector->v_idx; - wr32(&vsi->back->hw, - GLINT_DYN_CTL(vsi->hw_base_vector + v_idx), - (itr << GLINT_DYN_CTL_ITR_INDX_S) | - GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_INTENA_MSK_M); + ice_trigger_sw_intr(hw, tx_ring->q_vector); continue; } @@ -113,6 +109,67 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) } /** + * ice_init_mac_fltr - Set initial MAC filters + * @pf: board private structure + * + * Set initial set of MAC filters for PF VSI; configure filters for permanent + * address and broadcast address. If an error is encountered, netdevice will be + * unregistered. + */ +static int ice_init_mac_fltr(struct ice_pf *pf) +{ + LIST_HEAD(tmp_add_list); + u8 broadcast[ETH_ALEN]; + struct ice_vsi *vsi; + int status; + + vsi = ice_find_vsi_by_type(pf, ICE_VSI_PF); + if (!vsi) + return -EINVAL; + + /* To add a MAC filter, first add the MAC to a list and then + * pass the list to ice_add_mac. + */ + + /* Add a unicast MAC filter so the VSI can get its packets */ + status = ice_add_mac_to_list(vsi, &tmp_add_list, + vsi->port_info->mac.perm_addr); + if (status) + goto unregister; + + /* VSI needs to receive broadcast traffic, so add the broadcast + * MAC address to the list as well. + */ + eth_broadcast_addr(broadcast); + status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); + if (status) + goto free_mac_list; + + /* Program MAC filters for entries in tmp_add_list */ + status = ice_add_mac(&pf->hw, &tmp_add_list); + if (status) + status = -ENOMEM; + +free_mac_list: + ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); + +unregister: + /* We aren't useful with no MAC filters, so unregister if we + * had an error + */ + if (status && vsi->netdev->reg_state == NETREG_REGISTERED) { + dev_err(&pf->pdev->dev, + "Could not add MAC filters error %d. 
Unregistering device\n", + status); + unregister_netdev(vsi->netdev); + free_netdev(vsi->netdev); + vsi->netdev = NULL; + } + + return status; +} + +/** * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced * @netdev: the net device on which the sync is happening * @addr: MAC address to sync @@ -567,7 +624,11 @@ static void ice_reset_subtask(struct ice_pf *pf) */ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) { + struct ice_aqc_get_phy_caps_data *caps; + enum ice_status status; + const char *fec_req; const char *speed; + const char *fec; const char *fc; if (!vsi) @@ -584,6 +645,12 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) } switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_100GB: + speed = "100 G"; + break; + case ICE_AQ_LINK_SPEED_50GB: + speed = "50 G"; + break; case ICE_AQ_LINK_SPEED_40GB: speed = "40 G"; break; @@ -615,13 +682,13 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) switch (vsi->port_info->fc.current_mode) { case ICE_FC_FULL: - fc = "RX/TX"; + fc = "Rx/Tx"; break; case ICE_FC_TX_PAUSE: - fc = "TX"; + fc = "Tx"; break; case ICE_FC_RX_PAUSE: - fc = "RX"; + fc = "Rx"; break; case ICE_FC_NONE: fc = "None"; @@ -631,8 +698,47 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) break; } - netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n", - speed, fc); + /* Get FEC mode based on negotiated link info */ + switch (vsi->port_info->phy.link_info.fec_info) { + case ICE_AQ_LINK_25G_RS_528_FEC_EN: + /* fall through */ + case ICE_AQ_LINK_25G_RS_544_FEC_EN: + fec = "RS-FEC"; + break; + case ICE_AQ_LINK_25G_KR_FEC_EN: + fec = "FC-FEC/BASE-R"; + break; + default: + fec = "NONE"; + break; + } + + /* Get FEC mode requested based on PHY caps last SW configuration */ + caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL); + if (!caps) { + fec_req = "Unknown"; + goto done; + } + + status = ice_aq_get_phy_caps(vsi->port_info, false, + ICE_AQC_REPORT_SW_CFG, caps, NULL); + if (status) + netdev_info(vsi->netdev, "Get phy capability failed.\n"); + + if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ) + fec_req = "RS-FEC"; + else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ || + caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ) + fec_req = "FC-FEC/BASE-R"; + else + fec_req = "NONE"; + + devm_kfree(&vsi->back->pdev->dev, caps); + +done: + netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Flow Control: %s\n", + speed, fec_req, fec, fc); } /** @@ -664,7 +770,7 @@ static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up) /** * ice_link_event - process the link event - * @pf: pf that the link event is associated with + * @pf: PF that the link event is associated with * @pi: port_info for the port that the link event is associated with * @link_up: true if the physical link is up and false if it is down * @link_speed: current link speed received from the link event @@ -774,7 +880,7 @@ static int ice_init_link_events(struct ice_port_info *pi) /** * ice_handle_link_event - handle link event via ARQ - * @pf: pf that the link event is associated with + * @pf: PF that the link event is associated with * @event: event structure containing link status info */ static int @@ -1161,16 +1267,16 @@ static void ice_handle_mdd_event(struct ice_pf *pf) } } - /* see if one of the VFs needs to be reset */ - for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) { + /* check to 
see if one of the VFs caused the MDD */ + for (i = 0; i < pf->num_alloc_vfs; i++) { struct ice_vf *vf = &pf->vf[i]; - mdd_detected = false; + bool vf_mdd_detected = false; reg = rd32(hw, VP_MDET_TX_PQM(i)); if (reg & VP_MDET_TX_PQM_VALID_M) { wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); - mdd_detected = true; + vf_mdd_detected = true; dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", i); } @@ -1178,7 +1284,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, VP_MDET_TX_TCLAN(i)); if (reg & VP_MDET_TX_TCLAN_VALID_M) { wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); - mdd_detected = true; + vf_mdd_detected = true; dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", i); } @@ -1186,7 +1292,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, VP_MDET_TX_TDPU(i)); if (reg & VP_MDET_TX_TDPU_VALID_M) { wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); - mdd_detected = true; + vf_mdd_detected = true; dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n", i); } @@ -1194,19 +1300,18 @@ static void ice_handle_mdd_event(struct ice_pf *pf) reg = rd32(hw, VP_MDET_RX(i)); if (reg & VP_MDET_RX_VALID_M) { wr32(hw, VP_MDET_RX(i), 0xFFFF); - mdd_detected = true; + vf_mdd_detected = true; dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n", i); } - if (mdd_detected) { + if (vf_mdd_detected) { vf->num_mdd_events++; - dev_info(&pf->pdev->dev, - "Use PF Control I/F to re-enable the VF\n"); - set_bit(ICE_VF_STATE_DIS, vf->vf_states); + if (vf->num_mdd_events > 1) + dev_info(&pf->pdev->dev, "VF %d has had %llu MDD events since last boot\n", + i, vf->num_mdd_events); } } - } /** @@ -1327,7 +1432,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) { int q_vectors = vsi->num_q_vectors; struct ice_pf *pf = vsi->back; - int base = vsi->sw_base_vector; + int base = vsi->base_vector; int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; @@ -1408,7 +1513,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf) wr32(hw, PFINT_OICR_ENA, val); /* SW_ITR_IDX = 0, but don't change INTENA */ - wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M); } @@ -1430,6 +1535,11 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data) oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); + if (oicr & PFINT_OICR_SWINT_M) { + ena_mask &= ~PFINT_OICR_SWINT_M; + pf->sw_int_count++; + } + if (oicr & PFINT_OICR_MAL_DETECT_M) { ena_mask &= ~PFINT_OICR_MAL_DETECT_M; set_bit(__ICE_MDD_EVENT_PENDING, pf->state); @@ -1556,15 +1666,13 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf) ice_flush(hw); if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) { - synchronize_irq(pf->msix_entries[pf->sw_oicr_idx].vector); + synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); devm_free_irq(&pf->pdev->dev, - pf->msix_entries[pf->sw_oicr_idx].vector, pf); + pf->msix_entries[pf->oicr_idx].vector, pf); } pf->num_avail_sw_msix += 1; - ice_free_res(pf->sw_irq_tracker, pf->sw_oicr_idx, ICE_RES_MISC_VEC_ID); - pf->num_avail_hw_msix += 1; - ice_free_res(pf->hw_irq_tracker, pf->hw_oicr_idx, ICE_RES_MISC_VEC_ID); + ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID); } /** @@ -1618,43 +1726,31 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf) if (ice_is_reset_in_progress(pf->state)) goto skip_req_irq; - /* reserve one vector in sw_irq_tracker for misc interrupts */ - oicr_idx = ice_get_res(pf, pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); + /* 
reserve one vector in irq_tracker for misc interrupts */ + oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); if (oicr_idx < 0) return oicr_idx; pf->num_avail_sw_msix -= 1; - pf->sw_oicr_idx = oicr_idx; - - /* reserve one vector in hw_irq_tracker for misc interrupts */ - oicr_idx = ice_get_res(pf, pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); - if (oicr_idx < 0) { - ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); - pf->num_avail_sw_msix += 1; - return oicr_idx; - } - pf->num_avail_hw_msix -= 1; - pf->hw_oicr_idx = oicr_idx; + pf->oicr_idx = oicr_idx; err = devm_request_irq(&pf->pdev->dev, - pf->msix_entries[pf->sw_oicr_idx].vector, + pf->msix_entries[pf->oicr_idx].vector, ice_misc_intr, 0, pf->int_name, pf); if (err) { dev_err(&pf->pdev->dev, "devm_request_irq for %s failed: %d\n", pf->int_name, err); - ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); + ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID); pf->num_avail_sw_msix += 1; - ice_free_res(pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID); - pf->num_avail_hw_msix += 1; return err; } skip_req_irq: ice_ena_misc_vector(pf); - ice_ena_ctrlq_interrupts(hw, pf->hw_oicr_idx); - wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx), + ice_ena_ctrlq_interrupts(hw, pf->oicr_idx); + wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx), ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S); ice_flush(hw); @@ -1664,21 +1760,6 @@ skip_req_irq: } /** - * ice_napi_del - Remove NAPI handler for the VSI - * @vsi: VSI for which NAPI handler is to be removed - */ -void ice_napi_del(struct ice_vsi *vsi) -{ - int v_idx; - - if (!vsi->netdev) - return; - - ice_for_each_q_vector(vsi, v_idx) - netif_napi_del(&vsi->q_vectors[v_idx]->napi); -} - -/** * ice_napi_add - register NAPI handler for the VSI * @vsi: VSI for which NAPI handler is to be registered * @@ -1803,8 +1884,8 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) * @pf: board private structure * @pi: pointer to the port_info instance * - * Returns pointer to the successfully allocated VSI sw struct on success, - * otherwise returns NULL on failure. + * Returns pointer to the successfully allocated VSI software struct + * on success, otherwise returns NULL on failure. */ static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) @@ -1813,6 +1894,20 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) } /** + * ice_lb_vsi_setup - Set up a loopback VSI + * @pf: board private structure + * @pi: pointer to the port_info instance + * + * Returns pointer to the successfully allocated VSI software struct + * on success, otherwise returns NULL on failure. + */ +struct ice_vsi * +ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) +{ + return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID); +} + +/** * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload * @netdev: network interface to be adjusted * @proto: unused protocol @@ -1900,8 +1995,6 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, */ static int ice_setup_pf_sw(struct ice_pf *pf) { - LIST_HEAD(tmp_add_list); - u8 broadcast[ETH_ALEN]; struct ice_vsi *vsi; int status = 0; @@ -1926,38 +2019,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf) */ ice_napi_add(vsi); - /* To add a MAC filter, first add the MAC to a list and then - * pass the list to ice_add_mac. 
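The new ice_init_mac_fltr() keeps the existing pattern: stage every address on a temporary list, program the whole list in one call, then free it. A stripped-down sketch with a fixed array standing in for the kernel list:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ETH_ALEN 6

struct mac_entry { uint8_t addr[ETH_ALEN]; };

static void eth_broadcast(uint8_t *addr)
{
        memset(addr, 0xff, ETH_ALEN);   /* ff:ff:ff:ff:ff:ff */
}

int main(void)
{
        uint8_t perm[ETH_ALEN] = { 0x02, 0x00, 0x00, 0xaa, 0xbb, 0xcc };
        struct mac_entry list[2];
        size_t i, n = 0;

        /* stage the permanent unicast address and broadcast first ... */
        memcpy(list[n++].addr, perm, ETH_ALEN);
        eth_broadcast(list[n++].addr);

        /* ... then "program" them in a single pass, as ice_add_mac() does */
        for (i = 0; i < n; i++)
                printf("add filter %02x:%02x:%02x:%02x:%02x:%02x\n",
                       list[i].addr[0], list[i].addr[1], list[i].addr[2],
                       list[i].addr[3], list[i].addr[4], list[i].addr[5]);
        return 0;
}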
- */ - - /* Add a unicast MAC filter so the VSI can get its packets */ - status = ice_add_mac_to_list(vsi, &tmp_add_list, - vsi->port_info->mac.perm_addr); + status = ice_init_mac_fltr(pf); if (status) goto unroll_napi_add; - /* VSI needs to receive broadcast traffic, so add the broadcast - * MAC address to the list as well. - */ - eth_broadcast_addr(broadcast); - status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast); - if (status) - goto free_mac_list; - - /* program MAC filters for entries in tmp_add_list */ - status = ice_add_mac(&pf->hw, &tmp_add_list); - if (status) { - dev_err(&pf->pdev->dev, "Could not add MAC filters\n"); - status = -ENOMEM; - goto free_mac_list; - } - - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); return status; -free_mac_list: - ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list); - unroll_napi_add: if (vsi) { ice_napi_del(vsi); @@ -2149,14 +2216,9 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf) if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) ice_dis_msix(pf); - if (pf->sw_irq_tracker) { - devm_kfree(&pf->pdev->dev, pf->sw_irq_tracker); - pf->sw_irq_tracker = NULL; - } - - if (pf->hw_irq_tracker) { - devm_kfree(&pf->pdev->dev, pf->hw_irq_tracker); - pf->hw_irq_tracker = NULL; + if (pf->irq_tracker) { + devm_kfree(&pf->pdev->dev, pf->irq_tracker); + pf->irq_tracker = NULL; } } @@ -2166,7 +2228,7 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf) */ static int ice_init_interrupt_scheme(struct ice_pf *pf) { - int vectors = 0, hw_vectors = 0; + int vectors; if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) vectors = ice_ena_msix_range(pf); @@ -2177,31 +2239,18 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf) return vectors; /* set up vector assignment tracking */ - pf->sw_irq_tracker = - devm_kzalloc(&pf->pdev->dev, sizeof(*pf->sw_irq_tracker) + + pf->irq_tracker = + devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) + (sizeof(u16) * vectors), GFP_KERNEL); - if (!pf->sw_irq_tracker) { + if (!pf->irq_tracker) { ice_dis_msix(pf); return -ENOMEM; } /* populate SW interrupts pool with number of OS granted IRQs. */ pf->num_avail_sw_msix = vectors; - pf->sw_irq_tracker->num_entries = vectors; - - /* set up HW vector assignment tracking */ - hw_vectors = pf->hw.func_caps.common_cap.num_msix_vectors; - pf->hw_irq_tracker = - devm_kzalloc(&pf->pdev->dev, sizeof(*pf->hw_irq_tracker) + - (sizeof(u16) * hw_vectors), GFP_KERNEL); - if (!pf->hw_irq_tracker) { - ice_clear_interrupt_scheme(pf); - return -ENOMEM; - } - - /* populate HW interrupts pool with number of HW supported irqs. 
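After this consolidation there is a single tracker, sized by the OS-granted vector count, whose end initially equals num_entries (SR-IOV may trim it later). A compile-standalone sketch of that shape, using a flexible array member the way the driver's devm_kzalloc() sizing does:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct res_tracker {
        uint16_t num_entries;
        uint16_t end;           /* allocations must stay below this index */
        uint16_t list[];        /* one owner id per vector */
};

static struct res_tracker *tracker_alloc(uint16_t vectors)
{
        struct res_tracker *t = calloc(1, sizeof(*t) + sizeof(uint16_t) * vectors);

        if (!t)
                return NULL;
        t->num_entries = vectors;
        t->end = t->num_entries;        /* whole range usable until trimmed */
        return t;
}

int main(void)
{
        struct res_tracker *t = tracker_alloc(64);

        if (t)
                printf("entries=%u end=%u\n", t->num_entries, t->end);
        free(t);
        return 0;
}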
*/ - pf->num_avail_hw_msix = hw_vectors; - pf->hw_irq_tracker->num_entries = hw_vectors; + pf->irq_tracker->num_entries = vectors; + pf->irq_tracker->end = pf->irq_tracker->num_entries; return 0; } @@ -2252,7 +2301,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) if (!pf) return -ENOMEM; - /* set up for high or low dma */ + /* set up for high or low DMA */ err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (err) err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); @@ -2302,7 +2351,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_init_pf(pf); - err = ice_init_pf_dcb(pf); + err = ice_init_pf_dcb(pf, false); if (err) { clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); clear_bit(ICE_FLAG_DCB_ENA, pf->flags); @@ -2368,7 +2417,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) err = ice_setup_pf_sw(pf); if (err) { - dev_err(dev, "probe failed due to setup pf switch:%d\n", err); + dev_err(dev, "probe failed due to setup PF switch:%d\n", err); goto err_alloc_sw_unroll; } @@ -2625,7 +2674,7 @@ static int __init ice_module_init(void) status = pci_register_driver(&ice_driver); if (status) { - pr_err("failed to register pci driver, err %d\n", status); + pr_err("failed to register PCI driver, err %d\n", status); destroy_workqueue(ice_wq); } @@ -2725,21 +2774,21 @@ free_lists: ice_free_fltr_list(&pf->pdev->dev, &a_mac_list); if (err) { - netdev_err(netdev, "can't set mac %pM. filter update failed\n", + netdev_err(netdev, "can't set MAC %pM. filter update failed\n", mac); return err; } /* change the netdev's MAC address */ memcpy(netdev->dev_addr, mac, netdev->addr_len); - netdev_dbg(vsi->netdev, "updated mac address to %pM\n", + netdev_dbg(vsi->netdev, "updated MAC address to %pM\n", netdev->dev_addr); /* write new MAC address to the firmware */ flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL; status = ice_aq_manage_mac_write(hw, mac, flags, NULL); if (status) { - netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n", + netdev_err(netdev, "can't set MAC %pM. 
write to firmware failed.\n", mac); } return 0; @@ -2876,6 +2925,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) (netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) ret = ice_vsi_manage_vlan_insertion(vsi); + if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) && + !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + ret = ice_cfg_vlan_pruning(vsi, true, false); + else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) && + (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + ret = ice_cfg_vlan_pruning(vsi, false, false); + return ret; } @@ -2901,7 +2957,7 @@ static int ice_vsi_vlan_setup(struct ice_vsi *vsi) * * Return 0 on success and negative value on error */ -static int ice_vsi_cfg(struct ice_vsi *vsi) +int ice_vsi_cfg(struct ice_vsi *vsi) { int err; @@ -2933,7 +2989,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) if (!vsi->netdev) return; - ice_for_each_q_vector(vsi, q_idx) { + ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; if (q_vector->rx.ring || q_vector->tx.ring) @@ -3456,7 +3512,7 @@ int ice_down(struct ice_vsi *vsi) * * Return 0 on success, negative on failure */ -static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) +int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) { int i, err = 0; @@ -3482,7 +3538,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) * * Return 0 on success, negative on failure */ -static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) +int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) { int i, err = 0; @@ -3658,7 +3714,7 @@ static int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked) } /** - * ice_vsi_rebuild_all - rebuild all VSIs in pf + * ice_vsi_rebuild_all - rebuild all VSIs in PF * @pf: the PF */ static int ice_vsi_rebuild_all(struct ice_pf *pf) @@ -3728,7 +3784,7 @@ static int ice_vsi_replay_all(struct ice_pf *pf) /** * ice_rebuild - rebuild after reset - * @pf: pf to rebuild + * @pf: PF to rebuild */ static void ice_rebuild(struct ice_pf *pf) { @@ -3740,7 +3796,7 @@ static void ice_rebuild(struct ice_pf *pf) if (test_bit(__ICE_DOWN, pf->state)) goto clear_recovery; - dev_dbg(dev, "rebuilding pf\n"); + dev_dbg(dev, "rebuilding PF\n"); ret = ice_init_all_ctrlq(hw); if (ret) { @@ -3768,12 +3824,6 @@ static void ice_rebuild(struct ice_pf *pf) ice_dcb_rebuild(pf); - /* reset search_hint of irq_trackers to 0 since interrupts are - * reclaimed and could be allocated from beginning during VSI rebuild - */ - pf->sw_irq_tracker->search_hint = 0; - pf->hw_irq_tracker->search_hint = 0; - err = ice_vsi_rebuild_all(pf); if (err) { dev_err(dev, "ice_vsi_rebuild_all failed\n"); @@ -3857,16 +3907,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) u8 count = 0; if (new_mtu == netdev->mtu) { - netdev_warn(netdev, "mtu is already %u\n", netdev->mtu); + netdev_warn(netdev, "MTU is already %u\n", netdev->mtu); return 0; } if (new_mtu < netdev->min_mtu) { - netdev_err(netdev, "new mtu invalid. min_mtu is %d\n", + netdev_err(netdev, "new MTU invalid. min_mtu is %d\n", netdev->min_mtu); return -EINVAL; } else if (new_mtu > netdev->max_mtu) { - netdev_err(netdev, "new mtu invalid. max_mtu is %d\n", + netdev_err(netdev, "new MTU invalid. max_mtu is %d\n", netdev->min_mtu); return -EINVAL; } @@ -3882,7 +3932,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) } while (count < 100); if (count == 100) { - netdev_err(netdev, "can't change mtu. Device is busy\n"); + netdev_err(netdev, "can't change MTU. 
Device is busy\n"); return -EBUSY; } @@ -3894,18 +3944,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu) err = ice_down(vsi); if (err) { - netdev_err(netdev, "change mtu if_up err %d\n", err); + netdev_err(netdev, "change MTU if_up err %d\n", err); return err; } err = ice_up(vsi); if (err) { - netdev_err(netdev, "change mtu if_up err %d\n", err); + netdev_err(netdev, "change MTU if_up err %d\n", err); return err; } } - netdev_dbg(netdev, "changed mtu to %d\n", new_mtu); + netdev_info(netdev, "changed MTU to %d\n", new_mtu); return 0; } @@ -4241,7 +4291,7 @@ static void ice_tx_timeout(struct net_device *netdev) * * Returns 0 on success, negative value on failure */ -static int ice_open(struct net_device *netdev) +int ice_open(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; @@ -4278,7 +4328,7 @@ static int ice_open(struct net_device *netdev) * * Returns success only - not allowed to fail */ -static int ice_stop(struct net_device *netdev) +int ice_stop(struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index 62571d33d0d6..bcb431f1bd92 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -119,7 +119,7 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data) status = ice_read_sr_aq(hw, offset, 1, data, true); if (!status) - *data = le16_to_cpu(*(__le16 *)data); + *data = le16_to_cpu(*(__force __le16 *)data); return status; } @@ -174,7 +174,7 @@ ice_read_sr_buf_aq(struct ice_hw *hw, u16 offset, u16 *words, u16 *data) } while (words_read < *words); for (i = 0; i < *words; i++) - data[i] = le16_to_cpu(((__le16 *)data)[i]); + data[i] = le16_to_cpu(((__force __le16 *)data)[i]); read_nvm_buf_aq_exit: *words = words_read; @@ -316,3 +316,34 @@ ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data) return status; } + +/** + * ice_nvm_validate_checksum + * @hw: pointer to the HW struct + * + * Verify NVM PFA checksum validity (0x0706) + */ +enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw) +{ + struct ice_aqc_nvm_checksum *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) + return status; + + cmd = &desc.params.nvm_checksum; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_checksum); + cmd->flags = ICE_AQC_NVM_CHECKSUM_VERIFY; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + ice_release_nvm(hw); + + if (!status) + if (le16_to_cpu(cmd->checksum) != ICE_AQC_NVM_CHECKSUM_CORRECT) + status = ICE_ERR_NVM_CHECKSUM; + + return status; +} diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 8d49f83be7a5..2a232504379d 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -683,10 +683,10 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, u16 i, num_groups_added = 0; enum ice_status status = 0; struct ice_hw *hw = pi->hw; - u16 buf_size; + size_t buf_size; u32 teid; - buf_size = sizeof(*buf) + sizeof(*buf->generic) * (num_nodes - 1); + buf_size = struct_size(buf, generic, num_nodes - 1); buf = devm_kzalloc(ice_hw_to_dev(hw), buf_size, GFP_KERNEL); if (!buf) return ICE_ERR_NO_MEMORY; diff --git a/drivers/net/ethernet/intel/ice/ice_status.h 
b/drivers/net/ethernet/intel/ice/ice_status.h index 17afe6acb18a..c01597885629 100644 --- a/drivers/net/ethernet/intel/ice/ice_status.h +++ b/drivers/net/ethernet/intel/ice/ice_status.h @@ -26,6 +26,7 @@ enum ice_status { ICE_ERR_IN_USE = -16, ICE_ERR_MAX_LIMIT = -17, ICE_ERR_RESET_ONGOING = -18, + ICE_ERR_NVM_CHECKSUM = -51, ICE_ERR_BUF_TOO_SHORT = -52, ICE_ERR_NVM_BLANK_MODE = -53, ICE_ERR_AQ_ERROR = -100, diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 9f1f595ae7e6..8271fd651725 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -799,7 +799,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, daddr = f_info->l_data.ethertype_mac.mac_addr; /* fall-through */ case ICE_SW_LKUP_ETHERTYPE: - off = (__be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET); + off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET); *off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype); break; case ICE_SW_LKUP_MAC_VLAN: @@ -829,7 +829,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info, ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr); if (!(vlan_id > ICE_MAX_VLAN_ID)) { - off = (__be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET); + off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET); *off = cpu_to_be16(vlan_id); } @@ -1973,6 +1973,10 @@ ice_add_vlan(struct ice_hw *hw, struct list_head *v_list) * ice_add_eth_mac - Add ethertype and MAC based filter rule * @hw: pointer to the hardware structure * @em_list: list of ether type MAC filter, MAC is optional + * + * This function requires the caller to populate the entries in + * the filter list with the necessary fields (including flags to + * indicate Tx or Rx rules). */ enum ice_status ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list) @@ -1990,7 +1994,6 @@ ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list) l_type != ICE_SW_LKUP_ETHERTYPE) return ICE_ERR_PARAM; - em_list_itr->fltr_info.flag = ICE_FLTR_TX; em_list_itr->status = ice_add_rule_internal(hw, l_type, em_list_itr); if (em_list_itr->status) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index 732b0b9b2e15..cb123fbe30be 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -8,9 +8,11 @@ #define ICE_SW_CFG_MAX_BUF_LEN 2048 #define ICE_DFLT_VSI_INVAL 0xff +#define ICE_FLTR_RX BIT(0) +#define ICE_FLTR_TX BIT(1) +#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX) #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF -#define ICE_INVAL_Q_HANDLE 0xFFFF /* VSI queue context structure */ struct ice_q_ctx { @@ -69,9 +71,6 @@ struct ice_fltr_info { /* rule ID returned by firmware once filter rule is created */ u16 fltr_rule_id; u16 flag; -#define ICE_FLTR_RX BIT(0) -#define ICE_FLTR_TX BIT(1) -#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX) /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */ u16 src; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 2364eaf33d23..3c83230434b6 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -55,7 +55,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) if (!tx_ring->tx_buf) return; - /* Free all the Tx ring sk_bufss */ + /* Free all the Tx ring sk_buffs */ for (i = 0; i < tx_ring->count; i++) ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]); @@ -1101,7 +1101,7 
@@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) * ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic * @port_info: port_info structure containing the current link speed * @avg_pkt_size: average size of Tx or Rx packets based on clean routine - * @itr: itr value to update + * @itr: ITR value to update * * Calculate how big of an increment should be applied to the ITR value passed * in based on wmem_default, SKB overhead, Ethernet overhead, and the current @@ -1316,7 +1316,7 @@ clear_counts: */ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) { - /* The itr value is reported in microseconds, and the register value is + /* The ITR value is reported in microseconds, and the register value is * recorded in 2 microsecond units. For this reason we only need to * shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this * granularity as a shift instead of division. The mask makes sure the @@ -1645,7 +1645,7 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, return; dma_error: - /* clear dma mappings for failed tx_buf map */ + /* clear DMA mappings for failed tx_buf map */ for (;;) { tx_buf = &tx_ring->tx_buf[i]; ice_unmap_and_free_tx_buf(tx_ring, tx_buf); @@ -1874,10 +1874,10 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off) cd_mss = skb_shinfo(skb)->gso_size; /* record cdesc_qw1 with TSO parameters */ - off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX | - (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) | - (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) | - (cd_mss << ICE_TXD_CTX_QW1_MSS_S); + off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | + (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) | + (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) | + (cd_mss << ICE_TXD_CTX_QW1_MSS_S)); first->tx_flags |= ICE_TX_FLAGS_TSO; return 1; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 66e05032ee56..ec76aba347b9 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -58,19 +58,19 @@ struct ice_tx_buf { unsigned int bytecount; unsigned short gso_segs; u32 tx_flags; - DEFINE_DMA_UNMAP_ADDR(dma); DEFINE_DMA_UNMAP_LEN(len); + DEFINE_DMA_UNMAP_ADDR(dma); }; struct ice_tx_offload_params { - u8 header_len; + u64 cd_qw1; + struct ice_ring *tx_ring; u32 td_cmd; u32 td_offset; u32 td_l2tag1; - u16 cd_l2tag2; u32 cd_tunnel_params; - u64 cd_qw1; - struct ice_ring *tx_ring; + u16 cd_l2tag2; + u8 header_len; }; struct ice_rx_buf { @@ -150,6 +150,7 @@ enum ice_rx_dtype { /* descriptor ring, associated with a VSI */ struct ice_ring { + /* CL1 - 1st cacheline starts here */ struct ice_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ @@ -161,11 +162,11 @@ struct ice_ring { struct ice_tx_buf *tx_buf; struct ice_rx_buf *rx_buf; }; + /* CL2 - 2nd cacheline starts here */ u16 q_index; /* Queue number of ring */ - u32 txq_teid; /* Added Tx queue TEID */ -#ifdef CONFIG_DCB - u8 dcb_tc; /* Traffic class of ring */ -#endif /* CONFIG_DCB */ + u16 q_handle; /* Queue handle per TC */ + + u8 ring_active:1; /* is ring online or not */ u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -173,8 +174,7 @@ struct ice_ring { /* used in interrupt processing */ u16 next_to_use; u16 next_to_clean; - - u8 ring_active; /* is ring online or not */ + u16 next_to_alloc; /* stats structs */ struct ice_q_stats stats; @@ -184,10 +184,17 @@ struct ice_ring { 
struct ice_rxq_stats rx_stats; }; - unsigned int size; /* length of descriptor ring in bytes */ - dma_addr_t dma; /* physical address of ring */ struct rcu_head rcu; /* to avoid race on free */ - u16 next_to_alloc; + /* CLX - the below items are only accessed infrequently and should be + * in their own cache line if possible + */ + dma_addr_t dma; /* physical address of ring */ + unsigned int size; /* length of descriptor ring in bytes */ + u32 txq_teid; /* Added Tx queue TEID */ + u16 rx_buf_len; +#ifdef CONFIG_DCB + u8 dcb_tc; /* Traffic class of ring */ +#endif /* CONFIG_DCB */ } ____cacheline_internodealigned_in_smp; struct ice_ring_container { diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index a862af4cbf78..24bbef8bbe69 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -23,6 +23,7 @@ static inline bool ice_is_tc_ena(u8 bitmap, u8 tc) /* debug masks - set these bits in hw->debug_mask to control output */ #define ICE_DBG_INIT BIT_ULL(1) +#define ICE_DBG_FW_LOG BIT_ULL(3) #define ICE_DBG_LINK BIT_ULL(4) #define ICE_DBG_PHY BIT_ULL(5) #define ICE_DBG_QCTX BIT_ULL(6) @@ -61,6 +62,13 @@ enum ice_fc_mode { ICE_FC_DFLT }; +enum ice_fec_mode { + ICE_FEC_NONE = 0, + ICE_FEC_RS, + ICE_FEC_BASER, + ICE_FEC_AUTO +}; + enum ice_set_fc_aq_failures { ICE_SET_FC_AQ_FAIL_NONE = 0, ICE_SET_FC_AQ_FAIL_GET, @@ -86,12 +94,14 @@ enum ice_media_type { enum ice_vsi_type { ICE_VSI_PF = 0, ICE_VSI_VF, + ICE_VSI_LB = 6, }; struct ice_link_status { /* Refer to ice_aq_phy_type for bits definition */ u64 phy_type_low; u64 phy_type_high; + u8 topo_media_conflict; u16 max_frame_size; u16 link_speed; u16 req_speeds; @@ -99,6 +109,7 @@ struct ice_link_status { u8 link_info; u8 an_info; u8 ext_info; + u8 fec_info; u8 pacing; /* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of * ice_aqc_get_phy_caps structure @@ -423,7 +434,7 @@ struct ice_hw { struct ice_fw_log_cfg fw_log; /* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL - * register. Used for determining the itr/intrl granularity during + * register. Used for determining the ITR/intrl granularity during * initialization. */ #define ICE_MAX_AGG_BW_200G 0x0 diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index a805cbdd69be..5d24b539648f 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -103,7 +103,7 @@ ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe, u16 link_speed; if (link_up) - link_speed = ICE_AQ_LINK_SPEED_40GB; + link_speed = ICE_AQ_LINK_SPEED_100GB; else link_speed = ICE_AQ_LINK_SPEED_UNKNOWN; @@ -141,32 +141,20 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf) } /** - * ice_get_vf_vector - get VF interrupt vector register offset - * @vf_msix: number of MSIx vector per VF on a PF - * @vf_id: VF identifier - * @i: index of MSIx vector - */ -static u32 ice_get_vf_vector(int vf_msix, int vf_id, int i) -{ - return ((i == 0) ? VFINT_DYN_CTLN(vf_id) : - VFINT_DYN_CTLN(((vf_msix - 1) * (vf_id)) + (i - 1))); -} - -/** * ice_free_vf_res - Free a VF's resources * @vf: pointer to the VF info */ static void ice_free_vf_res(struct ice_vf *vf) { struct ice_pf *pf = vf->pf; - int i, pf_vf_msix; + int i, last_vector_idx; /* First, disable VF's configuration API to prevent OS from * accessing the VF's VSI after it's freed or invalidated. 
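The ice_ring reshuffle above is a classic hot/cold split: fast-path fields in the first cacheline, rarely-touched bookkeeping pushed to the tail. A toy illustration (a cut-down stand-in struct, not the driver's) that checks where the hot region ends:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ring {
        /* CL1: fields touched on every descriptor clean */
        void *desc;
        void *dev;
        void *netdev;
        uint16_t q_index;
        uint16_t count;
        uint16_t next_to_use;
        uint16_t next_to_clean;
        /* CLX: infrequently accessed bookkeeping */
        uint64_t dma;
        unsigned int size;
        uint32_t txq_teid;
};

int main(void)
{
        printf("hot fields end at offset %zu (x86 cacheline is 64 bytes)\n",
               offsetof(struct ring, dma));
        return 0;
}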
*/ clear_bit(ICE_VF_STATE_INIT, vf->vf_states); - /* free vsi & disconnect it from the parent uplink */ + /* free VSI and disconnect it from the parent uplink */ if (vf->lan_vsi_idx) { ice_vsi_release(pf->vsi[vf->lan_vsi_idx]); vf->lan_vsi_idx = 0; @@ -174,13 +162,10 @@ static void ice_free_vf_res(struct ice_vf *vf) vf->num_mac = 0; } - pf_vf_msix = pf->num_vf_msix; + last_vector_idx = vf->first_vector_idx + pf->num_vf_msix - 1; /* Disable interrupts so that VF starts in a known state */ - for (i = 0; i < pf_vf_msix; i++) { - u32 reg_idx; - - reg_idx = ice_get_vf_vector(pf_vf_msix, vf->vf_id, i); - wr32(&pf->hw, reg_idx, VFINT_DYN_CTLN_CLEARPBA_M); + for (i = vf->first_vector_idx; i <= last_vector_idx; i++) { + wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M); ice_flush(&pf->hw); } /* reset some of the state variables keeping track of the resources */ @@ -205,8 +190,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) wr32(hw, VPINT_ALLOC(vf->vf_id), 0); wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0); - first = vf->first_vector_idx + - hw->func_caps.common_cap.msix_vector_first_id; + first = vf->first_vector_idx; last = first + pf->num_vf_msix - 1; for (v = first; v <= last; v++) { u32 reg; @@ -232,6 +216,42 @@ static void ice_dis_vf_mappings(struct ice_vf *vf) } /** + * ice_sriov_free_msix_res - Reset/free any used MSIX resources + * @pf: pointer to the PF structure + * + * If MSIX entries from the pf->irq_tracker were needed then we need to + * reset the irq_tracker->end and give back the entries we needed to + * num_avail_sw_msix. + * + * If no MSIX entries were taken from the pf->irq_tracker then just clear + * the pf->sriov_base_vector. + * + * Returns 0 on success, and -EINVAL on error. + */ +static int ice_sriov_free_msix_res(struct ice_pf *pf) +{ + struct ice_res_tracker *res; + + if (!pf) + return -EINVAL; + + res = pf->irq_tracker; + if (!res) + return -EINVAL; + + /* give back irq_tracker resources used */ + if (pf->sriov_base_vector < res->num_entries) { + res->end = res->num_entries; + pf->num_avail_sw_msix += + res->num_entries - pf->sriov_base_vector; + } + + pf->sriov_base_vector = 0; + + return 0; +} + +/** * ice_free_vfs - Free all VFs * @pf: pointer to the PF structure */ @@ -246,15 +266,6 @@ void ice_free_vfs(struct ice_pf *pf) while (test_and_set_bit(__ICE_VF_DIS, pf->state)) usleep_range(1000, 2000); - /* Disable IOV before freeing resources. This lets any VF drivers - * running in the host get themselves cleaned up before we yank - * the carpet out from underneath their feet. - */ - if (!pci_vfs_assigned(pf->pdev)) - pci_disable_sriov(pf->pdev); - else - dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); - /* Avoid wait time by stopping all VFs at the same time */ for (i = 0; i < pf->num_alloc_vfs; i++) { struct ice_vsi *vsi; @@ -270,6 +281,15 @@ void ice_free_vfs(struct ice_pf *pf) clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states); } + /* Disable IOV before freeing resources. This lets any VF drivers + * running in the host get themselves cleaned up before we yank + * the carpet out from underneath their feet. 
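ice_sriov_free_msix_res() above only has to undo two things: restore the tracker's end and return the trimmed entries to the available count. The same logic in a standalone sketch (plain ints, -1 for error):

#include <stdint.h>
#include <stdio.h>

struct tracker { uint16_t num_entries; uint16_t end; };

static int sriov_free_msix(struct tracker *t, uint16_t *avail_msix,
                           uint16_t *sriov_base)
{
        if (!t)
                return -1;

        if (*sriov_base < t->num_entries) {
                t->end = t->num_entries;        /* re-open the trimmed tail */
                *avail_msix += t->num_entries - *sriov_base;
        }
        *sriov_base = 0;
        return 0;
}

int main(void)
{
        struct tracker t = { .num_entries = 64, .end = 48 };
        uint16_t avail = 32, base = 48;

        sriov_free_msix(&t, &avail, &base);
        printf("end=%u avail=%u base=%u\n", t.end, avail, base); /* 64 48 0 */
        return 0;
}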
+ */ + if (!pci_vfs_assigned(pf->pdev)) + pci_disable_sriov(pf->pdev); + else + dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); + tmp = pf->num_alloc_vfs; pf->num_vf_qps = 0; pf->num_alloc_vfs = 0; @@ -288,6 +308,10 @@ void ice_free_vfs(struct ice_pf *pf) } } + if (ice_sriov_free_msix_res(pf)) + dev_err(&pf->pdev->dev, + "Failed to free MSIX resources used by SR-IOV\n"); + devm_kfree(&pf->pdev->dev, pf->vf); pf->vf = NULL; @@ -457,6 +481,22 @@ ice_vf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, u16 vf_id) } /** + * ice_calc_vf_first_vector_idx - Calculate absolute MSIX vector index in HW + * @pf: pointer to PF structure + * @vf: pointer to VF that the first MSIX vector index is being calculated for + * + * This returns the first MSIX vector index in HW that is used by this VF and + * this will always be the OICR index in the AVF driver so any functionality + * using vf->first_vector_idx for queue configuration will have to increment by + * 1 to avoid meddling with the OICR index. + */ +static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) +{ + return pf->hw.func_caps.common_cap.msix_vector_first_id + + pf->sriov_base_vector + vf->vf_id * pf->num_vf_msix; +} + +/** * ice_alloc_vsi_res - Setup VF VSI and its resources * @vf: pointer to the VF structure * @@ -470,8 +510,10 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) struct ice_vsi *vsi; int status = 0; - vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id); + /* first vector index is the VFs OICR index */ + vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf); + vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id); if (!vsi) { dev_err(&pf->pdev->dev, "Failed to create VF VSI\n"); return -ENOMEM; @@ -480,14 +522,6 @@ static int ice_alloc_vsi_res(struct ice_vf *vf) vf->lan_vsi_idx = vsi->idx; vf->lan_vsi_num = vsi->vsi_num; - /* first vector index is the VFs OICR index */ - vf->first_vector_idx = vsi->hw_base_vector; - /* Since hw_base_vector holds the vector where data queue interrupts - * starts, increment by 1 since VFs allocated vectors include OICR intr - * as well. - */ - vsi->hw_base_vector += 1; - /* Check if port VLAN exist before, and restore it accordingly */ if (vf->port_vlan_id) { ice_vsi_manage_pvid(vsi, vf->port_vlan_id, true); @@ -580,8 +614,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf) hw = &pf->hw; vsi = pf->vsi[vf->lan_vsi_idx]; - first = vf->first_vector_idx + - hw->func_caps.common_cap.msix_vector_first_id; + first = vf->first_vector_idx; last = (first + pf->num_vf_msix) - 1; abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; @@ -687,6 +720,97 @@ ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res) } /** + * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space + * @vf: VF to calculate the register index for + * @q_vector: a q_vector associated to the VF + */ +int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector) +{ + struct ice_pf *pf; + + if (!vf || !q_vector) + return -EINVAL; + + pf = vf->pf; + + /* always add one to account for the OICR being the first MSIX */ + return pf->sriov_base_vector + pf->num_vf_msix * vf->vf_id + + q_vector->v_idx + 1; +} + +/** + * ice_get_max_valid_res_idx - Get the max valid resource index + * @res: pointer to the resource to find the max valid index for + * + * Start from the end of the ice_res_tracker and return right when we find the + * first res->list entry with the ICE_RES_VALID_BIT set. 
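The two index helpers above (ice_calc_vf_first_vector_idx() and ice_calc_vf_reg_idx()) reduce to simple arithmetic over the SR-IOV carve-out, with the +1 skipping the OICR vector that leads each VF's block. Lifted into a standalone demo; the example numbers are made up:

#include <stdio.h>

static int vf_first_vector(int msix_first_id, int sriov_base,
                           int num_vf_msix, int vf_id)
{
        /* absolute HW index of the VF's OICR vector */
        return msix_first_id + sriov_base + vf_id * num_vf_msix;
}

static int vf_reg_idx(int sriov_base, int num_vf_msix, int vf_id, int v_idx)
{
        /* +1 accounts for the OICR being the VF's first MSIX vector */
        return sriov_base + num_vf_msix * vf_id + v_idx + 1;
}

int main(void)
{
        /* e.g. 4 vectors per VF with the SR-IOV block starting at 96 */
        printf("VF2: first=%d, q_vector0 reg_idx=%d\n",
               vf_first_vector(0, 96, 4, 2), vf_reg_idx(96, 4, 2, 0));
        return 0;
}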
This function is only + * valid for SR-IOV because it is the only consumer that manipulates the + * res->end and this is always called when res->end is set to res->num_entries. + */ +static int ice_get_max_valid_res_idx(struct ice_res_tracker *res) +{ + int i; + + if (!res) + return -EINVAL; + + for (i = res->num_entries - 1; i >= 0; i--) + if (res->list[i] & ICE_RES_VALID_BIT) + return i; + + return 0; +} + +/** + * ice_sriov_set_msix_res - Set any used MSIX resources + * @pf: pointer to PF structure + * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs + * + * This function allows SR-IOV resources to be taken from the end of the PF's + * allowed HW MSIX vectors so in many cases the irq_tracker will not + * be needed. In these cases we just set the pf->sriov_base_vector and return + * success. + * + * If SR-IOV needs to use any pf->irq_tracker entries it updates the + * irq_tracker->end based on the first entry needed for SR-IOV. This makes it + * so any calls to ice_get_res() using the irq_tracker will not try to use + * resources at or beyond the newly set value. + * + * Return 0 on success, and -EINVAL when there are not enough MSIX vectors in + * in the PF's space available for SR-IOV. + */ +static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed) +{ + int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); + u16 pf_total_msix_vectors = + pf->hw.func_caps.common_cap.num_msix_vectors; + struct ice_res_tracker *res = pf->irq_tracker; + int sriov_base_vector; + + if (max_valid_res_idx < 0) + return max_valid_res_idx; + + sriov_base_vector = pf_total_msix_vectors - num_msix_needed; + + /* make sure we only grab irq_tracker entries from the list end and + * that we have enough available MSIX vectors + */ + if (sriov_base_vector <= max_valid_res_idx) + return -EINVAL; + + pf->sriov_base_vector = sriov_base_vector; + + /* dip into irq_tracker entries and update used resources */ + if (num_msix_needed > (pf_total_msix_vectors - res->num_entries)) { + pf->num_avail_sw_msix -= + res->num_entries - pf->sriov_base_vector; + res->end = pf->sriov_base_vector; + } + + return 0; +} + +/** * ice_check_avail_res - check if vectors and queues are available * @pf: pointer to the PF structure * @@ -696,11 +820,16 @@ ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res) */ static int ice_check_avail_res(struct ice_pf *pf) { - u16 num_msix, num_txq, num_rxq; + int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker); + u16 num_msix, num_txq, num_rxq, num_avail_msix; - if (!pf->num_alloc_vfs) + if (!pf->num_alloc_vfs || max_valid_res_idx < 0) return -EINVAL; + /* add 1 to max_valid_res_idx to account for it being 0-based */ + num_avail_msix = pf->hw.func_caps.common_cap.num_msix_vectors - + (max_valid_res_idx + 1); + /* Grab from HW interrupts common pool * Note: By the time the user decides it needs more vectors in a VF * its already too late since one must decide this prior to creating the @@ -717,11 +846,11 @@ static int ice_check_avail_res(struct ice_pf *pf) * grab default interrupt vectors (5 as supported by AVF driver). 
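The carve-out in ice_sriov_set_msix_res() above, restated as a standalone sketch: take vectors from the top of the PF's MSIX space, and only trim the tracker when the carve-out reaches down into its range.

#include <stdint.h>
#include <stdio.h>

struct tracker { uint16_t num_entries; uint16_t end; };

static int sriov_set_msix(struct tracker *t, uint16_t *avail_msix,
                          uint16_t *sriov_base, uint16_t total_msix,
                          uint16_t needed, int max_used_idx)
{
        int base = total_msix - needed;

        /* the carve-out must sit above every in-use tracker entry */
        if (base <= max_used_idx)
                return -1;

        *sriov_base = base;

        if (needed > total_msix - t->num_entries) {
                /* dip into tracker entries and account for them */
                *avail_msix -= t->num_entries - base;
                t->end = base;
        }
        return 0;
}

int main(void)
{
        struct tracker t = { .num_entries = 64, .end = 64 };
        uint16_t avail = 40, base = 0;

        /* 96 total vectors, VFs need 48, highest used tracker index is 20 */
        if (!sriov_set_msix(&t, &avail, &base, 96, 48, 20))
                printf("base=%u end=%u avail=%u\n", base, t.end, avail);
        return 0;
}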
*/ if (pf->num_alloc_vfs <= 16) { - num_msix = ice_determine_res(pf, pf->num_avail_hw_msix, + num_msix = ice_determine_res(pf, num_avail_msix, ICE_MAX_INTR_PER_VF, ICE_MIN_INTR_PER_VF); } else if (pf->num_alloc_vfs <= ICE_MAX_VF_COUNT) { - num_msix = ice_determine_res(pf, pf->num_avail_hw_msix, + num_msix = ice_determine_res(pf, num_avail_msix, ICE_DFLT_INTR_PER_VF, ICE_MIN_INTR_PER_VF); } else { @@ -750,6 +879,9 @@ static int ice_check_avail_res(struct ice_pf *pf) if (!num_txq || !num_rxq) return -EIO; + if (ice_sriov_set_msix_res(pf, num_msix * pf->num_alloc_vfs)) + return -EINVAL; + /* since AVF driver works with only queue pairs which means, it expects * to have equal number of Rx and Tx queues, so take the minimum of * available Tx or Rx queues @@ -938,6 +1070,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) vf->num_vf_qs = 0; } + if (ice_sriov_free_msix_res(pf)) + dev_err(&pf->pdev->dev, + "Failed to free MSIX resources used by SR-IOV\n"); + if (ice_check_avail_res(pf)) { dev_err(&pf->pdev->dev, "Cannot allocate VF resources, try with fewer number of VFs\n"); @@ -1119,7 +1255,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) int i, ret; /* Disable global interrupt 0 so we don't try to handle the VFLR. */ - wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx), + wr32(hw, GLINT_DYN_CTL(pf->oicr_idx), ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S); ice_flush(hw); @@ -1134,7 +1270,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) GFP_KERNEL); if (!vfs) { ret = -ENOMEM; - goto err_unroll_sriov; + goto err_pci_disable_sriov; } pf->vf = vfs; @@ -1154,12 +1290,19 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs) pf->num_alloc_vfs = num_alloc_vfs; /* VF resources get allocated during reset */ - if (!ice_reset_all_vfs(pf, true)) + if (!ice_reset_all_vfs(pf, true)) { + ret = -EIO; goto err_unroll_sriov; + } goto err_unroll_intr; err_unroll_sriov: + pf->vf = NULL; + devm_kfree(&pf->pdev->dev, vfs); + vfs = NULL; + pf->num_alloc_vfs = 0; +err_pci_disable_sriov: pci_disable_sriov(pf->pdev); err_unroll_intr: /* rearm interrupts here */ @@ -1168,8 +1311,8 @@ err_unroll_intr: } /** - * ice_pf_state_is_nominal - checks the pf for nominal state - * @pf: pointer to pf to check + * ice_pf_state_is_nominal - checks the PF for nominal state + * @pf: pointer to PF to check * * Check the PF's state for a collection of bits that would indicate * the PF is in a state that would inhibit normal operation for @@ -1496,7 +1639,7 @@ static void ice_vc_reset_vf_msg(struct ice_vf *vf) /** * ice_find_vsi_from_id - * @pf: the pf structure to search for the VSI + * @pf: the PF structure to search for the VSI * @id: ID of the VSI it is searching for * * searches for the VSI with the given ID @@ -1807,28 +1950,37 @@ error_param: static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) { enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_irq_map_info *irqmap_info = - (struct virtchnl_irq_map_info *)msg; + struct virtchnl_irq_map_info *irqmap_info; u16 vsi_id, vsi_q_id, vector_id; struct virtchnl_vector_map *map; - struct ice_vsi *vsi = NULL; struct ice_pf *pf = vf->pf; + u16 num_q_vectors_mapped; + struct ice_vsi *vsi; unsigned long qmap; - u16 num_q_vectors; int i; - num_q_vectors = irqmap_info->num_vectors - ICE_NONQ_VECS_VF; + irqmap_info = (struct virtchnl_irq_map_info *)msg; + num_q_vectors_mapped = irqmap_info->num_vectors; + vsi = pf->vsi[vf->lan_vsi_idx]; + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + /* 
Check to make sure number of VF vectors mapped is not greater than + * number of VF vectors originally allocated, and check that + * there is actually at least a single VF queue vector mapped + */ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - !vsi || vsi->num_q_vectors < num_q_vectors || - irqmap_info->num_vectors == 0) { + pf->num_vf_msix < num_q_vectors_mapped || + !irqmap_info->num_vectors) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } - for (i = 0; i < num_q_vectors; i++) { - struct ice_q_vector *q_vector = vsi->q_vectors[i]; + for (i = 0; i < num_q_vectors_mapped; i++) { + struct ice_q_vector *q_vector; map = &irqmap_info->vecmap[i]; @@ -1836,7 +1988,21 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) vsi_id = map->vsi_id; /* validate msg params */ if (!(vector_id < pf->hw.func_caps.common_cap - .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id)) { + .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) || + (!vector_id && (map->rxq_map || map->txq_map))) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* No need to map VF miscellaneous or rogue vector */ + if (!vector_id) + continue; + + /* Subtract the non-queue vector from vector_id passed by the VF + * to get the VSI q_vector array index + */ + q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF]; + if (!q_vector) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1852,6 +2018,8 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) q_vector->num_ring_rx++; q_vector->rx.itr_idx = map->rxitr_idx; vsi->rx_rings[vsi_q_id]->q_vector = q_vector; + ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id, + q_vector->rx.itr_idx); } qmap = map->txq_map; @@ -1864,11 +2032,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) q_vector->num_ring_tx++; q_vector->tx.itr_idx = map->txitr_idx; vsi->tx_rings[vsi_q_id]->q_vector = q_vector; + ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id, + q_vector->tx.itr_idx); } } - if (vsi) - ice_vsi_cfg_msix(vsi); error_param: /* send the response to the VF */ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret, @@ -1903,9 +2071,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } vsi = pf->vsi[vf->lan_vsi_idx]; - if (!vsi) { + if (!vsi) goto error_param; - } if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF) { dev_err(&pf->pdev->dev, diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 3725aea16840..c3ca522c245a 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -49,29 +49,34 @@ struct ice_vf { struct ice_pf *pf; s16 vf_id; /* VF ID in the PF space */ - u32 driver_caps; /* reported by VF driver */ + u16 lan_vsi_idx; /* index into PF struct */ int first_vector_idx; /* first vector index of this VF */ struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */ struct virtchnl_version_info vf_ver; + u32 driver_caps; /* reported by VF driver */ struct virtchnl_ether_addr dflt_lan_addr; u16 port_vlan_id; - u8 pf_set_mac; /* VF MAC address set by VMM admin */ - u8 trusted; - u16 lan_vsi_idx; /* index into PF struct */ + u8 pf_set_mac:1; /* VF MAC address set by VMM admin */ + u8 trusted:1; + u8 spoofchk:1; + u8 link_forced:1; + u8 link_up:1; /* only valid if VF link is forced */ + /* VSI indices - actual VSI pointers are maintained in the PF structure. + * When assigned, these will be non-zero, because VSI 0 is always + * the main LAN VSI for the PF. 
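+ * A value of zero can therefore be read as 'not yet assigned'.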
+ */ u16 lan_vsi_num; /* ID as used by firmware */ + unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ + DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ + u64 num_mdd_events; /* number of MDD events detected */ u64 num_inval_msgs; /* number of continuous invalid msgs */ u64 num_valid_msgs; /* number of valid msgs detected */ unsigned long vf_caps; /* VF's adv. capabilities */ - DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ - unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ - u8 link_forced; - u8 link_up; /* only valid if VF link is forced */ - u8 spoofchk; + u8 num_req_qs; /* num of queue pairs requested by VF */ u16 num_mac; u16 num_vlan; u16 num_vf_qs; /* num of queue configured per VF */ - u8 num_req_qs; /* num of queue pairs requested by VF */ }; #ifdef CONFIG_PCI_IOV @@ -96,6 +101,8 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena); + +int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); #else /* CONFIG_PCI_IOV */ #define ice_process_vflr_event(pf) do {} while (0) #define ice_free_vfs(pf) do {} while (0) @@ -161,5 +168,11 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } +static inline int +ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, + struct ice_q_vector __always_unused *q_vector) +{ + return 0; +} #endif /* CONFIG_PCI_IOV */ #endif /* _ICE_VIRTCHNL_PF_H_ */ diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index bafdcf70a353..3ec2ce0725d5 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -638,7 +638,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) dev_spec->sgmii_active = true; break; } - /* fall through for I2C based SGMII */ + /* fall through - for I2C based SGMII */ case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: /* read media type from SFP EEPROM */ ret_val = igb_set_sfp_media_type_82575(hw); diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 0ad737d2f289..9cb49980ec2d 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -409,6 +409,8 @@ do { \ #define E1000_I210_TQAVCC(_n) (0x3004 + ((_n) * 0x40)) #define E1000_I210_TQAVHC(_n) (0x300C + ((_n) * 0x40)) +#define E1000_I210_RR2DCDELAY 0x5BF4 + #define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) #define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index c645d9e648e0..3182b059bf55 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -448,7 +448,7 @@ static void igb_set_msglevel(struct net_device *netdev, u32 data) static int igb_get_regs_len(struct net_device *netdev) { -#define IGB_REGS_LEN 739 +#define IGB_REGS_LEN 740 return IGB_REGS_LEN * sizeof(u32); } @@ -675,41 +675,44 @@ static void igb_get_regs(struct net_device *netdev, regs_buff[554] = adapter->stats.b2ogprc; } - if (hw->mac.type != e1000_82576) - return; - for (i = 0; i < 12; i++) - regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4)); - for (i = 0; i < 4; i++) - regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[571 + i] = 
rd32(E1000_RDBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[607 + i] = rd32(E1000_RDH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[619 + i] = rd32(E1000_RDT(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4)); - - for (i = 0; i < 12; i++) - regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[679 + i] = rd32(E1000_TDH(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[691 + i] = rd32(E1000_TDT(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4)); - for (i = 0; i < 12; i++) - regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4)); + if (hw->mac.type == e1000_82576) { + for (i = 0; i < 12; i++) + regs_buff[555 + i] = rd32(E1000_SRRCTL(i + 4)); + for (i = 0; i < 4; i++) + regs_buff[567 + i] = rd32(E1000_PSRTYPE(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[571 + i] = rd32(E1000_RDBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[583 + i] = rd32(E1000_RDBAH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[595 + i] = rd32(E1000_RDLEN(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[607 + i] = rd32(E1000_RDH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[619 + i] = rd32(E1000_RDT(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[631 + i] = rd32(E1000_RXDCTL(i + 4)); + + for (i = 0; i < 12; i++) + regs_buff[643 + i] = rd32(E1000_TDBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[655 + i] = rd32(E1000_TDBAH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[667 + i] = rd32(E1000_TDLEN(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[679 + i] = rd32(E1000_TDH(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[691 + i] = rd32(E1000_TDT(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[703 + i] = rd32(E1000_TXDCTL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[715 + i] = rd32(E1000_TDWBAL(i + 4)); + for (i = 0; i < 12; i++) + regs_buff[727 + i] = rd32(E1000_TDWBAH(i + 4)); + } + + if (hw->mac.type == e1000_i210 || hw->mac.type == e1000_i211) + regs_buff[739] = rd32(E1000_I210_RR2DCDELAY); } static int igb_get_eeprom_len(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 39f33afc479c..b4df3e319467 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -753,6 +753,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg) struct net_device *netdev = igb->netdev; hw->hw_addr = NULL; netdev_err(netdev, "PCIe link lost\n"); + WARN(1, "igb: Failed to read reg 0x%x!\n", reg); } return value; @@ -2577,11 +2578,11 @@ static int igb_offload_cbs(struct igb_adapter *adapter, #define VLAN_PRIO_FULL_MASK (0x07) static int igb_parse_cls_flower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, int traffic_class, struct igb_nfc_filter *input) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; struct netlink_ext_ack *extack = f->common.extack; @@ -2659,7 +2660,7 @@ static int igb_parse_cls_flower(struct igb_adapter *adapter, } static int 
igb_configure_clsflower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct netlink_ext_ack *extack = cls_flower->common.extack; struct igb_nfc_filter *filter, *f; @@ -2721,7 +2722,7 @@ err_parse: } static int igb_delete_clsflower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { struct igb_nfc_filter *filter; int err; @@ -2751,14 +2752,14 @@ out: } static int igb_setup_tc_cls_flower(struct igb_adapter *adapter, - struct tc_cls_flower_offload *cls_flower) + struct flow_cls_offload *cls_flower) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return igb_configure_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return igb_delete_clsflower(adapter, cls_flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return -EOPNOTSUPP; default: return -EOPNOTSUPP; @@ -2782,25 +2783,6 @@ static int igb_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int igb_setup_tc_block(struct igb_adapter *adapter, - struct tc_block_offload *f) -{ - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, igb_setup_tc_block_cb, - adapter, adapter, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, igb_setup_tc_block_cb, - adapter); - return 0; - default: - return -EOPNOTSUPP; - } -} - static int igb_offload_txtime(struct igb_adapter *adapter, struct tc_etf_qopt_offload *qopt) { @@ -2824,6 +2806,8 @@ static int igb_offload_txtime(struct igb_adapter *adapter, return 0; } +static LIST_HEAD(igb_block_cb_list); + static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -2833,7 +2817,11 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, case TC_SETUP_QDISC_CBS: return igb_offload_cbs(adapter, type_data); case TC_SETUP_BLOCK: - return igb_setup_tc_block(adapter, type_data); + return flow_block_cb_setup_simple(type_data, + &igb_block_cb_list, + igb_setup_tc_block_cb, + adapter, adapter, true); + case TC_SETUP_QDISC_ETF: return igb_offload_txtime(adapter, type_data); @@ -5687,6 +5675,7 @@ static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, */ if (tx_ring->launchtime_enable) { ts = ns_to_timespec64(first->skb->tstamp); + first->skb->tstamp = 0; context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; @@ -6695,7 +6684,7 @@ static int __igb_notify_dca(struct device *dev, void *data) igb_setup_dca(adapter); break; } - /* Fall Through since DCA is disabled. */ + /* Fall Through - since DCA is disabled. 
*/ case DCA_PROVIDER_REMOVE: if (adapter->flags & IGB_FLAG_DCA_ENABLED) { /* without this a class_device is left diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index 51a8b8769c67..59258d791106 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -10,50 +10,6 @@ #include "igc.h" /** - * igc_set_pcie_completion_timeout - set pci-e completion timeout - * @hw: pointer to the HW structure - */ -static s32 igc_set_pcie_completion_timeout(struct igc_hw *hw) -{ - u32 gcr = rd32(IGC_GCR); - u16 pcie_devctl2; - s32 ret_val = 0; - - /* only take action if timeout value is defaulted to 0 */ - if (gcr & IGC_GCR_CMPL_TMOUT_MASK) - goto out; - - /* if capabilities version is type 1 we can write the - * timeout of 10ms to 200ms through the GCR register - */ - if (!(gcr & IGC_GCR_CAP_VER2)) { - gcr |= IGC_GCR_CMPL_TMOUT_10ms; - goto out; - } - - /* for version 2 capabilities we need to write the config space - * directly in order to set the completion timeout value for - * 16ms to 55ms - */ - ret_val = igc_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, - &pcie_devctl2); - if (ret_val) - goto out; - - pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; - - ret_val = igc_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, - &pcie_devctl2); -out: - /* disable completion timeout resend */ - gcr &= ~IGC_GCR_CMPL_TMOUT_RESEND; - - wr32(IGC_GCR, gcr); - - return ret_val; -} - -/** * igc_reset_hw_base - Reset hardware * @hw: pointer to the HW structure * @@ -72,11 +28,6 @@ static s32 igc_reset_hw_base(struct igc_hw *hw) if (ret_val) hw_dbg("PCI-E Master disable polling has failed.\n"); - /* set the completion timeout for interface */ - ret_val = igc_set_pcie_completion_timeout(hw); - if (ret_val) - hw_dbg("PCI-E Set completion timeout has failed.\n"); - hw_dbg("Masking off all interrupts\n"); wr32(IGC_IMC, 0xffffffff); diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index a9a30268de59..fc0ccfe38a20 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -5,8 +5,8 @@ #define _IGC_DEFINES_H_ /* Number of Transmit and Receive Descriptors must be a multiple of 8 */ -#define REQ_TX_DESCRIPTOR_MULTIPLE 8 -#define REQ_RX_DESCRIPTOR_MULTIPLE 8 +#define REQ_TX_DESCRIPTOR_MULTIPLE 8 +#define REQ_RX_DESCRIPTOR_MULTIPLE 8 #define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ @@ -29,12 +29,6 @@ /* Status of Master requests. */ #define IGC_STATUS_GIO_MASTER_ENABLE 0x00080000 -/* PCI Express Control */ -#define IGC_GCR_CMPL_TMOUT_MASK 0x0000F000 -#define IGC_GCR_CMPL_TMOUT_10ms 0x00001000 -#define IGC_GCR_CMPL_TMOUT_RESEND 0x00010000 -#define IGC_GCR_CAP_VER2 0x00040000 - /* Receive Address * Number of high/low register pairs in the RAR. The RAR (Receive Address * Registers) holds the directed and multicast addresses that we monitor. 
@@ -72,6 +66,9 @@ #define IGC_CONNSW_AUTOSENSE_EN 0x1 +/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */ +#define MAX_JUMBO_FRAME_SIZE 0x2600 + /* PBA constants */ #define IGC_PBA_34K 0x0022 @@ -264,9 +261,6 @@ #define IGC_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ #define IGC_TCTL_MULR 0x10000000 /* Multiple request support */ -#define IGC_CT_SHIFT 4 -#define IGC_COLLISION_THRESHOLD 15 - /* Flow Control Constants */ #define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 #define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 @@ -398,7 +392,7 @@ #define IGC_MDIC_ERROR 0x40000000 #define IGC_MDIC_DEST 0x80000000 -#define IGC_N0_QUEUE -1 +#define IGC_N0_QUEUE -1 #define IGC_MAX_MAC_HDR_LEN 127 #define IGC_MAX_NETWORK_HDR_LEN 511 diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 7c88b7bd4799..1039a224ac80 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -114,11 +114,8 @@ struct igc_nvm_operations { struct igc_phy_operations { s32 (*acquire)(struct igc_hw *hw); - s32 (*check_polarity)(struct igc_hw *hw); s32 (*check_reset_block)(struct igc_hw *hw); s32 (*force_speed_duplex)(struct igc_hw *hw); - s32 (*get_cfg_done)(struct igc_hw *hw); - s32 (*get_cable_length)(struct igc_hw *hw); s32 (*get_phy_info)(struct igc_hw *hw); s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data); void (*release)(struct igc_hw *hw); diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c index f7683d3ae47c..ba4646737288 100644 --- a/drivers/net/ethernet/intel/igc/igc_mac.c +++ b/drivers/net/ethernet/intel/igc/igc_mac.c @@ -8,7 +8,6 @@ #include "igc_hw.h" /* forward declaration */ -static s32 igc_set_default_fc(struct igc_hw *hw); static s32 igc_set_fc_watermarks(struct igc_hw *hw); /** @@ -96,13 +95,10 @@ s32 igc_setup_link(struct igc_hw *hw) goto out; /* If requested flow control is set to default, set flow control - * based on the EEPROM flow control settings. + * to both 'rx' and 'tx' pause frames. */ - if (hw->fc.requested_mode == igc_fc_default) { - ret_val = igc_set_default_fc(hw); - if (ret_val) - goto out; - } + if (hw->fc.requested_mode == igc_fc_default) + hw->fc.requested_mode = igc_fc_full; /* We want to save off the original Flow Control configuration just * in case we get disconnected and then reconnected into a different @@ -136,19 +132,6 @@ out: } /** - * igc_set_default_fc - Set flow control default values - * @hw: pointer to the HW structure - * - * Read the EEPROM for the default values for flow control and store the - * values. - */ -static s32 igc_set_default_fc(struct igc_hw *hw) -{ - hw->fc.requested_mode = igc_fc_full; - return 0; -} - -/** * igc_force_mac_fc - Force the MAC's flow control settings * @hw: pointer to the HW structure * diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 34fa0e60a780..93f3b4e6185b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -72,6 +72,27 @@ void igc_reset(struct igc_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct igc_hw *hw = &adapter->hw; + struct igc_fc_info *fc = &hw->fc; + u32 pba, hwm; + + /* Repartition PBA for greater than 9k MTU if required */ + pba = IGC_PBA_34K; + + /* flow control settings + * The high water mark must be low enough to fit one full frame + * after transmitting the pause frame. 
As such we must have enough + * space to allow for us to complete our current transmit and then + * receive the frame that is in progress from the link partner. + * Set it to: + * - the full Rx FIFO size minus one full Tx plus one full Rx frame + */ + hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE); + + fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ + fc->low_water = fc->high_water - 16; + fc->pause_time = 0xFFFF; + fc->send_xon = 1; + fc->current_mode = fc->requested_mode; hw->mac.ops.reset_hw(hw); @@ -3934,6 +3955,7 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg) hw->hw_addr = NULL; netif_device_detach(netdev); netdev_err(netdev, "PCIe link lost, device now detached\n"); + WARN(1, "igc: Failed to read reg 0x%x!\n", reg); } return value; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 08d85e336bd4..39e73ad60352 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -50,8 +50,6 @@ #define IXGBE_MAX_RXD 4096 #define IXGBE_MIN_RXD 64 -#define IXGBE_ETH_P_LLDP 0x88CC - /* flow control */ #define IXGBE_MIN_FCRTL 0x40 #define IXGBE_MAX_FCRTL 0x7FF80 @@ -635,6 +633,7 @@ struct ixgbe_adapter { /* XDP */ int num_xdp_queues; struct ixgbe_ring *xdp_ring[MAX_XDP_QUEUES]; + unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled rings */ /* TX */ struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; @@ -774,11 +773,6 @@ struct ixgbe_adapter { #ifdef CONFIG_IXGBE_IPSEC struct ixgbe_ipsec *ipsec; #endif /* CONFIG_IXGBE_IPSEC */ - - /* AF_XDP zero-copy */ - struct xdp_umem **xsk_umems; - u16 num_xsk_umems_used; - u16 num_xsk_umems; }; static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) @@ -1039,4 +1033,10 @@ static inline int ixgbe_ipsec_vf_add_sa(struct ixgbe_adapter *adapter, static inline int ixgbe_ipsec_vf_del_sa(struct ixgbe_adapter *adapter, u32 *mbuf, u32 vf) { return -EACCES; } #endif /* CONFIG_IXGBE_IPSEC */ + +static inline bool ixgbe_enabled_xdp_adapter(struct ixgbe_adapter *adapter) +{ + return !!adapter->xdp_prog; +} + #endif /* _IXGBE_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index acba067cc15a..7c52ae8ac005 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3226,7 +3226,8 @@ static int ixgbe_get_module_info(struct net_device *dev, page_swap = true; } - if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap) { + if (sff8472_rev == IXGBE_SFF_SFF_8472_UNSUP || page_swap || + !(addr_mode & IXGBE_SFF_DDM_IMPLEMENTED)) { /* We have a SFP, but it does not support SFF-8472 */ modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index ff85ce5791a3..31629fc7e820 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -842,6 +842,9 @@ void ixgbe_ipsec_vf_clear(struct ixgbe_adapter *adapter, u32 vf) struct ixgbe_ipsec *ipsec = adapter->ipsec; int i; + if (!ipsec) + return; + /* search rx sa table */ for (i = 0; i < IXGBE_IPSEC_MAX_SA_COUNT && ipsec->num_rx_sa; i++) { if (!ipsec->rx_tbl[i].used) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 57fd9ee6de66..cbaf712d6529 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6288,6 +6288,10 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, if (ixgbe_init_rss_key(adapter)) return -ENOMEM; + adapter->af_xdp_zc_qps = bitmap_zalloc(MAX_XDP_QUEUES, GFP_KERNEL); + if (!adapter->af_xdp_zc_qps) + return -ENOMEM; + /* Set MAC specific capability flags and exceptions */ switch (hw->mac.type) { case ixgbe_mac_82598EB: @@ -9603,27 +9607,6 @@ static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int ixgbe_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct ixgbe_adapter *adapter = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb, - adapter, adapter, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb, - adapter); - return 0; - default: - return -EOPNOTSUPP; - } -} - static int ixgbe_setup_tc_mqprio(struct net_device *dev, struct tc_mqprio_qopt *mqprio) { @@ -9631,12 +9614,19 @@ static int ixgbe_setup_tc_mqprio(struct net_device *dev, return ixgbe_setup_tc(dev, mqprio->num_tc); } +static LIST_HEAD(ixgbe_block_cb_list); + static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct ixgbe_adapter *adapter = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return ixgbe_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &ixgbe_block_cb_list, + ixgbe_setup_tc_block_cb, + adapter, adapter, true); case TC_SETUP_QDISC_MQPRIO: return ixgbe_setup_tc_mqprio(dev, type_data); default: @@ -11161,6 +11151,7 @@ err_sw_init: kfree(adapter->jump_tables[0]); kfree(adapter->mac_table); kfree(adapter->rss_key); + bitmap_free(adapter->af_xdp_zc_qps); err_ioremap: disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); @@ -11249,6 +11240,7 @@ static void ixgbe_remove(struct pci_dev *pdev) kfree(adapter->mac_table); kfree(adapter->rss_key); + bitmap_free(adapter->af_xdp_zc_qps); disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index 214b01085718..6544c4539c0d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -45,6 +45,7 @@ #define IXGBE_SFF_SOFT_RS_SELECT_10G 0x8 #define IXGBE_SFF_SOFT_RS_SELECT_1G 0x0 #define IXGBE_SFF_ADDRESSING_MODE 0x4 +#define IXGBE_SFF_DDM_IMPLEMENTED 0x40 #define IXGBE_SFF_QSFP_DA_ACTIVE_CABLE 0x1 #define IXGBE_SFF_QSFP_DA_PASSIVE_CABLE 0x8 #define IXGBE_SFF_QSFP_CONNECTOR_NOT_SEPARABLE 0x23 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index d81a50dc9535..0be13a90ff79 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -72,13 +72,13 @@ #define IXGBE_INCPER_SHIFT_82599 24 #define IXGBE_OVERFLOW_PERIOD (HZ * 30) -#define IXGBE_PTP_TX_TIMEOUT (HZ * 15) +#define IXGBE_PTP_TX_TIMEOUT (HZ) -/* half of a one second clock period, for use with PPS signal. 
We have to use - * this instead of something pre-defined like IXGBE_PTP_PPS_HALF_SECOND, in - * order to force at least 64bits of precision for shifting +/* We use our own definitions instead of NSEC_PER_SEC because we want to mark + * the value as a ULL to force precision when bit shifting. */ -#define IXGBE_PTP_PPS_HALF_SECOND 500000000ULL +#define NS_PER_SEC 1000000000ULL +#define NS_PER_HALF_SEC 500000000ULL /* In contrast, the X550 controller has two registers, SYSTIMEH and SYSTIMEL * which contain measurements of seconds and nanoseconds respectively. This @@ -141,23 +141,26 @@ #define MAX_TIMADJ 0x7FFFFFFF /** - * ixgbe_ptp_setup_sdp_x540 + * ixgbe_ptp_setup_sdp_X540 * @adapter: private adapter structure * * this function enables or disables the clock out feature on SDP0 for - * the X540 device. It will create a 1second periodic output that can + * the X540 device. It will create a 1 second periodic output that can * be used as the PPS (via an interrupt). * - * It calculates when the systime will be on an exact second, and then - * aligns the start of the PPS signal to that value. The shift is - * necessary because it can change based on the link speed. + * It calculates when the system time will be on an exact second, and then + * aligns the start of the PPS signal to that value. + * + * This works by using the cycle counter shift and mult values in reverse, and + * assumes that the values we're shifting will not overflow. */ -static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) +static void ixgbe_ptp_setup_sdp_X540(struct ixgbe_adapter *adapter) { + struct cyclecounter *cc = &adapter->hw_cc; struct ixgbe_hw *hw = &adapter->hw; - int shift = adapter->hw_cc.shift; u32 esdp, tsauxc, clktiml, clktimh, trgttiml, trgttimh, rem; - u64 ns = 0, clock_edge = 0; + u64 ns = 0, clock_edge = 0, clock_period; + unsigned long flags; /* disable the pin first */ IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); @@ -177,26 +180,33 @@ static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) /* enable the Clock Out feature on SDP0, and allow * interrupts to occur when the pin changes */ - tsauxc = IXGBE_TSAUXC_EN_CLK | - IXGBE_TSAUXC_SYNCLK | - IXGBE_TSAUXC_SDP0_INT; + tsauxc = (IXGBE_TSAUXC_EN_CLK | + IXGBE_TSAUXC_SYNCLK | + IXGBE_TSAUXC_SDP0_INT); - /* clock period (or pulse length) */ - clktiml = (u32)(IXGBE_PTP_PPS_HALF_SECOND << shift); - clktimh = (u32)((IXGBE_PTP_PPS_HALF_SECOND << shift) >> 32); - - /* Account for the cyclecounter wrap-around value by - * using the converted ns value of the current time to - * check for when the next aligned second would occur. + /* Determine the clock time period to use. This assumes that the + * cycle counter shift is small enough to avoid overflow. 
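+ * Put differently, the cyclecounter normally converts cycles to
+ * nanoseconds as ns = (cycles * mult) >> shift; running that in
+ * reverse, cycles = (ns << shift) / mult, yields the SYSTIME cycle
+ * count for half a second.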
*/ - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIML); - clock_edge |= (u64)IXGBE_READ_REG(hw, IXGBE_SYSTIMH) << 32; - ns = timecounter_cyc2time(&adapter->hw_tc, clock_edge); + clock_period = div_u64((NS_PER_HALF_SEC << cc->shift), cc->mult); + clktiml = (u32)(clock_period); + clktimh = (u32)(clock_period >> 32); - div_u64_rem(ns, IXGBE_PTP_PPS_HALF_SECOND, &rem); - clock_edge += ((IXGBE_PTP_PPS_HALF_SECOND - (u64)rem) << shift); + /* Read the current clock time, and save the cycle counter value */ + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_read(&adapter->hw_tc); + clock_edge = adapter->hw_tc.cycle_last; + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + /* Figure out how many seconds to add in order to round up */ + div_u64_rem(ns, NS_PER_SEC, &rem); + + /* Figure out how many nanoseconds to add to round the clock edge up + * to the next full second + */ + rem = (NS_PER_SEC - rem); - /* specify the initial clock start time */ + /* Adjust the clock edge to align with the next full second. */ + clock_edge += div_u64(((u64)rem << cc->shift), cc->mult); trgttiml = (u32)clock_edge; trgttimh = (u32)(clock_edge >> 32); @@ -212,8 +222,100 @@ static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) } /** + * ixgbe_ptp_setup_sdp_X550 + * @adapter: private adapter structure + * + * Enable or disable a clock output signal on SDP 0 for X550 hardware. + * + * Use the target time feature to align the output signal on the next full + * second. + * + * This works by using the cycle counter shift and mult values in reverse, and + * assumes that the values we're shifting will not overflow. + */ +static void ixgbe_ptp_setup_sdp_X550(struct ixgbe_adapter *adapter) +{ + u32 esdp, tsauxc, freqout, trgttiml, trgttimh, rem, tssdp; + struct cyclecounter *cc = &adapter->hw_cc; + struct ixgbe_hw *hw = &adapter->hw; + u64 ns = 0, clock_edge = 0; + struct timespec64 ts; + unsigned long flags; + + /* disable the pin first */ + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, 0x0); + IXGBE_WRITE_FLUSH(hw); + + if (!(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED)) + return; + + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + + /* enable the SDP0 pin as output, and connected to the + * native function for Timesync (ClockOut) + */ + esdp |= IXGBE_ESDP_SDP0_DIR | + IXGBE_ESDP_SDP0_NATIVE; + + /* enable the Clock Out feature on SDP0, and use Target Time 0 to + * enable generation of interrupts on the clock change. + */ +#define IXGBE_TSAUXC_DIS_TS_CLEAR 0x40000000 + tsauxc = (IXGBE_TSAUXC_EN_CLK | IXGBE_TSAUXC_ST0 | + IXGBE_TSAUXC_EN_TT0 | IXGBE_TSAUXC_SDP0_INT | + IXGBE_TSAUXC_DIS_TS_CLEAR); + + tssdp = (IXGBE_TSSDP_TS_SDP0_EN | + IXGBE_TSSDP_TS_SDP0_CLK0); + + /* Determine the clock time period to use. This assumes that the + * cycle counter shift is small enough to avoid overflowing a 32bit + * value. + */ + freqout = div_u64(NS_PER_HALF_SEC << cc->shift, cc->mult); + + /* Read the current clock time, and save the cycle counter value */ + spin_lock_irqsave(&adapter->tmreg_lock, flags); + ns = timecounter_read(&adapter->hw_tc); + clock_edge = adapter->hw_tc.cycle_last; + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + /* Figure out how far past the next second we are */ + div_u64_rem(ns, NS_PER_SEC, &rem); + + /* Figure out how many nanoseconds to add to round the clock edge up + * to the next full second + */ + rem = (NS_PER_SEC - rem); + + /* Adjust the clock edge to align with the next full second. 
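+ * (This is the same reverse mult/shift conversion used in the X540
+ * routine above: rem nanoseconds become SYSTIME cycle units.)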
*/ + clock_edge += div_u64(((u64)rem << cc->shift), cc->mult); + + /* X550 hardware stores the time in 32bits of 'billions of cycles' and + * 32bits of 'cycles'. There's no guarantee that cycles represents + * nanoseconds. However, we can use the math from a timespec64 to + * convert into the hardware representation. + * + * See ixgbe_ptp_read_X550() for more details. + */ + ts = ns_to_timespec64(clock_edge); + trgttiml = (u32)ts.tv_nsec; + trgttimh = (u32)ts.tv_sec; + + IXGBE_WRITE_REG(hw, IXGBE_FREQOUT0, freqout); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIML0, trgttiml); + IXGBE_WRITE_REG(hw, IXGBE_TRGTTIMH0, trgttimh); + + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_REG(hw, IXGBE_TSSDP, tssdp); + IXGBE_WRITE_REG(hw, IXGBE_TSAUXC, tsauxc); + + IXGBE_WRITE_FLUSH(hw); +} + +/** * ixgbe_ptp_read_X550 - read cycle counter value - * @hw_cc: cyclecounter structure + * @cc: cyclecounter structure * * This function reads SYSTIME registers. It is called by the cyclecounter * structure to convert from internal representation into nanoseconds. We need @@ -221,10 +323,10 @@ static void ixgbe_ptp_setup_sdp_x540(struct ixgbe_adapter *adapter) * result of SYSTIME is 32bits of "billions of cycles" and 32 bits of * "cycles", rather than seconds and nanoseconds. */ -static u64 ixgbe_ptp_read_X550(const struct cyclecounter *hw_cc) +static u64 ixgbe_ptp_read_X550(const struct cyclecounter *cc) { struct ixgbe_adapter *adapter = - container_of(hw_cc, struct ixgbe_adapter, hw_cc); + container_of(cc, struct ixgbe_adapter, hw_cc); struct ixgbe_hw *hw = &adapter->hw; struct timespec64 ts; @@ -838,6 +940,15 @@ void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *q_vector, ixgbe_ptp_convert_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); } +/** + * ixgbe_ptp_get_ts_config - get current hardware timestamping configuration + * @adapter: pointer to adapter structure + * @ifr: ioctl data + * + * This function returns the current timestamping settings. Rather than + * attempt to deconstruct registers to fill in the values, simply keep a copy + * of the old settings around, and return a copy when requested. 
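+ * (The stored copy is presumably refreshed by the corresponding set
+ * path whenever a new configuration is accepted; only the get side is
+ * shown here.)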
+ */ int ixgbe_ptp_get_ts_config(struct ixgbe_adapter *adapter, struct ifreq *ifr) { struct hwtstamp_config *config = &adapter->tstamp_config; @@ -1253,7 +1364,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; - adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_x540; + adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_X540; break; case ixgbe_mac_82599EB: snprintf(adapter->ptp_caps.name, @@ -1280,13 +1391,13 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_caps.n_alarm = 0; adapter->ptp_caps.n_ext_ts = 0; adapter->ptp_caps.n_per_out = 0; - adapter->ptp_caps.pps = 0; + adapter->ptp_caps.pps = 1; adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550; adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex; adapter->ptp_caps.settime64 = ixgbe_ptp_settime; adapter->ptp_caps.enable = ixgbe_ptp_feature_enable; - adapter->ptp_setup_sdp = NULL; + adapter->ptp_setup_sdp = ixgbe_ptp_setup_sdp_X550; break; default: adapter->ptp_clock = NULL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 345701af7749..537dfff585e0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -1645,7 +1645,7 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_LLDP), (IXGBE_ETQF_FILTER_EN | IXGBE_ETQF_TX_ANTISPOOF | - IXGBE_ETH_P_LLDP)); + ETH_P_LLDP)); IXGBE_WRITE_REG(hw, IXGBE_ETQF(IXGBE_ETQF_FILTER_FC), (IXGBE_ETQF_FILTER_EN | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 84f2dba39e36..2be1c4c72435 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1067,6 +1067,7 @@ struct ixgbe_nvm_version { #define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ #define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ #define IXGBE_TSIM 0x08C68 /* TimeSync Interrupt Mask Register - RW */ +#define IXGBE_TSSDP 0x0003C /* TimeSync SDP Configuration Register - RW */ /* Diagnostic Registers */ #define IXGBE_RDSTATCTL 0x02C20 @@ -2240,11 +2241,18 @@ enum { #define IXGBE_RXDCTL_RLPML_EN 0x00008000 #define IXGBE_RXDCTL_VME 0x40000000 /* VLAN mode enable */ -#define IXGBE_TSAUXC_EN_CLK 0x00000004 -#define IXGBE_TSAUXC_SYNCLK 0x00000008 -#define IXGBE_TSAUXC_SDP0_INT 0x00000040 +#define IXGBE_TSAUXC_EN_CLK 0x00000004 +#define IXGBE_TSAUXC_SYNCLK 0x00000008 +#define IXGBE_TSAUXC_SDP0_INT 0x00000040 +#define IXGBE_TSAUXC_EN_TT0 0x00000001 +#define IXGBE_TSAUXC_EN_TT1 0x00000002 +#define IXGBE_TSAUXC_ST0 0x00000010 #define IXGBE_TSAUXC_DISABLE_SYSTIME 0x80000000 +#define IXGBE_TSSDP_TS_SDP0_SEL_MASK 0x000000C0 +#define IXGBE_TSSDP_TS_SDP0_CLK0 0x00000080 +#define IXGBE_TSSDP_TS_SDP0_EN 0x00000100 + #define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index bfe95ce0bd7f..6b609553329f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -14,57 +14,10 @@ struct xdp_umem *ixgbe_xsk_umem(struct 
ixgbe_adapter *adapter, bool xdp_on = READ_ONCE(adapter->xdp_prog); int qid = ring->ring_idx; - if (!adapter->xsk_umems || !adapter->xsk_umems[qid] || - qid >= adapter->num_xsk_umems || !xdp_on) + if (!xdp_on || !test_bit(qid, adapter->af_xdp_zc_qps)) return NULL; - return adapter->xsk_umems[qid]; -} - -static int ixgbe_alloc_xsk_umems(struct ixgbe_adapter *adapter) -{ - if (adapter->xsk_umems) - return 0; - - adapter->num_xsk_umems_used = 0; - adapter->num_xsk_umems = adapter->num_rx_queues; - adapter->xsk_umems = kcalloc(adapter->num_xsk_umems, - sizeof(*adapter->xsk_umems), - GFP_KERNEL); - if (!adapter->xsk_umems) { - adapter->num_xsk_umems = 0; - return -ENOMEM; - } - - return 0; -} - -static int ixgbe_add_xsk_umem(struct ixgbe_adapter *adapter, - struct xdp_umem *umem, - u16 qid) -{ - int err; - - err = ixgbe_alloc_xsk_umems(adapter); - if (err) - return err; - - adapter->xsk_umems[qid] = umem; - adapter->num_xsk_umems_used++; - - return 0; -} - -static void ixgbe_remove_xsk_umem(struct ixgbe_adapter *adapter, u16 qid) -{ - adapter->xsk_umems[qid] = NULL; - adapter->num_xsk_umems_used--; - - if (adapter->num_xsk_umems == 0) { - kfree(adapter->xsk_umems); - adapter->xsk_umems = NULL; - adapter->num_xsk_umems = 0; - } + return xdp_get_umem_from_qid(adapter->netdev, qid); } static int ixgbe_xsk_umem_dma_map(struct ixgbe_adapter *adapter, @@ -113,6 +66,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, struct xdp_umem *umem, u16 qid) { + struct net_device *netdev = adapter->netdev; struct xdp_umem_fq_reuse *reuseq; bool if_running; int err; @@ -120,12 +74,9 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, if (qid >= adapter->num_rx_queues) return -EINVAL; - if (adapter->xsk_umems) { - if (qid >= adapter->num_xsk_umems) - return -EINVAL; - if (adapter->xsk_umems[qid]) - return -EBUSY; - } + if (qid >= netdev->real_num_rx_queues || + qid >= netdev->real_num_tx_queues) + return -EINVAL; reuseq = xsk_reuseq_prepare(adapter->rx_ring[0]->count); if (!reuseq) @@ -138,14 +89,12 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, return err; if_running = netif_running(adapter->netdev) && - READ_ONCE(adapter->xdp_prog); + ixgbe_enabled_xdp_adapter(adapter); if (if_running) ixgbe_txrx_ring_disable(adapter, qid); - err = ixgbe_add_xsk_umem(adapter, umem, qid); - if (err) - return err; + set_bit(qid, adapter->af_xdp_zc_qps); if (if_running) { ixgbe_txrx_ring_enable(adapter, qid); @@ -161,20 +110,21 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter, static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid) { + struct xdp_umem *umem; bool if_running; - if (!adapter->xsk_umems || qid >= adapter->num_xsk_umems || - !adapter->xsk_umems[qid]) + umem = xdp_get_umem_from_qid(adapter->netdev, qid); + if (!umem) return -EINVAL; if_running = netif_running(adapter->netdev) && - READ_ONCE(adapter->xdp_prog); + ixgbe_enabled_xdp_adapter(adapter); if (if_running) ixgbe_txrx_ring_disable(adapter, qid); - ixgbe_xsk_umem_dma_unmap(adapter, adapter->xsk_umems[qid]); - ixgbe_remove_xsk_umem(adapter, qid); + clear_bit(qid, adapter->af_xdp_zc_qps); + ixgbe_xsk_umem_dma_unmap(adapter, umem); if (if_running) ixgbe_txrx_ring_enable(adapter, qid); @@ -621,8 +571,9 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) union ixgbe_adv_tx_desc *tx_desc = NULL; struct ixgbe_tx_buffer *tx_bi; bool work_done = true; - u32 len, cmd_type; + struct xdp_desc desc; dma_addr_t dma; + u32 cmd_type; while (budget-- > 0) { if 
(unlikely(!ixgbe_desc_unused(xdp_ring)) || @@ -631,15 +582,18 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) break; } - if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &dma, &len)) + if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc)) break; - dma_sync_single_for_device(xdp_ring->dev, dma, len, + dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr); + + dma_sync_single_for_device(xdp_ring->dev, dma, desc.len, DMA_BIDIRECTIONAL); tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use]; - tx_bi->bytecount = len; + tx_bi->bytecount = desc.len; tx_bi->xdpf = NULL; + tx_bi->gso_segs = 1; tx_desc = IXGBE_TX_DESC(xdp_ring, xdp_ring->next_to_use); tx_desc->read.buffer_addr = cpu_to_le64(dma); @@ -648,10 +602,10 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) cmd_type = IXGBE_ADVTXD_DTYP_DATA | IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DCMD_IFCS; - cmd_type |= len | IXGBE_TXD_CMD; + cmd_type |= desc.len | IXGBE_TXD_CMD; tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); tx_desc->read.olinfo_status = - cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT); + cpu_to_le32(desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT); xdp_ring->next_to_use++; if (xdp_ring->next_to_use == xdp_ring->count) @@ -704,7 +658,6 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector, xsk_frames++; tx_bi->xdpf = NULL; - total_bytes += tx_bi->bytecount; tx_bi++; tx_desc++; @@ -753,7 +706,7 @@ int ixgbe_xsk_async_xmit(struct net_device *dev, u32 qid) if (qid >= adapter->num_xdp_queues) return -ENXIO; - if (!adapter->xsk_umems || !adapter->xsk_umems[qid]) + if (!adapter->xdp_ring[qid]->xsk_umem) return -ENXIO; ring = adapter->xdp_ring[qid]; diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 5399787e07af..54459b69c948 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -85,22 +85,16 @@ static int ixgbevf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); - struct ixgbe_hw *hw = &adapter->hw; - u32 link_speed = 0; - bool link_up; ethtool_link_ksettings_zero_link_mode(cmd, supported); ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full); cmd->base.autoneg = AUTONEG_DISABLE; cmd->base.port = -1; - hw->mac.get_link_status = 1; - hw->mac.ops.check_link(hw, &link_speed, &link_up, false); - - if (link_up) { + if (adapter->link_up) { __u32 speed = SPEED_10000; - switch (link_speed) { + switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: speed = SPEED_10000; break; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d189ed247665..d2b41f9f87f8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1423,6 +1423,9 @@ static void ixgbevf_update_itr(struct ixgbevf_q_vector *q_vector, */ /* what was last interrupt timeslice? 
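A zero result would turn the bytes/usec division below into a divide-by-zero, which is exactly what the early return added here guards against.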
*/ timepassed_us = q_vector->itr >> 2; + if (timepassed_us == 0) + return; + bytes_perint = bytes / timepassed_us; /* bytes/usec */ switch (itr_setting) { diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index cd3b81300cc7..d5ce49636548 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -508,9 +508,8 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw, vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr); } - ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, IXGBE_VFMAILBOX_SIZE); - - return 0; + return ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + IXGBE_VFMAILBOX_SIZE); } /** diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index c5dac6bd2be4..f660cc2b8258 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -64,7 +64,7 @@ struct orion_mdio_dev { void __iomem *regs; - struct clk *clk[3]; + struct clk *clk[4]; /* * If we have access to the error interrupt pin (which is * somewhat misnamed as it not only reflects internal errors @@ -321,11 +321,19 @@ static int orion_mdio_probe(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(dev->clk); i++) { dev->clk[i] = of_clk_get(pdev->dev.of_node, i); + if (PTR_ERR(dev->clk[i]) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto out_clk; + } if (IS_ERR(dev->clk[i])) break; clk_prepare_enable(dev->clk[i]); } + if (!IS_ERR(of_clk_get(pdev->dev.of_node, ARRAY_SIZE(dev->clk)))) + dev_warn(&pdev->dev, "unsupported number of clocks, limiting to the first " + __stringify(ARRAY_SIZE(dev->clk)) "\n"); + dev->err_interrupt = platform_get_irq(pdev, 0); if (dev->err_interrupt > 0 && resource_size(r) < MVMDIO_ERR_INT_MASK + 4) { @@ -362,6 +370,7 @@ out_mdio: if (dev->err_interrupt > 0) writel(0, dev->regs + MVMDIO_ERR_INT_MASK); +out_clk: for (i = 0; i < ARRAY_SIZE(dev->clk); i++) { if (IS_ERR(dev->clk[i])) break; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 269bd73be1a0..895bfed26a8a 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -437,6 +437,7 @@ struct mvneta_port { struct device_node *dn; unsigned int tx_csum_limit; struct phylink *phylink; + struct phylink_config phylink_config; struct phy *comphy; struct mvneta_bm *bm_priv; @@ -1118,7 +1119,7 @@ static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu) SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size)); /* Fill entire long pool */ - num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC); + num = hwbm_pool_add(hwbm_pool, hwbm_pool->size); if (num != hwbm_pool->size) { WARN(1, "pool %d: %d of %d allocated\n", bm_pool->id, num, hwbm_pool->size); @@ -3356,9 +3357,11 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr) return 0; } -static void mvneta_validate(struct net_device *ndev, unsigned long *supported, +static void mvneta_validate(struct phylink_config *config, + unsigned long *supported, struct phylink_link_state *state) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; @@ -3408,9 +3411,10 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported, phylink_helper_basex_speed(state); } -static int mvneta_mac_link_state(struct net_device *ndev, +static int mvneta_mac_link_state(struct phylink_config *config, struct phylink_link_state *state) { + struct 
net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 gmac_stat; @@ -3438,8 +3442,9 @@ static int mvneta_mac_link_state(struct net_device *ndev, return 1; } -static void mvneta_mac_an_restart(struct net_device *ndev) +static void mvneta_mac_an_restart(struct phylink_config *config) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); @@ -3449,9 +3454,10 @@ static void mvneta_mac_an_restart(struct net_device *ndev) gmac_an & ~MVNETA_GMAC_INBAND_RESTART_AN); } -static void mvneta_mac_config(struct net_device *ndev, unsigned int mode, - const struct phylink_link_state *state) +static void mvneta_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 new_ctrl0, gmac_ctrl0 = mvreg_read(pp, MVNETA_GMAC_CTRL_0); u32 new_ctrl2, gmac_ctrl2 = mvreg_read(pp, MVNETA_GMAC_CTRL_2); @@ -3581,9 +3587,10 @@ static void mvneta_set_eee(struct mvneta_port *pp, bool enable) mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi_ctl1); } -static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode, - phy_interface_t interface) +static void mvneta_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 val; @@ -3600,10 +3607,11 @@ static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode, mvneta_set_eee(pp, false); } -static void mvneta_mac_link_up(struct net_device *ndev, unsigned int mode, +static void mvneta_mac_link_up(struct phylink_config *config, unsigned int mode, phy_interface_t interface, struct phy_device *phy) { + struct net_device *ndev = to_net_dev(config->dev); struct mvneta_port *pp = netdev_priv(ndev); u32 val; @@ -4500,8 +4508,14 @@ static int mvneta_probe(struct platform_device *pdev) comphy = NULL; } - phylink = phylink_create(dev, pdev->dev.fwnode, phy_mode, - &mvneta_phylink_ops); + pp = netdev_priv(dev); + spin_lock_init(&pp->lock); + + pp->phylink_config.dev = &dev->dev; + pp->phylink_config.type = PHYLINK_NETDEV; + + phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode, + phy_mode, &mvneta_phylink_ops); if (IS_ERR(phylink)) { err = PTR_ERR(phylink); goto err_free_irq; @@ -4513,8 +4527,6 @@ static int mvneta_probe(struct platform_device *pdev) dev->ethtool_ops = &mvneta_eth_tool_ops; - pp = netdev_priv(dev); - spin_lock_init(&pp->lock); pp->phylink = phylink; pp->comphy = comphy; pp->phy_interface = phy_mode; diff --git a/drivers/net/ethernet/marvell/mvneta_bm.c b/drivers/net/ethernet/marvell/mvneta_bm.c index de468e1bdba9..82ee2bcca6fd 100644 --- a/drivers/net/ethernet/marvell/mvneta_bm.c +++ b/drivers/net/ethernet/marvell/mvneta_bm.c @@ -190,7 +190,7 @@ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); hwbm_pool->construct = mvneta_bm_construct; hwbm_pool->priv = new_pool; - spin_lock_init(&hwbm_pool->lock); + mutex_init(&hwbm_pool->buf_lock); /* Create new pool */ err = mvneta_bm_pool_create(priv, new_pool); @@ -201,7 +201,7 @@ struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id, } /* Allocate buffers for this pool */ - num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC); + num = hwbm_pool_add(hwbm_pool, hwbm_pool->size); if 
(num != hwbm_pool->size) { WARN(1, "pool %d: %d of %d allocated\n", new_pool->id, num, hwbm_pool->size); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index 6171270a016c..4d9564ba68f6 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -148,6 +148,8 @@ #define MVPP22_CLS_C2_ATTR2 0x1b6c #define MVPP22_CLS_C2_ATTR2_RSS_EN BIT(30) #define MVPP22_CLS_C2_ATTR3 0x1b70 +#define MVPP22_CLS_C2_TCAM_CTRL 0x1b90 +#define MVPP22_CLS_C2_TCAM_BYPASS_FIFO BIT(0) /* Descriptor Manager Top Registers */ #define MVPP2_RXQ_NUM_REG 0x2040 @@ -327,8 +329,26 @@ #define MVPP22_BM_ADDR_HIGH_VIRT_RLS_MASK 0xff00 #define MVPP22_BM_ADDR_HIGH_VIRT_RLS_SHIFT 8 +/* Packet Processor per-port counters */ +#define MVPP2_OVERRUN_ETH_DROP 0x7000 +#define MVPP2_CLS_ETH_DROP 0x7020 + /* Hit counters registers */ #define MVPP2_CTRS_IDX 0x7040 +#define MVPP22_CTRS_TX_CTR(port, txq) ((txq) | ((port) << 3) | BIT(7)) +#define MVPP2_TX_DESC_ENQ_CTR 0x7100 +#define MVPP2_TX_DESC_ENQ_TO_DDR_CTR 0x7104 +#define MVPP2_TX_BUFF_ENQ_TO_DDR_CTR 0x7108 +#define MVPP2_TX_DESC_ENQ_HW_FWD_CTR 0x710c +#define MVPP2_RX_DESC_ENQ_CTR 0x7120 +#define MVPP2_TX_PKTS_DEQ_CTR 0x7130 +#define MVPP2_TX_PKTS_FULL_QUEUE_DROP_CTR 0x7200 +#define MVPP2_TX_PKTS_EARLY_DROP_CTR 0x7204 +#define MVPP2_TX_PKTS_BM_DROP_CTR 0x7208 +#define MVPP2_TX_PKTS_BM_MC_DROP_CTR 0x720c +#define MVPP2_RX_PKTS_FULL_QUEUE_DROP_CTR 0x7220 +#define MVPP2_RX_PKTS_EARLY_DROP_CTR 0x7224 +#define MVPP2_RX_PKTS_BM_DROP_CTR 0x7228 #define MVPP2_CLS_DEC_TBL_HIT_CTR 0x7700 #define MVPP2_CLS_FLOW_TBL_HIT_CTR 0x7704 @@ -624,6 +644,7 @@ #define MVPP2_N_RFS_RULES (MVPP2_N_RFS_ENTRIES_PER_FLOW * 7) /* RSS constants */ +#define MVPP22_N_RSS_TABLES 8 #define MVPP22_RSS_TABLE_ENTRIES 32 /* IPv6 max L3 address size */ @@ -725,6 +746,10 @@ enum mvpp2_prs_l3_cast { /* Definitions */ struct mvpp2_dbgfs_entries; +struct mvpp2_rss_table { + u32 indir[MVPP22_RSS_TABLE_ENTRIES]; +}; + /* Shared Packet Processor resources */ struct mvpp2 { /* Shared registers' base addresses */ @@ -788,6 +813,9 @@ struct mvpp2 { /* Debugfs entries private data */ struct mvpp2_dbgfs_entries *dbgfs_entries; + + /* RSS Indirection tables */ + struct mvpp2_rss_table *rss_tables[MVPP22_N_RSS_TABLES]; }; struct mvpp2_pcpu_stats { @@ -905,6 +933,7 @@ struct mvpp2_port { phy_interface_t phy_interface; struct phylink *phylink; + struct phylink_config phylink_config; struct phy *comphy; struct mvpp2_bm_pool *pool_long; @@ -919,12 +948,14 @@ struct mvpp2_port { u32 tx_time_coal; - /* RSS indirection table */ - u32 indir[MVPP22_RSS_TABLE_ENTRIES]; - /* List of steering rules active on that port */ - struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_RULES]; + struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_ENTRIES_PER_FLOW]; int n_rfs_rules; + + /* Each port has its own view of the rss contexts, so that it can number + * them from 0 + */ + int rss_ctx[MVPP22_N_RSS_TABLES]; }; /* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index a57d17ab91f0..35478cba2aa5 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -44,17 +44,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { /* TCP over IPv4 flows, Not fragmented, with vlan tag */ MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG, - MVPP22_CLS_HEK_IP4_5T | 
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index a57d17ab91f0..35478cba2aa5 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -44,17 +44,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* TCP over IPv4 flows, Not fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
@@ -79,17 +79,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* TCP over IPv4 flows, fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP4, MVPP2_FL_IP4_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
@@ -114,17 +114,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* UDP over IPv4 flows, Not fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
@@ -149,17 +149,17 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* UDP over IPv4 flows, fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4 | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP4, MVPP2_FL_IP4_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
@@ -178,12 +178,12 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* TCP over IPv6 flows, not fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_NF_TAG,
-              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
@@ -202,13 +202,13 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* TCP over IPv6 flows, fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
               MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_TCP6, MVPP2_FL_IP6_TCP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
               MVPP2_PRS_RI_L4_TCP,
               MVPP2_PRS_IP_MASK),
 
@@ -228,12 +228,12 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* UDP over IPv6 flows, not fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_NF_TAG,
-              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_5T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
@@ -252,13 +252,13 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* UDP over IPv6 flows, fragmented, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6 | MVPP2_PRS_RI_IP_FRAG_TRUE |
               MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
     MVPP2_DEF_FLOW(MVPP22_FLOW_UDP6, MVPP2_FL_IP6_UDP_FRAG_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6_EXT | MVPP2_PRS_RI_IP_FRAG_TRUE |
               MVPP2_PRS_RI_L4_UDP,
               MVPP2_PRS_IP_MASK),
 
@@ -279,15 +279,15 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* IPv4 flows, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4,
               MVPP2_PRS_RI_L3_PROTO_MASK),
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OPT,
               MVPP2_PRS_RI_L3_PROTO_MASK),
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP4, MVPP2_FL_IP4_TAG,
-              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP4_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP4_OTHER,
               MVPP2_PRS_RI_L3_PROTO_MASK),
 
@@ -303,11 +303,11 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = {
     /* IPv6 flows, with vlan tag */
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP6, MVPP2_FL_IP6_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6,
               MVPP2_PRS_RI_L3_PROTO_MASK),
     MVPP2_DEF_FLOW(MVPP22_FLOW_IP6, MVPP2_FL_IP6_TAG,
-              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_OPT_VLAN,
+              MVPP22_CLS_HEK_IP6_2T | MVPP22_CLS_HEK_TAGGED,
               MVPP2_PRS_RI_L3_IP6,
               MVPP2_PRS_RI_L3_PROTO_MASK),
 
@@ -548,6 +548,8 @@ void mvpp2_cls_c2_read(struct mvpp2 *priv, int index,
 static int mvpp2_cls_ethtool_flow_to_type(int flow_type)
 {
     switch (flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) {
+    case ETHER_FLOW:
+        return MVPP22_FLOW_ETHERNET;
     case TCP_V4_FLOW:
         return MVPP22_FLOW_TCP4;
     case TCP_V6_FLOW:
@@ -596,7 +598,7 @@ static void mvpp2_cls_flow_init(struct mvpp2 *priv,
     mvpp2_cls_flow_eng_set(&fe, MVPP22_CLS_ENGINE_C2);
     mvpp2_cls_flow_port_id_sel(&fe, true);
-    mvpp2_cls_flow_lu_type_set(&fe, MVPP22_FLOW_ETHERNET);
+    mvpp2_cls_flow_lu_type_set(&fe, MVPP22_CLS_LU_TYPE_ALL);
 
     /* Add all ports */
     for (i = 0; i < MVPP2_MAX_PORTS; i++)
@@ -655,6 +657,9 @@ static int mvpp2_flow_set_hek_fields(struct mvpp2_cls_flow_entry *fe,
         case MVPP22_CLS_HEK_OPT_VLAN:
             field_id = MVPP22_CLS_FIELD_VLAN;
             break;
+        case MVPP22_CLS_HEK_OPT_VLAN_PRI:
+            field_id = MVPP22_CLS_FIELD_VLAN_PRI;
+            break;
         case MVPP22_CLS_HEK_OPT_IP4SA:
             field_id = MVPP22_CLS_FIELD_IP4SA;
             break;
@@ -689,6 +694,10 @@ static int mvpp2_cls_hek_field_size(u32 field)
     switch (field) {
     case MVPP22_CLS_HEK_OPT_MAC_DA:
         return 48;
+    case MVPP22_CLS_HEK_OPT_VLAN:
+        return 12;
+    case MVPP22_CLS_HEK_OPT_VLAN_PRI:
+        return 3;
     case MVPP22_CLS_HEK_OPT_IP4SA:
     case MVPP22_CLS_HEK_OPT_IP4DA:
         return 32;
@@ -777,6 +786,9 @@ u16 mvpp2_flow_get_hek_fields(struct mvpp2_cls_flow_entry *fe)
         case MVPP22_CLS_FIELD_VLAN:
             hash_opts |= MVPP22_CLS_HEK_OPT_VLAN;
             break;
+        case MVPP22_CLS_FIELD_VLAN_PRI:
+            hash_opts |= MVPP22_CLS_HEK_OPT_VLAN_PRI;
+            break;
         case MVPP22_CLS_FIELD_L3_PROTO:
             hash_opts |= MVPP22_CLS_HEK_OPT_L3_PROTO;
             break;
@@ -861,7 +873,7 @@ static void mvpp2_port_c2_cls_init(struct mvpp2_port *port)
     /* Match on Lookup Type */
     c2.tcam[4] |= MVPP22_CLS_C2_TCAM_EN(MVPP22_CLS_C2_LU_TYPE(MVPP2_CLS_LU_TYPE_MASK));
-    c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(MVPP22_FLOW_ETHERNET);
+    c2.tcam[4] |= MVPP22_CLS_C2_LU_TYPE(MVPP22_CLS_LU_TYPE_ALL);
 
     /* Update RSS status after matching this entry */
     c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
@@ -923,6 +935,12 @@ void mvpp2_cls_init(struct mvpp2 *priv)
         mvpp2_cls_c2_write(priv, &c2);
     }
 
+    /* Disable the FIFO stages in the C2 engine, which are only used in
+     * BIST mode
+     */
+    mvpp2_write(priv, MVPP22_CLS_C2_TCAM_CTRL,
+            MVPP22_CLS_C2_TCAM_BYPASS_FIFO);
+
     mvpp2_cls_port_init_flows(priv);
 }
 
@@ -963,12 +981,22 @@ u32 mvpp2_cls_c2_hit_count(struct mvpp2 *priv, int c2_index)
     return mvpp2_read(priv, MVPP22_CLS_C2_HIT_CTR);
 }
 
-static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
+static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port, u32 ctx)
 {
     struct mvpp2_cls_c2_entry c2;
+    u8 qh, ql;
 
     mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
 
+    /* The RxQ number is used to select the RSS table. In that case, we set
+     * it to be the ctx number.
+     */
+    qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+    ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+    c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+             MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
     c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
 
     mvpp2_cls_c2_write(port->priv, &c2);
@@ -977,22 +1005,45 @@
 static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
 {
     struct mvpp2_cls_c2_entry c2;
+    u8 qh, ql;
 
     mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
 
+    /* Reset the default destination RxQ to the port's first rx queue. */
+    qh = (port->first_rxq >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+    ql = port->first_rxq & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+    c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+             MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
     c2.attr[2] &= ~MVPP22_CLS_C2_ATTR2_RSS_EN;
 
     mvpp2_cls_c2_write(port->priv, &c2);
 }
 
-void mvpp22_port_rss_enable(struct mvpp2_port *port)
+static inline int mvpp22_rss_ctx(struct mvpp2_port *port, int port_rss_ctx)
+{
+    return port->rss_ctx[port_rss_ctx];
+}
+
+int mvpp22_port_rss_enable(struct mvpp2_port *port)
 {
-    mvpp2_rss_port_c2_enable(port);
+    if (mvpp22_rss_ctx(port, 0) < 0)
+        return -EINVAL;
+
+    mvpp2_rss_port_c2_enable(port, mvpp22_rss_ctx(port, 0));
+
+    return 0;
 }
 
-void mvpp22_port_rss_disable(struct mvpp2_port *port)
+int mvpp22_port_rss_disable(struct mvpp2_port *port)
 {
+    if (mvpp22_rss_ctx(port, 0) < 0)
+        return -EINVAL;
+
     mvpp2_rss_port_c2_disable(port);
+
+    return 0;
 }
 
 static void mvpp22_port_c2_lookup_disable(struct mvpp2_port *port, int entry)
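Both the enable and disable paths above split an 8-bit queue (or RSS
context) number into the ATTR0 "qhigh"/"qlow" halves. A hypothetical helper
condensing that split, with a worked example (the 3-bit low field follows
from the ">> 3"; the high-field width is taken on trust from the masks):

    static void mvpp2_c2_split_q(u32 q, u8 *qh, u8 *ql)
    {
        /* low 3 bits go to QLOW, the remainder to QHIGH */
        *qh = (q >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
        *ql = q & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
    }

    /* e.g. q = 13: qh = 1, ql = 5, since 13 == (1 << 3) | 5 */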
+ */ + c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK); + /* Mark packet as "forwarded to software", needed for RSS */ c2.act |= MVPP22_CLS_C2_ACT_FWD(MVPP22_C2_FWD_SW_LOCK); c2.act |= MVPP22_CLS_C2_ACT_QHIGH(MVPP22_C2_UPD_LOCK) | MVPP22_CLS_C2_ACT_QLOW(MVPP22_C2_UPD_LOCK); - qh = ((act->queue.index + port->first_rxq) >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK; - ql = (act->queue.index + port->first_rxq) & MVPP22_CLS_C2_ATTR0_QLOW_MASK; + if (act->queue.ctx) { + /* Get the global ctx number */ + ctx = mvpp22_rss_ctx(port, act->queue.ctx); + if (ctx < 0) + return -EINVAL; + + qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK; + ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK; + } else { + qh = ((act->queue.index + port->first_rxq) >> 3) & + MVPP22_CLS_C2_ATTR0_QHIGH_MASK; + ql = (act->queue.index + port->first_rxq) & + MVPP22_CLS_C2_ATTR0_QLOW_MASK; + } c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) | MVPP22_CLS_C2_ATTR0_QLOW(ql); @@ -1140,6 +1213,9 @@ static int mvpp2_port_flt_rfs_rule_insert(struct mvpp2_port *port, if (!flow) return 0; + if ((rule->hek_fields & flow->supported_hash_opts) != rule->hek_fields) + continue; + index = MVPP2_CLS_FLT_C2_RFS(port->id, flow->flow_id, rule->loc); mvpp2_cls_flow_read(priv, index, &fe); @@ -1158,7 +1234,44 @@ static int mvpp2_port_flt_rfs_rule_insert(struct mvpp2_port *port, static int mvpp2_cls_c2_build_match(struct mvpp2_rfs_rule *rule) { struct flow_rule *flow = rule->flow; - int offs = 64; + int offs = 0; + + /* The order of insertion in C2 tcam must match the order in which + * the fields are found in the header + */ + if (flow_rule_match_key(flow, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(flow, &match); + if (match.mask->vlan_id) { + rule->hek_fields |= MVPP22_CLS_HEK_OPT_VLAN; + + rule->c2_tcam |= ((u64)match.key->vlan_id) << offs; + rule->c2_tcam_mask |= ((u64)match.mask->vlan_id) << offs; + + /* Don't update the offset yet */ + } + + if (match.mask->vlan_priority) { + rule->hek_fields |= MVPP22_CLS_HEK_OPT_VLAN_PRI; + + /* VLAN pri is always at offset 13 relative to the + * current offset + */ + rule->c2_tcam |= ((u64)match.key->vlan_priority) << + (offs + 13); + rule->c2_tcam_mask |= ((u64)match.mask->vlan_priority) << + (offs + 13); + } + + if (match.mask->vlan_dei) + return -EOPNOTSUPP; + + /* vlan id and prio always seem to take a full 16-bit slot in + * the Header Extracted Key. 
+ */ + offs += 16; + } if (flow_rule_match_key(flow, FLOW_DISSECTOR_KEY_PORTS)) { struct flow_match_ports match; @@ -1166,18 +1279,18 @@ static int mvpp2_cls_c2_build_match(struct mvpp2_rfs_rule *rule) flow_rule_match_ports(flow, &match); if (match.mask->src) { rule->hek_fields |= MVPP22_CLS_HEK_OPT_L4SIP; - offs -= mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4SIP); rule->c2_tcam |= ((u64)ntohs(match.key->src)) << offs; rule->c2_tcam_mask |= ((u64)ntohs(match.mask->src)) << offs; + offs += mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4SIP); } if (match.mask->dst) { rule->hek_fields |= MVPP22_CLS_HEK_OPT_L4DIP; - offs -= mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4DIP); rule->c2_tcam |= ((u64)ntohs(match.key->dst)) << offs; rule->c2_tcam_mask |= ((u64)ntohs(match.mask->dst)) << offs; + offs += mvpp2_cls_hek_field_size(MVPP22_CLS_HEK_OPT_L4DIP); } } @@ -1196,6 +1309,13 @@ static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule) if (act->id != FLOW_ACTION_QUEUE && act->id != FLOW_ACTION_DROP) return -EOPNOTSUPP; + /* When both an RSS context and an queue index are set, the index + * is considered as an offset to be added to the indirection table + * entries. We don't support this, so reject this rule. + */ + if (act->queue.ctx && act->queue.index) + return -EOPNOTSUPP; + /* For now, only use the C2 engine which has a HEK size limited to 64 * bits for TCAM matching. */ @@ -1212,7 +1332,7 @@ int mvpp2_ethtool_cls_rule_get(struct mvpp2_port *port, { struct mvpp2_ethtool_fs *efs; - if (rxnfc->fs.location >= MVPP2_N_RFS_RULES) + if (rxnfc->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW) return -EINVAL; efs = port->rfs_rules[rxnfc->fs.location]; @@ -1232,8 +1352,7 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port, struct mvpp2_ethtool_fs *efs, *old_efs; int ret = 0; - if (info->fs.location >= 4 || - info->fs.location < 0) + if (info->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW) return -EINVAL; efs = kzalloc(sizeof(*efs), GFP_KERNEL); @@ -1242,6 +1361,12 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port, input.fs = &info->fs; + /* We need to manually set the rss_ctx, since this info isn't present + * in info->fs + */ + if (info->fs.flow_type & FLOW_RSS) + input.rss_ctx = info->rss_context; + ethtool_rule = ethtool_rx_flow_rule_create(&input); if (IS_ERR(ethtool_rule)) { ret = PTR_ERR(ethtool_rule); @@ -1250,6 +1375,10 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port, efs->rule.flow = ethtool_rule->rule; efs->rule.flow_type = mvpp2_cls_ethtool_flow_to_type(info->fs.flow_type); + if (efs->rule.flow_type < 0) { + ret = efs->rule.flow_type; + goto clean_rule; + } ret = mvpp2_cls_rfs_parse_rule(&efs->rule); if (ret) @@ -1328,19 +1457,160 @@ static inline u32 mvpp22_rxfh_indir(struct mvpp2_port *port, u32 rxq) return port->first_rxq + ((rxq * nrxqs + rxq / cpus) % port->nrxqs); } -void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table) +static void mvpp22_rss_fill_table(struct mvpp2_port *port, + struct mvpp2_rss_table *table, + u32 rss_ctx) { struct mvpp2 *priv = port->priv; int i; for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) { - u32 sel = MVPP22_RSS_INDEX_TABLE(table) | + u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) | MVPP22_RSS_INDEX_TABLE_ENTRY(i); mvpp2_write(priv, MVPP22_RSS_INDEX, sel); mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY, - mvpp22_rxfh_indir(port, port->indir[i])); + mvpp22_rxfh_indir(port, table->indir[i])); + } +} + +static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx) +{ + struct mvpp2 *priv = port->priv; + u32 ctx; + + /* Find 
+static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx)
+{
+    struct mvpp2 *priv = port->priv;
+    u32 ctx;
+
+    /* Find the first free RSS table */
+    for (ctx = 0; ctx < MVPP22_N_RSS_TABLES; ctx++) {
+        if (!priv->rss_tables[ctx])
+            break;
+    }
+
+    if (ctx == MVPP22_N_RSS_TABLES)
+        return -EINVAL;
+
+    priv->rss_tables[ctx] = kzalloc(sizeof(*priv->rss_tables[ctx]),
+                    GFP_KERNEL);
+    if (!priv->rss_tables[ctx])
+        return -ENOMEM;
+
+    *rss_ctx = ctx;
+
+    /* Set the table width: replace the whole classifier Rx queue number
+     * with the ones configured in RSS table entries.
+     */
+    mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(ctx));
+    mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+
+    mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
+    mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
+
+    return 0;
+}
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *port_ctx)
+{
+    u32 rss_ctx;
+    int ret, i;
+
+    ret = mvpp22_rss_context_create(port, &rss_ctx);
+    if (ret)
+        return ret;
+
+    /* Find the first available context number in the port, starting from
+     * 1. Context 0 on each port is reserved for the default context.
+     */
+    for (i = 1; i < MVPP22_N_RSS_TABLES; i++) {
+        if (port->rss_ctx[i] < 0)
+            break;
+    }
+
+    if (i == MVPP22_N_RSS_TABLES)
+        return -EINVAL;
+
+    port->rss_ctx[i] = rss_ctx;
+    *port_ctx = i;
+
+    return 0;
+}
+
+static struct mvpp2_rss_table *mvpp22_rss_table_get(struct mvpp2 *priv,
+                            int rss_ctx)
+{
+    if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+        return NULL;
+
+    return priv->rss_tables[rss_ctx];
+}
+
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 port_ctx)
+{
+    struct mvpp2 *priv = port->priv;
+    struct ethtool_rxnfc *rxnfc;
+    int i, rss_ctx, ret;
+
+    rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+
+    if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+        return -EINVAL;
+
+    /* Invalidate any active classification rules that use this context */
+    for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
+        if (!port->rfs_rules[i])
+            continue;
+
+        rxnfc = &port->rfs_rules[i]->rxnfc;
+        if (!(rxnfc->fs.flow_type & FLOW_RSS) ||
+            rxnfc->rss_context != port_ctx)
+            continue;
+
+        ret = mvpp2_ethtool_cls_rule_del(port, rxnfc);
+        if (ret) {
+            netdev_warn(port->dev,
+                    "couldn't remove classification rule %d associated with this context\n",
+                    rxnfc->fs.location);
+        }
+    }
+
+    kfree(priv->rss_tables[rss_ctx]);
+
+    priv->rss_tables[rss_ctx] = NULL;
+    port->rss_ctx[port_ctx] = -1;
+
+    return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 port_ctx,
+                  const u32 *indir)
+{
+    int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+    struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+                                 rss_ctx);
+
+    if (!rss_table)
+        return -EINVAL;
+
+    memcpy(rss_table->indir, indir,
+           MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+    mvpp22_rss_fill_table(port, rss_table, rss_ctx);
+
+    return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 port_ctx,
+                  u32 *indir)
+{
+    int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+    struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+                                 rss_ctx);
+
+    if (!rss_table)
+        return -EINVAL;
+
+    memcpy(indir, rss_table->indir,
+           MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+    return 0;
+}
 
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info)
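With the get/set_rxfh_context ethtool hooks wired up in mvpp2_main.c further
down, these helpers can be exercised from userspace; assuming an ethtool
release with RSS-context support, something along these lines (the exact
option spelling depends on the ethtool version):

    # allocate a new RSS context on the port
    ethtool -X eth0 context new
    # steer a flow to context 1; combining a context with a queue index
    # is rejected by mvpp2_cls_rfs_parse_rule() above
    ethtool -N eth0 flow-type tcp4 dst-port 80 context 1
    # release the context again
    ethtool -X eth0 context 1 delete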
@@ -1424,32 +1694,32 @@ int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
     return 0;
 }
 
-void mvpp22_port_rss_init(struct mvpp2_port *port)
+int mvpp22_port_rss_init(struct mvpp2_port *port)
 {
-    struct mvpp2 *priv = port->priv;
-    int i;
+    struct mvpp2_rss_table *table;
+    u32 context = 0;
+    int i, ret;
 
-    /* Set the table width: replace the whole classifier Rx queue number
-     * with the ones configured in RSS table entries.
-     */
-    mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(port->id));
-    mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+    for (i = 0; i < MVPP22_N_RSS_TABLES; i++)
+        port->rss_ctx[i] = -1;
 
-    /* The default RxQ is used as a key to select the RSS table to use.
-     * We use one RSS table per port.
-     */
-    mvpp2_write(priv, MVPP22_RSS_INDEX,
-            MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
-    mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
-            MVPP22_RSS_TABLE_POINTER(port->id));
+    ret = mvpp22_rss_context_create(port, &context);
+    if (ret)
+        return ret;
+
+    table = mvpp22_rss_table_get(port->priv, context);
+    if (!table)
+        return -EINVAL;
+
+    port->rss_ctx[0] = context;
 
     /* Configure the first table to evenly distribute the packets across
      * real Rx Queues. The table entries map a hash to a port Rx Queue.
      */
     for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
-        port->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
+        table->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
 
-    mvpp22_rss_fill_table(port, port->id);
+    mvpp22_rss_fill_table(port, table, mvpp22_rss_ctx(port, 0));
 
     /* Configure default flows */
     mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_IP4, MVPP22_CLS_HEK_IP4_2T);
@@ -1458,4 +1728,6 @@ void mvpp22_port_rss_init(struct mvpp2_port *port)
     mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_TCP6, MVPP22_CLS_HEK_IP6_5T);
     mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP4, MVPP22_CLS_HEK_IP4_5T);
     mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP6, MVPP22_CLS_HEK_IP6_5T);
+
+    return 0;
 }
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
index 56b617375a65..8867f25afab4 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
@@ -33,15 +33,16 @@ enum mvpp2_cls_engine {
 };
 
 #define MVPP22_CLS_HEK_OPT_MAC_DA  BIT(0)
-#define MVPP22_CLS_HEK_OPT_VLAN        BIT(1)
-#define MVPP22_CLS_HEK_OPT_L3_PROTO    BIT(2)
-#define MVPP22_CLS_HEK_OPT_IP4SA   BIT(3)
-#define MVPP22_CLS_HEK_OPT_IP4DA   BIT(4)
-#define MVPP22_CLS_HEK_OPT_IP6SA   BIT(5)
-#define MVPP22_CLS_HEK_OPT_IP6DA   BIT(6)
-#define MVPP22_CLS_HEK_OPT_L4SIP   BIT(7)
-#define MVPP22_CLS_HEK_OPT_L4DIP   BIT(8)
-#define MVPP22_CLS_HEK_N_FIELDS        9
+#define MVPP22_CLS_HEK_OPT_VLAN_PRI    BIT(1)
+#define MVPP22_CLS_HEK_OPT_VLAN        BIT(2)
+#define MVPP22_CLS_HEK_OPT_L3_PROTO    BIT(3)
+#define MVPP22_CLS_HEK_OPT_IP4SA   BIT(4)
+#define MVPP22_CLS_HEK_OPT_IP4DA   BIT(5)
+#define MVPP22_CLS_HEK_OPT_IP6SA   BIT(6)
+#define MVPP22_CLS_HEK_OPT_IP6DA   BIT(7)
+#define MVPP22_CLS_HEK_OPT_L4SIP   BIT(8)
+#define MVPP22_CLS_HEK_OPT_L4DIP   BIT(9)
+#define MVPP22_CLS_HEK_N_FIELDS        10
 
 #define MVPP22_CLS_HEK_L4_OPTS (MVPP22_CLS_HEK_OPT_L4SIP | \
                  MVPP22_CLS_HEK_OPT_L4DIP)
@@ -59,8 +60,12 @@ enum mvpp2_cls_engine {
 #define MVPP22_CLS_HEK_IP6_5T  (MVPP22_CLS_HEK_IP6_2T | \
                  MVPP22_CLS_HEK_L4_OPTS)
 
+#define MVPP22_CLS_HEK_TAGGED  (MVPP22_CLS_HEK_OPT_VLAN | \
+                 MVPP22_CLS_HEK_OPT_VLAN_PRI)
+
 enum mvpp2_cls_field_id {
     MVPP22_CLS_FIELD_MAC_DA = 0x03,
+    MVPP22_CLS_FIELD_VLAN_PRI = 0x05,
     MVPP22_CLS_FIELD_VLAN = 0x06,
     MVPP22_CLS_FIELD_L3_PROTO = 0x0f,
     MVPP22_CLS_FIELD_IP4SA = 0x10,
@@ -180,6 +185,11 @@ enum mvpp2_prs_flow {
 /* LU Type defined for all engines, and specified in the flow table */
 #define MVPP2_CLS_LU_TYPE_MASK 0x3f
 
+enum mvpp2_cls_lu_type {
+    /* rule->loc is used as a lu-type for the entries 0 - 62. */
+    MVPP22_CLS_LU_TYPE_ALL = 63,
+};
+
 #define MVPP2_N_FLOWS  (MVPP2_FL_LAST - MVPP2_FL_START)
 
 struct mvpp2_cls_flow {
@@ -249,11 +259,18 @@ struct mvpp2_cls_lookup_entry {
     u32 data;
 };
 
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
-void mvpp22_port_rss_init(struct mvpp2_port *port);
+int mvpp22_port_rss_init(struct mvpp2_port *port);
+
+int mvpp22_port_rss_enable(struct mvpp2_port *port);
+int mvpp22_port_rss_disable(struct mvpp2_port *port);
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *rss_ctx);
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 rss_ctx);
 
-void mvpp22_port_rss_enable(struct mvpp2_port *port);
-void mvpp22_port_rss_disable(struct mvpp2_port *port);
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 rss_ctx,
+                  const u32 *indir);
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 rss_ctx,
+                  u32 *indir);
 
 int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
 int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index d8e5241097a9..c51f1d5b550b 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -56,9 +56,9 @@ static struct {
 /* The prototype is added here to be used in start_dev when using ACPI. This
  * will be removed once phylink is used for all modes (dt+ACPI).
  */
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
                  const struct phylink_link_state *state);
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
                   phy_interface_t interface, struct phy_device *phy);
 
 /* Queue modes */
@@ -1258,6 +1258,17 @@ static u64 mvpp2_read_count(struct mvpp2_port *port,
     return val;
 }
 
+/* Some counters are accessed indirectly by first writing an index to
+ * MVPP2_CTRS_IDX. The index can represent various resources depending on the
+ * register we access; it can be a hit counter for some classification tables,
+ * a counter specific to a rxq, a txq or a buffer pool.
+ */
+static u32 mvpp2_read_index(struct mvpp2 *priv, u32 index, u32 reg)
+{
+    mvpp2_write(priv, MVPP2_CTRS_IDX, index);
+    return mvpp2_read(priv, reg);
+}
+
 /* Due to the fact that software statistics and hardware statistics are, by
  * design, incremented at different moments in the chain of packet processing,
  * it is very likely that incoming packets could have been dropped after being
@@ -1267,7 +1278,7 @@
  * Hence, statistics gathered from userspace with ifconfig (software) and
  * ethtool (hardware) cannot be compared.
  */
-static const struct mvpp2_ethtool_counter mvpp2_ethtool_regs[] = {
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_mib_regs[] = {
     { MVPP2_MIB_GOOD_OCTETS_RCVD, "good_octets_received", true },
     { MVPP2_MIB_BAD_OCTETS_RCVD, "bad_octets_received" },
     { MVPP2_MIB_CRC_ERRORS_SENT, "crc_errors_sent" },
@@ -1297,31 +1308,114 @@ static const struct mvpp2_ethtool_counter mvpp2_ethtool_mib_regs[] = {
     { MVPP2_MIB_LATE_COLLISION, "late_collision" },
 };
 
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_port_regs[] = {
+    { MVPP2_OVERRUN_ETH_DROP, "rx_fifo_or_parser_overrun_drops" },
+    { MVPP2_CLS_ETH_DROP, "rx_classifier_drops" },
+};
+
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_txq_regs[] = {
+    { MVPP2_TX_DESC_ENQ_CTR, "txq_%d_desc_enqueue" },
+    { MVPP2_TX_DESC_ENQ_TO_DDR_CTR, "txq_%d_desc_enqueue_to_ddr" },
+    { MVPP2_TX_BUFF_ENQ_TO_DDR_CTR, "txq_%d_buff_enqueue_to_ddr" },
+    { MVPP2_TX_DESC_ENQ_HW_FWD_CTR, "txq_%d_desc_hardware_forwarded" },
+    { MVPP2_TX_PKTS_DEQ_CTR, "txq_%d_packets_dequeued" },
+    { MVPP2_TX_PKTS_FULL_QUEUE_DROP_CTR, "txq_%d_queue_full_drops" },
+    { MVPP2_TX_PKTS_EARLY_DROP_CTR, "txq_%d_packets_early_drops" },
+    { MVPP2_TX_PKTS_BM_DROP_CTR, "txq_%d_packets_bm_drops" },
+    { MVPP2_TX_PKTS_BM_MC_DROP_CTR, "txq_%d_packets_rep_bm_drops" },
+};
+
+static const struct mvpp2_ethtool_counter mvpp2_ethtool_rxq_regs[] = {
+    { MVPP2_RX_DESC_ENQ_CTR, "rxq_%d_desc_enqueue" },
+    { MVPP2_RX_PKTS_FULL_QUEUE_DROP_CTR, "rxq_%d_queue_full_drops" },
+    { MVPP2_RX_PKTS_EARLY_DROP_CTR, "rxq_%d_packets_early_drops" },
+    { MVPP2_RX_PKTS_BM_DROP_CTR, "rxq_%d_packets_bm_drops" },
+};
+
+#define MVPP2_N_ETHTOOL_STATS(ntxqs, nrxqs) (ARRAY_SIZE(mvpp2_ethtool_mib_regs) + \
+                         ARRAY_SIZE(mvpp2_ethtool_port_regs) + \
+                         (ARRAY_SIZE(mvpp2_ethtool_txq_regs) * (ntxqs)) + \
+                         (ARRAY_SIZE(mvpp2_ethtool_rxq_regs) * (nrxqs)))
+
 static void mvpp2_ethtool_get_strings(struct net_device *netdev, u32 sset,
                       u8 *data)
 {
-    if (sset == ETH_SS_STATS) {
-        int i;
+    struct mvpp2_port *port = netdev_priv(netdev);
+    int i, q;
 
-        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-            strscpy(data + i * ETH_GSTRING_LEN,
-                mvpp2_ethtool_regs[i].string, ETH_GSTRING_LEN);
+    if (sset != ETH_SS_STATS)
+        return;
+
+    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_mib_regs); i++) {
+        strscpy(data, mvpp2_ethtool_mib_regs[i].string,
+            ETH_GSTRING_LEN);
+        data += ETH_GSTRING_LEN;
+    }
+
+    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_port_regs); i++) {
+        strscpy(data, mvpp2_ethtool_port_regs[i].string,
+            ETH_GSTRING_LEN);
+        data += ETH_GSTRING_LEN;
+    }
+
+    for (q = 0; q < port->ntxqs; q++) {
+        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++) {
+            snprintf(data, ETH_GSTRING_LEN,
+                 mvpp2_ethtool_txq_regs[i].string, q);
+            data += ETH_GSTRING_LEN;
+        }
+    }
+
+    for (q = 0; q < port->nrxqs; q++) {
+        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++) {
+            snprintf(data, ETH_GSTRING_LEN,
+                 mvpp2_ethtool_rxq_regs[i].string,
+                 q);
+            data += ETH_GSTRING_LEN;
+        }
     }
 }
 
+static void mvpp2_read_stats(struct mvpp2_port *port)
+{
+    u64 *pstats;
+    int i, q;
+
+    pstats = port->ethtool_stats;
+
+    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_mib_regs); i++)
+        *pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_mib_regs[i]);
+
+    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_port_regs); i++)
+        *pstats++ += mvpp2_read(port->priv,
+                    mvpp2_ethtool_port_regs[i].offset +
+                    4 * port->id);
+
+    for (q = 0; q < port->ntxqs; q++)
+        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++)
+            *pstats++ += mvpp2_read_index(port->priv,
+                              MVPP22_CTRS_TX_CTR(port->id, q),
+                              mvpp2_ethtool_txq_regs[i].offset);
+
+    /* Rxqs are numbered from 0 from the user standpoint, but not from the
+     * driver's. We need to add the port->first_rxq offset.
+     */
+    for (q = 0; q < port->nrxqs; q++)
+        for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++)
+            *pstats++ += mvpp2_read_index(port->priv,
+                              port->first_rxq + q,
+                              mvpp2_ethtool_rxq_regs[i].offset);
+}
+
 static void mvpp2_gather_hw_statistics(struct work_struct *work)
 {
     struct delayed_work *del_work = to_delayed_work(work);
     struct mvpp2_port *port = container_of(del_work, struct mvpp2_port,
                            stats_work);
-    u64 *pstats;
-    int i;
 
     mutex_lock(&port->gather_stats_lock);
 
-    pstats = port->ethtool_stats;
-    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-        *pstats++ += mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
+    mvpp2_read_stats(port);
 
     /* No need to read again the counters right after this function if it
      * was called asynchronously by the user (ie. use of ethtool).
@@ -1345,27 +1439,24 @@ static void mvpp2_ethtool_get_stats(struct net_device *dev,
 
     mutex_lock(&port->gather_stats_lock);
     memcpy(data, port->ethtool_stats,
-           sizeof(u64) * ARRAY_SIZE(mvpp2_ethtool_regs));
+           sizeof(u64) * MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs));
     mutex_unlock(&port->gather_stats_lock);
 }
 
 static int mvpp2_ethtool_get_sset_count(struct net_device *dev, int sset)
 {
+    struct mvpp2_port *port = netdev_priv(dev);
+
     if (sset == ETH_SS_STATS)
-        return ARRAY_SIZE(mvpp2_ethtool_regs);
+        return MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs);
 
     return -EOPNOTSUPP;
 }
 
 static void mvpp2_mac_reset_assert(struct mvpp2_port *port)
 {
-    unsigned int i;
     u32 val;
 
-    /* Read the GOP statistics to reset the hardware counters */
-    for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_regs); i++)
-        mvpp2_read_count(port, &mvpp2_ethtool_regs[i]);
-
     val = readl(port->base + MVPP2_GMAC_CTRL_2_REG) |
           MVPP2_GMAC_PORT_RESET_MASK;
     writel(val, port->base + MVPP2_GMAC_CTRL_2_REG);
@@ -3237,9 +3328,9 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
         struct phylink_link_state state = {
             .interface = port->phy_interface,
         };
-        mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state);
-        mvpp2_mac_link_up(port->dev, MLO_AN_INBAND, port->phy_interface,
-                  NULL);
+        mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state);
+        mvpp2_mac_link_up(&port->phylink_config, MLO_AN_INBAND,
+                  port->phy_interface, NULL);
     }
 
     netif_tx_start_all_queues(port->dev);
@@ -3954,7 +4045,7 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
         ret = mvpp2_ethtool_cls_rule_get(port, info);
         break;
     case ETHTOOL_GRXCLSRLALL:
-        for (i = 0; i < MVPP2_N_RFS_RULES; i++) {
+        for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
             if (port->rfs_rules[i])
                 rules[loc++] = i;
         }
@@ -4000,24 +4091,25 @@ static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir,
                   u8 *key, u8 *hfunc)
 {
     struct mvpp2_port *port = netdev_priv(dev);
+    int ret = 0;
 
     if (!mvpp22_rss_is_supported())
         return -EOPNOTSUPP;
 
     if (indir)
-        memcpy(indir, port->indir,
-               ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+        ret = mvpp22_port_rss_ctx_indir_get(port, 0, indir);
 
     if (hfunc)
         *hfunc = ETH_RSS_HASH_CRC32;
 
-    return 0;
+    return ret;
 }
 
 static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
                   const u8 *key, const u8 hfunc)
 {
     struct mvpp2_port *port = netdev_priv(dev);
+    int ret = 0;
 
     if (!mvpp22_rss_is_supported())
         return -EOPNOTSUPP;
@@ -4028,15 +4120,58 @@ static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
     if (key)
         return -EOPNOTSUPP;
 
-    if (indir) {
-        memcpy(port->indir, indir,
-               ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
-        mvpp22_rss_fill_table(port, port->id);
-    }
+    if (indir)
+        ret = mvpp22_port_rss_ctx_indir_set(port, 0, indir);
 
-    return 0;
+    return ret;
+}
+
+static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir,
+                      u8 *key, u8 *hfunc, u32 rss_context)
+{
+    struct mvpp2_port *port = netdev_priv(dev);
+    int ret = 0;
+
+    if (!mvpp22_rss_is_supported())
+        return -EOPNOTSUPP;
+
+    if (hfunc)
+        *hfunc = ETH_RSS_HASH_CRC32;
+
+    if (indir)
+        ret = mvpp22_port_rss_ctx_indir_get(port, rss_context, indir);
+
+    return ret;
 }
 
+static int mvpp2_ethtool_set_rxfh_context(struct net_device *dev,
+                      const u32 *indir, const u8 *key,
+                      const u8 hfunc, u32 *rss_context,
+                      bool delete)
+{
+    struct mvpp2_port *port = netdev_priv(dev);
+    int ret;
+
+    if (!mvpp22_rss_is_supported())
+        return -EOPNOTSUPP;
+
+    if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
+        return -EOPNOTSUPP;
+
+    if (key)
+        return -EOPNOTSUPP;
+
+    if (delete)
+        return mvpp22_port_rss_ctx_delete(port, *rss_context);
+
+    if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+        ret = mvpp22_port_rss_ctx_create(port, rss_context);
+        if (ret)
+            return ret;
+    }
+
+    return mvpp22_port_rss_ctx_indir_set(port, *rss_context, indir);
+}
 /* Device ops */
 
 static const struct net_device_ops mvpp2_netdev_ops = {
@@ -4073,7 +4208,8 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
     .get_rxfh_indir_size    = mvpp2_ethtool_get_rxfh_indir_size,
     .get_rxfh       = mvpp2_ethtool_get_rxfh,
     .set_rxfh       = mvpp2_ethtool_set_rxfh,
-
+    .get_rxfh_context   = mvpp2_ethtool_get_rxfh_context,
+    .set_rxfh_context   = mvpp2_ethtool_set_rxfh_context,
 };
 
 /* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -4327,6 +4463,11 @@ static int mvpp2_port_init(struct mvpp2_port *port)
     if (err)
         goto err_free_percpu;
 
+    /* Clear all port stats */
+    mvpp2_read_stats(port);
+    memset(port->ethtool_stats, 0,
+           MVPP2_N_ETHTOOL_STATS(port->ntxqs, port->nrxqs) * sizeof(u64));
+
     return 0;
 
 err_free_percpu:
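The port-init hunk above does one throwaway mvpp2_read_stats() pass so the
clear-on-read hardware counters are latched and reset (the removed
mac_reset_assert() loop relied on the same property), then zeroes the
software accumulator so the first real dump starts from zero. The flat
buffer it clears uses one fixed layout that get_strings(),
mvpp2_read_stats() and get_stats() must all agree on, sized by
MVPP2_N_ETHTOOL_STATS():

    /* [ MIB regs | per-port drop regs | ntxqs x txq regs | nrxqs x rxq regs ] */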
@@ -4416,11 +4557,12 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
         eth_hw_addr_random(dev);
 }
 
-static void mvpp2_phylink_validate(struct net_device *dev,
+static void mvpp2_phylink_validate(struct phylink_config *config,
                    unsigned long *supported,
                    struct phylink_link_state *state)
 {
-    struct mvpp2_port *port = netdev_priv(dev);
+    struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+                           phylink_config);
     __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
     /* Invalid combinations */
@@ -4544,10 +4686,11 @@ static void mvpp2_gmac_link_state(struct mvpp2_port *port,
         state->pause |= MLO_PAUSE_TX;
 }
 
-static int mvpp2_phylink_mac_link_state(struct net_device *dev,
+static int mvpp2_phylink_mac_link_state(struct phylink_config *config,
                     struct phylink_link_state *state)
 {
-    struct mvpp2_port *port = netdev_priv(dev);
+    struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+                           phylink_config);
 
     if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
         u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG);
@@ -4563,9 +4706,10 @@ static int mvpp2_phylink_mac_link_state(struct net_device *dev,
     return 1;
 }
 
-static void mvpp2_mac_an_restart(struct net_device *dev)
+static void mvpp2_mac_an_restart(struct phylink_config *config)
 {
-    struct mvpp2_port *port = netdev_priv(dev);
+    struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+                           phylink_config);
     u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
 
     writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN,
@@ -4750,9 +4894,10 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
     }
 }
 
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
                  const struct phylink_link_state *state)
 {
+    struct net_device *dev = to_net_dev(config->dev);
     struct mvpp2_port *port = netdev_priv(dev);
     bool change_interface = port->phy_interface != state->interface;
 
@@ -4792,9 +4937,10 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
     mvpp2_port_enable(port);
 }
 
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
                   phy_interface_t interface, struct phy_device *phy)
 {
+    struct net_device *dev = to_net_dev(config->dev);
     struct mvpp2_port *port = netdev_priv(dev);
     u32 val;
 
@@ -4819,9 +4965,10 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
     netif_tx_wake_all_queues(dev);
 }
 
-static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode,
-                phy_interface_t interface)
+static void mvpp2_mac_link_down(struct phylink_config *config,
+                unsigned int mode, phy_interface_t interface)
 {
+    struct net_device *dev = to_net_dev(config->dev);
     struct mvpp2_port *port = netdev_priv(dev);
     u32 val;
 
@@ -5002,7 +5149,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
     }
 
     port->ethtool_stats = devm_kcalloc(&pdev->dev,
-                       ARRAY_SIZE(mvpp2_ethtool_regs),
+                       MVPP2_N_ETHTOOL_STATS(ntxqs, nrxqs),
                        sizeof(u64), GFP_KERNEL);
     if (!port->ethtool_stats) {
         err = -ENOMEM;
@@ -5078,8 +5225,11 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
     /* Phylink isn't used w/ ACPI as of now */
     if (port_node) {
-        phylink = phylink_create(dev, port_fwnode, phy_mode,
-                     &mvpp2_phylink_ops);
+        port->phylink_config.dev = &dev->dev;
+        port->phylink_config.type = PHYLINK_NETDEV;
+
+        phylink = phylink_create(&port->phylink_config, port_fwnode,
+                     phy_mode, &mvpp2_phylink_ops);
         if (IS_ERR(phylink)) {
             err = PTR_ERR(phylink);
             goto err_free_port_pcpu;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index ae2240074d8e..5692c6087bbb 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
@@ -312,7 +312,8 @@ static void mvpp2_prs_sram_shift_set(struct mvpp2_prs_entry *pe, int shift,
     }
 
     /* Set value */
-    pe->sram[MVPP2_BIT_TO_WORD(MVPP2_PRS_SRAM_SHIFT_OFFS)] = shift & MVPP2_PRS_SRAM_SHIFT_MASK;
+    pe->sram[MVPP2_BIT_TO_WORD(MVPP2_PRS_SRAM_SHIFT_OFFS)] |=
+        shift & MVPP2_PRS_SRAM_SHIFT_MASK;
 
     /* Reset and set operation */
     mvpp2_prs_sram_bits_clear(pe, MVPP2_PRS_SRAM_OP_SEL_SHIFT_OFFS,
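The mvpp2_prs.c hunk is a subtle fix: the shift value shares a packed SRAM
word with other fields, so plain assignment wiped out its neighbours. A
minimal illustration (the 0xf00 neighbour bits are invented for the
example):

    u32 word = 0xf00;               /* other fields already set */

    word  = shift & MVPP2_PRS_SRAM_SHIFT_MASK;  /* old: clobbers 0xf00 */
    word |= shift & MVPP2_PRS_SRAM_SHIFT_MASK;  /* new: preserves it */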
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index d41a2414c575..2d8362f9341b 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -3,4 +3,5 @@
 # Makefile for the Mediatek SoCs built-in ethernet macs
 #
 
-obj-$(CONFIG_NET_MEDIATEK_SOC)          += mtk_eth_soc.o
+obj-$(CONFIG_NET_MEDIATEK_SOC)          += mtk_eth.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_path.c b/drivers/net/ethernet/mediatek/mtk_eth_path.c
new file mode 100644
index 000000000000..7f05880cf9ef
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_eth_path.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018-2019 MediaTek Inc.
+
+/* A library for configuring the path from GMAC/GDM to the target PHY
+ *
+ * Author: Sean Wang <sean.wang@mediatek.com>
+ *
+ */
+
+#include <linux/phy.h>
+#include <linux/regmap.h>
+
+#include "mtk_eth_soc.h"
+
+struct mtk_eth_muxc {
+    const char  *name;
+    int     cap_bit;
+    int     (*set_path)(struct mtk_eth *eth, int path);
+};
+
+static const char *mtk_eth_path_name(int path)
+{
+    switch (path) {
+    case MTK_ETH_PATH_GMAC1_RGMII:
+        return "gmac1_rgmii";
+    case MTK_ETH_PATH_GMAC1_TRGMII:
+        return "gmac1_trgmii";
+    case MTK_ETH_PATH_GMAC1_SGMII:
+        return "gmac1_sgmii";
+    case MTK_ETH_PATH_GMAC2_RGMII:
+        return "gmac2_rgmii";
+    case MTK_ETH_PATH_GMAC2_SGMII:
+        return "gmac2_sgmii";
+    case MTK_ETH_PATH_GMAC2_GEPHY:
+        return "gmac2_gephy";
+    case MTK_ETH_PATH_GDM1_ESW:
+        return "gdm1_esw";
+    default:
+        return "unknown path";
+    }
+}
+
+static int set_mux_gdm1_to_gmac1_esw(struct mtk_eth *eth, int path)
+{
+    bool updated = true;
+    u32 val, mask, set;
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC1_SGMII:
+        mask = ~(u32)MTK_MUX_TO_ESW;
+        set = 0;
+        break;
+    case MTK_ETH_PATH_GDM1_ESW:
+        mask = ~(u32)MTK_MUX_TO_ESW;
+        set = MTK_MUX_TO_ESW;
+        break;
+    default:
+        updated = false;
+        break;
+    }
+
+    if (updated) {
+        val = mtk_r32(eth, MTK_MAC_MISC);
+        val = (val & mask) | set;
+        mtk_w32(eth, val, MTK_MAC_MISC);
+    }
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static int set_mux_gmac2_gmac0_to_gephy(struct mtk_eth *eth, int path)
+{
+    unsigned int val = 0;
+    bool updated = true;
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC2_GEPHY:
+        val = ~(u32)GEPHY_MAC_SEL;
+        break;
+    default:
+        updated = false;
+        break;
+    }
+
+    if (updated)
+        regmap_update_bits(eth->infra, INFRA_MISC2, GEPHY_MAC_SEL, val);
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static int set_mux_u3_gmac2_to_qphy(struct mtk_eth *eth, int path)
+{
+    unsigned int val = 0;
+    bool updated = true;
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC2_SGMII:
+        val = CO_QPHY_SEL;
+        break;
+    default:
+        updated = false;
+        break;
+    }
+
+    if (updated)
+        regmap_update_bits(eth->infra, INFRA_MISC2, CO_QPHY_SEL, val);
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static int set_mux_gmac1_gmac2_to_sgmii_rgmii(struct mtk_eth *eth, int path)
+{
+    unsigned int val = 0;
+    bool updated = true;
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC1_SGMII:
+        val = SYSCFG0_SGMII_GMAC1;
+        break;
+    case MTK_ETH_PATH_GMAC2_SGMII:
+        val = SYSCFG0_SGMII_GMAC2;
+        break;
+    case MTK_ETH_PATH_GMAC1_RGMII:
+    case MTK_ETH_PATH_GMAC2_RGMII:
+        regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+        val &= SYSCFG0_SGMII_MASK;
+
+        if ((path == MTK_ETH_PATH_GMAC1_RGMII && val == SYSCFG0_SGMII_GMAC1) ||
+            (path == MTK_ETH_PATH_GMAC2_RGMII && val == SYSCFG0_SGMII_GMAC2))
+            val = 0;
+        else
+            updated = false;
+        break;
+    default:
+        updated = false;
+        break;
+    }
+
+    if (updated)
+        regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+                   SYSCFG0_SGMII_MASK, val);
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static int set_mux_gmac12_to_gephy_sgmii(struct mtk_eth *eth, int path)
+{
+    unsigned int val = 0;
+    bool updated = true;
+
+    regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+
+    switch (path) {
+    case MTK_ETH_PATH_GMAC1_SGMII:
+        val |= SYSCFG0_SGMII_GMAC1_V2;
+        break;
+    case MTK_ETH_PATH_GMAC2_GEPHY:
+        val &= ~(u32)SYSCFG0_SGMII_GMAC2_V2;
+        break;
+    case MTK_ETH_PATH_GMAC2_SGMII:
+        val |= SYSCFG0_SGMII_GMAC2_V2;
+        break;
+    default:
+        updated = false;
+    }
+
+    if (updated)
+        regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+                   SYSCFG0_SGMII_MASK, val);
+
+    dev_dbg(eth->dev, "path %s in %s updated = %d\n",
+        mtk_eth_path_name(path), __func__, updated);
+
+    return 0;
+}
+
+static const struct mtk_eth_muxc mtk_eth_muxc[] = {
+    {
+        .name = "mux_gdm1_to_gmac1_esw",
+        .cap_bit = MTK_ETH_MUX_GDM1_TO_GMAC1_ESW,
+        .set_path = set_mux_gdm1_to_gmac1_esw,
+    }, {
+        .name = "mux_gmac2_gmac0_to_gephy",
+        .cap_bit = MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY,
+        .set_path = set_mux_gmac2_gmac0_to_gephy,
+    }, {
+        .name = "mux_u3_gmac2_to_qphy",
+        .cap_bit = MTK_ETH_MUX_U3_GMAC2_TO_QPHY,
+        .set_path = set_mux_u3_gmac2_to_qphy,
+    }, {
+        .name = "mux_gmac1_gmac2_to_sgmii_rgmii",
+        .cap_bit = MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII,
+        .set_path = set_mux_gmac1_gmac2_to_sgmii_rgmii,
+    }, {
+        .name = "mux_gmac12_to_gephy_sgmii",
+        .cap_bit = MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII,
+        .set_path = set_mux_gmac12_to_gephy_sgmii,
+    },
+};
+
+static int mtk_eth_mux_setup(struct mtk_eth *eth, int path)
+{
+    int i, err = 0;
+
+    if (!MTK_HAS_CAPS(eth->soc->caps, path)) {
+        dev_err(eth->dev, "path %s isn't supported on the SoC\n",
+            mtk_eth_path_name(path));
+        return -EINVAL;
+    }
+
+    if (!MTK_HAS_CAPS(eth->soc->caps, MTK_MUX))
+        return 0;
+
+    /* Setup MUX in path fabric */
+    for (i = 0; i < ARRAY_SIZE(mtk_eth_muxc); i++) {
+        if (MTK_HAS_CAPS(eth->soc->caps, mtk_eth_muxc[i].cap_bit)) {
+            err = mtk_eth_muxc[i].set_path(eth, path);
+            if (err)
+                goto out;
+        } else {
+            dev_dbg(eth->dev, "mux %s isn't present on the SoC\n",
+                mtk_eth_muxc[i].name);
+        }
+    }
+
+out:
+    return err;
+}
+
+static int mtk_gmac_sgmii_path_setup(struct mtk_eth *eth, int mac_id)
+{
+    unsigned int val = 0;
+    int sid, err, path;
+
+    path = (mac_id == 0) ? MTK_ETH_PATH_GMAC1_SGMII :
+                   MTK_ETH_PATH_GMAC2_SGMII;
+
+    /* Setup proper MUXes along the path */
+    err = mtk_eth_mux_setup(eth, path);
+    if (err)
+        return err;
+
+    /* The path from GMAC to SGMII will be enabled once the SGMIISYS setup
+     * is done.
+     */
+    regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+
+    regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+               SYSCFG0_SGMII_MASK, ~(u32)SYSCFG0_SGMII_MASK);
+
+    /* Decide how GMAC and SGMIISYS are mapped */
+    sid = (MTK_HAS_CAPS(eth->soc->caps, MTK_SHARED_SGMII)) ? 0 : mac_id;
+
+    /* Setup SGMIISYS with the determined property */
+    if (MTK_HAS_FLAGS(eth->sgmii->flags[sid], MTK_SGMII_PHYSPEED_AN))
+        err = mtk_sgmii_setup_mode_an(eth->sgmii, sid);
+    else
+        err = mtk_sgmii_setup_mode_force(eth->sgmii, sid);
+
+    if (err)
+        return err;
+
+    regmap_update_bits(eth->ethsys, ETHSYS_SYSCFG0,
+               SYSCFG0_SGMII_MASK, val);
+
+    return 0;
+}
+
+static int mtk_gmac_gephy_path_setup(struct mtk_eth *eth, int mac_id)
+{
+    int err, path = 0;
+
+    if (mac_id == 1)
+        path = MTK_ETH_PATH_GMAC2_GEPHY;
+
+    if (!path)
+        return -EINVAL;
+
+    /* Setup proper MUXes along the path */
+    err = mtk_eth_mux_setup(eth, path);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+static int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id)
+{
+    int err, path;
+
+    path = (mac_id == 0) ? MTK_ETH_PATH_GMAC1_RGMII :
+                   MTK_ETH_PATH_GMAC2_RGMII;
+
+    /* Setup proper MUXes along the path */
+    err = mtk_eth_mux_setup(eth, path);
+    if (err)
+        return err;
+
+    return 0;
+}
+
+int mtk_setup_hw_path(struct mtk_eth *eth, int mac_id, int phymode)
+{
+    int err;
+
+    switch (phymode) {
+    case PHY_INTERFACE_MODE_TRGMII:
+    case PHY_INTERFACE_MODE_RGMII_TXID:
+    case PHY_INTERFACE_MODE_RGMII_RXID:
+    case PHY_INTERFACE_MODE_RGMII_ID:
+    case PHY_INTERFACE_MODE_RGMII:
+    case PHY_INTERFACE_MODE_MII:
+    case PHY_INTERFACE_MODE_REVMII:
+    case PHY_INTERFACE_MODE_RMII:
+        if (MTK_HAS_CAPS(eth->soc->caps, MTK_RGMII)) {
+            err = mtk_gmac_rgmii_path_setup(eth, mac_id);
+            if (err)
+                return err;
+        }
+        break;
+    case PHY_INTERFACE_MODE_SGMII:
+        if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+            err = mtk_gmac_sgmii_path_setup(eth, mac_id);
+            if (err)
+                return err;
+        }
+        break;
+    case PHY_INTERFACE_MODE_GMII:
+        if (MTK_HAS_CAPS(eth->soc->caps, MTK_GEPHY)) {
+            err = mtk_gmac_gephy_path_setup(eth, mac_id);
+            if (err)
+                return err;
+        }
+        break;
+    default:
+        break;
+    }
+
+    return 0;
+}
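mtk_setup_hw_path() is the single entry point the core driver uses for all
of the mux plumbing above; the caller side, visible in the mtk_phy_connect()
hunk below, reduces to:

    err = mtk_setup_hw_path(eth, mac->id, of_get_phy_mode(np));
    if (err)
        goto err_phy;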
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 6cfffb64cd51..b20b3a5a1ebb 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -48,8 +48,10 @@ static const struct mtk_ethtool_stats {
 };
 
 static const char * const mtk_clks_source_name[] = {
-    "ethif", "esw", "gp0", "gp1", "gp2", "trgpll", "sgmii_tx250m",
-    "sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb", "sgmii_ck", "eth2pll"
+    "ethif", "sgmiitop", "esw", "gp0", "gp1", "gp2", "fe", "trgpll",
+    "sgmii_tx250m", "sgmii_rx250m", "sgmii_cdr_ref", "sgmii_cdr_fb",
+    "sgmii2_tx250m", "sgmii2_rx250m", "sgmii2_cdr_ref", "sgmii2_cdr_fb",
+    "sgmii_ck", "eth2pll",
 };
 
 void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg)
@@ -132,6 +134,31 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
     return _mtk_mdio_read(eth, phy_addr, phy_reg);
 }
 
+static int mt7621_gmac0_rgmii_adjust(struct mtk_eth *eth,
+                     phy_interface_t interface)
+{
+    u32 val;
+
+    /* Check DDR memory type.
+     * Currently TRGMII mode with DDR2 memory is not supported.
+     */
+    regmap_read(eth->ethsys, ETHSYS_SYSCFG, &val);
+    if (interface == PHY_INTERFACE_MODE_TRGMII &&
+        val & SYSCFG_DRAM_TYPE_DDR2) {
+        dev_err(eth->dev,
+            "TRGMII mode with DDR2 memory is not supported!\n");
+        return -EOPNOTSUPP;
+    }
+
+    val = (interface == PHY_INTERFACE_MODE_TRGMII) ?
+        ETHSYS_TRGMII_MT7621_DDR_PLL : 0;
+
+    regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0,
+               ETHSYS_TRGMII_MT7621_MASK, val);
+
+    return 0;
+}
+
 static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
 {
     u32 val;
@@ -159,47 +186,6 @@ static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed)
     mtk_w32(eth, val, TRGMII_TCK_CTRL);
 }
 
-static void mtk_gmac_sgmii_hw_setup(struct mtk_eth *eth, int mac_id)
-{
-    u32 val;
-
-    /* Setup the link timer and QPHY power up inside SGMIISYS */
-    regmap_write(eth->sgmiisys, SGMSYS_PCS_LINK_TIMER,
-             SGMII_LINK_TIMER_DEFAULT);
-
-    regmap_read(eth->sgmiisys, SGMSYS_SGMII_MODE, &val);
-    val |= SGMII_REMOTE_FAULT_DIS;
-    regmap_write(eth->sgmiisys, SGMSYS_SGMII_MODE, val);
-
-    regmap_read(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, &val);
-    val |= SGMII_AN_RESTART;
-    regmap_write(eth->sgmiisys, SGMSYS_PCS_CONTROL_1, val);
-
-    regmap_read(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, &val);
-    val &= ~SGMII_PHYA_PWD;
-    regmap_write(eth->sgmiisys, SGMSYS_QPHY_PWR_STATE_CTRL, val);
-
-    /* Determine MUX for which GMAC uses the SGMII interface */
-    if (MTK_HAS_CAPS(eth->soc->caps, MTK_DUAL_GMAC_SHARED_SGMII)) {
-        regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
-        val &= ~SYSCFG0_SGMII_MASK;
-        val |= !mac_id ? SYSCFG0_SGMII_GMAC1 : SYSCFG0_SGMII_GMAC2;
-        regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
-
-        dev_info(eth->dev, "setup shared sgmii for gmac=%d\n",
-             mac_id);
-    }
-
-    /* Setup the GMAC1 going through SGMII path when SoC also support
-     * ESW on GMAC1
-     */
-    if (MTK_HAS_CAPS(eth->soc->caps, MTK_GMAC1_ESW | MTK_GMAC1_SGMII) &&
-        !mac_id) {
-        mtk_w32(eth, 0, MTK_MAC_MISC);
-        dev_info(eth->dev, "setup gmac1 going through sgmii");
-    }
-}
-
 static void mtk_phy_link_adjust(struct net_device *dev)
 {
     struct mtk_mac *mac = netdev_priv(dev);
@@ -222,9 +208,17 @@ static void mtk_phy_link_adjust(struct net_device *dev)
         break;
     }
 
-    if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) &&
-        !mac->id && !mac->trgmii)
-        mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed);
+    if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_GMAC1_TRGMII) && !mac->id) {
+        if (MTK_HAS_CAPS(mac->hw->soc->caps, MTK_TRGMII_MT7621_CLK)) {
+            if (mt7621_gmac0_rgmii_adjust(mac->hw,
+                              dev->phydev->interface))
+                return;
+        } else {
+            if (!mac->trgmii)
+                mtk_gmac0_rgmii_adjust(mac->hw,
+                               dev->phydev->speed);
+        }
+    }
 
     if (dev->phydev->link)
         mcr |= MAC_MCR_FORCE_LINK;
@@ -289,6 +283,7 @@ static int mtk_phy_connect(struct net_device *dev)
     struct mtk_eth *eth;
     struct device_node *np;
     u32 val;
+    int err;
 
     eth = mac->hw;
     np = of_parse_phandle(mac->of_node, "phy-handle", 0);
@@ -298,6 +293,10 @@ static int mtk_phy_connect(struct net_device *dev)
     if (!np)
         return -ENODEV;
 
+    err = mtk_setup_hw_path(eth, mac->id, of_get_phy_mode(np));
+    if (err)
+        goto err_phy;
+
     mac->ge_mode = 0;
     switch (of_get_phy_mode(np)) {
     case PHY_INTERFACE_MODE_TRGMII:
@@ -306,12 +305,10 @@ static int mtk_phy_connect(struct net_device *dev)
     case PHY_INTERFACE_MODE_RGMII_RXID:
     case PHY_INTERFACE_MODE_RGMII_ID:
     case PHY_INTERFACE_MODE_RGMII:
-        break;
     case PHY_INTERFACE_MODE_SGMII:
-        if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII))
-            mtk_gmac_sgmii_hw_setup(eth, mac->id);
         break;
     case PHY_INTERFACE_MODE_MII:
+    case PHY_INTERFACE_MODE_GMII:
         mac->ge_mode = 1;
         break;
     case PHY_INTERFACE_MODE_REVMII:
@@ -2477,16 +2474,28 @@ static int mtk_probe(struct platform_device *pdev)
         return PTR_ERR(eth->ethsys);
     }
 
-    if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
-        eth->sgmiisys =
-            syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
-                            "mediatek,sgmiisys");
-        if (IS_ERR(eth->sgmiisys)) {
-            dev_err(&pdev->dev, "no sgmiisys regmap found\n");
-            return PTR_ERR(eth->sgmiisys);
+    if (MTK_HAS_CAPS(eth->soc->caps, MTK_INFRA)) {
+        eth->infra = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+                                 "mediatek,infracfg");
+        if (IS_ERR(eth->infra)) {
+            dev_err(&pdev->dev, "no infracfg regmap found\n");
+            return PTR_ERR(eth->infra);
         }
     }
 
+    if (MTK_HAS_CAPS(eth->soc->caps, MTK_SGMII)) {
+        eth->sgmii = devm_kzalloc(eth->dev, sizeof(*eth->sgmii),
+                      GFP_KERNEL);
+        if (!eth->sgmii)
+            return -ENOMEM;
+
+        err = mtk_sgmii_init(eth->sgmii, pdev->dev.of_node,
+                     eth->soc->ana_rgc3);
+
+        if (err)
+            return err;
+    }
+
     if (eth->soc->required_pctl) {
         eth->pctl = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
                                 "mediatek,pctl");
@@ -2625,34 +2634,43 @@ static int mtk_remove(struct platform_device *pdev)
 }
 
 static const struct mtk_soc_data mt2701_data = {
-    .caps = MTK_GMAC1_TRGMII | MTK_HWLRO,
+    .caps = MT7623_CAPS | MTK_HWLRO,
     .required_clks = MT7623_CLKS_BITMAP,
     .required_pctl = true,
 };
 
 static const struct mtk_soc_data mt7621_data = {
-    .caps = MTK_SHARED_INT,
+    .caps = MT7621_CAPS,
     .required_clks = MT7621_CLKS_BITMAP,
     .required_pctl = false,
 };
 
 static const struct mtk_soc_data mt7622_data = {
-    .caps = MTK_DUAL_GMAC_SHARED_SGMII | MTK_GMAC1_ESW | MTK_HWLRO,
+    .ana_rgc3 = 0x2028,
+    .caps = MT7622_CAPS | MTK_HWLRO,
     .required_clks = MT7622_CLKS_BITMAP,
     .required_pctl = false,
 };
 
 static const struct mtk_soc_data mt7623_data = {
-    .caps = MTK_GMAC1_TRGMII | MTK_HWLRO,
+    .caps = MT7623_CAPS | MTK_HWLRO,
     .required_clks = MT7623_CLKS_BITMAP,
     .required_pctl = true,
 };
 
+static const struct mtk_soc_data mt7629_data = {
+    .ana_rgc3 = 0x128,
+    .caps = MT7629_CAPS | MTK_HWLRO,
+    .required_clks = MT7629_CLKS_BITMAP,
+    .required_pctl = false,
+};
+
 const struct of_device_id of_mtk_match[] = {
     { .compatible = "mediatek,mt2701-eth", .data = &mt2701_data},
     { .compatible = "mediatek,mt7621-eth", .data = &mt7621_data},
     { .compatible = "mediatek,mt7622-eth", .data = &mt7622_data},
     { .compatible = "mediatek,mt7623-eth", .data = &mt7623_data},
+    { .compatible = "mediatek,mt7629-eth", .data = &mt7629_data},
     {},
 };
 MODULE_DEVICE_TABLE(of, of_mtk_match);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index baa85d5601e7..c6be599ed94d 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -9,6 +9,10 @@
 #ifndef MTK_ETH_H
 #define MTK_ETH_H
 
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+#include <linux/of_net.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 
 #define MTK_QDMA_PAGE_SIZE  2048
@@ -359,17 +363,27 @@
 #define MT7622_ETH  7622
 #define MT7621_ETH  7621
 
+/* ethernet system control register */
+#define ETHSYS_SYSCFG       0x10
+#define SYSCFG_DRAM_TYPE_DDR2   BIT(4)
+
 /* ethernet subsystem config register */
 #define ETHSYS_SYSCFG0      0x14
 #define SYSCFG0_GE_MASK     0x3
 #define SYSCFG0_GE_MODE(x, y)   (x << (12 + (y * 2)))
-#define SYSCFG0_SGMII_MASK  (3 << 8)
-#define SYSCFG0_SGMII_GMAC1 ((2 << 8) & GENMASK(9, 8))
-#define SYSCFG0_SGMII_GMAC2 ((3 << 8) & GENMASK(9, 8))
+#define SYSCFG0_SGMII_MASK  GENMASK(9, 8)
+#define SYSCFG0_SGMII_GMAC1 ((2 << 8) & SYSCFG0_SGMII_MASK)
+#define SYSCFG0_SGMII_GMAC2 ((3 << 8) & SYSCFG0_SGMII_MASK)
+#define SYSCFG0_SGMII_GMAC1_V2  BIT(9)
+#define SYSCFG0_SGMII_GMAC2_V2  BIT(8)
 
 /* ethernet subsystem clock register */
 #define ETHSYS_CLKCFG0      0x2c
 #define ETHSYS_TRGMII_CLK_SEL362_5  BIT(11)
+#define ETHSYS_TRGMII_MT7621_MASK   (BIT(5) | BIT(6))
ETHSYS_TRGMII_MT7621_APLL BIT(6) +#define ETHSYS_TRGMII_MT7621_DDR_PLL BIT(5) /* ethernet reset control register */ #define ETHSYS_RSTCTRL 0x34 @@ -393,6 +407,11 @@ #define SGMSYS_QPHY_PWR_STATE_CTRL 0xe8 #define SGMII_PHYA_PWD BIT(4) +/* Infrasys subsystem config registers */ +#define INFRA_MISC2 0x70c +#define CO_QPHY_SEL BIT(0) +#define GEPHY_MAC_SEL BIT(1) + struct mtk_rx_dma { unsigned int rxd1; unsigned int rxd2; @@ -457,15 +476,21 @@ enum mtk_tx_flags { */ enum mtk_clks_map { MTK_CLK_ETHIF, + MTK_CLK_SGMIITOP, MTK_CLK_ESW, MTK_CLK_GP0, MTK_CLK_GP1, MTK_CLK_GP2, + MTK_CLK_FE, MTK_CLK_TRGPLL, MTK_CLK_SGMII_TX_250M, MTK_CLK_SGMII_RX_250M, MTK_CLK_SGMII_CDR_REF, MTK_CLK_SGMII_CDR_FB, + MTK_CLK_SGMII2_TX_250M, + MTK_CLK_SGMII2_RX_250M, + MTK_CLK_SGMII2_CDR_REF, + MTK_CLK_SGMII2_CDR_FB, MTK_CLK_SGMII_CK, MTK_CLK_ETH2PLL, MTK_CLK_MAX @@ -484,6 +509,19 @@ enum mtk_clks_map { BIT(MTK_CLK_SGMII_CK) | \ BIT(MTK_CLK_ETH2PLL)) #define MT7621_CLKS_BITMAP (0) +#define MT7629_CLKS_BITMAP (BIT(MTK_CLK_ETHIF) | BIT(MTK_CLK_ESW) | \ + BIT(MTK_CLK_GP0) | BIT(MTK_CLK_GP1) | \ + BIT(MTK_CLK_GP2) | BIT(MTK_CLK_FE) | \ + BIT(MTK_CLK_SGMII_TX_250M) | \ + BIT(MTK_CLK_SGMII_RX_250M) | \ + BIT(MTK_CLK_SGMII_CDR_REF) | \ + BIT(MTK_CLK_SGMII_CDR_FB) | \ + BIT(MTK_CLK_SGMII2_TX_250M) | \ + BIT(MTK_CLK_SGMII2_RX_250M) | \ + BIT(MTK_CLK_SGMII2_CDR_REF) | \ + BIT(MTK_CLK_SGMII2_CDR_FB) | \ + BIT(MTK_CLK_SGMII_CK) | \ + BIT(MTK_CLK_ETH2PLL) | BIT(MTK_CLK_SGMIITOP)) enum mtk_dev_state { MTK_HW_INIT, @@ -554,21 +592,120 @@ struct mtk_rx_ring { u32 crx_idx_reg; }; -#define MTK_TRGMII BIT(0) -#define MTK_GMAC1_TRGMII (BIT(1) | MTK_TRGMII) -#define MTK_ESW BIT(4) -#define MTK_GMAC1_ESW (BIT(5) | MTK_ESW) -#define MTK_SGMII BIT(8) -#define MTK_GMAC1_SGMII (BIT(9) | MTK_SGMII) -#define MTK_GMAC2_SGMII (BIT(10) | MTK_SGMII) -#define MTK_DUAL_GMAC_SHARED_SGMII (BIT(11) | MTK_GMAC1_SGMII | \ - MTK_GMAC2_SGMII) -#define MTK_HWLRO BIT(12) -#define MTK_SHARED_INT BIT(13) +enum mkt_eth_capabilities { + MTK_RGMII_BIT = 0, + MTK_TRGMII_BIT, + MTK_SGMII_BIT, + MTK_ESW_BIT, + MTK_GEPHY_BIT, + MTK_MUX_BIT, + MTK_INFRA_BIT, + MTK_SHARED_SGMII_BIT, + MTK_HWLRO_BIT, + MTK_SHARED_INT_BIT, + MTK_TRGMII_MT7621_CLK_BIT, + + /* MUX BITS*/ + MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT, + MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT, + MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT, + MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT, + MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT, + + /* PATH BITS */ + MTK_ETH_PATH_GMAC1_RGMII_BIT, + MTK_ETH_PATH_GMAC1_TRGMII_BIT, + MTK_ETH_PATH_GMAC1_SGMII_BIT, + MTK_ETH_PATH_GMAC2_RGMII_BIT, + MTK_ETH_PATH_GMAC2_SGMII_BIT, + MTK_ETH_PATH_GMAC2_GEPHY_BIT, + MTK_ETH_PATH_GDM1_ESW_BIT, +}; + +/* Supported hardware group on SoCs */ +#define MTK_RGMII BIT(MTK_RGMII_BIT) +#define MTK_TRGMII BIT(MTK_TRGMII_BIT) +#define MTK_SGMII BIT(MTK_SGMII_BIT) +#define MTK_ESW BIT(MTK_ESW_BIT) +#define MTK_GEPHY BIT(MTK_GEPHY_BIT) +#define MTK_MUX BIT(MTK_MUX_BIT) +#define MTK_INFRA BIT(MTK_INFRA_BIT) +#define MTK_SHARED_SGMII BIT(MTK_SHARED_SGMII_BIT) +#define MTK_HWLRO BIT(MTK_HWLRO_BIT) +#define MTK_SHARED_INT BIT(MTK_SHARED_INT_BIT) +#define MTK_TRGMII_MT7621_CLK BIT(MTK_TRGMII_MT7621_CLK_BIT) + +#define MTK_ETH_MUX_GDM1_TO_GMAC1_ESW \ + BIT(MTK_ETH_MUX_GDM1_TO_GMAC1_ESW_BIT) +#define MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY \ + BIT(MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY_BIT) +#define MTK_ETH_MUX_U3_GMAC2_TO_QPHY \ + BIT(MTK_ETH_MUX_U3_GMAC2_TO_QPHY_BIT) +#define MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII \ + BIT(MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII_BIT) +#define 
MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII \ + BIT(MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII_BIT) + +/* Supported path present on SoCs */ +#define MTK_ETH_PATH_GMAC1_RGMII BIT(MTK_ETH_PATH_GMAC1_RGMII_BIT) +#define MTK_ETH_PATH_GMAC1_TRGMII BIT(MTK_ETH_PATH_GMAC1_TRGMII_BIT) +#define MTK_ETH_PATH_GMAC1_SGMII BIT(MTK_ETH_PATH_GMAC1_SGMII_BIT) +#define MTK_ETH_PATH_GMAC2_RGMII BIT(MTK_ETH_PATH_GMAC2_RGMII_BIT) +#define MTK_ETH_PATH_GMAC2_SGMII BIT(MTK_ETH_PATH_GMAC2_SGMII_BIT) +#define MTK_ETH_PATH_GMAC2_GEPHY BIT(MTK_ETH_PATH_GMAC2_GEPHY_BIT) +#define MTK_ETH_PATH_GDM1_ESW BIT(MTK_ETH_PATH_GDM1_ESW_BIT) + +#define MTK_GMAC1_RGMII (MTK_ETH_PATH_GMAC1_RGMII | MTK_RGMII) +#define MTK_GMAC1_TRGMII (MTK_ETH_PATH_GMAC1_TRGMII | MTK_TRGMII) +#define MTK_GMAC1_SGMII (MTK_ETH_PATH_GMAC1_SGMII | MTK_SGMII) +#define MTK_GMAC2_RGMII (MTK_ETH_PATH_GMAC2_RGMII | MTK_RGMII) +#define MTK_GMAC2_SGMII (MTK_ETH_PATH_GMAC2_SGMII | MTK_SGMII) +#define MTK_GMAC2_GEPHY (MTK_ETH_PATH_GMAC2_GEPHY | MTK_GEPHY) +#define MTK_GDM1_ESW (MTK_ETH_PATH_GDM1_ESW | MTK_ESW) + +/* MUXes present on SoCs */ +/* 0: GDM1 -> GMAC1, 1: GDM1 -> ESW */ +#define MTK_MUX_GDM1_TO_GMAC1_ESW (MTK_ETH_MUX_GDM1_TO_GMAC1_ESW | MTK_MUX) + +/* 0: GMAC2 -> GEPHY, 1: GMAC0 -> GEPHY */ +#define MTK_MUX_GMAC2_GMAC0_TO_GEPHY \ + (MTK_ETH_MUX_GMAC2_GMAC0_TO_GEPHY | MTK_MUX | MTK_INFRA) + +/* 0: U3 -> QPHY, 1: GMAC2 -> QPHY */ +#define MTK_MUX_U3_GMAC2_TO_QPHY \ + (MTK_ETH_MUX_U3_GMAC2_TO_QPHY | MTK_MUX | MTK_INFRA) + +/* 2: GMAC1 -> SGMII, 3: GMAC2 -> SGMII */ +#define MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII \ + (MTK_ETH_MUX_GMAC1_GMAC2_TO_SGMII_RGMII | MTK_MUX | \ + MTK_SHARED_SGMII) + +/* 0: GMACx -> GEPHY, 1: GMACx -> SGMII where x is 1 or 2 */ +#define MTK_MUX_GMAC12_TO_GEPHY_SGMII \ + (MTK_ETH_MUX_GMAC12_TO_GEPHY_SGMII | MTK_MUX) + #define MTK_HAS_CAPS(caps, _x) (((caps) & (_x)) == (_x)) +#define MT7621_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | \ + MTK_GMAC2_RGMII | MTK_SHARED_INT | MTK_TRGMII_MT7621_CLK) + +#define MT7622_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_SGMII | MTK_GMAC2_RGMII | \ + MTK_GMAC2_SGMII | MTK_GDM1_ESW | \ + MTK_MUX_GDM1_TO_GMAC1_ESW | \ + MTK_MUX_GMAC1_GMAC2_TO_SGMII_RGMII) + +#define MT7623_CAPS (MTK_GMAC1_RGMII | MTK_GMAC1_TRGMII | MTK_GMAC2_RGMII) + +#define MT7629_CAPS (MTK_GMAC1_SGMII | MTK_GMAC2_SGMII | MTK_GMAC2_GEPHY | \ + MTK_GDM1_ESW | MTK_MUX_GDM1_TO_GMAC1_ESW | \ + MTK_MUX_GMAC2_GMAC0_TO_GEPHY | \ + MTK_MUX_U3_GMAC2_TO_QPHY | \ + MTK_MUX_GMAC12_TO_GEPHY_SGMII) + /* struct mtk_soc_data - This is the structure holding all differences * among various platforms + * @ana_rgc3: The offset for register ANA_RGC3 related to + * sgmiisys syscon * @caps Flags showing the extra capabilities for the SoC * @required_clks Flags showing the bitmap for required clocks on * the target SoC @@ -576,6 +713,7 @@ struct mtk_rx_ring { * the extra setup for those pins used by GMAC.
*/ struct mtk_soc_data { + u32 ana_rgc3; u32 caps; u32 required_clks; bool required_pctl; @@ -584,6 +722,26 @@ struct mtk_soc_data { /* currently no SoC has more than 2 macs */ #define MTK_MAX_DEVS 2 +#define MTK_SGMII_PHYSPEED_AN BIT(31) +#define MTK_SGMII_PHYSPEED_MASK GENMASK(2, 0) +#define MTK_SGMII_PHYSPEED_1000 BIT(0) +#define MTK_SGMII_PHYSPEED_2500 BIT(1) +#define MTK_HAS_FLAGS(flags, _x) (((flags) & (_x)) == (_x)) + +/* struct mtk_sgmii - This is the structure holding sgmii regmap and its + * characteristics + * @regmap: The register map pointing at the range used to setup + * SGMII modes + * @flags: The flags indicating which speed mode the SGMII should run in + * @ana_rgc3: The offset refers to register ANA_RGC3 related to regmap + */ + +struct mtk_sgmii { + struct regmap *regmap[MTK_MAX_DEVS]; + u32 flags[MTK_MAX_DEVS]; + u32 ana_rgc3; +}; + /* struct mtk_eth - This is the main data structure for holding the state * of the driver * @dev: The device pointer @@ -599,8 +757,8 @@ struct mtk_soc_data { * @msg_enable: Ethtool msg level * @ethsys: The register map pointing at the range used to setup * MII modes - * @sgmiisys: The register map pointing at the range used to setup - * SGMII modes + * @infra: The register map pointing at the range used to setup + * SGMII and GePHY path * @pctl: The register map pointing at the range used to setup * GMAC port drive/slew values * @dma_refcnt: track how many netdevs are using the DMA engine @@ -632,7 +790,8 @@ struct mtk_eth { u32 msg_enable; unsigned long sysclk; struct regmap *ethsys; - struct regmap *sgmiisys; + struct regmap *infra; + struct mtk_sgmii *sgmii; struct regmap *pctl; bool hwlro; refcount_t dma_refcnt; @@ -683,4 +842,10 @@ void mtk_stats_update_mac(struct mtk_mac *mac); void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg); u32 mtk_r32(struct mtk_eth *eth, unsigned reg); +int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *np, + u32 ana_rgc3); +int mtk_sgmii_setup_mode_an(struct mtk_sgmii *ss, int id); +int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id); +int mtk_setup_hw_path(struct mtk_eth *eth, int mac_id, int phymode); + #endif /* MTK_ETH_H */ diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c new file mode 100644 index 000000000000..136f90ce5a65 --- /dev/null +++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018-2019 MediaTek Inc.
+ +/* A library for MediaTek SGMII circuit + * + * Author: Sean Wang <sean.wang@mediatek.com> + * + */ + +#include <linux/mfd/syscon.h> +#include <linux/of.h> +#include <linux/regmap.h> + +#include "mtk_eth_soc.h" + +int mtk_sgmii_init(struct mtk_sgmii *ss, struct device_node *r, u32 ana_rgc3) +{ + struct device_node *np; + const char *str; + int i, err; + + ss->ana_rgc3 = ana_rgc3; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + np = of_parse_phandle(r, "mediatek,sgmiisys", i); + if (!np) + break; + + ss->regmap[i] = syscon_node_to_regmap(np); + if (IS_ERR(ss->regmap[i])) + return PTR_ERR(ss->regmap[i]); + + err = of_property_read_string(np, "mediatek,physpeed", &str); + if (err) + return err; + + if (!strcmp(str, "2500")) + ss->flags[i] |= MTK_SGMII_PHYSPEED_2500; + else if (!strcmp(str, "1000")) + ss->flags[i] |= MTK_SGMII_PHYSPEED_1000; + else if (!strcmp(str, "auto")) + ss->flags[i] |= MTK_SGMII_PHYSPEED_AN; + else + return -EINVAL; + } + + return 0; +} + +int mtk_sgmii_setup_mode_an(struct mtk_sgmii *ss, int id) +{ + unsigned int val; + + if (!ss->regmap[id]) + return -EINVAL; + + /* Setup the link timer and QPHY power up inside SGMIISYS */ + regmap_write(ss->regmap[id], SGMSYS_PCS_LINK_TIMER, + SGMII_LINK_TIMER_DEFAULT); + + regmap_read(ss->regmap[id], SGMSYS_SGMII_MODE, &val); + val |= SGMII_REMOTE_FAULT_DIS; + regmap_write(ss->regmap[id], SGMSYS_SGMII_MODE, val); + + regmap_read(ss->regmap[id], SGMSYS_PCS_CONTROL_1, &val); + val |= SGMII_AN_RESTART; + regmap_write(ss->regmap[id], SGMSYS_PCS_CONTROL_1, val); + + regmap_read(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, &val); + val &= ~SGMII_PHYA_PWD; + regmap_write(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, val); + + return 0; +} + +int mtk_sgmii_setup_mode_force(struct mtk_sgmii *ss, int id) +{ + unsigned int val; + int mode; + + if (!ss->regmap[id]) + return -EINVAL; + + regmap_read(ss->regmap[id], ss->ana_rgc3, &val); + val &= ~GENMASK(3, 2); + mode = ss->flags[id] & MTK_SGMII_PHYSPEED_MASK; + val |= (mode == MTK_SGMII_PHYSPEED_1000) ? 0 : BIT(2); + regmap_write(ss->regmap[id], ss->ana_rgc3, val); + + /* Disable SGMII AN */ + regmap_read(ss->regmap[id], SGMSYS_PCS_CONTROL_1, &val); + val &= ~BIT(12); + regmap_write(ss->regmap[id], SGMSYS_PCS_CONTROL_1, val); + + /* SGMII force mode setting */ + val = 0x31120019; + regmap_write(ss->regmap[id], SGMSYS_SGMII_MODE, val); + + /* Release PHYA power down state */ + regmap_read(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, &val); + val &= ~SGMII_PHYA_PWD; + regmap_write(ss->regmap[id], SGMSYS_QPHY_PWR_STATE_CTRL, val); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 2391e3cfb56b..37fef8cd25e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -34,6 +34,7 @@ config MLX5_CORE_EN depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE depends on IPV6=y || IPV6=n || MLX5_CORE=m select PAGE_POOL + select DIMLIB default n ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. @@ -96,26 +97,60 @@ config MLX5_CORE_IPOIB ---help--- MLX5 IPoIB offloads & acceleration support. +config MLX5_FPGA_IPSEC + bool "Mellanox Technologies IPsec Innova support" + depends on MLX5_CORE + depends on MLX5_FPGA + default n + help + Build IPsec support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board.
If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + config MLX5_EN_IPSEC bool "IPSec XFRM cryptography-offload acceleration" - depends on MLX5_ACCEL depends on MLX5_CORE_EN depends on XFRM_OFFLOAD depends on INET_ESP_OFFLOAD || INET6_ESP_OFFLOAD + depends on MLX5_FPGA_IPSEC default n - ---help--- + help Build support for IPsec cryptography-offload acceleration in the NIC. Note: Support for hardware with this capability needs to be selected for this option to become available. -config MLX5_EN_TLS - bool "TLS cryptography-offload accelaration" +config MLX5_FPGA_TLS + bool "Mellanox Technologies TLS Innova support" + depends on TLS_DEVICE + depends on TLS=y || MLX5_CORE=m + depends on MLX5_FPGA + default n + help + Build TLS support for the Innova family of network cards by Mellanox + Technologies. Innova network cards are comprised of a ConnectX chip + and an FPGA chip on one board. If you select this option, the + mlx5_core driver will include the Innova FPGA core and allow building + sandbox-specific client drivers. + +config MLX5_TLS + bool "Mellanox Technologies TLS Connect-X support" depends on MLX5_CORE_EN depends on TLS_DEVICE depends on TLS=y || MLX5_CORE=m - depends on MLX5_ACCEL + select MLX5_ACCEL default n - ---help--- - Build support for TLS cryptography-offload accelaration in the NIC. - Note: Support for hardware with this capability needs to be selected - for this option to become available. + help + Build TLS support for the Connect-X family of network cards by Mellanox + Technologies. + +config MLX5_EN_TLS + bool "TLS cryptography-offload acceleration" + depends on MLX5_CORE_EN + depends on MLX5_FPGA_TLS || MLX5_TLS + default y + help + Build support for TLS cryptography-offload acceleration in the NIC. + Note: Support for hardware with this capability needs to be selected + for this option to become available.
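A minimal usage sketch of the TLS Kconfig split above (not part of the patch; the function name example_tls_tx_possible is invented for illustration). It uses only the accel/tls.h helpers shown in the hunks below: mlx5_accel_is_tls_device() covers both the Innova (FPGA) flavor and the ConnectX kTLS flavor, and when both MLX5_FPGA_TLS and MLX5_TLS are disabled the inline stubs make every check compile down to false.

#include "accel/tls.h"

/* Sketch: decide whether TLS TX offload can be attempted on this device. */
static bool example_tls_tx_possible(struct mlx5_core_dev *mdev,
				    struct tls_crypto_info *crypto_info)
{
	/* True for Innova (FPGA) TLS or kTLS-capable ConnectX devices */
	if (!mlx5_accel_is_tls_device(mdev))
		return false;

	/* kTLS additionally restricts the cipher and TLS version */
	if (mlx5_accel_is_ktls_device(mdev))
		return mlx5e_ktls_type_check(mdev, crypto_info);

	return true;
}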
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 243368dc23db..57d2cc666fe3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -13,9 +13,10 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o # mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \ - transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ + transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o + lib/devcom.o lib/pci_vsc.o diag/fs_tracepoint.o \ + diag/fw_tracer.o diag/crdump.o devlink.o # # Netdev basic @@ -23,7 +24,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \ en_selftest.o en/port.o en/monitor_stats.o en/reporter_tx.o \ - en/params.o + en/params.o en/xsk/umem.o en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o # # Netdev extra @@ -31,12 +32,15 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o -mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \ + lib/geneve.o en/tc_tun_vxlan.o en/tc_tun_gre.o \ + en/tc_tun_geneve.o # # Core extra # -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o rdma.o +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ + ecpf.o rdma.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o @@ -49,12 +53,14 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib # # Accelerations & FPGA # -mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o accel/tls.o +mlx5_core-$(CONFIG_MLX5_FPGA_IPSEC) += fpga/ipsec.o +mlx5_core-$(CONFIG_MLX5_FPGA_TLS) += fpga/tls.o +mlx5_core-$(CONFIG_MLX5_ACCEL) += lib/crypto.o accel/tls.o accel/ipsec.o -mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ - fpga/ipsec.o fpga/tls.o +mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o -mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o +mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \ + en_accel/ktls.o en_accel/ktls_tx.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c index 9f1b1939716a..eddc34e4a762 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c @@ -31,6 +31,8 @@ * */ +#ifdef CONFIG_MLX5_FPGA_IPSEC + #include <linux/mlx5/device.h> #include "accel/ipsec.h" @@ -74,6 +76,11 @@ int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) return mlx5_fpga_ipsec_init(mdev); } +void mlx5_accel_ipsec_build_fs_cmds(void) +{ + mlx5_fpga_ipsec_build_fs_cmds(); +} + void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { 
mlx5_fpga_ipsec_cleanup(mdev); @@ -107,3 +114,5 @@ int mlx5_accel_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, return mlx5_fpga_esp_modify_xfrm(xfrm, attrs); } EXPORT_SYMBOL_GPL(mlx5_accel_esp_modify_xfrm); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h index 024dbd22a89b..530e428d46ab 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h @@ -37,7 +37,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/accel.h> -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_FPGA_IPSEC #define MLX5_IPSEC_DEV(mdev) (mlx5_accel_ipsec_device_caps(mdev) & \ MLX5_ACCEL_IPSEC_CAP_DEVICE) @@ -54,6 +54,7 @@ void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev, void mlx5_accel_esp_free_hw_context(void *context); int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev); +void mlx5_accel_ipsec_build_fs_cmds(void); void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev); #else @@ -79,6 +80,10 @@ static inline int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev) return 0; } +static inline void mlx5_accel_ipsec_build_fs_cmds(void) +{ +} + static inline void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c index da7bd26368f9..cab708af3422 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c @@ -35,6 +35,9 @@ #include "accel/tls.h" #include "mlx5_core.h" +#include "lib/mlx5.h" + +#ifdef CONFIG_MLX5_FPGA_TLS #include "fpga/tls.h" int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow, @@ -61,7 +64,8 @@ int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { - return mlx5_fpga_is_tls_device(mdev); + return mlx5_fpga_is_tls_device(mdev) || + mlx5_accel_is_ktls_device(mdev); } u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) @@ -78,3 +82,42 @@ void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { mlx5_fpga_tls_cleanup(mdev); } +#endif + +#ifdef CONFIG_MLX5_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) +{ + u32 sz_bytes; + void *key; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + case TLS_CIPHER_AES_GCM_256: { + struct tls12_crypto_info_aes_gcm_256 *info = + (struct tls12_crypto_info_aes_gcm_256 *)crypto_info; + + key = info->key; + sz_bytes = sizeof(info->key); + break; + } + default: + return -EINVAL; + } + + return mlx5_create_encryption_key(mdev, key, sz_bytes, p_key_id); +} + +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_destroy_encryption_key(mdev, key_id); +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h index def4093ebfae..879321b21616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h @@ -37,8 +37,51 @@ #include <linux/mlx5/driver.h> #include <linux/tls.h> -#ifdef CONFIG_MLX5_ACCEL +#ifdef CONFIG_MLX5_TLS +int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 
*p_key_id); +void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); +static inline bool mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, tls)) + return false; + + if (!MLX5_CAP_GEN(mdev, log_max_dek)) + return false; + + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); +} + +static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) +{ + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (crypto_info->version == TLS_1_2_VERSION) + return MLX5_CAP_TLS(mdev, tls_1_2_aes_gcm_128); + break; + } + + return false; +} +#else +static inline int +mlx5_ktls_create_key(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info, + u32 *p_key_id) { return -ENOTSUPP; } +static inline void +mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) {} + +static inline bool +mlx5_accel_is_ktls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool +mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, + struct tls_crypto_info *crypto_info) { return false; } +#endif + +#ifdef CONFIG_MLX5_FPGA_TLS enum { MLX5_ACCEL_TLS_TX = BIT(0), MLX5_ACCEL_TLS_RX = BIT(1), @@ -84,11 +127,13 @@ static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid, bool direction_sx) { } static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq, u64 rcd_sn) { return 0; } -static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) { return false; } +static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev) +{ + return mlx5_accel_is_ktls_device(mdev); +} static inline u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev) { return 0; } static inline int mlx5_accel_tls_init(struct mlx5_core_dev *mdev) { return 0; } static inline void mlx5_accel_tls_cleanup(struct mlx5_core_dev *mdev) { } - #endif #endif /* __MLX5_ACCEL_TLS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e94686c42000..8cdd7e66f8df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -316,7 +316,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: case MLX5_CMD_OP_DEALLOC_MEMIC: case MLX5_CMD_OP_PAGE_FAULT_RESUME: - case MLX5_CMD_OP_QUERY_HOST_PARAMS: + case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -632,7 +632,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); - MLX5_COMMAND_STR_CASE(QUERY_HOST_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_ESW_FUNCTIONS); MLX5_COMMAND_STR_CASE(CREATE_UCTX); MLX5_COMMAND_STR_CASE(DESTROY_UCTX); MLX5_COMMAND_STR_CASE(CREATE_UMEM); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 713a17ee3751..818edc63e428 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data) list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) { list_del_init(&mcq->tasklet_ctx.list); - mcq->tasklet_ctx.comp(mcq); + mcq->tasklet_ctx.comp(mcq, NULL); mlx5_cq_put(mcq); if (time_after(jiffies, end)) break; @@ -68,7 +68,8 @@ void mlx5_cq_tasklet_cb(unsigned long data) tasklet_schedule(&ctx->task); } 
-static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) +static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, + struct mlx5_eqe *eqe) { unsigned long flags; struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; @@ -87,11 +88,10 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq) } int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - u32 *in, int inlen) + u32 *in, int inlen, u32 *out, int outlen) { int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; - u32 out[MLX5_ST_SZ_DW(create_cq_out)]; u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; struct mlx5_eq_comp *eq; int err; @@ -100,9 +100,9 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, if (IS_ERR(eq)) return PTR_ERR(eq); - memset(out, 0, sizeof(out)); + memset(out, 0, outlen); MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); - err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) return err; @@ -158,13 +158,8 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; int err; - err = mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); - if (err) - return err; - - err = mlx5_eq_del_cq(&cq->eq->core, cq); - if (err) - return err; + mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); + mlx5_eq_del_cq(&cq->eq->core, cq); MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index f6b1da99e6c2..5bb6a26ea267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -311,13 +311,20 @@ static u32 mlx5_gen_pci_id(struct mlx5_core_dev *dev) /* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { - u32 pci_id = mlx5_gen_pci_id(dev); struct mlx5_core_dev *res = NULL; struct mlx5_core_dev *tmp_dev; struct mlx5_priv *priv; + u32 pci_id; + if (!mlx5_core_is_pf(dev)) + return NULL; + + pci_id = mlx5_gen_pci_id(dev); list_for_each_entry(priv, &mlx5_dev_list, dev_list) { tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if (!mlx5_core_is_pf(tmp_dev)) + continue; + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { res = tmp_dev; break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c new file mode 100644 index 000000000000..a400f4430c28 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <devlink.h> + +#include "mlx5_core.h" +#include "eswitch.h" + +static int mlx5_devlink_flash_update(struct devlink *devlink, + const char *file_name, + const char *component, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + const struct firmware *fw; + int err; + + if (component) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, file_name, &dev->pdev->dev); + if (err) + return err; + + return mlx5_firmware_flash(dev, fw, extack); +} + +static u8 mlx5_fw_ver_major(u32 version) +{ + return (version >> 24) & 0xff; +} + +static u8 mlx5_fw_ver_minor(u32 version) +{ + return (version >> 16) & 0xff; +} + +static u16 mlx5_fw_ver_subminor(u32 version) +{ + return version & 
0xffff; +} + +#define DEVLINK_FW_STRING_LEN 32 + +static int +mlx5_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char version_str[DEVLINK_FW_STRING_LEN]; + u32 running_fw, stored_fw; + int err; + + err = devlink_info_driver_name_put(req, DRIVER_NAME); + if (err) + return err; + + err = devlink_info_version_fixed_put(req, "fw.psid", dev->board_id); + if (err) + return err; + + err = mlx5_fw_version_query(dev, &running_fw, &stored_fw); + if (err) + return err; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(running_fw), mlx5_fw_ver_minor(running_fw), + mlx5_fw_ver_subminor(running_fw)); + err = devlink_info_version_running_put(req, "fw.version", version_str); + if (err) + return err; + + /* no pending version, return running (stored) version */ + if (stored_fw == 0) + stored_fw = running_fw; + + snprintf(version_str, sizeof(version_str), "%d.%d.%04d", + mlx5_fw_ver_major(stored_fw), mlx5_fw_ver_minor(stored_fw), + mlx5_fw_ver_subminor(stored_fw)); + err = devlink_info_version_stored_put(req, "fw.version", version_str); + if (err) + return err; + + return 0; +} + +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_ESWITCH + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, + .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, + .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, +#endif + .flash_update = mlx5_devlink_flash_update, + .info_get = mlx5_devlink_info_get, +}; + +struct devlink *mlx5_devlink_alloc(void) +{ + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); +} + +void mlx5_devlink_free(struct devlink *devlink) +{ + devlink_free(devlink); +} + +int mlx5_devlink_register(struct devlink *devlink, struct device *dev) +{ + return devlink_register(devlink, dev); +} + +void mlx5_devlink_unregister(struct devlink *devlink) +{ + devlink_unregister(devlink); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h new file mode 100644 index 000000000000..d0ba03774ddf --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019, Mellanox Technologies */ + +#ifndef __MLX5_DEVLINK_H__ +#define __MLX5_DEVLINK_H__ + +#include <net/devlink.h> + +struct devlink *mlx5_devlink_alloc(void); +void mlx5_devlink_free(struct devlink *devlink); +int mlx5_devlink_register(struct devlink *devlink, struct device *dev); +void mlx5_devlink_unregister(struct devlink *devlink); + +#endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c new file mode 100644 index 000000000000..28d02749d3c4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/crdump.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/pci_vsc.h" +#include "lib/mlx5.h" + +#define BAD_ACCESS 0xBADACCE5 +#define MLX5_PROTECTED_CR_SCAN_CRSPACE 0x7 + +static bool mlx5_crdump_enabled(struct mlx5_core_dev *dev) +{ + return 
!!dev->priv.health.crdump_size; +} + +static int mlx5_crdump_fill(struct mlx5_core_dev *dev, u32 *cr_data) +{ + u32 crdump_size = dev->priv.health.crdump_size; + int i, ret; + + for (i = 0; i < (crdump_size / 4); i++) + cr_data[i] = BAD_ACCESS; + + ret = mlx5_vsc_gw_read_block_fast(dev, cr_data, crdump_size); + if (ret <= 0) { + if (ret == 0) + return -EIO; + return ret; + } + + if (crdump_size != ret) { + mlx5_core_warn(dev, "failed to read full dump, read %d out of %u\n", + ret, crdump_size); + return -EINVAL; + } + + return 0; +} + +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data) +{ + int ret; + + if (!mlx5_crdump_enabled(dev)) + return -ENODEV; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) { + mlx5_core_warn(dev, "crdump: failed to lock vsc gw err %d\n", + ret); + return ret; + } + /* Verify no other PF is running cr-dump or sw reset */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, + MLX5_VSC_LOCK); + if (ret) { + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + goto unlock_gw; + } + + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, NULL); + if (ret) + goto unlock_sem; + + ret = mlx5_crdump_fill(dev, cr_data); + +unlock_sem: + mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, MLX5_VSC_UNLOCK); +unlock_gw: + mlx5_vsc_gw_unlock(dev); + return ret; +} + +int mlx5_crdump_enable(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + u32 space_size; + int ret; + + if (!mlx5_core_is_pf(dev) || !mlx5_vsc_accessible(dev) || + mlx5_crdump_enabled(dev)) + return 0; + + ret = mlx5_vsc_gw_lock(dev); + if (ret) + return ret; + + /* Check if space is supported and get space size */ + ret = mlx5_vsc_gw_set_space(dev, MLX5_VSC_SPACE_SCAN_CRSPACE, + &space_size); + if (ret) { + /* Unlock and mask error since space is not supported */ + mlx5_vsc_gw_unlock(dev); + return 0; + } + + if (!space_size) { + mlx5_core_warn(dev, "Invalid Crspace size, zero\n"); + mlx5_vsc_gw_unlock(dev); + return -EINVAL; + } + + ret = mlx5_vsc_gw_unlock(dev); + if (ret) + return ret; + + priv->health.crdump_size = space_size; + return 0; +} + +void mlx5_crdump_disable(struct mlx5_core_dev *dev) +{ + dev->priv.health.crdump_size = 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index a4cf123e3f17..ddf1b87f1bc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -187,6 +187,7 @@ TRACE_EVENT(mlx5_fs_set_fte, __field(u32, index) __field(u32, action) __field(u32, flow_tag) + __field(u32, flow_source) __field(u8, mask_enable) __field(int, new_fte) __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) @@ -204,7 +205,8 @@ TRACE_EVENT(mlx5_fs_set_fte, __entry->index = fte->index; __entry->action = fte->action.action; __entry->mask_enable = __entry->fg->mask.match_criteria_enable; - __entry->flow_tag = fte->action.flow_tag; + __entry->flow_tag = fte->flow_context.flow_tag; + __entry->flow_source = fte->flow_context.flow_source; memcpy(__entry->mask_outer, MLX5_ADDR_OF(fte_match_param, &__entry->fg->mask.match_criteria, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 6999f4486e9e..8a4930c8bf62 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -243,6 +243,19 @@ free_strings_db: return -ENOMEM; } +static 
void +mlx5_fw_tracer_init_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + tracer->st_arr.saved_traces_index = 0; + mutex_init(&tracer->st_arr.lock); +} + +static void +mlx5_fw_tracer_clean_saved_traces_array(struct mlx5_fw_tracer *tracer) +{ + mutex_destroy(&tracer->st_arr.lock); +} + static void mlx5_tracer_read_strings_db(struct work_struct *work) { struct mlx5_fw_tracer *tracer = container_of(work, struct mlx5_fw_tracer, @@ -522,6 +535,24 @@ static void mlx5_fw_tracer_clean_ready_list(struct mlx5_fw_tracer *tracer) list_del(&str_frmt->list); } +static void mlx5_fw_tracer_save_trace(struct mlx5_fw_tracer *tracer, + u64 timestamp, bool lost, + u8 event_id, char *msg) +{ + struct mlx5_fw_trace_data *trace_data; + + mutex_lock(&tracer->st_arr.lock); + trace_data = &tracer->st_arr.straces[tracer->st_arr.saved_traces_index]; + trace_data->timestamp = timestamp; + trace_data->lost = lost; + trace_data->event_id = event_id; + strncpy(trace_data->msg, msg, TRACE_STR_MSG); + + tracer->st_arr.saved_traces_index = + (tracer->st_arr.saved_traces_index + 1) & (SAVED_TRACES_NUM - 1); + mutex_unlock(&tracer->st_arr.lock); +} + static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, struct mlx5_core_dev *dev, u64 trace_timestamp) @@ -540,6 +571,9 @@ static void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, str_frmt->event_id, tmp); + mlx5_fw_tracer_save_trace(dev->tracer, trace_timestamp, + str_frmt->lost, str_frmt->event_id, tmp); + /* remove it from hash */ mlx5_tracer_clean_message(str_frmt); } @@ -786,6 +820,109 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_fw_tracer_start(tracer); } +static int mlx5_fw_tracer_set_core_dump_reg(struct mlx5_core_dev *dev, + u32 *in, int size_in) +{ + u32 out[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) && + !MLX5_CAP_DEBUG(dev, core_dump_qp)) + return -EOPNOTSUPP; + + return mlx5_core_access_reg(dev, in, size_in, out, sizeof(out), + MLX5_REG_CORE_DUMP, 0, 1); +} + +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev) +{ + struct mlx5_fw_tracer *tracer = dev->tracer; + u32 in[MLX5_ST_SZ_DW(core_dump_reg)] = {}; + int err; + + if (!MLX5_CAP_DEBUG(dev, core_dump_general) || !tracer) + return -EOPNOTSUPP; + if (!tracer->owner) + return -EPERM; + + MLX5_SET(core_dump_reg, in, core_dump_type, 0x0); + + err = mlx5_fw_tracer_set_core_dump_reg(dev, in, sizeof(in)); + if (err) + return err; + queue_work(tracer->work_queue, &tracer->handle_traces_work); + flush_workqueue(tracer->work_queue); + return 0; +} + +static int +mlx5_devlink_fmsg_fill_trace(struct devlink_fmsg *fmsg, + struct mlx5_fw_trace_data *trace_data) +{ + int err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "timestamp", trace_data->timestamp); + if (err) + return err; + + err = devlink_fmsg_bool_pair_put(fmsg, "lost", trace_data->lost); + if (err) + return err; + + err = devlink_fmsg_u8_pair_put(fmsg, "event_id", trace_data->event_id); + if (err) + return err; + + err = devlink_fmsg_string_pair_put(fmsg, "msg", trace_data->msg); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return 0; +} + +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg) +{ + struct mlx5_fw_trace_data *straces = tracer->st_arr.straces; + u32 index, start_index, end_index; + u32 
saved_traces_index; + int err; + + if (!straces[0].timestamp) + return -ENOMSG; + + mutex_lock(&tracer->st_arr.lock); + saved_traces_index = tracer->st_arr.saved_traces_index; + if (straces[saved_traces_index].timestamp) + start_index = saved_traces_index; + else + start_index = 0; + end_index = (saved_traces_index - 1) & (SAVED_TRACES_NUM - 1); + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "dump fw traces"); + if (err) + goto unlock; + index = start_index; + while (index != end_index) { + err = mlx5_devlink_fmsg_fill_trace(fmsg, &straces[index]); + if (err) + goto unlock; + + index = (index + 1) & (SAVED_TRACES_NUM - 1); + } + + err = devlink_fmsg_arr_pair_nest_end(fmsg); +unlock: + mutex_unlock(&tracer->st_arr.lock); + return err; +} + /* Create software resources (Buffers, etc ..) */ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) { @@ -833,6 +970,7 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) goto free_log_buf; } + mlx5_fw_tracer_init_saved_traces_array(tracer); mlx5_core_dbg(dev, "FWTracer: Tracer created\n"); return tracer; @@ -917,6 +1055,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) cancel_work_sync(&tracer->read_fw_strings_work); mlx5_fw_tracer_clean_ready_list(tracer); mlx5_fw_tracer_clean_print_hash(tracer); + mlx5_fw_tracer_clean_saved_traces_array(tracer); mlx5_fw_tracer_free_strings_db(tracer); mlx5_fw_tracer_destroy_log_buf(tracer); flush_workqueue(tracer->work_queue); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index a8b8747f2b61..40601fba80ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -46,6 +46,9 @@ #define TRACER_BLOCK_SIZE_BYTE 256 #define TRACES_PER_BLOCK 32 +#define TRACE_STR_MSG 256 +#define SAVED_TRACES_NUM 8192 + #define TRACER_MAX_PARAMS 7 #define MESSAGE_HASH_BITS 6 #define MESSAGE_HASH_SIZE BIT(MESSAGE_HASH_BITS) @@ -53,6 +56,13 @@ #define MASK_52_7 (0x1FFFFFFFFFFF80) #define MASK_6_0 (0x7F) +struct mlx5_fw_trace_data { + u64 timestamp; + bool lost; + u8 event_id; + char msg[TRACE_STR_MSG]; +}; + struct mlx5_fw_tracer { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -83,6 +93,13 @@ struct mlx5_fw_tracer { u32 consumer_index; } buff; + /* Saved Traces Array */ + struct { + struct mlx5_fw_trace_data straces[SAVED_TRACES_NUM]; + u32 saved_traces_index; + struct mutex lock; /* Protect st_arr access */ + } st_arr; + u64 last_timestamp; struct work_struct handle_traces_work; struct hlist_head hash[MESSAGE_HASH_SIZE]; @@ -171,5 +188,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev); int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer); void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer); +int mlx5_fw_tracer_trigger_core_dump_general(struct mlx5_core_dev *dev); +int mlx5_fw_tracer_get_saved_traces_objects(struct mlx5_fw_tracer *tracer, + struct devlink_fmsg *fmsg); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 0ccd6d40baf7..d2228e37450f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -83,30 +83,3 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) mlx5_peer_pf_cleanup(dev); } - -static int mlx5_query_host_params_context(struct mlx5_core_dev *dev, - u32 *out, int outlen) -{ - u32 
in[MLX5_ST_SZ_DW(query_host_params_in)] = {}; - - MLX5_SET(query_host_params_in, in, opcode, - MLX5_CMD_OP_QUERY_HOST_PARAMS); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); -} - -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ - u32 out[MLX5_ST_SZ_DW(query_host_params_out)] = {}; - int err; - - err = mlx5_query_host_params_context(dev, out, sizeof(out)); - if (err) - return err; - - *num_vf = MLX5_GET(query_host_params_out, out, - host_params_context.host_num_of_vfs); - mlx5_core_dbg(dev, "host_num_of_vfs %d\n", *num_vf); - - return 0; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h index 346372df218f..d3d7a00a02ac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.h @@ -16,7 +16,6 @@ enum { bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev); int mlx5_ec_init(struct mlx5_core_dev *dev); void mlx5_ec_cleanup(struct mlx5_core_dev *dev); -int mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf); #else /* CONFIG_MLX5_ESWITCH */ @@ -24,9 +23,6 @@ static inline bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev) { return false; } static inline int mlx5_ec_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_ec_cleanup(struct mlx5_core_dev *dev) {} -static inline int -mlx5_query_host_params_num_vfs(struct mlx5_core_dev *dev, int *num_vf) -{ return -EOPNOTSUPP; } #endif /* CONFIG_MLX5_ESWITCH */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index cc6797e24571..263558875f20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -48,7 +48,7 @@ #include <linux/rhashtable.h> #include <net/switchdev.h> #include <net/xdp.h> -#include <linux/net_dim.h> +#include <linux/dim.h> #include <linux/bits.h> #include "wq.h" #include "mlx5_core.h" @@ -137,6 +137,7 @@ struct page_pool; #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 +#define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ #define MLX5E_UMR_WQE_INLINE_SZ \ @@ -155,6 +156,11 @@ do { \ ##__VA_ARGS__); \ } while (0) +enum mlx5e_rq_group { + MLX5E_RQ_GROUP_REGULAR, + MLX5E_RQ_GROUP_XSK, + MLX5E_NUM_RQ_GROUPS /* Keep last. 
*/ +}; static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) { @@ -179,7 +185,8 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) /* Use this function to get max num channels after netdev was created */ static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev) { - return min_t(unsigned int, netdev->num_rx_queues, + return min_t(unsigned int, + netdev->num_rx_queues / MLX5E_NUM_RQ_GROUPS, netdev->num_tx_queues); } @@ -202,7 +209,10 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; - struct mlx5_mtt inline_mtts[0]; + union { + struct mlx5_mtt inline_mtts[0]; + u8 tls_static_params_ctx[0]; + }; }; extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; @@ -238,9 +248,9 @@ struct mlx5e_params { u16 num_channels; u8 num_tc; bool rx_cqe_compress_def; - struct net_dim_cq_moder rx_cq_moderation; - struct net_dim_cq_moder tx_cq_moderation; bool tunneled_offload_en; + struct dim_cq_moder rx_cq_moderation; + struct dim_cq_moder tx_cq_moderation; bool lro_en; u8 tx_min_inline_mode; bool vlan_strip_disable; @@ -250,6 +260,7 @@ struct mlx5e_params { u32 lro_timeout; u32 pflags; struct bpf_prog *xdp_prog; + struct mlx5e_xsk *xsk; unsigned int sw_mtu; int hard_mtu; }; @@ -325,6 +336,9 @@ struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; u8 num_dma; +#ifdef CONFIG_MLX5_EN_TLS + skb_frag_t *resync_dump_frag; +#endif }; enum mlx5e_dma_map_type { @@ -348,6 +362,13 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; + + /* Auxiliary data for different opcodes. */ + union { + struct { + struct mlx5e_rq *rq; + } umr; + }; }; struct mlx5e_txqsq { @@ -356,7 +377,7 @@ struct mlx5e_txqsq { /* dirtied @completion */ u16 cc; u32 dma_fifo_cc; - struct net_dim dim; /* Adaptive Moderation */ + struct dim dim; /* Adaptive Moderation */ /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; @@ -375,6 +396,7 @@ struct mlx5e_txqsq { void __iomem *uar_map; struct netdev_queue *txq; u32 sqn; + u16 stop_room; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; @@ -392,14 +414,55 @@ struct mlx5e_txqsq { } ____cacheline_aligned_in_smp; struct mlx5e_dma_info { - struct page *page; - dma_addr_t addr; + dma_addr_t addr; + union { + struct page *page; + struct { + u64 handle; + void *data; + } xsk; + }; +}; + +/* XDP packets can be transmitted in different ways. On completion, we need to + * distinguish between them to clean up things in a proper way. + */ +enum mlx5e_xdp_xmit_mode { + /* An xdp_frame was transmitted due to either XDP_REDIRECT from another + * device or XDP_TX from an XSK RQ. The frame has to be unmapped and + * returned. + */ + MLX5E_XDP_XMIT_MODE_FRAME, + + /* The xdp_frame was created in place as a result of XDP_TX from a + * regular RQ. No DMA remapping happened, and the page belongs to us. + */ + MLX5E_XDP_XMIT_MODE_PAGE, + + /* No xdp_frame was created at all, the transmit happened from a UMEM + * page. The UMEM Completion Ring producer pointer has to be increased. 
+ */ + MLX5E_XDP_XMIT_MODE_XSK, }; struct mlx5e_xdp_info { - struct xdp_frame *xdpf; - dma_addr_t dma_addr; - struct mlx5e_dma_info di; + enum mlx5e_xdp_xmit_mode mode; + union { + struct { + struct xdp_frame *xdpf; + dma_addr_t dma_addr; + } frame; + struct { + struct mlx5e_rq *rq; + struct mlx5e_dma_info di; + } page; + }; +}; + +struct mlx5e_xdp_xmit_data { + dma_addr_t dma_addr; + void *data; + u32 len; }; struct mlx5e_xdp_info_fifo { @@ -425,8 +488,12 @@ struct mlx5e_xdp_mpwqe { }; struct mlx5e_xdpsq; -typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq*, - struct mlx5e_xdp_info*); +typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *); +typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *, + struct mlx5e_xdp_xmit_data *, + struct mlx5e_xdp_info *, + int); + struct mlx5e_xdpsq { /* data path */ @@ -443,8 +510,10 @@ struct mlx5e_xdpsq { struct mlx5e_cq cq; /* read only */ + struct xdp_umem *umem; struct mlx5_wq_cyc wq; struct mlx5e_xdpsq_stats *stats; + mlx5e_fp_xmit_xdp_frame_check xmit_xdp_frame_check; mlx5e_fp_xmit_xdp_frame xmit_xdp_frame; struct { struct mlx5e_xdp_wqe_info *wqe_info; @@ -487,12 +556,6 @@ struct mlx5e_icosq { struct mlx5e_channel *channel; } ____cacheline_aligned_in_smp; -static inline bool -mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) -{ - return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); -} - struct mlx5e_wqe_frag_info { struct mlx5e_dma_info *di; u32 offset; @@ -571,9 +634,11 @@ struct mlx5e_rq { u8 log_stride_sz; u8 umr_in_progress; u8 umr_last_bulk; + u8 umr_completed; } mpwqe; }; struct { + u16 umem_headroom; u16 headroom; u8 map_dir; /* dma map direction */ } buff; @@ -596,14 +661,18 @@ struct mlx5e_rq { int ix; unsigned int hw_mtu; - struct net_dim dim; /* Dynamic Interrupt Moderation */ + struct dim dim; /* Dynamic Interrupt Moderation */ /* XDP */ struct bpf_prog *xdp_prog; - struct mlx5e_xdpsq xdpsq; + struct mlx5e_xdpsq *xdpsq; DECLARE_BITMAP(flags, 8); struct page_pool *page_pool; + /* AF_XDP zero-copy */ + struct zero_copy_allocator zca; + struct xdp_umem *umem; + /* control */ struct mlx5_wq_ctrl wq_ctrl; __be32 mkey_be; @@ -616,9 +685,15 @@ struct mlx5e_rq { struct xdp_rxq_info xdp_rxq; } ____cacheline_aligned_in_smp; +enum mlx5e_channel_state { + MLX5E_CHANNEL_STATE_XSK, + MLX5E_CHANNEL_NUM_STATES +}; + struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_xdpsq rq_xdpsq; struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_icosq icosq; /* internal control operations */ bool xdp; @@ -631,6 +706,13 @@ struct mlx5e_channel { /* XDP_REDIRECT */ struct mlx5e_xdpsq xdpsq; + /* AF_XDP zero-copy */ + struct mlx5e_rq xskrq; + struct mlx5e_xdpsq xsksq; + struct mlx5e_icosq xskicosq; + /* xskicosq can be accessed from any CPU - the spinlock protects it. 
*/ + spinlock_t xskicosq_lock; + /* data path - accessed per napi poll */ struct irq_desc *irq_desc; struct mlx5e_ch_stats *stats; @@ -639,6 +721,7 @@ struct mlx5e_channel { struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; + DECLARE_BITMAP(state, MLX5E_CHANNEL_NUM_STATES); int ix; int cpu; cpumask_var_t xps_cpumask; @@ -654,14 +737,17 @@ struct mlx5e_channel_stats { struct mlx5e_ch_stats ch; struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC]; struct mlx5e_rq_stats rq; + struct mlx5e_rq_stats xskrq; struct mlx5e_xdpsq_stats rq_xdpsq; struct mlx5e_xdpsq_stats xdpsq; + struct mlx5e_xdpsq_stats xsksq; } ____cacheline_aligned_in_smp; enum { MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, MLX5E_STATE_XDP_TX_ENABLED, + MLX5E_STATE_XDP_OPEN, }; struct mlx5e_rqt { @@ -694,6 +780,17 @@ struct mlx5e_modify_sq_param { int rl_index; }; +struct mlx5e_xsk { + /* UMEMs are stored separately from channels, because we don't want to + * lose them when channels are recreated. The kernel also stores UMEMs, + * but it doesn't distinguish between zero-copy and non-zero-copy UMEMs, + * so rely on our mechanism. + */ + struct xdp_umem **umems; + u16 refcnt; + bool ever_used; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; @@ -714,6 +811,7 @@ struct mlx5e_priv { struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir xsk_tir[MLX5E_MAX_NUM_CHANNELS]; struct mlx5e_rss_params rss_params; u32 tx_rates[MLX5E_MAX_NUM_SQS]; @@ -750,6 +848,7 @@ struct mlx5e_priv { struct mlx5e_tls *tls; #endif struct devlink_health_reporter *tx_reporter; + struct mlx5e_xsk xsk; }; struct mlx5e_profile { @@ -763,6 +862,7 @@ struct mlx5e_profile { void (*cleanup_tx)(struct mlx5e_priv *priv); void (*enable)(struct mlx5e_priv *priv); void (*disable)(struct mlx5e_priv *priv); + int (*update_rx)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); struct { @@ -781,7 +881,7 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more); void mlx5e_trigger_irq(struct mlx5e_icosq *sq); -void mlx5e_completion_event(struct mlx5_core_cq *mcq); +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe); void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); @@ -793,11 +893,13 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params); void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info); -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle); +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq); bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); @@ -853,6 +955,30 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params, void 
mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen); struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt); +struct mlx5e_xsk_param; + +struct mlx5e_rq_param; +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq); +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time); +void mlx5e_deactivate_rq(struct mlx5e_rq *rq); +void mlx5e_close_rq(struct mlx5e_rq *rq); + +struct mlx5e_sq_param; +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); +void mlx5e_close_icosq(struct mlx5e_icosq *sq); +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect); +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq); + +struct mlx5e_cq_param; +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, + struct mlx5e_cq_param *param, struct mlx5e_cq *cq); +void mlx5e_close_cq(struct mlx5e_cq *cq); + int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -898,102 +1024,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, swp_csum) && MLX5_CAP_ETH(mdev, swp_lso); } -struct mlx5e_swp_spec { - __be16 l3_proto; - u8 l4_proto; - u8 is_tun; - __be16 tun_l3_proto; - u8 tun_l4_proto; -}; - -static inline void -mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, - struct mlx5e_swp_spec *swp_spec) -{ - /* SWP offsets are in 2-bytes words */ - eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; - if (swp_spec->l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; - if (swp_spec->l4_proto) { - eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; - if (swp_spec->l4_proto == IPPROTO_UDP) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; - } - - if (swp_spec->is_tun) { - eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; - if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ - eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; - if (swp_spec->l3_proto == htons(ETH_P_IPV6)) - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - } - switch (swp_spec->tun_l4_proto) { - case IPPROTO_UDP: - eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; - /* fall through */ - case IPPROTO_TCP: - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; - break; - } -} - -static inline void mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, - struct mlx5e_tx_wqe **wqe, - u16 *pi) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - - *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); - memset(*wqe, 0, sizeof(**wqe)); -} - -static inline -struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) -{ - u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - - memset(cseg, 0, sizeof(*cseg)); - - cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); - cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); - - (*pc)++; - - return wqe; -} - -static inline -void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, - void __iomem *uar_map, - struct mlx5_wqe_ctrl_seg 
*ctrl) -{ - ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - /* ensure wqe is visible to device before updating doorbell record */ - dma_wmb(); - - *wq->db = cpu_to_be32(pc); - - /* ensure doorbell record is visible to device before ringing the - * doorbell - */ - wmb(); - - mlx5_write64((__be32 *)ctrl, uar_map); -} - -static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) -{ - struct mlx5_core_cq *mcq; - - mcq = &cq->mcq; - mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); -} - extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; @@ -1023,17 +1053,17 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); -int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, - u32 underlay_qpn, u32 *tisn); +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); int mlx5e_create_tises(struct mlx5e_priv *priv); +int mlx5e_update_nic_rx(struct mlx5e_priv *priv); void mlx5e_update_carrier(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); @@ -1075,8 +1105,6 @@ u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); -int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, - struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, @@ -1097,6 +1125,7 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index d3744bffbae3..79301d116667 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -3,65 +3,102 @@ #include "en/params.h" -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params) +static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u16 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - u16 linear_rq_headroom = params->xdp_prog ? 
- XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - u32 frag_sz; + return params->xdp_prog || xsk; +} + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u16 headroom = NET_IP_ALIGN; + + if (mlx5e_rx_is_xdp(params, xsk)) { + headroom += XDP_PACKET_HEADROOM; + if (xsk) + headroom += xsk->headroom; + } else { + headroom += MLX5_RX_HEADROOM; + } + + return headroom; +} + +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + u32 hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + u16 linear_rq_headroom = mlx5e_get_linear_rq_headroom(params, xsk); + u32 frag_sz = linear_rq_headroom + hw_mtu; - linear_rq_headroom += NET_IP_ALIGN; + /* AF_XDP doesn't build SKBs in place. */ + if (!xsk) + frag_sz = MLX5_SKB_FRAG_SZ(frag_sz); - frag_sz = MLX5_SKB_FRAG_SZ(linear_rq_headroom + hw_mtu); + /* XDP in mlx5e doesn't support multiple packets per page. */ + if (mlx5e_rx_is_xdp(params, xsk)) + frag_sz = max_t(u32, frag_sz, PAGE_SIZE); - if (params->xdp_prog && frag_sz < PAGE_SIZE) - frag_sz = PAGE_SIZE; + /* Even if we can go with a smaller fragment size, we must not put + * multiple packets into a single frame. + */ + if (xsk) + frag_sz = max_t(u32, frag_sz, xsk->chunk_size); return frag_sz; } -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params) +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params); + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); return MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); } -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params) +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + /* AF_XDP allocates SKBs on XDP_PASS - ensure they don't occupy more + * than one page. For this, check both with and without xsk. 
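+	 *
+	 * E.g. with 4K pages and a 1500 byte MTU (illustrative numbers): the
+	 * xsk branch yields a full page per frame, since XDP never shares a
+	 * page between packets, while the !xsk branch adds skb_shared_info
+	 * overhead for the copy made on XDP_PASS; both results must stay
+	 * within PAGE_SIZE for the linear path to be chosen.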
+ */ + u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk), + mlx5e_rx_get_linear_frag_sz(params, NULL)); - return !params->lro_en && frag_sz <= PAGE_SIZE; + return !params->lro_en && linear_frag_sz <= PAGE_SIZE; } #define MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ ((BIT(__mlx5_bit_sz(wq, log_wqe_stride_size)) - 1) + \ MLX5_MPWQE_LOG_STRIDE_SZ_BASE) bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u32 frag_sz = mlx5e_rx_get_linear_frag_sz(params); + u32 linear_frag_sz = mlx5e_rx_get_linear_frag_sz(params, xsk); s8 signed_log_num_strides_param; u8 log_num_strides; - if (!mlx5e_rx_is_linear_skb(params)) + if (!mlx5e_rx_is_linear_skb(params, xsk)) return false; - if (order_base_2(frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) + if (order_base_2(linear_frag_sz) > MLX5_MAX_MPWQE_LOG_WQE_STRIDE_SZ) return false; if (MLX5_CAP_GEN(mdev, ext_stride_num_range)) return true; - log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(frag_sz); + log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - order_base_2(linear_frag_sz); signed_log_num_strides_param = (s8)log_num_strides - MLX5_MPWQE_LOG_NUM_STRIDES_BASE; return signed_log_num_strides_param >= 0; } -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params); + u8 log_pkts_per_wqe = mlx5e_mpwqe_log_pkts_per_wqe(params, xsk); /* Numbers are unsigned, don't subtract to avoid underflow. */ if (params->log_rq_mtu_frames < @@ -72,33 +109,30 @@ u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params) } u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) - return order_base_2(mlx5e_rx_get_linear_frag_sz(params)); + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + return order_base_2(mlx5e_rx_get_linear_frag_sz(params, xsk)); return MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); } u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { return MLX5_MPWRQ_LOG_WQE_SZ - - mlx5e_mpwqe_get_log_stride_size(mdev, params); + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); } u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) { - u16 linear_rq_headroom = params->xdp_prog ? - XDP_PACKET_HEADROOM : MLX5_RX_HEADROOM; - bool is_linear_skb; - - linear_rq_headroom += NET_IP_ALIGN; - - is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? - mlx5e_rx_is_linear_skb(params) : - mlx5e_rx_mpwqe_is_linear_skb(mdev, params); + bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ? + mlx5e_rx_is_linear_skb(params, xsk) : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); - return is_linear_skb ? linear_rq_headroom : 0; + return is_linear_skb ? 
mlx5e_get_linear_rq_headroom(params, xsk) : 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index b106a0236f36..bd882b5ee9a7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -6,17 +6,119 @@ #include "en.h" -u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params); -u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params); -bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params); +struct mlx5e_xsk_param { + u16 headroom; + u16 chunk_size; +}; + +struct mlx5e_rq_param { + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + struct mlx5e_rq_frags_info frags_info; +}; + +struct mlx5e_sq_param { + u32 sqc[MLX5_ST_SZ_DW(sqc)]; + struct mlx5_wq_param wq; + bool is_mpw; +}; + +struct mlx5e_cq_param { + u32 cqc[MLX5_ST_SZ_DW(cqc)]; + struct mlx5_wq_param wq; + u16 eq_ix; + u8 cq_period_mode; +}; + +struct mlx5e_channel_param { + struct mlx5e_rq_param rq; + struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; + struct mlx5e_sq_param icosq; + struct mlx5e_cq_param rx_cq; + struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; +}; + +static inline bool mlx5e_qid_get_ch_if_in_group(struct mlx5e_params *params, + u16 qid, + enum mlx5e_rq_group group, + u16 *ix) +{ + int nch = params->num_channels; + int ch = qid - nch * group; + + if (ch < 0 || ch >= nch) + return false; + + *ix = ch; + return true; +} + +static inline void mlx5e_qid_get_ch_and_group(struct mlx5e_params *params, + u16 qid, + u16 *ix, + enum mlx5e_rq_group *group) +{ + u16 nch = params->num_channels; + + *ix = qid % nch; + *group = qid / nch; +} + +static inline bool mlx5e_qid_validate(struct mlx5e_params *params, u64 qid) +{ + return qid < params->num_channels * MLX5E_NUM_RQ_GROUPS; +} + +/* Parameter calculations */ + +u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u32 mlx5e_rx_get_linear_frag_sz(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_log_pkts_per_wqe(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); bool mlx5e_rx_mpwqe_is_linear_skb(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); -u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); +u8 mlx5e_mpwqe_get_log_rq_size(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev, - struct mlx5e_params *params); + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk); + +/* Build queue parameters */ + +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param); +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param); +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param); +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct 
mlx5e_cq_param *param); +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param); +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param); +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param); #endif /* __MLX5_EN_PARAMS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 231e7cdfc6f7..a6a52806be45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -3,8 +3,22 @@ #include <net/vxlan.h> #include <net/gre.h> -#include "lib/vxlan.h" +#include <net/geneve.h> #include "en/tc_tun.h" +#include "en_tc.h" + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return &vxlan_tunnel; + else if (netif_is_geneve(tunnel_dev)) + return &geneve_tunnel; + else if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return &gre_tunnel; + else + return NULL; +} static int get_route_and_out_devs(struct mlx5e_priv *priv, struct net_device *dev, @@ -34,7 +48,8 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, *route_dev = dev; if (is_vlan_dev(*route_dev)) *out_dev = uplink_dev; - else if (mlx5e_eswitch_rep(dev)) + else if (mlx5e_eswitch_rep(dev) && + mlx5e_is_valid_eswitch_fwd_dev(priv, dev)) *out_dev = *route_dev; else return -EOPNOTSUPP; @@ -142,63 +157,15 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv, return 0; } -static int mlx5e_gen_vxlan_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - struct udphdr *udp = (struct udphdr *)(buf); - struct vxlanhdr *vxh = (struct vxlanhdr *) - ((char *)udp + sizeof(struct udphdr)); - - udp->dest = tun_key->tp_dst; - vxh->vx_flags = VXLAN_HF_VNI; - vxh->vx_vni = vxlan_vni_field(tun_id); - - return 0; -} - -static int mlx5e_gen_gre_header(char buf[], struct ip_tunnel_key *tun_key) -{ - __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); - int hdr_len; - struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); - - /* the HW does not calculate GRE csum or sequences */ - if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) - return -EOPNOTSUPP; - - greh->protocol = htons(ETH_P_TEB); - - /* GRE key */ - hdr_len = gre_calc_hlen(tun_key->tun_flags); - greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); - if (tun_key->tun_flags & TUNNEL_KEY) { - __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); - - *ptr = tun_id; - } - - return 0; -} - static int mlx5e_gen_ip_tunnel_header(char buf[], __u8 *ip_proto, struct mlx5e_encap_entry *e) { - int err = 0; - struct ip_tunnel_key *key = &e->tun_info.key; - - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *ip_proto = IPPROTO_UDP; - err = mlx5e_gen_vxlan_header(buf, key); - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *ip_proto = IPPROTO_GRE; - err = mlx5e_gen_gre_header(buf, key); - } else { - pr_warn("mlx5: Cannot generate tunnel header for tunnel type (%d)\n" - , e->tunnel_type); - err = -EOPNOTSUPP; + if (!e->tunnel) { + pr_warn("mlx5: Cannot generate tunnel header for this tunnel\n"); + return -EOPNOTSUPP; } - return err; + return e->tunnel->generate_ip_tun_hdr(buf, ip_proto, e); } static char *gen_eth_tnl_hdr(char *buf, struct net_device *dev, @@ -230,7 +197,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, struct 
mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; @@ -254,7 +221,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, ipv4_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct iphdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -346,7 +313,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); - struct ip_tunnel_key *tun_key = &e->tun_info.key; + const struct ip_tunnel_key *tun_key = &e->tun_info->key; struct net_device *out_dev, *route_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; @@ -370,7 +337,7 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, ipv6_encap_size = (is_vlan_dev(route_dev) ? VLAN_ETH_HLEN : ETH_HLEN) + sizeof(struct ipv6hdr) + - e->tunnel_hlen; + e->tunnel->calc_hlen(e); if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -456,27 +423,12 @@ out: return err; } -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev) -{ - if (netif_is_vxlan(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_VXLAN; - else if (netif_is_gretap(tunnel_dev) || - netif_is_ip6gretap(tunnel_dev)) - return MLX5E_TC_TUNNEL_TYPE_GRETAP; - else - return MLX5E_TC_TUNNEL_TYPE_UNKNOWN; -} - bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev) { - int tunnel_type = mlx5e_tc_tun_get_type(netdev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(netdev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN && - MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) - return true; - else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP && - MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) + if (tunnel && tunnel->can_offload(priv)) return true; else return false; @@ -487,71 +439,87 @@ int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_encap_entry *e, struct netlink_ext_ack *extack) { - e->tunnel_type = mlx5e_tc_tun_get_type(tunnel_dev); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(tunnel_dev); - if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - int dst_port = be16_to_cpu(e->tun_info.key.tp_dst); - - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { - NL_SET_ERR_MSG_MOD(extack, - "vxlan udp dport was not registered with the HW"); - netdev_warn(priv->netdev, - "%d isn't an offloaded vxlan udp dport\n", - dst_port); - return -EOPNOTSUPP; - } - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; - e->tunnel_hlen = VXLAN_HLEN; - } else if (e->tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; - e->tunnel_hlen = gre_calc_hlen(e->tun_info.key.tun_flags); - } else { + if (!tunnel) { e->reformat_type = -1; - e->tunnel_hlen = -1; return -EOPNOTSUPP; } - return 0; + + return tunnel->init_encap_attr(tunnel_dev, priv, e, extack); } -static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v) +int mlx5e_tc_tun_parse(struct net_device *filter_dev, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct 
flow_cls_offload *f, + void *headers_c, + void *headers_v, u8 *match_level) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev); + int err = 0; + + if (!tunnel) { + netdev_warn(priv->netdev, + "decapsulation offload is not supported for %s net device\n", + mlx5e_netdev_kind(filter_dev)); + err = -EOPNOTSUPP; + goto out; + } + + *match_level = tunnel->match_level; + + if (tunnel->parse_udp_ports) { + err = tunnel->parse_udp_ports(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + + if (tunnel->parse_tunnel) { + err = tunnel->parse_tunnel(priv, spec, f, + headers_c, headers_v); + if (err) + goto out; + } + +out: + return err; +} + +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - void *misc_c = MLX5_ADDR_OF(fte_match_param, - spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, - spec->match_value, - misc_parameters); struct flow_match_ports enc_ports; - flow_rule_match_enc_ports(rule, &enc_ports); - /* Full udp dst port must be given */ - if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || - memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { NL_SET_ERR_MSG_MOD(extack, - "VXLAN decap filter must include enc_dst_port condition"); + "UDP tunnel decap filter must include enc_dst_port condition"); netdev_warn(priv->netdev, - "VXLAN decap filter must include enc_dst_port condition\n"); + "UDP tunnel decap filter must include enc_dst_port condition\n"); return -EOPNOTSUPP; } - /* udp dst port must be knonwn as a VXLAN port */ - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { + flow_rule_match_enc_ports(rule, &enc_ports); + + if (memchr_inv(&enc_ports.mask->dst, 0xff, + sizeof(enc_ports.mask->dst))) { NL_SET_ERR_MSG_MOD(extack, - "Matched UDP port is not registered as a VXLAN port"); + "UDP tunnel decap filter must match enc_dst_port fully"); netdev_warn(priv->netdev, - "UDP port %d is not registered as a VXLAN port\n", - be16_to_cpu(enc_ports.key->dst)); + "UDP tunnel decap filter must match enc_dst_port fully\n"); return -EOPNOTSUPP; } - /* dst UDP port is valid here */ + /* match on UDP protocol and dst port number */ + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); @@ -560,92 +528,15 @@ static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(enc_ports.key->dst)); + /* UDP src port on outer header is generated by HW, + * so it is probably a bad idea to request matching it. + * Nonetheless, it is allowed. 
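+	 *
+	 * E.g. (illustrative tc usage) a flower rule with "enc_dst_port 4789"
+	 * matches here as-is; adding "enc_src_port" is accepted too, but
+	 * since the outgoing source port is picked by the HW for entropy,
+	 * such a match rarely behaves the way the user expects.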
+ */ + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(enc_ports.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(enc_ports.key->src)); - /* match on VNI */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - - MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(enc_keyid.key->keyid)); - } - return 0; -} - -static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *outer_headers_c, - void *outer_headers_v) -{ - void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - - if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { - NL_SET_ERR_MSG_MOD(f->common.extack, - "GRE HW offloading is not supported"); - netdev_warn(priv->netdev, "GRE HW offloading is not supported\n"); - return -EOPNOTSUPP; - } - - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, - ip_protocol, IPPROTO_GRE); - - /* gre protocol*/ - MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); - MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); - - /* gre key */ - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_match_enc_keyid enc_keyid; - - flow_rule_match_enc_keyid(rule, &enc_keyid); - MLX5_SET(fte_match_set_misc, misc_c, - gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); - MLX5_SET(fte_match_set_misc, misc_v, - gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); - } - return 0; } - -int mlx5e_tc_tun_parse(struct net_device *filter_dev, - struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, - void *headers_c, - void *headers_v, u8 *match_level) -{ - int tunnel_type; - int err = 0; - - tunnel_type = mlx5e_tc_tun_get_type(filter_dev); - if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { - *match_level = MLX5_MATCH_L4; - err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, - headers_c, headers_v); - } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { - *match_level = MLX5_MATCH_L3; - err = mlx5e_tc_tun_parse_gretap(priv, spec, f, - headers_c, headers_v); - } else { - netdev_warn(priv->netdev, - "decapsulation offload is not supported for %s (kind: \"%s\")\n", - netdev_name(filter_dev), - mlx5e_netdev_kind(filter_dev)); - - return -EOPNOTSUPP; - } - return err; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h index b63f15de899d..c362b9225dc2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h @@ -14,9 +14,41 @@ enum { MLX5E_TC_TUNNEL_TYPE_UNKNOWN, MLX5E_TC_TUNNEL_TYPE_VXLAN, - MLX5E_TC_TUNNEL_TYPE_GRETAP + MLX5E_TC_TUNNEL_TYPE_GENEVE, + MLX5E_TC_TUNNEL_TYPE_GRETAP, }; +struct mlx5e_tc_tunnel { + int tunnel_type; + enum mlx5_flow_match_level match_level; + + bool (*can_offload)(struct mlx5e_priv *priv); + int (*calc_hlen)(struct mlx5e_encap_entry *e); + int (*init_encap_attr)(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack); + int (*generate_ip_tun_hdr)(char buf[], + __u8 *ip_proto, + 
struct mlx5e_encap_entry *e); + int (*parse_udp_ports)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + int (*parse_tunnel)(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); +}; + +extern struct mlx5e_tc_tunnel vxlan_tunnel; +extern struct mlx5e_tc_tunnel geneve_tunnel; +extern struct mlx5e_tc_tunnel gre_tunnel; + +struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev); + int mlx5e_tc_tun_init_encap_attr(struct net_device *tunnel_dev, struct mlx5e_priv *priv, struct mlx5e_encap_entry *e, @@ -30,15 +62,20 @@ int mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, struct mlx5e_encap_entry *e); -int mlx5e_tc_tun_get_type(struct net_device *tunnel_dev); bool mlx5e_tc_tun_device_to_offload(struct mlx5e_priv *priv, struct net_device *netdev); int mlx5e_tc_tun_parse(struct net_device *filter_dev, struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, void *headers_c, void *headers_v, u8 *match_level); +int mlx5e_tc_tun_parse_udp_ports(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v); + #endif //__MLX5_EN_TC_TUNNEL_H__ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c new file mode 100644 index 000000000000..951ea26d96bc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/geneve.h> +#include "lib/geneve.h" +#include "en/tc_tun.h" + +#define MLX5E_GENEVE_VER 0 + +static bool mlx5e_tc_tun_can_offload_geneve(struct mlx5e_priv *priv) +{ + return !!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & MLX5_FLEX_PROTO_GENEVE); +} + +static int mlx5e_tc_tun_calc_hlen_geneve(struct mlx5e_encap_entry *e) +{ + return sizeof(struct udphdr) + + sizeof(struct genevehdr) + + e->tun_info->options_len; +} + +static int mlx5e_tc_tun_check_udp_dport_geneve(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* Currently we support only default GENEVE + * port, so udp dst port must match. 
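+	 *
+	 * E.g. a tunnel set up with "ip link add gnv0 type geneve id 42
+	 * remote 10.0.0.2" uses the default port 6081 and passes this check,
+	 * while one created with "dstport 6000" fails it and the rule is
+	 * handled in software instead.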
+ */ + if (be16_to_cpu(enc_ports.key->dst) != GENEVE_UDP_PORT) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a GENEVE port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a GENEVE port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_geneve(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_geneve(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &geneve_tunnel; + + /* Reformat type for GENEVE encap is similar to VXLAN: + * in both cases the HW adds in the same place a + * defined encapsulation header that the SW provides. + */ + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static void mlx5e_tunnel_id_to_vni(__be64 tun_id, __u8 *vni) +{ +#ifdef __BIG_ENDIAN + vni[0] = (__force __u8)(tun_id >> 16); + vni[1] = (__force __u8)(tun_id >> 8); + vni[2] = (__force __u8)tun_id; +#else + vni[0] = (__force __u8)((__force u64)tun_id >> 40); + vni[1] = (__force __u8)((__force u64)tun_id >> 48); + vni[2] = (__force __u8)((__force u64)tun_id >> 56); +#endif +} + +static int mlx5e_gen_ip_tunnel_header_geneve(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_info *tun_info = e->tun_info; + struct udphdr *udp = (struct udphdr *)(buf); + struct genevehdr *geneveh; + + geneveh = (struct genevehdr *)((char *)udp + sizeof(struct udphdr)); + + *ip_proto = IPPROTO_UDP; + + udp->dest = tun_info->key.tp_dst; + + memset(geneveh, 0, sizeof(*geneveh)); + geneveh->ver = MLX5E_GENEVE_VER; + geneveh->opt_len = tun_info->options_len / 4; + geneveh->oam = !!(tun_info->key.tun_flags & TUNNEL_OAM); + geneveh->critical = !!(tun_info->key.tun_flags & TUNNEL_CRIT_OPT); + mlx5e_tunnel_id_to_vni(tun_info->key.tun_id, geneveh->vni); + geneveh->proto_type = htons(ETH_P_TEB); + + if (tun_info->key.tun_flags & TUNNEL_GENEVE_OPT) { + if (!geneveh->opt_len) + return -EOPNOTSUPP; + ip_tunnel_info_opts_get(geneveh->options, tun_info); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_vni(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_vni)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE VNI is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_vni, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, geneve_vni, be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +static int 
mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + u8 max_tlv_option_data_len = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_option_data_len); + u8 max_tlv_options = MLX5_CAP_GEN(priv->mdev, max_geneve_tlv_options); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + void *misc_c, *misc_v, *misc_3_c, *misc_3_v; + struct geneve_opt *option_key, *option_mask; + __be32 opt_data_key = 0, opt_data_mask = 0; + struct flow_match_enc_opts enc_opts; + int res = 0; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + misc_3_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_3); + misc_3_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_3); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) + return 0; + + flow_rule_match_enc_opts(rule, &enc_opts); + + if (memchr_inv(&enc_opts.mask->data, 0, sizeof(enc_opts.mask->data)) && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_data)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options is not supported\n"); + return -EOPNOTSUPP; + } + + /* make sure that we're talking about GENEVE options */ + + if (enc_opts.key->dst_opt_type != TUNNEL_GENEVE_OPT) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: option type is not GENEVE"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: option type is not GENEVE\n"); + return -EOPNOTSUPP; + } + + if (enc_opts.mask->len && + !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_opt_len)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options len is not supported"); + netdev_warn(priv->netdev, + "Matching on GENEVE options len is not supported\n"); + return -EOPNOTSUPP; + } + + /* max_geneve_tlv_option_data_len comes in multiples of 4 bytes, and it + * doesn't include the TLV option header. 'geneve_opt_len' is a total + * len of all the options, including the headers, also multiples of 4 + * bytes. Len that comes from the dissector is in bytes. 
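+	 *
+	 * E.g. (illustrative caps): with max_geneve_tlv_option_data_len = 1
+	 * (one 4-byte data word) and max_geneve_tlv_options = 1, the largest
+	 * dissector len accepted below is (1 + 1) * 1 * 4 = 8 bytes - one
+	 * 4-byte TLV header plus 4 bytes of option data.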
+ */ + + if ((enc_opts.key->len / 4) > ((max_tlv_option_data_len + 1) * max_tlv_options)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported options len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported options len (len=%d)\n", + enc_opts.key->len); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, geneve_opt_len, enc_opts.mask->len / 4); + MLX5_SET(fte_match_set_misc, misc_v, geneve_opt_len, enc_opts.key->len / 4); + + /* we support matching on one option only, so just get it */ + option_key = (struct geneve_opt *)&enc_opts.key->data[0]; + option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + + if (option_key->length > max_tlv_option_data_len) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: unsupported option len"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: unsupported option len (key=%d, mask=%d)\n", + option_key->length, option_mask->length); + return -EOPNOTSUPP; + } + + /* data can't be all 0 - fail to offload such rule */ + if (!memchr_inv(option_key->opt_data, 0, option_key->length * 4)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: can't match on 0 data field"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: can't match on 0 data field\n"); + return -EOPNOTSUPP; + } + + /* add new GENEVE TLV options object */ + res = mlx5_geneve_tlv_option_add(priv->mdev->geneve, option_key); + if (res) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on GENEVE options: failed creating TLV opt object"); + netdev_warn(priv->netdev, + "Matching on GENEVE options: failed creating TLV opt object (class:type:len = 0x%x:0x%x:%d)\n", + be16_to_cpu(option_key->opt_class), + option_key->type, option_key->length); + return res; + } + + /* In general, after creating the object, need to query it + * in order to check which option data to set in misc3. + * But we support only geneve_tlv_option_0_data, so no + * point querying at this stage. + */ + + memcpy(&opt_data_key, option_key->opt_data, option_key->length * 4); + memcpy(&opt_data_mask, option_mask->opt_data, option_mask->length * 4); + MLX5_SET(fte_match_set_misc3, misc_3_v, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); + MLX5_SET(fte_match_set_misc3, misc_3_c, + geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve_params(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct netlink_ext_ack *extack = f->common.extack; + + /* match on OAM - packets with OAM bit on should NOT be offloaded */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_geneve_oam)) { + NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE OAM is not supported"); + netdev_warn(priv->netdev, "Matching on GENEVE OAM is not supported\n"); + return -EOPNOTSUPP; + } + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_oam); + MLX5_SET(fte_match_set_misc, misc_v, geneve_oam, 0); + + /* Match on GENEVE protocol. We support only Transparent Eth Bridge. 
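+	 * I.e. when the FW supports matching on this field, only frames whose
+	 * GENEVE protocol_type is ETH_P_TEB (0x6558, an inner Ethernet frame)
+	 * can hit this rule; e.g. a GENEVE packet carrying bare IPv4 (0x0800)
+	 * falls back to the software path.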
*/ + + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_geneve_protocol_type)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_protocol_type); + MLX5_SET(fte_match_set_misc, misc_v, geneve_protocol_type, ETH_P_TEB); + } + + return 0; +} + +static int mlx5e_tc_tun_parse_geneve(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err; + + err = mlx5e_tc_tun_parse_geneve_params(priv, spec, f); + if (err) + return err; + + err = mlx5e_tc_tun_parse_geneve_vni(priv, spec, f); + if (err) + return err; + + return mlx5e_tc_tun_parse_geneve_options(priv, spec, f); +} + +struct mlx5e_tc_tunnel geneve_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GENEVE, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_geneve, + .calc_hlen = mlx5e_tc_tun_calc_hlen_geneve, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_geneve, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_geneve, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_geneve, + .parse_tunnel = mlx5e_tc_tun_parse_geneve, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c new file mode 100644 index 000000000000..58b13192df23 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_gre.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/gre.h> +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_gretap(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_gretap(struct mlx5e_encap_entry *e) +{ + return gre_calc_hlen(e->tun_info->key.tun_flags); +} + +static int mlx5e_tc_tun_init_encap_attr_gretap(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + e->tunnel = &gre_tunnel; + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_NVGRE; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_gretap(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + struct gre_base_hdr *greh = (struct gre_base_hdr *)(buf); + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + int hdr_len; + + *ip_proto = IPPROTO_GRE; + + /* the HW does not calculate GRE csum or sequences */ + if (tun_key->tun_flags & (TUNNEL_CSUM | TUNNEL_SEQ)) + return -EOPNOTSUPP; + + greh->protocol = htons(ETH_P_TEB); + + /* GRE key */ + hdr_len = mlx5e_tc_tun_calc_hlen_gretap(e); + greh->flags = gre_tnl_flags_to_gre_flags(tun_key->tun_flags); + if (tun_key->tun_flags & TUNNEL_KEY) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + *ptr = tun_id; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_gretap(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE); + + /* gre protocol */ + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, gre_protocol); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB); + + /* gre key */ + if 
(flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); + } + + return 0; +} + +struct mlx5e_tc_tunnel gre_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_GRETAP, + .match_level = MLX5_MATCH_L3, + .can_offload = mlx5e_tc_tun_can_offload_gretap, + .calc_hlen = mlx5e_tc_tun_calc_hlen_gretap, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_gretap, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_gretap, + .parse_udp_ports = NULL, + .parse_tunnel = mlx5e_tc_tun_parse_gretap, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c new file mode 100644 index 000000000000..37b176801bcc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2018 Mellanox Technologies. */ + +#include <net/vxlan.h> +#include "lib/vxlan.h" +#include "en/tc_tun.h" + +static bool mlx5e_tc_tun_can_offload_vxlan(struct mlx5e_priv *priv) +{ + return !!MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap); +} + +static int mlx5e_tc_tun_calc_hlen_vxlan(struct mlx5e_encap_entry *e) +{ + return VXLAN_HLEN; +} + +static int mlx5e_tc_tun_check_udp_dport_vxlan(struct mlx5e_priv *priv, + struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_ports enc_ports; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) + return -EOPNOTSUPP; + + flow_rule_match_enc_ports(rule, &enc_ports); + + /* check the UDP destination port validity */ + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, + be16_to_cpu(enc_ports.key->dst))) { + NL_SET_ERR_MSG_MOD(extack, + "Matched UDP dst port is not registered as a VXLAN port"); + netdev_warn(priv->netdev, + "UDP port %d is not registered as a VXLAN port\n", + be16_to_cpu(enc_ports.key->dst)); + return -EOPNOTSUPP; + } + + return 0; +} + +static int mlx5e_tc_tun_parse_udp_ports_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + int err = 0; + + err = mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v); + if (err) + return err; + + return mlx5e_tc_tun_check_udp_dport_vxlan(priv, f); +} + +static int mlx5e_tc_tun_init_encap_attr_vxlan(struct net_device *tunnel_dev, + struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct netlink_ext_ack *extack) +{ + int dst_port = be16_to_cpu(e->tun_info->key.tp_dst); + + e->tunnel = &vxlan_tunnel; + + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, dst_port)) { + NL_SET_ERR_MSG_MOD(extack, + "vxlan udp dport was not registered with the HW"); + netdev_warn(priv->netdev, + "%d isn't an offloaded vxlan udp dport\n", + dst_port); + return -EOPNOTSUPP; + } + + e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; + return 0; +} + +static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], + __u8 *ip_proto, + struct mlx5e_encap_entry *e) +{ + const struct ip_tunnel_key *tun_key = &e->tun_info->key; + __be32 tun_id = tunnel_id_to_key32(tun_key->tun_id); + struct udphdr *udp = (struct udphdr *)(buf); + struct vxlanhdr *vxh; + + vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); + *ip_proto = 
IPPROTO_UDP; + + udp->dest = tun_key->tp_dst; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(tun_id); + + return 0; +} + +static int mlx5e_tc_tun_parse_vxlan(struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec, + struct flow_cls_offload *f, + void *headers_c, + void *headers_v) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct netlink_ext_ack *extack = f->common.extack; + struct flow_match_enc_keyid enc_keyid; + void *misc_c, *misc_v; + + misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) + return 0; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid) + return 0; + + /* match on VNI is required */ + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.outer_vxlan_vni)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on VXLAN VNI is not supported"); + netdev_warn(priv->netdev, + "Matching on VXLAN VNI is not supported\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, + be32_to_cpu(enc_keyid.mask->keyid)); + MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, + be32_to_cpu(enc_keyid.key->keyid)); + + return 0; +} + +struct mlx5e_tc_tunnel vxlan_tunnel = { + .tunnel_type = MLX5E_TC_TUNNEL_TYPE_VXLAN, + .match_level = MLX5_MATCH_L4, + .can_offload = mlx5e_tc_tun_can_offload_vxlan, + .calc_hlen = mlx5e_tc_tun_calc_hlen_vxlan, + .init_encap_attr = mlx5e_tc_tun_init_encap_attr_vxlan, + .generate_ip_tun_hdr = mlx5e_gen_ip_tunnel_header_vxlan, + .parse_udp_ports = mlx5e_tc_tun_parse_udp_ports_vxlan, + .parse_tunnel = mlx5e_tc_tun_parse_vxlan, +}; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h new file mode 100644 index 000000000000..ddfe19adb3d9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_TXRX_H___ +#define __MLX5_EN_TXRX_H___ + +#include "en.h" + +#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS +#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ + MLX5E_SQ_NOPS_ROOM) + +#ifndef CONFIG_MLX5_EN_TLS +#define MLX5E_SQ_TLS_ROOM (0) +#else +/* TLS offload requires additional stop_room for: + * - a resync SKB. + * kTLS offload requires additional stop_room for: + * - static params WQE, + * - progress params WQE, and + * - resync DUMP per frag. 
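+ *
+ * The sum below is a worst case: one maximally sized resync SKB plus,
+ * for kTLS, the fixed static/progress WQEs and one DUMP WQE per
+ * possible fragment (MAX_SKB_FRAGS); reserving this many WQEBBs before
+ * stopping the SQ guarantees the next transmit always fits.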
+ */ +#define MLX5E_SQ_TLS_ROOM \ + (MLX5_SEND_WQE_MAX_WQEBBS + \ + MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + \ + MAX_SKB_FRAGS * MLX5E_KTLS_MAX_DUMP_WQEBBS) +#endif + +#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc); +} + +static inline void * +mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + void *wqe; + + *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + wqe = mlx5_wq_cyc_get_wqe(wq, *pi); + memset(wqe, 0, size); + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + + (*pc)++; + + return wqe; +} + +static inline struct mlx5e_tx_wqe * +mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(wq, *pc); + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + cseg->fm_ce_se = MLX5_FENCE_MODE_INITIATOR_SMALL; + + (*pc)++; + + return wqe; +} + +static inline void +mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq, + u16 pi, u16 nnops) +{ + struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; + + edge_wi = wi + nnops; + + /* fill sq frag edge with nops to avoid wqe wrapping two pages */ + for (; wi < edge_wi; wi++) { + wi->skb = NULL; + wi->num_wqebbs = 1; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + } + sq->stats->nop += nnops; +} + +static inline void +mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + *wq->db = cpu_to_be32(pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + mlx5_write64((__be32 *)ctrl, uar_map); +} + +static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5e_tx_wqe *wqe) +{ + return !!wqe->ctrl.tisn; +} + +static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) +{ + struct mlx5_core_cq *mcq; + + mcq = &cq->mcq; + mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); +} + +static inline struct mlx5e_sq_dma * +mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) +{ + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; +} + +static inline void +mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, + enum mlx5e_dma_map_type map_type) +{ + struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); + + dma->addr = addr; + dma->size = size; + dma->type = map_type; +} + +static inline void +mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) +{ + switch (dma->type) { + case MLX5E_DMA_MAP_SINGLE: + dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + case MLX5E_DMA_MAP_PAGE: + dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); + break; + default: + WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA 
type!\n"); + } +} + +/* SW parser related functions */ + +struct mlx5e_swp_spec { + __be16 l3_proto; + u8 l4_proto; + u8 is_tun; + __be16 tun_l3_proto; + u8 tun_l4_proto; +}; + +static inline void +mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, + struct mlx5e_swp_spec *swp_spec) +{ + /* SWP offsets are in 2-bytes words */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + if (swp_spec->l4_proto) { + eseg->swp_outer_l4_offset = skb_transport_offset(skb) / 2; + if (swp_spec->l4_proto == IPPROTO_UDP) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L4_UDP; + } + + if (swp_spec->is_tun) { + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + if (swp_spec->tun_l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } else { /* typically for ipsec when xfrm mode != XFRM_MODE_TUNNEL */ + eseg->swp_inner_l3_offset = skb_network_offset(skb) / 2; + if (swp_spec->l3_proto == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + } + switch (swp_spec->tun_l4_proto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + /* fall through */ + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + } +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index eb8ef78e5626..b0b982cf69bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -31,11 +31,13 @@ */ #include <linux/bpf_trace.h> +#include <net/xdp_sock.h> #include "en/xdp.h" +#include "en/params.h" -int mlx5e_xdp_max_mtu(struct mlx5e_params *params) +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) { - int hr = NET_IP_ALIGN + XDP_PACKET_HEADROOM; + int hr = mlx5e_get_linear_rq_headroom(params, xsk); /* Let S := SKB_DATA_ALIGN(sizeof(struct skb_shared_info)). * The condition checked in mlx5e_rx_is_linear_skb is: @@ -54,25 +56,70 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params) } static inline bool -mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_dma_info *di, - struct xdp_buff *xdp) +mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, struct xdp_buff *xdp) { + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; + struct xdp_frame *xdpf; + dma_addr_t dma_addr; - xdpi.xdpf = convert_to_xdp_frame(xdp); - if (unlikely(!xdpi.xdpf)) + xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) return false; - xdpi.dma_addr = di->addr + (xdpi.xdpf->data - (void *)xdpi.xdpf); - dma_sync_single_for_device(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, PCI_DMA_TODEVICE); - xdpi.di = *di; - return sq->xmit_xdp_frame(sq, &xdpi); + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + + if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) { + /* The xdp_buff was in the UMEM and was copied into a newly + * allocated page. The UMEM page was returned via the ZCA, and + * this new page has to be mapped at this point and has to be + * unmapped and returned via xdp_return_frame on completion. + */ + + /* Prevent double recycling of the UMEM page. Even in case this + * function returns false, the xdp_buff shouldn't be recycled, + * as it was already done in xdp_convert_zc_to_xdp_frame. 
+ */ + __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + + dma_addr = dma_map_single(sq->pdev, xdptxd.data, xdptxd.len, + DMA_TO_DEVICE); + if (dma_mapping_error(sq->pdev, dma_addr)) { + xdp_return_frame(xdpf); + return false; + } + + xdptxd.dma_addr = dma_addr; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = dma_addr; + } else { + /* Driver assumes that convert_to_xdp_frame returns an xdp_frame + * that points to the same memory region as the original + * xdp_buff. It allows to map the memory only once and to use + * the DMA_BIDIRECTIONAL mode. + */ + + xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE; + + dma_addr = di->addr + (xdpf->data - (void *)xdpf); + dma_sync_single_for_device(sq->pdev, dma_addr, xdptxd.len, + DMA_TO_DEVICE); + + xdptxd.dma_addr = dma_addr; + xdpi.page.rq = rq; + xdpi.page.di = *di; + } + + return sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0); } /* returns true if packet was consumed by xdp */ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len) + void *va, u16 *rx_headroom, u32 *len, bool xsk) { struct bpf_prog *prog = READ_ONCE(rq->xdp_prog); struct xdp_buff xdp; @@ -86,16 +133,20 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + *len; xdp.data_hard_start = va; + if (xsk) + xdp.handle = di->xsk.handle; xdp.rxq = &rq->xdp_rxq; act = bpf_prog_run_xdp(prog, &xdp); + if (xsk) + xdp.handle += xdp.data - xdp.data_hard_start; switch (act) { case XDP_PASS: *rx_headroom = xdp.data - xdp.data_hard_start; *len = xdp.data_end - xdp.data; return false; case XDP_TX: - if (unlikely(!mlx5e_xmit_xdp_buff(&rq->xdpsq, di, &xdp))) + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp))) goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ return true; @@ -106,7 +157,8 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); - mlx5e_page_dma_unmap(rq, di); + if (!xsk) + mlx5e_page_dma_unmap(rq, di); rq->stats->xdp_redirect++; return true; default: @@ -160,7 +212,7 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq) stats->mpwqe++; } -static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; @@ -183,32 +235,55 @@ static void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq) session->wqe = NULL; /* Close session */ } +enum { + MLX5E_XDP_CHECK_OK = 1, + MLX5E_XDP_CHECK_START_MPWQE = 2, +}; + +static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!sq->mpwqe.wqe)) { + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, + MLX5_SEND_WQE_MAX_WQEBBS))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_START_MPWQE; + } + + return MLX5E_XDP_CHECK_OK; +} + static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, - struct mlx5e_xdp_info *xdpi) + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; struct mlx5e_xdpsq_stats *stats = sq->stats; - struct xdp_frame *xdpf = xdpi->xdpf; - - if (unlikely(sq->hw_mtu < xdpf->len)) { + if (unlikely(xdptxd->len > sq->hw_mtu)) { stats->err++; return false; 
} - if (unlikely(!session->wqe)) { - if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5_SEND_WQE_MAX_WQEBBS))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; - return false; - } + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check_mpwqe(sq); + if (unlikely(check_result < 0)) + return false; + if (check_result == MLX5E_XDP_CHECK_START_MPWQE) { + /* Start the session when nothing can fail, so it's guaranteed + * that if there is an active session, it has at least one dseg, + * and it's safe to complete it at any time. + */ mlx5e_xdp_mpwqe_session_start(sq); } - mlx5e_xdp_mpwqe_add_dseg(sq, xdpi, stats); + mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats); if (unlikely(session->complete || session->ds_count == session->max_ds_count)) @@ -219,7 +294,22 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, return true; } -static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi) +static int mlx5e_xmit_xdp_frame_check(struct mlx5e_xdpsq *sq) +{ + if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->stats->full++; + return -EBUSY; + } + + return MLX5E_XDP_CHECK_OK; +} + +static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, + struct mlx5e_xdp_info *xdpi, + int check_result) { struct mlx5_wq_cyc *wq = &sq->wq; u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); @@ -229,9 +319,8 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg = wqe->data; - struct xdp_frame *xdpf = xdpi->xdpf; - dma_addr_t dma_addr = xdpi->dma_addr; - unsigned int dma_len = xdpf->len; + dma_addr_t dma_addr = xdptxd->dma_addr; + u32 dma_len = xdptxd->len; struct mlx5e_xdpsq_stats *stats = sq->stats; @@ -242,18 +331,16 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * return false; } - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { - /* SQ is full, ring doorbell */ - mlx5e_xmit_xdp_doorbell(sq); - stats->full++; + if (!check_result) + check_result = mlx5e_xmit_xdp_frame_check(sq); + if (unlikely(check_result < 0)) return false; - } cseg->fm_ce_se = 0; /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdpf->data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; @@ -277,7 +364,7 @@ static bool mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info * static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_wqe_info *wi, - struct mlx5e_rq *rq, + u32 *xsk_frames, bool recycle) { struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo; @@ -286,22 +373,32 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq, for (i = 0; i < wi->num_pkts; i++) { struct mlx5e_xdp_info xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo); - if (rq) { - /* XDP_TX */ - mlx5e_page_release(rq, &xdpi.di, recycle); - } else { - /* XDP_REDIRECT */ - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpi.xdpf->len, DMA_TO_DEVICE); - xdp_return_frame(xdpi.xdpf); + switch (xdpi.mode) { + case MLX5E_XDP_XMIT_MODE_FRAME: + /* XDP_TX from the XSK RQ and XDP_REDIRECT */ + dma_unmap_single(sq->pdev, xdpi.frame.dma_addr, + xdpi.frame.xdpf->len, 
DMA_TO_DEVICE); + xdp_return_frame(xdpi.frame.xdpf); + break; + case MLX5E_XDP_XMIT_MODE_PAGE: + /* XDP_TX from the regular RQ */ + mlx5e_page_release_dynamic(xdpi.page.rq, &xdpi.page.di, recycle); + break; + case MLX5E_XDP_XMIT_MODE_XSK: + /* AF_XDP send */ + (*xsk_frames)++; + break; + default: + WARN_ON_ONCE(true); } } } -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { struct mlx5e_xdpsq *sq; struct mlx5_cqe64 *cqe; + u32 xsk_frames = 0; u16 sqcc; int i; @@ -343,10 +440,13 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) sqcc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, rq, true); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true); } while (!last_wqe); } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); + sq->stats->cqes += i; mlx5_cqwq_update_db_record(&cq->wq); @@ -358,8 +458,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) { + u32 xsk_frames = 0; + while (sq->cc != sq->pc) { struct mlx5e_xdp_wqe_info *wi; u16 ci; @@ -369,8 +471,11 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) sq->cc += wi->num_wqebbs; - mlx5e_free_xdpsq_desc(sq, wi, rq, false); + mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false); } + + if (xsk_frames) + xsk_umem_complete_tx(sq->umem, xsk_frames); } int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -398,21 +503,27 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; + struct mlx5e_xdp_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; - xdpi.dma_addr = dma_map_single(sq->pdev, xdpf->data, xdpf->len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(sq->pdev, xdpi.dma_addr))) { + xdptxd.data = xdpf->data; + xdptxd.len = xdpf->len; + xdptxd.dma_addr = dma_map_single(sq->pdev, xdptxd.data, + xdptxd.len, DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(sq->pdev, xdptxd.dma_addr))) { xdp_return_frame_rx_napi(xdpf); drops++; continue; } - xdpi.xdpf = xdpf; + xdpi.mode = MLX5E_XDP_XMIT_MODE_FRAME; + xdpi.frame.xdpf = xdpf; + xdpi.frame.dma_addr = xdptxd.dma_addr; - if (unlikely(!sq->xmit_xdp_frame(sq, &xdpi))) { - dma_unmap_single(sq->pdev, xdpi.dma_addr, - xdpf->len, DMA_TO_DEVICE); + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, 0))) { + dma_unmap_single(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_TO_DEVICE); xdp_return_frame_rx_napi(xdpf); drops++; } @@ -429,7 +540,7 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) { - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq = rq->xdpsq; if (xdpsq->mpwqe.wqe) mlx5e_xdp_mpwqe_complete(xdpsq); @@ -444,6 +555,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq) void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw) { + sq->xmit_xdp_frame_check = is_mpw ? + mlx5e_xmit_xdp_frame_check_mpwqe : mlx5e_xmit_xdp_frame_check; sq->xmit_xdp_frame = is_mpw ? 
mlx5e_xmit_xdp_frame_mpwqe : mlx5e_xmit_xdp_frame; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index 8b537a4b0840..b90923932668 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -33,17 +33,20 @@ #define __MLX5_EN_XDP_H__ #include "en.h" +#include "en/txrx.h" #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) #define MLX5E_XDP_TX_EMPTY_DS_COUNT \ (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */) -int mlx5e_xdp_max_mtu(struct mlx5e_params *params); +struct mlx5e_xsk_param; +int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, - void *va, u16 *rx_headroom, u32 *len); -bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq); -void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq); + void *va, u16 *rx_headroom, u32 *len, bool xsk); +void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); void mlx5e_set_xmit_fp(struct mlx5e_xdpsq *sq, bool is_mpw); void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, @@ -66,6 +69,21 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); } +static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv) +{ + set_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv) +{ + clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + +static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv) +{ + return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state); +} + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { if (sq->doorbell_cseg) { @@ -97,15 +115,14 @@ static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) } static inline void -mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_xmit_data *xdptxd, struct mlx5e_xdpsq_stats *stats) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; - dma_addr_t dma_addr = xdpi->dma_addr; - struct xdp_frame *xdpf = xdpi->xdpf; struct mlx5_wqe_data_seg *dseg = (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; - u16 dma_len = xdpf->len; + u32 dma_len = xdptxd->len; session->pkt_count++; @@ -124,7 +141,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, } inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); - memcpy(inline_dseg->data, xdpf->data, dma_len); + memcpy(inline_dseg->data, xdptxd->data, dma_len); session->ds_count += ds_cnt; stats->inlnw++; @@ -132,7 +149,7 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, } no_inline: - dseg->addr = cpu_to_be64(dma_addr); + dseg->addr = cpu_to_be64(xdptxd->dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; session->ds_count++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile new file mode 100644 index 000000000000..5ee42991900a --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += 
-I$(src)/../..
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
new file mode 100644
index 000000000000..6a55573ec8f2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include "rx.h"
+#include "en/xdp.h"
+#include <net/xdp_sock.h>
+
+/* RX data path */
+
+bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
+{
+ /* Check in advance that we have enough frames, instead of allocating
+ * one-by-one, failing and moving frames to the Reuse Ring.
+ */
+ return xsk_umem_has_addrs_rq(rq->umem, count);
+}
+
+int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ struct xdp_umem *umem = rq->umem;
+ u64 handle;
+
+ if (!xsk_umem_peek_addr_rq(umem, &handle))
+ return -ENOMEM;
+
+ dma_info->xsk.handle = handle + rq->buff.umem_headroom;
+ dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
+
+ /* No need to add headroom to the DMA address. In the striding RQ case,
+ * we just provide pages for UMR, and headroom is counted at the setup
+ * stage when creating a WQE. In the non-striding RQ case, headroom is
+ * accounted for in mlx5e_alloc_rx_wqe.
+ */
+ dma_info->addr = xdp_umem_get_dma(umem, handle);
+
+ xsk_umem_discard_addr_rq(umem);
+
+ dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+
+ return 0;
+}
+
+static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
+{
+ xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
+}
+
+/* The XSK RQ uses pages from the UMEM; they must not be released. They are
+ * returned to userspace if possible, and if not, this function is called to
+ * reuse them in the driver.
+ */
+void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
+ struct mlx5e_dma_info *dma_info)
+{
+ mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
+}
+
+/* Return a frame to the hardware to be filled in again. It is used by XDP
+ * when the XDP program returns XDP_TX, or XDP_REDIRECT to a target other
+ * than an XSKMAP.
+ */
+void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
+{
+ struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
+
+ mlx5e_xsk_recycle_frame(rq, handle);
+}
+
+static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
+ u32 cqe_bcnt)
+{
+ struct sk_buff *skb;
+
+ skb = napi_alloc_skb(rq->cq.napi, cqe_bcnt);
+ if (unlikely(!skb)) {
+ rq->stats->buff_alloc_err++;
+ return NULL;
+ }
+
+ skb_put_data(skb, data, cqe_bcnt);
+
+ return skb;
+}
+
+struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
+ struct mlx5e_mpw_info *wi,
+ u16 cqe_bcnt,
+ u32 head_offset,
+ u32 page_idx)
+{
+ struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
+ u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+ u32 cqe_bcnt32 = cqe_bcnt;
+ void *va, *data;
+ u32 frag_size;
+ bool consumed;
+
+ /* Check packet size. Note LRO doesn't use linear SKB */
+ if (unlikely(cqe_bcnt > rq->hw_mtu)) {
+ rq->stats->oversize_pkts_sw_drop++;
+ return NULL;
+ }
+
+ /* head_offset is not used in this function, because di->xsk.data and
+ * di->addr point directly to the necessary place. Furthermore, in the
+ * current implementation, one page = one packet = one frame, so
+ * head_offset should always be 0. 
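+ * (One page per frame holds because mlx5e_validate_xsk_param requires
+ * chunk_size == PAGE_SIZE for the UMEM.)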
+ */ + WARN_ON_ONCE(head_offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt32; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true); + rcu_read_unlock(); + + /* Possible flows: + * - XDP_REDIRECT to XSKMAP: + * The page is owned by the userspace from now. + * - XDP_TX and other XDP_REDIRECTs: + * The page was returned by ZCA and recycled. + * - XDP_DROP: + * Recycle the page. + * - XDP_PASS: + * Allocate an SKB, copy the data and recycle the page. + * + * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its + * size is the same as the Driver RX Ring's size, and pages for WQEs are + * allocated first from the Reuse Ring, so it has enough space. + */ + + if (likely(consumed)) { + if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) + __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ + return NULL; /* page/packet was consumed by XDP */ + } + + /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the + * frame. On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32); +} + +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt) +{ + struct mlx5e_dma_info *di = wi->di; + u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom; + void *va, *data; + bool consumed; + u32 frag_size; + + /* wi->offset is not used in this function, because di->xsk.data and + * di->addr point directly to the necessary place. Furthermore, in the + * current implementation, one page = one packet = one frame, so + * wi->offset should always be 0. + */ + WARN_ON_ONCE(wi->offset); + + va = di->xsk.data; + data = va + rx_headroom; + frag_size = rq->buff.headroom + cqe_bcnt; + + dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL); + prefetch(data); + + if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) { + rq->stats->wqe_err++; + return NULL; + } + + rcu_read_lock(); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true); + rcu_read_unlock(); + + if (likely(consumed)) + return NULL; /* page/packet was consumed by XDP */ + + /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse + * will be handled by mlx5e_put_rx_frag. + * On SKB allocation failure, NULL is returned. + */ + return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h new file mode 100644 index 000000000000..307b923a1361 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_RX_H__ +#define __MLX5_EN_XSK_RX_H__ + +#include "en.h" + +/* RX data path */ + +bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count); +int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info); +void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle); +struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + u16 cqe_bcnt, + u32 head_offset, + u32 page_idx); +struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_wqe_frag_info *wi, + u32 cqe_bcnt); + +#endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c new file mode 100644 index 000000000000..aaffa6f68dc0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include "setup.h" +#include "en/params.h" + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev) +{ + /* AF_XDP doesn't support frames larger than PAGE_SIZE, and the current + * mlx5e XDP implementation doesn't support multiple packets per page. + */ + if (xsk->chunk_size != PAGE_SIZE) + return false; + + /* Current MTU and XSK headroom don't allow packets to fit the frames. */ + if (mlx5e_rx_get_linear_frag_sz(params, xsk) > xsk->chunk_size) + return false; + + /* frag_sz is different for regular and XSK RQs, so ensure that linear + * SKB mode is possible. + */ + switch (params->rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk); + default: /* MLX5_WQ_TYPE_CYCLIC */ + return mlx5e_rx_is_linear_skb(params, xsk); + } +} + +static void mlx5e_build_xskicosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); +} + +static void mlx5e_build_xsk_cparam(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_channel_param *cparam) +{ + const u8 xskicosq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + + mlx5e_build_rq_param(priv, params, xsk, &cparam->rq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_xskicosq_param(priv, xskicosq_size, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, xsk, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, xskicosq_size, &cparam->icosq_cq); +} + +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c) +{ + struct mlx5e_channel_param cparam = {}; + struct dim_cq_moder icocq_moder = {}; + int err; + + if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) + return -EINVAL; + + mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + if (unlikely(err)) + return err; + + err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + if (unlikely(err)) + goto err_close_rx_cq; + + err = mlx5e_open_cq(c, 
params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq);
+ if (unlikely(err))
+ goto err_close_rq;
+
+ /* Create a separate SQ, so that when the UMEM is disabled, we can
+ * close this SQ safely and stop receiving CQEs. Otherwise, e.g., if
+ * the XDPSQ were used instead, we might run into trouble when the UMEM
+ * is disabled and then re-enabled, but the SQ continues receiving CQEs
+ * from the old UMEM.
+ */
+ err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true);
+ if (unlikely(err))
+ goto err_close_tx_cq;
+
+ err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq);
+ if (unlikely(err))
+ goto err_close_sq;
+
+ /* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be
+ * triggered and NAPI to be called on the correct CPU.
+ */
+ err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq);
+ if (unlikely(err))
+ goto err_close_icocq;
+
+ spin_lock_init(&c->xskicosq_lock);
+
+ set_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+
+ return 0;
+
+err_close_icocq:
+ mlx5e_close_cq(&c->xskicosq.cq);
+
+err_close_sq:
+ mlx5e_close_xdpsq(&c->xsksq);
+
+err_close_tx_cq:
+ mlx5e_close_cq(&c->xsksq.cq);
+
+err_close_rq:
+ mlx5e_close_rq(&c->xskrq);
+
+err_close_rx_cq:
+ mlx5e_close_cq(&c->xskrq.cq);
+
+ return err;
+}
+
+void mlx5e_close_xsk(struct mlx5e_channel *c)
+{
+ clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
+ napi_synchronize(&c->napi);
+
+ mlx5e_close_rq(&c->xskrq);
+ mlx5e_close_cq(&c->xskrq.cq);
+ mlx5e_close_icosq(&c->xskicosq);
+ mlx5e_close_cq(&c->xskicosq.cq);
+ mlx5e_close_xdpsq(&c->xsksq);
+ mlx5e_close_cq(&c->xsksq.cq);
+}
+
+void mlx5e_activate_xsk(struct mlx5e_channel *c)
+{
+ set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
+ /* TX queue is created active. */
+ mlx5e_trigger_irq(&c->xskicosq);
+}
+
+void mlx5e_deactivate_xsk(struct mlx5e_channel *c)
+{
+ mlx5e_deactivate_rq(&c->xskrq);
+ /* TX queue is disabled on close. 
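+ * (mlx5e_close_xsk above tears down the XSK SQ together with its CQ,
+ * so no explicit deactivation is needed here.)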
*/ +} + +static int mlx5e_redirect_xsk_rqt(struct mlx5e_priv *priv, u16 ix, u32 rqn) +{ + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = rqn, + }, + }; + + u32 rqtn = priv->xsk_tir[ix].rqt.rqtn; + + return mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); +} + +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) +{ + return mlx5e_redirect_xsk_rqt(priv, c->ix, c->xskrq.rqn); +} + +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) +{ + return mlx5e_redirect_xsk_rqt(priv, ix, priv->drop_rq.rqn); +} + +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int err, i; + + if (!priv->xsk.refcnt) + return 0; + + for (i = 0; i < chs->num; i++) { + struct mlx5e_channel *c = chs->c[i]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + continue; + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); + if (unlikely(err)) + goto err_stop; + } + + return 0; + +err_stop: + for (i--; i >= 0; i--) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } + + return err; +} + +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) +{ + int i; + + if (!priv->xsk.refcnt) + return; + + for (i = 0; i < chs->num; i++) { + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) + continue; + + mlx5e_xsk_redirect_rqt_to_drop(priv, i); + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h new file mode 100644 index 000000000000..0dd11b81c046 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_EN_XSK_SETUP_H__ +#define __MLX5_EN_XSK_SETUP_H__ + +#include "en.h" + +struct mlx5e_xsk_param; + +bool mlx5e_validate_xsk_param(struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5_core_dev *mdev); +int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, + struct mlx5e_channel *c); +void mlx5e_close_xsk(struct mlx5e_channel *c); +void mlx5e_activate_xsk(struct mlx5e_channel *c); +void mlx5e_deactivate_xsk(struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); +int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); +int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); +void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); + +#endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c new file mode 100644 index 000000000000..35e188cf4ea4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#include "tx.h" +#include "umem.h" +#include "en/xdp.h" +#include "en/params.h" +#include <net/xdp_sock.h> + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_channel *c; + u16 ix; + + if (unlikely(!mlx5e_xdp_is_open(priv))) + return -ENETDOWN; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + c = priv->channels.c[ix]; + + if (unlikely(!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))) + return -ENXIO; + + if (!napi_if_scheduled_mark_missed(&c->napi)) { + spin_lock(&c->xskicosq_lock); + mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); + } + + return 0; +} + +/* When TX fails (because of the size of the packet), we need to get completions + * in order, so post a NOP to get a CQE. Since AF_XDP doesn't distinguish + * between successful TX and errors, handling in mlx5e_poll_xdpsq_cq is the + * same. + */ +static void mlx5e_xsk_tx_post_err(struct mlx5e_xdpsq *sq, + struct mlx5e_xdp_info *xdpi) +{ + u16 pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + struct mlx5e_xdp_wqe_info *wi = &sq->db.wqe_info[pi]; + struct mlx5e_tx_wqe *nopwqe; + + wi->num_wqebbs = 1; + wi->num_pkts = 1; + + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi); + sq->doorbell_cseg = &nopwqe->ctrl; +} + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget) +{ + struct xdp_umem *umem = sq->umem; + struct mlx5e_xdp_info xdpi; + struct mlx5e_xdp_xmit_data xdptxd; + bool work_done = true; + bool flush = false; + + xdpi.mode = MLX5E_XDP_XMIT_MODE_XSK; + + for (; budget; budget--) { + int check_result = sq->xmit_xdp_frame_check(sq); + struct xdp_desc desc; + + if (unlikely(check_result < 0)) { + work_done = false; + break; + } + + if (!xsk_umem_consume_tx(umem, &desc)) { + /* TX will get stuck until something wakes it up by + * triggering NAPI. Currently it's expected that the + * application calls sendto() if there are consumed, but + * not completed frames. + */ + break; + } + + xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr); + xdptxd.data = xdp_umem_get_data(umem, desc.addr); + xdptxd.len = desc.len; + + dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr, + xdptxd.len, DMA_BIDIRECTIONAL); + + if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + mlx5e_xsk_tx_post_err(sq, &xdpi); + } + + flush = true; + } + + if (flush) { + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + mlx5e_xmit_xdp_doorbell(sq); + + xsk_umem_consume_tx_done(umem); + } + + return !(budget && work_done); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h new file mode 100644 index 000000000000..7add18bf78d8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_TX_H__ +#define __MLX5_EN_XSK_TX_H__ + +#include "en.h" + +/* TX data path */ + +int mlx5e_xsk_async_xmit(struct net_device *dev, u32 qid); + +bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget); + +#endif /* __MLX5_EN_XSK_TX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c new file mode 100644 index 000000000000..4baaa5788320 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <net/xdp_sock.h> +#include "umem.h" +#include "setup.h" +#include "en/params.h" + +static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE, + DMA_BIDIRECTIONAL); + + if (unlikely(dma_mapping_error(dev, dma))) + goto err_unmap; + umem->pages[i].dma = dma; + } + + return 0; + +err_unmap: + while (i--) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } + + return -ENOMEM; +} + +static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv, + struct xdp_umem *umem) +{ + struct device *dev = priv->mdev->device; + u32 i; + + for (i = 0; i < umem->npgs; i++) { + dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); + umem->pages[i].dma = 0; + } +} + +static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk) +{ + if (!xsk->umems) { + xsk->umems = kcalloc(MLX5E_MAX_NUM_CHANNELS, + sizeof(*xsk->umems), GFP_KERNEL); + if (unlikely(!xsk->umems)) + return -ENOMEM; + } + + xsk->refcnt++; + xsk->ever_used = true; + + return 0; +} + +static void mlx5e_xsk_put_umems(struct mlx5e_xsk *xsk) +{ + if (!--xsk->refcnt) { + kfree(xsk->umems); + xsk->umems = NULL; + } +} + +static int mlx5e_xsk_add_umem(struct mlx5e_xsk *xsk, struct xdp_umem *umem, u16 ix) +{ + int err; + + err = mlx5e_xsk_get_umems(xsk); + if (unlikely(err)) + return err; + + xsk->umems[ix] = umem; + return 0; +} + +static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix) +{ + xsk->umems[ix] = NULL; + + mlx5e_xsk_put_umems(xsk); +} + +static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem) +{ + return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff; +} + +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk) +{ + xsk->headroom = umem->headroom; + xsk->chunk_size = umem->chunk_size_nohr + umem->headroom; +} + +static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, + struct xdp_umem *umem, u16 ix) +{ + struct mlx5e_params *params = &priv->channels.params; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + int err; + + if (unlikely(mlx5e_xsk_get_umem(&priv->channels.params, &priv->xsk, ix))) + return -EBUSY; + + if (unlikely(!mlx5e_xsk_is_umem_sane(umem))) + return -EINVAL; + + err = mlx5e_xsk_map_umem(priv, umem); + if (unlikely(err)) + return err; + + err = mlx5e_xsk_add_umem(&priv->xsk, umem, ix); + if (unlikely(err)) + goto err_unmap_umem; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + /* XSK objects will be created on open. */ + goto validate_closed; + } + + if (!params->xdp_prog) { + /* XSK objects will be created when an XDP program is set, + * and the channels are reopened. 
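+ * (mlx5e_xsk_disable_locked below applies the same rule on teardown:
+ * the XSK RQ and SQ exist only while an XDP program is set.)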
+ */ + goto validate_closed; + } + + c = priv->channels.c[ix]; + + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_remove_umem; + + mlx5e_activate_xsk(c); + + /* Don't wait for WQEs, because the newer xdpsock sample doesn't provide + * any Fill Ring entries at the setup stage. + */ + + err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + if (unlikely(err)) + goto err_deactivate; + + return 0; + +err_deactivate: + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +err_remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + +err_unmap_umem: + mlx5e_xsk_unmap_umem(priv, umem); + + return err; + +validate_closed: + /* Check the configuration in advance, rather than fail at a later stage + * (in mlx5e_xdp_set or on open) and end up with no channels. + */ + if (!mlx5e_validate_xsk_param(params, &xsk, priv->mdev)) { + err = -EINVAL; + goto err_remove_umem; + } + + return 0; +} + +static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) +{ + struct xdp_umem *umem = mlx5e_xsk_get_umem(&priv->channels.params, + &priv->xsk, ix); + struct mlx5e_channel *c; + + if (unlikely(!umem)) + return -EINVAL; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto remove_umem; + + /* XSK RQ and SQ are only created if XDP program is set. */ + if (!priv->channels.params.xdp_prog) + goto remove_umem; + + c = priv->channels.c[ix]; + mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_deactivate_xsk(c); + mlx5e_close_xsk(c); + +remove_umem: + mlx5e_xsk_remove_umem(&priv->xsk, ix); + mlx5e_xsk_unmap_umem(priv, umem); + + return 0; +} + +static int mlx5e_xsk_enable_umem(struct mlx5e_priv *priv, struct xdp_umem *umem, + u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_enable_locked(priv, umem, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +static int mlx5e_xsk_disable_umem(struct mlx5e_priv *priv, u16 ix) +{ + int err; + + mutex_lock(&priv->state_lock); + err = mlx5e_xsk_disable_locked(priv, ix); + mutex_unlock(&priv->state_lock); + + return err; +} + +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_params *params = &priv->channels.params; + u16 ix; + + if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))) + return -EINVAL; + + return umem ? mlx5e_xsk_enable_umem(priv, umem, ix) : + mlx5e_xsk_disable_umem(priv, ix); +} + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries) +{ + struct xdp_umem_fq_reuse *reuseq; + + reuseq = xsk_reuseq_prepare(nentries); + if (unlikely(!reuseq)) + return -ENOMEM; + xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq)); + + return 0; +} + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk) +{ + u16 res = xsk->refcnt ? params->num_channels : 0; + + while (res) { + if (mlx5e_xsk_get_umem(params, xsk, res - 1)) + break; + --res; + } + + return res; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h new file mode 100644 index 000000000000..25b4cbe58b54 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_EN_XSK_UMEM_H__ +#define __MLX5_EN_XSK_UMEM_H__ + +#include "en.h" + +static inline struct xdp_umem *mlx5e_xsk_get_umem(struct mlx5e_params *params, + struct mlx5e_xsk *xsk, u16 ix) +{ + if (!xsk || !xsk->umems) + return NULL; + + if (unlikely(ix >= params->num_channels)) + return NULL; + + return xsk->umems[ix]; +} + +struct mlx5e_xsk_param; +void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk); + +/* .ndo_bpf callback. */ +int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid); + +int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries); + +u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk); + +#endif /* __MLX5_EN_XSK_UMEM_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 6da7c88742dc..3022463f2284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -39,6 +39,7 @@ #include "en_accel/ipsec_rxtx.h" #include "en_accel/tls_rxtx.h" #include "en.h" +#include "en/txrx.h" #if IS_ENABLED(CONFIG_GENEVE) static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index ca47c0540904..db84500b024f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -39,6 +39,7 @@ #include <linux/skbuff.h> #include <net/xfrm.h> #include "en.h" +#include "en/txrx.h" struct sk_buff *mlx5e_ipsec_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, u32 *cqe_bcnt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c new file mode 100644 index 000000000000..d2ff74d52720 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. 
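/* A minimal editorial sketch (not part of this patch) of how the
 * mlx5e_xsk_get_umem helper above is meant to compose with
 * mlx5e_qid_get_ch_if_in_group, following what mlx5e_xsk_setup_umem and
 * mlx5e_xsk_async_xmit do; the helper name is hypothetical:
 *
 * static int xsk_qid_to_umem_sketch(struct mlx5e_priv *priv, u16 qid,
 *				     struct xdp_umem **umem)
 * {
 *	struct mlx5e_params *params = &priv->channels.params;
 *	u16 ix;
 *
 *	// qids in the upper half of the queues form the XSK group
 *	if (!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix))
 *		return -EINVAL;
 *
 *	*umem = mlx5e_xsk_get_umem(params, &priv->xsk, ix);
 *	return *umem ? 0 : -ENXIO;
 * }
 */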
+ +#include "en.h" +#include "en_accel/ktls.h" + +static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, tls_en, 1); + + return mlx5e_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ktls_offload_context_tx *tx_priv; + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + if (WARN_ON(direction != TLS_OFFLOAD_CTX_DIR_TX)) + return -EINVAL; + + if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info))) + return -EOPNOTSUPP; + + tx_priv = kvzalloc(sizeof(*tx_priv), GFP_KERNEL); + if (!tx_priv) + return -ENOMEM; + + tx_priv->expected_seq = start_offload_tcp_sn; + tx_priv->crypto_info = crypto_info; + mlx5e_set_ktls_tx_priv_ctx(tls_ctx, tx_priv); + + /* tc and underlay_qpn values are not in use for tls tis */ + err = mlx5e_ktls_create_tis(mdev, &tx_priv->tisn); + if (err) + goto create_tis_fail; + + err = mlx5_ktls_create_key(mdev, crypto_info, &tx_priv->key_id); + if (err) + goto encryption_key_create_fail; + + mlx5e_ktls_tx_offload_set_pending(tx_priv); + + return 0; + +encryption_key_create_fail: + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); +create_tis_fail: + kvfree(tx_priv); + return err; +} + +static void mlx5e_ktls_del(struct net_device *netdev, + struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_ktls_offload_context_tx *tx_priv = + mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + mlx5_ktls_destroy_key(priv->mdev, tx_priv->key_id); + mlx5e_destroy_tis(priv->mdev, tx_priv->tisn); + kvfree(tx_priv); +} + +static const struct tlsdev_ops mlx5e_ktls_ops = { + .tls_dev_add = mlx5e_ktls_add, + .tls_dev_del = mlx5e_ktls_del, +}; + +void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + + if (!mlx5_accel_is_ktls_device(priv->mdev)) + return; + + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + + netdev->tlsdev_ops = &mlx5e_ktls_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h new file mode 100644 index 000000000000..407da83474ef --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/
+
+#ifndef __MLX5E_KTLS_H__
+#define __MLX5E_KTLS_H__
+
+#include "en.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+#include <net/tls.h>
+#include "accel/tls.h"
+
+#define MLX5E_KTLS_STATIC_UMR_WQE_SZ \
+ (sizeof(struct mlx5e_umr_wqe) + MLX5_ST_SZ_BYTES(tls_static_params))
+#define MLX5E_KTLS_STATIC_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_BB))
+
+#define MLX5E_KTLS_PROGRESS_WQE_SZ \
+ (sizeof(struct mlx5e_tx_wqe) + MLX5_ST_SZ_BYTES(tls_progress_params))
+#define MLX5E_KTLS_PROGRESS_WQEBBS \
+ (DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_BB))
+#define MLX5E_KTLS_MAX_DUMP_WQEBBS 2
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_OFFLOAD = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_AUTHENTICATION = 2,
+};
+
+enum {
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1,
+ MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2,
+};
+
+struct mlx5e_ktls_offload_context_tx {
+ struct tls_offload_context_tx *tx_ctx;
+ struct tls_crypto_info *crypto_info;
+ u32 expected_seq;
+ u32 tisn;
+ u32 key_id;
+ bool ctx_post_pending;
+};
+
+struct mlx5e_ktls_offload_context_tx_shadow {
+ struct tls_offload_context_tx tx_ctx;
+ struct mlx5e_ktls_offload_context_tx *priv_tx;
+};
+
+static inline void
+mlx5e_set_ktls_tx_priv_ctx(struct tls_context *tls_ctx,
+ struct mlx5e_ktls_offload_context_tx *priv_tx)
+{
+ struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+ struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+ BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+ shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+ shadow->priv_tx = priv_tx;
+ priv_tx->tx_ctx = tx_ctx;
+}
+
+static inline struct mlx5e_ktls_offload_context_tx *
+mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
+{
+ struct tls_offload_context_tx *tx_ctx = tls_offload_ctx_tx(tls_ctx);
+ struct mlx5e_ktls_offload_context_tx_shadow *shadow;
+
+ BUILD_BUG_ON(sizeof(*shadow) > TLS_OFFLOAD_CONTEXT_SIZE_TX);
+
+ shadow = (struct mlx5e_ktls_offload_context_tx_shadow *)tx_ctx;
+
+ return shadow->priv_tx;
+}
+
+void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
+void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
+
+struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
+ struct mlx5e_txqsq *sq,
+ struct sk_buff *skb,
+ struct mlx5e_tx_wqe **wqe, u16 *pi);
+void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
+ struct mlx5e_tx_wqe_info *wi,
+ struct mlx5e_sq_dma *dma);
+
+#else
+
+static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv)
+{
+}
+
+#endif
+
+#endif /* __MLX5E_KTLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
new file mode 100644
index 000000000000..5c08891806f0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2019 Mellanox Technologies. 
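/* An editorial sketch (not part of this patch) of the WQEBB sizing math
 * behind the MLX5E_KTLS_*_WQEBBS macros in ktls.h above; 64 is the value
 * of MLX5_SEND_WQE_BB, the WQE size in the example is made up:
 *
 *	// a 160-byte static-params UMR WQE needs ceil(160 / 64) = 3 WQEBBs,
 *	// which is what DIV_ROUND_UP(wqe_sz, MLX5_SEND_WQE_BB) computes
 *	u16 wqebbs = DIV_ROUND_UP(160, 64);	// == 3
 */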
+ +#include <linux/tls.h> +#include "en.h" +#include "en/txrx.h" +#include "en_accel/ktls.h" + +enum { + MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2 = 0x2, +}; + +enum { + MLX5E_ENCRYPTION_STANDARD_TLS = 0x1, +}; + +#define EXTRACT_INFO_FIELDS do { \ + salt = info->salt; \ + rec_seq = info->rec_seq; \ + salt_sz = sizeof(info->salt); \ + rec_seq_sz = sizeof(info->rec_seq); \ +} while (0) + +static void +fill_static_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + char *initial_rn, *gcm_iv; + u16 salt_sz, rec_seq_sz; + char *salt, *rec_seq; + u8 tls_version; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + EXTRACT_INFO_FIELDS; + break; + } + default: + WARN_ON(1); + return; + } + + gcm_iv = MLX5_ADDR_OF(tls_static_params, ctx, gcm_iv); + initial_rn = MLX5_ADDR_OF(tls_static_params, ctx, initial_record_number); + + memcpy(gcm_iv, salt, salt_sz); + memcpy(initial_rn, rec_seq, rec_seq_sz); + + tls_version = MLX5E_STATIC_PARAMS_CONTEXT_TLS_1_2; + + MLX5_SET(tls_static_params, ctx, tls_version, tls_version); + MLX5_SET(tls_static_params, ctx, const_1, 1); + MLX5_SET(tls_static_params, ctx, const_2, 2); + MLX5_SET(tls_static_params, ctx, encryption_standard, + MLX5E_ENCRYPTION_STANDARD_TLS); + MLX5_SET(tls_static_params, ctx, dek_index, priv_tx->key_id); +} + +static void +build_static_params(struct mlx5e_umr_wqe *wqe, u16 pc, u32 sqn, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + +#define STATIC_PARAMS_DS_CNT \ + DIV_ROUND_UP(MLX5E_KTLS_STATIC_UMR_WQE_SZ, MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = cpu_to_be32((pc << 8) | MLX5_OPCODE_UMR | + (MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + STATIC_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + cseg->imm = cpu_to_be32(priv_tx->tisn); + + ucseg->flags = MLX5_UMR_INLINE; + ucseg->bsf_octowords = cpu_to_be16(MLX5_ST_SZ_BYTES(tls_static_params) / 16); + + fill_static_params_ctx(wqe->tls_static_params_ctx, priv_tx); +} + +static void +fill_progress_params_ctx(void *ctx, struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + MLX5_SET(tls_progress_params, ctx, pd, priv_tx->tisn); + MLX5_SET(tls_progress_params, ctx, record_tracker_state, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START); + MLX5_SET(tls_progress_params, ctx, auth_state, + MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD); +} + +static void +build_progress_params(struct mlx5e_tx_wqe *wqe, u16 pc, u32 sqn, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + +#define PROGRESS_PARAMS_DS_CNT \ + DIV_ROUND_UP(MLX5E_KTLS_PROGRESS_WQE_SZ, MLX5_SEND_WQE_DS) + + cseg->opmod_idx_opcode = + cpu_to_be32((pc << 8) | MLX5_OPCODE_SET_PSV | + (MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS << 24)); + cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + PROGRESS_PARAMS_DS_CNT); + cseg->fm_ce_se = fence ? 
MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + fill_progress_params_ctx(wqe->data, priv_tx); +} + +static void tx_fill_wi(struct mlx5e_txqsq *sq, + u16 pi, u8 num_wqebbs, + skb_frag_t *resync_dump_frag) +{ + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + wi->skb = NULL; + wi->num_wqebbs = num_wqebbs; + wi->resync_dump_frag = resync_dump_frag; +} + +void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + priv_tx->ctx_post_pending = true; +} + +static bool +mlx5e_ktls_tx_offload_test_and_clear_pending(struct mlx5e_ktls_offload_context_tx *priv_tx) +{ + bool ret = priv_tx->ctx_post_pending; + + priv_tx->ctx_post_pending = false; + + return ret; +} + +static void +post_static_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_umr_wqe *umr_wqe; + u16 pi; + + umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi); + build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence); + tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, NULL); + sq->pc += MLX5E_KTLS_STATIC_WQEBBS; +} + +static void +post_progress_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool fence) +{ + struct mlx5e_tx_wqe *wqe; + u16 pi; + + wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi); + build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence); + tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, NULL); + sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS; +} + +static void +mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + bool skip_static_post, bool fence_first_post) +{ + bool progress_fence = skip_static_post || !fence_first_post; + + if (!skip_static_post) + post_static_params(sq, priv_tx, fence_first_post); + + post_progress_params(sq, priv_tx, progress_fence); +} + +struct tx_sync_info { + u64 rcd_sn; + s32 sync_len; + int nr_frags; + skb_frag_t *frags[MAX_SKB_FRAGS]; +}; + +static bool tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, + u32 tcp_seq, struct tx_sync_info *info) +{ + struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; + struct tls_record_info *record; + int remaining, i = 0; + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&tx_ctx->lock, flags); + record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); + + if (unlikely(!record)) { + ret = false; + goto out; + } + + if (unlikely(tcp_seq < tls_record_start_seq(record))) { + if (!tls_record_is_start_marker(record)) + ret = false; + goto out; + } + + info->sync_len = tcp_seq - tls_record_start_seq(record); + remaining = info->sync_len; + while (remaining > 0) { + skb_frag_t *frag = &record->frags[i]; + + __skb_frag_ref(frag); + remaining -= skb_frag_size(frag); + info->frags[i++] = frag; + } + /* reduce the part which will be sent with the original SKB */ + if (remaining < 0) + skb_frag_size_add(info->frags[i - 1], remaining); + info->nr_frags = i; +out: + spin_unlock_irqrestore(&tx_ctx->lock, flags); + return ret; +} + +static void +tx_post_resync_params(struct mlx5e_txqsq *sq, + struct mlx5e_ktls_offload_context_tx *priv_tx, + u64 rcd_sn) +{ + struct tls_crypto_info *crypto_info = priv_tx->crypto_info; + __be64 rn_be = cpu_to_be64(rcd_sn); + bool skip_static_post; + u16 rec_seq_sz; + char *rec_seq; + + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: { + struct tls12_crypto_info_aes_gcm_128 *info = + (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + + rec_seq = info->rec_seq; + rec_seq_sz = 
sizeof(info->rec_seq); + break; + } + default: + WARN_ON(1); + return; + } + + skip_static_post = !memcmp(rec_seq, &rn_be, rec_seq_sz); + if (!skip_static_post) + memcpy(rec_seq, &rn_be, rec_seq_sz); + + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, skip_static_post, true); +} + +static int +tx_post_resync_dump(struct mlx5e_txqsq *sq, struct sk_buff *skb, + skb_frag_t *frag, u32 tisn, bool first) +{ + struct mlx5_wqe_ctrl_seg *cseg; + struct mlx5_wqe_eth_seg *eseg; + struct mlx5_wqe_data_seg *dseg; + struct mlx5e_tx_wqe *wqe; + dma_addr_t dma_addr = 0; + u16 ds_cnt, ds_cnt_inl; + u8 num_wqebbs; + u16 pi, ihs; + int fsz; + + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + ihs = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); + ds_cnt_inl = DIV_ROUND_UP(ihs - INL_HDR_START_SZ, MLX5_SEND_WQE_DS); + ds_cnt += ds_cnt_inl; + ds_cnt += 1; /* one frag */ + + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + + cseg = &wqe->ctrl; + eseg = &wqe->eth; + dseg = wqe->data; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + cseg->imm = cpu_to_be32(tisn); + cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; + + eseg->inline_hdr.sz = cpu_to_be16(ihs); + memcpy(eseg->inline_hdr.start, skb->data, ihs); + dseg += ds_cnt_inl; + + fsz = skb_frag_size(frag); + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) + return -ENOMEM; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; + dseg->byte_count = cpu_to_be32(fsz); + mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); + + tx_fill_wi(sq, pi, num_wqebbs, frag); + sq->pc += num_wqebbs; + + WARN(num_wqebbs > MLX5E_KTLS_MAX_DUMP_WQEBBS, + "unexpected DUMP num_wqebbs, %d > %d", + num_wqebbs, MLX5E_KTLS_MAX_DUMP_WQEBBS); + + return 0; +} + +void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, + struct mlx5e_tx_wqe_info *wi, + struct mlx5e_sq_dma *dma) +{ + struct mlx5e_sq_stats *stats = sq->stats; + + mlx5e_tx_dma_unmap(sq->pdev, dma); + __skb_frag_unref(wi->resync_dump_frag); + stats->tls_dump_packets++; + stats->tls_dump_bytes += wi->num_bytes; +} + +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, NULL); + + mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); +} + +static struct sk_buff * +mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + u32 seq) +{ + struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5_wq_cyc *wq = &sq->wq; + struct tx_sync_info info = {}; + u16 contig_wqebbs_room, pi; + u8 num_wqebbs; + int i; + + if (!tx_sync_info_get(priv_tx, seq, &info)) { + /* We might get here if a retransmission reaches the driver + * after the relevant record is acked. + * It should be safe to drop the packet in this case + */ + stats->tls_drop_no_sync_data++; + goto err_out; + } + + if (unlikely(info.sync_len < 0)) { + u32 payload; + int headln; + + headln = skb_transport_offset(skb) + tcp_hdrlen(skb); + payload = skb->len - headln; + if (likely(payload <= -info.sync_len)) + return skb; + + stats->tls_drop_bypass_req++; + goto err_out; + } + + stats->tls_ooo++; + + num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS + + (info.nr_frags ? 
info.nr_frags * MLX5E_KTLS_MAX_DUMP_WQEBBS : 1); + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + + tx_post_resync_params(sq, priv_tx, info.rcd_sn); + + for (i = 0; i < info.nr_frags; i++) + if (tx_post_resync_dump(sq, skb, info.frags[i], + priv_tx->tisn, !i)) + goto err_out; + + /* If no dump WQE was sent, we need to have a fence NOP WQE before the + * actual data xmit. + */ + if (!info.nr_frags) + tx_post_fence_nop(sq); + + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} + +struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev, + struct mlx5e_txqsq *sq, + struct sk_buff *skb, + struct mlx5e_tx_wqe **wqe, u16 *pi) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_sq_stats *stats = sq->stats; + struct mlx5_wqe_ctrl_seg *cseg; + struct tls_context *tls_ctx; + int datalen; + u32 seq; + + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + goto out; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + if (!datalen) + goto out; + + tls_ctx = tls_get_ctx(skb->sk); + if (unlikely(tls_ctx->netdev != netdev)) + goto err_out; + + priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); + + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { + mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + stats->tls_ctx++; + } + + seq = ntohl(tcp_hdr(skb)->seq); + if (unlikely(priv_tx->expected_seq != seq)) { + skb = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, skb, seq); + if (unlikely(!skb)) + goto out; + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); + } + + priv_tx->expected_seq = seq + datalen; + + cseg = &(*wqe)->ctrl; + cseg->imm = cpu_to_be32(priv_tx->tisn); + + stats->tls_encrypted_packets += skb_is_gso(skb) ? 
skb_shinfo(skb)->gso_segs : 1; + stats->tls_encrypted_bytes += datalen; + +out: + return skb; + +err_out: + dev_kfree_skb_any(skb); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c index e88340e196f7..fba561ffe1d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c @@ -160,25 +160,31 @@ static void mlx5e_tls_del(struct net_device *netdev, direction == TLS_OFFLOAD_CTX_DIR_TX); } -static void mlx5e_tls_resync_rx(struct net_device *netdev, struct sock *sk, - u32 seq, u64 rcd_sn) +static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk, + u32 seq, u8 *rcd_sn_data, + enum tls_offload_ctx_dir direction) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_tls_offload_context_rx *rx_ctx; + u64 rcd_sn = *(u64 *)rcd_sn_data; + if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX)) + return -EINVAL; rx_ctx = mlx5e_get_tls_rx_context(tls_ctx); netdev_info(netdev, "resyncing seq %d rcd %lld\n", seq, be64_to_cpu(rcd_sn)); mlx5_accel_tls_resync_rx(priv->mdev, rx_ctx->handle, seq, rcd_sn); atomic64_inc(&priv->tls->sw_stats.rx_tls_resync_reply); + + return 0; } static const struct tlsdev_ops mlx5e_tls_ops = { .tls_dev_add = mlx5e_tls_add, .tls_dev_del = mlx5e_tls_del, - .tls_dev_resync_rx = mlx5e_tls_resync_rx, + .tls_dev_resync = mlx5e_tls_resync, }; void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) @@ -186,6 +192,11 @@ void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) struct net_device *netdev = priv->netdev; u32 caps; + if (mlx5_accel_is_ktls_device(priv->mdev)) { + mlx5e_ktls_build_netdev(priv); + return; + } + if (!mlx5_accel_is_tls_device(priv->mdev)) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h index 3f5d72163b56..9015f3f7792d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h @@ -33,8 +33,10 @@ #ifndef __MLX5E_TLS_H__ #define __MLX5E_TLS_H__ -#ifdef CONFIG_MLX5_EN_TLS +#include "accel/tls.h" +#include "en_accel/ktls.h" +#ifdef CONFIG_MLX5_EN_TLS #include <net/tls.h> #include "en.h" @@ -94,7 +96,12 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data); #else -static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) { } +static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv) +{ + if (mlx5_accel_is_ktls_device(priv->mdev)) + mlx5e_ktls_build_netdev(priv); +} + static inline int mlx5e_tls_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tls_cleanup(struct mlx5e_priv *priv) { } static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c index 439bf5953885..71384ad1a443 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c @@ -248,7 +248,7 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context, mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln, cpu_to_be64(info.rcd_sn)); mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true); - mlx5e_sq_fetch_wqe(sq, wqe, pi); + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); return skb; err_out: @@ -269,6 +269,11 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device 
*netdev, int datalen; u32 skb_seq; + if (MLX5_CAP_GEN(sq->channel->mdev, tls)) { + skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi); + goto out; + } + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h index 311667ec71b8..90bc1f2384c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h @@ -38,6 +38,7 @@ #include <linux/skbuff.h> #include "en.h" +#include "en/txrx.h" struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 554672edf8c3..8dd31b5c740c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -680,7 +680,7 @@ static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev, memset(perm_addr, 0xff, MAX_ADDR_LEN); - mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr); + mlx5_query_mac_address(priv->mdev, perm_addr); } static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c index d67adf70a97b..ca9cfbf57d8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dim.c @@ -30,22 +30,22 @@ * SOFTWARE. */ -#include <linux/net_dim.h> +#include <linux/dim.h> #include "en.h" static void -mlx5e_complete_dim_work(struct net_dim *dim, struct net_dim_cq_moder moder, +mlx5e_complete_dim_work(struct dim *dim, struct dim_cq_moder moder, struct mlx5_core_dev *mdev, struct mlx5_core_cq *mcq) { mlx5_core_modify_cq_moderation(mdev, mcq, moder.usec, moder.pkts); - dim->state = NET_DIM_START_MEASURE; + dim->state = DIM_START_MEASURE; } void mlx5e_rx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_rq *rq = container_of(dim, struct mlx5e_rq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, rq->mdev, &rq->cq.mcq); @@ -53,9 +53,9 @@ void mlx5e_rx_dim_work(struct work_struct *work) void mlx5e_tx_dim_work(struct work_struct *work) { - struct net_dim *dim = container_of(work, struct net_dim, work); + struct dim *dim = container_of(work, struct dim, work); struct mlx5e_txqsq *sq = container_of(dim, struct mlx5e_txqsq, dim); - struct net_dim_cq_moder cur_moder = + struct dim_cq_moder cur_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); mlx5e_complete_dim_work(dim, cur_moder, sq->cq.mdev, &sq->cq.mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index dd764e0471f2..126ec4181286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -32,6 +32,7 @@ #include "en.h" #include "en/port.h" +#include "en/xsk/umem.h" #include "lib/clock.h" void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, @@ -46,7 +47,7 @@ void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv, "%d.%d.%04d (%.16s)", fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), mdev->board_id); - 
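The en_dim.c hunk above is a mechanical rename (net_dim to the generic dim library), but the callback pattern it preserves is worth spelling out: the work handler recovers its dim context with container_of, programs the CQ with the profile the algorithm selected, and re-arms measurement by resetting the state. A condensed, compilable sketch of that shape, assuming a mocked-up queue and a made-up profile table (the real driver goes through mlx5_core_modify_cq_moderation and a workqueue):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

enum dim_state { DIM_START_MEASURE, DIM_MEASURE_IN_PROGRESS };

struct dim_cq_moder { unsigned short usec, pkts; };
struct dim { enum dim_state state; unsigned char profile_ix; };

/* Hypothetical RX queue embedding its DIM context, as mlx5e_rq embeds dim. */
struct mock_rq { int cqn; struct dim dim; };

static const struct dim_cq_moder rx_profiles[] = {
	{ 1, 2 }, { 8, 64 }, { 64, 128 }, /* illustrative values only */
};

/* Stands in for the firmware command that updates CQ moderation. */
static void modify_cq_moderation(int cqn, struct dim_cq_moder m)
{
	printf("cq %d: usec=%u pkts=%u\n", cqn, m.usec, m.pkts);
}

/* The work callback: apply the chosen profile, then restart measurement. */
static void rx_dim_work(struct dim *dim)
{
	struct mock_rq *rq = container_of(dim, struct mock_rq, dim);

	modify_cq_moderation(rq->cqn, rx_profiles[dim->profile_ix]);
	dim->state = DIM_START_MEASURE;
}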
strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), + strlcpy(drvinfo->bus_info, dev_name(mdev->device), sizeof(drvinfo->bus_info)); } @@ -388,8 +389,17 @@ static int mlx5e_set_ringparam(struct net_device *dev, void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + mutex_lock(&priv->state_lock); + ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev); ch->combined_count = priv->channels.params.num_channels; + if (priv->xsk.refcnt) { + /* The upper half are XSK queues. */ + ch->max_combined *= 2; + ch->combined_count *= 2; + } + + mutex_unlock(&priv->state_lock); } static void mlx5e_get_channels(struct net_device *dev, @@ -403,6 +413,7 @@ static void mlx5e_get_channels(struct net_device *dev, int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, struct ethtool_channels *ch) { + struct mlx5e_params *cur_params = &priv->channels.params; unsigned int count = ch->combined_count; struct mlx5e_channels new_channels = {}; bool arfs_enabled; @@ -414,16 +425,26 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, return -EINVAL; } - if (priv->channels.params.num_channels == count) + if (cur_params->num_channels == count) return 0; mutex_lock(&priv->state_lock); + /* Don't allow changing the number of channels if there is an active + * XSK, because the numbering of the XSK and regular RQs will change. + */ + if (priv->xsk.refcnt) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", + __func__); + goto out; + } + new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; + *cur_params = new_channels.params; if (!netif_is_rxfh_configured(priv->netdev)) mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); @@ -466,7 +487,7 @@ static int mlx5e_set_channels(struct net_device *dev, int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; @@ -521,7 +542,7 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct net_dim_cq_moder *rx_moder, *tx_moder; + struct dim_cq_moder *rx_moder, *tx_moder; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channels new_channels = {}; int err = 0; @@ -1867,40 +1888,6 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) return priv->channels.params.pflags; } -int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, - struct ethtool_flash *flash) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct net_device *dev = priv->netdev; - const struct firmware *fw; - int err; - - if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) - return -EOPNOTSUPP; - - err = request_firmware_direct(&fw, flash->data, &dev->dev); - if (err) - return err; - - dev_hold(dev); - rtnl_unlock(); - - err = mlx5_firmware_flash(mdev, fw); - release_firmware(fw); - - rtnl_lock(); - dev_put(dev); - return err; -} - -static int mlx5e_flash_device(struct net_device *dev, - struct ethtool_flash *flash) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - return mlx5e_ethtool_flash_device(priv, flash); -} - #ifndef CONFIG_MLX5_EN_RXNFC /* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS * otherwise this function
will be defined from en_fs_ethtool.c @@ -1939,7 +1926,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { #ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, #endif - .flash_device = mlx5e_flash_device, .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 4421c10f58ae..ea3a490b569a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -32,6 +32,8 @@ #include <linux/mlx5/fs.h> #include "en.h" +#include "en/params.h" +#include "en/xsk/umem.h" struct mlx5e_ethtool_rule { struct list_head list; @@ -414,6 +416,14 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, if (fs->ring_cookie == RX_CLS_FLOW_DISC) { flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; } else { + struct mlx5e_params *params = &priv->channels.params; + enum mlx5e_rq_group group; + struct mlx5e_tir *tir; + u16 ix; + + mlx5e_qid_get_ch_and_group(params, fs->ring_cookie, &ix, &group); + tir = group == MLX5E_RQ_GROUP_XSK ? priv->xsk_tir : priv->direct_tir; + dst = kzalloc(sizeof(*dst), GFP_KERNEL); if (!dst) { err = -ENOMEM; @@ -421,12 +431,12 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn; + dst->tir_num = tir[ix].tirn; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)); - flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + spec->flow_context.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, dst ? 1 : 0); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -600,9 +610,9 @@ static int validate_flow(struct mlx5e_priv *priv, if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) return -ENOSPC; - if (fs->ring_cookie >= priv->channels.params.num_channels && - fs->ring_cookie != RX_CLS_FLOW_DISC) - return -EINVAL; + if (fs->ring_cookie != RX_CLS_FLOW_DISC) + if (!mlx5e_qid_validate(&priv->channels.params, fs->ring_cookie)) + return -EINVAL; switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) { case ETHER_FLOW: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a8e8350b38aa..6d0ae87c8ded 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -38,8 +38,10 @@ #include <linux/bpf.h> #include <linux/if_bridge.h> #include <net/page_pool.h> +#include <net/xdp_sock.h> #include "eswitch.h" #include "en.h" +#include "en/txrx.h" #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" @@ -56,35 +58,11 @@ #include "en/monitor_stats.h" #include "en/reporter.h" #include "en/params.h" +#include "en/xsk/umem.h" +#include "en/xsk/setup.h" +#include "en/xsk/rx.h" +#include "en/xsk/tx.h" -struct mlx5e_rq_param { - u32 rqc[MLX5_ST_SZ_DW(rqc)]; - struct mlx5_wq_param wq; - struct mlx5e_rq_frags_info frags_info; -}; - -struct mlx5e_sq_param { - u32 sqc[MLX5_ST_SZ_DW(sqc)]; - struct mlx5_wq_param wq; - bool is_mpw; -}; - -struct mlx5e_cq_param { - u32 cqc[MLX5_ST_SZ_DW(cqc)]; - struct mlx5_wq_param wq; - u16 eq_ix; - u8 cq_period_mode; -}; - -struct mlx5e_channel_param { - struct mlx5e_rq_param rq; - struct mlx5e_sq_param sq; - struct mlx5e_sq_param xdp_sq; - struct mlx5e_sq_param icosq; - struct mlx5e_cq_param rx_cq; - struct mlx5e_cq_param tx_cq; 
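The ethtool hunks above rely on a queue-numbering convention: with AF_XDP enabled, qids [0, num_channels) address the regular RQs and the upper half addresses the XSK RQs, which is why get_channels doubles both counts and why the flow-steering path decodes fs->ring_cookie via mlx5e_qid_get_ch_and_group before choosing between direct_tir and xsk_tir. A small sketch of that decode and validation under the upper-half assumption (the helper names here are hypothetical, not the driver's):

#include <stdbool.h>
#include <stdint.h>

enum rq_group { RQ_GROUP_REGULAR, RQ_GROUP_XSK, NUM_RQ_GROUPS };

/* Decode a flat queue id into (channel index, RQ group). */
static void qid_get_ch_and_group(uint16_t num_channels, uint64_t qid,
				 uint16_t *ix, enum rq_group *group)
{
	*ix = (uint16_t)(qid % num_channels);
	*group = (enum rq_group)(qid / num_channels);
}

/* A qid is valid if it falls inside one of the NUM_RQ_GROUPS halves. */
static bool qid_validate(uint16_t num_channels, uint64_t qid)
{
	return qid < (uint64_t)num_channels * NUM_RQ_GROUPS;
}

With 8 channels, qid 3 decodes to channel 3 in the regular group, while qid 11 decodes to channel 3 in the XSK group; this is also why changing num_channels is refused while an XSK is active.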
- struct mlx5e_cq_param icosq_cq; -}; bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -114,18 +92,31 @@ void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev, mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? - BIT(mlx5e_mpwqe_get_log_rq_size(params)) : + BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) : BIT(params->log_rq_mtu_frames), - BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params)), + BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)), MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - return mlx5e_check_fragmented_striding_rq_cap(mdev) && - !MLX5_IPSEC_DEV(mdev) && - !(params->xdp_prog && !mlx5e_rx_mpwqe_is_linear_skb(mdev, params)); + if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) + return false; + + if (MLX5_IPSEC_DEV(mdev)) + return false; + + if (params->xdp_prog) { + /* XSK params are not considered here. If striding RQ is in use, + * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will + * be called with the known XSK params. + */ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) + return false; + } + + return true; } void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params) @@ -394,6 +385,8 @@ static void mlx5e_free_di_list(struct mlx5e_rq *rq) static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq_param *rqp, struct mlx5e_rq *rq) { @@ -401,6 +394,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 num_xsk_frames = 0; + u32 rq_xdp_ix; u32 pool_size; int wq_sz; int err; @@ -417,7 +412,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->ix = c->ix; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - rq->stats = &c->priv->channel_stats[c->ix].rq; + rq->xdpsq = &c->rq_xdpsq; + rq->umem = umem; + + if (rq->umem) + rq->stats = &c->priv->channel_stats[c->ix].xskrq; + else + rq->stats = &c->priv->channel_stats[c->ix].rq; rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { @@ -426,12 +427,16 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix); + rq_xdp_ix = rq->ix; + if (xsk) + rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; + err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix); if (err < 0) goto err_rq_wq_destroy; rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params); + rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); + rq->buff.umem_headroom = xsk ? 
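The rewritten mlx5e_striding_rq_possible above replaces one dense boolean with ordered early returns: hardware capability first, then the IPSec exclusion, then the XDP requirement that MPWQE RX produce linear SKBs (XSK params deliberately left out at this stage, per the comment). The same decision structure as a standalone predicate, with the capabilities reduced to mocked flags and the linearity check stubbed:

#include <stdbool.h>

struct caps { bool striding_rq; bool ipsec; };
struct params { bool xdp_prog; };

/* Placeholder for mlx5e_rx_mpwqe_is_linear_skb(); always-true stub here. */
static bool mpwqe_rx_is_linear(const struct params *p)
{
	(void)p;
	return true;
}

static bool striding_rq_possible(const struct caps *c, const struct params *p)
{
	if (!c->striding_rq)
		return false;			/* no HW support */
	if (c->ipsec)
		return false;			/* IPSec offload excluded */
	if (p->xdp_prog && !mpwqe_rx_is_linear(p))
		return false;			/* XDP needs linear MPWQE SKBs */
	return true;
}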
xsk->headroom : 0; pool_size = 1 << params->log_rq_mtu_frames; switch (rq->wq_type) { @@ -445,7 +450,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); - pool_size = MLX5_MPWRQ_PAGES_PER_WQE << mlx5e_mpwqe_get_log_rq_size(params); + if (xsk) + num_xsk_frames = wq_sz << + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); + + pool_size = MLX5_MPWRQ_PAGES_PER_WQE << + mlx5e_mpwqe_get_log_rq_size(params, xsk); rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; @@ -464,12 +474,15 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe.skb_from_cqe_mpwrq = - mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ? - mlx5e_skb_from_cqe_mpwrq_linear : - mlx5e_skb_from_cqe_mpwrq_nonlinear; - rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params); - rq->mpwqe.num_strides = BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params)); + rq->mpwqe.skb_from_cqe_mpwrq = xsk ? + mlx5e_xsk_skb_from_cqe_mpwrq_linear : + mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ? + mlx5e_skb_from_cqe_mpwrq_linear : + mlx5e_skb_from_cqe_mpwrq_nonlinear; + + rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + rq->mpwqe.num_strides = + BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk)); err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -490,6 +503,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq); + if (xsk) + num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags; + rq->wqe.info = rqp->frags_info; rq->wqe.frags = kvzalloc_node(array_size(sizeof(*rq->wqe.frags), @@ -503,6 +519,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5e_init_di_list(rq, wq_sz, c->cpu); if (err) goto err_free; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; @@ -518,33 +535,49 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_free; } - rq->wqe.skb_from_cqe = mlx5e_rx_is_linear_skb(params) ? - mlx5e_skb_from_cqe_linear : - mlx5e_skb_from_cqe_nonlinear; + rq->wqe.skb_from_cqe = xsk ? + mlx5e_xsk_skb_from_cqe_linear : + mlx5e_rx_is_linear_skb(params, NULL) ? + mlx5e_skb_from_cqe_linear : + mlx5e_skb_from_cqe_nonlinear; rq->mkey_be = c->mkey_be; } - /* Create a page_pool and register it with rxq */ - pp_params.order = 0; - pp_params.flags = 0; /* No-internal DMA mapping in page_pool */ - pp_params.pool_size = pool_size; - pp_params.nid = cpu_to_node(c->cpu); - pp_params.dev = c->pdev; - pp_params.dma_dir = rq->buff.map_dir; - - /* page_pool can be used even when there is no rq->xdp_prog, - * given page_pool does not handle DMA mapping there is no - * required state to clear. And page_pool gracefully handle - * elevated refcnt. 
- */ - rq->page_pool = page_pool_create(&pp_params); - if (IS_ERR(rq->page_pool)) { - err = PTR_ERR(rq->page_pool); - rq->page_pool = NULL; - goto err_free; + if (xsk) { + err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames); + if (unlikely(err)) { + mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n", + num_xsk_frames); + goto err_free; + } + + rq->zca.free = mlx5e_xsk_zca_free; + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_ZERO_COPY, + &rq->zca); + } else { + /* Create a page_pool and register it with rxq */ + pp_params.order = 0; + pp_params.flags = 0; /* No internal DMA mapping in page_pool */ + pp_params.pool_size = pool_size; + pp_params.nid = cpu_to_node(c->cpu); + pp_params.dev = c->pdev; + pp_params.dma_dir = rq->buff.map_dir; + + /* page_pool can be used even when there is no rq->xdp_prog, + * given page_pool does not handle DMA mapping there is no + * required state to clear. And page_pool gracefully handles + * elevated refcnt. + */ + rq->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rq->page_pool)) { + err = PTR_ERR(rq->page_pool); + rq->page_pool = NULL; + goto err_free; + } + err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, + MEM_TYPE_PAGE_POOL, rq->page_pool); } - err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, - MEM_TYPE_PAGE_POOL, rq->page_pool); if (err) goto err_free; @@ -584,11 +617,11 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, switch (params->rx_cq_moderation.cq_period_mode) { case MLX5_CQ_PERIOD_MODE_START_FROM_CQE: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; break; case MLX5_CQ_PERIOD_MODE_START_FROM_EQE: default: - rq->dim.mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } rq->page_cache.head = 0; @@ -611,8 +644,7 @@ err_rq_wq_destroy: if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); return err; @@ -625,10 +657,6 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) if (rq->xdp_prog) bpf_prog_put(rq->xdp_prog); - xdp_rxq_info_unreg(&rq->xdp_rxq); - if (rq->page_pool) - page_pool_destroy(rq->page_pool); - switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); @@ -643,8 +671,15 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; - mlx5e_page_release(rq, dma_info, false); + /* With AF_XDP, page_cache is not used, so this loop is not + * entered, and it's safe to call mlx5e_page_release_dynamic + * directly.
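The allocation path above now registers one of two XDP memory models on the RxQ: zero-copy backed by the UMEM, with a driver-supplied zca.free recycler, when an XSK is attached; a page_pool otherwise. A reduced sketch of the branch, with the registration call stubbed out (the real call is xdp_rxq_info_reg_mem_model with MEM_TYPE_ZERO_COPY or MEM_TYPE_PAGE_POOL; everything else here is a mock):

#include <stdbool.h>

enum mem_type { MEM_TYPE_PAGE_POOL_, MEM_TYPE_ZERO_COPY_ };

struct zero_copy_allocator { void (*free)(void *handle); };
struct rxq { enum mem_type mt; void *allocator; };

/* Stub for xdp_rxq_info_reg_mem_model(): record which model backs the RxQ. */
static int reg_mem_model(struct rxq *q, enum mem_type mt, void *allocator)
{
	q->mt = mt;
	q->allocator = allocator;
	return 0;
}

static int setup_rx_memory(struct rxq *q, bool xsk,
			   struct zero_copy_allocator *zca, void *page_pool)
{
	if (xsk)
		/* AF_XDP: frames come from the UMEM; zca->free recycles them. */
		return reg_mem_model(q, MEM_TYPE_ZERO_COPY_, zca);

	/* Default: pages are allocated from and recycled to the page_pool. */
	return reg_mem_model(q, MEM_TYPE_PAGE_POOL_, page_pool);
}

This split also explains the reuse-ring resize: the zero-copy recycler needs room for every in-flight frame, so the XSK branch sizes it to num_xsk_frames before registering.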
+ */ + mlx5e_page_release_dynamic(rq, dma_info, false); } + + xdp_rxq_info_unreg(&rq->xdp_rxq); + page_pool_destroy(rq->page_pool); mlx5_wq_destroy(&rq->wq_ctrl); } @@ -778,7 +813,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5_core_destroy_rq(rq->mdev, rq->rqn); } -static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) +int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) { unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time); struct mlx5e_channel *c = rq->channel; @@ -836,14 +871,13 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } -static int mlx5e_open_rq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_rq_param *param, - struct mlx5e_rq *rq) +int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk, + struct xdp_umem *umem, struct mlx5e_rq *rq) { int err; - err = mlx5e_alloc_rq(c, params, param, rq); + err = mlx5e_alloc_rq(c, params, xsk, umem, param, rq); if (err) return err; @@ -881,13 +915,13 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq) mlx5e_trigger_irq(&rq->channel->icosq); } -static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) +void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ } -static void mlx5e_close_rq(struct mlx5e_rq *rq) +void mlx5e_close_rq(struct mlx5e_rq *rq) { cancel_work_sync(&rq->dim.work); mlx5e_destroy_rq(rq); @@ -940,6 +974,7 @@ static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct xdp_umem *umem, struct mlx5e_sq_param *param, struct mlx5e_xdpsq *sq, bool is_redirect) @@ -955,9 +990,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); - sq->stats = is_redirect ? - &c->priv->channel_stats[c->ix].xdpsq : - &c->priv->channel_stats[c->ix].rq_xdpsq; + sq->umem = umem; + + sq->stats = sq->umem ? + &c->priv->channel_stats[c->ix].xsksq : + is_redirect ? 
+ &c->priv->channel_stats[c->ix].xdpsq : + &c->priv->channel_stats[c->ix].rq_xdpsq; param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1087,11 +1126,14 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->uar_map = mdev->mlx5e_res.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; + sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); - if (mlx5_accel_is_tls_device(c->priv->mdev)) + if (mlx5_accel_is_tls_device(c->priv->mdev)) { set_bit(MLX5E_SQ_STATE_TLS, &sq->state); + sq->stop_room += MLX5E_SQ_TLS_ROOM; + } param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@ -1337,10 +1379,8 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_tx_reporter_err_cqe(sq); } -static int mlx5e_open_icosq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) +int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) { struct mlx5e_create_sq_param csp = {}; int err; @@ -1366,7 +1406,7 @@ err_free_icosq: return err; } -static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1377,16 +1417,14 @@ static void mlx5e_close_icosq(struct mlx5e_icosq *sq) mlx5e_free_icosq(sq); } -static int mlx5e_open_xdpsq(struct mlx5e_channel *c, - struct mlx5e_params *params, - struct mlx5e_sq_param *param, - struct mlx5e_xdpsq *sq, - bool is_redirect) +int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct xdp_umem *umem, + struct mlx5e_xdpsq *sq, bool is_redirect) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_xdpsq(c, params, param, sq, is_redirect); + err = mlx5e_alloc_xdpsq(c, params, umem, param, sq, is_redirect); if (err) return err; @@ -1440,7 +1478,7 @@ err_free_xdpsq: return err; } -static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) +void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1448,7 +1486,7 @@ static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq) napi_synchronize(&c->napi); mlx5e_destroy_sq(c->mdev, sq->sqn); - mlx5e_free_xdpsq_descs(sq, rq); + mlx5e_free_xdpsq_descs(sq); mlx5e_free_xdpsq(sq); } @@ -1518,6 +1556,7 @@ static void mlx5e_free_cq(struct mlx5e_cq *cq) static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_core_dev *mdev = cq->mdev; struct mlx5_core_cq *mcq = &cq->mcq; @@ -1552,7 +1591,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); - err = mlx5_core_create_cq(mdev, mcq, in, inlen); + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); kvfree(in); @@ -1569,10 +1608,8 @@ static void mlx5e_destroy_cq(struct mlx5e_cq *cq) mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } -static int mlx5e_open_cq(struct mlx5e_channel *c, - struct net_dim_cq_moder moder, - struct mlx5e_cq_param *param, - struct mlx5e_cq *cq) +int mlx5e_open_cq(struct mlx5e_channel *c, struct dim_cq_moder moder, + struct mlx5e_cq_param 
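The txqsq changes above make the stop threshold explicit: stop_room starts at the base reserve and grows by a TLS allowance when the device offloads TLS, since a resync can emit extra WQEs per packet. A sketch of that accounting with placeholder constants (the real MLX5E_SQ_STOP_ROOM and MLX5E_SQ_TLS_ROOM are derived from worst-case WQE sizes):

#include <stdbool.h>
#include <stdint.h>

#define SQ_STOP_ROOM 64u  /* placeholder: room for one maximal WQE + threshold */
#define SQ_TLS_ROOM  32u  /* placeholder: extra room for TLS resync WQEs */

struct txqsq { uint32_t stop_room; bool tls; };

static void init_stop_room(struct txqsq *sq, bool tls_device)
{
	sq->stop_room = SQ_STOP_ROOM;
	if (tls_device) {
		sq->tls = true;
		sq->stop_room += SQ_TLS_ROOM;	/* worst case per packet */
	}
}

/* The SQ is stopped when fewer than stop_room WQEBBs remain free. */
static bool sq_should_stop(uint32_t free_wqebbs, const struct txqsq *sq)
{
	return free_wqebbs < sq->stop_room;
}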
*param, struct mlx5e_cq *cq) { struct mlx5_core_dev *mdev = c->mdev; int err; @@ -1595,7 +1632,7 @@ err_free_cq: return err; } -static void mlx5e_close_cq(struct mlx5e_cq *cq) +void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_destroy_cq(cq); mlx5e_free_cq(cq); @@ -1769,49 +1806,16 @@ static void mlx5e_free_xps_cpumask(struct mlx5e_channel *c) free_cpumask_var(c->xps_cpumask); } -static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, - struct mlx5e_params *params, - struct mlx5e_channel_param *cparam, - struct mlx5e_channel **cp) +static int mlx5e_open_queues(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) { - int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); - struct net_dim_cq_moder icocq_moder = {0, 0}; - struct net_device *netdev = priv->netdev; - struct mlx5e_channel *c; - unsigned int irq; + struct dim_cq_moder icocq_moder = {0, 0}; int err; - int eqn; - - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); - if (err) - return err; - - c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); - if (!c) - return -ENOMEM; - - c->priv = priv; - c->mdev = priv->mdev; - c->tstamp = &priv->tstamp; - c->ix = ix; - c->cpu = cpu; - c->pdev = priv->mdev->device; - c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - c->num_tc = params->num_tc; - c->xdp = !!params->xdp_prog; - c->stats = &priv->channel_stats[ix].ch; - c->irq_desc = irq_to_desc(irq); - - err = mlx5e_alloc_xps_cpumask(c, params); - if (err) - goto err_free_channel; - - netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); if (err) - goto err_napi_del; + return err; err = mlx5e_open_tx_cqs(c, params, cparam); if (err) @@ -1827,7 +1831,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, /* XDP SQ CQ params are same as normal TXQ sq CQ params */ err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation, - &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; + &cparam->tx_cq, &c->rq_xdpsq.cq) : 0; if (err) goto err_close_rx_cq; @@ -1841,20 +1845,21 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_icosq; - err = c->xdp ? 
mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq, false) : 0; - if (err) - goto err_close_sqs; + if (c->xdp) { + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, + &c->rq_xdpsq, false); + if (err) + goto err_close_sqs; + } - err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); + err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq); if (err) goto err_close_xdp_sq; - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->xdpsq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); if (err) goto err_close_rq; - *cp = c; - return 0; err_close_rq: @@ -1862,7 +1867,7 @@ err_close_rq: err_close_xdp_sq: if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); + mlx5e_close_xdpsq(&c->rq_xdpsq); err_close_sqs: mlx5e_close_sqs(c); @@ -1872,8 +1877,9 @@ err_close_icosq: err_disable_napi: napi_disable(&c->napi); + if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); + mlx5e_close_cq(&c->rq_xdpsq.cq); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); @@ -1887,6 +1893,85 @@ err_close_tx_cqs: err_close_icosq_cq: mlx5e_close_cq(&c->icosq.cq); + return err; +} + +static void mlx5e_close_queues(struct mlx5e_channel *c) +{ + mlx5e_close_xdpsq(&c->xdpsq); + mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_xdpsq(&c->rq_xdpsq); + mlx5e_close_sqs(c); + mlx5e_close_icosq(&c->icosq); + napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->rq_xdpsq.cq); + mlx5e_close_cq(&c->rq.cq); + mlx5e_close_cq(&c->xdpsq.cq); + mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); +} + +static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam, + struct xdp_umem *umem, + struct mlx5e_channel **cp) +{ + int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix)); + struct net_device *netdev = priv->netdev; + struct mlx5e_xsk_param xsk; + struct mlx5e_channel *c; + unsigned int irq; + int err; + int eqn; + + err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + if (err) + return err; + + c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); + if (!c) + return -ENOMEM; + + c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; + c->ix = ix; + c->cpu = cpu; + c->pdev = priv->mdev->device; + c->netdev = priv->netdev; + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; + c->stats = &priv->channel_stats[ix].ch; + c->irq_desc = irq_to_desc(irq); + + err = mlx5e_alloc_xps_cpumask(c, params); + if (err) + goto err_free_channel; + + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); + + err = mlx5e_open_queues(c, params, cparam); + if (unlikely(err)) + goto err_napi_del; + + if (umem) { + mlx5e_build_xsk_param(umem, &xsk); + err = mlx5e_open_xsk(priv, params, &xsk, umem, c); + if (unlikely(err)) + goto err_close_queues; + } + + *cp = c; + + return 0; + +err_close_queues: + mlx5e_close_queues(c); + err_napi_del: netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -1905,12 +1990,18 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_txqsq(&c->sq[tc]); mlx5e_activate_rq(&c->rq); netif_set_xps_queue(c->netdev, c->xps_cpumask, c->ix); + + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_activate_xsk(c); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) { int tc; + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_deactivate_xsk(c); + mlx5e_deactivate_rq(&c->rq); for (tc = 0; tc < c->num_tc; tc++) mlx5e_deactivate_txqsq(&c->sq[tc]); @@ -1918,19 +2009,9 @@ 
static void mlx5e_deactivate_channel(struct mlx5e_channel *c) static void mlx5e_close_channel(struct mlx5e_channel *c) { - mlx5e_close_xdpsq(&c->xdpsq, NULL); - mlx5e_close_rq(&c->rq); - if (c->xdp) - mlx5e_close_xdpsq(&c->rq.xdpsq, &c->rq); - mlx5e_close_sqs(c); - mlx5e_close_icosq(&c->icosq); - napi_disable(&c->napi); - if (c->xdp) - mlx5e_close_cq(&c->rq.xdpsq.cq); - mlx5e_close_cq(&c->rq.cq); - mlx5e_close_cq(&c->xdpsq.cq); - mlx5e_close_tx_cqs(c); - mlx5e_close_cq(&c->icosq.cq); + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + mlx5e_close_xsk(c); + mlx5e_close_queues(c); netif_napi_del(&c->napi); mlx5e_free_xps_cpumask(c); @@ -1941,6 +2022,7 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, struct mlx5e_rq_frags_info *info) { u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu); @@ -1953,10 +2035,10 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, byte_count += MLX5E_METADATA_ETHER_LEN; #endif - if (mlx5e_rx_is_linear_skb(params)) { + if (mlx5e_rx_is_linear_skb(params, xsk)) { int frag_stride; - frag_stride = mlx5e_rx_get_linear_frag_sz(params); + frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk); frag_stride = roundup_pow_of_two(frag_stride); info->arr[0].frag_size = byte_count; @@ -2014,9 +2096,10 @@ static u8 mlx5e_get_rq_log_wq_sz(void *rqc) return MLX5_GET(wq, wq, log_wq_sz); } -static void mlx5e_build_rq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_rq_param *param) +void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_rq_param *param) { struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; @@ -2026,16 +2109,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: MLX5_SET(wq, wq, log_wqe_num_of_strides, - mlx5e_mpwqe_get_log_num_strides(mdev, params) - + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) - MLX5_MPWQE_LOG_NUM_STRIDES_BASE); MLX5_SET(wq, wq, log_wqe_stride_size, - mlx5e_mpwqe_get_log_stride_size(mdev, params) - + mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) - MLX5_MPWQE_LOG_STRIDE_SZ_BASE); - MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params)); + MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk)); break; default: /* MLX5_WQ_TYPE_CYCLIC */ MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames); - mlx5e_build_rq_frags_info(mdev, params, ¶m->frags_info); + mlx5e_build_rq_frags_info(mdev, params, xsk, ¶m->frags_info); ndsegs = param->frags_info.num_frags; } @@ -2066,8 +2149,8 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mdev->device); } -static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) +void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2103,9 +2186,10 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } -static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk, + struct mlx5e_cq_param *param) { struct mlx5_core_dev 
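mlx5e_build_rq_frags_info above sizes the linear legacy-RQ case by rounding the linear fragment size up to a power of two for the stride, so consecutive buffers start at aligned offsets, while frag_size keeps the MTU-derived byte count the hardware may write. A standalone sketch of that step (the headroom and linear-size inputs are simplified stand-ins for mlx5e_rx_get_linear_frag_sz):

#include <stdint.h>

/* Round v up to the next power of two (v > 0). */
static uint32_t roundup_pow_of_two32(uint32_t v)
{
	v--;
	v |= v >> 1; v |= v >> 2; v |= v >> 4; v |= v >> 8; v |= v >> 16;
	return v + 1;
}

/* Linear RX: one fragment per packet, stride aligned to a power of two. */
static void build_linear_frag(uint32_t hw_mtu, uint32_t headroom,
			      uint32_t *frag_size, uint32_t *frag_stride)
{
	uint32_t linear_sz = headroom + hw_mtu; /* simplified linear frag size */

	*frag_size = hw_mtu;				/* writable bytes */
	*frag_stride = roundup_pow_of_two32(linear_sz);	/* buffer step */
}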
*mdev = priv->mdev; void *cqc = param->cqc; @@ -2113,8 +2197,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = mlx5e_mpwqe_get_log_rq_size(params) + - mlx5e_mpwqe_get_log_num_strides(mdev, params); + log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) + + mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk); break; default: /* MLX5_WQ_TYPE_CYCLIC */ log_cq_size = params->log_rq_mtu_frames; @@ -2130,9 +2214,9 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } -static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_cq_param *param) +void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2142,9 +2226,9 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, param->cq_period_mode = params->tx_cq_moderation.cq_period_mode; } -static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_cq_param *param) +void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -2152,12 +2236,12 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, mlx5e_build_common_cq_param(priv, param); - param->cq_period_mode = NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; } -static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - u8 log_wq_size, - struct mlx5e_sq_param *param) +void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + u8 log_wq_size, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2168,9 +2252,9 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); } -static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, - struct mlx5e_params *params, - struct mlx5e_sq_param *param) +void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -2198,14 +2282,14 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv, { u8 icosq_log_wq_sz; - mlx5e_build_rq_param(priv, params, &cparam->rq); + mlx5e_build_rq_param(priv, params, NULL, &cparam->rq); icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq); mlx5e_build_sq_param(priv, params, &cparam->sq); mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); - mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_rx_cq_param(priv, params, NULL, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); } @@ -2226,7 +2310,12 @@ int mlx5e_open_channels(struct mlx5e_priv *priv, mlx5e_build_channel_param(priv, &chs->params, cparam); for (i = 0; i < chs->num; i++) { - err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); + struct xdp_umem *umem = NULL; + + if (chs->params.xdp_prog) + umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, i); + + err = mlx5e_open_channel(priv, i, &chs->params, cparam, umem, &chs->c[i]); if (err) goto err_close_channels; } @@ -2268,6 +2357,10 @@ static int mlx5e_wait_channels_min_rx_wqes(struct 
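The RX CQ sizing above follows from one CQE per stride: for striding RQ the log CQ size is the sum of the log RQ size and the log strides per WQE, and the XSK params now flow into the same helpers so XSK CQs are sized consistently. In sketch form:

#include <stdint.h>
#include <stdbool.h>

/* CQEs needed: one per stride with striding RQ, one per frame otherwise.
 * e.g. 2^4 WQEs x 2^11 strides per WQE -> a 2^15-entry CQ. */
static uint8_t rx_log_cq_size(bool striding_rq, uint8_t log_rq_size,
			      uint8_t log_num_strides, uint8_t log_mtu_frames)
{
	if (striding_rq)
		return log_rq_size + log_num_strides;
	return log_mtu_frames;
}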
mlx5e_channels *chs) int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT; err |= mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq, timeout); + + /* Don't wait on the XSK RQ, because the newer xdpsock sample + * doesn't provide any Fill Ring entries at the setup stage. + */ } return err ? -ETIMEDOUT : 0; @@ -2340,35 +2433,35 @@ int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) return err; } -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - struct mlx5e_rqt *rqt; + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int err; int ix; - for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) { - rqt = &priv->direct_tir[ix].rqt; - err = mlx5e_create_rqt(priv, 1 /*size */, rqt); - if (err) + for (ix = 0; ix < max_nch; ix++) { + err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt); + if (unlikely(err)) goto err_destroy_rqts; } return 0; err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create rqts failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); + mlx5e_destroy_rqt(priv, &tirs[ix].rqt); return err; } -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_rqt(priv, &tirs[i].rqt); } static int mlx5e_rx_hash_fn(int hfunc) @@ -2788,11 +2881,12 @@ static void mlx5e_build_tx2sq_maps(struct mlx5e_priv *priv) void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { int num_txqs = priv->channels.num * priv->channels.params.num_tc; + int num_rxqs = priv->channels.num * MLX5E_NUM_RQ_GROUPS; struct net_device *netdev = priv->netdev; mlx5e_netdev_set_tcs(netdev); netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, priv->channels.num); + netif_set_real_num_rx_queues(netdev, num_rxqs); mlx5e_build_tx2sq_maps(priv); mlx5e_activate_channels(&priv->channels); @@ -2804,10 +2898,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_wait_channels_min_rx_wqes(&priv->channels); mlx5e_redirect_rqts_to_channels(priv, &priv->channels); + + mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { + mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); + mlx5e_redirect_rqts_to_drop(priv); if (mlx5e_is_vport_rep(priv)) @@ -2847,7 +2945,7 @@ static void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, if (hw_modify) hw_modify(priv); - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); /* return carrier back if needed */ @@ -2886,15 +2984,18 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + bool is_xdp = priv->channels.params.xdp_prog; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); + if (is_xdp) + mlx5e_xdp_set_open(priv); err = mlx5e_open_channels(priv, &priv->channels); if (err) goto err_clear_state_opened_flag; - mlx5e_refresh_tirs(priv, false); + priv->profile->update_rx(priv); mlx5e_activate_priv_channels(priv); if (priv->profile->update_carrier) priv->profile->update_carrier(priv); @@ 
-2903,6 +3004,8 @@ int mlx5e_open_locked(struct net_device *netdev) return 0; err_clear_state_opened_flag: + if (is_xdp) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; } @@ -2934,6 +3037,8 @@ int mlx5e_close_locked(struct net_device *netdev) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) return 0; + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_closed(priv); clear_bit(MLX5E_STATE_OPENED, &priv->state); netif_carrier_off(priv->netdev); @@ -3045,20 +3150,19 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) mlx5e_free_cq(&drop_rq->cq); } -int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, - u32 underlay_qpn, u32 *tisn) +int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - MLX5_SET(tisc, tisc, prio, tc << 1); - MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + if (MLX5_GET(tisc, tisc, tls_en)) + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); - return mlx5_core_create_tis(mdev, in, sizeof(in), tisn); + return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn); } void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) @@ -3072,7 +3176,14 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) { - err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]); + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, prio, tc << 1); + + err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[tc]); if (err) goto err_close_tises; } @@ -3190,13 +3301,13 @@ err_destroy_inner_tirs: return err; } -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); struct mlx5e_tir *tir; void *tirc; int inlen; - int err; + int err = 0; u32 *in; int ix; @@ -3205,25 +3316,24 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) if (!in) return -ENOMEM; - for (ix = 0; ix < nch; ix++) { + for (ix = 0; ix < max_nch; ix++) { memset(in, 0, inlen); - tir = &priv->direct_tir[ix]; + tir = &tirs[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); + mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) + if (unlikely(err)) goto err_destroy_ch_tirs; } - kvfree(in); - - return 0; + goto out; err_destroy_ch_tirs: - mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); + mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); for (ix--; ix >= 0; ix--) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); + mlx5e_destroy_tir(priv->mdev, &tirs[ix]); +out: kvfree(in); return err; @@ -3243,13 +3353,13 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs) { - int nch = mlx5e_get_netdev_max_channels(priv->netdev); + const int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; - for (i = 0; i 
< nch; i++) - mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); + for (i = 0; i < max_nch; i++) + mlx5e_destroy_tir(priv->mdev, &tirs[i]); } static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) @@ -3316,17 +3426,17 @@ out: #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *cls_flower, + struct flow_cls_offload *cls_flower, int flags) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return mlx5e_configure_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return mlx5e_delete_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return mlx5e_stats_flower(priv->netdev, priv, cls_flower, flags); default: @@ -3347,36 +3457,22 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return -EOPNOTSUPP; } } - -static int mlx5e_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb, - priv, priv, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb, - priv); - return 0; - default: - return -EOPNOTSUPP; - } -} #endif +static LIST_HEAD(mlx5e_block_cb_list); + static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (type) { #ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_BLOCK: - return mlx5e_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, + &mlx5e_block_cb_list, + mlx5e_setup_tc_block_cb, + priv, priv, true); #endif case TC_SETUP_QDISC_MQPRIO: return mlx5e_setup_tc_mqprio(dev, type_data); @@ -3391,11 +3487,12 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s) for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; int j; - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; + s->rx_packets += rq_stats->packets + xskrq_stats->packets; + s->rx_bytes += rq_stats->bytes + xskrq_stats->bytes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -3494,6 +3591,13 @@ static int set_feature_lro(struct net_device *netdev, bool enable) mutex_lock(&priv->state_lock); + if (enable && priv->xsk.refcnt) { + netdev_warn(netdev, "LRO is incompatible with AF_XDP (%hu XSKs are active)\n", + priv->xsk.refcnt); + err = -EINVAL; + goto out; + } + old_params = &priv->channels.params; if (enable && !MLX5E_GET_PFLAG(old_params, MLX5E_PFLAG_RX_STRIDING_RQ)) { netdev_warn(netdev, "can't set LRO with legacy RQ\n"); @@ -3507,8 +3611,8 @@ static int set_feature_lro(struct net_device *netdev, bool enable) new_channels.params.lro_en = enable; if (old_params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) { - if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params) == - mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params)) + if (mlx5e_rx_mpwqe_is_linear_skb(mdev, old_params, NULL) == + mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_channels.params, NULL)) reset = false; } @@ -3698,6 
+3802,43 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, return features; } +static bool mlx5e_xsk_validate_mtu(struct net_device *netdev, + struct mlx5e_channels *chs, + struct mlx5e_params *new_params, + struct mlx5_core_dev *mdev) +{ + u16 ix; + + for (ix = 0; ix < chs->params.num_channels; ix++) { + struct xdp_umem *umem = mlx5e_xsk_get_umem(&chs->params, chs->params.xsk, ix); + struct mlx5e_xsk_param xsk; + + if (!umem) + continue; + + mlx5e_build_xsk_param(umem, &xsk); + + if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) { + u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk); + int max_mtu_frame, max_mtu_page, max_mtu; + + /* Two criteria must be met: + * 1. HW MTU + all headrooms <= XSK frame size. + * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE. + */ + max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr); + max_mtu_page = mlx5e_xdp_max_mtu(new_params, &xsk); + max_mtu = min(max_mtu_frame, max_mtu_page); + + netdev_err(netdev, "MTU %d is too big for an XSK running on channel %hu. Try MTU <= %d\n", + new_params->sw_mtu, ix, max_mtu); + return false; + } + } + + return true; +} + int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, change_hw_mtu_cb set_mtu_cb) { @@ -3718,18 +3859,31 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu, new_channels.params.sw_mtu = new_mtu; if (params->xdp_prog && - !mlx5e_rx_is_linear_skb(&new_channels.params)) { + !mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_err(netdev, "MTU(%d) > %d is not allowed while XDP enabled\n", - new_mtu, mlx5e_xdp_max_mtu(params)); + new_mtu, mlx5e_xdp_max_mtu(params, NULL)); + err = -EINVAL; + goto out; + } + + if (priv->xsk.refcnt && + !mlx5e_xsk_validate_mtu(netdev, &priv->channels, + &new_channels.params, priv->mdev)) { err = -EINVAL; goto out; } if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { - bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, &new_channels.params); - u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params); - u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params); + bool is_linear = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, + &new_channels.params, + NULL); + u8 ppw_old = mlx5e_mpwqe_log_pkts_per_wqe(params, NULL); + u8 ppw_new = mlx5e_mpwqe_log_pkts_per_wqe(&new_channels.params, NULL); + + /* If XSK is active, XSK RQs are linear. */ + is_linear |= priv->xsk.refcnt; + /* Always reset in linear mode - hw_mtu is used in data path. */ reset = reset && (is_linear || (ppw_old != ppw_new)); } @@ -4162,16 +4316,29 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) new_channels.params = priv->channels.params; new_channels.params.xdp_prog = prog; - if (!mlx5e_rx_is_linear_skb(&new_channels.params)) { + /* No XSK params: AF_XDP can't be enabled yet at the point of setting + * the XDP program. 
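mlx5e_xsk_validate_mtu above enforces the two commented criteria, and the limit it reports is the min of the frame-derived and page-derived maxima. The arithmetic in simplified form — the hard-header constant and the page bound here are stand-ins for MLX5E_HW2SW_MTU and mlx5e_xdp_max_mtu (which also accounts for XDP tailroom):

#include <stdint.h>

#define PAGE_SIZE_ 4096u
#define HARD_MTU   18u	/* placeholder for Ethernet header/FCS overhead */

/* Largest SW MTU that fits when the HW buffer has `room` bytes. */
static uint32_t hw2sw_mtu(uint32_t room)
{
	return room - HARD_MTU;
}

/* Max MTU for an XSK; both criteria from the driver comment must hold:
 * 1. HW MTU + all headrooms <= XSK chunk (frame) size.
 * 2. SKBs built on XDP_PASS must fit in a page.
 */
static uint32_t xsk_max_mtu(uint32_t chunk_size, uint32_t headroom)
{
	uint32_t max_mtu_frame = hw2sw_mtu(chunk_size - headroom);
	uint32_t max_mtu_page = hw2sw_mtu(PAGE_SIZE_ - headroom); /* simplified */

	return max_mtu_frame < max_mtu_page ? max_mtu_frame : max_mtu_page;
}

For a 4096-byte chunk with 256 bytes of headroom and this placeholder overhead, both bounds give 3822, matching the "MTU is too big" error path above.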
+ */ + if (!mlx5e_rx_is_linear_skb(&new_channels.params, NULL)) { netdev_warn(netdev, "XDP is not allowed with MTU(%d) > %d\n", new_channels.params.sw_mtu, - mlx5e_xdp_max_mtu(&new_channels.params)); + mlx5e_xdp_max_mtu(&new_channels.params, NULL)); return -EINVAL; } return 0; } +static int mlx5e_xdp_update_state(struct mlx5e_priv *priv) +{ + if (priv->channels.params.xdp_prog) + mlx5e_xdp_set_open(priv); + else + mlx5e_xdp_set_closed(priv); + + return 0; +} + static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -4192,8 +4359,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* no need for full reset when exchanging programs */ reset = (!priv->channels.params.xdp_prog || !prog); - if (was_opened && reset) - mlx5e_close_locked(netdev); if (was_opened && !reset) { /* num_channels is invariant here, so we can take the * batched reference right upfront. @@ -4205,20 +4370,31 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) } } - /* exchange programs, extra prog reference we got from caller - * as long as we don't fail from this point onwards. - */ - old_prog = xchg(&priv->channels.params.xdp_prog, prog); + if (was_opened && reset) { + struct mlx5e_channels new_channels = {}; + + new_channels.params = priv->channels.params; + new_channels.params.xdp_prog = prog; + mlx5e_set_rq_type(priv->mdev, &new_channels.params); + old_prog = priv->channels.params.xdp_prog; + + err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state); + if (err) + goto unlock; + } else { + /* exchange programs, extra prog reference we got from caller + * as long as we don't fail from this point onwards. + */ + old_prog = xchg(&priv->channels.params.xdp_prog, prog); + } + if (old_prog) bpf_prog_put(old_prog); - if (reset) /* change RQ type according to priv->xdp_prog */ + if (!was_opened && reset) /* change RQ type according to priv->xdp_prog */ mlx5e_set_rq_type(priv->mdev, &priv->channels.params); - if (was_opened && reset) - err = mlx5e_open_locked(netdev); - - if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + if (!was_opened || reset) goto unlock; /* exchanging programs w/o reset, we update ref counts on behalf @@ -4226,19 +4402,29 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) */ for (i = 0; i < priv->channels.num; i++) { struct mlx5e_channel *c = priv->channels.c[i]; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); napi_synchronize(&c->napi); /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ old_prog = xchg(&c->rq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (xsk_open) { + old_prog = xchg(&c->xskrq.xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); + if (xsk_open) + set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* napi_schedule in case we have missed anything */ napi_schedule(&c->napi); - - if (old_prog) - bpf_prog_put(old_prog); } unlock: @@ -4269,6 +4455,9 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp) case XDP_QUERY_PROG: xdp->prog_id = mlx5e_xdp_query(dev); return 0; + case XDP_SETUP_XSK_UMEM: + return mlx5e_xsk_setup_umem(dev, xdp->xsk.umem, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -4351,6 +4540,7 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_tx_timeout = 
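The no-reset path above swaps the XDP program per RQ under a quiesced NAPI: clear the ENABLED bit (on the XSK RQ too, if open), napi_synchronize so no poller still dereferences the old program, xchg the pointer, drop the displaced reference, re-enable, and kick NAPI. The same ordering in a standalone sketch, with C11 atomics standing in for the kernel's bitops, RCU-like synchronize, and bpf_prog refcounting (all mocks):

#include <stdatomic.h>
#include <stddef.h>

struct prog { _Atomic int refcnt; };

struct rq {
	_Atomic _Bool enabled;		/* stands in for MLX5E_RQ_STATE_ENABLED */
	_Atomic(struct prog *) xdp_prog; /* read by the datapath under "NAPI" */
};

static void napi_synchronize_stub(void) { /* wait out in-flight pollers */ }

static void prog_put(struct prog *p)
{
	if (p)
		atomic_fetch_sub(&p->refcnt, 1);
}

/* Swap the XDP program on a live RQ without tearing the channel down. */
static void swap_xdp_prog(struct rq *rq, struct prog *new_prog)
{
	struct prog *old;

	atomic_store(&rq->enabled, 0);	/* stop posting/polling this RQ */
	napi_synchronize_stub();	/* no poller can hold the old prog now */

	old = atomic_exchange(&rq->xdp_prog, new_prog);
	prog_put(old);			/* release the displaced reference */

	atomic_store(&rq->enabled, 1);	/* resume; caller re-schedules NAPI */
}

The full-reset case instead goes through mlx5e_safe_switch_channels with mlx5e_xdp_update_state as the hw_modify callback, so the open/closed XDP bookkeeping stays consistent with the new channel set.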
mlx5e_tx_timeout, .ndo_bpf = mlx5e_xdp, .ndo_xdp_xmit = mlx5e_xdp_xmit, + .ndo_xsk_async_xmit = mlx5e_xsk_async_xmit, #ifdef CONFIG_MLX5_EN_ARFS .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -4420,9 +4610,9 @@ static bool slow_pci_heuristic(struct mlx5_core_dev *mdev) link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw; } -static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; @@ -4433,9 +4623,9 @@ static struct net_dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode) return moder; } -static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) +static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) { - struct net_dim_cq_moder moder; + struct dim_cq_moder moder; moder.cq_period_mode = cq_period_mode; moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; @@ -4449,8 +4639,8 @@ static struct net_dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode) static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode) { return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ? - NET_DIM_CQ_PERIOD_MODE_START_FROM_CQE : - NET_DIM_CQ_PERIOD_MODE_START_FROM_EQE; + DIM_CQ_PERIOD_MODE_START_FROM_CQE : + DIM_CQ_PERIOD_MODE_START_FROM_EQE; } void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) @@ -4502,11 +4692,13 @@ void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, * - Striding RQ configuration is not possible/supported. * - Slow PCI heuristic. * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. + * + * No XSK params: checking the availability of striding RQ in general. */ if (!slow_pci_heuristic(mdev) && mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) || - !mlx5e_rx_is_linear_skb(params))) + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) || + !mlx5e_rx_is_linear_skb(params, NULL))) MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); mlx5e_set_rq_type(mdev, params); mlx5e_init_rq_type_params(mdev, params); @@ -4528,6 +4720,7 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, } void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_xsk *xsk, struct mlx5e_rss_params *rss_params, struct mlx5e_params *params, u16 max_channels, u16 mtu) @@ -4563,9 +4756,11 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* HW LRO */ /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ - if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params)) + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + /* No XSK params: checking the availability of striding RQ in general. 
*/ + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL)) params->lro_en = !slow_pci_heuristic(mdev); + } params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ @@ -4584,13 +4779,16 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, mlx5e_build_rss_params(rss_params, params->num_channels); params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev); + + /* AF_XDP */ + params->xsk = xsk; } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5_query_nic_vport_mac_address(priv->mdev, 0, netdev->dev_addr); + mlx5_query_mac_address(priv->mdev, netdev->dev_addr); if (is_zero_ether_addr(netdev->dev_addr) && !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) { eth_hw_addr_random(netdev); @@ -4619,14 +4817,18 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->ethtool_ops = &mlx5e_ethtool_ops; netdev->vlan_features |= NETIF_F_SG; - netdev->vlan_features |= NETIF_F_IP_CSUM; - netdev->vlan_features |= NETIF_F_IPV6_CSUM; + netdev->vlan_features |= NETIF_F_HW_CSUM; netdev->vlan_features |= NETIF_F_GRO; netdev->vlan_features |= NETIF_F_TSO; netdev->vlan_features |= NETIF_F_TSO6; netdev->vlan_features |= NETIF_F_RXCSUM; netdev->vlan_features |= NETIF_F_RXHASH; + netdev->mpls_features |= NETIF_F_SG; + netdev->mpls_features |= NETIF_F_HW_CSUM; + netdev->mpls_features |= NETIF_F_TSO; + netdev->mpls_features |= NETIF_F_TSO6; + netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_TX; netdev->hw_enc_features |= NETIF_F_HW_VLAN_CTAG_RX; @@ -4642,8 +4844,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { - netdev->hw_enc_features |= NETIF_F_IP_CSUM; - netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; + netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; @@ -4756,7 +4957,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (err) return err; - mlx5e_build_nic_params(mdev, rss, &priv->channels.params, + mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); @@ -4798,7 +4999,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -4806,14 +5007,22 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; + err = mlx5e_create_direct_rqts(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_direct_tirs; + + err = mlx5e_create_direct_tirs(priv, priv->xsk_tir); + if (unlikely(err)) + goto err_destroy_xsk_rqts; + err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_direct_tirs; + goto err_destroy_xsk_tirs; } err = mlx5e_tc_nic_init(priv); @@ -4824,12 +5033,16 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); +err_destroy_xsk_tirs: + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); +err_destroy_xsk_rqts: + mlx5e_destroy_direct_rqts(priv, 
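/* The unwind ladder here follows the usual LIFO convention: the xsk_tir
 * TIRs and RQTs created last are destroyed first, then the regular
 * direct TIRs/RQTs, the indirect ones, and finally the drop RQ. */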
priv->xsk_tir); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -4843,9 +5056,11 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->xsk_tir); + mlx5e_destroy_direct_rqts(priv, priv->xsk_tir); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -4927,6 +5142,11 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } +int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +{ + return mlx5e_refresh_tirs(priv, false); +} + static const struct mlx5e_profile mlx5e_nic_profile = { .init = mlx5e_nic_init, .cleanup = mlx5e_nic_cleanup, @@ -4936,6 +5156,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, + .update_rx = mlx5e_update_nic_rx, .update_stats = mlx5e_update_ndo_stats, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, @@ -4995,7 +5216,7 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, - nch); + nch * MLX5E_NUM_RQ_GROUPS); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); return NULL; @@ -5133,7 +5354,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) #ifdef CONFIG_MLX5_ESWITCH if (MLX5_ESWITCH_MANAGER(mdev) && - mlx5_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { + mlx5_eswitch_mode(mdev->priv.eswitch) == MLX5_ESWITCH_OFFLOADS) { mlx5e_rep_register_vport_reps(mdev); return mdev; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2f406b161bcf..10ef90a7bddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -37,6 +37,7 @@ #include <net/act_api.h> #include <net/netevent.h> #include <net/arp.h> +#include <net/devlink.h> #include "eswitch.h" #include "en.h" @@ -128,7 +129,7 @@ static void mlx5e_rep_get_strings(struct net_device *dev, } } -static void mlx5e_vf_rep_update_hw_counters(struct mlx5e_priv *priv) +static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -166,17 +167,6 @@ static void mlx5e_uplink_rep_update_hw_counters(struct mlx5e_priv *priv) vport_stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); } -static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) -{ - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - - if (rep->vport == MLX5_VPORT_UPLINK) - mlx5e_uplink_rep_update_hw_counters(priv); - else - mlx5e_vf_rep_update_hw_counters(priv); -} - static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; @@ -203,7 
+193,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); mlx5e_rep_update_sw_counters(priv); - mlx5e_rep_update_hw_counters(priv); + priv->profile->update_stats(priv); mutex_unlock(&priv->state_lock); for (i = 0; i < NUM_VPORT_REP_SW_COUNTERS; i++) @@ -363,7 +353,7 @@ static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev, return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings); } -static const struct ethtool_ops mlx5e_vf_rep_ethtool_ops = { +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, @@ -402,30 +392,19 @@ static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = { static int mlx5e_rep_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid) { - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_upper = NULL; - struct mlx5e_priv *uplink_priv = NULL; - struct net_device *uplink_dev; - - if (esw->mode == SRIOV_NONE) - return -EOPNOTSUPP; + struct mlx5_eswitch *esw; + struct mlx5e_priv *priv; + u64 parent_id; - uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - if (uplink_dev) { - uplink_upper = netdev_master_upper_dev_get(uplink_dev); - uplink_priv = netdev_priv(uplink_dev); - } + priv = netdev_priv(dev); + esw = priv->mdev->priv.eswitch; - ppid->id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { - ether_addr_copy(ppid->id, uplink_upper->dev_addr); - } else { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; + if (esw->mode == MLX5_ESWITCH_NONE) + return -EOPNOTSUPP; - ether_addr_copy(ppid->id, rep->hw_id); - } + parent_id = mlx5_query_nic_system_image_guid(priv->mdev); + ppid->id_len = sizeof(parent_id); + memcpy(ppid->id, &parent_id, sizeof(parent_id)); return 0; } @@ -436,7 +415,7 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, struct mlx5e_rep_sq *rep_sq, *tmp; struct mlx5e_rep_priv *rpriv; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return; rpriv = mlx5e_rep_to_rep_priv(rep); @@ -457,7 +436,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, int err; int i; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0; rpriv = mlx5e_rep_to_rep_priv(rep); @@ -677,7 +656,7 @@ static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) static int mlx5e_rep_indr_offload(struct net_device *netdev, - struct tc_cls_flower_offload *flower, + struct flow_cls_offload *flower, struct mlx5e_rep_indr_block_priv *indr_priv) { struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev); @@ -685,13 +664,13 @@ mlx5e_rep_indr_offload(struct net_device *netdev, int err = 0; switch (flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: err = mlx5e_configure_flower(netdev, priv, flower, flags); break; - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: err = mlx5e_delete_flower(netdev, priv, flower, flags); break; - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: err = mlx5e_stats_flower(netdev, priv, flower, flags); break; default: @@ -714,23 +693,39 @@ static int mlx5e_rep_indr_setup_block_cb(enum tc_setup_type type, } } +static void mlx5e_rep_indr_tc_block_unbind(void *cb_priv) +{ + struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv; + + list_del(&indr_priv->list); + kfree(indr_priv); +} + +static 
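/* mlx5e_block_cb_list below is the driver-wide list the flow_block core
 * consults (via f->driver_block_list and flow_block_cb_is_busy()) to
 * refuse binding the same callback twice. The lifecycle, condensed from
 * the BIND/UNBIND arms that follow:
 *
 *   block_cb = flow_block_cb_alloc(f->net, cb, cb_ident, cb_priv, release);
 *   flow_block_cb_add(block_cb, f);
 *   list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
 *   ...
 *   block_cb = flow_block_cb_lookup(f, cb, cb_ident);
 *   flow_block_cb_remove(block_cb, f);
 *   list_del(&block_cb->driver_list);
 */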
LIST_HEAD(mlx5e_block_cb_list); + static int mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, struct mlx5e_rep_priv *rpriv, - struct tc_block_offload *f) + struct flow_block_offload *f) { struct mlx5e_rep_indr_block_priv *indr_priv; - int err = 0; + struct flow_block_cb *block_cb; - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + f->driver_block_list = &mlx5e_block_cb_list; + switch (f->command) { - case TC_BLOCK_BIND: + case FLOW_BLOCK_BIND: indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); if (indr_priv) return -EEXIST; + if (flow_block_cb_is_busy(mlx5e_rep_indr_setup_block_cb, + indr_priv, &mlx5e_block_cb_list)) + return -EBUSY; + indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL); if (!indr_priv) return -ENOMEM; @@ -740,26 +735,32 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, list_add(&indr_priv->list, &rpriv->uplink_priv.tc_indr_block_priv_list); - err = tcf_block_cb_register(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv, indr_priv, f->extack); - if (err) { + block_cb = flow_block_cb_alloc(f->net, + mlx5e_rep_indr_setup_block_cb, + indr_priv, indr_priv, + mlx5e_rep_indr_tc_block_unbind); + if (IS_ERR(block_cb)) { list_del(&indr_priv->list); kfree(indr_priv); + return PTR_ERR(block_cb); } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list); - return err; - case TC_BLOCK_UNBIND: + return 0; + case FLOW_BLOCK_UNBIND: indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev); if (!indr_priv) return -ENOENT; - tcf_block_cb_unregister(f->block, - mlx5e_rep_indr_setup_block_cb, - indr_priv); - list_del(&indr_priv->list); - kfree(indr_priv); + block_cb = flow_block_cb_lookup(f, + mlx5e_rep_indr_setup_block_cb, + indr_priv); + if (!block_cb) + return -ENOENT; + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; @@ -1101,7 +1102,7 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type); } -static int mlx5e_vf_rep_open(struct net_device *dev) +static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1124,7 +1125,7 @@ unlock: return err; } -static int mlx5e_vf_rep_close(struct net_device *dev) +static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1141,42 +1142,18 @@ static int mlx5e_vf_rep_close(struct net_device *dev) return ret; } -static int mlx5e_rep_get_phys_port_name(struct net_device *dev, - char *buf, size_t len) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; - unsigned int fn; - int ret; - - fn = PCI_FUNC(priv->mdev->pdev->devfn); - if (fn >= MLX5_MAX_PORTS) - return -EOPNOTSUPP; - - if (rep->vport == MLX5_VPORT_UPLINK) - ret = snprintf(buf, len, "p%d", fn); - else - ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); - - if (ret >= len) - return -EOPNOTSUPP; - - return 0; -} - static int mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *cls_flower, int flags) + struct flow_cls_offload *cls_flower, int flags) { switch (cls_flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return mlx5e_configure_flower(priv->netdev, priv, cls_flower, flags); - case 
TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return mlx5e_delete_flower(priv->netdev, priv, cls_flower, flags); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return mlx5e_stats_flower(priv->netdev, priv, cls_flower, flags); default: @@ -1198,32 +1175,16 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, } } -static int mlx5e_rep_setup_tc_block(struct net_device *dev, - struct tc_block_offload *f) -{ - struct mlx5e_priv *priv = netdev_priv(dev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb, - priv, priv, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv); - return 0; - default: - return -EOPNOTSUPP; - } -} - static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { + struct mlx5e_priv *priv = netdev_priv(dev); + switch (type) { case TC_SETUP_BLOCK: - return mlx5e_rep_setup_tc_block(dev, type_data); + return flow_block_cb_setup_simple(type_data, NULL, + mlx5e_rep_setup_tc_cb, + priv, priv, true); default: return -EOPNOTSUPP; } @@ -1276,7 +1237,7 @@ static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev } static void -mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) +mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -1285,7 +1246,7 @@ mlx5e_vf_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } -static int mlx5e_vf_rep_change_mtu(struct net_device *netdev, int new_mtu) +static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) { return mlx5e_change_mtu(netdev, new_mtu, NULL); } @@ -1318,17 +1279,24 @@ static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan return 0; } -static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { - .ndo_open = mlx5e_vf_rep_open, - .ndo_stop = mlx5e_vf_rep_close, +static struct devlink_port *mlx5e_get_devlink_port(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + return &rpriv->dl_port; +} + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_rep_open, + .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, - .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, - .ndo_get_stats64 = mlx5e_vf_rep_get_stats, + .ndo_get_devlink_port = mlx5e_get_devlink_port, + .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, - .ndo_change_mtu = mlx5e_vf_rep_change_mtu, - .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, + .ndo_change_mtu = mlx5e_rep_change_mtu, }; static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { @@ -1336,8 +1304,8 @@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, .ndo_set_mac_address = mlx5e_uplink_rep_set_mac, - .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, .ndo_setup_tc = mlx5e_rep_setup_tc, + .ndo_get_devlink_port = mlx5e_get_devlink_port, .ndo_get_stats64 = mlx5e_get_stats, .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, @@ -1350,13 +1318,12 
@@ static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, - .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, .ndo_set_features = mlx5e_set_features, }; bool mlx5e_eswitch_rep(struct net_device *netdev) { - if (netdev->netdev_ops == &mlx5e_netdev_ops_vf_rep || + if (netdev->netdev_ops == &mlx5e_netdev_ops_rep || netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep) return true; @@ -1412,16 +1379,16 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep; /* we want a persistent mac for the uplink rep */ - mlx5_query_nic_vport_mac_address(mdev, 0, netdev->dev_addr); + mlx5_query_mac_address(mdev, netdev->dev_addr); netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB if (MLX5_CAP_GEN(mdev, qos)) netdev->dcbnl_ops = &mlx5e_dcbnl_ops; #endif } else { - netdev->netdev_ops = &mlx5e_netdev_ops_vf_rep; + netdev->netdev_ops = &mlx5e_netdev_ops_rep; eth_hw_addr_random(netdev); - netdev->ethtool_ops = &mlx5e_vf_rep_ethtool_ops; + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; } netdev->watchdog_timeo = 15 * HZ; @@ -1530,7 +1497,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -1538,7 +1505,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -1555,11 +1522,11 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, false); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -1573,9 +1540,9 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5_del_flow_rules(rpriv->vport_rx_rule); mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, false); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); } @@ -1642,11 +1609,16 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv) } } -static void mlx5e_vf_rep_enable(struct mlx5e_priv *priv) +static void mlx5e_rep_enable(struct mlx5e_priv *priv) { mlx5e_set_netdev_mtu_boundaries(priv); } +static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) +{ + return 0; +} + static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); @@ -1714,15 +1686,16 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv) mlx5_lag_remove(mdev); } -static const struct mlx5e_profile mlx5e_vf_rep_profile = { +static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .cleanup = 
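/* Both representor profiles gain the new .update_rx hook: representors
 * have nothing to refresh, hence the mlx5e_update_rep_rx() stub above,
 * while the NIC profile's mlx5e_update_nic_rx() refreshes the TIRs. */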
mlx5e_cleanup_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_rep_tx, - .enable = mlx5e_vf_rep_enable, - .update_stats = mlx5e_vf_rep_update_hw_counters, + .enable = mlx5e_rep_enable, + .update_rx = mlx5e_update_rep_rx, + .update_stats = mlx5e_rep_update_hw_counters, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, @@ -1737,6 +1710,7 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .cleanup_tx = mlx5e_cleanup_rep_tx, .enable = mlx5e_uplink_rep_enable, .disable = mlx5e_uplink_rep_disable, + .update_rx = mlx5e_update_rep_rx, .update_stats = mlx5e_uplink_rep_update_hw_counters, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, @@ -1744,6 +1718,55 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { .max_tc = MLX5E_MAX_NUM_TC, }; +static bool +is_devlink_port_supported(const struct mlx5_core_dev *dev, + const struct mlx5e_rep_priv *rpriv) +{ + return rpriv->rep->vport == MLX5_VPORT_UPLINK || + rpriv->rep->vport == MLX5_VPORT_PF || + mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport); +} + +static int register_devlink_port(struct mlx5_core_dev *dev, + struct mlx5e_rep_priv *rpriv) +{ + struct devlink *devlink = priv_to_devlink(dev); + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct netdev_phys_item_id ppid = {}; + int ret; + + if (!is_devlink_port_supported(dev, rpriv)) + return 0; + + ret = mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid); + if (ret) + return ret; + + if (rep->vport == MLX5_VPORT_UPLINK) + devlink_port_attrs_set(&rpriv->dl_port, + DEVLINK_PORT_FLAVOUR_PHYSICAL, + PCI_FUNC(dev->pdev->devfn), false, 0, + &ppid.id[0], ppid.id_len); + else if (rep->vport == MLX5_VPORT_PF) + devlink_port_attrs_pci_pf_set(&rpriv->dl_port, + &ppid.id[0], ppid.id_len, + dev->pdev->devfn); + else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport)) + devlink_port_attrs_pci_vf_set(&rpriv->dl_port, + &ppid.id[0], ppid.id_len, + dev->pdev->devfn, + rep->vport - 1); + + return devlink_port_register(devlink, &rpriv->dl_port, rep->vport); +} + +static void unregister_devlink_port(struct mlx5_core_dev *dev, + struct mlx5e_rep_priv *rpriv) +{ + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_unregister(&rpriv->dl_port); +} + /* e-Switch vport representors */ static int mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) @@ -1761,7 +1784,8 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) rpriv->rep = rep; nch = mlx5e_get_max_num_channels(dev); - profile = (rep->vport == MLX5_VPORT_UPLINK) ? &mlx5e_uplink_rep_profile : &mlx5e_vf_rep_profile; + profile = (rep->vport == MLX5_VPORT_UPLINK) ? 
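/* register_devlink_port() above maps e-switch vports onto devlink port
 * flavours: the uplink becomes a PHYSICAL port, the PF vport a PCI PF,
 * and VF vports PCI VFs (VF number rep->vport - 1, vport 0 being the
 * PF). The switch id (ppid) now comes from the NIC system image GUID
 * via mlx5e_rep_get_port_parent_id(), so all ports of one device report
 * the same parent id. */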
+ &mlx5e_uplink_rep_profile : &mlx5e_rep_profile; netdev = mlx5e_create_netdev(dev, profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", @@ -1771,7 +1795,7 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) } rpriv->netdev = netdev; - rep->rep_if[REP_ETH].priv = rpriv; + rep->rep_data[REP_ETH].priv = rpriv; INIT_LIST_HEAD(&rpriv->vport_sqs_list); if (rep->vport == MLX5_VPORT_UPLINK) { @@ -1794,15 +1818,27 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto err_detach_netdev; } + err = register_devlink_port(dev, rpriv); + if (err) { + esw_warn(dev, "Failed to register devlink port %d\n", + rep->vport); + goto err_neigh_cleanup; + } + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_neigh_cleanup; + goto err_devlink_cleanup; } + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_type_eth_set(&rpriv->dl_port, netdev); return 0; +err_devlink_cleanup: + unregister_devlink_port(dev, rpriv); + err_neigh_cleanup: mlx5e_rep_neigh_cleanup(rpriv); @@ -1825,9 +1861,13 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; void *ppriv = priv->ppriv; + if (is_devlink_port_supported(dev, rpriv)) + devlink_port_type_clear(&rpriv->dl_port); unregister_netdev(netdev); + unregister_devlink_port(dev, rpriv); mlx5e_rep_neigh_cleanup(rpriv); mlx5e_detach_netdev(priv); if (rep->vport == MLX5_VPORT_UPLINK) @@ -1845,16 +1885,17 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static const struct mlx5_eswitch_rep_ops rep_ops = { + .load = mlx5e_vport_rep_load, + .unload = mlx5e_vport_rep_unload, + .get_proto_dev = mlx5e_vport_rep_get_proto_dev +}; + void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep_if rep_if = {}; - - rep_if.load = mlx5e_vport_rep_load; - rep_if.unload = mlx5e_vport_rep_unload; - rep_if.get_proto_dev = mlx5e_vport_rep_get_proto_dev; - mlx5_eswitch_register_vport_reps(esw, &rep_if, REP_ETH); + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_ETH); } void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 83b573b1abac..c56e6ee4350c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -86,12 +86,13 @@ struct mlx5e_rep_priv { struct mlx5_flow_handle *vport_rx_rule; struct list_head vport_sqs_list; struct mlx5_rep_uplink_priv uplink_priv; /* valid for uplink rep */ + struct devlink_port dl_port; }; static inline struct mlx5e_rep_priv *mlx5e_rep_to_rep_priv(struct mlx5_eswitch_rep *rep) { - return (struct mlx5e_rep_priv *)rep->rep_if[REP_ETH].priv; + return rep->rep_data[REP_ETH].priv; } struct mlx5e_neigh { @@ -150,13 +151,12 @@ struct mlx5e_encap_entry { struct hlist_node encap_hlist; struct list_head flows; u32 encap_id; - struct ip_tunnel_info tun_info; + const struct ip_tunnel_info *tun_info; unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ struct net_device *out_dev; struct net_device *route_dev; - int tunnel_type; - int tunnel_hlen; + struct mlx5e_tc_tunnel *tunnel; int 
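/* Ordering in the load/unload paths above: the devlink port is
 * registered before register_netdev(), its type is set to Ethernet only
 * once the netdev exists, and unload reverses this (clear the type,
 * unregister the netdev, then unregister the devlink port). */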
reformat_type; u8 flags; char *encap_header; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 13133e7f088e..56a2f4666c47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -34,6 +34,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/tcp.h> +#include <linux/indirect_call_wrapper.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> #include <net/inet_ecn.h> @@ -46,6 +47,7 @@ #include "en_accel/tls_rxtx.h" #include "lib/clock.h" #include "en/xdp.h" +#include "en/xsk/rx.h" static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) { @@ -234,8 +236,8 @@ static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, return true; } -static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, - struct mlx5e_dma_info *dma_info) +static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) { if (mlx5e_rx_cache_get(rq, dma_info)) return 0; @@ -247,7 +249,7 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, PAGE_SIZE, rq->buff.map_dir); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { - put_page(dma_info->page); + page_pool_recycle_direct(rq->page_pool, dma_info->page); dma_info->page = NULL; return -ENOMEM; } @@ -255,13 +257,23 @@ static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, return 0; } +static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + if (rq->umem) + return mlx5e_xsk_page_alloc_umem(rq, dma_info); + else + return mlx5e_page_alloc_pool(rq, dma_info); +} + void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); } -void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, - bool recycle) +void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) { if (likely(recycle)) { if (mlx5e_rx_cache_put(rq, dma_info)) @@ -271,10 +283,25 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, page_pool_recycle_direct(rq->page_pool, dma_info->page); } else { mlx5e_page_dma_unmap(rq, dma_info); + page_pool_release_page(rq->page_pool, dma_info->page); put_page(dma_info->page); } } +static inline void mlx5e_page_release(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (rq->umem) + /* The `recycle` parameter is ignored, and the page is always + * put into the Reuse Ring, because there is no way to return + * the page to the userspace when the interface goes down. + */ + mlx5e_xsk_page_release(rq, dma_info); + else + mlx5e_page_release_dynamic(rq, dma_info, recycle); +} + static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *frag) { @@ -286,7 +313,7 @@ static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq, * offset) should just use the new one without replenishing again * by themselves. 
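* (mlx5e_page_alloc() above picks the allocator at run time: UMEM-backed
* frames via mlx5e_xsk_page_alloc_umem() when rq->umem is set, page-pool
* pages via mlx5e_page_alloc_pool() otherwise.)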
*/ - err = mlx5e_page_alloc_mapped(rq, frag->di); + err = mlx5e_page_alloc(rq, frag->di); return err; } @@ -352,6 +379,13 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk) int err; int i; + if (rq->umem) { + int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags; + + if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired))) + return -ENOMEM; + } + for (i = 0; i < wqe_bulk; i++) { struct mlx5e_rx_wqe_cyc *wqe = mlx5_wq_cyc_get_wqe(wq, ix + i); @@ -399,11 +433,17 @@ mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb, static void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle) { - const bool no_xdp_xmit = - bitmap_empty(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE); + bool no_xdp_xmit; struct mlx5e_dma_info *dma_info = wi->umr.dma_info; int i; + /* A common case for AF_XDP. */ + if (bitmap_full(wi->xdp_xmit_bitmap, MLX5_MPWRQ_PAGES_PER_WQE)) + return; + + no_xdp_xmit = bitmap_empty(wi->xdp_xmit_bitmap, + MLX5_MPWRQ_PAGES_PER_WQE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) if (no_xdp_xmit || !test_bit(i, wi->xdp_xmit_bitmap)) mlx5e_page_release(rq, &dma_info[i], recycle); @@ -425,11 +465,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n) mlx5_wq_ll_update_db_record(wq); } -static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq) -{ - return mlx5_wq_cyc_get_ctr_wrap_cnt(&sq->wq, sq->pc); -} - static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, struct mlx5_wq_cyc *wq, u16 pi, u16 nnops) @@ -457,6 +492,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) int err; int i; + if (rq->umem && + unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) { + err = -ENOMEM; + goto err; + } + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { @@ -465,12 +506,10 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) } umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi); - if (unlikely(mlx5e_icosq_wrap_cnt(sq) < 2)) - memcpy(umr_wqe, &rq->mpwqe.umr_wqe, - offsetof(struct mlx5e_umr_wqe, inline_mtts)); + memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts)); for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { - err = mlx5e_page_alloc_mapped(rq, dma_info); + err = mlx5e_page_alloc(rq, dma_info); if (unlikely(err)) goto err_unmap; umr_wqe->inline_mtts[i].ptag = cpu_to_be64(dma_info->addr | MLX5_EN_WR); @@ -485,6 +524,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].umr.rq = rq; sq->pc += MLX5E_UMR_WQEBBS; sq->doorbell_cseg = &umr_wqe->ctrl; @@ -496,6 +536,8 @@ err_unmap: dma_info--; mlx5e_page_release(rq, dma_info, true); } + +err: rq->stats->buff_alloc_err++; return err; @@ -542,11 +584,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !!err; } -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); struct mlx5_cqe64 *cqe; - u8 completed_umr = 0; u16 sqcc; int i; @@ -587,7 +628,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) if (likely(wi->opcode == MLX5_OPCODE_UMR)) { sqcc += MLX5E_UMR_WQEBBS; - completed_umr++; + wi->umr.rq->mpwqe.umr_completed++; } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) { sqcc++; } else { @@ -603,24 
+644,25 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) sq->cc = sqcc; mlx5_cqwq_update_db_record(&cq->wq); - - if (likely(completed_umr)) { - mlx5e_post_rx_mpwqe(rq, completed_umr); - rq->mpwqe.umr_in_progress -= completed_umr; - } } bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) { struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_ll *wq = &rq->mpwqe.wq; + u8 umr_completed = rq->mpwqe.umr_completed; + int alloc_err = 0; u8 missing, i; u16 head; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; - mlx5e_poll_ico_cq(&sq->cq, rq); + if (umr_completed) { + mlx5e_post_rx_mpwqe(rq, umr_completed); + rq->mpwqe.umr_in_progress -= umr_completed; + rq->mpwqe.umr_completed = 0; + } missing = mlx5_wq_ll_missing(wq) - rq->mpwqe.umr_in_progress; @@ -634,7 +676,9 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) head = rq->mpwqe.actual_wq_head; i = missing; do { - if (unlikely(mlx5e_alloc_rx_mpwqe(rq, head))) + alloc_err = mlx5e_alloc_rx_mpwqe(rq, head); + + if (unlikely(alloc_err)) break; head = mlx5_wq_ll_get_wqe_next_ix(wq, head); } while (--i); @@ -648,6 +692,12 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) rq->mpwqe.umr_in_progress += rq->mpwqe.umr_last_bulk; rq->mpwqe.actual_wq_head = head; + /* If XSK Fill Ring doesn't have enough frames, busy poll by + * rescheduling the NAPI poll. + */ + if (unlikely(alloc_err == -ENOMEM && rq->umem)) + return true; + return false; } @@ -1016,7 +1066,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, } rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false); rcu_read_unlock(); if (consumed) return NULL; /* page/packet was consumed by XDP */ @@ -1092,7 +1142,10 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (!skb) { /* probably for XDP */ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { @@ -1230,7 +1283,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, prefetch(data); rcu_read_lock(); - consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32); + consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false); rcu_read_unlock(); if (consumed) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) @@ -1279,8 +1332,10 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); - skb = rq->mpwqe.skb_from_cqe_mpwrq(rq, wi, cqe_bcnt, head_offset, - page_idx); + skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, + mlx5e_skb_from_cqe_mpwrq_linear, + mlx5e_skb_from_cqe_mpwrq_nonlinear, + rq, wi, cqe_bcnt, head_offset, page_idx); if (!skb) goto mpwrq_cqe_out; @@ -1327,7 +1382,8 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(cqwq); - rq->handle_rx_cqe(rq, cqe); + INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq, + mlx5e_handle_rx_cqe, rq, cqe); } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq))); out: @@ -1437,7 +1493,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = 
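/* INDIRECT_CALL_2() comes from <linux/indirect_call_wrapper.h>: it
 * compares the function pointer against the two named candidates and
 * calls the match directly, sparing the common case a (retpolined)
 * indirect call; roughly:
 *
 *   likely(f == f2) ? f2(args) :
 *   likely(f == f1) ? f1(args) : f(args)
 */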
INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (!skb) goto wq_free_wqe; @@ -1469,7 +1528,10 @@ void mlx5e_ipsec_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wi = get_frag(rq, ci); cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb = rq->wqe.skb_from_cqe(rq, cqe, wi, cqe_bcnt); + skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, + mlx5e_skb_from_cqe_linear, + mlx5e_skb_from_cqe_nonlinear, + rq, cqe, wi, cqe_bcnt); if (unlikely(!skb)) { /* a DROP, save the page-reuse checks */ mlx5e_free_rx_wqe(rq, wi, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 4382ef85488c..840ec945ccba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -64,7 +64,7 @@ static int mlx5e_test_health_info(struct mlx5e_priv *priv) { struct mlx5_core_health *health = &priv->mdev->priv.health; - return health->sick ? 1 : 0; + return health->fatal_error ? 1 : 0; } static int mlx5e_test_link_state(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 483d321d2151..539b4d3656da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -48,8 +48,15 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) }, #ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_resync_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_no_sync_data) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_drop_bypass_req) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) }, #endif { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, @@ -104,7 +111,33 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_aff_change) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_force_irq) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_eq_rearm) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_ecn_mark) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_removed_vlan_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_xdp_redirect) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_cqes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_mpwqe_filler_strides) }, + { 
MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xsk_arfs_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_xmit) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_mpwqe) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_inlnw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xsk_cqes) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) @@ -144,6 +177,8 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) &priv->channel_stats[i]; struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq; struct mlx5e_xdpsq_stats *xdpsq_stats = &channel_stats->rq_xdpsq; + struct mlx5e_xdpsq_stats *xsksq_stats = &channel_stats->xsksq; + struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq; struct mlx5e_rq_stats *rq_stats = &channel_stats->rq; struct mlx5e_ch_stats *ch_stats = &channel_stats->ch; int j; @@ -186,6 +221,7 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->ch_poll += ch_stats->poll; s->ch_arm += ch_stats->arm; s->ch_aff_change += ch_stats->aff_change; + s->ch_force_irq += ch_stats->force_irq; s->ch_eq_rearm += ch_stats->eq_rearm; /* xdp redirect */ s->tx_xdp_xmit += xdpsq_red_stats->xmit; @@ -194,6 +230,32 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_xdp_full += xdpsq_red_stats->full; s->tx_xdp_err += xdpsq_red_stats->err; s->tx_xdp_cqes += xdpsq_red_stats->cqes; + /* AF_XDP zero-copy */ + s->rx_xsk_packets += xskrq_stats->packets; + s->rx_xsk_bytes += xskrq_stats->bytes; + s->rx_xsk_csum_complete += xskrq_stats->csum_complete; + s->rx_xsk_csum_unnecessary += xskrq_stats->csum_unnecessary; + s->rx_xsk_csum_unnecessary_inner += xskrq_stats->csum_unnecessary_inner; + s->rx_xsk_csum_none += xskrq_stats->csum_none; + s->rx_xsk_ecn_mark += xskrq_stats->ecn_mark; + s->rx_xsk_removed_vlan_packets += xskrq_stats->removed_vlan_packets; + s->rx_xsk_xdp_drop += xskrq_stats->xdp_drop; + s->rx_xsk_xdp_redirect += xskrq_stats->xdp_redirect; + s->rx_xsk_wqe_err += xskrq_stats->wqe_err; + s->rx_xsk_mpwqe_filler_cqes += xskrq_stats->mpwqe_filler_cqes; + s->rx_xsk_mpwqe_filler_strides += xskrq_stats->mpwqe_filler_strides; + s->rx_xsk_oversize_pkts_sw_drop += xskrq_stats->oversize_pkts_sw_drop; + s->rx_xsk_buff_alloc_err += xskrq_stats->buff_alloc_err; + s->rx_xsk_cqe_compress_blks += xskrq_stats->cqe_compress_blks; + s->rx_xsk_cqe_compress_pkts += xskrq_stats->cqe_compress_pkts; + s->rx_xsk_congst_umr += xskrq_stats->congst_umr; + s->rx_xsk_arfs_err += xskrq_stats->arfs_err; + s->tx_xsk_xmit += xsksq_stats->xmit; + s->tx_xsk_mpwqe += xsksq_stats->mpwqe; + s->tx_xsk_inlnw += xsksq_stats->inlnw; + s->tx_xsk_full += xsksq_stats->full; + s->tx_xsk_err += xsksq_stats->err; + s->tx_xsk_cqes += xsksq_stats->cqes; for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; @@ -216,8 +278,15 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->tx_csum_none += sq_stats->csum_none; s->tx_csum_partial += sq_stats->csum_partial; #ifdef CONFIG_MLX5_EN_TLS - s->tx_tls_ooo += sq_stats->tls_ooo; 
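/* The aggregation pattern is uniform: each per-channel counter block
 * (rq, xdpsq, and now xskrq/xsksq) is summed field by field into the
 * single mlx5e_sw_stats aggregate, and the expanded TLS TX counters
 * below stay under the same CONFIG_MLX5_EN_TLS guard as their per-SQ
 * definitions. */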
- s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets; + s->tx_tls_encrypted_bytes += sq_stats->tls_encrypted_bytes; + s->tx_tls_ctx += sq_stats->tls_ctx; + s->tx_tls_ooo += sq_stats->tls_ooo; + s->tx_tls_resync_bytes += sq_stats->tls_resync_bytes; + s->tx_tls_drop_no_sync_data += sq_stats->tls_drop_no_sync_data; + s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req; + s->tx_tls_dump_bytes += sq_stats->tls_dump_bytes; + s->tx_tls_dump_packets += sq_stats->tls_dump_packets; #endif s->tx_cqes += sq_stats->cqes; } @@ -1238,6 +1307,16 @@ static const struct counter_desc sq_stats_desc[] = { { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, +#ifdef CONFIG_MLX5_EN_TLS + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_no_sync_data) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_drop_bypass_req) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) }, +#endif { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, @@ -1266,11 +1345,43 @@ static const struct counter_desc xdpsq_stats_desc[] = { { MLX5E_DECLARE_XDPSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, }; +static const struct counter_desc xskrq_stats_desc[] = { + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, ecn_mark) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, xdp_redirect) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_XSKRQ_STAT(struct mlx5e_rq_stats, arfs_err) }, +}; + +static const struct counter_desc xsksq_stats_desc[] = { + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, xmit) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, mpwqe) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, inlnw) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, full) }, + { MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, err) }, + 
{ MLX5E_DECLARE_XSKSQ_STAT(struct mlx5e_xdpsq_stats, cqes) }, +}; + static const struct counter_desc ch_stats_desc[] = { { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, events) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, poll) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, arm) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, aff_change) }, + { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, force_irq) }, { MLX5E_DECLARE_CH_STAT(struct mlx5e_ch_stats, eq_rearm) }, }; @@ -1278,6 +1389,8 @@ static const struct counter_desc ch_stats_desc[] = { #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) #define NUM_XDPSQ_STATS ARRAY_SIZE(xdpsq_stats_desc) #define NUM_RQ_XDPSQ_STATS ARRAY_SIZE(rq_xdpsq_stats_desc) +#define NUM_XSKRQ_STATS ARRAY_SIZE(xskrq_stats_desc) +#define NUM_XSKSQ_STATS ARRAY_SIZE(xsksq_stats_desc) #define NUM_CH_STATS ARRAY_SIZE(ch_stats_desc) static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) @@ -1288,13 +1401,16 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) (NUM_CH_STATS * max_nch) + (NUM_SQ_STATS * max_nch * priv->max_opened_tc) + (NUM_RQ_XDPSQ_STATS * max_nch) + - (NUM_XDPSQ_STATS * max_nch); + (NUM_XDPSQ_STATS * max_nch) + + (NUM_XSKRQ_STATS * max_nch * priv->xsk.ever_used) + + (NUM_XSKSQ_STATS * max_nch * priv->xsk.ever_used); } static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1306,6 +1422,9 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xskrq_stats_desc[j].format, i); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_xdpsq_stats_desc[j].format, i); @@ -1318,10 +1437,14 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, sq_stats_desc[j].format, priv->channel_tc2txq[i][tc]); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + xsksq_stats_desc[j].format, i); for (j = 0; j < NUM_XDPSQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, xdpsq_stats_desc[j].format, i); + } return idx; } @@ -1330,6 +1453,7 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + bool is_xsk = priv->xsk.ever_used; int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1343,6 +1467,10 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq, rq_stats_desc, j); + for (j = 0; j < NUM_XSKRQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xskrq, + xskrq_stats_desc, j); for (j = 0; j < NUM_RQ_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].rq_xdpsq, @@ -1356,11 +1484,16 @@ static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].sq[tc], sq_stats_desc, j); - for (i = 0; i < max_nch; i++) + for (i = 0; i < max_nch; i++) { + for (j = 0; j < NUM_XSKSQ_STATS * is_xsk; j++) + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xsksq, + xsksq_stats_desc, j); for (j = 0; j < 
NUM_XDPSQ_STATS; j++) data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel_stats[i].xdpsq, xdpsq_stats_desc, j); + } return idx; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index cdddcc46971b..76ac111e14d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -46,6 +46,8 @@ #define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_XDPSQ_STAT(type, fld) "tx%d_xdp_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_RQ_XDPSQ_STAT(type, fld) "rx%d_xdp_tx_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKRQ_STAT(type, fld) "rx%d_xsk_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_XSKSQ_STAT(type, fld) "tx%d_xsk_"#fld, offsetof(type, fld) #define MLX5E_DECLARE_CH_STAT(type, fld) "ch%d_"#fld, offsetof(type, fld) struct counter_desc { @@ -116,12 +118,46 @@ struct mlx5e_sw_stats { u64 ch_poll; u64 ch_arm; u64 ch_aff_change; + u64 ch_force_irq; u64 ch_eq_rearm; #ifdef CONFIG_MLX5_EN_TLS + u64 tx_tls_encrypted_packets; + u64 tx_tls_encrypted_bytes; + u64 tx_tls_ctx; u64 tx_tls_ooo; u64 tx_tls_resync_bytes; + u64 tx_tls_drop_no_sync_data; + u64 tx_tls_drop_bypass_req; + u64 tx_tls_dump_packets; + u64 tx_tls_dump_bytes; #endif + + u64 rx_xsk_packets; + u64 rx_xsk_bytes; + u64 rx_xsk_csum_complete; + u64 rx_xsk_csum_unnecessary; + u64 rx_xsk_csum_unnecessary_inner; + u64 rx_xsk_csum_none; + u64 rx_xsk_ecn_mark; + u64 rx_xsk_removed_vlan_packets; + u64 rx_xsk_xdp_drop; + u64 rx_xsk_xdp_redirect; + u64 rx_xsk_wqe_err; + u64 rx_xsk_mpwqe_filler_cqes; + u64 rx_xsk_mpwqe_filler_strides; + u64 rx_xsk_oversize_pkts_sw_drop; + u64 rx_xsk_buff_alloc_err; + u64 rx_xsk_cqe_compress_blks; + u64 rx_xsk_cqe_compress_pkts; + u64 rx_xsk_congst_umr; + u64 rx_xsk_arfs_err; + u64 tx_xsk_xmit; + u64 tx_xsk_mpwqe; + u64 tx_xsk_inlnw; + u64 tx_xsk_full; + u64 tx_xsk_err; + u64 tx_xsk_cqes; }; struct mlx5e_qcounter_stats { @@ -227,8 +263,15 @@ struct mlx5e_sq_stats { u64 added_vlan_packets; u64 nop; #ifdef CONFIG_MLX5_EN_TLS + u64 tls_encrypted_packets; + u64 tls_encrypted_bytes; + u64 tls_ctx; u64 tls_ooo; u64 tls_resync_bytes; + u64 tls_drop_no_sync_data; + u64 tls_drop_bypass_req; + u64 tls_dump_packets; + u64 tls_dump_bytes; #endif /* less likely accessed in data path */ u64 csum_none; @@ -256,6 +299,7 @@ struct mlx5e_ch_stats { u64 poll; u64 arm; u64 aff_change; + u64 force_irq; u64 eq_rearm; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e40c60d1631f..2d6436257f9d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -53,6 +53,7 @@ #include "en/port.h" #include "en/tc_tun.h" #include "lib/devcom.h" +#include "lib/geneve.h" struct mlx5_nic_flow_attr { u32 action; @@ -126,7 +127,7 @@ struct mlx5e_tc_flow { }; struct mlx5e_tc_flow_parse_attr { - struct ip_tunnel_info tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; + const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS]; struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; @@ -716,19 +717,22 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context; struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act 
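/* The MLX5E_DECLARE_XSK*_STAT formats above yield per-channel ethtool
 * strings such as "rx3_xsk_packets" and "tx3_xsk_xmit"; the
 * (NUM_XSK*_STATS * is_xsk) loop bounds collapse to zero iterations
 * until an AF_XDP socket has ever been used (priv->xsk.ever_used), so
 * the stats list is unchanged on setups without XSK. */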
flow_act = { .action = attr->action, - .flow_tag = attr->flow_tag, .reformat_id = 0, - .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, + .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; bool table_created = false; int err, dest_ix = 0; + flow_context->flags |= FLOW_CONTEXT_HAS_TAG; + flow_context->flow_tag = attr->flow_tag; + if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { @@ -799,7 +803,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->match_level != MLX5_MATCH_NONE) - parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); @@ -1063,6 +1067,19 @@ err_max_prio_chain: return err; } +static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec; + void *headers_v = MLX5_ADDR_OF(fte_match_param, + spec->match_value, + misc_parameters_3); + u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3, + headers_v, + geneve_tlv_option_0_data); + + return !!geneve_tlv_opt_0_data; +} + static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -1084,6 +1101,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } + if (mlx5_flow_has_geneve_opt(flow)) + mlx5_geneve_tlv_option_del(priv->mdev->geneve); + mlx5_eswitch_del_vlan_action(esw, attr); for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) @@ -1330,7 +1350,7 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; @@ -1338,8 +1358,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); - struct flow_match_control enc_control; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); int err; err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, @@ -1350,9 +1369,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow_rule_match_enc_control(rule, &enc_control); - - if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { struct flow_match_ipv4_addrs match; flow_rule_match_enc_ipv4_addrs(rule, &match); @@ -1372,7 +1389,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { struct flow_match_ipv6_addrs match; flow_rule_match_enc_ipv6_addrs(rule, &match); @@ -1461,7 +1478,7 @@ static void *get_match_headers_value(u32 flags, static int __parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev, u8 *match_level, u8 *tunnel_match_level) { @@ -1474,7 +1491,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, 
misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; @@ -1497,29 +1514,21 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | - BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", dissector->used_keys); return -EOPNOTSUPP; } - if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_match_control match; - - flow_rule_match_enc_control(rule, &match); - switch (match.key->addr_type) { - case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) - return -EOPNOTSUPP; - break; - default: + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) { + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; - } /* In decap flow, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr @@ -1822,7 +1831,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5_flow_spec *spec, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct net_device *filter_dev) { struct netlink_ext_ack *extack = f->common.extack; @@ -2581,21 +2590,21 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, } struct encap_key { - struct ip_tunnel_key *ip_tun_key; - int tunnel_type; + const struct ip_tunnel_key *ip_tun_key; + struct mlx5e_tc_tunnel *tc_tunnel; }; static inline int cmp_encap_info(struct encap_key *a, struct encap_key *b) { return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) || - a->tunnel_type != b->tunnel_type; + a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), - key->tunnel_type); + key->tc_tunnel->tunnel_type); } @@ -2625,7 +2634,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct ip_tunnel_info *tun_info; + const struct ip_tunnel_info *tun_info; struct encap_key key, e_key; struct mlx5e_encap_entry *e; unsigned short family; @@ -2634,17 +2643,17 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, int err = 0; parse_attr = attr->parse_attr; - tun_info = &parse_attr->tun_info[out_index]; + tun_info = parse_attr->tun_info[out_index]; family = ip_tunnel_info_af(tun_info); key.ip_tun_key = &tun_info->key; - key.tunnel_type = 
mlx5e_tc_tun_get_type(mirred_dev); + key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev); hash_key = hash_encap_info(&key); hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info.key; - e_key.tunnel_type = e->tunnel_type; + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; if (!cmp_encap_info(&e_key, &key)) { found = true; break; @@ -2659,7 +2668,7 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, if (!e) return -ENOMEM; - e->tun_info = *tun_info; + e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); if (err) goto out_err; @@ -2793,6 +2802,16 @@ static int add_vlan_pop_action(struct mlx5e_priv *priv, return err; } +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev) +{ + if (is_merged_eswitch_dev(priv, out_dev)) + return true; + + return mlx5e_eswitch_rep(out_dev) && + same_hw_devs(priv, netdev_priv(out_dev)); +} + static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, @@ -2858,9 +2877,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (netdev_port_same_parent_id(priv->netdev, - out_dev) || - is_merged_eswitch_dev(priv, out_dev)) { + if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); @@ -2877,6 +2894,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, if (err) return err; } + if (is_vlan_dev(parse_attr->filter_dev)) { err = add_vlan_pop_action(priv, attr, &action); @@ -2884,8 +2902,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, return err; } - if (!mlx5e_eswitch_rep(out_dev)) + if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); return -EOPNOTSUPP; + } out_priv = netdev_priv(out_dev); rpriv = out_priv->ppriv; @@ -2895,7 +2918,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } else if (encap) { parse_attr->mirred_ifindex[attr->out_count] = out_dev->ifindex; - parse_attr->tun_info[attr->out_count] = *info; + parse_attr->tun_info[attr->out_count] = info; encap = false; attr->dests[attr->out_count].flags |= MLX5_ESW_DEST_ENCAP; @@ -3092,7 +3115,7 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, - struct tc_cls_flower_offload *f, u16 flow_flags, + struct flow_cls_offload *f, u16 flow_flags, struct mlx5e_tc_flow_parse_attr **__parse_attr, struct mlx5e_tc_flow **__flow) { @@ -3126,7 +3149,7 @@ static void mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) { @@ -3148,13 +3171,13 @@ mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr, static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct 
mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3198,7 +3221,7 @@ out: return ERR_PTR(err); } -static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, +static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, u16 flow_flags) { @@ -3250,7 +3273,7 @@ out: static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) @@ -3284,12 +3307,12 @@ out: static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -3335,7 +3358,7 @@ out: static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, int flags, struct net_device *filter_dev, struct mlx5e_tc_flow **flow) @@ -3349,7 +3372,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, if (!tc_can_offload_extack(priv->netdev, f->common.extack)) return -EOPNOTSUPP; - if (esw && esw->mode == SRIOV_OFFLOADS) + if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS) err = mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, flow); else @@ -3360,7 +3383,7 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv, } int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@ -3407,7 +3430,7 @@ static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) } int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; @@ -3426,7 +3449,7 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, } int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) + struct flow_cls_offload *f, int flags) { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index f62e81902d27..3ab39275ca7d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -54,12 +54,12 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht); void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht); int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); int mlx5e_stats_flower(struct net_device *dev, 
struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags); + struct flow_cls_offload *f, int flags); struct mlx5e_encap_entry; void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -74,6 +74,9 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); +bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, + struct net_device *out_dev); + #else /* CONFIG_MLX5_ESWITCH */ static inline int mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; } static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 701e5dc75bb0..600e92cb629a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -35,55 +35,12 @@ #include <net/geneve.h> #include <net/dsfield.h> #include "en.h" +#include "en/txrx.h" #include "ipoib/ipoib.h" #include "en_accel/en_accel.h" +#include "en_accel/ktls.h" #include "lib/clock.h" -#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS - -#ifndef CONFIG_MLX5_EN_TLS -#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) -#else -/* TLS offload requires MLX5E_SQ_STOP_ROOM to have - * enough room for a resync SKB, a normal SKB and a NOP - */ -#define MLX5E_SQ_STOP_ROOM (2 * MLX5_SEND_WQE_MAX_WQEBBS +\ - MLX5E_SQ_NOPS_ROOM) -#endif - -static inline void mlx5e_tx_dma_unmap(struct device *pdev, - struct mlx5e_sq_dma *dma) -{ - switch (dma->type) { - case MLX5E_DMA_MAP_SINGLE: - dma_unmap_single(pdev, dma->addr, dma->size, DMA_TO_DEVICE); - break; - case MLX5E_DMA_MAP_PAGE: - dma_unmap_page(pdev, dma->addr, dma->size, DMA_TO_DEVICE); - break; - default: - WARN_ONCE(true, "mlx5e_tx_dma_unmap unknown DMA type!\n"); - } -} - -static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) -{ - return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; -} - -static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq, - dma_addr_t addr, - u32 size, - enum mlx5e_dma_map_type map_type) -{ - struct mlx5e_sq_dma *dma = mlx5e_dma_get(sq, sq->dma_fifo_pc++); - - dma->addr = addr; - dma->size = size; - dma->type = map_type; -} - static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { int i; @@ -277,23 +234,6 @@ dma_unmap_wqe_err: return -ENOMEM; } -static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, - struct mlx5_wq_cyc *wq, - u16 pi, u16 nnops) -{ - struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; - - edge_wi = wi + nnops; - - /* fill sq frag edge with nops to avoid wqe wrapping two pages */ - for (; wi < edge_wi; wi++) { - wi->skb = NULL; - wi->num_wqebbs = 1; - mlx5e_post_nop(wq, sq->sqn, &sq->pc); - } - sq->stats->nop += nnops; -} - static inline void mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, u8 opcode, u16 ds_cnt, u8 num_wqebbs, u32 num_bytes, u8 num_dma, @@ -301,6 +241,7 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more) { struct mlx5_wq_cyc *wq = &sq->wq; + bool send_doorbell; wi->num_bytes = num_bytes; wi->num_dma = num_dma; @@ -310,23 +251,21 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - netdev_tx_sent_queue(sq->txq, num_bytes); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; sq->pc += 
wi->num_wqebbs; - if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room))) { netif_tx_stop_queue(sq->txq); sq->stats->stopped++; } - if (!xmit_more || netif_xmit_stopped(sq->txq)) + send_doorbell = __netdev_tx_sent_queue(sq->txq, num_bytes, + xmit_more); + if (send_doorbell) mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); } -#define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start)) - netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more) { @@ -353,9 +292,12 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; stats->packets += skb_shinfo(skb)->gso_segs; } else { + u8 mode = mlx5e_transport_inline_tx_wqe(wqe) ? + MLX5_INLINE_MODE_TCP_UDP : sq->min_inline_mode; + opcode = MLX5_OPCODE_SEND; mss = 0; - ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + ihs = mlx5e_calc_min_inline(mode, skb); num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); stats->packets++; } @@ -380,11 +322,17 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, #ifdef CONFIG_MLX5_EN_IPSEC struct mlx5_wqe_eth_seg cur_eth = wqe->eth; #endif +#ifdef CONFIG_MLX5_EN_TLS + struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl; +#endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); - mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); #ifdef CONFIG_MLX5_EN_IPSEC wqe->eth = cur_eth; #endif +#ifdef CONFIG_MLX5_EN_TLS + wqe->ctrl = cur_ctrl; +#endif } /* fill wqe */ @@ -443,7 +391,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) u16 pi; sq = priv->txq2sq[skb_get_queue_mapping(skb)]; - mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi); /* might send skbs and update wqe and pi */ skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi); @@ -531,8 +479,16 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wi = &sq->db.wqe_info[ci]; skb = wi->skb; - if (unlikely(!skb)) { /* nop */ - sqcc++; + if (unlikely(!skb)) { +#ifdef CONFIG_MLX5_EN_TLS + if (wi->resync_dump_frag) { + struct mlx5e_sq_dma *dma = + mlx5e_dma_get(sq, dma_fifo_cc++); + + mlx5e_ktls_tx_handle_resync_dump_comp(sq, wi, dma); + } +#endif + sqcc += wi->num_wqebbs; continue; } @@ -574,8 +530,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, - MLX5E_SQ_STOP_ROOM) && + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room) && !test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) { netif_tx_wake_queue(sq->txq); stats->wake++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index f9862bf75491..c50b6f0769c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -33,6 +33,7 @@ #include <linux/irq.h> #include "en.h" #include "en/xdp.h" +#include "en/xsk/tx.h" static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { @@ -48,26 +49,24 @@ static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) static void mlx5e_handle_tx_dim(struct mlx5e_txqsq *sq) { struct mlx5e_sq_stats *stats = sq->stats; - struct net_dim_sample dim_sample; + struct dim_sample 
dim_sample; if (unlikely(!test_bit(MLX5E_SQ_STATE_AM, &sq->state))) return; - net_dim_sample(sq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(sq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&sq->dim, dim_sample); } static void mlx5e_handle_rx_dim(struct mlx5e_rq *rq) { struct mlx5e_rq_stats *stats = rq->stats; - struct net_dim_sample dim_sample; + struct dim_sample dim_sample; if (unlikely(!test_bit(MLX5E_RQ_STATE_AM, &rq->state))) return; - net_dim_sample(rq->cq.event_ctr, stats->packets, stats->bytes, - &dim_sample); + dim_update_sample(rq->cq.event_ctr, stats->packets, stats->bytes, &dim_sample); net_dim(&rq->dim, dim_sample); } @@ -87,7 +86,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, napi); struct mlx5e_ch_stats *ch_stats = c->stats; + struct mlx5e_xdpsq *xsksq = &c->xsksq; + struct mlx5e_rq *xskrq = &c->xskrq; struct mlx5e_rq *rq = &c->rq; + bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state); + bool aff_change = false; + bool busy_xsk = false; bool busy = false; int work_done = 0; int i; @@ -97,22 +101,38 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); - busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq, NULL); + busy |= mlx5e_poll_xdpsq_cq(&c->xdpsq.cq); if (c->xdp) - busy |= mlx5e_poll_xdpsq_cq(&rq->xdpsq.cq, rq); + busy |= mlx5e_poll_xdpsq_cq(&c->rq_xdpsq.cq); if (likely(budget)) { /* budget=0 means: don't poll rx rings */ - work_done = mlx5e_poll_rx_cq(&rq->cq, budget); + if (xsk_open) + work_done = mlx5e_poll_rx_cq(&xskrq->cq, budget); + + if (likely(budget - work_done)) + work_done += mlx5e_poll_rx_cq(&rq->cq, budget - work_done); + busy |= work_done == budget; } - busy |= c->rq.post_wqes(rq); + mlx5e_poll_ico_cq(&c->icosq.cq); + + busy |= rq->post_wqes(rq); + if (xsk_open) { + mlx5e_poll_ico_cq(&c->xskicosq.cq); + busy |= mlx5e_poll_xdpsq_cq(&xsksq->cq); + busy_xsk |= mlx5e_xsk_tx(xsksq, MLX5E_TX_XSK_POLL_BUDGET); + busy_xsk |= xskrq->post_wqes(xskrq); + } + + busy |= busy_xsk; if (busy) { if (likely(mlx5e_channel_no_affinity_change(c))) return budget; ch_stats->aff_change++; + aff_change = true; if (budget && work_done == budget) work_done--; } @@ -133,10 +153,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) mlx5e_cq_arm(&c->icosq.cq); mlx5e_cq_arm(&c->xdpsq.cq); + if (xsk_open) { + mlx5e_handle_rx_dim(xskrq); + mlx5e_cq_arm(&c->xskicosq.cq); + mlx5e_cq_arm(&xsksq->cq); + mlx5e_cq_arm(&xskrq->cq); + } + + if (unlikely(aff_change && busy_xsk)) { + mlx5e_trigger_irq(&c->icosq); + ch_stats->force_irq++; + } + return work_done; } -void mlx5e_completion_event(struct mlx5_core_cq *mcq) +void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 23883d1fa22f..41f25ea2e8d9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -61,17 +61,21 @@ enum { MLX5_EQ_DOORBEL_OFFSET = 0x40, }; -struct mlx5_irq_info { - cpumask_var_t mask; - char name[MLX5_MAX_IRQ_NAME]; - void *context; /* dev_id provided to request_irq */ +/* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update + * the ci before we polled all the entries in the EQ. 
MLX5_NUM_SPARE_EQE is + * used to set the EQ size, budget must be smaller than the EQ size. + */ +enum { + MLX5_EQ_POLLING_BUDGET = 128, }; +static_assert(MLX5_EQ_POLLING_BUDGET <= MLX5_NUM_SPARE_EQE); + struct mlx5_eq_table { struct list_head comp_eqs_list; - struct mlx5_eq pages_eq; - struct mlx5_eq cmd_eq; - struct mlx5_eq async_eq; + struct mlx5_eq_async pages_eq; + struct mlx5_eq_async cmd_eq; + struct mlx5_eq_async async_eq; struct atomic_notifier_head nh[MLX5_EVENT_TYPE_MAX]; @@ -79,11 +83,8 @@ struct mlx5_eq_table { struct mlx5_nb cq_err_nb; struct mutex lock; /* sync async eqs creations */ - int num_comp_vectors; - struct mlx5_irq_info *irq_info; -#ifdef CONFIG_RFS_ACCEL - struct cpu_rmap *rmap; -#endif + int num_comp_eqs; + struct mlx5_irq_table *irq_table; }; #define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@ -124,16 +125,24 @@ static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn) return cq; } -static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) +static int mlx5_eq_comp_int(struct notifier_block *nb, + __always_unused unsigned long action, + __always_unused void *data) { - struct mlx5_eq_comp *eq_comp = eq_ptr; - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_comp *eq_comp = + container_of(nb, struct mlx5_eq_comp, irq_nb); + struct mlx5_eq *eq = &eq_comp->core; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; u32 cqn = -1; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { struct mlx5_core_cq *cq; + /* Make sure we read EQ entry contents after we've * checked the ownership bit. */ @@ -144,33 +153,23 @@ static irqreturn_t mlx5_eq_comp_int(int irq, void *eq_ptr) cq = mlx5_eq_cq_get(eq, cqn); if (likely(cq)) { ++cq->arm_sn; - cq->comp(cq); + cq->comp(cq, eqe); mlx5_cq_put(cq); } else { mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn); } ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); if (cqn != -1) tasklet_schedule(&eq_comp->tasklet_ctx.task); - return IRQ_HANDLED; + return 0; } /* Some architectures don't latch interrupts when they are disabled, so using @@ -184,25 +183,32 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) disable_irq(eq->core.irqn); count_eqe = eq->core.cons_index; - mlx5_eq_comp_int(eq->core.irqn, eq); + mlx5_eq_comp_int(&eq->irq_nb, 0, NULL); count_eqe = eq->core.cons_index - count_eqe; enable_irq(eq->core.irqn); return count_eqe; } -static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) +static int mlx5_eq_async_int(struct notifier_block *nb, + unsigned long action, void *data) { - struct mlx5_eq *eq = eq_ptr; + struct mlx5_eq_async *eq_async = + container_of(nb, struct mlx5_eq_async, irq_nb); + struct mlx5_eq *eq = &eq_async->core; struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; - int set_ci = 0; + int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; - while ((eqe = next_eqe_sw(eq))) { + eqe = next_eqe_sw(eq); + if (!eqe) + goto out; + + do { /* * Make sure we read EQ entry contents after we've * checked the ownership bit. 
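Both reworked handlers (mlx5_eq_comp_int above and mlx5_eq_async_int here) now share one drain pattern: consume at most MLX5_EQ_POLLING_BUDGET entries per invocation and publish the consumer index once, armed, on the way out. Because the EQ is sized with MLX5_NUM_SPARE_EQE extra entries, any budget bounded by the spare count guarantees the device sees an updated ci before it can wrap the queue, which is what lets the old intermediate eq_update_ci(eq, 0) calls go away. A minimal sketch of the pattern, with peek_eqe(), handle_eqe() and publish_ci() standing in for the driver's helpers:

        /* Drain at most `budget` owned entries, then write the consumer index
         * back exactly once; `budget` must stay below the queue's spare entries.
         * peek_eqe()/handle_eqe()/publish_ci() are hypothetical helpers.
         */
        static void drain_eq(struct my_eq *eq, int budget)
        {
                struct my_eqe *eqe = peek_eqe(eq);      /* NULL if no owned entry */
                int polled = 0;

                if (!eqe)
                        goto out;

                do {
                        dma_rmb();      /* read the entry only after the ownership check */
                        handle_eqe(eq, eqe);
                        ++eq->cons_index;
                } while (++polled < budget && (eqe = peek_eqe(eq)));

        out:
                publish_ci(eq, true);   /* update ci and re-arm the EQ */
        }
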
@@ -217,23 +223,13 @@ static irqreturn_t mlx5_eq_async_int(int irq, void *eq_ptr) atomic_notifier_call_chain(&eqt->nh[MLX5_EVENT_TYPE_NOTIFY_ANY], eqe->type, eqe); ++eq->cons_index; - ++set_ci; - /* The HCA will think the queue has overflowed if we - * don't tell it we've been processing events. We - * create our EQs with MLX5_NUM_SPARE_EQE extra - * entries, so we must update our consumer index at - * least that often. - */ - if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) { - eq_update_ci(eq, 0); - set_ci = 0; - } - } + } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); +out: eq_update_ci(eq, 1); - return IRQ_HANDLED; + return 0; } static void init_eq_buf(struct mlx5_eq *eq) @@ -248,22 +244,19 @@ static void init_eq_buf(struct mlx5_eq *eq) } static int -create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, +create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; struct mlx5_cq_table *cq_table = &eq->cq_table; u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->index; + u8 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; u32 *in; int err; - - if (eq_table->irq_info[vecidx].context) - return -EEXIST; + int i; /* Init CQ table */ memset(cq_table, 0, sizeof(*cq_table)); @@ -291,10 +284,12 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, mlx5_fill_page_array(&eq->buf, pas); MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); - if (!param->mask && MLX5_CAP_GEN(dev, log_max_uctx)) + if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx)) MLX5_SET(create_eq_in, in, uid, MLX5_SHARED_RESOURCE_UID); - MLX5_SET64(create_eq_in, in, event_bitmask, param->mask); + for (i = 0; i < 4; i++) + MLX5_ARRAY_SET64(create_eq_in, in, event_bitmask, i, + param->mask[i]); eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); @@ -307,34 +302,19 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, const char *name, if (err) goto err_in; - snprintf(eq_table->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", - name, pci_name(dev->pdev)); - eq_table->irq_info[vecidx].context = param->context; - eq->vecidx = vecidx; eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; - err = request_irq(eq->irqn, param->handler, 0, - eq_table->irq_info[vecidx].name, param->context); - if (err) - goto err_eq; err = mlx5_debug_eq_add(dev, eq); if (err) - goto err_irq; - - /* EQs are created in ARMED state - */ - eq_update_ci(eq, 1); + goto err_eq; kvfree(in); return 0; -err_irq: - free_irq(eq->irqn, eq); - err_eq: mlx5_cmd_destroy_eq(dev, eq->eqn); @@ -346,18 +326,48 @@ err_buf: return err; } -static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +/** + * mlx5_eq_enable - Enable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to enable + * @nb - notifier call block + * mlx5_eq_enable - must be called after EQ is created in device. 
+ */ +int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; - struct mlx5_irq_info *irq_info; int err; - irq_info = &eq_table->irq_info[eq->vecidx]; + err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + if (!err) + eq_update_ci(eq, 1); - mlx5_debug_eq_remove(dev, eq); + return err; +} +EXPORT_SYMBOL(mlx5_eq_enable); + +/** + * mlx5_eq_disable - Disable EQ for receiving EQEs + * @dev - Device which owns the eq + * @eq - EQ to disable + * @nb - notifier call block + * mlx5_eq_disable - must be called before EQ is destroyed. + */ +void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, + struct notifier_block *nb) +{ + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); +} +EXPORT_SYMBOL(mlx5_eq_disable); + +static int destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq) +{ + int err; - mlx5_debug_eq_remove(dev, eq); err = mlx5_cmd_destroy_eq(dev, eq->eqn); if (err) @@ -382,7 +392,7 @@ int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) return err; } -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) { struct mlx5_cq_table *table = &eq->cq_table; struct mlx5_core_cq *tmp; @@ -392,16 +402,14 @@ int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq) spin_unlock(&table->lock); if (!tmp) { - mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", eq->eqn, cq->cqn); - return -ENOENT; - } - - if (tmp != cq) { - mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", eq->eqn, cq->cqn); - return -EINVAL; + mlx5_core_dbg(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", + eq->eqn, cq->cqn); + return; } - return 0; + if (tmp != cq) + mlx5_core_dbg(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", + eq->eqn, cq->cqn); } int mlx5_eq_table_init(struct mlx5_core_dev *dev) @@ -423,6 +431,7 @@ int mlx5_eq_table_init(struct mlx5_core_dev *dev) for (i = 0; i < MLX5_EVENT_TYPE_MAX; i++) ATOMIC_INIT_NOTIFIER_HEAD(&eq_table->nh[i]); + eq_table->irq_table = dev->priv.irq_table; return 0; kvfree_eq_table: @@ -439,19 +448,20 @@ void mlx5_eq_table_cleanup(struct mlx5_core_dev *dev) /* Async EQs */ -static int create_async_eq(struct mlx5_core_dev *dev, const char *name, +static int create_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct mlx5_eq_param *param) { struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; mutex_lock(&eq_table->lock); - if (param->index >= MLX5_EQ_MAX_ASYNC_EQS) { - err = -ENOSPC; + /* Async EQs must share irq index 0 */ + if (param->irq_index != 0) { + err = -EINVAL; goto unlock; } - err = create_map_eq(dev, eq, name, param); + err = create_map_eq(dev, eq, param); unlock: mutex_unlock(&eq_table->lock); return err; @@ -480,7 +490,7 @@ static int cq_err_event_notifier(struct notifier_block *nb, /* type == MLX5_EVENT_TYPE_CQ_ERROR */ eqt = mlx5_nb_cof(nb, struct mlx5_eq_table, cq_err_nb); - eq = &eqt->async_eq; + eq = &eqt->async_eq.core; eqe = data; cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff; @@ -493,14 +503,31 @@ static int cq_err_event_notifier(struct notifier_block *nb, return NOTIFY_OK; } - cq->event(cq, type); + if (cq->event) + cq->event(cq, type); mlx5_cq_put(cq); return NOTIFY_OK; }
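With creation and IRQ wiring now split, a generic consumer (the RDMA ODP EQ is the stated user further down) brackets an EQ's lifetime as create, enable, disable, destroy, owning the notifier block itself. A sketch of that sequence under stated assumptions: the handler, the .nent value and mlx5_eq_destroy_generic() as the teardown counterpart are illustrative, not taken from this hunk:

        /* Hypothetical consumer of the generic EQ API. */
        static int my_eqe_handler(struct notifier_block *nb,
                                  unsigned long action, void *data)
        {
                /* data points at the polled struct mlx5_eqe; runs in IRQ context */
                return 0;
        }

        static struct notifier_block my_nb = { .notifier_call = my_eqe_handler };

        static struct mlx5_eq *my_eq_open(struct mlx5_core_dev *dev)
        {
                struct mlx5_eq_param param = {
                        .irq_index = 0, /* async EQs must share IRQ index 0 */
                        .nent = 64,     /* illustrative depth */
                };
                struct mlx5_eq *eq;
                int err;

                eq = mlx5_eq_create_generic(dev, &param);       /* create in device */
                if (IS_ERR(eq))
                        return eq;

                err = mlx5_eq_enable(dev, eq, &my_nb);  /* attach nb to IRQ, arm EQ */
                if (err) {
                        mlx5_eq_destroy_generic(dev, eq);
                        return ERR_PTR(err);
                }
                return eq;
        }

        /* Teardown mirrors it: mlx5_eq_disable(dev, eq, &my_nb), then
         * mlx5_eq_destroy_generic(dev, eq).
         */

-static u64 gather_async_events_mask(struct mlx5_core_dev *dev) +static void gather_user_async_events(struct 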
mlx5_core_dev *dev, u64 mask[4]) +{ + __be64 *user_unaffiliated_events; + __be64 *user_affiliated_events; + int i; + + user_affiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_affiliated_events); + user_unaffiliated_events = + MLX5_CAP_DEV_EVENT(dev, user_unaffiliated_events); + + for (i = 0; i < 4; i++) + mask[i] |= be64_to_cpu(user_affiliated_events[i] | + user_unaffiliated_events[i]); +} + +static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4]) { u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; @@ -533,10 +560,14 @@ static u64 gather_async_events_mask(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, max_num_of_monitor_counters)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_MONITOR_COUNTER); - if (mlx5_core_is_ecpf_esw_manager(dev)) - async_event_mask |= (1ull << MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE); + if (mlx5_eswitch_is_funcs_handler(dev)) + async_event_mask |= + (1ull << MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED); - return async_event_mask; + mask[0] = async_event_mask; + + if (MLX5_CAP_GEN(dev, event_cap)) + gather_user_async_events(dev, mask); } static int create_async_eqs(struct mlx5_core_dev *dev) @@ -548,55 +579,76 @@ static int create_async_eqs(struct mlx5_core_dev *dev) MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR); mlx5_eq_notifier_register(dev, &table->cq_err_nb); + table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_CMD_IDX, - .mask = 1ull << MLX5_EVENT_TYPE_CMD, + .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, - .context = &table->cmd_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_cmd_eq", &table->cmd_eq, ¶m); + + param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD; + err = create_async_eq(dev, &table->cmd_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err); goto err0; } - + err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err); + goto err1; + } mlx5_cmd_use_events(dev); + table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_ASYNC_IDX, - .mask = gather_async_events_mask(dev), + .irq_index = 0, .nent = MLX5_NUM_ASYNC_EQE, - .context = &table->async_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_async_eq", &table->async_eq, ¶m); + + gather_async_events_mask(dev, param.mask); + err = create_async_eq(dev, &table->async_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create async EQ %d\n", err); - goto err1; + goto err2; + } + err = mlx5_eq_enable(dev, &table->async_eq.core, + &table->async_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable async EQ %d\n", err); + goto err3; } + table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int; param = (struct mlx5_eq_param) { - .index = MLX5_EQ_PAGEREQ_IDX, - .mask = 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, + .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, - .context = &table->pages_eq, - .handler = mlx5_eq_async_int, }; - err = create_async_eq(dev, "mlx5_pages_eq", &table->pages_eq, ¶m); + + param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST; + err = create_async_eq(dev, &table->pages_eq.core, ¶m); if (err) { mlx5_core_warn(dev, "failed to create pages EQ %d\n", err); - goto err2; + goto err4; + } + err = mlx5_eq_enable(dev, &table->pages_eq.core, + &table->pages_eq.irq_nb); + if (err) { + mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err); + goto err5; } return err; +err5: + destroy_async_eq(dev, 
&table->pages_eq.core); +err4: + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); +err3: + destroy_async_eq(dev, &table->async_eq.core); err2: - destroy_async_eq(dev, &table->async_eq); - -err1: mlx5_cmd_use_polling(dev); - destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); +err1: + destroy_async_eq(dev, &table->cmd_eq.core); err0: mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); return err; @@ -607,19 +659,22 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = dev->priv.eq_table; int err; - err = destroy_async_eq(dev, &table->pages_eq); + mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb); + err = destroy_async_eq(dev, &table->pages_eq.core); if (err) mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n", err); - err = destroy_async_eq(dev, &table->async_eq); + mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb); + err = destroy_async_eq(dev, &table->async_eq.core); if (err) mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n", err); mlx5_cmd_use_polling(dev); - err = destroy_async_eq(dev, &table->cmd_eq); + mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb); + err = destroy_async_eq(dev, &table->cmd_eq.core); if (err) mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n", err); @@ -629,24 +684,24 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev) { - return &dev->priv.eq_table->async_eq; + return &dev->priv.eq_table->async_eq.core; } void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->async_eq.irqn); + synchronize_irq(dev->priv.eq_table->async_eq.core.irqn); } void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev) { - synchronize_irq(dev->priv.eq_table->cmd_eq.irqn); + synchronize_irq(dev->priv.eq_table->cmd_eq.core.irqn); } /* Generic EQ API for mlx5_core consumers * Needed For RDMA ODP EQ for now */ struct mlx5_eq * -mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, +mlx5_eq_create_generic(struct mlx5_core_dev *dev, struct mlx5_eq_param *param) { struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); @@ -655,7 +710,7 @@ mlx5_eq_create_generic(struct mlx5_core_dev *dev, const char *name, if (!eq) return ERR_PTR(-ENOMEM); - err = create_async_eq(dev, name, eq, param); + err = create_async_eq(dev, eq, param); if (err) { kvfree(eq); eq = ERR_PTR(err); @@ -713,84 +768,14 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm) } EXPORT_SYMBOL(mlx5_eq_update_ci); -/* Completion EQs */ - -static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - struct mlx5_priv *priv = &mdev->priv; - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - if (!zalloc_cpumask_var(&irq_info->mask, GFP_KERNEL)) { - mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); - return -ENOMEM; - } - - cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node), - irq_info->mask); - - if (IS_ENABLED(CONFIG_SMP) && - irq_set_affinity_hint(irq, irq_info->mask)) - mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq); - - return 0; -} - -static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) -{ - int vecidx = MLX5_EQ_VEC_COMP_BASE + i; - struct mlx5_priv *priv = &mdev->priv; - int irq = pci_irq_vector(mdev->pdev, vecidx); - struct 
mlx5_irq_info *irq_info = &priv->eq_table->irq_info[vecidx]; - - irq_set_affinity_hint(irq, NULL); - free_cpumask_var(irq_info->mask); -} - -static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) -{ - int err; - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) { - err = set_comp_irq_affinity_hint(mdev, i); - if (err) - goto err_out; - } - - return 0; - -err_out: - for (i--; i >= 0; i--) - clear_comp_irq_affinity_hint(mdev, i); - - return err; -} - -static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) -{ - int i; - - for (i = 0; i < mdev->priv.eq_table->num_comp_vectors; i++) - clear_comp_irq_affinity_hint(mdev, i); -} - static void destroy_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); + mlx5_eq_disable(dev, &eq->core, &eq->irq_nb); if (destroy_unmap_eq(dev, &eq->core)) mlx5_core_warn(dev, "failed to destroy comp EQ 0x%x\n", eq->core.eqn); @@ -802,23 +787,17 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev) static int create_comp_eqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - char name[MLX5_MAX_IRQ_NAME]; struct mlx5_eq_comp *eq; - int ncomp_vec; + int ncomp_eqs; int nent; int err; int i; INIT_LIST_HEAD(&table->comp_eqs_list); - ncomp_vec = table->num_comp_vectors; + ncomp_eqs = table->num_comp_eqs; nent = MLX5_COMP_EQ_SIZE; -#ifdef CONFIG_RFS_ACCEL - table->rmap = alloc_irq_cpu_rmap(ncomp_vec); - if (!table->rmap) - return -ENOMEM; -#endif - for (i = 0; i < ncomp_vec; i++) { - int vecidx = i + MLX5_EQ_VEC_COMP_BASE; + for (i = 0; i < ncomp_eqs; i++) { + int vecidx = i + MLX5_IRQ_VEC_COMP_BASE; struct mlx5_eq_param param = {}; eq = kzalloc(sizeof(*eq), GFP_KERNEL); @@ -833,33 +812,28 @@ static int create_comp_eqs(struct mlx5_core_dev *dev) tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb, (unsigned long)&eq->tasklet_ctx); -#ifdef CONFIG_RFS_ACCEL - irq_cpu_rmap_add(table->rmap, pci_irq_vector(dev->pdev, vecidx)); -#endif - snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); + eq->irq_nb.notifier_call = mlx5_eq_comp_int; param = (struct mlx5_eq_param) { - .index = vecidx, - .mask = 0, + .irq_index = vecidx, .nent = nent, - .context = &eq->core, - .handler = mlx5_eq_comp_int }; - err = create_map_eq(dev, &eq->core, name, ¶m); + err = create_map_eq(dev, &eq->core, ¶m); + if (err) { + kfree(eq); + goto clean; + } + err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); if (err) { + destroy_unmap_eq(dev, &eq->core); kfree(eq); goto clean; } + mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); /* add tail, to keep the list ordered, for mlx5_vector2eqn to work */ list_add_tail(&eq->list, &table->comp_eqs_list); } - err = set_comp_irq_affinity_hints(dev); - if (err) { - mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); - goto clean; - } - return 0; clean: @@ -890,22 +864,24 @@ EXPORT_SYMBOL(mlx5_vector2eqn); unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->num_comp_vectors; + return dev->priv.eq_table->num_comp_eqs; } EXPORT_SYMBOL(mlx5_comp_vectors_count); struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - /* TODO: consider irq_get_affinity_mask(irq) */ - return 
dev->priv.eq_table->irq_info[vector + MLX5_EQ_VEC_COMP_BASE].mask; + int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; + + return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, + vecidx); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask); #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return dev->priv.eq_table->rmap; + return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); } #endif @@ -926,82 +902,19 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn) void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; - int i, max_eqs; - - clear_comp_irqs_affinity_hints(dev); - -#ifdef CONFIG_RFS_ACCEL - if (table->rmap) { - free_irq_cpu_rmap(table->rmap); - table->rmap = NULL; - } -#endif mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ - max_eqs = table->num_comp_vectors + MLX5_EQ_VEC_COMP_BASE; - for (i = max_eqs - 1; i >= 0; i--) { - if (!table->irq_info[i].context) - continue; - free_irq(pci_irq_vector(dev->pdev, i), table->irq_info[i].context); - table->irq_info[i].context = NULL; - } + mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); - pci_free_irq_vectors(dev->pdev); -} - -static int alloc_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_eq_table *table = priv->eq_table; - int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? - MLX5_CAP_GEN(dev, max_num_eqs) : - 1 << MLX5_CAP_GEN(dev, log_max_eq); - int nvec; - int err; - - nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + - MLX5_EQ_VEC_COMP_BASE; - nvec = min_t(int, nvec, num_eqs); - if (nvec <= MLX5_EQ_VEC_COMP_BASE) - return -ENOMEM; - - table->irq_info = kcalloc(nvec, sizeof(*table->irq_info), GFP_KERNEL); - if (!table->irq_info) - return -ENOMEM; - - nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_EQ_VEC_COMP_BASE + 1, - nvec, PCI_IRQ_MSIX); - if (nvec < 0) { - err = nvec; - goto err_free_irq_info; - } - - table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE; - - return 0; - -err_free_irq_info: - kfree(table->irq_info); - return err; -} - -static void free_irq_vectors(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - - pci_free_irq_vectors(dev->pdev); - kfree(priv->eq_table->irq_info); } int mlx5_eq_table_create(struct mlx5_core_dev *dev) { + struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err; - err = alloc_irq_vectors(dev); - if (err) { - mlx5_core_err(dev, "alloc irq vectors failed\n"); - return err; - } + eq_table->num_comp_eqs = + mlx5_irq_get_num_comp(eq_table->irq_table); err = create_async_eqs(dev); if (err) { @@ -1019,7 +932,6 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev) err_comp_eqs: destroy_async_eqs(dev); err_async_eqs: - free_irq_vectors(dev); return err; } @@ -1027,7 +939,6 @@ void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { destroy_comp_eqs(dev); destroy_async_eqs(dev); - free_irq_vectors(dev); } int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) @@ -1039,6 +950,7 @@ int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb) return atomic_notifier_chain_register(&eqt->nh[nb->event_type], &nb->nb); } +EXPORT_SYMBOL(mlx5_eq_notifier_register); int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) { @@ -1049,3 +961,4 @@ int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb) return atomic_notifier_chain_unregister(&eqt->nh[nb->event_type], &nb->nb); } 
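The register/unregister pair exported here opens the per-event-type atomic notifier chains, which both async EQ handlers dispatch into, to other mlx5 consumers; the eswitch code in the next file uses exactly this with NIC_VPORT_CHANGE and ESW_FUNCTIONS_CHANGED. A minimal sketch of a subscriber, with the handler name and the PORT_CHANGE event chosen purely for illustration:

        /* Illustrative subscriber to one EQE type via struct mlx5_nb. */
        static int my_port_event(struct notifier_block *nb,
                                 unsigned long type, void *data)
        {
                /* data is the raw struct mlx5_eqe; called from the EQ's chain */
                return NOTIFY_OK;
        }

        static int my_subscribe(struct mlx5_core_dev *dev, struct mlx5_nb *nb)
        {
                MLX5_NB_INIT(nb, my_port_event, PORT_CHANGE);
                return mlx5_eq_notifier_register(dev, nb);
        }

        /* and on teardown: mlx5_eq_notifier_unregister(dev, nb); */
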
+EXPORT_SYMBOL(mlx5_eq_notifier_unregister); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 6a921e24cd5e..7281f8d6cba6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -134,6 +134,30 @@ static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen) +{ + return modify_esw_vport_context_cmd(esw->dev, vport, in, inlen); +} + +static int query_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, + void *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {}; + + MLX5_SET(query_esw_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); +} + +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen) +{ + return query_esw_vport_context_cmd(esw->dev, vport, out, outlen); +} + static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, u16 vlan, u8 qos, u8 set_flags) { @@ -473,7 +497,7 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ - if (esw->fdb_table.legacy.fdb && esw->mode == SRIOV_LEGACY) + if (esw->fdb_table.legacy.fdb && esw->mode == MLX5_ESWITCH_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", @@ -873,7 +897,7 @@ static void esw_vport_change_handle_locked(struct mlx5_vport *vport) struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; - mlx5_query_nic_vport_mac_address(dev, vport->vport, mac); + mlx5_query_nic_vport_mac_address(dev, vport->vport, true, mac); esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n", vport->vport, mac); @@ -939,7 +963,7 @@ int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1057,7 +1081,7 @@ int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, - vport->vport); + mlx5_eswitch_vport_num_to_index(esw, vport->vport)); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n", vport->vport); return -EOPNOTSUPP; @@ -1168,6 +1192,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, vport->ingress.drop_rule = NULL; vport->ingress.allow_rule = NULL; + + esw_vport_del_ingress_acl_modify_metadata(esw, vport); } void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, @@ -1527,6 +1553,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u16 vport_num = vport->vport; + int flags; if (esw->manager_vport == vport_num) return; @@ -1544,11 +1571,13 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw, 
vport->info.node_guid); } + flags = (vport->info.vlan || vport->info.qos) ? + SET_VLAN_STRIP | SET_VLAN_INSERT : 0; modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, - (vport->info.vlan || vport->info.qos)); + flags); /* Only legacy mode needs ACLs */ - if (esw->mode == SRIOV_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { esw_vport_ingress_config(esw, vport); esw_vport_egress_config(esw, vport); } @@ -1600,7 +1629,7 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, struct mlx5_vport *vport, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); /* Create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == SRIOV_LEGACY) + if (vport_num && esw->mode == MLX5_ESWITCH_LEGACY) esw_vport_create_drop_counters(vport); /* Restore old vport configuration */ @@ -1654,7 +1683,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, vport->enabled_events = 0; esw_vport_disable_qos(esw, vport); if (esw->manager_vport != vport_num && - esw->mode == SRIOV_LEGACY) { + esw->mode == MLX5_ESWITCH_LEGACY) { mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, 1, @@ -1686,54 +1715,91 @@ static int eswitch_vport_event(struct notifier_block *nb, return NOTIFY_OK; } +/** + * mlx5_esw_query_functions - Returns raw output about functions state + * @dev: Pointer to device to query + * + * mlx5_esw_query_functions() allocates and returns functions changed + * raw output memory pointer from device on success. Otherwise returns ERR_PTR. + * Caller must free the memory using kvfree() when valid pointer is returned. + */ +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) +{ + int outlen = MLX5_ST_SZ_BYTES(query_esw_functions_out); + u32 in[MLX5_ST_SZ_DW(query_esw_functions_in)] = {}; + u32 *out; + int err; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return ERR_PTR(-ENOMEM); + + MLX5_SET(query_esw_functions_in, in, opcode, + MLX5_CMD_OP_QUERY_ESW_FUNCTIONS); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); + if (!err) + return out; + + kvfree(out); + return ERR_PTR(err); +} + +static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) +{ + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { + MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, + ESW_FUNCTIONS_CHANGED); + mlx5_eq_notifier_register(esw->dev, &esw->esw_funcs.nb); + } +} + +static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) +{ + if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) + mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); + + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + + flush_workqueue(esw->work_queue); +} + /* Public E-Switch API */ #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev)) -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { - int vf_nvports = 0, total_nvports = 0; struct mlx5_vport *vport; int err; int i, enabled_events; if (!ESW_ALLOWED(esw) || !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { - esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); + esw_warn(esw->dev, "FDB is not supported, aborting ...\n"); return -EOPNOTSUPP; } if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "E-Switch ingress ACL is 
not supported by FW\n"); if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) - esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); - - esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); - - if (mode == SRIOV_OFFLOADS) { - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - err = mlx5_query_host_params_num_vfs(esw->dev, &vf_nvports); - if (err) - return err; - total_nvports = esw->total_vports; - } else { - vf_nvports = nvfs; - total_nvports = nvfs + MLX5_SPECIAL_VPORTS(esw->dev); - } - } + esw_warn(esw->dev, "egress ACL is not supported by FW\n"); esw->mode = mode; mlx5_lag_update(esw->dev); - if (mode == SRIOV_LEGACY) { + if (mode == MLX5_ESWITCH_LEGACY) { err = esw_create_legacy_table(esw); if (err) goto abort; } else { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); - err = esw_offloads_init(esw, vf_nvports, total_nvports); + err = esw_offloads_init(esw); } if (err) @@ -1743,11 +1809,8 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (err) esw_warn(esw->dev, "Failed to create eswitch TSAR"); - /* Don't enable vport events when in SRIOV_OFFLOADS mode, since: - * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode - * 2. FDB/Eswitch is programmed by user space tools - */ - enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; + enabled_events = (mode == MLX5_ESWITCH_LEGACY) ? SRIOV_VPORT_EVENTS : + UC_ADDR_CHANGE; /* Enable PF vport */ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_PF); @@ -1760,22 +1823,21 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) } /* Enable VF vports */ - mlx5_esw_for_each_vf_vport(esw, i, vport, nvfs) + mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) esw_enable_vport(esw, vport, enabled_events); - if (mode == SRIOV_LEGACY) { - MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->nb); - } + mlx5_eswitch_event_handlers_register(esw); + + esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), active vports(%d)\n", + mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); - esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", - esw->enabled_vports); return 0; abort: - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; - if (mode == SRIOV_OFFLOADS) { + if (mode == MLX5_ESWITCH_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } @@ -1783,23 +1845,22 @@ abort: return err; } -void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) +void mlx5_eswitch_disable(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; struct mlx5_vport *vport; int old_mode; int i; - if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) + if (!ESW_ALLOWED(esw) || esw->mode == MLX5_ESWITCH_NONE) return; - esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", - esw->enabled_vports, esw->mode); + esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), active vports(%d)\n", + esw->mode == MLX5_ESWITCH_LEGACY ? 
"LEGACY" : "OFFLOADS", + esw->esw_funcs.num_vfs, esw->enabled_vports); mc_promisc = &esw->mc_promisc; - - if (esw->mode == SRIOV_LEGACY) - mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handlers_unregister(esw); mlx5_esw_for_all_vports(esw, i, vport) esw_disable_vport(esw, vport); @@ -1809,17 +1870,17 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_destroy_tsar(esw); - if (esw->mode == SRIOV_LEGACY) + if (esw->mode == MLX5_ESWITCH_LEGACY) esw_destroy_legacy_table(esw); - else if (esw->mode == SRIOV_OFFLOADS) + else if (esw->mode == MLX5_ESWITCH_OFFLOADS) esw_offloads_cleanup(esw); old_mode = esw->mode; - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; mlx5_lag_update(esw->dev); - if (old_mode == SRIOV_OFFLOADS) { + if (old_mode == MLX5_ESWITCH_OFFLOADS) { mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_reload_interface(esw->dev, MLX5_INTERFACE_PROTOCOL_ETH); } @@ -1827,14 +1888,16 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int mlx5_eswitch_init(struct mlx5_core_dev *dev) { - int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; struct mlx5_vport *vport; + int total_vports; int err, i; if (!MLX5_VPORT_MANAGER(dev)) return 0; + total_vports = mlx5_eswitch_get_total_vports(dev); + esw_info(dev, "Total vports %d, per vport: max uc(%d) max mc(%d)\n", total_vports, @@ -1847,6 +1910,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->dev = dev; esw->manager_vport = mlx5_eswitch_manager_vport(dev); + esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { @@ -1880,7 +1944,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->enabled_vports = 0; - esw->mode = SRIOV_NONE; + esw->mode = MLX5_ESWITCH_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) @@ -1950,7 +2014,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, ether_addr_copy(evport->info.mac, mac); evport->info.node_guid = node_guid; - if (evport->enabled && esw->mode == SRIOV_LEGACY) + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) err = esw_vport_ingress_config(esw, evport); unlock: @@ -2034,7 +2098,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, evport->info.vlan = vlan; evport->info.qos = qos; - if (evport->enabled && esw->mode == SRIOV_LEGACY) { + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) goto unlock; @@ -2076,7 +2140,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, mlx5_core_warn(esw->dev, "Spoofchk in set while MAC is invalid, vport(%d)\n", evport->vport); - if (evport->enabled && esw->mode == SRIOV_LEGACY) + if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) err = esw_vport_ingress_config(esw, evport); if (err) evport->info.spoofchk = pschk; @@ -2172,7 +2236,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting) return -EPERM; mutex_lock(&esw->state_lock); - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != MLX5_ESWITCH_LEGACY) { err = -EOPNOTSUPP; goto out; } @@ -2195,7 +2259,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting) return -EPERM; mutex_lock(&esw->state_lock); - if (esw->mode != SRIOV_LEGACY) { + if (esw->mode != MLX5_ESWITCH_LEGACY) { err = -EOPNOTSUPP; goto out; } @@ -2338,7 +2402,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev, 
u64 bytes = 0; int err = 0; - if (!vport->enabled || esw->mode != SRIOV_LEGACY) + if (!vport->enabled || esw->mode != MLX5_ESWITCH_LEGACY) return 0; if (vport->egress.drop_counter) @@ -2448,16 +2512,27 @@ free_out: u8 mlx5_eswitch_mode(struct mlx5_eswitch *esw) { - return ESW_ALLOWED(esw) ? esw->mode : SRIOV_NONE; + return ESW_ALLOWED(esw) ? esw->mode : MLX5_ESWITCH_NONE; } EXPORT_SYMBOL_GPL(mlx5_eswitch_mode); +enum devlink_eswitch_encap_mode +mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev) +{ + struct mlx5_eswitch *esw; + + esw = dev->priv.eswitch; + return ESW_ALLOWED(esw) ? esw->offloads.encap : + DEVLINK_ESWITCH_ENCAP_MODE_NONE; +} +EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode); + bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - if ((dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE) || - (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && - dev1->priv.eswitch->mode == SRIOV_OFFLOADS)) + if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) || + (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS)) return true; return false; @@ -2466,6 +2541,26 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { - return (dev0->priv.eswitch->mode == SRIOV_OFFLOADS && - dev1->priv.eswitch->mode == SRIOV_OFFLOADS); + return (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS && + dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS); +} + +void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) +{ + const u32 *out; + + WARN_ON_ONCE(esw->mode != MLX5_ESWITCH_NONE); + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) { + esw->esw_funcs.num_vfs = num_vfs; + return; + } + + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + return; + + esw->esw_funcs.num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + kvfree(out); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index d043d6f9797d..a38e8a3c7c9a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -68,6 +68,8 @@ struct vport_ingress { struct mlx5_flow_group *allow_spoofchk_only_grp; struct mlx5_flow_group *allow_untagged_only_grp; struct mlx5_flow_group *drop_grp; + int modify_metadata_id; + struct mlx5_flow_handle *modify_metadata_rule; struct mlx5_flow_handle *allow_rule; struct mlx5_flow_handle *drop_rule; struct mlx5_fc *drop_counter; @@ -173,9 +175,12 @@ struct mlx5_esw_offload { struct mutex peer_mutex; DECLARE_HASHTABLE(encap_tbl, 8); DECLARE_HASHTABLE(mod_hdr_tbl, 8); + DECLARE_HASHTABLE(termtbl_tbl, 8); + struct mutex termtbl_mutex; /* protects termtbl hash */ + const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; u64 num_flows; - u8 encap; + enum devlink_eswitch_encap_mode encap; }; /* E-Switch MC FDB table hash node */ @@ -190,11 +195,15 @@ struct mlx5_host_work { struct mlx5_eswitch *esw; }; -struct mlx5_host_info { +struct mlx5_esw_functions { struct mlx5_nb nb; u16 num_vfs; }; +enum { + MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -202,6 +211,7 @@ struct mlx5_eswitch { struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct 
*work_queue; struct mlx5_vport *vports; + u32 flags; int total_vports; int enabled_vports; /* Synchronize between vport change events @@ -219,12 +229,12 @@ struct mlx5_eswitch { int mode; int nvports; u16 manager_vport; - struct mlx5_host_info host_info; + u16 first_host_vport; + struct mlx5_esw_functions esw_funcs; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw); -int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, - int total_nvports); +int esw_offloads_init(struct mlx5_eswitch *esw); void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); int esw_offloads_init_reps(struct mlx5_eswitch *esw); void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, @@ -239,12 +249,14 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, struct mlx5_vport *vport); +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); -void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); +int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode); +void mlx5_eswitch_disable(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u16 vport, u8 mac[ETH_ALEN]); int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, @@ -266,8 +278,32 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule); +int mlx5_eswitch_modify_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *in, int inlen); +int mlx5_eswitch_query_esw_vport_context(struct mlx5_eswitch *esw, u16 vport, + void *out, int outlen); + struct mlx5_flow_spec; struct mlx5_esw_flow_attr; +struct mlx5_termtbl_handle; + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec); + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest); + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt); struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, @@ -338,6 +374,7 @@ struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *rep; struct mlx5_core_dev *mdev; u32 encap_id; + struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; u8 match_level; @@ -355,10 +392,12 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack); -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); +int 
mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, @@ -386,6 +425,8 @@ bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1); +const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(__dev, format, ...) \ @@ -404,6 +445,24 @@ static inline u16 mlx5_eswitch_manager_vport(struct mlx5_core_dev *dev) MLX5_VPORT_ECPF : MLX5_VPORT_PF; } +static inline u16 mlx5_eswitch_first_host_vport_num(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_ecpf_esw_manager(dev) ? + MLX5_VPORT_PF : MLX5_VPORT_FIRST_VF; +} + +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) +{ + /* Ideally the device would expose the functions-changed capability + * on any device that has to process such events, i.e. the eswitch + * manager, whether it is the ECPF or the PF. However, some ECPF-based + * devices do not set this capability, so also accept ECPF eswitch + * managers to cover them. + */ + return MLX5_CAP_ESW(dev, esw_functions_changed) || + mlx5_core_is_ecpf_esw_manager(dev); +} + static inline int mlx5_eswitch_uplink_idx(struct mlx5_eswitch *esw) { /* Uplink always locate at the last element of the array.*/ @@ -488,16 +547,47 @@ void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw); #define mlx5_esw_for_each_vf_vport_num_reverse(esw, vport, nvfs) \ for ((vport) = (nvfs); (vport) >= MLX5_VPORT_FIRST_VF; (vport)--) +/* Includes host PF (vport 0) if it's not esw manager. */ +#define mlx5_esw_for_each_host_func_rep(esw, i, rep, nvfs) \ + for ((i) = (esw)->first_host_vport; \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) <= (nvfs); (i)++) + +#define mlx5_esw_for_each_host_func_rep_reverse(esw, i, rep, nvfs) \ + for ((i) = (nvfs); \ + (rep) = &(esw)->offloads.vport_reps[i], \ + (i) >= (esw)->first_host_vport; (i)--) + +#define mlx5_esw_for_each_host_func_vport(esw, vport, nvfs) \ + for ((vport) = (esw)->first_host_vport; \ + (vport) <= (nvfs); (vport)++) + +#define mlx5_esw_for_each_host_func_vport_reverse(esw, vport, nvfs) \ + for ((vport) = (nvfs); \ + (vport) >= (esw)->first_host_vport; (vport)--) + struct mlx5_vport *__must_check mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num); +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num); + +void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs); +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} -static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } -static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int mode) { return 0; } +static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; } +static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } +static inline const u32 *mlx5_esw_query_functions(struct
mlx5_core_dev *dev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_eswitch_update_num_of_vfs(struct mlx5_eswitch *esw, const int num_vfs) {} #define FDB_MAX_CHAIN 1 #define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 47b446d30f71..8ed4497929b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -41,7 +41,6 @@ #include "en.h" #include "fs_core.h" #include "lib/devcom.h" -#include "ecpf.h" #include "lib/eq.h" /* There are two match-all miss flows, one for unicast dst mac and @@ -89,6 +88,53 @@ u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) return 1; } +static void +mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + void *misc2; + void *misc; + + /* Use metadata matching because vport is not represented by single + * VHCA in dual-port RoCE mode, and matching on source vport may fail. + */ + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(attr->in_mdev->priv.eswitch, + attr->in_rep->vport)); + + misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc2, metadata_reg_c_0); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + if (memchr_inv(misc, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc))) + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); + + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + } + + if (MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source) && + attr->in_rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, @@ -100,9 +146,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule; struct mlx5_flow_table *fdb; int j, i = 0; - void *misc; - if (esw->mode != SRIOV_OFFLOADS) + if (esw->mode != MLX5_ESWITCH_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); flow_act.action = attr->action; @@ -160,21 +205,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, i++; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - misc = 
MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { if (attr->tunnel_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; @@ -193,7 +225,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } - rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); + if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec)) + rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr, + &flow_act, dest, i); + else + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -220,7 +256,6 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_flow_table *fast_fdb; struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; - void *misc; int i; fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); @@ -252,25 +287,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].ft = fwd_fdb, i++; - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); - - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(attr->in_mdev, vhca_id)); + mlx5_eswitch_set_rule_source_port(esw, spec, attr); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); - - if (attr->match_level == MLX5_MATCH_NONE) - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - else - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; + if (attr->match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); @@ -295,8 +316,16 @@ __mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, bool fwd_rule) { bool split = (attr->split_count > 0); + int i; mlx5_del_flow_rules(rule); + + /* unref the term table */ + for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) { + if (attr->dests[i].termtbl) + mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); + } + esw->offloads.num_flows--; if (fwd_rule) { @@ -328,12 +357,11 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) { struct mlx5_eswitch_rep *rep; - int vf_vport, err = 0; + int i, err = 0; esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? 
"pop" : "none"); - for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { - rep = &esw->offloads.vport_reps[vf_vport]; - if (atomic_read(&rep->rep_if[REP_ETH].state) != REP_LOADED) + mlx5_esw_for_each_host_func_rep(esw, i, rep, esw->esw_funcs.num_vfs) { + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) continue; err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); @@ -559,23 +587,87 @@ void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule) mlx5_del_flow_rules(rule); } -static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, +static int mlx5_eswitch_enable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id |= MLX5_FDB_TO_VPORT_REG_C_0; + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static int mlx5_eswitch_disable_passing_vport_metadata(struct mlx5_eswitch *esw) +{ + u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {}; + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {}; + u8 fdb_to_vport_reg_c_id; + int err; + + err = mlx5_eswitch_query_esw_vport_context(esw, esw->manager_vport, + out, sizeof(out)); + if (err) + return err; + + fdb_to_vport_reg_c_id = MLX5_GET(query_esw_vport_context_out, out, + esw_vport_context.fdb_to_vport_reg_c_id); + + fdb_to_vport_reg_c_id &= ~MLX5_FDB_TO_VPORT_REG_C_0; + + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.fdb_to_vport_reg_c_id, fdb_to_vport_reg_c_id); + + MLX5_SET(modify_esw_vport_context_in, in, + field_select.fdb_to_vport_reg_c_id, 1); + + return mlx5_eswitch_modify_esw_vport_context(esw, esw->manager_vport, + in, sizeof(in)); +} + +static void peer_miss_rules_setup(struct mlx5_eswitch *esw, + struct mlx5_core_dev *peer_dev, struct mlx5_flow_spec *spec, struct mlx5_flow_destination *dest) { - void *misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, - misc_parameters); + void *misc; - MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(peer_dev, vhca_id)); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, - misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, - source_eswitch_owner_vhca_id); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(peer_dev, vhca_id)); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, 
source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + } dest->type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest->vport.num = peer_dev->priv.eswitch->manager_vport; @@ -583,6 +675,26 @@ static void peer_miss_rules_setup(struct mlx5_core_dev *peer_dev, dest->vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; } +static void esw_set_peer_miss_rule_source_port(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw, + struct mlx5_flow_spec *spec, + u16 vport) +{ + void *misc; + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(peer_esw, + vport)); + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + } +} + static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, struct mlx5_core_dev *peer_dev) { @@ -600,7 +712,7 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, if (!spec) return -ENOMEM; - peer_miss_rules_setup(peer_dev, spec, &dest); + peer_miss_rules_setup(esw, peer_dev, spec, &dest); flows = kvzalloc(nvports * sizeof(*flows), GFP_KERNEL); if (!flows) { @@ -613,7 +725,9 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc_parameters); if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_PF); + esw_set_peer_miss_rule_source_port(esw, peer_dev->priv.eswitch, + spec, MLX5_VPORT_PF); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -635,7 +749,10 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, } mlx5_esw_for_each_vf_vport_num(esw, i, mlx5_core_max_vfs(esw->dev)) { - MLX5_SET(fte_match_set_misc, misc, source_port, i); + esw_set_peer_miss_rule_source_port(esw, + peer_dev->priv.eswitch, + spec, i); + flow = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) { @@ -919,6 +1036,30 @@ static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) #define MAX_PF_SQ 256 #define MAX_SQ_NVPORTS 32 +static void esw_set_flow_group_source_port(struct mlx5_eswitch *esw, + u32 *flow_group_in) +{ + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS_2); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters_2.metadata_reg_c_0); + } else { + MLX5_SET(create_flow_group_in, flow_group_in, + match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + } +} + static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1016,19 +1157,21 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) /* create peer esw miss group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, - match_criteria); + esw_set_flow_group_source_port(esw, flow_group_in); + + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria 
= MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_port); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + esw->total_vports - 1); @@ -1142,7 +1285,6 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *g; u32 *flow_group_in; - void *match_criteria, *misc; int err = 0; nvports = nvports + MLX5_ESW_MISS_FLOWS; @@ -1152,12 +1294,8 @@ static int esw_create_vport_rx_group(struct mlx5_eswitch *esw, int nvports) /* create vport rx group */ memset(flow_group_in, 0, inlen); - MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, - MLX5_MATCH_MISC_PARAMETERS); - match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); - misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + esw_set_flow_group_source_port(esw, flow_group_in); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); @@ -1196,13 +1334,24 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, goto out; } - misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, vport); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport)); - misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); - MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc, metadata_reg_c_0); - spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2; + } else { + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, @@ -1220,21 +1369,22 @@ out: static int esw_offloads_start(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1; - if (esw->mode != SRIOV_LEGACY && + if (esw->mode != MLX5_ESWITCH_LEGACY && !mlx5_core_is_ecpf_esw_manager(esw->dev)) { NL_SET_ERR_MSG_MOD(extack, "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; } - mlx5_eswitch_disable_sriov(esw); - err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + 
mlx5_eswitch_disable(esw); + mlx5_eswitch_update_num_of_vfs(esw, esw->dev->priv.sriov.num_vfs); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to offloads"); - err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to legacy"); @@ -1242,7 +1392,6 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, - num_vfs, &esw->offloads.inline_mode)) { esw->offloads.inline_mode = MLX5_INLINE_MODE_L2; NL_SET_ERR_MSG_MOD(extack, @@ -1259,11 +1408,11 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) int esw_offloads_init_reps(struct mlx5_eswitch *esw) { - int total_vports = MLX5_TOTAL_VPORTS(esw->dev); + int total_vports = esw->total_vports; struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; u8 hw_id[ETH_ALEN], rep_type; - int vport; + int vport_index; esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), @@ -1271,14 +1420,15 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) if (!esw->offloads.vport_reps) return -ENOMEM; - mlx5_query_nic_vport_mac_address(dev, 0, hw_id); + mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport, rep) { - rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport); + mlx5_esw_for_all_reps(esw, vport_index, rep) { + rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); + rep->vport_index = vport_index; ether_addr_copy(rep->hw_id, hw_id); for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } @@ -1288,9 +1438,9 @@ int esw_offloads_init_reps(struct mlx5_eswitch *esw) static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_LOADED, REP_REGISTERED) == REP_LOADED) - rep->rep_if[rep_type].unload(rep); + esw->offloads.rep_ops[rep_type]->unload(rep); } static void __unload_reps_special_vport(struct mlx5_eswitch *esw, u8 rep_type) @@ -1329,21 +1479,20 @@ static void esw_offloads_unload_vf_reps(struct mlx5_eswitch *esw, int nvports) __unload_reps_vf_vport(esw, nvports, rep_type); } -static void __unload_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) +static void __unload_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) { - __unload_reps_vf_vport(esw, nvports, rep_type); + __unload_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); /* Special vports must be the last to unload. 
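(Mirroring the load path, where special vports go first because the uplink rep creates the mdev resource, unload runs in reverse: VF reps first, special vports last.)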
*/ __unload_reps_special_vport(esw, rep_type); } -static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw, int nvports) +static void esw_offloads_unload_all_reps(struct mlx5_eswitch *esw) { u8 rep_type = NUM_REP_TYPES; while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_all_vport(esw, rep_type); } static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, @@ -1351,11 +1500,11 @@ static int __esw_offloads_load_rep(struct mlx5_eswitch *esw, { int err = 0; - if (atomic_cmpxchg(&rep->rep_if[rep_type].state, + if (atomic_cmpxchg(&rep->rep_data[rep_type].state, REP_REGISTERED, REP_LOADED) == REP_REGISTERED) { - err = rep->rep_if[rep_type].load(esw->dev, rep); + err = esw->offloads.rep_ops[rep_type]->load(esw->dev, rep); if (err) - atomic_set(&rep->rep_if[rep_type].state, + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); } @@ -1419,6 +1568,26 @@ err_vf: return err; } +static int __load_reps_all_vport(struct mlx5_eswitch *esw, u8 rep_type) +{ + int err; + + /* Special vports must be loaded first, uplink rep creates mdev resource. */ + err = __load_reps_special_vport(esw, rep_type); + if (err) + return err; + + err = __load_reps_vf_vport(esw, esw->esw_funcs.num_vfs, rep_type); + if (err) + goto err_vfs; + + return 0; + +err_vfs: + __unload_reps_special_vport(esw, rep_type); + return err; +} + static int esw_offloads_load_vf_reps(struct mlx5_eswitch *esw, int nvports) { u8 rep_type = 0; @@ -1438,34 +1607,13 @@ err_reps: return err; } -static int __load_reps_all_vport(struct mlx5_eswitch *esw, int nvports, - u8 rep_type) -{ - int err; - - /* Special vports must be loaded first. */ - err = __load_reps_special_vport(esw, rep_type); - if (err) - return err; - - err = __load_reps_vf_vport(esw, nvports, rep_type); - if (err) - goto err_vfs; - - return 0; - -err_vfs: - __unload_reps_special_vport(esw, rep_type); - return err; -} - -static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) +static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw) { u8 rep_type = 0; int err; for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { - err = __load_reps_all_vport(esw, nvports, rep_type); + err = __load_reps_all_vport(esw, rep_type); if (err) goto err_reps; } @@ -1474,7 +1622,7 @@ static int esw_offloads_load_all_reps(struct mlx5_eswitch *esw, int nvports) err_reps: while (rep_type-- > 0) - __unload_reps_all_vport(esw, nvports, rep_type); + __unload_reps_all_vport(esw, rep_type); return err; } @@ -1510,6 +1658,10 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != + mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) + break; + err = mlx5_esw_offloads_pair(esw, peer_esw); if (err) goto err_out; @@ -1578,32 +1730,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_act flow_act = {0}; struct mlx5_flow_spec *spec; int err = 0; /* For prio tag mode, there is only 1 FTEs: - * 1) Untagged packets - push prio tag VLAN, allow + * 1) Untagged packets - push prio tag VLAN and modify metadata if + * required, allow * Unmatched traffic is allowed by default */ - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - return -EOPNOTSUPP; - - esw_vport_cleanup_ingress_rules(esw, vport); - - err = esw_vport_enable_ingress_acl(esw, vport); - if (err) { - 
mlx5_core_warn(esw->dev, - "failed to enable prio tag ingress acl (%d) on vport[%d]\n", - err, vport->vport); - return err; - } - - esw_debug(esw->dev, - "vport[%d] configure ingress rules\n", vport->vport); - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) { err = -ENOMEM; @@ -1619,6 +1755,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw, flow_act.vlan[0].ethtype = ETH_P_8021Q; flow_act.vlan[0].vid = 0; flow_act.vlan[0].prio = 0; + + if (vport->ingress.modify_metadata_rule) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_id = vport->ingress.modify_metadata_id; + } + vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); @@ -1639,6 +1781,58 @@ out_no_mem: return err; } +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec spec = {}; + int err = 0; + + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0); + MLX5_SET(set_action_in, action, data, + mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport)); + + err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + 1, action, &vport->ingress.modify_metadata_id); + if (err) { + esw_warn(esw->dev, + "failed to alloc modify header for vport %d ingress acl (%d)\n", + vport->vport, err); + return err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW; + flow_act.modify_id = vport->ingress.modify_metadata_id; + vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl, + &spec, &flow_act, NULL, 0); + if (IS_ERR(vport->ingress.modify_metadata_rule)) { + err = PTR_ERR(vport->ingress.modify_metadata_rule); + esw_warn(esw->dev, + "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n", + vport->vport, err); + vport->ingress.modify_metadata_rule = NULL; + goto out; + } + +out: + if (err) + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + return err; +} + +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (vport->ingress.modify_metadata_rule) { + mlx5_del_flow_rules(vport->ingress.modify_metadata_rule); + mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id); + + vport->ingress.modify_metadata_rule = NULL; + } +} + static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { @@ -1646,6 +1840,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec; int err = 0; + if (!MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + /* For prio tag mode, there is only 1 FTEs: * 1) prio tag packets - pop the prio tag VLAN, allow * Unmatched traffic is allowed by default @@ -1699,27 +1896,98 @@ out_no_mem: return err; } -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports) +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - struct mlx5_vport *vport = NULL; - int i, j; int err; - mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) { + if (!mlx5_eswitch_vport_match_metadata_enabled(esw) && + !MLX5_CAP_GEN(esw->dev, prio_tag_required)) + return 0; + + esw_vport_cleanup_ingress_rules(esw, vport); + + err = 
esw_vport_enable_ingress_acl(esw, vport); + if (err) { + esw_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } + + esw_debug(esw->dev, + "vport[%d] configure ingress rules\n", vport->vport); + + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = esw_vport_add_ingress_acl_modify_metadata(esw, vport); + if (err) + goto out; + } + + if (MLX5_CAP_GEN(esw->dev, prio_tag_required) && + mlx5_eswitch_is_vf_vport(esw, vport->vport)) { err = esw_vport_ingress_prio_tag_config(esw, vport); if (err) - goto err_ingress; - err = esw_vport_egress_prio_tag_config(esw, vport); + goto out; + } + +out: + if (err) + esw_vport_disable_ingress_acl(esw, vport); + return err; +} + +static bool +esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) +{ + if (!MLX5_CAP_ESW(esw->dev, esw_uplink_ingress_acl)) + return false; + + if (!(MLX5_CAP_ESW_FLOWTABLE(esw->dev, fdb_to_vport_reg_c_id) & + MLX5_FDB_TO_VPORT_REG_C_0)) + return false; + + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) + return false; + + if (mlx5_core_is_ecpf_esw_manager(esw->dev) || + mlx5_ecpf_vport_exists(esw->dev)) + return false; + + return true; +} + +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + int i, j; + int err; + + if (esw_check_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + + mlx5_esw_for_all_vports(esw, i, vport) { + err = esw_vport_ingress_common_config(esw, vport); if (err) - goto err_egress; + goto err_ingress; + + if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) { + err = esw_vport_egress_prio_tag_config(esw, vport); + if (err) + goto err_egress; + } } + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + esw_info(esw->dev, "Use metadata reg_c as source vport to match\n"); + return 0; err_egress: esw_vport_disable_ingress_acl(esw, vport); err_ingress: - mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) { + for (j = MLX5_VPORT_PF; j < i; j++) { + vport = &esw->vports[j]; esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1727,40 +1995,46 @@ err_ingress: return err; } -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; int i; - mlx5_esw_for_each_vf_vport(esw, i, vport, esw->dev->priv.sriov.num_vfs) { + mlx5_esw_for_all_vports(esw, i, vport) { esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } + + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; } -static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int vf_nvports, - int nvports) +static int esw_offloads_steering_init(struct mlx5_eswitch *esw) { + int num_vfs = esw->esw_funcs.num_vfs; + int total_vports; int err; + if (mlx5_core_is_ecpf_esw_manager(esw->dev)) + total_vports = esw->total_vports; + else + total_vports = num_vfs + MLX5_SPECIAL_VPORTS(esw->dev); + memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) { - err = esw_prio_tag_acls_config(esw, vf_nvports); - if (err) - return err; - } - - err = esw_create_offloads_fdb_tables(esw, nvports); + err = esw_create_offloads_acl_tables(esw); if (err) return err; - err = esw_create_offloads_table(esw, nvports); + err = esw_create_offloads_fdb_tables(esw, total_vports); + if (err) + goto create_fdb_err; + + err = 
esw_create_offloads_table(esw, total_vports); if (err) goto create_ft_err; - err = esw_create_vport_rx_group(esw, nvports); + err = esw_create_vport_rx_group(esw, total_vports); if (err) goto create_fg_err; @@ -1772,6 +2046,9 @@ create_fg_err: create_ft_err: esw_destroy_offloads_fdb_tables(esw); +create_fdb_err: + esw_destroy_offloads_acl_tables(esw); + return err; } @@ -1780,88 +2057,105 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_tables(esw); - if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) - esw_prio_tag_acls_cleanup(esw); + esw_destroy_offloads_acl_tables(esw); } -static void esw_host_params_event_handler(struct work_struct *work) +static void +esw_vfs_changed_event_handler(struct mlx5_eswitch *esw, const u32 *out) { - struct mlx5_host_work *host_work; - struct mlx5_eswitch *esw; - int err, num_vf = 0; + bool host_pf_disabled; + u16 new_num_vfs; - host_work = container_of(work, struct mlx5_host_work, work); - esw = host_work->esw; + new_num_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_num_of_vfs); + host_pf_disabled = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_pf_disabled); - err = mlx5_query_host_params_num_vfs(esw->dev, &num_vf); - if (err || num_vf == esw->host_info.num_vfs) - goto out; + if (new_num_vfs == esw->esw_funcs.num_vfs || host_pf_disabled) + return; /* Number of VFs can only change from "0 to x" or "x to 0". */ - if (esw->host_info.num_vfs > 0) { - esw_offloads_unload_vf_reps(esw, esw->host_info.num_vfs); + if (esw->esw_funcs.num_vfs > 0) { + esw_offloads_unload_vf_reps(esw, esw->esw_funcs.num_vfs); } else { - err = esw_offloads_load_vf_reps(esw, num_vf); + int err; + err = esw_offloads_load_vf_reps(esw, new_num_vfs); if (err) - goto out; + return; } + esw->esw_funcs.num_vfs = new_num_vfs; +} + +static void esw_functions_changed_event_handler(struct work_struct *work) +{ + struct mlx5_host_work *host_work; + struct mlx5_eswitch *esw; + const u32 *out; - esw->host_info.num_vfs = num_vf; + host_work = container_of(work, struct mlx5_host_work, work); + esw = host_work->esw; + out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(out)) + goto out; + + esw_vfs_changed_event_handler(esw, out); + kvfree(out); out: kfree(host_work); } -static int esw_host_params_event(struct notifier_block *nb, - unsigned long type, void *data) +int mlx5_esw_funcs_changed_handler(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_esw_functions *esw_funcs; struct mlx5_host_work *host_work; - struct mlx5_host_info *host_info; struct mlx5_eswitch *esw; host_work = kzalloc(sizeof(*host_work), GFP_ATOMIC); if (!host_work) return NOTIFY_DONE; - host_info = mlx5_nb_cof(nb, struct mlx5_host_info, nb); - esw = container_of(host_info, struct mlx5_eswitch, host_info); + esw_funcs = mlx5_nb_cof(nb, struct mlx5_esw_functions, nb); + esw = container_of(esw_funcs, struct mlx5_eswitch, esw_funcs); host_work->esw = esw; - INIT_WORK(&host_work->work, esw_host_params_event_handler); + INIT_WORK(&host_work->work, esw_functions_changed_event_handler); queue_work(esw->work_queue, &host_work->work); return NOTIFY_OK; } -int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, - int total_nvports) +int esw_offloads_init(struct mlx5_eswitch *esw) { int err; - err = esw_offloads_steering_init(esw, vf_nvports, total_nvports); + err = esw_offloads_steering_init(esw); if (err) return err; - err = esw_offloads_load_all_reps(esw, 
vf_nvports); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) { + err = mlx5_eswitch_enable_passing_vport_metadata(esw); + if (err) + goto err_vport_metadata; + } + + err = esw_offloads_load_all_reps(esw); if (err) goto err_reps; esw_offloads_devcom_init(esw); - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - MLX5_NB_INIT(&esw->host_info.nb, esw_host_params_event, - HOST_PARAMS_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->host_info.nb); - esw->host_info.num_vfs = vf_nvports; - } + mutex_init(&esw->offloads.termtbl_mutex); mlx5_rdma_enable_roce(esw->dev); return 0; err_reps: + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); +err_vport_metadata: esw_offloads_steering_cleanup(esw); return err; } @@ -1869,13 +2163,13 @@ err_reps: static int esw_offloads_stop(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; + int err, err1; - mlx5_eswitch_disable_sriov(esw); - err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + mlx5_eswitch_disable(esw); + err = mlx5_eswitch_enable(esw, MLX5_ESWITCH_LEGACY); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); - err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + err1 = mlx5_eswitch_enable(esw, MLX5_ESWITCH_OFFLOADS); if (err1) { NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch back to offloads"); @@ -1887,19 +2181,11 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw, void esw_offloads_cleanup(struct mlx5_eswitch *esw) { - u16 num_vfs; - - if (mlx5_core_is_ecpf_esw_manager(esw->dev)) { - mlx5_eq_notifier_unregister(esw->dev, &esw->host_info.nb); - flush_workqueue(esw->work_queue); - num_vfs = esw->host_info.num_vfs; - } else { - num_vfs = esw->dev->priv.sriov.num_vfs; - } - mlx5_rdma_disable_roce(esw->dev); esw_offloads_devcom_cleanup(esw); - esw_offloads_unload_all_reps(esw, num_vfs); + esw_offloads_unload_all_reps(esw); + if (mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_disable_passing_vport_metadata(esw); esw_offloads_steering_cleanup(esw); } @@ -1907,10 +2193,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) { switch (mode) { case DEVLINK_ESWITCH_MODE_LEGACY: - *mlx5_mode = SRIOV_LEGACY; + *mlx5_mode = MLX5_ESWITCH_LEGACY; break; case DEVLINK_ESWITCH_MODE_SWITCHDEV: - *mlx5_mode = SRIOV_OFFLOADS; + *mlx5_mode = MLX5_ESWITCH_OFFLOADS; break; default: return -EINVAL; @@ -1922,10 +2208,10 @@ static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode) { switch (mlx5_mode) { - case SRIOV_LEGACY: + case MLX5_ESWITCH_LEGACY: *mode = DEVLINK_ESWITCH_MODE_LEGACY; break; - case SRIOV_OFFLOADS: + case MLX5_ESWITCH_OFFLOADS: *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV; break; default: @@ -1989,7 +2275,7 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) if(!MLX5_ESWITCH_MANAGER(dev)) return -EPERM; - if (dev->priv.eswitch->mode == SRIOV_NONE && + if (dev->priv.eswitch->mode == MLX5_ESWITCH_NONE && !mlx5_core_is_ecpf_esw_manager(dev)) return -EOPNOTSUPP; @@ -2040,7 +2326,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; - int err, vport; + int err, vport, num_vport; u8 mlx5_mode; err = mlx5_devlink_eswitch_check(devlink); @@ -2069,7 +2355,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (err) goto out; - for (vport = 1; 
vport < esw->enabled_vports; vport++) { + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); if (err) { NL_SET_ERR_MSG_MOD(extack, @@ -2082,7 +2368,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, return 0; revert_inline_mode: - while (--vport > 0) + num_vport = --vport; + mlx5_esw_for_each_host_func_vport_reverse(esw, vport, num_vport) mlx5_modify_nic_vport_min_inline(dev, vport, esw->offloads.inline_mode); @@ -2103,7 +2390,7 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); } -int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) +int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, u8 *mode) { u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2; struct mlx5_core_dev *dev = esw->dev; @@ -2112,7 +2399,7 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) if (!MLX5_CAP_GEN(dev, vport_group_manager)) return -EOPNOTSUPP; - if (esw->mode == SRIOV_NONE) + if (esw->mode == MLX5_ESWITCH_NONE) return -EOPNOTSUPP; switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { @@ -2127,9 +2414,10 @@ int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode) } query_vports: - for (vport = 1; vport <= nvfs; vport++) { + mlx5_query_nic_vport_min_inline(dev, esw->first_host_vport, &prev_mlx5_mode); + mlx5_esw_for_each_host_func_vport(esw, vport, esw->esw_funcs.num_vfs) { mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode); - if (vport > 1 && prev_mlx5_mode != mlx5_mode) + if (prev_mlx5_mode != mlx5_mode) return -EINVAL; prev_mlx5_mode = mlx5_mode; } @@ -2139,7 +2427,8 @@ out: return 0; } -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, + enum devlink_eswitch_encap_mode encap, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -2158,7 +2447,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) return -EOPNOTSUPP; - if (esw->mode == SRIOV_LEGACY) { + if (esw->mode == MLX5_ESWITCH_LEGACY) { esw->offloads.encap = encap; return 0; } @@ -2188,7 +2477,8 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, return err; } -int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, + enum devlink_eswitch_encap_mode *encap) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -2203,36 +2493,31 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) } void mlx5_eswitch_register_vport_reps(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep_if *__rep_if, + const struct mlx5_eswitch_rep_ops *ops, u8 rep_type) { - struct mlx5_eswitch_rep_if *rep_if; + struct mlx5_eswitch_rep_data *rep_data; struct mlx5_eswitch_rep *rep; int i; + esw->offloads.rep_ops[rep_type] = ops; mlx5_esw_for_all_reps(esw, i, rep) { - rep_if = &rep->rep_if[rep_type]; - rep_if->load = __rep_if->load; - rep_if->unload = __rep_if->unload; - rep_if->get_proto_dev = __rep_if->get_proto_dev; - rep_if->priv = __rep_if->priv; - - atomic_set(&rep_if->state, REP_REGISTERED); + rep_data = &rep->rep_data[rep_type]; + atomic_set(&rep_data->state, REP_REGISTERED); } } 
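/* Illustrative only, not part of the patch: with the shared ops table, a
 * consumer now registers one const callback set per rep type instead of
 * copying callbacks into every rep. A minimal sketch, callback names
 * hypothetical:
 *
 *	static const struct mlx5_eswitch_rep_ops my_eth_rep_ops = {
 *		.load		= my_rep_load,
 *		.unload		= my_rep_unload,
 *		.get_proto_dev	= my_rep_get_proto_dev,
 *	};
 *
 *	mlx5_eswitch_register_vport_reps(esw, &my_eth_rep_ops, REP_ETH);
 */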
EXPORT_SYMBOL(mlx5_eswitch_register_vport_reps); void mlx5_eswitch_unregister_vport_reps(struct mlx5_eswitch *esw, u8 rep_type) { - u16 max_vf = mlx5_core_max_vfs(esw->dev); struct mlx5_eswitch_rep *rep; int i; - if (esw->mode == SRIOV_OFFLOADS) - __unload_reps_all_vport(esw, max_vf, rep_type); + if (esw->mode == MLX5_ESWITCH_OFFLOADS) + __unload_reps_all_vport(esw, rep_type); mlx5_esw_for_all_reps(esw, i, rep) - atomic_set(&rep->rep_if[rep_type].state, REP_UNREGISTERED); + atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); } EXPORT_SYMBOL(mlx5_eswitch_unregister_vport_reps); @@ -2241,7 +2526,7 @@ void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type) struct mlx5_eswitch_rep *rep; rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); - return rep->rep_if[rep_type].priv; + return rep->rep_data[rep_type].priv; } void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, @@ -2252,9 +2537,9 @@ void *mlx5_eswitch_get_proto_dev(struct mlx5_eswitch *esw, rep = mlx5_eswitch_get_rep(esw, vport); - if (atomic_read(&rep->rep_if[rep_type].state) == REP_LOADED && - rep->rep_if[rep_type].get_proto_dev) - return rep->rep_if[rep_type].get_proto_dev(rep); + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + esw->offloads.rep_ops[rep_type]->get_proto_dev) + return esw->offloads.rep_ops[rep_type]->get_proto_dev(rep); return NULL; } EXPORT_SYMBOL(mlx5_eswitch_get_proto_dev); @@ -2271,3 +2556,22 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, return mlx5_eswitch_get_rep(esw, vport); } EXPORT_SYMBOL(mlx5_eswitch_vport_rep); + +bool mlx5_eswitch_is_vf_vport(const struct mlx5_eswitch *esw, u16 vport_num) +{ + return vport_num >= MLX5_VPORT_FIRST_VF && + vport_num <= esw->dev->priv.sriov.max_vfs; +} + +bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw) +{ + return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA); +} +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled); + +u32 mlx5_eswitch_get_vport_metadata_for_match(const struct mlx5_eswitch *esw, + u16 vport_num) +{ + return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport_num; +} +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c new file mode 100644 index 000000000000..1d55a324a17e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. 
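/* Reader's sketch, not part of the patch: a termination table wraps a vport
 * destination behind a single-rule flow table so that packet-modifying
 * actions (here, VLAN push on traffic received from the uplink) execute at a
 * well-defined terminating hop. The caller in eswitch_offloads.c selects it
 * like this:
 *
 *	if (mlx5_eswitch_termtbl_required(esw, &flow_act, spec))
 *		rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, attr,
 *						     &flow_act, dest, i);
 *	else
 *		rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
 *
 * Tables are deduplicated by (action, vlan, destination) via the hash table
 * below and reference-counted through mlx5_eswitch_termtbl_put().
 */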
+ +#include <linux/mlx5/fs.h> +#include "eswitch.h" + +struct mlx5_termtbl_handle { + struct hlist_node termtbl_hlist; + + struct mlx5_flow_table *termtbl; + struct mlx5_flow_act flow_act; + struct mlx5_flow_destination dest; + + struct mlx5_flow_handle *rule; + int ref_count; +}; + +static u32 +mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + u32 hash; + + hash = jhash_1word(flow_act->action, 0); + hash = jhash((const void *)&flow_act->vlan, + sizeof(flow_act->vlan), hash); + hash = jhash((const void *)&dest->vport.num, + sizeof(dest->vport.num), hash); + hash = jhash((const void *)&dest->vport.vhca_id, + sizeof(dest->vport.vhca_id), hash); + return hash; +} + +static int +mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, + struct mlx5_flow_destination *dest1, + struct mlx5_flow_act *flow_act2, + struct mlx5_flow_destination *dest2) +{ + return flow_act1->action != flow_act2->action || + dest1->vport.num != dest2->vport.num || + dest1->vport.vhca_id != dest2->vport.vhca_id || + memcmp(&flow_act1->vlan, &flow_act2->vlan, + sizeof(flow_act1->vlan)); +} + +static int +mlx5_eswitch_termtbl_create(struct mlx5_core_dev *dev, + struct mlx5_termtbl_handle *tt, + struct mlx5_flow_act *flow_act) +{ + static const struct mlx5_flow_spec spec = {}; + struct mlx5_flow_namespace *root_ns; + int prio, flags; + int err; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + return -EOPNOTSUPP; + } + + /* As this is the terminating action, the termination table uses the + * same prio as the slow path. + */ + prio = FDB_SLOW_PATH; + flags = MLX5_FLOW_TABLE_TERMINATION; + tt->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, prio, 1, 1, + 0, flags); + if (IS_ERR(tt->termtbl)) { + esw_warn(dev, "Failed to create termination table\n"); + return -EOPNOTSUPP; + } + + tt->rule = mlx5_add_flow_rules(tt->termtbl, &spec, flow_act, + &tt->dest, 1); + + if (IS_ERR(tt->rule)) { + esw_warn(dev, "Failed to create termination table rule\n"); + goto add_flow_err; + } + return 0; + +add_flow_err: + err = mlx5_destroy_flow_table(tt->termtbl); + if (err) + esw_warn(dev, "Failed to destroy termination table\n"); + + return -EOPNOTSUPP; +} + +static struct mlx5_termtbl_handle * +mlx5_eswitch_termtbl_get_create(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest) +{ + struct mlx5_termtbl_handle *tt; + bool found = false; + u32 hash_key; + int err; + + mutex_lock(&esw->offloads.termtbl_mutex); + + hash_key = mlx5_eswitch_termtbl_hash(flow_act, dest); + hash_for_each_possible(esw->offloads.termtbl_tbl, tt, + termtbl_hlist, hash_key) { + if (!mlx5_eswitch_termtbl_cmp(&tt->flow_act, &tt->dest, + flow_act, dest)) { + found = true; + break; + } + } + if (found) + goto tt_add_ref; + + tt = kzalloc(sizeof(*tt), GFP_KERNEL); + if (!tt) { + err = -ENOMEM; + goto tt_create_err; + } + + tt->dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + tt->dest.vport.num = dest->vport.num; + tt->dest.vport.vhca_id = dest->vport.vhca_id; + memcpy(&tt->flow_act, flow_act, sizeof(*flow_act)); + + err = mlx5_eswitch_termtbl_create(esw->dev, tt, flow_act); + if (err) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto tt_create_err; + } + hash_add(esw->offloads.termtbl_tbl, &tt->termtbl_hlist, hash_key); +tt_add_ref: + tt->ref_count++; + mutex_unlock(&esw->offloads.termtbl_mutex); + return tt; +tt_create_err: + kfree(tt); + 
mutex_unlock(&esw->offloads.termtbl_mutex); + return ERR_PTR(err); +} + +void +mlx5_eswitch_termtbl_put(struct mlx5_eswitch *esw, + struct mlx5_termtbl_handle *tt) +{ + mutex_lock(&esw->offloads.termtbl_mutex); + if (--tt->ref_count == 0) + hash_del(&tt->termtbl_hlist); + mutex_unlock(&esw->offloads.termtbl_mutex); + + if (!tt->ref_count) { + mlx5_del_flow_rules(tt->rule); + mlx5_destroy_flow_table(tt->termtbl); + kfree(tt); + } +} + +static void +mlx5_eswitch_termtbl_actions_move(struct mlx5_flow_act *src, + struct mlx5_flow_act *dst) +{ + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + memcpy(&dst->vlan[0], &src->vlan[0], sizeof(src->vlan[0])); + memset(&src->vlan[0], 0, sizeof(src->vlan[0])); + + if (!(src->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2)) + return; + + src->action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + dst->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; + memcpy(&dst->vlan[1], &src->vlan[1], sizeof(src->vlan[1])); + memset(&src->vlan[1], 0, sizeof(src->vlan[1])); +} + +bool +mlx5_eswitch_termtbl_required(struct mlx5_eswitch *esw, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_spec *spec) +{ + u32 port_mask = MLX5_GET(fte_match_param, spec->match_criteria, + misc_parameters.source_port); + u32 port_value = MLX5_GET(fte_match_param, spec->match_value, + misc_parameters.source_port); + + if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, termination_table)) + return false; + + /* push vlan on RX */ + return (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) && + ((port_mask & port_value) == MLX5_VPORT_UPLINK); +} + +struct mlx5_flow_handle * +mlx5_eswitch_add_termtbl_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_table *fdb, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int num_dest) +{ + struct mlx5_flow_act term_tbl_act = {}; + struct mlx5_flow_handle *rule = NULL; + bool term_table_created = false; + int num_vport_dests = 0; + int i, curr_dest; + + mlx5_eswitch_termtbl_actions_move(flow_act, &term_tbl_act); + term_tbl_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + + for (i = 0; i < num_dest; i++) { + struct mlx5_termtbl_handle *tt; + + /* only vport destinations can be terminated */ + if (dest[i].type != MLX5_FLOW_DESTINATION_TYPE_VPORT) + continue; + + /* get the terminating table for the action list */ + tt = mlx5_eswitch_termtbl_get_create(esw, &term_tbl_act, + &dest[i]); + if (IS_ERR(tt)) { + esw_warn(esw->dev, "Failed to create termination table\n"); + goto revert_changes; + } + attr->dests[num_vport_dests].termtbl = tt; + num_vport_dests++; + + /* link the destination with the termination table */ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = tt->termtbl; + term_table_created = true; + } + + /* at least one destination should reference a termination table */ + if (!term_table_created) + goto revert_changes; + + /* create the FTE */ + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); + if (IS_ERR(rule)) + goto revert_changes; + + goto out; + +revert_changes: + /* revert the changes that were made to the original flow_act + * and fall-back to the original rule actions + */ + mlx5_eswitch_termtbl_actions_move(&term_tbl_act, flow_act); + + for (curr_dest = 0; curr_dest < num_vport_dests; curr_dest++) { + struct mlx5_termtbl_handle *tt = attr->dests[curr_dest].termtbl; + + /* search for the 
destination associated with the + * current term table + */ + for (i = 0; i < num_dest; i++) { + if (dest[i].ft != tt->termtbl) + continue; + + memset(&dest[i], 0, sizeof(dest[i])); + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = tt->dest.vport.num; + dest[i].vport.vhca_id = tt->dest.vport.vhca_id; + mlx5_eswitch_termtbl_put(esw, tt); + break; + } + } + rule = mlx5_add_flow_rules(fdb, spec, flow_act, dest, num_dest); +out: + return rule; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index a81e8d2168d8..8bcf3426b9c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -108,8 +108,8 @@ static const char *eqe_type_str(u8 type) return "MLX5_EVENT_TYPE_STALL_EVENT"; case MLX5_EVENT_TYPE_CMD: return "MLX5_EVENT_TYPE_CMD"; - case MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE: - return "MLX5_EVENT_TYPE_HOST_PARAMS_CHANGE"; + case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED: + return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED"; case MLX5_EVENT_TYPE_PAGE_REQUEST: return "MLX5_EVENT_TYPE_PAGE_REQUEST"; case MLX5_EVENT_TYPE_PAGE_FAULT: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index ca2296a2f9ee..4c50efe4e7f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -414,7 +414,8 @@ static void mlx5_fpga_conn_cq_tasklet(unsigned long data) mlx5_fpga_conn_cqes(conn, MLX5_FPGA_CQ_BUDGET); } -static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq) +static void mlx5_fpga_conn_cq_complete(struct mlx5_core_cq *mcq, + struct mlx5_eqe *eqe) { struct mlx5_fpga_conn *conn; @@ -429,6 +430,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_fpga_device *fdev = conn->fdev; struct mlx5_core_dev *mdev = fdev->mdev; u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; @@ -476,7 +478,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas); - err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen); + err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen, out, sizeof(out)); kvfree(in); if (err) @@ -867,7 +869,7 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, conn->cb_arg = attr->cb_arg; remote_mac = MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, remote_mac_47_32); - err = mlx5_query_nic_vport_mac_address(fdev->mdev, 0, remote_mac); + err = mlx5_query_mac_address(fdev->mdev, remote_mac); if (err) { mlx5_fpga_err(fdev, "Failed to query local MAC: %d\n", err); ret = ERR_PTR(err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index 52c47d3dd5a5..c76da309506b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -636,7 +636,8 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, u8 match_criteria_enable, const u32 *match_c, const u32 *match_v, - struct mlx5_flow_act *flow_act) + struct mlx5_flow_act *flow_act, + struct mlx5_flow_context *flow_context) { const void *outer_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); @@ -655,7 +656,7 @@ static bool 
mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - (flow_act->flags & FLOW_ACT_HAS_TAG)) + (flow_context->flags & FLOW_CONTEXT_HAS_TAG)) return false; return true; @@ -767,7 +768,8 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev, fg->mask.match_criteria_enable, fg->mask.match_criteria, fte->val, - &fte->action)) + &fte->action, + &fte->flow_context)) return ERR_PTR(-EINVAL); else if (!mlx5_is_fpga_ipsec_rule(mdev, fg->mask.match_criteria_enable, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h index 2b5e63b0d4d6..382985e65b48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -37,8 +37,6 @@ #include "accel/ipsec.h" #include "fs_cmd.h" -#ifdef CONFIG_MLX5_FPGA - u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev); int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters, @@ -66,77 +64,4 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, const struct mlx5_flow_cmds * mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); -#else - -static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline unsigned int -mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, - u64 *counters) -{ - return 0; -} - -static inline void * -mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev, - struct mlx5_accel_esp_xfrm *accel_xfrm, - const __be32 saddr[4], - const __be32 daddr[4], - const __be32 spi, bool is_ipv6) -{ - return NULL; -} - -static inline void mlx5_fpga_ipsec_delete_sa_ctx(void *context) -{ -} - -static inline int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev) -{ - return 0; -} - -static inline void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev) -{ -} - -static inline void mlx5_fpga_ipsec_build_fs_cmds(void) -{ -} - -static inline struct mlx5_accel_esp_xfrm * -mlx5_fpga_esp_create_xfrm(struct mlx5_core_dev *mdev, - const struct mlx5_accel_esp_xfrm_attrs *attrs, - u32 flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void mlx5_fpga_esp_destroy_xfrm(struct mlx5_accel_esp_xfrm *xfrm) -{ -} - -static inline int -mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm, - const struct mlx5_accel_esp_xfrm_attrs *attrs) -{ - return -EOPNOTSUPP; -} - -static inline const struct mlx5_flow_cmds * -mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) -{ - return mlx5_fs_cmd_get_default(type); -} - -#endif /* CONFIG_MLX5_FPGA */ - #endif /* __MLX5_FPGA_SADB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 013b1ca4a791..7ac1249eadc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -147,6 +147,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, { int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = 
{0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; struct mlx5_core_dev *dev = ns->dev; @@ -167,6 +168,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, en_decap); MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, en_encap); + MLX5_SET(create_flow_table_in, in, flow_table_context.termination_table, + term); switch (ft->op_mod) { case FS_FT_OP_MOD_NORMAL: @@ -393,7 +396,11 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); - MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_tag, + fte->flow_context.flow_tag); + MLX5_SET(flow_context, in_flow_context, flow_source, + fte->flow_context.flow_source); + MLX5_SET(flow_context, in_flow_context, extended_destination, extended_dest); if (extended_dest) { @@ -768,6 +775,10 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); table_type = FS_FT_NIC_TX; break; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions); + table_type = FS_FT_ESW_INGRESS_ACL; + break; default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index fe76c6fd6d80..3e99799bdb40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -584,7 +584,7 @@ err_ida_remove: } static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act) { struct mlx5_flow_steering *steering = get_steering(&ft->node); @@ -594,9 +594,10 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, if (!fte) return ERR_PTR(-ENOMEM); - memcpy(fte->val, match_value, sizeof(fte->val)); + memcpy(fte->val, &spec->match_value, sizeof(fte->val)); fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; + fte->flow_context = spec->flow_context; tree_init_node(&fte->node, NULL, del_sw_fte); @@ -612,7 +613,7 @@ static void dealloc_flow_group(struct mlx5_flow_steering *steering, static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index) { @@ -642,7 +643,7 @@ static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steer static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft, u8 match_criteria_enable, - void *match_criteria, + const void *match_criteria, int start_index, int end_index, struct list_head *prev) @@ -1285,7 +1286,7 @@ free_handle: } static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct list_head *prev = &ft->node.children; struct mlx5_flow_group *fg; @@ -1430,7 +1431,9 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; } -static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +static int check_conflicting_ftes(struct fs_fte *fte, + const struct mlx5_flow_context *flow_context, + const struct mlx5_flow_act *flow_act) { if (check_conflicting_actions(flow_act->action, fte->action.action)) { mlx5_core_warn(get_dev(&fte->node), @@ -1438,12 
+1441,12 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if ((flow_act->flags & FLOW_ACT_HAS_TAG) && - fte->action.flow_tag != flow_act->flow_tag) { + if ((flow_context->flags & FLOW_CONTEXT_HAS_TAG) && + fte->flow_context.flow_tag != flow_context->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", - fte->action.flow_tag, - flow_act->flow_tag); + fte->flow_context.flow_tag, + flow_context->flow_tag); return -EEXIST; } @@ -1451,7 +1454,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act } static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, - u32 *match_value, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1462,7 +1465,7 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, int i; int ret; - ret = check_conflicting_ftes(fte, flow_act); + ret = check_conflicting_ftes(fte, &spec->flow_context, flow_act); if (ret) return ERR_PTR(ret); @@ -1536,7 +1539,7 @@ static void free_match_list(struct match_list_head *head) static int build_match_list(struct match_list_head *match_head, struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec) + const struct mlx5_flow_spec *spec) { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; @@ -1589,7 +1592,7 @@ static u64 matched_fgs_get_version(struct list_head *match_head) static struct fs_fte * lookup_fte_locked(struct mlx5_flow_group *g, - u32 *match_value, + const u32 *match_value, bool take_write) { struct fs_fte *fte_tmp; @@ -1622,7 +1625,7 @@ out: static struct mlx5_flow_handle * try_add_to_existing_fg(struct mlx5_flow_table *ft, struct list_head *match_head, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num, @@ -1637,7 +1640,7 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, u64 version; int err; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) return ERR_PTR(-ENOMEM); @@ -1653,8 +1656,7 @@ search_again_locked: fte_tmp = lookup_fte_locked(g, spec->match_value, take_write); if (!fte_tmp) continue; - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte_tmp); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte_tmp); up_write_ref_node(&fte_tmp->node, false); tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); @@ -1701,8 +1703,7 @@ skip_search: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); return rule; @@ -1715,7 +1716,7 @@ out: static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int dest_num) @@ -1788,7 +1789,7 @@ search_again_locked: if (err) goto err_release_fg; - fte = alloc_fte(ft, spec->match_value, flow_act); + fte = alloc_fte(ft, spec, flow_act); if (IS_ERR(fte)) { err = PTR_ERR(fte); goto err_release_fg; @@ -1802,8 +1803,7 @@ search_again_locked: nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); up_write_ref_node(&g->node, false); - rule = 
add_rule_fg(g, spec->match_value, flow_act, dest, - dest_num, fte); + rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); tree_put_node(&g->node, false); @@ -1823,7 +1823,7 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft) struct mlx5_flow_handle * mlx5_add_flow_rules(struct mlx5_flow_table *ft, - struct mlx5_flow_spec *spec, + const struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, int num_dest) @@ -2092,7 +2092,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d { struct mlx5_flow_steering *steering = dev->priv.steering; - if (!steering || vport >= MLX5_TOTAL_VPORTS(dev)) + if (!steering || vport >= mlx5_eswitch_get_total_vports(dev)) return NULL; switch (type) { @@ -2423,7 +2423,7 @@ static void cleanup_egress_acls_root_ns(struct mlx5_core_dev *dev) if (!steering->esw_egress_root_ns) return; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) cleanup_root_ns(steering->esw_egress_root_ns[i]); kfree(steering->esw_egress_root_ns); @@ -2438,7 +2438,7 @@ static void cleanup_ingress_acls_root_ns(struct mlx5_core_dev *dev) if (!steering->esw_ingress_root_ns) return; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) + for (i = 0; i < mlx5_eswitch_get_total_vports(dev); i++) cleanup_root_ns(steering->esw_ingress_root_ns[i]); kfree(steering->esw_ingress_root_ns); @@ -2606,16 +2606,18 @@ static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vpo static int init_egress_acls_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; - steering->esw_egress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), - sizeof(*steering->esw_egress_root_ns), - GFP_KERNEL); + steering->esw_egress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_egress_root_ns), + GFP_KERNEL); if (!steering->esw_egress_root_ns) return -ENOMEM; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + for (i = 0; i < total_vports; i++) { err = init_egress_acl_root_ns(steering, i); if (err) goto cleanup_root_ns; @@ -2634,16 +2636,18 @@ cleanup_root_ns: static int init_ingress_acls_root_ns(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); int err; int i; - steering->esw_ingress_root_ns = kcalloc(MLX5_TOTAL_VPORTS(dev), - sizeof(*steering->esw_ingress_root_ns), - GFP_KERNEL); + steering->esw_ingress_root_ns = + kcalloc(total_vports, + sizeof(*steering->esw_ingress_root_ns), + GFP_KERNEL); if (!steering->esw_ingress_root_ns) return -ENOMEM; - for (i = 0; i < MLX5_TOTAL_VPORTS(dev); i++) { + for (i = 0; i < total_vports; i++) { err = init_ingress_acl_root_ns(steering, i); if (err) goto cleanup_root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index a08c3d09a50f..c48c382f926f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -170,6 +170,7 @@ struct fs_fte { u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 index; + struct mlx5_flow_context flow_context; struct mlx5_flow_act action; enum fs_fte_status status; struct mlx5_fc *counter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index c6c28f56aa29..b3762123a69c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -102,13 +102,15 @@ static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev, struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; unsigned long next_id = (unsigned long)id + 1; struct mlx5_fc *counter; + unsigned long tmp; rcu_read_lock(); /* skip counters that are in idr, but not yet in counters list */ - while ((counter = idr_get_next_ul(&fc_stats->counters_idr, - &next_id)) != NULL && - list_empty(&counter->list)) - next_id++; + idr_for_each_entry_continue_ul(&fc_stats->counters_idr, + counter, tmp, next_id) { + if (!list_empty(&counter->list)) + break; + } rcu_read_unlock(); return counter ? &counter->list : &fc_stats->counters; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 1ab6f7e3bec6..a19790dee7b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -37,6 +37,37 @@ #include "mlx5_core.h" #include "../../mlxfw/mlxfw.h" +enum { + MCQS_IDENTIFIER_BOOT_IMG = 0x1, + MCQS_IDENTIFIER_OEM_NVCONFIG = 0x4, + MCQS_IDENTIFIER_MLNX_NVCONFIG = 0x5, + MCQS_IDENTIFIER_CS_TOKEN = 0x6, + MCQS_IDENTIFIER_DBG_TOKEN = 0x7, + MCQS_IDENTIFIER_GEARBOX = 0xA, +}; + +enum { + MCQS_UPDATE_STATE_IDLE, + MCQS_UPDATE_STATE_IN_PROGRESS, + MCQS_UPDATE_STATE_APPLIED, + MCQS_UPDATE_STATE_ACTIVE, + MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET, + MCQS_UPDATE_STATE_FAILED, + MCQS_UPDATE_STATE_CANCELED, + MCQS_UPDATE_STATE_BUSY, +}; + +enum { + MCQI_INFO_TYPE_CAPABILITIES = 0x0, + MCQI_INFO_TYPE_VERSION = 0x1, + MCQI_INFO_TYPE_ACTIVATION_METHOD = 0x5, +}; + +enum { + MCQI_FW_RUNNING_VERSION = 0, + MCQI_FW_STORED_VERSION = 1, +}; + static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) { @@ -202,6 +233,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, event_cap)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_EVENT); + if (err) + return err; + } + + if (MLX5_CAP_GEN(dev, tls)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_TLS); + if (err) + return err; + } + return 0; } @@ -392,33 +435,49 @@ static int mlx5_reg_mcda_set(struct mlx5_core_dev *dev, } static int mlx5_reg_mcqi_query(struct mlx5_core_dev *dev, - u16 component_index, - u32 *max_component_size, - u8 *log_mcda_word_size, - u16 *mcda_max_write_size) + u16 component_index, bool read_pending, + u8 info_type, u16 data_size, void *mcqi_data) { - u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_ST_SZ_DW(mcqi_cap)]; - int offset = MLX5_ST_SZ_DW(mcqi_reg); - u32 in[MLX5_ST_SZ_DW(mcqi_reg)]; + u32 out[MLX5_ST_SZ_DW(mcqi_reg) + MLX5_UN_SZ_DW(mcqi_reg_data)] = {}; + u32 in[MLX5_ST_SZ_DW(mcqi_reg)] = {}; + void *data; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(mcqi_reg, in, component_index, component_index); - MLX5_SET(mcqi_reg, in, data_size, MLX5_ST_SZ_BYTES(mcqi_cap)); + MLX5_SET(mcqi_reg, in, read_pending_component, read_pending); + MLX5_SET(mcqi_reg, in, info_type, info_type); + MLX5_SET(mcqi_reg, in, data_size, data_size); err = mlx5_core_access_reg(dev, in, sizeof(in), out, - sizeof(out), MLX5_REG_MCQI, 0, 0); + MLX5_ST_SZ_BYTES(mcqi_reg) + data_size, + MLX5_REG_MCQI, 0, 0); if (err) - goto out; + return err; - *max_component_size = MLX5_GET(mcqi_cap, out + offset, max_component_size); - *log_mcda_word_size = 
MLX5_GET(mcqi_cap, out + offset, log_mcda_word_size); - *mcda_max_write_size = MLX5_GET(mcqi_cap, out + offset, mcda_max_write_size); + data = MLX5_ADDR_OF(mcqi_reg, out, data); + memcpy(mcqi_data, data, data_size); -out: - return err; + return 0; +} + +static int mlx5_reg_mcqi_caps_query(struct mlx5_core_dev *dev, u16 component_index, + u32 *max_component_size, u8 *log_mcda_word_size, + u16 *mcda_max_write_size) +{ + u32 mcqi_reg[MLX5_ST_SZ_DW(mcqi_cap)] = {}; + int err; + + err = mlx5_reg_mcqi_query(dev, component_index, 0, + MCQI_INFO_TYPE_CAPABILITIES, + MLX5_ST_SZ_BYTES(mcqi_cap), mcqi_reg); + if (err) + return err; + + *max_component_size = MLX5_GET(mcqi_cap, mcqi_reg, max_component_size); + *log_mcda_word_size = MLX5_GET(mcqi_cap, mcqi_reg, log_mcda_word_size); + *mcda_max_write_size = MLX5_GET(mcqi_cap, mcqi_reg, mcda_max_write_size); + + return 0; } struct mlx5_mlxfw_dev { @@ -434,8 +493,13 @@ static int mlx5_component_query(struct mlxfw_dev *mlxfw_dev, container_of(mlxfw_dev, struct mlx5_mlxfw_dev, mlxfw_dev); struct mlx5_core_dev *dev = mlx5_mlxfw_dev->mlx5_core_dev; - return mlx5_reg_mcqi_query(dev, component_index, p_max_size, - p_align_bits, p_max_write_size); + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi)) { + mlx5_core_warn(dev, "caps query isn't supported by running FW\n"); + return -EOPNOTSUPP; + } + + return mlx5_reg_mcqi_caps_query(dev, component_index, p_max_size, + p_align_bits, p_max_write_size); } static int mlx5_fsm_lock(struct mlxfw_dev *mlxfw_dev, u32 *fwhandle) @@ -552,7 +616,8 @@ static const struct mlxfw_dev_ops mlx5_mlxfw_dev_ops = { }; int mlx5_firmware_flash(struct mlx5_core_dev *dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlx5_mlxfw_dev mlx5_mlxfw_dev = { .mlxfw_dev = { @@ -571,5 +636,133 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, return -EOPNOTSUPP; } - return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, firmware); + return mlxfw_firmware_flash(&mlx5_mlxfw_dev.mlxfw_dev, + firmware, extack); +} + +static int mlx5_reg_mcqi_version_query(struct mlx5_core_dev *dev, + u16 component_index, bool read_pending, + u32 *mcqi_version_out) +{ + return mlx5_reg_mcqi_query(dev, component_index, read_pending, + MCQI_INFO_TYPE_VERSION, + MLX5_ST_SZ_BYTES(mcqi_version), + mcqi_version_out); +} + +static int mlx5_reg_mcqs_query(struct mlx5_core_dev *dev, u32 *out, + u16 component_index) +{ + u8 out_sz = MLX5_ST_SZ_BYTES(mcqs_reg); + u32 in[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + int err; + + memset(out, 0, out_sz); + + MLX5_SET(mcqs_reg, in, component_index, component_index); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + out_sz, MLX5_REG_MCQS, 0, 0); + return err; +} + +/* scans component index sequentially, to find the boot img index */ +static int mlx5_get_boot_img_component_index(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)] = {}; + u16 identifier, component_idx = 0; + bool quit; + int err; + + do { + err = mlx5_reg_mcqs_query(dev, out, component_idx); + if (err) + return err; + + identifier = MLX5_GET(mcqs_reg, out, identifier); + quit = !!MLX5_GET(mcqs_reg, out, last_index_flag); + quit |= identifier == MCQS_IDENTIFIER_BOOT_IMG; + } while (!quit && ++component_idx); + + if (identifier != MCQS_IDENTIFIER_BOOT_IMG) { + mlx5_core_warn(dev, "mcqs: can't find boot_img component ix, last scanned idx %d\n", + component_idx); + return -EOPNOTSUPP; + } + + return component_idx; +} + +static int +mlx5_fw_image_pending(struct mlx5_core_dev *dev, + 
int component_index, + bool *pending_version_exists) +{ + u32 out[MLX5_ST_SZ_DW(mcqs_reg)]; + u8 component_update_state; + int err; + + err = mlx5_reg_mcqs_query(dev, out, component_index); + if (err) + return err; + + component_update_state = MLX5_GET(mcqs_reg, out, component_update_state); + + if (component_update_state == MCQS_UPDATE_STATE_IDLE) { + *pending_version_exists = false; + } else if (component_update_state == MCQS_UPDATE_STATE_ACTIVE_PENDING_RESET) { + *pending_version_exists = true; + } else { + mlx5_core_warn(dev, + "mcqs: can't read pending fw version while fw state is %d\n", + component_update_state); + return -ENODATA; + } + return 0; +} + +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *pending_ver) +{ + u32 reg_mcqi_version[MLX5_ST_SZ_DW(mcqi_version)] = {}; + bool pending_version_exists; + int component_index; + int err; + + if (!MLX5_CAP_GEN(dev, mcam_reg) || !MLX5_CAP_MCAM_REG(dev, mcqi) || + !MLX5_CAP_MCAM_REG(dev, mcqs)) { + mlx5_core_warn(dev, "fw query isn't supported by the FW\n"); + return -EOPNOTSUPP; + } + + component_index = mlx5_get_boot_img_component_index(dev); + if (component_index < 0) + return component_index; + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_RUNNING_VERSION, + reg_mcqi_version); + if (err) + return err; + + *running_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + err = mlx5_fw_image_pending(dev, component_index, &pending_version_exists); + if (err) + return err; + + if (!pending_version_exists) { + *pending_ver = 0; + return 0; + } + + err = mlx5_reg_mcqi_version_query(dev, component_index, + MCQI_FW_STORED_VERSION, + reg_mcqi_version); + if (err) + return err; + + *pending_ver = MLX5_GET(mcqi_version, reg_mcqi_version, version); + + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index a2656f4008d9..2fe6923f7ce0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -40,6 +40,8 @@ #include "mlx5_core.h" #include "lib/eq.h" #include "lib/mlx5.h" +#include "lib/pci_vsc.h" +#include "diag/fw_tracer.h" enum { MLX5_HEALTH_POLL_INTERVAL = 2 * HZ, @@ -62,12 +64,20 @@ enum { enum { MLX5_DROP_NEW_HEALTH_WORK, - MLX5_DROP_NEW_RECOVERY_WORK, +}; + +enum { + MLX5_SENSOR_NO_ERR = 0, + MLX5_SENSOR_PCI_COMM_ERR = 1, + MLX5_SENSOR_PCI_ERR = 2, + MLX5_SENSOR_NIC_DISABLED = 3, + MLX5_SENSOR_NIC_SW_RESET = 4, + MLX5_SENSOR_FW_SYND_RFR = 5, }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) { - return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; + return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7; } void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) @@ -80,18 +90,105 @@ void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) &dev->iseg->cmdq_addr_l_sz); } -static int in_fatal(struct mlx5_core_dev *dev) +static bool sensor_pci_not_working(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; + /* Offline PCI reads return 0xffffffff */ + return (ioread32be(&h->fw_ver) == 0xffffffff); +} + +static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET; + u8 synd = ioread8(&h->synd); + + if (rfr && synd) + mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd); + return rfr && synd; +} + 
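+/* The two helpers above are the raw health sensors; check_fatal_sensors()
+ * below polls them in a fixed order of precedence and reports the first one
+ * that fires. Editorial sketch (not part of the original patch), derived
+ * from the masks in mlx5_get_nic_state() above: the NIC interface mode is
+ * the 3-bit field at bits 10:8 of iseg->cmdq_addr_l_sz, so widening the
+ * mask from 0x3 to 0x7 is what makes the new SW-reset mode (which reads
+ * back as 0x7, see mlx5_handle_bad_state() below) observable:
+ *
+ *	u8 mode = (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 0x7;
+ *	// mode == 7 -> MLX5_NIC_IFC_SW_RESET
+ */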
+static u32 check_fatal_sensors(struct mlx5_core_dev *dev) +{ + if (sensor_pci_not_working(dev)) + return MLX5_SENSOR_PCI_COMM_ERR; + if (pci_channel_offline(dev->pdev)) + return MLX5_SENSOR_PCI_ERR; if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) - return 1; + return MLX5_SENSOR_NIC_DISABLED; + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET) + return MLX5_SENSOR_NIC_SW_RESET; + if (sensor_fw_synd_rfr(dev)) + return MLX5_SENSOR_FW_SYND_RFR; - if (ioread32be(&h->fw_ver) == 0xffffffff) - return 1; + return MLX5_SENSOR_NO_ERR; - return 0; } + +static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock) +{ + enum mlx5_vsc_state state; + int ret; + + if (!mlx5_core_is_pf(dev)) + return -EBUSY; + + /* Try to lock GW access; this stage doesn't return + * EBUSY because a locked GW does not mean that another PF + * already started the reset. + */ + ret = mlx5_vsc_gw_lock(dev); + if (ret == -EBUSY) + return -EINVAL; + if (ret) + return ret; + + state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK; + /* At this stage, if the return status == EBUSY, then we know + * for sure that another PF started the reset, so don't allow + * another reset. + */ + ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state); + if (ret) + mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n"); + + /* Unlock GW access */ + mlx5_vsc_gw_unlock(dev); + + return ret; +} + +static bool reset_fw_if_needed(struct mlx5_core_dev *dev) +{ + bool supported = (ioread32be(&dev->iseg->initializing) >> + MLX5_FW_RESET_SUPPORTED_OFFSET) & 1; + u32 fatal_error; + + if (!supported) + return false; + + /* The reset only needs to be issued by one PF. The health buffer is + * shared between all functions, and will be cleared during a reset. + * Check again to avoid a redundant 2nd reset. If the fatal error was + * PCI related, a reset won't help. + */ + fatal_error = check_fatal_sensors(dev); + if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR || + fatal_error == MLX5_SENSOR_NIC_DISABLED || + fatal_error == MLX5_SENSOR_NIC_SW_RESET) { + mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n"); + return false; + } + + mlx5_core_warn(dev, "Issuing FW Reset\n"); + /* Write the NIC interface field to initiate the reset; the command + * interface address also resides here, don't overwrite it. 
+ */ + mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET); + + return true; } void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) @@ -99,14 +196,65 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) mutex_lock(&dev->intf_state_mutex); if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) goto unlock; + if (dev->state == MLX5_DEVICE_STATE_UNINITIALIZED) { + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + goto unlock; + } - mlx5_core_err(dev, "start\n"); - if (pci_channel_offline(dev->pdev) || in_fatal(dev) || force) { + if (check_fatal_sensors(dev) || force) { dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_cmd_flush(dev); } mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +unlock: + mutex_unlock(&dev->intf_state_mutex); +} + +#define MLX5_CRDUMP_WAIT_MS 60000 +#define MLX5_FW_RESET_WAIT_MS 1000 +void mlx5_error_sw_reset(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = MLX5_FW_RESET_WAIT_MS; + int lock = -EBUSY; + + mutex_lock(&dev->intf_state_mutex); + if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + goto unlock; + + mlx5_core_err(dev, "start\n"); + + if (check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) { + /* Get cr-dump and reset FW semaphore */ + lock = lock_sem_sw_reset(dev, true); + + if (lock == -EBUSY) { + delay_ms = MLX5_CRDUMP_WAIT_MS; + goto recover_from_sw_reset; + } + /* Execute SW reset */ + reset_fw_if_needed(dev); + } + +recover_from_sw_reset: + /* Recover from SW reset */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + cond_resched(); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + } + + /* Release FW semaphore if you are the lock owner */ + if (!lock) + lock_sem_sw_reset(dev, false); + mlx5_core_err(dev, "end\n"); unlock: @@ -129,6 +277,20 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) case MLX5_NIC_IFC_NO_DRAM_NIC: mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n"); break; + + case MLX5_NIC_IFC_SW_RESET: + /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases: + * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded + * and this is a VF), this is not recoverable by SW reset. + * Logging of this is handled elsewhere. + * 2. FW reset has been issued by another function, driver can + * be reloaded to recover after the mode switches to + * MLX5_NIC_IFC_DISABLED. 
+ */ + if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR) + mlx5_core_warn(dev, "NIC SW reset in progress\n"); + break; + default: mlx5_core_warn(dev, "Expected to see disabled NIC but it is has invalid value %d\n", nic_interface); @@ -137,52 +299,32 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } -static void health_recover(struct work_struct *work) -{ - struct mlx5_core_health *health; - struct delayed_work *dwork; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - u8 nic_state; - - dwork = container_of(work, struct delayed_work, work); - health = container_of(dwork, struct mlx5_core_health, recover_work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); - - nic_state = mlx5_get_nic_state(dev); - if (nic_state == MLX5_NIC_IFC_INVALID) { - mlx5_core_err(dev, "health recovery flow aborted since the nic state is invalid\n"); - return; - } - - mlx5_core_err(dev, "starting health recovery flow\n"); - mlx5_recover_device(dev); -} - /* How much time to wait until health resetting the driver (in msecs) */ -#define MLX5_RECOVERY_DELAY_MSECS 60000 -static void health_care(struct work_struct *work) +#define MLX5_RECOVERY_WAIT_MSECS 60000 +static int mlx5_health_try_recover(struct mlx5_core_dev *dev) { - unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); - struct mlx5_core_health *health; - struct mlx5_core_dev *dev; - struct mlx5_priv *priv; - unsigned long flags; + unsigned long end; - health = container_of(work, struct mlx5_core_health, work); - priv = container_of(health, struct mlx5_priv, health); - dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); + end = jiffies + msecs_to_jiffies(MLX5_RECOVERY_WAIT_MSECS); + while (sensor_pci_not_working(dev)) { + if (time_after(jiffies, end)) { + mlx5_core_err(dev, + "health recovery flow aborted, PCI reads still not working\n"); + return -EIO; + } + msleep(100); + } - spin_lock_irqsave(&health->wq_lock, flags); - if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) - schedule_delayed_work(&health->recover_work, recover_delay); - else - mlx5_core_err(dev, - "new health works are not permitted at this stage\n"); - spin_unlock_irqrestore(&health->wq_lock, flags); + mlx5_core_err(dev, "starting health recovery flow\n"); + mlx5_recover_device(dev); + if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state) || + check_fatal_sensors(dev)) { + mlx5_core_err(dev, "health recovery failed\n"); + return -EIO; + } + return 0; } static const char *hsynd_str(u8 synd) @@ -246,6 +388,282 @@ static void print_health_info(struct mlx5_core_dev *dev) mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw); } +static int +mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u8 synd; + int err; + + synd = ioread8(&h->synd); + err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd); + if (err || !synd) + return err; + return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd)); +} + +struct mlx5_fw_reporter_ctx { + u8 err_synd; + int miss_counter; +}; + +static int +mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg, + struct mlx5_fw_reporter_ctx *fw_reporter_ctx) +{ + int err; + + err = 
devlink_fmsg_u8_pair_put(fmsg, "syndrome", + fw_reporter_ctx->err_synd); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter", + fw_reporter_ctx->miss_counter); + if (err) + return err; + return 0; +} + +static int +mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + int err; + int i; + + if (!ioread8(&h->synd)) + return 0; + + err = devlink_fmsg_pair_nest_start(fmsg, "health buffer"); + if (err) + return err; + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var"); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) { + err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i)); + if (err) + return err; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr", + ioread32be(&h->assert_exit_ptr)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra", + ioread32be(&h->assert_callra)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index", + ioread8(&h->irisc_index)); + if (err) + return err; + err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd", + ioread16be(&h->ext_synd)); + if (err) + return err; + err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver", + ioread32be(&h->fw_ver)); + if (err) + return err; + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + return devlink_fmsg_pair_nest_end(fmsg); +} + +static int +mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + int err; + + err = mlx5_fw_tracer_trigger_core_dump_general(dev); + if (err) + return err; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + return err; + } + + err = mlx5_fw_reporter_heath_buffer_data_put(dev, fmsg); + if (err) + return err; + return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg); +} + +static void mlx5_fw_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + + health = container_of(work, struct mlx5_core_health, report_work); + + if (IS_ERR_OR_NULL(health->fw_reporter)) + return; + + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + if (fw_reporter_ctx.err_synd) { + devlink_health_report(health->fw_reporter, + "FW syndrom reported", &fw_reporter_ctx); + return; + } + if (fw_reporter_ctx.miss_counter) + devlink_health_report(health->fw_reporter, + "FW miss counter reported", + &fw_reporter_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = { + .name = "fw", + .diagnose = mlx5_fw_reporter_diagnose, + .dump = mlx5_fw_reporter_dump, +}; + +static int +mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, + void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_health_try_recover(dev); +} + +#define MLX5_CR_DUMP_CHUNK_SIZE 256 +static int 
+mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + u32 crdump_size = dev->priv.health.crdump_size; + u32 *cr_data; + u32 data_size; + u32 offset; + int err; + + if (!mlx5_core_is_pf(dev)) + return -EPERM; + + cr_data = kvmalloc(crdump_size, GFP_KERNEL); + if (!cr_data) + return -ENOMEM; + err = mlx5_crdump_collect(dev, cr_data); + if (err) + goto free_data; + + if (priv_ctx) { + struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx; + + err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx); + if (err) + goto free_data; + } + + err = devlink_fmsg_arr_pair_nest_start(fmsg, "crdump_data"); + if (err) + goto free_data; + for (offset = 0; offset < crdump_size; offset += data_size) { + if (crdump_size - offset < MLX5_CR_DUMP_CHUNK_SIZE) + data_size = crdump_size - offset; + else + data_size = MLX5_CR_DUMP_CHUNK_SIZE; + err = devlink_fmsg_binary_put(fmsg, cr_data, data_size); + if (err) + goto free_data; + } + err = devlink_fmsg_arr_pair_nest_end(fmsg); + +free_data: + kvfree(cr_data); + return err; +} + +static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) +{ + struct mlx5_fw_reporter_ctx fw_reporter_ctx; + struct mlx5_core_health *health; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + + health = container_of(work, struct mlx5_core_health, fatal_report_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + mlx5_enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + if (mlx5_health_try_recover(dev)) + mlx5_core_err(dev, "health recovery failed\n"); + return; + } + fw_reporter_ctx.err_synd = health->synd; + fw_reporter_ctx.miss_counter = health->miss_counter; + devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx); +} + +static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { + .name = "fw_fatal", + .recover = mlx5_fw_fatal_reporter_recover, + .dump = mlx5_fw_fatal_reporter_dump, +}; + +#define MLX5_REPORTER_FW_GRACEFUL_PERIOD 1200000 +static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->fw_reporter = + devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops, + 0, false, dev); + if (IS_ERR(health->fw_reporter)) + mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", + PTR_ERR(health->fw_reporter)); + + health->fw_fatal_reporter = + devlink_health_reporter_create(devlink, + &mlx5_fw_fatal_reporter_ops, + MLX5_REPORTER_FW_GRACEFUL_PERIOD, + true, dev); + if (IS_ERR(health->fw_fatal_reporter)) + mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", + PTR_ERR(health->fw_fatal_reporter)); +} + +static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->fw_reporter)) + devlink_health_reporter_destroy(health->fw_reporter); + + if (!IS_ERR_OR_NULL(health->fw_fatal_reporter)) + devlink_health_reporter_destroy(health->fw_fatal_reporter); +} + static unsigned long get_next_poll_jiffies(void) { unsigned long next; @@ -264,7 +682,7 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) - queue_work(health->wq, 
&health->work); + queue_work(health->wq, &health->fatal_report_work); else mlx5_core_err(dev, "new health works are not permitted at this stage\n"); spin_unlock_irqrestore(&health->wq_lock, flags); @@ -274,6 +692,9 @@ static void poll_health(struct timer_list *t) { struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer); struct mlx5_core_health *health = &dev->priv.health; + struct health_buffer __iomem *h = health->health; + u32 fatal_error; + u8 prev_synd; u32 count; if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -289,10 +710,19 @@ static void poll_health(struct timer_list *t) if (health->miss_counter == MAX_MISSES) { mlx5_core_err(dev, "device's health compromised - reached miss count\n"); print_health_info(dev); + queue_work(health->wq, &health->report_work); } - if (in_fatal(dev) && !health->sick) { - health->sick = true; + prev_synd = health->synd; + health->synd = ioread8(&h->synd); + if (health->synd && health->synd != prev_synd) + queue_work(health->wq, &health->report_work); + + fatal_error = check_fatal_sensors(dev); + + if (fatal_error && !health->fatal_error) { + mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); + dev->priv.health.fatal_error = fatal_error; print_health_info(dev); mlx5_trigger_health_work(dev); } @@ -306,9 +736,8 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; timer_setup(&health->timer, poll_health, 0); - health->sick = 0; + health->fatal_error = MLX5_SENSOR_NO_ERR; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -324,7 +753,6 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) if (disable_health) { spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); } @@ -338,21 +766,9 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&health->recover_work); - cancel_work_sync(&health->work); -} - -void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) -{ - struct mlx5_core_health *health = &dev->priv.health; - unsigned long flags; - - spin_lock_irqsave(&health->wq_lock, flags); - set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); - spin_unlock_irqrestore(&health->wq_lock, flags); - cancel_delayed_work_sync(&dev->priv.health.recover_work); + cancel_work_sync(&health->report_work); + cancel_work_sync(&health->fatal_report_work); } void mlx5_health_flush(struct mlx5_core_dev *dev) @@ -367,6 +783,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; destroy_workqueue(health->wq); + mlx5_fw_reporters_destroy(dev); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -374,20 +791,26 @@ int mlx5_health_init(struct mlx5_core_dev *dev) struct mlx5_core_health *health; char *name; + mlx5_fw_reporters_create(dev); + health = &dev->priv.health; name = kmalloc(64, GFP_KERNEL); if (!name) - return -ENOMEM; + goto out_err; strcpy(name, "mlx5_health"); strcat(name, dev_name(dev->device)); health->wq = create_singlethread_workqueue(name); kfree(name); if (!health->wq) - return 
-ENOMEM; + goto out_err; spin_lock_init(&health->wq_lock); - INIT_WORK(&health->work, health_care); - INIT_DELAYED_WORK(&health->recover_work, health_recover); + INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); + INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); return 0; + +out_err: + mlx5_fw_reporters_destroy(dev); + return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 90cb50fe17fd..ebd81f6b556e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -122,14 +122,6 @@ static int mlx5i_get_ts_info(struct net_device *netdev, return mlx5e_ethtool_get_ts_info(priv, info); } -static int mlx5i_flash_device(struct net_device *netdev, - struct ethtool_flash *flash) -{ - struct mlx5e_priv *priv = mlx5i_epriv(netdev); - - return mlx5e_ethtool_flash_device(priv, flash); -} - enum mlx5_ptys_width { MLX5_PTYS_WIDTH_1X = 1 << 0, MLX5_PTYS_WIDTH_2X = 1 << 1, @@ -241,7 +233,6 @@ const struct ethtool_ops mlx5i_ethtool_ops = { .get_ethtool_stats = mlx5i_get_ethtool_stats, .get_ringparam = mlx5i_get_ringparam, .set_ringparam = mlx5i_set_ringparam, - .flash_device = mlx5i_flash_device, .get_channels = mlx5i_get_channels, .set_channels = mlx5i_set_channels, .get_coalesce = mlx5i_get_coalesce, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 9ca492b430d8..faf197d53743 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -87,7 +87,7 @@ int mlx5i_init(struct mlx5_core_dev *mdev, mlx5e_set_netdev_mtu_boundaries(priv); netdev->mtu = netdev->max_mtu; - mlx5e_build_nic_params(mdev, &priv->rss_params, &priv->channels.params, + mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params, mlx5e_get_netdev_max_channels(netdev), netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); @@ -258,6 +258,18 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp * mlx5_core_destroy_qp(mdev, qp); } +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + void *tisc; + + tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); + + return mlx5e_create_tis(mdev, in, tisn); +} + static int mlx5i_init_tx(struct mlx5e_priv *priv) { struct mlx5i_priv *ipriv = priv->ppriv; @@ -269,7 +281,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); + err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); goto err_destroy_underlay_qp; @@ -365,7 +377,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); + err = mlx5e_create_direct_rqts(priv, priv->direct_tir); if (err) goto err_destroy_indirect_rqts; @@ -373,7 +385,7 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) if (err) goto err_destroy_direct_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_direct_tirs(priv, priv->direct_tir); if (err) goto err_destroy_indirect_tirs; @@ -384,11 +396,11 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) return 0; err_destroy_direct_tirs: - 
mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); err_close_drop_rq: @@ -401,9 +413,9 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_direct_tirs(priv, priv->direct_tir); mlx5e_destroy_indirect_tirs(priv, true); - mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_direct_rqts(priv, priv->direct_tir); mlx5e_destroy_rqt(priv, &priv->indir_rqt); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); @@ -418,6 +430,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .cleanup_rx = mlx5i_cleanup_rx, .enable = NULL, /* mlx5i_enable */ .disable = NULL, /* mlx5i_disable */ + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, /* mlx5i_update_stats */ .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, @@ -526,7 +539,7 @@ static int mlx5i_open(struct net_device *netdev) if (err) goto err_remove_fs_underlay_qp; - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index e19ba3fcd1b7..c87962cab921 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -59,6 +59,8 @@ struct mlx5i_priv { char *mlx5e_priv[0]; }; +int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn); + /* Underlay QP create/destroy functions */ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index b491b8f5fd6b..6e56fa769d2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -210,7 +210,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) goto err_unint_underlay_qp; } - err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]); + err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0]); if (err) { mlx5_core_warn(mdev, "create child tis failed, %d\n", err); goto err_remove_rx_uderlay_qp; @@ -221,7 +221,7 @@ static int mlx5i_pkey_open(struct net_device *netdev) mlx5_core_warn(mdev, "opening child channels failed, %d\n", err); goto err_clear_state_opened_flag; } - mlx5e_refresh_tirs(epriv, false); + epriv->profile->update_rx(epriv); mlx5e_activate_priv_channels(epriv); mutex_unlock(&epriv->state_lock); @@ -350,6 +350,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .cleanup_rx = mlx5i_pkey_cleanup_rx, .enable = NULL, .disable = NULL, + .update_rx = mlx5e_update_nic_rx, .update_stats = NULL, .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 959605559858..c5ef2ff26465 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -305,8 +305,8 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) !mlx5_sriov_is_enabled(dev1); #ifdef CONFIG_MLX5_ESWITCH - roce_lag &= dev0->priv.eswitch->mode == SRIOV_NONE && - dev1->priv.eswitch->mode == SRIOV_NONE; + roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; #endif if (roce_lag) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 8212bfd05733..e69766393990 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include <linux/netdevice.h> +#include <net/nexthop.h> #include "lag.h" #include "lag_mp.h" #include "mlx5_core.h" @@ -110,6 +111,8 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct fib_info *fi) { struct lag_mp *mp = &ldev->lag_mp; + struct fib_nh *fib_nh0, *fib_nh1; + unsigned int nhs; /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { @@ -120,9 +123,11 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* Handle add/replace event */ - if (fi->fib_nhs == 1) { + nhs = fib_info_num_path(fi); + if (nhs == 1) { if (__mlx5_lag_is_active(ldev)) { - struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev; + struct fib_nh *nh = fib_info_nh(fi, 0); + struct net_device *nh_dev = nh->fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); mlx5_lag_set_port_affinity(ldev, ++i); @@ -130,14 +135,16 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, return; } - if (fi->fib_nhs != 2) + if (nhs != 2) return; /* Verify next hops are ports of the same hca */ - if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev && - fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) && - !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev && - fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) { + fib_nh0 = fib_info_nh(fi, 0); + fib_nh1 = fib_info_nh(fi, 1); + if (!(fib_nh0->fib_nh_dev == ldev->pf[0].netdev && + fib_nh1->fib_nh_dev == ldev->pf[1].netdev) && + !(fib_nh0->fib_nh_dev == ldev->pf[1].netdev && + fib_nh1->fib_nh_dev == ldev->pf[0].netdev)) { mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n"); return; } @@ -174,7 +181,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, mlx5_lag_set_port_affinity(ldev, i); } } else if (event == FIB_EVENT_NH_ADD && - fi->fib_nhs == 2) { + fib_info_num_path(fi) == 2) { mlx5_lag_set_port_affinity(ldev, 0); } } @@ -238,6 +245,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct mlx5_fib_event_work *fib_work; struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; + struct net_device *fib_dev; struct fib_info *fi; if (info->family != AF_INET) @@ -254,8 +262,13 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->fib_dev != ldev->pf[0].netdev && - fi->fib_dev != ldev->pf[1].netdev) { + if (fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } + fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; + if (fib_dev != ldev->pf[0].netdev && + fib_dev != ldev->pf[1].netdev) { return NOTIFY_DONE; } fib_work = mlx5_lag_init_fib_work(ldev, event); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c new file mode 100644 index 000000000000..ea9ee88491e5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2019 Mellanox Technologies. + +#include "mlx5_core.h" + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, + u32 *p_key_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 sz_bits = sz_bytes * BITS_PER_BYTE; + u8 general_obj_key_size; + u64 general_obj_types; + void *obj, *key_p; + int err; + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key); + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY)) + return -EINVAL; + + switch (sz_bits) { + case 128: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; + break; + case 256: + general_obj_key_size = + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256; + break; + default: + return -EINVAL; + } + + memcpy(key_p, key, sz_bytes); + + MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); + MLX5_SET(encryption_key_obj, obj, key_type, + MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_DEK); + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.pdn); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (!err) + *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + /* avoid leaking key on the stack */ + memzero_explicit(in, sizeof(in)); + + return err; +} + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, key_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index c0fb6d72b695..3dfab91ae5f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -7,7 +7,6 @@ #include <linux/mlx5/eq.h> #include <linux/mlx5/cq.h> -#define MLX5_MAX_IRQ_NAME (32) #define MLX5_EQE_SIZE (sizeof(struct mlx5_eqe)) struct mlx5_eq_tasklet { @@ -36,8 +35,14 @@ struct mlx5_eq { struct mlx5_rsc_debug *dbg; }; +struct mlx5_eq_async { + struct mlx5_eq core; + struct notifier_block irq_nb; +}; + struct mlx5_eq_comp { - struct mlx5_eq core; /* Must be first */ + struct mlx5_eq core; + struct notifier_block irq_nb; struct mlx5_eq_tasklet tasklet_ctx; struct list_head list; }; @@ -70,7 +75,7 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev); void mlx5_eq_table_destroy(struct mlx5_core_dev *dev); int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); -int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); +void mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq); struct 
mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn); struct mlx5_eq *mlx5_get_async_eq(struct mlx5_core_dev *dev); void mlx5_cq_tasklet_cb(unsigned long data); @@ -92,7 +97,4 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif -int mlx5_eq_notifier_register(struct mlx5_core_dev *dev, struct mlx5_nb *nb); -int mlx5_eq_notifier_unregister(struct mlx5_core_dev *dev, struct mlx5_nb *nb); - #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c new file mode 100644 index 000000000000..23361a9ae4fa --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/kernel.h> +#include "mlx5_core.h" +#include "geneve.h" + +struct mlx5_geneve { + struct mlx5_core_dev *mdev; + __be16 opt_class; + u8 opt_type; + u32 obj_id; + struct mutex sync_lock; /* protect GENEVE obj operations */ + u32 refcount; +}; + +static int mlx5_geneve_tlv_option_create(struct mlx5_core_dev *mdev, + __be16 class, + u8 type, + u8 len) +{ + u32 in[MLX5_ST_SZ_DW(create_geneve_tlv_option_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u64 general_obj_types; + void *hdr, *opt; + u16 obj_id; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_GENEVE_TLV_OPT)) + return -EINVAL; + + hdr = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, hdr); + opt = MLX5_ADDR_OF(create_geneve_tlv_option_in, in, geneve_tlv_opt); + + MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + + MLX5_SET(geneve_tlv_option, opt, option_class, be16_to_cpu(class)); + MLX5_SET(geneve_tlv_option, opt, option_type, type); + MLX5_SET(geneve_tlv_option, opt, option_data_length, len); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + return obj_id; +} + +static void mlx5_geneve_tlv_option_destroy(struct mlx5_core_dev *mdev, u16 obj_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_GENEVE_TLV_OPT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) +{ + int res = 0; + + if (IS_ERR_OR_NULL(geneve)) + return -EOPNOTSUPP; + + mutex_lock(&geneve->sync_lock); + + if (geneve->refcount) { + if (geneve->opt_class == opt->opt_class && + geneve->opt_type == opt->type) { + /* We already have TLV options obj allocated */ + geneve->refcount++; + } else { + /* TLV options obj allocated, but its params + * do not match the new request. + * We support only one such object. 
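+ * Callers that need a different class/type must first release + * all references via mlx5_geneve_tlv_option_del() so the FW + * object can be destroyed and later recreated with the new + * parameters.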
+ */ + mlx5_core_warn(geneve->mdev, + "Won't create Geneve TLV opt object with class:type:len = 0x%x:0x%x:%d (another class:type already exists)\n", + be16_to_cpu(opt->opt_class), + opt->type, + opt->length); + res = -EOPNOTSUPP; + goto unlock; + } + } else { + /* We don't have any TLV options obj allocated */ + + res = mlx5_geneve_tlv_option_create(geneve->mdev, + opt->opt_class, + opt->type, + opt->length); + if (res < 0) { + mlx5_core_warn(geneve->mdev, + "Failed creating Geneve TLV opt object class:type:len = 0x%x:0x%x:%d (err=%d)\n", + be16_to_cpu(opt->opt_class), + opt->type, opt->length, res); + goto unlock; + } + geneve->opt_class = opt->opt_class; + geneve->opt_type = opt->type; + geneve->obj_id = res; + geneve->refcount++; + } + +unlock: + mutex_unlock(&geneve->sync_lock); + return res; +} + +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + mutex_lock(&geneve->sync_lock); + if (--geneve->refcount == 0) { + /* We've just removed the last user of Geneve option. + * Now delete the object in FW. + */ + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + geneve->opt_class = 0; + geneve->opt_type = 0; + geneve->obj_id = 0; + } + mutex_unlock(&geneve->sync_lock); +} + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev) +{ + struct mlx5_geneve *geneve = + kzalloc(sizeof(*geneve), GFP_KERNEL); + + if (!geneve) + return ERR_PTR(-ENOMEM); + geneve->mdev = mdev; + mutex_init(&geneve->sync_lock); + + return geneve; +} + +void mlx5_geneve_destroy(struct mlx5_geneve *geneve) +{ + if (IS_ERR_OR_NULL(geneve)) + return; + + /* Lockless since we are unloading */ + if (geneve->refcount) + mlx5_geneve_tlv_option_destroy(geneve->mdev, geneve->obj_id); + + kfree(geneve); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h new file mode 100644 index 000000000000..adee0cbba19c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/geneve.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. 
*/ + +#ifndef __MLX5_GENEVE_H__ +#define __MLX5_GENEVE_H__ + +#include <net/geneve.h> +#include <linux/mlx5/driver.h> + +struct mlx5_geneve; + +#ifdef CONFIG_MLX5_ESWITCH + +struct mlx5_geneve *mlx5_geneve_create(struct mlx5_core_dev *mdev); +void mlx5_geneve_destroy(struct mlx5_geneve *geneve); + +int mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt); +void mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve); + +#else /* CONFIG_MLX5_ESWITCH */ + +static inline struct mlx5_geneve +*mlx5_geneve_create(struct mlx5_core_dev *mdev) { return NULL; } +static inline void +mlx5_geneve_destroy(struct mlx5_geneve *geneve) {} +static inline int +mlx5_geneve_tlv_option_add(struct mlx5_geneve *geneve, struct geneve_opt *opt) { return 0; } +static inline void +mlx5_geneve_tlv_option_del(struct mlx5_geneve *geneve) {} + +#endif /* CONFIG_MLX5_ESWITCH */ + +#endif /* __MLX5_GENEVE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 397a2847867a..b99d469e4e64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -41,6 +41,9 @@ int mlx5_core_reserve_gids(struct mlx5_core_dev *dev, unsigned int count); void mlx5_core_unreserve_gids(struct mlx5_core_dev *dev, unsigned int count); int mlx5_core_reserved_gid_alloc(struct mlx5_core_dev *dev, int *gid_index); void mlx5_core_reserved_gid_free(struct mlx5_core_dev *dev, int gid_index); +int mlx5_crdump_enable(struct mlx5_core_dev *dev); +void mlx5_crdump_disable(struct mlx5_core_dev *dev); +int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); /* TODO move to lib/events.h */ @@ -76,4 +79,9 @@ struct mlx5_pme_stats { void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); +/* Crypto */ +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + void *key, u32 sz_bytes, u32 *p_key_id); +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c index a71d5b9c7ab2..3118e8d66407 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -67,6 +67,7 @@ static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) struct l2table_node { struct l2addr_node node; u32 index; /* index in HW l2 table */ + int ref_count; }; struct mlx5_mpfs { @@ -134,8 +135,8 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct l2table_node *l2addr; + int err = 0; u32 index; - int err; if (!MLX5_ESWITCH_MANAGER(dev)) return 0; @@ -144,30 +145,35 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); if (l2addr) { - err = -EEXIST; - goto abort; + l2addr->ref_count++; + goto out; } err = alloc_l2table_index(mpfs, &index); if (err) - goto abort; + goto out; l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); if (!l2addr) { - free_l2table_index(mpfs, index); err = -ENOMEM; - goto abort; + goto hash_add_err; } - l2addr->index = index; err = set_l2table_entry_cmd(dev, index, mac); - if (err) { - l2addr_hash_del(l2addr); - free_l2table_index(mpfs, index); - } + if (err) + goto set_table_entry_err; + + l2addr->index = index; + 
l2addr->ref_count = 1; mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); -abort: + goto out; + +set_table_entry_err: + l2addr_hash_del(l2addr); +hash_add_err: + free_l2table_index(mpfs, index); +out: mutex_unlock(&mpfs->lock); return err; } @@ -190,6 +196,9 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) goto unlock; } + if (--l2addr->ref_count > 0) + goto unlock; + index = l2addr->index; del_l2table_entry_cmd(dev, index); l2addr_hash_del(l2addr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c new file mode 100644 index 000000000000..6b774e0c2766 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies */ + +#include <linux/pci.h> +#include "mlx5_core.h" +#include "pci_vsc.h" + +#define MLX5_EXTRACT_C(source, offset, size) \ + ((((u32)(source)) >> (offset)) & MLX5_ONES32(size)) +#define MLX5_EXTRACT(src, start, len) \ + (((len) == 32) ? (src) : MLX5_EXTRACT_C(src, start, len)) +#define MLX5_ONES32(size) \ + ((size) ? (0xffffffff >> (32 - (size))) : 0) +#define MLX5_MASK32(offset, size) \ + (MLX5_ONES32(size) << (offset)) +#define MLX5_MERGE_C(rsrc1, rsrc2, start, len) \ + ((((rsrc2) << (start)) & (MLX5_MASK32((start), (len)))) | \ + ((rsrc1) & (~MLX5_MASK32((start), (len))))) +#define MLX5_MERGE(rsrc1, rsrc2, start, len) \ + (((len) == 32) ? (rsrc2) : MLX5_MERGE_C(rsrc1, rsrc2, start, len)) +#define vsc_read(dev, offset, val) \ + pci_read_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define vsc_write(dev, offset, val) \ + pci_write_config_dword((dev)->pdev, (dev)->vsc_addr + (offset), (val)) +#define VSC_MAX_RETRIES 2048 + +enum { + VSC_CTRL_OFFSET = 0x4, + VSC_COUNTER_OFFSET = 0x8, + VSC_SEMAPHORE_OFFSET = 0xc, + VSC_ADDR_OFFSET = 0x10, + VSC_DATA_OFFSET = 0x14, + + VSC_FLAG_BIT_OFFS = 31, + VSC_FLAG_BIT_LEN = 1, + + VSC_SYND_BIT_OFFS = 30, + VSC_SYND_BIT_LEN = 1, + + VSC_ADDR_BIT_OFFS = 0, + VSC_ADDR_BIT_LEN = 30, + + VSC_SPACE_BIT_OFFS = 0, + VSC_SPACE_BIT_LEN = 16, + + VSC_SIZE_VLD_BIT_OFFS = 28, + VSC_SIZE_VLD_BIT_LEN = 1, + + VSC_STATUS_BIT_OFFS = 29, + VSC_STATUS_BIT_LEN = 3, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + dev->vsc_addr = pci_find_capability(dev->pdev, + PCI_CAP_ID_VNDR); + if (!dev->vsc_addr) + mlx5_core_warn(dev, "Failed to get valid vendor specific ID\n"); +} + +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) +{ + u32 counter = 0; + int retries = 0; + u32 lock_val; + int ret; + + pci_cfg_access_lock(dev->pdev); + do { + if (retries > VSC_MAX_RETRIES) { + ret = -EBUSY; + goto pci_unlock; + } + + /* Check if semaphore is already locked */ + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + if (lock_val) { + retries++; + usleep_range(1000, 2000); + continue; + } + + /* Read and write counter value, if written value is + * the same, semaphore was acquired successfully. 
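+ * The counter register supplies a value unique to this + * contender (see mlx5_vsc_sem_set_space()), so reading back + * the value we wrote means no other agent raced us between + * the write and the read back.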
+ */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &counter); + if (ret) + goto pci_unlock; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, counter); + if (ret) + goto pci_unlock; + + ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); + if (ret) + goto pci_unlock; + + retries++; + } while (counter != lock_val); + + return 0; + +pci_unlock: + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev) +{ + int ret; + + ret = vsc_write(dev, VSC_SEMAPHORE_OFFSET, MLX5_VSC_UNLOCK); + pci_cfg_access_unlock(dev->pdev); + return ret; +} + +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size) +{ + int ret; + u32 val = 0; + + if (!mlx5_vsc_accessible(dev)) + return -EINVAL; + + if (ret_space_size) + *ret_space_size = 0; + + /* Get a unique val */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + /* Try to modify the lock */ + val = MLX5_MERGE(val, space, VSC_SPACE_BIT_OFFS, VSC_SPACE_BIT_LEN); + ret = vsc_write(dev, VSC_CTRL_OFFSET, val); + if (ret) + goto out; + + /* Verify lock was modified */ + ret = vsc_read(dev, VSC_CTRL_OFFSET, &val); + if (ret) + goto out; + + if (MLX5_EXTRACT(val, VSC_STATUS_BIT_OFFS, VSC_STATUS_BIT_LEN) == 0) + return -EINVAL; + + /* Get space max address if indicated by size valid bit */ + if (ret_space_size && + MLX5_EXTRACT(val, VSC_SIZE_VLD_BIT_OFFS, VSC_SIZE_VLD_BIT_LEN)) { + ret = vsc_read(dev, VSC_ADDR_OFFSET, &val); + if (ret) { + mlx5_core_warn(dev, "Failed to get max space size\n"); + goto out; + } + *ret_space_size = MLX5_EXTRACT(val, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + } + return 0; + +out: + return ret; +} + +static int mlx5_vsc_wait_on_flag(struct mlx5_core_dev *dev, u8 expected_val) +{ + int retries = 0; + u32 flag; + int ret; + + do { + if (retries > VSC_MAX_RETRIES) + return -EBUSY; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, &flag); + if (ret) + return ret; + flag = MLX5_EXTRACT(flag, VSC_FLAG_BIT_OFFS, VSC_FLAG_BIT_LEN); + retries++; + + if ((retries & 0xf) == 0) + usleep_range(1000, 2000); + + } while (flag != expected_val); + + return 0; +} + +static int mlx5_vsc_gw_write(struct mlx5_core_dev *dev, unsigned int address, + u32 data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + /* Set flag to 0x1 */ + address = MLX5_MERGE(address, 1, VSC_FLAG_BIT_OFFS, 1); + ret = vsc_write(dev, VSC_DATA_OFFSET, data); + if (ret) + goto out; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + /* Wait for the flag to be cleared */ + ret = mlx5_vsc_wait_on_flag(dev, 0); + +out: + return ret; +} + +static int mlx5_vsc_gw_read(struct mlx5_core_dev *dev, unsigned int address, + u32 *data) +{ + int ret; + + if (MLX5_EXTRACT(address, VSC_SYND_BIT_OFFS, + VSC_FLAG_BIT_LEN + VSC_SYND_BIT_LEN)) + return -EINVAL; + + ret = vsc_write(dev, VSC_ADDR_OFFSET, address); + if (ret) + goto out; + + ret = mlx5_vsc_wait_on_flag(dev, 1); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_DATA_OFFSET, data); +out: + return ret; +} + +static int mlx5_vsc_gw_read_fast(struct mlx5_core_dev *dev, + unsigned int read_addr, + unsigned int *next_read_addr, + u32 *data) +{ + int ret; + + ret = mlx5_vsc_gw_read(dev, read_addr, data); + if (ret) + goto out; + + ret = vsc_read(dev, VSC_ADDR_OFFSET, next_read_addr); + if (ret) + goto out; + + *next_read_addr = MLX5_EXTRACT(*next_read_addr, VSC_ADDR_BIT_OFFS, + VSC_ADDR_BIT_LEN); + + if (*next_read_addr <= read_addr) + ret = -EINVAL; +out: + return 
ret; +} + +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length) +{ + unsigned int next_read_addr = 0; + unsigned int read_addr = 0; + + while (read_addr < length) { + if (mlx5_vsc_gw_read_fast(dev, read_addr, &next_read_addr, + &data[(read_addr >> 2)])) + return read_addr; + + read_addr = next_read_addr; + } + return length; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state) +{ + u32 data, id = 0; + int ret; + + ret = mlx5_vsc_gw_set_space(dev, MLX5_SEMAPHORE_SPACE_DOMAIN, NULL); + if (ret) { + mlx5_core_warn(dev, "Failed to set gw space %d\n", ret); + return ret; + } + + if (state == MLX5_VSC_LOCK) { + /* Get a unique ID based on the counter */ + ret = vsc_read(dev, VSC_COUNTER_OFFSET, &id); + if (ret) + return ret; + } + + /* Try to modify lock */ + ret = mlx5_vsc_gw_write(dev, space, id); + if (ret) + return ret; + + /* Verify lock was modified */ + ret = mlx5_vsc_gw_read(dev, space, &data); + if (ret) + return -EINVAL; + + if (data != id) + return -EBUSY; + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h new file mode 100644 index 000000000000..64272a6d7754 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies */ + +#ifndef __MLX5_PCI_VSC_H__ +#define __MLX5_PCI_VSC_H__ + +enum mlx5_vsc_state { + MLX5_VSC_UNLOCK, + MLX5_VSC_LOCK, +}; + +enum { + MLX5_VSC_SPACE_SCAN_CRSPACE = 0x7, +}; + +void mlx5_pci_vsc_init(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_unlock(struct mlx5_core_dev *dev); +int mlx5_vsc_gw_set_space(struct mlx5_core_dev *dev, u16 space, + u32 *ret_space_size); +int mlx5_vsc_gw_read_block_fast(struct mlx5_core_dev *dev, u32 *data, + int length); + +static inline bool mlx5_vsc_accessible(struct mlx5_core_dev *dev) +{ + return !!dev->vsc_addr; +} + +int mlx5_vsc_sem_set_space(struct mlx5_core_dev *dev, u16 space, + enum mlx5_vsc_state state); + +#endif /* __MLX5_PCI_VSC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 23d53163ce15..b15b27a497fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -56,6 +56,7 @@ #include "fs_core.h" #include "lib/mpfs.h" #include "eswitch.h" +#include "devlink.h" #include "lib/mlx5.h" #include "fpga/core.h" #include "fpga/ipsec.h" @@ -63,7 +64,9 @@ #include "accel/tls.h" #include "lib/clock.h" #include "lib/vxlan.h" +#include "lib/geneve.h" #include "lib/devcom.h" +#include "lib/pci_vsc.h" #include "diag/fw_tracer.h" #include "ecpf.h" @@ -169,18 +172,28 @@ static struct mlx5_profile profile[] = { #define FW_INIT_TIMEOUT_MILI 2000 #define FW_INIT_WAIT_MS 2 -#define FW_PRE_INIT_TIMEOUT_MILI 10000 +#define FW_PRE_INIT_TIMEOUT_MILI 120000 +#define FW_INIT_WARN_MESSAGE_INTERVAL 20000 -static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili) +static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, + u32 warn_time_mili) { + unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); int err = 0; + BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL); + while (fw_initializing(dev)) { if (time_after(jiffies, end)) { err = -EBUSY; break; } + if 
(warn_time_mili && time_after(jiffies, warn)) { + mlx5_core_warn(dev, "Waiting for FW initialization, timeout abort in %ds\n", + jiffies_to_msecs(end - warn) / 1000); + warn = jiffies + msecs_to_jiffies(warn_time_mili); + } msleep(FW_INIT_WAIT_MS); } @@ -721,8 +734,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, struct mlx5_priv *priv = &dev->priv; int err = 0; - priv->pci_dev_data = id->driver_data; - + mutex_init(&dev->pci_status_mutex); pci_set_drvdata(dev->pdev, dev); dev->bar_addr = pci_resource_start(pdev, 0); @@ -761,6 +773,8 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, goto err_clr_master; } + mlx5_pci_vsc_init(dev); + return 0; err_clr_master: @@ -794,10 +808,16 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_devcom; } + err = mlx5_irq_table_init(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize irq table\n"); + goto err_devcom; + } + err = mlx5_eq_table_init(dev); if (err) { mlx5_core_err(dev, "failed to initialize eq\n"); - goto err_devcom; + goto err_irq_cleanup; } err = mlx5_events_init(dev); @@ -821,6 +841,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_clock(dev); dev->vxlan = mlx5_vxlan_create(dev); + dev->geneve = mlx5_geneve_create(dev); err = mlx5_init_rl_table(dev); if (err) { @@ -834,37 +855,38 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) goto err_rl_cleanup; } - err = mlx5_eswitch_init(dev); + err = mlx5_sriov_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + mlx5_core_err(dev, "Failed to init sriov %d\n", err); goto err_mpfs_cleanup; } - err = mlx5_sriov_init(dev); + err = mlx5_eswitch_init(dev); if (err) { - mlx5_core_err(dev, "Failed to init sriov %d\n", err); - goto err_eswitch_cleanup; + mlx5_core_err(dev, "Failed to init eswitch %d\n", err); + goto err_sriov_cleanup; } err = mlx5_fpga_init(dev); if (err) { mlx5_core_err(dev, "Failed to init fpga device %d\n", err); - goto err_sriov_cleanup; + goto err_eswitch_cleanup; } dev->tracer = mlx5_fw_tracer_create(dev); return 0; -err_sriov_cleanup: - mlx5_sriov_cleanup(dev); err_eswitch_cleanup: mlx5_eswitch_cleanup(dev->priv.eswitch); +err_sriov_cleanup: + mlx5_sriov_cleanup(dev); err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: mlx5_cleanup_rl_table(dev); err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_qp_table(dev); @@ -873,6 +895,8 @@ err_events_cleanup: mlx5_events_cleanup(dev); err_eq_cleanup: mlx5_eq_table_cleanup(dev); +err_irq_cleanup: + mlx5_irq_table_cleanup(dev); err_devcom: mlx5_devcom_unregister_device(dev->priv.devcom); @@ -883,10 +907,11 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fw_tracer_destroy(dev->tracer); mlx5_fpga_cleanup(dev); - mlx5_sriov_cleanup(dev); mlx5_eswitch_cleanup(dev->priv.eswitch); + mlx5_sriov_cleanup(dev); mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); + mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); mlx5_cleanup_reserved_gids(dev); @@ -895,6 +920,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_cq_debugfs_cleanup(dev); mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); + mlx5_irq_table_cleanup(dev); mlx5_devcom_unregister_device(dev->priv.devcom); } @@ -911,7 +937,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) /* wait for firmware to accept initialization segments configurations */ - err = wait_fw_init(dev, 
FW_PRE_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL); if (err) { mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n", FW_PRE_INIT_TIMEOUT_MILI); @@ -924,7 +950,7 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot) return err; } - err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI); + err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0); if (err) { mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI); @@ -1028,6 +1054,12 @@ static int mlx5_load(struct mlx5_core_dev *dev) mlx5_events_start(dev); mlx5_pagealloc_start(dev); + err = mlx5_irq_table_create(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc IRQs\n"); + goto err_irq_table; + } + err = mlx5_eq_table_create(dev); if (err) { mlx5_core_err(dev, "Failed to create EQs\n"); @@ -1099,6 +1131,8 @@ err_fpga_start: err_fw_tracer: mlx5_eq_table_destroy(dev); err_eq_table: + mlx5_irq_table_destroy(dev); +err_irq_table: mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1115,6 +1149,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_fpga_device_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); mlx5_eq_table_destroy(dev); + mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); mlx5_put_uars_page(dev, dev->priv.uar); @@ -1183,7 +1218,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup) int err = 0; if (cleanup) - mlx5_drain_health_recovery(dev); + mlx5_drain_health_wq(dev); mutex_lock(&dev->intf_state_mutex); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { @@ -1210,17 +1245,6 @@ out: return err; } -static const struct devlink_ops mlx5_devlink_ops = { -#ifdef CONFIG_MLX5_ESWITCH - .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, - .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, - .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, - .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, - .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, - .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, -#endif -}; - static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) { struct mlx5_priv *priv = &dev->priv; @@ -1230,7 +1254,6 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); - mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); mutex_init(&priv->bfregs.reg_head.lock); @@ -1282,9 +1305,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) struct devlink *devlink; int err; - devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); + devlink = mlx5_devlink_alloc(); if (!devlink) { - dev_err(&pdev->dev, "kzalloc failed\n"); + dev_err(&pdev->dev, "devlink alloc failed\n"); return -ENOMEM; } @@ -1292,6 +1315,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev->device = &pdev->dev; dev->pdev = pdev; + dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ? 
+ MLX5_COREDEV_VF : MLX5_COREDEV_PF; + err = mlx5_mdev_init(dev, prof_sel); if (err) goto mdev_init_err; @@ -1312,10 +1338,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) request_module_nowait(MLX5_IB_MOD); - err = devlink_register(devlink, &pdev->dev); + err = mlx5_devlink_register(devlink, &pdev->dev); if (err) goto clean_load; + err = mlx5_crdump_enable(dev); + if (err) + dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); + pci_save_state(pdev); return 0; @@ -1327,7 +1357,7 @@ err_load_one: pci_init_err: mlx5_mdev_uninit(dev); mdev_init_err: - devlink_free(devlink); + mlx5_devlink_free(devlink); return err; } @@ -1337,7 +1367,8 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct devlink *devlink = priv_to_devlink(dev); - devlink_unregister(devlink); + mlx5_crdump_disable(dev); + mlx5_devlink_unregister(devlink); mlx5_unregister_device(dev); if (mlx5_unload_one(dev, true)) { @@ -1348,7 +1379,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_pci_close(dev); mlx5_mdev_uninit(dev); - devlink_free(devlink); + mlx5_devlink_free(devlink); } static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, @@ -1359,12 +1390,10 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_core_info(dev, "%s was called\n", __func__); mlx5_enter_error_state(dev, false); + mlx5_error_sw_reset(dev); mlx5_unload_one(dev, false); - /* In case of kernel call drain the health wq */ - if (state) { - mlx5_drain_health_wq(dev); - mlx5_pci_disable_device(dev); - } + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; @@ -1532,7 +1561,8 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { - mlx5_pci_err_detected(dev->pdev, 0); + mlx5_error_sw_reset(dev); + mlx5_unload_one(dev, false); } void mlx5_recover_device(struct mlx5_core_dev *dev) @@ -1570,7 +1600,7 @@ static int __init init(void) get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); mlx5_core_verify_params(); - mlx5_fpga_ipsec_build_fs_cmds(); + mlx5_accel_ipsec_build_fs_cmds(); mlx5_register_debugfs(); err = pci_register_driver(&mlx5_core_driver); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 22e69d4813e4..471bbc48bc1f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -111,6 +111,11 @@ enum { MLX5_DRIVER_SYND = 0xbadd00de, }; +enum mlx5_semaphore_space_address { + MLX5_SEMAPHORE_SPACE_DOMAIN = 0xA, + MLX5_SEMAPHORE_SW_RESET = 0x20, +}; + int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); @@ -118,6 +123,7 @@ int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force); +void mlx5_error_sw_reset(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); @@ -153,6 +159,19 @@ int mlx5_query_qcam_reg(struct mlx5_core_dev *mdev, u32 *qcam, void mlx5_lag_add(struct mlx5_core_dev *dev, struct 
net_device *netdev); void mlx5_lag_remove(struct mlx5_core_dev *dev); +int mlx5_irq_table_init(struct mlx5_core_dev *dev); +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev); +int mlx5_irq_table_create(struct mlx5_core_dev *dev); +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev); +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb); +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx); +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *table); +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table); + int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); void mlx5_events_start(struct mlx5_core_dev *dev); @@ -184,7 +203,10 @@ int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode); MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) && \ MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj)) -int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw); +int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, + struct netlink_ext_ack *extack); +int mlx5_fw_version_query(struct mlx5_core_dev *dev, + u32 *running_ver, u32 *stored_ver); void mlx5e_init(void); void mlx5e_cleanup(void); @@ -213,7 +235,7 @@ enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, MLX5_NIC_IFC_NO_DRAM_NIC = 2, - MLX5_NIC_IFC_INVALID = 3 + MLX5_NIC_IFC_SW_RESET = 7 }; u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index ea744d8466ea..9231b39d18b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -38,15 +38,12 @@ void mlx5_init_mkey_table(struct mlx5_core_dev *dev) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; - - memset(table, 0, sizeof(*table)); - rwlock_init(&table->lock); - INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + xa_init_flags(&dev->priv.mkey_table, XA_FLAGS_LOCK_IRQ); } void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { + WARN_ON(!xa_empty(&dev->priv.mkey_table)); } int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, @@ -56,8 +53,8 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_async_cbk_t callback, struct mlx5_async_work *context) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; u32 mkey_index; void *mkc; int err; @@ -88,12 +85,10 @@ int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", mkey_index, key, mkey->key); - /* connect to mkey tree */ - write_lock_irq(&table->lock); - err = radix_tree_insert(&table->tree, mlx5_base_mkey(mkey->key), mkey); - write_unlock_irq(&table->lock); + err = xa_err(xa_store_irq(mkeys, mlx5_base_mkey(mkey->key), mkey, + GFP_KERNEL)); if (err) { - mlx5_core_warn(dev, "failed radix tree insert of mkey 0x%x, %d\n", + mlx5_core_warn(dev, "failed xarray insert of mkey 0x%x, %d\n", mlx5_base_mkey(mkey->key), err); mlx5_core_destroy_mkey(dev, mkey); } @@ -114,17 +109,17 @@ EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { - struct mlx5_mkey_table *table = &dev->priv.mkey_table; u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; u32 
in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; + struct xarray *mkeys = &dev->priv.mkey_table; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - write_lock_irqsave(&table->lock, flags); - deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); - write_unlock_irqrestore(&table->lock, flags); + xa_lock_irqsave(mkeys, flags); + deleted_mkey = __xa_erase(mkeys, mlx5_base_mkey(mkey->key)); + xa_unlock_irqrestore(mkeys, flags); if (!deleted_mkey) { - mlx5_core_dbg(dev, "failed radix tree delete of mkey 0x%x\n", + mlx5_core_dbg(dev, "failed xarray delete of mkey 0x%x\n", mlx5_base_mkey(mkey->key)); return -ENOENT; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c new file mode 100644 index 000000000000..373981a659c7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2019 Mellanox Technologies. */ + +#include <linux/interrupt.h> +#include <linux/notifier.h> +#include <linux/module.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif + +#define MLX5_MAX_IRQ_NAME (32) + +struct mlx5_irq { + struct atomic_notifier_head nh; + cpumask_var_t mask; + char name[MLX5_MAX_IRQ_NAME]; +}; + +struct mlx5_irq_table { + struct mlx5_irq *irq; + int nvec; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif +}; + +int mlx5_irq_table_init(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *irq_table; + + irq_table = kvzalloc(sizeof(*irq_table), GFP_KERNEL); + if (!irq_table) + return -ENOMEM; + + dev->priv.irq_table = irq_table; + return 0; +} + +void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev) +{ + kvfree(dev->priv.irq_table); +} + +int mlx5_irq_get_num_comp(struct mlx5_irq_table *table) +{ + return table->nvec - MLX5_IRQ_VEC_COMP_BASE; +} + +static struct mlx5_irq *mlx5_irq_get(struct mlx5_core_dev *dev, int vecidx) +{ + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + return &irq_table->irq[vecidx]; +} + +int mlx5_irq_attach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_register(&irq->nh, nb); +} + +int mlx5_irq_detach_nb(struct mlx5_irq_table *irq_table, int vecidx, + struct notifier_block *nb) +{ + struct mlx5_irq *irq; + + irq = &irq_table->irq[vecidx]; + return atomic_notifier_chain_unregister(&irq->nh, nb); +} + +static irqreturn_t mlx5_irq_int_handler(int irq, void *nh) +{ + atomic_notifier_call_chain(nh, 0, NULL); + return IRQ_HANDLED; +} + +static void irq_set_name(char *name, int vecidx) +{ + if (vecidx == 0) { + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async"); + return; + } + + snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", + vecidx - MLX5_IRQ_VEC_COMP_BASE); + return; +} + +static int request_irqs(struct mlx5_core_dev *dev, int nvec) +{ + char name[MLX5_MAX_IRQ_NAME]; + int err; + int i; + + for (i = 0; i < nvec; i++) { + struct mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + irq_set_name(name, i); + ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh); + snprintf(irq->name, MLX5_MAX_IRQ_NAME, + "%s@pci:%s", name, pci_name(dev->pdev)); + err = request_irq(irqn, mlx5_irq_int_handler, 0, irq->name, + &irq->nh); + if (err) { + mlx5_core_err(dev, "Failed to request irq\n"); + goto err_request_irq; + } + } + return 0; + +err_request_irq: + for (; i >= 0; i--) { + struct 
mlx5_irq *irq = mlx5_irq_get(dev, i); + int irqn = pci_irq_vector(dev->pdev, i); + + free_irq(irqn, &irq->nh); + } + return err; +} + +static void irq_clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = dev->priv.irq_table; + + free_irq_cpu_rmap(irq_table->rmap); +#endif +} + +static int irq_set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_irq_table *irq_table = mdev->priv.irq_table; + int num_affinity_vec; + int vecidx; + + num_affinity_vec = mlx5_irq_get_num_comp(irq_table); + irq_table->rmap = alloc_irq_cpu_rmap(num_affinity_vec); + if (!irq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_IRQ_VEC_COMP_BASE; + for (; vecidx < irq_table->nvec; vecidx++) { + err = irq_cpu_rmap_add(irq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + irq_clear_rmap(mdev); +err_out: +#endif + return err; +} + +/* Completion IRQ vectors */ + +static int set_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + if (!zalloc_cpumask_var(&irq->mask, GFP_KERNEL)) { + mlx5_core_warn(mdev, "zalloc_cpumask_var failed"); + return -ENOMEM; + } + + cpumask_set_cpu(cpumask_local_spread(i, mdev->priv.numa_node), + irq->mask); + if (IS_ENABLED(CONFIG_SMP) && + irq_set_affinity_hint(irqn, irq->mask)) + mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", + irqn); + + return 0; +} + +static void clear_comp_irq_affinity_hint(struct mlx5_core_dev *mdev, int i) +{ + int vecidx = MLX5_IRQ_VEC_COMP_BASE + i; + struct mlx5_irq *irq; + int irqn; + + irq = mlx5_irq_get(mdev, vecidx); + irqn = pci_irq_vector(mdev->pdev, vecidx); + irq_set_affinity_hint(irqn, NULL); + free_cpumask_var(irq->mask); +} + +static int set_comp_irq_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int err; + int i; + + for (i = 0; i < nvec; i++) { + err = set_comp_irq_affinity_hint(mdev, i); + if (err) + goto err_out; + } + + return 0; + +err_out: + for (i--; i >= 0; i--) + clear_comp_irq_affinity_hint(mdev, i); + + return err; +} + +static void clear_comp_irqs_affinity_hints(struct mlx5_core_dev *mdev) +{ + int nvec = mlx5_irq_get_num_comp(mdev->priv.irq_table); + int i; + + for (i = 0; i < nvec; i++) + clear_comp_irq_affinity_hint(mdev, i); +} + +struct cpumask * +mlx5_irq_get_affinity_mask(struct mlx5_irq_table *irq_table, int vecidx) +{ + return irq_table->irq[vecidx].mask; +} + +#ifdef CONFIG_RFS_ACCEL +struct cpu_rmap *mlx5_irq_get_rmap(struct mlx5_irq_table *irq_table) +{ + return irq_table->rmap; +} +#endif + +static void unrequest_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); +} + +int mlx5_irq_table_create(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_irq_table *table = priv->irq_table; + int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? 
+ MLX5_CAP_GEN(dev, max_num_eqs) : + 1 << MLX5_CAP_GEN(dev, log_max_eq); + int nvec; + int err; + + nvec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + + MLX5_IRQ_VEC_COMP_BASE; + nvec = min_t(int, nvec, num_eqs); + if (nvec <= MLX5_IRQ_VEC_COMP_BASE) + return -ENOMEM; + + table->irq = kcalloc(nvec, sizeof(*table->irq), GFP_KERNEL); + if (!table->irq) + return -ENOMEM; + + nvec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1, + nvec, PCI_IRQ_MSIX); + if (nvec < 0) { + err = nvec; + goto err_free_irq; + } + + table->nvec = nvec; + + err = irq_set_rmap(dev); + if (err) + goto err_set_rmap; + + err = request_irqs(dev, nvec); + if (err) + goto err_request_irqs; + + err = set_comp_irq_affinity_hints(dev); + if (err) { + mlx5_core_err(dev, "Failed to alloc affinity hint cpumask\n"); + goto err_set_affinity; + } + + return 0; + +err_set_affinity: + unrequest_irqs(dev); +err_request_irqs: + irq_clear_rmap(dev); +err_set_rmap: + pci_free_irq_vectors(dev->pdev); +err_free_irq: + kfree(table->irq); + return err; +} + +void mlx5_irq_table_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_irq_table *table = dev->priv.irq_table; + int i; + + /* free_irq requires that affinity and rmap will be cleared + * before calling it. This is why there is asymmetry with set_rmap + * which should be called after alloc_irq but before request_irq. + */ + irq_clear_rmap(dev); + clear_comp_irqs_affinity_hints(dev); + for (i = 0; i < table->nvec; i++) + free_irq(pci_irq_vector(dev->pdev, i), + &mlx5_irq_get(dev, i)->nh); + pci_free_irq_vectors(dev->pdev); + kfree(table->irq); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c index 86f77456f873..17ce9dd56b13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c @@ -106,10 +106,10 @@ static int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) return 0; -destroy_flow_table: - mlx5_destroy_flow_table(ft); destroy_flow_group: mlx5_destroy_flow_group(fg); +destroy_flow_table: + mlx5_destroy_flow_table(ft); free: kvfree(spec); kvfree(flow_group_in); @@ -126,7 +126,7 @@ static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid * { u8 hw_id[ETH_ALEN]; - mlx5_query_nic_vport_mac_address(dev, 0, hw_id); + mlx5_query_mac_address(dev, hw_id); gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); addrconf_addr_eui48(&gid->raw[8], hw_id); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index a249b3c3843d..61fcfd8b39b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -74,17 +74,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) int err; int vf; - if (sriov->enabled_vfs) { - mlx5_core_warn(dev, - "failed to enable SRIOV on device, already enabled with %d vfs\n", - sriov->enabled_vfs); - return -EBUSY; - } - if (!MLX5_ESWITCH_MANAGER(dev)) goto enable_vfs_hca; - err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); + mlx5_eswitch_update_num_of_vfs(dev->priv.eswitch, num_vfs); + err = mlx5_eswitch_enable(dev->priv.eswitch, MLX5_ESWITCH_LEGACY); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); @@ -99,7 +93,6 @@ enable_vfs_hca: continue; } sriov->vfs_ctx[vf].enabled = 1; - sriov->enabled_vfs++; if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) { err = sriov_restore_guids(dev, vf); if (err) { 
@@ -118,13 +111,11 @@ enable_vfs_hca: static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int num_vfs = pci_num_vf(dev->pdev); int err; int vf; - if (!sriov->enabled_vfs) - goto out; - - for (vf = 0; vf < sriov->num_vfs; vf++) { + for (vf = num_vfs - 1; vf >= 0; vf--) { if (!sriov->vfs_ctx[vf].enabled) continue; err = mlx5_core_disable_hca(dev, vf + 1); @@ -133,12 +124,10 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) continue; } sriov->vfs_ctx[vf].enabled = 0; - sriov->enabled_vfs--; } -out: if (MLX5_ESWITCH_MANAGER(dev)) - mlx5_eswitch_disable_sriov(dev->priv.eswitch); + mlx5_eswitch_disable(dev->priv.eswitch); if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); @@ -191,13 +180,11 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) int mlx5_sriov_attach(struct mlx5_core_dev *dev) { - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - - if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + if (!mlx5_core_is_pf(dev) || !pci_num_vf(dev->pdev)) return 0; /* If sriov VFs exist in PCI level, enable them in device level */ - return mlx5_device_enable_sriov(dev, sriov->num_vfs); + return mlx5_device_enable_sriov(dev, pci_num_vf(dev->pdev)); } void mlx5_sriov_detach(struct mlx5_core_dev *dev) @@ -208,6 +195,30 @@ void mlx5_sriov_detach(struct mlx5_core_dev *dev) mlx5_device_disable_sriov(dev); } +static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev) +{ + u16 host_total_vfs; + const u32 *out; + + if (mlx5_core_is_ecpf_esw_manager(dev)) { + out = mlx5_esw_query_functions(dev); + + /* Old FW doesn't support getting total_vfs from esw func + * but supports getting it from pci_sriov. + */ + if (IS_ERR(out)) + goto done; + host_total_vfs = MLX5_GET(query_esw_functions_out, out, + host_params_context.host_total_vfs); + kvfree(out); + if (host_total_vfs) + return host_total_vfs; + } + +done: + return pci_sriov_get_totalvfs(dev->pdev); +} + int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; @@ -218,6 +229,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) return 0; total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->max_vfs = mlx5_get_max_vfs(dev); sriov->num_vfs = pci_num_vf(pdev); sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 95cdc8cbcba4..c912d82ca64b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -34,6 +34,7 @@ #include <linux/etherdevice.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> +#include <linux/mlx5/eswitch.h> #include "mlx5_core.h" /* Mutex to hold while enabling or disabling RoCE */ @@ -155,11 +156,12 @@ int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, } int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, - u16 vport, u8 *addr) + u16 vport, bool other, u8 *addr) { - u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out); + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {}; u8 *out_addr; + u32 *out; int err; out = kvzalloc(outlen, GFP_KERNEL); @@ -169,7 +171,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out, nic_vport_context.permanent_address); - err = mlx5_query_nic_vport_context(mdev, vport, out, outlen); + 
MLX5_SET(query_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); + MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(query_nic_vport_context_in, in, other_vport, other); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); if (!err) ether_addr_copy(addr, &out_addr[2]); @@ -178,6 +185,12 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address); +int mlx5_query_mac_address(struct mlx5_core_dev *mdev, u8 *addr) +{ + return mlx5_query_nic_vport_mac_address(mdev, 0, false, addr); +} +EXPORT_SYMBOL_GPL(mlx5_query_mac_address); + int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr) { @@ -194,9 +207,7 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev, MLX5_SET(modify_nic_vport_context_in, in, field_select.permanent_address, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -291,9 +302,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); MLX5_SET(query_nic_vport_context_in, in, allowed_list_type, list_type); MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); - - if (vport) - MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); + MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) @@ -483,7 +492,7 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev, MLX5_SET(modify_nic_vport_context_in, in, field_select.node_guid, 1); MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); - MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in, in, nic_vport_context); @@ -1157,3 +1166,17 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) return tmp; } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); + +/** + * mlx5_eswitch_get_total_vports - Get total vports of the eswitch + * + * @dev: Pointer to core device + * + * mlx5_eswitch_get_total_vports returns total number of vports for + * the eswitch. 
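+ * The total is the number of special vports (PF, and ECPF when + * present) plus the maximum number of VFs the device supports, + * i.e. MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev).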
+ */ +u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) +{ + return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev); +} +EXPORT_SYMBOL(mlx5_eswitch_get_total_vports); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 1f87cce421e0..f1ec58c9e9e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -134,11 +134,6 @@ static inline void mlx5_wq_cyc_update_db_record(struct mlx5_wq_cyc *wq) *wq->db = cpu_to_be32(wq->wqe_ctr); } -static inline u16 mlx5_wq_cyc_get_ctr_wrap_cnt(struct mlx5_wq_cyc *wq, u16 ctr) -{ - return ctr >> wq->fbc.log_sz; -} - static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr) { return ctr & wq->fbc.sz_m1; diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h index 14c0c62f8e73..c50e74ab02c4 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw.h @@ -5,6 +5,7 @@ #define _MLXFW_H #include <linux/firmware.h> +#include <linux/netlink.h> enum mlxfw_fsm_state { MLXFW_FSM_STATE_IDLE, @@ -57,6 +58,10 @@ struct mlxfw_dev_ops { void (*fsm_cancel)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); void (*fsm_release)(struct mlxfw_dev *mlxfw_dev, u32 fwhandle); + + void (*status_notify)(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes); }; struct mlxfw_dev { @@ -67,11 +72,13 @@ struct mlxfw_dev { #if IS_REACHABLE(CONFIG_MLXFW) int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware); + const struct firmware *firmware, + struct netlink_ext_ack *extack); #else static inline int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 240c027e5f07..67990406cba2 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -39,8 +39,19 @@ static const char * const mlxfw_fsm_state_err_str[] = { "unknown error" }; +static void mlxfw_status_notify(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + if (!mlxfw_dev->ops->status_notify) + return; + mlxfw_dev->ops->status_notify(mlxfw_dev, msg, comp_name, + done_bytes, total_bytes); +} + static int mlxfw_fsm_state_wait(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - enum mlxfw_fsm_state fsm_state) + enum mlxfw_fsm_state fsm_state, + struct netlink_ext_ack *extack) { enum mlxfw_fsm_state_err fsm_state_err; enum mlxfw_fsm_state curr_fsm_state; @@ -57,11 +68,13 @@ retry: if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { pr_err("Firmware flash failed: %s\n", mlxfw_fsm_state_err_str[fsm_state_err]); + NL_SET_ERR_MSG_MOD(extack, "Firmware flash failed"); return -EINVAL; } if (curr_fsm_state != fsm_state) { if (--times == 0) { pr_err("Timeout reached on FSM state change"); + NL_SET_ERR_MSG_MOD(extack, "Timeout reached on FSM state change"); return -ETIMEDOUT; } msleep(MLXFW_FSM_STATE_WAIT_CYCLE_MS); @@ -76,16 +89,20 @@ retry: static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - struct mlxfw_mfa2_component *comp) + struct mlxfw_mfa2_component *comp, + struct netlink_ext_ack *extack) { u16 comp_max_write_size; u8 comp_align_bits; u32 comp_max_size; + char 
comp_name[8]; u16 block_size; u8 *block_ptr; u32 offset; int err; + sprintf(comp_name, "%u", comp->index); + err = mlxfw_dev->ops->component_query(mlxfw_dev, comp->index, &comp_max_size, &comp_align_bits, &comp_max_write_size); @@ -96,6 +113,7 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, if (comp->data_size > comp_max_size) { pr_err("Component %d is of size %d which is bigger than limit %d\n", comp->index, comp->data_size, comp_max_size); + NL_SET_ERR_MSG_MOD(extack, "Component is bigger than limit"); return -EINVAL; } @@ -103,6 +121,7 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, comp_align_bits); pr_debug("Component update\n"); + mlxfw_status_notify(mlxfw_dev, "Updating component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_update(mlxfw_dev, fwhandle, comp->index, comp->data_size); @@ -110,11 +129,13 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, return err; err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, - MLXFW_FSM_STATE_DOWNLOAD); + MLXFW_FSM_STATE_DOWNLOAD, extack); if (err) goto err_out; pr_debug("Component download\n"); + mlxfw_status_notify(mlxfw_dev, "Downloading component", + comp_name, 0, comp->data_size); for (offset = 0; offset < MLXFW_ALIGN_UP(comp->data_size, comp_align_bits); offset += comp_max_write_size) { @@ -126,15 +147,20 @@ static int mlxfw_flash_component(struct mlxfw_dev *mlxfw_dev, offset); if (err) goto err_out; + mlxfw_status_notify(mlxfw_dev, "Downloading component", + comp_name, offset + block_size, + comp->data_size); } pr_debug("Component verify\n"); + mlxfw_status_notify(mlxfw_dev, "Verifying component", comp_name, 0, 0); err = mlxfw_dev->ops->fsm_component_verify(mlxfw_dev, fwhandle, comp->index); if (err) goto err_out; - err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED); + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_out; return 0; @@ -145,7 +171,8 @@ err_out: } static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, - struct mlxfw_mfa2_file *mfa2_file) + struct mlxfw_mfa2_file *mfa2_file, + struct netlink_ext_ack *extack) { u32 component_count; int err; @@ -156,6 +183,7 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, &component_count); if (err) { pr_err("Could not find device PSID in MFA2 file\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not find device PSID in MFA2 file"); return err; } @@ -168,7 +196,7 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, return PTR_ERR(comp); pr_info("Flashing component type %d\n", comp->index); - err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp); + err = mlxfw_flash_component(mlxfw_dev, fwhandle, comp, extack); mlxfw_mfa2_file_component_put(comp); if (err) return err; @@ -177,7 +205,8 @@ static int mlxfw_flash_components(struct mlxfw_dev *mlxfw_dev, u32 fwhandle, } int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlxfw_mfa2_file *mfa2_file; u32 fwhandle; @@ -185,6 +214,7 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, if (!mlxfw_mfa2_check(firmware)) { pr_err("Firmware file is not MFA2\n"); + NL_SET_ERR_MSG_MOD(extack, "Firmware file is not MFA2"); return -EINVAL; } @@ -193,29 +223,35 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, return PTR_ERR(mfa2_file); pr_info("Initialize firmware flash process\n"); + mlxfw_status_notify(mlxfw_dev, "Initializing firmware 
flash process", + NULL, 0, 0); err = mlxfw_dev->ops->fsm_lock(mlxfw_dev, &fwhandle); if (err) { pr_err("Could not lock the firmware FSM\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not lock the firmware FSM"); goto err_fsm_lock; } err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, - MLXFW_FSM_STATE_LOCKED); + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_state_wait_idle_to_locked; - err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file); + err = mlxfw_flash_components(mlxfw_dev, fwhandle, mfa2_file, extack); if (err) goto err_flash_components; pr_debug("Activate image\n"); + mlxfw_status_notify(mlxfw_dev, "Activating image", NULL, 0, 0); err = mlxfw_dev->ops->fsm_activate(mlxfw_dev, fwhandle); if (err) { pr_err("Could not activate the downloaded image\n"); + NL_SET_ERR_MSG_MOD(extack, "Could not activate the downloaded image"); goto err_fsm_activate; } - err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, MLXFW_FSM_STATE_LOCKED); + err = mlxfw_fsm_state_wait(mlxfw_dev, fwhandle, + MLXFW_FSM_STATE_LOCKED, extack); if (err) goto err_state_wait_activate_to_locked; @@ -223,6 +259,7 @@ int mlxfw_firmware_flash(struct mlxfw_dev *mlxfw_dev, mlxfw_dev->ops->fsm_release(mlxfw_dev, fwhandle); pr_info("Firmware flash done.\n"); + mlxfw_status_notify(mlxfw_dev, "Firmware flash done", NULL, 0, 0); mlxfw_mfa2_file_fini(mfa2_file); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 11ded0bc7d98..06c80343d9ed 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -83,6 +83,8 @@ config MLXSW_SPECTRUM select PARMAN select OBJAGG select MLXFW + imply PTP_1588_CLOCK + select NET_PTP_CLASSIFY if PTP_1588_CLOCK default m ---help--- This driver supports Mellanox Technologies Spectrum Ethernet diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index c4dc72e1ce63..171b36bd8a4e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -31,5 +31,6 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_nve.o spectrum_nve_vxlan.o \ spectrum_dpipe.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o +mlxsw_spectrum-$(CONFIG_PTP_1588_CLOCK) += spectrum_ptp.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index 0772e4339b33..5ffdfb532cb7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -317,6 +317,18 @@ MLXSW_ITEM64(cmd_mbox, query_fw, doorbell_page_offset, 0x40, 0, 64); */ MLXSW_ITEM32(cmd_mbox, query_fw, doorbell_page_bar, 0x48, 30, 2); +/* cmd_mbox_query_fw_free_running_clock_offset + * The offset of the free running clock page + */ +MLXSW_ITEM64(cmd_mbox, query_fw, free_running_clock_offset, 0x50, 0, 64); + +/* cmd_mbox_query_fw_fr_rn_clk_bar + * PCI base address register (BAR) of the free running clock page + * 0: BAR 0 + * 1: 64 bit BAR + */ +MLXSW_ITEM32(cmd_mbox, query_fw, fr_rn_clk_bar, 0x58, 30, 2); + /* QUERY_BOARDINFO - Query Board Information * ----------------------------------------- * OpMod == 0 (N/A), INMmod == 0 (N/A) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 6ee6de7f0160..17ceac7505e5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1003,6 
+1003,20 @@ static int mlxsw_devlink_core_bus_device_reload(struct devlink *devlink, return err; } +static int mlxsw_devlink_flash_update(struct devlink *devlink, + const char *file_name, + const char *component, + struct netlink_ext_ack *extack) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->flash_update) + return -EOPNOTSUPP; + return mlxsw_driver->flash_update(mlxsw_core, file_name, + component, extack); +} + static const struct devlink_ops mlxsw_devlink_ops = { .reload = mlxsw_devlink_core_bus_device_reload, .port_type_set = mlxsw_devlink_port_type_set, @@ -1019,6 +1033,7 @@ static const struct devlink_ops mlxsw_devlink_ops = { .sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, .info_get = mlxsw_devlink_info_get, + .flash_update = mlxsw_devlink_flash_update, }; static int @@ -1098,6 +1113,12 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_register_params; } + if (mlxsw_driver->init) { + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); + if (err) + goto err_driver_init; + } + err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon); if (err) goto err_hwmon_init; @@ -1107,22 +1128,17 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_thermal_init; - if (mlxsw_driver->init) { - err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); - if (err) - goto err_driver_init; - } - if (mlxsw_driver->params_register && !reload) devlink_params_publish(devlink); return 0; -err_driver_init: - mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: mlxsw_hwmon_fini(mlxsw_core->hwmon); err_hwmon_init: + if (mlxsw_core->driver->fini) + mlxsw_core->driver->fini(mlxsw_core); +err_driver_init: if (mlxsw_driver->params_unregister && !reload) mlxsw_driver->params_unregister(mlxsw_core); err_register_params: @@ -1187,10 +1203,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core, if (mlxsw_core->driver->params_unregister && !reload) devlink_params_unpublish(devlink); - if (mlxsw_core->driver->fini) - mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); mlxsw_hwmon_fini(mlxsw_core->hwmon); + if (mlxsw_core->driver->fini) + mlxsw_core->driver->fini(mlxsw_core); if (mlxsw_core->driver->params_unregister && !reload) mlxsw_core->driver->params_unregister(mlxsw_core); if (!reload) @@ -1229,6 +1245,15 @@ int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, } EXPORT_SYMBOL(mlxsw_core_skb_transmit); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port) +{ + if (mlxsw_core->driver->ptp_transmitted) + mlxsw_core->driver->ptp_transmitted(mlxsw_core, skb, + local_port); +} +EXPORT_SYMBOL(mlxsw_core_ptp_transmitted); + static bool __is_rx_listener_equal(const struct mlxsw_rx_listener *rxl_a, const struct mlxsw_rx_listener *rxl_b) { @@ -2010,6 +2035,18 @@ int mlxsw_core_resources_query(struct mlxsw_core *mlxsw_core, char *mbox, } EXPORT_SYMBOL(mlxsw_core_resources_query); +u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_frc_h(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_frc_h); + +u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->bus->read_frc_l(mlxsw_core->bus_priv); +} +EXPORT_SYMBOL(mlxsw_core_read_frc_l); + static int __init 
mlxsw_core_module_init(void) { int err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index e3832cb5bdda..8efcff4b59cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -48,6 +48,8 @@ bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); +void mlxsw_core_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port); struct mlxsw_rx_listener { void (*func)(struct sk_buff *skb, u8 local_port, void *priv); @@ -284,6 +286,9 @@ struct mlxsw_driver { unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + int (*flash_update)(struct mlxsw_core *mlxsw_core, + const char *file_name, const char *component, + struct netlink_ext_ack *extack); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); int (*resources_register)(struct mlxsw_core *mlxsw_core); @@ -293,6 +298,13 @@ struct mlxsw_driver { u64 *p_linear_size); int (*params_register)(struct mlxsw_core *mlxsw_core); void (*params_unregister)(struct mlxsw_core *mlxsw_core); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*ptp_transmitted)(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port); + u8 txhdr_len; const struct mlxsw_config_profile *profile; bool res_query_enabled; @@ -306,6 +318,9 @@ int mlxsw_core_kvd_sizes_get(struct mlxsw_core *mlxsw_core, void mlxsw_core_fw_flash_start(struct mlxsw_core *mlxsw_core); void mlxsw_core_fw_flash_end(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_frc_h(struct mlxsw_core *mlxsw_core); +u32 mlxsw_core_read_frc_l(struct mlxsw_core *mlxsw_core); + bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core, enum mlxsw_res_id res_id); @@ -336,6 +351,8 @@ struct mlxsw_bus { char *in_mbox, size_t in_mbox_size, char *out_mbox, size_t out_mbox_size, u8 *p_status); + u32 (*read_frc_h)(void *bus_priv); + u32 (*read_frc_l)(void *bus_priv); u8 features; }; @@ -353,7 +370,8 @@ struct mlxsw_bus_info { struct mlxsw_fw_rev fw_rev; u8 vsd[MLXSW_CMD_BOARDINFO_VSD_LEN]; u8 psid[MLXSW_CMD_BOARDINFO_PSID_LEN]; - u8 low_frequency; + u8 low_frequency:1, + read_frc_capable:1; }; struct mlxsw_hwmon; @@ -409,4 +427,14 @@ enum mlxsw_devlink_param_id { MLXSW_DEVLINK_PARAM_ID_ACL_REGION_REHASH_INTERVAL, }; +struct mlxsw_skb_cb { + struct mlxsw_tx_info tx_info; +}; + +static inline struct mlxsw_skb_cb *mlxsw_skb_cb(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(mlxsw_skb_cb) > sizeof(skb->cb)); + return (struct mlxsw_skb_cb *) skb->cb; +} + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c index cb3e663b1d37..feb4672a5ac0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c @@ -30,8 +30,9 @@ static bool mlxsw_afk_blocks_check(struct mlxsw_afk *mlxsw_afk) elinst = &block->instances[j]; if (elinst->type != elinst->info->type || - elinst->item.size.bits != - elinst->info->item.size.bits) + (!elinst->avoid_size_check && + elinst->item.size.bits != + elinst->info->item.size.bits)) return false; } } @@ -385,12 +386,12 @@ EXPORT_SYMBOL(mlxsw_afk_values_add_buf); static void mlxsw_sp_afk_encode_u32(const 
struct mlxsw_item *storage_item, const struct mlxsw_item *output_item, - char *storage, char *output) + char *storage, char *output, int diff) { u32 value; value = __mlxsw_item_get32(storage, storage_item, 0); - __mlxsw_item_set32(output, output_item, 0, value); + __mlxsw_item_set32(output, output_item, 0, value + diff); } static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item, @@ -406,14 +407,14 @@ static void mlxsw_sp_afk_encode_buf(const struct mlxsw_item *storage_item, static void mlxsw_sp_afk_encode_one(const struct mlxsw_afk_element_inst *elinst, - char *output, char *storage) + char *output, char *storage, int u32_diff) { const struct mlxsw_item *storage_item = &elinst->info->item; const struct mlxsw_item *output_item = &elinst->item; if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_U32) mlxsw_sp_afk_encode_u32(storage_item, output_item, - storage, output); + storage, output, u32_diff); else if (elinst->type == MLXSW_AFK_ELEMENT_TYPE_BUF) mlxsw_sp_afk_encode_buf(storage_item, output_item, storage, output); @@ -446,9 +447,10 @@ void mlxsw_afk_encode(struct mlxsw_afk *mlxsw_afk, continue; mlxsw_sp_afk_encode_one(elinst, block_key, - values->storage.key); + values->storage.key, + elinst->u32_key_diff); mlxsw_sp_afk_encode_one(elinst, block_mask, - values->storage.mask); + values->storage.mask, 0); } mlxsw_afk->ops->encode_block(key, i, block_key); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 4a625cdf3e7c..cb229b55ecc4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -74,7 +74,7 @@ struct mlxsw_afk_element_info { * define an internal storage geometry. */ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { - MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 8), + MLXSW_AFK_ELEMENT_INFO_U32(SRC_SYS_PORT, 0x00, 16, 16), MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_32_47, 0x04, 2), MLXSW_AFK_ELEMENT_INFO_BUF(DMAC_0_31, 0x06, 4), MLXSW_AFK_ELEMENT_INFO_BUF(SMAC_32_47, 0x0A, 2), @@ -107,9 +107,14 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */ const struct mlxsw_afk_element_info *info; enum mlxsw_afk_element_type type; struct mlxsw_item item; /* element geometry in block */ + int u32_key_diff; /* in case value needs to be adjusted before write + * this diff is here to handle that + */ + bool avoid_size_check; }; -#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, _shift, _size) \ +#define MLXSW_AFK_ELEMENT_INST(_type, _element, _offset, \ + _shift, _size, _u32_key_diff, _avoid_size_check) \ { \ .info = &mlxsw_afk_element_infos[MLXSW_AFK_ELEMENT_##_element], \ .type = _type, \ @@ -119,15 +124,24 @@ struct mlxsw_afk_element_inst { /* element instance in actual block */ .size = {.bits = _size}, \ .name = #_element, \ }, \ + .u32_key_diff = _u32_key_diff, \ + .avoid_size_check = _avoid_size_check, \ } #define MLXSW_AFK_ELEMENT_INST_U32(_element, _offset, _shift, _size) \ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ - _element, _offset, _shift, _size) + _element, _offset, _shift, _size, 0, false) + +#define MLXSW_AFK_ELEMENT_INST_EXT_U32(_element, _offset, \ + _shift, _size, _key_diff, \ + _avoid_size_check) \ + MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_U32, \ + _element, _offset, _shift, _size, \ + _key_diff, _avoid_size_check) #define MLXSW_AFK_ELEMENT_INST_BUF(_element, _offset, _size) \ MLXSW_AFK_ELEMENT_INST(MLXSW_AFK_ELEMENT_TYPE_BUF, \ - _element, 
_offset, 0, _size) + _element, _offset, 0, _size, 0, false) struct mlxsw_afk_block { u16 encoding; /* block ID */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 72539a9a3847..d2c7ce67c300 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -92,33 +92,20 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, u16 temp; } temp_thresh; char mcia_pl[MLXSW_REG_MCIA_LEN] = {0}; - char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; - u16 module_temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + unsigned int module_temp; bool qsfp; int err; - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, - 1); - err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl); + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, + false, false); + err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) return err; - - /* Don't read temperature thresholds for module with no valid info. */ - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL); - switch (module_temp) { - case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */ - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: + mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL); + if (!module_temp) { *temp = 0; return 0; - default: - /* Do not consider thresholds for zero temperature. */ - if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) { - *temp = 0; - return 0; - } - break; } /* Read Free Side Device Temperature Thresholds from page 03h diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c index 496dc904c5ed..5b00726c4346 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c @@ -23,6 +23,14 @@ struct mlxsw_hwmon_attr { char name[32]; }; +static int mlxsw_hwmon_get_attr_index(int index, int count) +{ + if (index >= count) + return index % count + MLXSW_REG_MTMP_GBOX_INDEX_MIN; + + return index; +} + struct mlxsw_hwmon { struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -33,6 +41,7 @@ struct mlxsw_hwmon { struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT]; unsigned int attrs_count; u8 sensor_count; + u8 module_sensor_count; }; static ssize_t mlxsw_hwmon_temp_show(struct device *dev, @@ -43,18 +52,19 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp, index; int err; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, - false, false); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, @@ -65,18 +75,19 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev, container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char 
mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp_max; + int temp_max, index; int err; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, - false, false); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n"); return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, NULL, &temp_max, NULL); - return sprintf(buf, "%u\n", temp_max); + return sprintf(buf, "%d\n", temp_max); } static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, @@ -88,6 +99,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; char mtmp_pl[MLXSW_REG_MTMP_LEN]; unsigned long val; + int index; int err; err = kstrtoul(buf, 10, &val); @@ -96,7 +108,9 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev, if (val != 1) return -EINVAL; - mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true); + index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index, + mlxsw_hwmon->module_sensor_count); + mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true); err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); if (err) { dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n"); @@ -198,40 +212,20 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev, struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = container_of(attr, struct mlxsw_hwmon_attr, dev_attr); struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; - char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0}; - u16 temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; u8 module; + int temp; int err; module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count; - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module, - 1); - err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl); - if (err) { - dev_err(dev, "Failed to query module temperature sensor\n"); + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module, + false, false); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) return err; - } - - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); - /* Update status and temperature cache. */ - switch (temp) { - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: - temp = 0; - break; - case MLXSW_REG_MTBR_BAD_SENS_INFO: - /* Untrusted cable is connected. Reading temperature from its - * sensor is faulty. 
- */ - temp = 0; - break; - default: - temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); - break; - } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); - return sprintf(buf, "%u\n", temp); + return sprintf(buf, "%d\n", temp); } static ssize_t mlxsw_hwmon_module_temp_fault_show(struct device *dev, @@ -333,6 +327,20 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev, mlwsw_hwmon_attr->type_index); } +static ssize_t +mlxsw_hwmon_gbox_temp_label_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct mlxsw_hwmon_attr *mlwsw_hwmon_attr = + container_of(attr, struct mlxsw_hwmon_attr, dev_attr); + struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon; + int index = mlwsw_hwmon_attr->type_index - + mlxsw_hwmon->module_sensor_count + 1; + + return sprintf(buf, "gearbox %03u\n", index); +} + enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP, MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, @@ -345,6 +353,7 @@ enum mlxsw_hwmon_attr_type { MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG, MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL, + MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, }; static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, @@ -428,6 +437,13 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon, snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), "temp%u_label", num + 1); break; + case MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL: + mlxsw_hwmon_attr->dev_attr.show = + mlxsw_hwmon_gbox_temp_label_show; + mlxsw_hwmon_attr->dev_attr.attr.mode = 0444; + snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name), + "temp%u_label", num + 1); + break; default: WARN_ON(1); } @@ -556,6 +572,54 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon) index, index); index++; } + mlxsw_hwmon->module_sensor_count = index; + + return 0; +} + +static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon) +{ + int index, max_index, sensor_index; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u8 gbox_num; + int err; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL); + if (!gbox_num) + return 0; + + index = mlxsw_hwmon->module_sensor_count; + max_index = mlxsw_hwmon->module_sensor_count + gbox_num; + while (index < max_index) { + sensor_index = index % mlxsw_hwmon->module_sensor_count + + MLXSW_REG_MTMP_GBOX_INDEX_MIN; + mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true); + err = mlxsw_reg_write(mlxsw_hwmon->core, + MLXSW_REG(mtmp), mtmp_pl); + if (err) { + dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n", + sensor_index); + return err; + } + mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP, + index, index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_RST, index, + index); + mlxsw_hwmon_attr_add(mlxsw_hwmon, + MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL, + index, index); + index++; + } return 0; } @@ -586,6 +650,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, if (err) goto err_temp_module_init; + err = mlxsw_hwmon_gearbox_init(mlxsw_hwmon); + if (err) + goto err_temp_gearbox_init; + mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group; mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs; @@ -602,6 +670,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core, return 0; err_hwmon_register: 
+err_temp_gearbox_init: err_temp_module_init: err_fans_init: err_temp_init: diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index d3e851e7ca72..35a1dc89c28a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -23,6 +23,7 @@ #define MLXSW_THERMAL_HYSTERESIS_TEMP 5000 /* 5C */ #define MLXSW_THERMAL_MODULE_TEMP_SHIFT (MLXSW_THERMAL_HYSTERESIS_TEMP * 2) #define MLXSW_THERMAL_ZONE_MAX_NAME 16 +#define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE 10 #define MLXSW_THERMAL_MAX_DUTY 255 /* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values @@ -98,7 +99,7 @@ struct mlxsw_thermal_module { struct thermal_zone_device *tzdev; struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; - int module; + int module; /* Module or gearbox number */ }; struct mlxsw_thermal { @@ -111,6 +112,10 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; + struct mlxsw_thermal_module *tz_gearbox_arr; + u8 tz_gearbox_num; + unsigned int tz_highest_score; + struct thermal_zone_device *tz_highest_dev; }; static inline u8 mlxsw_state_to_duty(int state) @@ -195,6 +200,34 @@ mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, return 0; } +static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal, + struct thermal_zone_device *tzdev, + struct mlxsw_thermal_trip *trips, + int temp) +{ + struct mlxsw_thermal_trip *trip = trips; + unsigned int score, delta, i, shift = 1; + + /* Calculate thermal zone score, if temperature is above the critical + * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX. 
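+ * Otherwise the score is delta * shift: delta grows as the reading
+ * approaches the next trip point, and shift gains a factor of 256 for
+ * every trip point already crossed, so a zone that passed a higher
+ * trip always outscores one that did not.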
+ */ + score = MLXSW_THERMAL_TEMP_SCORE_MAX; + for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS; + i++, trip++) { + if (temp < trip->temp) { + delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp); + score = delta * shift; + break; + } + shift *= 256; + } + + if (score > thermal->tz_highest_score) { + thermal->tz_highest_score = score; + thermal->tz_highest_dev = tzdev; + } +} + static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev, struct thermal_cooling_device *cdev) { @@ -279,7 +312,7 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, struct mlxsw_thermal *thermal = tzdev->devdata; struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - unsigned int temp; + int temp; int err; mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false); @@ -290,8 +323,11 @@ static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev, return err; } mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, + temp); - *p_temp = (int) temp; + *p_temp = temp; return 0; } @@ -351,6 +387,22 @@ static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev, return 0; } +static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev, + int trip, enum thermal_trend *trend) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + + if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS) + return -EINVAL; + + if (tzdev == thermal->tz_highest_dev) + return 1; + + *trend = THERMAL_TREND_STABLE; + return 0; +} + static struct thermal_zone_device_ops mlxsw_thermal_ops = { .bind = mlxsw_thermal_bind, .unbind = mlxsw_thermal_unbind, @@ -362,6 +414,7 @@ static struct thermal_zone_device_ops mlxsw_thermal_ops = { .set_trip_temp = mlxsw_thermal_set_trip_temp, .get_trip_hyst = mlxsw_thermal_get_trip_hyst, .set_trip_hyst = mlxsw_thermal_set_trip_hyst, + .get_trend = mlxsw_thermal_trend_get, }; static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev, @@ -449,39 +502,33 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, struct mlxsw_thermal_module *tz = tzdev->devdata; struct mlxsw_thermal *thermal = tz->parent; struct device *dev = thermal->bus_info->dev; - char mtbr_pl[MLXSW_REG_MTBR_LEN]; - u16 temp; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + int temp; int err; /* Read module temperature. */ - mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + - tz->module, 1); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl); - if (err) - return err; - - mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL); - /* Update temperature. */ - switch (temp) { - case MLXSW_REG_MTBR_NO_CONN: /* fall-through */ - case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */ - case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */ - case MLXSW_REG_MTBR_BAD_SENS_INFO: + mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + + tz->module, false, false); + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) { + /* Do not return error - in case of broken module's sensor + * it will cause error message flooding. + */ temp = 0; - break; - default: - temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); - /* Reset all trip point. */ - mlxsw_thermal_module_trips_reset(tz); - /* Update trip points. 
*/ - err = mlxsw_thermal_module_trips_update(dev, thermal->core, - tz); - if (err) - return err; - break; + *p_temp = (int) temp; + return 0; } + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + *p_temp = temp; + + if (!temp) + return 0; + + /* Update trip points. */ + err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); + if (!err && temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); - *p_temp = (int) temp; return 0; } @@ -545,10 +592,6 @@ mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip, return 0; } -static struct thermal_zone_params mlxsw_thermal_module_params = { - .governor_name = "user_space", -}; - static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .bind = mlxsw_thermal_module_bind, .unbind = mlxsw_thermal_module_unbind, @@ -560,6 +603,46 @@ static struct thermal_zone_device_ops mlxsw_thermal_module_ops = { .set_trip_temp = mlxsw_thermal_module_trip_temp_set, .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, + .get_trend = mlxsw_thermal_trend_get, +}; + +static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + char mtmp_pl[MLXSW_REG_MTMP_LEN]; + u16 index; + int temp; + int err; + + index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module; + mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false); + + err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + if (err) + return err; + + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + if (temp > 0) + mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp); + + *p_temp = temp; + return 0; +} + +static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = { + .bind = mlxsw_thermal_module_bind, + .unbind = mlxsw_thermal_module_unbind, + .get_mode = mlxsw_thermal_module_mode_get, + .set_mode = mlxsw_thermal_module_mode_set, + .get_temp = mlxsw_thermal_gearbox_temp_get, + .get_trip_type = mlxsw_thermal_module_trip_type_get, + .get_trip_temp = mlxsw_thermal_module_trip_temp_get, + .set_trip_temp = mlxsw_thermal_module_trip_temp_set, + .get_trip_hyst = mlxsw_thermal_module_trip_hyst_get, + .set_trip_hyst = mlxsw_thermal_module_trip_hyst_set, + .get_trend = mlxsw_thermal_trend_get, }; static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev, @@ -675,13 +758,13 @@ mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz) MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - &mlxsw_thermal_module_params, - 0, 0); + NULL, 0, 0); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); return err; } + module_tz->mode = THERMAL_DEVICE_ENABLED; return 0; } @@ -787,6 +870,92 @@ mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal) kfree(thermal->tz_module_arr); } +static int +mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz) +{ + char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME]; + + snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d", + gearbox_tz->module + 1); + gearbox_tz->tzdev = thermal_zone_device_register(tz_name, + MLXSW_THERMAL_NUM_TRIPS, + MLXSW_THERMAL_TRIP_MASK, + gearbox_tz, + &mlxsw_thermal_gearbox_ops, + NULL, 0, 0); + if (IS_ERR(gearbox_tz->tzdev)) + return PTR_ERR(gearbox_tz->tzdev); + + gearbox_tz->mode = THERMAL_DEVICE_ENABLED; + return 0; +} + +static void +mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz) +{ + 
thermal_zone_device_unregister(gearbox_tz->tzdev); +} + +static int +mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core, + struct mlxsw_thermal *thermal) +{ + struct mlxsw_thermal_module *gearbox_tz; + char mgpir_pl[MLXSW_REG_MGPIR_LEN]; + int i; + int err; + + if (!mlxsw_core_res_query_enabled(core)) + return 0; + + mlxsw_reg_mgpir_pack(mgpir_pl); + err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl); + if (err) + return err; + + mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL); + if (!thermal->tz_gearbox_num) + return 0; + + thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num, + sizeof(*thermal->tz_gearbox_arr), + GFP_KERNEL); + if (!thermal->tz_gearbox_arr) + return -ENOMEM; + + for (i = 0; i < thermal->tz_gearbox_num; i++) { + gearbox_tz = &thermal->tz_gearbox_arr[i]; + memcpy(gearbox_tz->trips, default_thermal_trips, + sizeof(thermal->trips)); + gearbox_tz->module = i; + gearbox_tz->parent = thermal; + err = mlxsw_thermal_gearbox_tz_init(gearbox_tz); + if (err) + goto err_unreg_tz_gearbox; + } + + return 0; + +err_unreg_tz_gearbox: + for (i--; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); + kfree(thermal->tz_gearbox_arr); + return err; +} + +static void +mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal) +{ + int i; + + if (!mlxsw_core_res_query_enabled(thermal->core)) + return; + + for (i = thermal->tz_gearbox_num - 1; i >= 0; i--) + mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]); + kfree(thermal->tz_gearbox_arr); +} + int mlxsw_thermal_init(struct mlxsw_core *core, const struct mlxsw_bus_info *bus_info, struct mlxsw_thermal **p_thermal) @@ -877,10 +1046,16 @@ int mlxsw_thermal_init(struct mlxsw_core *core, if (err) goto err_unreg_tzdev; + err = mlxsw_thermal_gearboxes_init(dev, core, thermal); + if (err) + goto err_unreg_modules_tzdev; + thermal->mode = THERMAL_DEVICE_ENABLED; *p_thermal = thermal; return 0; +err_unreg_modules_tzdev: + mlxsw_thermal_modules_fini(thermal); err_unreg_tzdev: if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); @@ -899,6 +1074,7 @@ void mlxsw_thermal_fini(struct mlxsw_thermal *thermal) { int i; + mlxsw_thermal_gearboxes_fini(thermal); mlxsw_thermal_modules_fini(thermal); if (thermal->tzdev) { thermal_zone_device_unregister(thermal->tzdev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c index 06aea1999518..95f408d0e103 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c +++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c @@ -43,11 +43,10 @@ #define MLXSW_I2C_PREP_SIZE (MLXSW_I2C_ADDR_WIDTH + 28) #define MLXSW_I2C_MBOX_SIZE 20 #define MLXSW_I2C_MBOX_OUT_PARAM_OFF 12 -#define MLXSW_I2C_MAX_BUFF_SIZE 32 #define MLXSW_I2C_MBOX_OFFSET_BITS 20 #define MLXSW_I2C_MBOX_SIZE_BITS 12 #define MLXSW_I2C_ADDR_BUF_SIZE 4 -#define MLXSW_I2C_BLK_MAX 32 +#define MLXSW_I2C_BLK_DEF 32 #define MLXSW_I2C_RETRY 5 #define MLXSW_I2C_TIMEOUT_MSECS 5000 #define MLXSW_I2C_MAX_DATA_SIZE 256 @@ -62,6 +61,7 @@ * @dev: I2C device; * @core: switch core pointer; * @bus_info: bus info block; + * @block_size: maximum block size allowed to pass to under layer; */ struct mlxsw_i2c { struct { @@ -74,6 +74,7 @@ struct mlxsw_i2c { struct device *dev; struct mlxsw_core *core; struct mlxsw_bus_info bus_info; + u16 block_size; }; #define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \ @@ -315,20 +316,26 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, struct i2c_client *client = 
to_i2c_client(dev); struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client); unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS); - u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE]; int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j; unsigned long end; + u8 *tran_buf; struct i2c_msg write_tran = - MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE); + MLXSW_I2C_WRITE_MSG(client, NULL, MLXSW_I2C_PUSH_CMD_SIZE); int err; + tran_buf = kmalloc(mlxsw_i2c->block_size + MLXSW_I2C_ADDR_BUF_SIZE, + GFP_KERNEL); + if (!tran_buf) + return -ENOMEM; + + write_tran.buf = tran_buf; for (i = 0; i < num; i++) { - chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ? - MLXSW_I2C_BLK_MAX : in_mbox_size; + chunk_size = (in_mbox_size > mlxsw_i2c->block_size) ? + mlxsw_i2c->block_size : in_mbox_size; write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size; mlxsw_i2c_set_slave_addr(tran_buf, off); memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox + - MLXSW_I2C_BLK_MAX * i, chunk_size); + mlxsw_i2c->block_size * i, chunk_size); j = 0; end = jiffies + timeout; @@ -342,9 +349,10 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, (j++ < MLXSW_I2C_RETRY)); if (err != 1) { - if (!err) + if (!err) { err = -EIO; - return err; + goto mlxsw_i2c_write_exit; + } } off += chunk_size; @@ -355,24 +363,27 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num, err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0); if (err) { dev_err(&client->dev, "Could not start transaction"); - return -EIO; + err = -EIO; + goto mlxsw_i2c_write_exit; } /* Wait until go bit is cleared. */ err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status); if (err) { dev_err(&client->dev, "HW semaphore is not released"); - return err; + goto mlxsw_i2c_write_exit; } /* Validate transaction completion status. */ if (*p_status) { dev_err(&client->dev, "Bad transaction completion status %x\n", *p_status); - return -EIO; + err = -EIO; } - return 0; +mlxsw_i2c_write_exit: + kfree(tran_buf); + return err; } /* Routine executes I2C command. */ @@ -395,8 +406,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, if (in_mbox) { reg_size = mlxsw_i2c_get_reg_size(in_mbox); - num = reg_size / MLXSW_I2C_BLK_MAX; - if (reg_size % MLXSW_I2C_BLK_MAX) + num = reg_size / mlxsw_i2c->block_size; + if (reg_size % mlxsw_i2c->block_size) num++; if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { @@ -416,7 +427,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, } else { /* No input mailbox is case of initialization query command. */ reg_size = MLXSW_I2C_MAX_DATA_SIZE; - num = reg_size / MLXSW_I2C_BLK_MAX; + num = reg_size / mlxsw_i2c->block_size; if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) { dev_err(&client->dev, "Could not acquire lock"); @@ -432,8 +443,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size, /* Send read transaction to get output mailbox content. */ read_tran[1].buf = out_mbox; for (i = 0; i < num; i++) { - chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ? - MLXSW_I2C_BLK_MAX : reg_size; + chunk_size = (reg_size > mlxsw_i2c->block_size) ? 
+ mlxsw_i2c->block_size : reg_size; read_tran[1].len = chunk_size; mlxsw_i2c_set_slave_addr(tran_buf, off); @@ -509,8 +520,20 @@ mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (!mbox) return -ENOMEM; + err = mlxsw_cmd_query_fw(mlxsw_core, mbox); + if (err) + goto mbox_put; + + mlxsw_i2c->bus_info.fw_rev.major = + mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox); + mlxsw_i2c->bus_info.fw_rev.minor = + mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox); + mlxsw_i2c->bus_info.fw_rev.subminor = + mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox); + err = mlxsw_core_resources_query(mlxsw_core, mbox, res); +mbox_put: mlxsw_cmd_mbox_free(mbox); return err; } @@ -534,6 +557,7 @@ static const struct mlxsw_bus mlxsw_i2c_bus = { static int mlxsw_i2c_probe(struct i2c_client *client, const struct i2c_device_id *id) { + const struct i2c_adapter_quirks *quirks = client->adapter->quirks; struct mlxsw_i2c *mlxsw_i2c; u8 status; int err; @@ -542,6 +566,22 @@ static int mlxsw_i2c_probe(struct i2c_client *client, if (!mlxsw_i2c) return -ENOMEM; + if (quirks) { + if ((quirks->max_read_len && + quirks->max_read_len < MLXSW_I2C_BLK_DEF) || + (quirks->max_write_len && + quirks->max_write_len < MLXSW_I2C_BLK_DEF)) { + dev_err(&client->dev, "Insufficient transaction buffer length\n"); + return -EOPNOTSUPP; + } + + mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF, + min_t(u16, quirks->max_read_len, + quirks->max_write_len)); + } else { + mlxsw_i2c->block_size = MLXSW_I2C_BLK_DEF; + } + i2c_set_clientdata(client, mlxsw_i2c); mutex_init(&mlxsw_i2c->cmd.lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index cf2114273b72..471b0ca6d69a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -67,6 +67,23 @@ static const struct net_device_ops mlxsw_m_port_netdev_ops = { .ndo_get_devlink_port = mlxsw_m_port_get_devlink_port, }; +static void mlxsw_m_module_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev); + struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m; + + strlcpy(drvinfo->driver, mlxsw_m->bus_info->device_kind, + sizeof(drvinfo->driver)); + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", + mlxsw_m->bus_info->fw_rev.major, + mlxsw_m->bus_info->fw_rev.minor, + mlxsw_m->bus_info->fw_rev.subminor); + strlcpy(drvinfo->bus_info, mlxsw_m->bus_info->device_name, + sizeof(drvinfo->bus_info)); +} + static int mlxsw_m_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { @@ -88,6 +105,7 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee, } static const struct ethtool_ops mlxsw_m_port_ethtool_ops = { + .get_drvinfo = mlxsw_m_module_get_drvinfo, .get_module_info = mlxsw_m_get_module_info, .get_module_eeprom = mlxsw_m_get_module_eeprom, }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index b40455f8293d..051b19388a81 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -102,6 +102,7 @@ struct mlxsw_pci_queue_type_group { struct mlxsw_pci { struct pci_dev *pdev; u8 __iomem *hw_addr; + u64 free_running_clock_offset; struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; u32 doorbell_offset; struct mlxsw_core *core; @@ -507,17 +508,28 @@ static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, { struct pci_dev *pdev 
= mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; + struct mlxsw_tx_info tx_info; char *wqe; struct sk_buff *skb; int i; spin_lock(&q->lock); elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); + tx_info = mlxsw_skb_cb(elem_info->u.sdq.skb)->tx_info; skb = elem_info->u.sdq.skb; wqe = elem_info->elem; for (i = 0; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, i, DMA_TO_DEVICE); - dev_kfree_skb_any(skb); + + if (unlikely(!tx_info.is_emad && + skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + mlxsw_core_ptp_transmitted(mlxsw_pci->core, skb, + tx_info.local_port); + skb = NULL; + } + + if (skb) + dev_kfree_skb_any(skb); elem_info->u.sdq.skb = NULL; if (q->consumer_counter++ != consumer_counter_limit) @@ -1414,6 +1426,15 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, mlxsw_pci->doorbell_offset = mlxsw_cmd_mbox_query_fw_doorbell_page_offset_get(mbox); + if (mlxsw_cmd_mbox_query_fw_fr_rn_clk_bar_get(mbox) != 0) { + dev_err(&pdev->dev, "Unsupported free running clock BAR queried from hw\n"); + err = -EINVAL; + goto err_fr_rn_clk_bar; + } + + mlxsw_pci->free_running_clock_offset = + mlxsw_cmd_mbox_query_fw_free_running_clock_offset_get(mbox); + num_pages = mlxsw_cmd_mbox_query_fw_fw_pages_get(mbox); err = mlxsw_pci_fw_area_init(mlxsw_pci, mbox, num_pages); if (err) @@ -1469,6 +1490,7 @@ err_query_resources: err_boardinfo: mlxsw_pci_fw_area_fini(mlxsw_pci); err_fw_area_init: +err_fr_rn_clk_bar: err_doorbell_page_bar: err_iface_rev: err_query_fw: @@ -1537,6 +1559,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, err = -EAGAIN; goto unlock; } + mlxsw_skb_cb(skb)->tx_info = *tx_info; elem_info->u.sdq.skb = skb; wqe = elem_info->elem; @@ -1560,6 +1583,9 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, goto unmap_frags; } + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + /* Set unused sq entries byte count to zero. 
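 * Only the fragments mapped above carry data; the byte count of the
 * remaining scatter/gather slots is cleared so the device treats them
 * as empty.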
*/ for (i++; i < MLXSW_PCI_WQE_SG_ENTRIES; i++) mlxsw_pci_wqe_byte_count_set(wqe, i, 0); @@ -1672,6 +1698,24 @@ static int mlxsw_pci_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod, return err; } +static u32 mlxsw_pci_read_frc_h(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + u64 frc_offset; + + frc_offset = mlxsw_pci->free_running_clock_offset; + return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_H(frc_offset)); +} + +static u32 mlxsw_pci_read_frc_l(void *bus_priv) +{ + struct mlxsw_pci *mlxsw_pci = bus_priv; + u64 frc_offset; + + frc_offset = mlxsw_pci->free_running_clock_offset; + return mlxsw_pci_read32(mlxsw_pci, FREE_RUNNING_CLOCK_L(frc_offset)); +} + static const struct mlxsw_bus mlxsw_pci_bus = { .kind = "pci", .init = mlxsw_pci_init, @@ -1679,6 +1723,8 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .skb_transmit_busy = mlxsw_pci_skb_transmit_busy, .skb_transmit = mlxsw_pci_skb_transmit, .cmd_exec = mlxsw_pci_cmd_exec, + .read_frc_h = mlxsw_pci_read_frc_h, + .read_frc_l = mlxsw_pci_read_frc_l, .features = MLXSW_BUS_F_TXRX | MLXSW_BUS_F_RESET, }; @@ -1740,6 +1786,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->bus_info.device_kind = driver_name; mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); mlxsw_pci->bus_info.dev = &pdev->dev; + mlxsw_pci->bus_info.read_frc_capable = true; mlxsw_pci->id = id; err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 8648ca171254..e57e42e2d2b2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -43,6 +43,9 @@ #define MLXSW_PCI_DOORBELL(offset, type_offset, num) \ ((offset) + (type_offset) + (num) * 4) +#define MLXSW_PCI_FREE_RUNNING_CLOCK_H(offset) (offset) +#define MLXSW_PCI_FREE_RUNNING_CLOCK_L(offset) ((offset) + 4) + #define MLXSW_PCI_CQS_MAX 96 #define MLXSW_PCI_EQS_COUNT 2 #define MLXSW_PCI_EQ_ASYNC_NUM 0 diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 7ed63ed657c7..ead36702549a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3515,6 +3515,18 @@ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); */ MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1); +/* reg_qeec_ptps + * PTP shaper + * 0: regular shaper mode + * 1: PTP oriented shaper + * Allowed only for hierarchy 0 + * Not supported for CPU port + * Note that ptps mode may affect the shaper rates of all hierarchies + * Supported only on Spectrum-1 + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, ptps, 0x0C, 29, 1); + enum { MLXSW_REG_QEEC_BYTES_MODE, MLXSW_REG_QEEC_PACKETS_MODE, @@ -3601,6 +3613,16 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, mlxsw_reg_qeec_next_element_index_set(payload, next_index); } +static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port, + bool ptps) +{ + MLXSW_REG_ZERO(qeec, payload); + mlxsw_reg_qeec_local_port_set(payload, local_port); + mlxsw_reg_qeec_element_hierarchy_set(payload, + MLXSW_REG_QEEC_HIERARCY_PORT); + mlxsw_reg_qeec_ptps_set(payload, ptps); +} + /* QRWE - QoS ReWrite Enable * ------------------------- * This register configures the rewrite enable per receive port. 
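The QPSC hunk below programs a token-bucket shaper: every update interval of 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32 ns, the device adds shaper_inc tokens of 8 bytes (64 bits each) to the bucket. A minimal sketch of that arithmetic, using the kernel's math helpers; the helper name is hypothetical and not part of this patch:

#include <linux/math64.h>	/* div64_u64 */
#include <linux/time64.h>	/* NSEC_PER_SEC */
#include <linux/types.h>

/* Hypothetical helper: the QPSC shaper rate in bits per second, per
 * the reg_qpsc_shaper_time_* and reg_qpsc_shaper_inc comments below.
 */
static u64 qpsc_shaper_rate_bps(u8 time_exp, u8 time_mantissa, u8 inc)
{
	/* Base update interval: 2^exp * (1 + mantissa) * 32 ns. */
	u64 update_ns = (1ULL << time_exp) * (1 + time_mantissa) * 32;

	/* shaper_inc counts 8-byte tokens, i.e. 64 bits per token. */
	return div64_u64((u64)inc * 64 * NSEC_PER_SEC, update_ns);
}

With time_exp = 0, time_mantissa = 0 and inc = 1 this evaluates to 64 bits every 32 ns, i.e. 2 Gbit/s.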
@@ -3814,6 +3836,112 @@ mlxsw_reg_qtctm_pack(char *payload, u8 local_port, bool mc) mlxsw_reg_qtctm_mc_set(payload, mc); } +/* QPSC - QoS PTP Shaper Configuration Register + * -------------------------------------------- + * The QPSC allows advanced configuration of the shapers when QEEC.ptps=1. + * Supported only on Spectrum-1. + */ +#define MLXSW_REG_QPSC_ID 0x401B +#define MLXSW_REG_QPSC_LEN 0x28 + +MLXSW_REG_DEFINE(qpsc, MLXSW_REG_QPSC_ID, MLXSW_REG_QPSC_LEN); + +enum mlxsw_reg_qpsc_port_speed { + MLXSW_REG_QPSC_PORT_SPEED_100M, + MLXSW_REG_QPSC_PORT_SPEED_1G, + MLXSW_REG_QPSC_PORT_SPEED_10G, + MLXSW_REG_QPSC_PORT_SPEED_25G, +}; + +/* reg_qpsc_port_speed + * Port speed. + * Access: Index + */ +MLXSW_ITEM32(reg, qpsc, port_speed, 0x00, 0, 4); + +/* reg_qpsc_shaper_time_exp + * The base-time-interval for updating the shapers tokens (for all hierarchies). + * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec + * shaper_rate = 64bit * shaper_inc / shaper_update_rate + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_time_exp, 0x04, 16, 4); + +/* reg_qpsc_shaper_time_mantissa + * The base-time-interval for updating the shapers tokens (for all hierarchies). + * shaper_update_rate = 2 ^ shaper_time_exp * (1 + shaper_time_mantissa) * 32nSec + * shaper_rate = 64bit * shaper_inc / shaper_update_rate + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_time_mantissa, 0x04, 0, 5); + +/* reg_qpsc_shaper_inc + * Number of tokens added to shaper on each update. + * Units of 8B. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_inc, 0x08, 0, 5); + +/* reg_qpsc_shaper_bs + * Max shaper Burst size. + * Burst size is 2 ^ max_shaper_bs * 512 [bits] + * Range is: 5..25 (from 2KB..2GB) + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, shaper_bs, 0x0C, 0, 6); + +/* reg_qpsc_ptsc_we + * Write enable to port_to_shaper_credits. + * Access: WO + */ +MLXSW_ITEM32(reg, qpsc, ptsc_we, 0x10, 31, 1); + +/* reg_qpsc_port_to_shaper_credits + * For split ports: range 1..57 + * For non-split ports: range 1..112 + * Written only when ptsc_we is set. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, port_to_shaper_credits, 0x10, 0, 8); + +/* reg_qpsc_ing_timestamp_inc + * Ingress timestamp increment. + * 2's complement. + * The timestamp of MTPPTR at ingress will be incremented by this value. Global + * value for all ports. + * Same units as used by MTPPTR. + * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, ing_timestamp_inc, 0x20, 0, 32); + +/* reg_qpsc_egr_timestamp_inc + * Egress timestamp increment. + * 2's complement. + * The timestamp of MTPPTR at egress will be incremented by this value. Global + * value for all ports. + * Same units as used by MTPPTR. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, qpsc, egr_timestamp_inc, 0x24, 0, 32); + +static inline void +mlxsw_reg_qpsc_pack(char *payload, enum mlxsw_reg_qpsc_port_speed port_speed, + u8 shaper_time_exp, u8 shaper_time_mantissa, u8 shaper_inc, + u8 shaper_bs, u8 port_to_shaper_credits, + int ing_timestamp_inc, int egr_timestamp_inc) +{ + MLXSW_REG_ZERO(qpsc, payload); + mlxsw_reg_qpsc_port_speed_set(payload, port_speed); + mlxsw_reg_qpsc_shaper_time_exp_set(payload, shaper_time_exp); + mlxsw_reg_qpsc_shaper_time_mantissa_set(payload, shaper_time_mantissa); + mlxsw_reg_qpsc_shaper_inc_set(payload, shaper_inc); + mlxsw_reg_qpsc_shaper_bs_set(payload, shaper_bs); + mlxsw_reg_qpsc_ptsc_we_set(payload, true); + mlxsw_reg_qpsc_port_to_shaper_credits_set(payload, port_to_shaper_credits); + mlxsw_reg_qpsc_ing_timestamp_inc_set(payload, ing_timestamp_inc); + mlxsw_reg_qpsc_egr_timestamp_inc_set(payload, egr_timestamp_inc); +} + /* PMLP - Ports Module to Local Port Register * ------------------------------------------ * Configures the assignment of modules to local ports. @@ -5292,6 +5420,8 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, + MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, }; /* reg_htgt_trap_group @@ -8039,16 +8169,21 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7); MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN); +#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64 +#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256 /* reg_mtmp_sensor_index * Sensors index to access. * 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially * (module 0 is mapped to sensor_index 64). * Access: Index */ -MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7); +MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12); /* Convert to milli degrees Celsius */ -#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) +#define MLXSW_REG_MTMP_TEMP_TO_MC(val) ({ typeof(val) v_ = (val); \ + ((v_) >= 0) ? ((v_) * 125) : \ + ((s16)((GENMASK(15, 0) + (v_) + 1) \ + * 125)); }) /* reg_mtmp_temperature * Temperature reading from the sensor. Reading is in 0.125 Celsius @@ -8107,7 +8242,7 @@ MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16); */ MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE); -static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, +static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index, bool max_temp_enable, bool max_temp_reset) { @@ -8119,11 +8254,10 @@ static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index, MLXSW_REG_MTMP_THRESH_HI); } -static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp, - unsigned int *p_max_temp, - char *sensor_name) +static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, + int *p_max_temp, char *sensor_name) { - u16 temp; + s16 temp; if (p_temp) { temp = mlxsw_reg_mtmp_temperature_get(payload); @@ -8156,7 +8290,7 @@ MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN); * 64-127 are mapped to the SFP+/QSFP modules sequentially). 
 * Access: Index
 */
-MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 12);
 
 /* reg_mtbr_num_rec
  * Request: Number of records to read
@@ -8183,7 +8317,7 @@ MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16,
 MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16,
 		     MLXSW_REG_MTBR_REC_LEN, 0x00, false);
 
-static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index,
+static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index,
 				       u8 num_rec)
 {
 	MLXSW_REG_ZERO(mtbr, payload);
@@ -8689,6 +8823,107 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port,
 			       MLXSW_REG_MLCR_DURATION_MAX : 0);
 }
 
+/* MTPPS - Management Pulse Per Second Register
+ * --------------------------------------------
+ * This register provides the device PPS capabilities, configures the PPS in
+ * and out modules, and holds the PPS in time stamp.
+ */
+#define MLXSW_REG_MTPPS_ID 0x9053
+#define MLXSW_REG_MTPPS_LEN 0x3C
+
+MLXSW_REG_DEFINE(mtpps, MLXSW_REG_MTPPS_ID, MLXSW_REG_MTPPS_LEN);
+
+/* reg_mtpps_enable
+ * Enables the PPS functionality for the specific pin.
+ * A boolean variable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, enable, 0x20, 31, 1);
+
+enum mlxsw_reg_mtpps_pin_mode {
+	MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN = 0x2,
+};
+
+/* reg_mtpps_pin_mode
+ * Pin mode to be used. The mode must comply with the supported modes of the
+ * requested pin.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpps, pin_mode, 0x20, 8, 4);
+
+#define MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN 7
+
+/* reg_mtpps_pin
+ * Pin to be configured or queried out of the supported pins.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtpps, pin, 0x20, 0, 8);
+
+/* reg_mtpps_time_stamp
+ * When pin_mode = pps_in, the latched device time when it was triggered from
+ * the external GPIO pin.
+ * When pin_mode = pps_out or virtual_pin or pps_out_and_virtual_pin, the
+ * target time to generate the next output signal.
+ * Time is in units of device clock.
+ * Access: RW
+ */
+MLXSW_ITEM64(reg, mtpps, time_stamp, 0x28, 0, 64);
+
+static inline void
+mlxsw_reg_mtpps_vpin_pack(char *payload, u64 time_stamp)
+{
+	MLXSW_REG_ZERO(mtpps, payload);
+	mlxsw_reg_mtpps_pin_set(payload, MLXSW_REG_MTPPS_PIN_SP_VIRTUAL_PIN);
+	mlxsw_reg_mtpps_pin_mode_set(payload,
+				     MLXSW_REG_MTPPS_PIN_MODE_VIRTUAL_PIN);
+	mlxsw_reg_mtpps_enable_set(payload, true);
+	mlxsw_reg_mtpps_time_stamp_set(payload, time_stamp);
+}
+
+/* MTUTC - Management UTC Register
+ * -------------------------------
+ * Configures the HW UTC counter.
+ */
+#define MLXSW_REG_MTUTC_ID 0x9055
+#define MLXSW_REG_MTUTC_LEN 0x1C
+
+MLXSW_REG_DEFINE(mtutc, MLXSW_REG_MTUTC_ID, MLXSW_REG_MTUTC_LEN);
+
+enum mlxsw_reg_mtutc_operation {
+	MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC = 0,
+	MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ = 3,
+};
+
+/* reg_mtutc_operation
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, mtutc, operation, 0x00, 0, 4);
+
+/* reg_mtutc_freq_adjustment
+ * Frequency adjustment: Every PPS the HW frequency will be
+ * adjusted by this value. Units of HW clock, where HW counts
+ * 10^9 HW clocks for 1 HW second.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtutc, freq_adjustment, 0x04, 0, 32);
+
+/* reg_mtutc_utc_sec
+ * UTC seconds.
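An aside on the MTPPS virtual pin defined above: arming it is a pack-and-write pair. A sketch that mirrors what mlxsw_sp1_ptp_phc_settime() in the new spectrum_ptp.c does further down (helper name hypothetical, mlxsw core/reg headers assumed):

/* Schedule the next virtual-pin event at the given device time
 * (units of device clock, as the time_stamp field requires).
 */
static int ptp_arm_virtual_pin(struct mlxsw_core *mlxsw_core, u64 device_time)
{
	char mtpps_pl[MLXSW_REG_MTPPS_LEN];

	mlxsw_reg_mtpps_vpin_pack(mtpps_pl, device_time);
	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl);
}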
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, mtutc, utc_sec, 0x10, 0, 32);
+
+static inline void
+mlxsw_reg_mtutc_pack(char *payload, enum mlxsw_reg_mtutc_operation oper,
+		     u32 freq_adj, u32 utc_sec)
+{
+	MLXSW_REG_ZERO(mtutc, payload);
+	mlxsw_reg_mtutc_operation_set(payload, oper);
+	mlxsw_reg_mtutc_freq_adjustment_set(payload, freq_adj);
+	mlxsw_reg_mtutc_utc_sec_set(payload, utc_sec);
+}
+
 /* MCQI - Management Component Query Information
  * ---------------------------------------------
  * This register allows querying information about firmware components.
@@ -9043,6 +9278,267 @@ static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth,
 	mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
 }
 
+/* MOGCR - Monitoring Global Configuration Register
+ * ------------------------------------------------
+ */
+#define MLXSW_REG_MOGCR_ID 0x9086
+#define MLXSW_REG_MOGCR_LEN 0x20
+
+MLXSW_REG_DEFINE(mogcr, MLXSW_REG_MOGCR_ID, MLXSW_REG_MOGCR_LEN);
+
+/* reg_mogcr_ptp_iftc
+ * PTP Ingress FIFO Trap Clear
+ * The PTP_ING_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_iftc, 0x00, 1, 1);
+
+/* reg_mogcr_ptp_eftc
+ * PTP Egress FIFO Trap Clear
+ * The PTP_EGR_FIFO trap provides MTPPTR with clr according
+ * to this value. Default 0.
+ * Reserved when IB switches and when SwitchX/-2, Spectrum-2
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mogcr, ptp_eftc, 0x00, 0, 1);
+
+/* MTPPPC - Time Precision Packet Port Configuration
+ * -------------------------------------------------
+ * This register serves for configuration of which PTP messages should be
+ * timestamped. This is a global configuration, despite the register name.
+ *
+ * Reserved when Spectrum-2.
+ */
+#define MLXSW_REG_MTPPPC_ID 0x9090
+#define MLXSW_REG_MTPPPC_LEN 0x28
+
+MLXSW_REG_DEFINE(mtpppc, MLXSW_REG_MTPPPC_ID, MLXSW_REG_MTPPPC_LEN);
+
+/* reg_mtpppc_ing_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at ingress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, ing_timestamp_message_type, 0x08, 0, 16);
+
+/* reg_mtpppc_egr_timestamp_message_type
+ * Bitwise vector of PTP message types to timestamp at egress.
+ * MessageType field as defined by IEEE 1588
+ * Each bit corresponds to a value (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Default all 0
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtpppc, egr_timestamp_message_type, 0x0C, 0, 16);
+
+static inline void mlxsw_reg_mtpppc_pack(char *payload, u16 ing, u16 egr)
+{
+	MLXSW_REG_ZERO(mtpppc, payload);
+	mlxsw_reg_mtpppc_ing_timestamp_message_type_set(payload, ing);
+	mlxsw_reg_mtpppc_egr_timestamp_message_type_set(payload, egr);
+}
+
+/* MTPPTR - Time Precision Packet Timestamping Reading
+ * ---------------------------------------------------
+ * The MTPPTR is used for reading the per port PTP timestamp FIFO.
+ * There is a trap for packets which are latched to the timestamp FIFO, thus
+ * the SW knows which FIFO to read. Note that packets enter the FIFO before
+ * being trapped. The sequence number is used to synchronize the timestamp
+ * FIFO entries and the trapped packets.
+ * Reserved when Spectrum-2.
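An aside on the MTPPPC bitmaps above: the MessageType vectors follow the IEEE 1588 encoding, so enabling timestamping for a PTP profile is a matter of OR-ing bits. A sketch (bit values per the comments above; the helper name is hypothetical):

#include <linux/bits.h>
#include <linux/types.h>

/* Timestamp Sync (bit 0) and Delay_Req (bit 1) in both directions,
 * which covers the end-to-end delay mechanism.
 */
static int ptp_timestamp_e2e(struct mlxsw_core *mlxsw_core)
{
	char mtpppc_pl[MLXSW_REG_MTPPPC_LEN];
	u16 types = BIT(0) | BIT(1);

	mlxsw_reg_mtpppc_pack(mtpppc_pl, types, types);
	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpppc), mtpppc_pl);
}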
+ */ + +#define MLXSW_REG_MTPPTR_ID 0x9091 +#define MLXSW_REG_MTPPTR_BASE_LEN 0x10 /* base length, without records */ +#define MLXSW_REG_MTPPTR_REC_LEN 0x10 /* record length */ +#define MLXSW_REG_MTPPTR_REC_MAX_COUNT 4 +#define MLXSW_REG_MTPPTR_LEN (MLXSW_REG_MTPPTR_BASE_LEN + \ + MLXSW_REG_MTPPTR_REC_LEN * MLXSW_REG_MTPPTR_REC_MAX_COUNT) + +MLXSW_REG_DEFINE(mtpptr, MLXSW_REG_MTPPTR_ID, MLXSW_REG_MTPPTR_LEN); + +/* reg_mtpptr_local_port + * Not supported for CPU port. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, local_port, 0x00, 16, 8); + +enum mlxsw_reg_mtpptr_dir { + MLXSW_REG_MTPPTR_DIR_INGRESS, + MLXSW_REG_MTPPTR_DIR_EGRESS, +}; + +/* reg_mtpptr_dir + * Direction. + * Access: Index + */ +MLXSW_ITEM32(reg, mtpptr, dir, 0x00, 0, 1); + +/* reg_mtpptr_clr + * Clear the records. + * Access: OP + */ +MLXSW_ITEM32(reg, mtpptr, clr, 0x04, 31, 1); + +/* reg_mtpptr_num_rec + * Number of valid records in the response + * Range 0.. cap_ptp_timestamp_fifo + * Access: RO + */ +MLXSW_ITEM32(reg, mtpptr, num_rec, 0x08, 0, 4); + +/* reg_mtpptr_rec_message_type + * MessageType field as defined by IEEE 1588 Each bit corresponds to a value + * (e.g. Bit0: Sync, Bit1: Delay_Req) + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_message_type, + MLXSW_REG_MTPPTR_BASE_LEN, 8, 4, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_domain_number + * DomainNumber field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_domain_number, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 8, + MLXSW_REG_MTPPTR_REC_LEN, 0, false); + +/* reg_mtpptr_rec_sequence_id + * SequenceId field as defined by IEEE 1588 + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_sequence_id, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 16, + MLXSW_REG_MTPPTR_REC_LEN, 0x4, false); + +/* reg_mtpptr_rec_timestamp_high + * Timestamp of when the PTP packet has passed through the port Units of PLL + * clock time. + * For Spectrum-1 the PLL clock is 156.25Mhz and PLL clock time is 6.4nSec. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_high, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0x8, false); + +/* reg_mtpptr_rec_timestamp_low + * See rec_timestamp_high. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, mtpptr, rec_timestamp_low, + MLXSW_REG_MTPPTR_BASE_LEN, 0, 32, + MLXSW_REG_MTPPTR_REC_LEN, 0xC, false); + +static inline void mlxsw_reg_mtpptr_unpack(const char *payload, + unsigned int rec, + u8 *p_message_type, + u8 *p_domain_number, + u16 *p_sequence_id, + u64 *p_timestamp) +{ + u32 timestamp_high, timestamp_low; + + *p_message_type = mlxsw_reg_mtpptr_rec_message_type_get(payload, rec); + *p_domain_number = mlxsw_reg_mtpptr_rec_domain_number_get(payload, rec); + *p_sequence_id = mlxsw_reg_mtpptr_rec_sequence_id_get(payload, rec); + timestamp_high = mlxsw_reg_mtpptr_rec_timestamp_high_get(payload, rec); + timestamp_low = mlxsw_reg_mtpptr_rec_timestamp_low_get(payload, rec); + *p_timestamp = (u64)timestamp_high << 32 | timestamp_low; +} + +/* MTPTPT - Monitoring Precision Time Protocol Trap Register + * --------------------------------------------------------- + * This register is used for configuring under which trap to deliver PTP + * packets depending on type of the packet. + */ +#define MLXSW_REG_MTPTPT_ID 0x9092 +#define MLXSW_REG_MTPTPT_LEN 0x08 + +MLXSW_REG_DEFINE(mtptpt, MLXSW_REG_MTPTPT_ID, MLXSW_REG_MTPTPT_LEN); + +enum mlxsw_reg_mtptpt_trap_id { + MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + MLXSW_REG_MTPTPT_TRAP_ID_PTP1, +}; + +/* reg_mtptpt_trap_id + * Trap id. 
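An aside on the record timestamps above: since they count 6.4 ns PLL cycles on Spectrum-1 (6.4 ns = 32/5 ns), converting the value assembled by mlxsw_reg_mtpptr_unpack() to nanoseconds is a small exercise. A hypothetical helper, not part of the patch:

#include <linux/math64.h>
#include <linux/types.h>

/* Spectrum-1 FIFO timestamp cycles -> nanoseconds. Multiplying before
 * dividing keeps precision; it assumes the cycle count stays far below
 * the u64 overflow bound, which holds for these FIFO entries.
 */
static u64 mtpptr_cycles_to_ns(u64 cycles)
{
	return div_u64(cycles * 32, 5);
}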
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mtptpt, trap_id, 0x00, 0, 4);
+
+/* reg_mtptpt_message_type
+ * Bitwise vector of PTP message types to trap. This is a necessary but
+ * non-sufficient condition, since trapping must also be enabled per port.
+ * See MTPPPC.
+ * Message types are defined by IEEE 1588. Each bit corresponds to a value
+ * (e.g. Bit0: Sync, Bit1: Delay_Req)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mtptpt, message_type, 0x04, 0, 16);
+
+static inline void mlxsw_reg_mtptptp_pack(char *payload,
+					  enum mlxsw_reg_mtptpt_trap_id trap_id,
+					  u16 message_type)
+{
+	MLXSW_REG_ZERO(mtptpt, payload);
+	mlxsw_reg_mtptpt_trap_id_set(payload, trap_id);
+	mlxsw_reg_mtptpt_message_type_set(payload, message_type);
+}
+
+/* MGPIR - Management General Peripheral Information Register
+ * ----------------------------------------------------------
+ * MGPIR register allows software to query the hardware and
+ * firmware general information of peripheral entities.
+ */
+#define MLXSW_REG_MGPIR_ID 0x9100
+#define MLXSW_REG_MGPIR_LEN 0xA0
+
+MLXSW_REG_DEFINE(mgpir, MLXSW_REG_MGPIR_ID, MLXSW_REG_MGPIR_LEN);
+
+enum mlxsw_reg_mgpir_device_type {
+	MLXSW_REG_MGPIR_DEVICE_TYPE_NONE,
+	MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE,
+};
+
+/* device_type
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4);
+
+/* devices_per_flash
+ * Number of devices of device_type per flash (can be shared by a few devices).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
+
+/* num_of_devices
+ * Number of devices of device_type.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
+
+static inline void mlxsw_reg_mgpir_pack(char *payload)
+{
+	MLXSW_REG_ZERO(mgpir, payload);
+}
+
+static inline void
+mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
+		       enum mlxsw_reg_mgpir_device_type *device_type,
+		       u8 *devices_per_flash)
+{
+	if (num_of_devices)
+		*num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
+	if (device_type)
+		*device_type = mlxsw_reg_mgpir_device_type_get(payload);
+	if (devices_per_flash)
+		*devices_per_flash =
+			mlxsw_reg_mgpir_devices_per_flash_get(payload);
+}
+
 /* TNGCR - Tunneling NVE General Configuration Register
  * ----------------------------------------------------
  * The TNGCR register is used for setting up the NVE Tunneling configuration.
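An aside on MGPIR above: as a read-only register, its typical call sequence is pack, query, unpack. A sketch assuming the core's mlxsw_reg_query() helper; the gearbox-counting use case and the helper name are assumptions for illustration:

static int mlxsw_env_gearbox_count(struct mlxsw_core *mlxsw_core, u8 *p_count)
{
	enum mlxsw_reg_mgpir_device_type type;
	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
	int err;

	mlxsw_reg_mgpir_pack(mgpir_pl);
	err = mlxsw_reg_query(mlxsw_core, MLXSW_REG(mgpir), mgpir_pl);
	if (err)
		return err;

	/* Unused out-parameters may be NULL, as the unpack above allows. */
	mlxsw_reg_mgpir_unpack(mgpir_pl, p_count, &type, NULL);
	if (type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE)
		*p_count = 0;
	return 0;
}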
@@ -10006,6 +10502,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(qpdsm), MLXSW_REG(qpdpm), MLXSW_REG(qtctm), + MLXSW_REG(qpsc), MLXSW_REG(pmlp), MLXSW_REG(pmtu), MLXSW_REG(ptys), @@ -10052,12 +10549,19 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mgir), MLXSW_REG(mrsr), MLXSW_REG(mlcr), + MLXSW_REG(mtpps), + MLXSW_REG(mtutc), MLXSW_REG(mpsc), MLXSW_REG(mcqi), MLXSW_REG(mcc), MLXSW_REG(mcda), MLXSW_REG(mgpc), MLXSW_REG(mprs), + MLXSW_REG(mogcr), + MLXSW_REG(mtpppc), + MLXSW_REG(mtpptr), + MLXSW_REG(mtptpt), + MLXSW_REG(mgpir), MLXSW_REG(tngcr), MLXSW_REG(tnumt), MLXSW_REG(tnqcr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 23204356ad88..4d34d42b3b0e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -41,6 +41,7 @@ #include "spectrum_dpipe.h" #include "spectrum_acl_flex_actions.h" #include "spectrum_span.h" +#include "spectrum_ptp.h" #include "../mlxfw/mlxfw.h" #define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100) @@ -146,6 +147,35 @@ struct mlxsw_sp_mlxfw_dev { struct mlxsw_sp *mlxsw_sp; }; +struct mlxsw_sp_ptp_ops { + struct mlxsw_sp_ptp_clock * + (*clock_init)(struct mlxsw_sp *mlxsw_sp, struct device *dev); + void (*clock_fini)(struct mlxsw_sp_ptp_clock *clock); + + struct mlxsw_sp_ptp_state *(*init)(struct mlxsw_sp *mlxsw_sp); + void (*fini)(struct mlxsw_sp_ptp_state *ptp_state); + + /* Notify a driver that a packet that might be PTP was received. Driver + * is responsible for freeing the passed-in SKB. + */ + void (*receive)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + + /* Notify a driver that a timestamped packet was transmitted. Driver + * is responsible for freeing the passed-in SKB. 
+ */ + void (*transmitted)(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + + int (*hwtstamp_get)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + int (*hwtstamp_set)(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + void (*shaper_work)(struct work_struct *work); + int (*get_ts_info)(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); +}; + static int mlxsw_sp_component_query(struct mlxfw_dev *mlxfw_dev, u16 component_index, u32 *p_max_size, u8 *p_align_bits, u16 *p_max_write_size) @@ -294,6 +324,19 @@ static void mlxsw_sp_fsm_release(struct mlxfw_dev *mlxfw_dev, u32 fwhandle) mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mcc), mcc_pl); } +static void mlxsw_sp_status_notify(struct mlxfw_dev *mlxfw_dev, + const char *msg, const char *comp_name, + u32 done_bytes, u32 total_bytes) +{ + struct mlxsw_sp_mlxfw_dev *mlxsw_sp_mlxfw_dev = + container_of(mlxfw_dev, struct mlxsw_sp_mlxfw_dev, mlxfw_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_mlxfw_dev->mlxsw_sp; + + devlink_flash_update_status_notify(priv_to_devlink(mlxsw_sp->core), + msg, comp_name, + done_bytes, total_bytes); +} + static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .component_query = mlxsw_sp_component_query, .fsm_lock = mlxsw_sp_fsm_lock, @@ -303,11 +346,13 @@ static const struct mlxfw_dev_ops mlxsw_sp_mlxfw_dev_ops = { .fsm_activate = mlxsw_sp_fsm_activate, .fsm_query_state = mlxsw_sp_fsm_query_state, .fsm_cancel = mlxsw_sp_fsm_cancel, - .fsm_release = mlxsw_sp_fsm_release + .fsm_release = mlxsw_sp_fsm_release, + .status_notify = mlxsw_sp_status_notify, }; static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, - const struct firmware *firmware) + const struct firmware *firmware, + struct netlink_ext_ack *extack) { struct mlxsw_sp_mlxfw_dev mlxsw_sp_mlxfw_dev = { .mlxfw_dev = { @@ -320,7 +365,10 @@ static int mlxsw_sp_firmware_flash(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_core_fw_flash_start(mlxsw_sp->core); - err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, firmware); + devlink_flash_update_begin_notify(priv_to_devlink(mlxsw_sp->core)); + err = mlxfw_firmware_flash(&mlxsw_sp_mlxfw_dev.mlxfw_dev, + firmware, extack); + devlink_flash_update_end_notify(priv_to_devlink(mlxsw_sp->core)); mlxsw_core_fw_flash_end(mlxsw_sp->core); return err; @@ -374,7 +422,7 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) return err; } - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware); + err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, NULL); release_firmware(firmware); if (err) dev_err(mlxsw_sp->bus_info->dev, "Could not upgrade firmware\n"); @@ -388,6 +436,27 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_flash_update(struct mlxsw_core *mlxsw_core, + const char *file_name, const char *component, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + const struct firmware *firmware; + int err; + + if (component) + return -EOPNOTSUPP; + + err = request_firmware_direct(&firmware, file_name, + mlxsw_sp->bus_info->dev); + if (err) + return err; + err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware, extack); + release_firmware(firmware); + + return err; +} + int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, unsigned int counter_index, u64 *packets, u64 *bytes) @@ -738,6 +807,8 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + if 
(mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) return NETDEV_TX_BUSY; @@ -1437,21 +1508,21 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_acl_block *acl_block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_acl_block_mlxsw_sp(acl_block); switch (f->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return mlxsw_sp_flower_replace(mlxsw_sp, acl_block, f); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: mlxsw_sp_flower_destroy(mlxsw_sp, acl_block, f); return 0; - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return mlxsw_sp_flower_stats(mlxsw_sp, acl_block, f); - case TC_CLSFLOWER_TMPLT_CREATE: + case FLOW_CLS_TMPLT_CREATE: return mlxsw_sp_flower_tmplt_create(mlxsw_sp, acl_block, f); - case TC_CLSFLOWER_TMPLT_DESTROY: + case FLOW_CLS_TMPLT_DESTROY: mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, acl_block, f); return 0; default: @@ -1514,33 +1585,45 @@ static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type, } } +static void mlxsw_sp_tc_block_flower_release(void *cb_priv) +{ + struct mlxsw_sp_acl_block *acl_block = cb_priv; + + mlxsw_sp_acl_block_destroy(acl_block); +} + +static LIST_HEAD(mlxsw_sp_block_cb_list); + static int mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port, - struct tcf_block *block, bool ingress, - struct netlink_ext_ack *extack) + struct flow_block_offload *f, bool ingress) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_acl_block *acl_block; - struct tcf_block_cb *block_cb; + struct flow_block_cb *block_cb; + bool register_block = false; int err; - block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower, - mlxsw_sp); + block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower, + mlxsw_sp); if (!block_cb) { - acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, block->net); + acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net); if (!acl_block) return -ENOMEM; - block_cb = __tcf_block_cb_register(block, - mlxsw_sp_setup_tc_block_cb_flower, - mlxsw_sp, acl_block, extack); + block_cb = flow_block_cb_alloc(f->net, + mlxsw_sp_setup_tc_block_cb_flower, + mlxsw_sp, acl_block, + mlxsw_sp_tc_block_flower_release); if (IS_ERR(block_cb)) { + mlxsw_sp_acl_block_destroy(acl_block); err = PTR_ERR(block_cb); goto err_cb_register; } + register_block = true; } else { - acl_block = tcf_block_cb_priv(block_cb); + acl_block = flow_block_cb_priv(block_cb); } - tcf_block_cb_incref(block_cb); + flow_block_cb_incref(block_cb); err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block, mlxsw_sp_port, ingress); if (err) @@ -1551,28 +1634,31 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port, else mlxsw_sp_port->eg_acl_block = acl_block; + if (register_block) { + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list); + } + return 0; err_block_bind: - if (!tcf_block_cb_decref(block_cb)) { - __tcf_block_cb_unregister(block, block_cb); + if (!flow_block_cb_decref(block_cb)) + flow_block_cb_free(block_cb); err_cb_register: - mlxsw_sp_acl_block_destroy(acl_block); - } return err; } static void mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port, - struct tcf_block *block, bool ingress) + struct flow_block_offload *f, bool ingress) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_acl_block *acl_block; - struct tcf_block_cb *block_cb; + struct 
flow_block_cb *block_cb; int err; - block_cb = tcf_block_cb_lookup(block, mlxsw_sp_setup_tc_block_cb_flower, - mlxsw_sp); + block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower, + mlxsw_sp); if (!block_cb) return; @@ -1581,50 +1667,63 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port, else mlxsw_sp_port->eg_acl_block = NULL; - acl_block = tcf_block_cb_priv(block_cb); + acl_block = flow_block_cb_priv(block_cb); err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block, mlxsw_sp_port, ingress); - if (!err && !tcf_block_cb_decref(block_cb)) { - __tcf_block_cb_unregister(block, block_cb); - mlxsw_sp_acl_block_destroy(acl_block); + if (!err && !flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); } } static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_block_offload *f) + struct flow_block_offload *f) { + struct flow_block_cb *block_cb; tc_setup_cb_t *cb; bool ingress; int err; - if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) { + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) { cb = mlxsw_sp_setup_tc_block_cb_matchall_ig; ingress = true; - } else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) { + } else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) { cb = mlxsw_sp_setup_tc_block_cb_matchall_eg; ingress = false; } else { return -EOPNOTSUPP; } + f->driver_block_list = &mlxsw_sp_block_cb_list; + switch (f->command) { - case TC_BLOCK_BIND: - err = tcf_block_cb_register(f->block, cb, mlxsw_sp_port, - mlxsw_sp_port, f->extack); - if (err) - return err; - err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port, - f->block, ingress, - f->extack); + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(cb, mlxsw_sp_port, + &mlxsw_sp_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(f->net, cb, mlxsw_sp_port, + mlxsw_sp_port, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port, f, + ingress); if (err) { - tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port); + flow_block_cb_free(block_cb); return err; } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list); return 0; - case TC_BLOCK_UNBIND: + case FLOW_BLOCK_UNBIND: mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port, - f->block, ingress); - tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port); + f, ingress); + block_cb = flow_block_cb_lookup(f, cb, mlxsw_sp_port); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; @@ -1745,6 +1844,65 @@ mlxsw_sp_port_get_devlink_port(struct net_device *dev) mlxsw_sp_port->local_port); } +static int mlxsw_sp_port_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return -EFAULT; + + return 0; +} + +static int mlxsw_sp_port_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct ifreq *ifr) +{ + struct hwtstamp_config config; + int err; + + err = mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_get(mlxsw_sp_port, + &config); + if (err) + return err; + + if (copy_to_user(ifr->ifr_data, &config, sizeof(config))) + return 
-EFAULT; + + return 0; +} + +static inline void mlxsw_sp_port_ptp_clear(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct hwtstamp_config config = {0}; + + mlxsw_sp_port->mlxsw_sp->ptp_ops->hwtstamp_set(mlxsw_sp_port, &config); +} + +static int +mlxsw_sp_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (cmd) { + case SIOCSHWTSTAMP: + return mlxsw_sp_port_hwtstamp_set(mlxsw_sp_port, ifr); + case SIOCGHWTSTAMP: + return mlxsw_sp_port_hwtstamp_get(mlxsw_sp_port, ifr); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, @@ -1760,6 +1918,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_set_features = mlxsw_sp_set_features, .ndo_get_devlink_port = mlxsw_sp_port_get_devlink_port, + .ndo_do_ioctl = mlxsw_sp_port_ioctl, }; static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, @@ -2525,28 +2684,33 @@ mlxsw_sp1_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, } } +static u32 +mlxsw_sp1_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) +{ + int i; + + for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) + return mlxsw_sp1_port_link_mode[i].speed; + } + + return SPEED_UNKNOWN; +} + static void mlxsw_sp1_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd) { - u32 speed = SPEED_UNKNOWN; - u8 duplex = DUPLEX_UNKNOWN; - int i; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; if (!carrier_ok) - goto out; + return; - for (i = 0; i < MLXSW_SP1_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp1_port_link_mode[i].mask) { - speed = mlxsw_sp1_port_link_mode[i].speed; - duplex = DUPLEX_FULL; - break; - } - } -out: - cmd->base.speed = speed; - cmd->base.duplex = duplex; + cmd->base.speed = mlxsw_sp1_from_ptys_speed(mlxsw_sp, ptys_eth_proto); + if (cmd->base.speed != SPEED_UNKNOWN) + cmd->base.duplex = DUPLEX_FULL; } static u32 @@ -2617,6 +2781,7 @@ static const struct mlxsw_sp_port_type_speed_ops mlxsw_sp1_port_type_speed_ops = { .from_ptys_supported_port = mlxsw_sp1_from_ptys_supported_port, .from_ptys_link = mlxsw_sp1_from_ptys_link, + .from_ptys_speed = mlxsw_sp1_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp1_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp1_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp1_to_ptys_speed, @@ -2867,28 +3032,33 @@ mlxsw_sp2_from_ptys_link(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, } } +static u32 +mlxsw_sp2_from_ptys_speed(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto) +{ + int i; + + for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { + if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) + return mlxsw_sp2_port_link_mode[i].speed; + } + + return SPEED_UNKNOWN; +} + static void mlxsw_sp2_from_ptys_speed_duplex(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd) { - u32 speed = SPEED_UNKNOWN; - u8 duplex = DUPLEX_UNKNOWN; - int i; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; if (!carrier_ok) - goto out; + return; - for (i = 0; i < MLXSW_SP2_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp2_port_link_mode[i].mask) { - speed = mlxsw_sp2_port_link_mode[i].speed; - duplex = DUPLEX_FULL; - break; - } - } -out: - 
cmd->base.speed = speed; - cmd->base.duplex = duplex; + cmd->base.speed = mlxsw_sp2_from_ptys_speed(mlxsw_sp, ptys_eth_proto); + if (cmd->base.speed != SPEED_UNKNOWN) + cmd->base.duplex = DUPLEX_FULL; } static bool @@ -2999,6 +3169,7 @@ static const struct mlxsw_sp_port_type_speed_ops mlxsw_sp2_port_type_speed_ops = { .from_ptys_supported_port = mlxsw_sp2_from_ptys_supported_port, .from_ptys_link = mlxsw_sp2_from_ptys_link, + .from_ptys_speed = mlxsw_sp2_from_ptys_speed, .from_ptys_speed_duplex = mlxsw_sp2_from_ptys_speed_duplex, .to_ptys_advert_link = mlxsw_sp2_to_ptys_advert_link, .to_ptys_speed = mlxsw_sp2_to_ptys_speed, @@ -3159,31 +3330,6 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev, return 0; } -static int mlxsw_sp_flash_device(struct net_device *dev, - struct ethtool_flash *flash) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - const struct firmware *firmware; - int err; - - if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) - return -EOPNOTSUPP; - - dev_hold(dev); - rtnl_unlock(); - - err = request_firmware_direct(&firmware, flash->data, &dev->dev); - if (err) - goto out; - err = mlxsw_sp_firmware_flash(mlxsw_sp, firmware); - release_firmware(firmware); -out: - rtnl_lock(); - dev_put(dev); - return err; -} - static int mlxsw_sp_get_module_info(struct net_device *netdev, struct ethtool_modinfo *modinfo) { @@ -3213,6 +3359,15 @@ static int mlxsw_sp_get_module_eeprom(struct net_device *netdev, return err; } +static int +mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + return mlxsw_sp->ptp_ops->get_ts_info(mlxsw_sp, info); +} + static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, @@ -3224,9 +3379,9 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_sset_count = mlxsw_sp_port_get_sset_count, .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, - .flash_device = mlxsw_sp_flash_device, .get_module_info = mlxsw_sp_get_module_info, .get_module_eeprom = mlxsw_sp_get_module_eeprom, + .get_ts_info = mlxsw_sp_get_ts_info, }; static int @@ -3343,8 +3498,9 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } - /* Make sure the max shaper is disabled in all hierarchies that - * support it. + /* Make sure the max shaper is disabled in all hierarchies that support + * it. Note that this disables ptps (PTP shaper), but that is intended + * for the initial configuration. 
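An aside on the .get_ts_info wiring above: a callback for a driver with a registered PHC typically reports along the lines below. This is an illustrative sketch only; the actual mlxsw_sp1/sp2 callbacks live in spectrum_ptp.c and may differ:

#include <linux/ethtool.h>
#include <linux/net_tstamp.h>
#include <linux/ptp_clock_kernel.h>

static int example_get_ts_info(struct ptp_clock *ptp_clock,
			       struct ethtool_ts_info *info)
{
	/* Tie the netdev to its PHC so userspace can find /dev/ptpN. */
	info->phc_index = ptp_clock_index(ptp_clock);
	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
				SOF_TIMESTAMPING_RX_HARDWARE |
				SOF_TIMESTAMPING_RAW_HARDWARE;
	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
			   BIT(HWTSTAMP_FILTER_ALL);
	return 0;
}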
*/ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, @@ -3589,6 +3745,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, } mlxsw_sp_port->default_vlan = mlxsw_sp_port_vlan; + INIT_DELAYED_WORK(&mlxsw_sp_port->ptp.shaper_dw, + mlxsw_sp->ptp_ops->shaper_work); + mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); if (err) { @@ -3643,6 +3802,8 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; cancel_delayed_work_sync(&mlxsw_sp_port->periodic_hw_stats.update_dw); + cancel_delayed_work_sync(&mlxsw_sp_port->ptp.shaper_dw); + mlxsw_sp_port_ptp_clear(mlxsw_sp_port); mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -3927,14 +4088,55 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, if (status == MLXSW_PORT_OPER_STATUS_UP) { netdev_info(mlxsw_sp_port->dev, "link up\n"); netif_carrier_on(mlxsw_sp_port->dev); + mlxsw_core_schedule_dw(&mlxsw_sp_port->ptp.shaper_dw, 0); } else { netdev_info(mlxsw_sp_port->dev, "link down\n"); netif_carrier_off(mlxsw_sp_port->dev); } } -static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, - u8 local_port, void *priv) +static void mlxsw_sp1_ptp_fifo_event_func(struct mlxsw_sp *mlxsw_sp, + char *mtpptr_pl, bool ingress) +{ + u8 local_port; + u8 num_rec; + int i; + + local_port = mlxsw_reg_mtpptr_local_port_get(mtpptr_pl); + num_rec = mlxsw_reg_mtpptr_num_rec_get(mtpptr_pl); + for (i = 0; i < num_rec; i++) { + u8 domain_number; + u8 message_type; + u16 sequence_id; + u64 timestamp; + + mlxsw_reg_mtpptr_unpack(mtpptr_pl, i, &message_type, + &domain_number, &sequence_id, + ×tamp); + mlxsw_sp1_ptp_got_timestamp(mlxsw_sp, ingress, local_port, + message_type, domain_number, + sequence_id, timestamp); + } +} + +static void mlxsw_sp1_ptp_ing_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, true); +} + +static void mlxsw_sp1_ptp_egr_fifo_event_func(const struct mlxsw_reg_info *reg, + char *mtpptr_pl, void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp1_ptp_fifo_event_func(mlxsw_sp, mtpptr_pl, false); +} + +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u8 local_port, void *priv) { struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; @@ -4008,6 +4210,14 @@ out: consume_skb(skb); } +static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port, + void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port); +} + #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl) \ MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action, \ _is_ctrl, SP_##_trap_group, DISCARD) @@ -4029,7 +4239,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* L2 traps */ MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true), MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true), - MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true), + MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU, + false, SP_LLDP, DISCARD), MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false), MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false), MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false), @@ -4098,6 
+4309,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { /* NVE traps */ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false), + /* PTP traps */ + MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU, + false, SP_PTP0, DISCARD), + MLXSW_SP_RXL_NO_MARK(PTP1, TRAP_TO_CPU, PTP1, false), +}; + +static const struct mlxsw_listener mlxsw_sp1_listener[] = { + /* Events */ + MLXSW_EVENTL(mlxsw_sp1_ptp_egr_fifo_event_func, PTP_EGR_FIFO, SP_PTP0), + MLXSW_EVENTL(mlxsw_sp1_ptp_ing_fifo_event_func, PTP_ING_FIFO, SP_PTP0), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -4149,6 +4370,14 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) rate = 1024; burst_size = 7; break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: + rate = 24 * 1024; + burst_size = 12; + break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: + rate = 19 * 1024; + burst_size = 12; + break; default: continue; } @@ -4187,6 +4416,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: priority = 5; tc = 5; break; @@ -4204,6 +4434,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1: priority = 2; tc = 2; break; @@ -4237,22 +4468,16 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) return 0; } -static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_traps_register(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_listener listeners[], + size_t listeners_count) { int i; int err; - err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); - if (err) - return err; - - err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); - if (err) - return err; - - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) { + for (i = 0; i < listeners_count; i++) { err = mlxsw_core_trap_register(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); if (err) goto err_listener_register; @@ -4263,23 +4488,63 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) err_listener_register: for (i--; i >= 0; i--) { mlxsw_core_trap_unregister(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); } return err; } -static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_traps_unregister(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_listener listeners[], + size_t listeners_count) { int i; - for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) { + for (i = 0; i < listeners_count; i++) { mlxsw_core_trap_unregister(mlxsw_sp->core, - &mlxsw_sp_listener[i], + &listeners[i], mlxsw_sp); } } +static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core); + if (err) + return err; + + err = mlxsw_sp_trap_groups_set(mlxsw_sp->core); + if (err) + return err; + + err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); + if (err) + return err; + + err = mlxsw_sp_traps_register(mlxsw_sp, mlxsw_sp->listeners, + mlxsw_sp->listeners_count); + if (err) + goto err_extra_traps_init; + + return 0; + +err_extra_traps_init: + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); + return err; +} + +static void 
mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp->listeners, + mlxsw_sp->listeners_count); + mlxsw_sp_traps_unregister(mlxsw_sp, mlxsw_sp_listener, + ARRAY_SIZE(mlxsw_sp_listener)); +} + #define MLXSW_SP_LAG_SEED_INIT 0xcafecafe static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) @@ -4332,6 +4597,32 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static const struct mlxsw_sp_ptp_ops mlxsw_sp1_ptp_ops = { + .clock_init = mlxsw_sp1_ptp_clock_init, + .clock_fini = mlxsw_sp1_ptp_clock_fini, + .init = mlxsw_sp1_ptp_init, + .fini = mlxsw_sp1_ptp_fini, + .receive = mlxsw_sp1_ptp_receive, + .transmitted = mlxsw_sp1_ptp_transmitted, + .hwtstamp_get = mlxsw_sp1_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp1_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp1_ptp_shaper_work, + .get_ts_info = mlxsw_sp1_ptp_get_ts_info, +}; + +static const struct mlxsw_sp_ptp_ops mlxsw_sp2_ptp_ops = { + .clock_init = mlxsw_sp2_ptp_clock_init, + .clock_fini = mlxsw_sp2_ptp_clock_fini, + .init = mlxsw_sp2_ptp_init, + .fini = mlxsw_sp2_ptp_fini, + .receive = mlxsw_sp2_ptp_receive, + .transmitted = mlxsw_sp2_ptp_transmitted, + .hwtstamp_get = mlxsw_sp2_ptp_hwtstamp_get, + .hwtstamp_set = mlxsw_sp2_ptp_hwtstamp_set, + .shaper_work = mlxsw_sp2_ptp_shaper_work, + .get_ts_info = mlxsw_sp2_ptp_get_ts_info, +}; + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr); @@ -4429,6 +4720,28 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_router_init; } + if (mlxsw_sp->bus_info->read_frc_capable) { + /* NULL is a valid return value from clock_init */ + mlxsw_sp->clock = + mlxsw_sp->ptp_ops->clock_init(mlxsw_sp, + mlxsw_sp->bus_info->dev); + if (IS_ERR(mlxsw_sp->clock)) { + err = PTR_ERR(mlxsw_sp->clock); + dev_err(mlxsw_sp->bus_info->dev, "Failed to init ptp clock\n"); + goto err_ptp_clock_init; + } + } + + if (mlxsw_sp->clock) { + /* NULL is a valid return value from ptp_ops->init */ + mlxsw_sp->ptp_state = mlxsw_sp->ptp_ops->init(mlxsw_sp); + if (IS_ERR(mlxsw_sp->ptp_state)) { + err = PTR_ERR(mlxsw_sp->ptp_state); + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize PTP\n"); + goto err_ptp_init; + } + } + /* Initialize netdevice notifier after router and SPAN is initialized, * so that the event handler can use router structures and call SPAN * respin. 
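The split into mlxsw_sp_traps_register()/mlxsw_sp_traps_unregister() above, with the common and per-ASIC listener arrays, follows the usual register-in-order, unwind-in-reverse idiom. In miniature (types and callbacks are hypothetical stand-ins, shown only for the shape of the error path):

#include <stddef.h>

struct item { int id; };

/* Stand-ins for mlxsw_core_trap_register()/unregister(). */
static int register_one(const struct item *item) { (void)item; return 0; }
static void unregister_one(const struct item *item) { (void)item; }

static int register_all(const struct item *items, size_t count)
{
	size_t i;
	int err;

	for (i = 0; i < count; i++) {
		err = register_one(&items[i]);
		if (err)
			goto err_unwind;
	}
	return 0;

err_unwind:
	while (i--)	/* roll back already-registered entries, newest first */
		unregister_one(&items[i]);
	return err;
}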
@@ -4459,6 +4772,12 @@ err_ports_create: err_dpipe_init: unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); err_netdev_notifier: + if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); +err_ptp_init: + if (mlxsw_sp->clock) + mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); +err_ptp_clock_init: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: mlxsw_sp_acl_fini(mlxsw_sp); @@ -4502,6 +4821,9 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->rif_ops_arr = mlxsw_sp1_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp1_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp1_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp1_ptp_ops; + mlxsw_sp->listeners = mlxsw_sp1_listener; + mlxsw_sp->listeners_count = ARRAY_SIZE(mlxsw_sp1_listener); return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4521,6 +4843,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->rif_ops_arr = mlxsw_sp2_rif_ops_arr; mlxsw_sp->sb_vals = &mlxsw_sp2_sb_vals; mlxsw_sp->port_type_speed_ops = &mlxsw_sp2_port_type_speed_ops; + mlxsw_sp->ptp_ops = &mlxsw_sp2_ptp_ops; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -4532,6 +4855,10 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_dpipe_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); + if (mlxsw_sp->clock) { + mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state); + mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock); + } mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_nve_fini(mlxsw_sp); @@ -4874,6 +5201,15 @@ static void mlxsw_sp2_params_unregister(struct mlxsw_core *mlxsw_core) mlxsw_sp_params_unregister(mlxsw_core); } +static void mlxsw_sp_ptp_transmitted(struct mlxsw_core *mlxsw_core, + struct sk_buff *skb, u8 local_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + + skb_pull(skb, MLXSW_TXHDR_LEN); + mlxsw_sp->ptp_ops->transmitted(mlxsw_sp, skb, local_port); +} + static struct mlxsw_driver mlxsw_sp1_driver = { .kind = mlxsw_sp1_driver_name, .priv_size = sizeof(struct mlxsw_sp), @@ -4892,11 +5228,13 @@ static struct mlxsw_driver mlxsw_sp1_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .flash_update = mlxsw_sp_flash_update, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp1_resources_register, .kvd_sizes_get = mlxsw_sp_kvd_sizes_get, .params_register = mlxsw_sp_params_register, .params_unregister = mlxsw_sp_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp1_config_profile, .res_query_enabled = true, @@ -4920,10 +5258,12 @@ static struct mlxsw_driver mlxsw_sp2_driver = { .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .flash_update = mlxsw_sp_flash_update, .txhdr_construct = mlxsw_sp_txhdr_construct, .resources_register = mlxsw_sp2_resources_register, .params_register = mlxsw_sp2_params_register, .params_unregister = mlxsw_sp2_params_unregister, + .ptp_transmitted = mlxsw_sp_ptp_transmitted, .txhdr_len = MLXSW_TXHDR_LEN, .profile = &mlxsw_sp2_config_profile, .res_query_enabled = true, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 8601b3041acd..a252b080dda9 100644 --- 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -136,6 +136,8 @@ struct mlxsw_sp_acl_tcam_ops; struct mlxsw_sp_nve_ops; struct mlxsw_sp_sb_vals; struct mlxsw_sp_port_type_speed_ops; +struct mlxsw_sp_ptp_state; +struct mlxsw_sp_ptp_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -155,6 +157,8 @@ struct mlxsw_sp { struct mlxsw_sp_kvdl *kvdl; struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; + struct mlxsw_sp_ptp_clock *clock; + struct mlxsw_sp_ptp_state *ptp_state; struct mlxsw_sp_counter_pool *counter_pool; struct { @@ -172,6 +176,9 @@ struct mlxsw_sp { const struct mlxsw_sp_rif_ops **rif_ops_arr; const struct mlxsw_sp_sb_vals *sb_vals; const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + const struct mlxsw_sp_ptp_ops *ptp_ops; + const struct mlxsw_listener *listeners; + size_t listeners_count; }; static inline struct mlxsw_sp_upper * @@ -259,6 +266,12 @@ struct mlxsw_sp_port { unsigned acl_rule_count; struct mlxsw_sp_acl_block *ing_acl_block; struct mlxsw_sp_acl_block *eg_acl_block; + struct { + struct delayed_work shaper_dw; + struct hwtstamp_config hwtstamp_config; + u16 ing_types; + u16 egr_types; + } ptp; }; struct mlxsw_sp_port_type_speed_ops { @@ -267,6 +280,7 @@ struct mlxsw_sp_port_type_speed_ops { struct ethtool_link_ksettings *cmd); void (*from_ptys_link)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto, unsigned long *mode); + u32 (*from_ptys_speed)(struct mlxsw_sp *mlxsw_sp, u32 ptys_eth_proto); void (*from_ptys_speed_duplex)(struct mlxsw_sp *mlxsw_sp, bool carrier_ok, u32 ptys_eth_proto, struct ethtool_link_ksettings *cmd); @@ -435,6 +449,8 @@ struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, extern struct notifier_block mlxsw_sp_switchdev_notifier; /* spectrum.c */ +void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, + u8 local_port, void *priv); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -620,6 +636,15 @@ enum mlxsw_sp_acl_profile { MLXSW_SP_ACL_PROFILE_MR, }; +struct mlxsw_sp_acl_block { + struct list_head binding_list; + struct mlxsw_sp_acl_ruleset *ruleset_zero; + struct mlxsw_sp *mlxsw_sp; + unsigned int rule_count; + unsigned int disable_count; + struct net *net; +}; + struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block); unsigned int mlxsw_sp_acl_block_rule_count(struct mlxsw_sp_acl_block *block); @@ -782,19 +807,19 @@ extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops; /* spectrum_flower.c */ int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); /* spectrum_qdisc.c */ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port); diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index a146a44634e9..e8ac90564dbe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -45,14 +45,6 @@ struct mlxsw_sp_acl_block_binding { bool ingress; }; -struct mlxsw_sp_acl_block { - struct list_head binding_list; - struct mlxsw_sp_acl_ruleset *ruleset_zero; - struct mlxsw_sp *mlxsw_sp; - unsigned int rule_count; - unsigned int disable_count; -}; - struct mlxsw_sp_acl_ruleset_ht_key { struct mlxsw_sp_acl_block *block; u32 chain_index; @@ -221,6 +213,7 @@ struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp, return NULL; INIT_LIST_HEAD(&block->binding_list); block->mlxsw_sp = mlxsw_sp; + block->net = net; return block; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c index 2a998dea4f39..279c241f76f0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c @@ -12,7 +12,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = { MLXSW_AFK_ELEMENT_INST_BUF(DMAC_0_31, 0x02, 4), MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { @@ -20,7 +20,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { MLXSW_AFK_ELEMENT_INST_BUF(SMAC_0_31, 0x02, 4), MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { @@ -32,13 +32,13 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac_ex[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_sip[] = { MLXSW_AFK_ELEMENT_INST_BUF(SRC_IP_0_31, 0x00, 4), MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { MLXSW_AFK_ELEMENT_INST_BUF(DST_IP_0_31, 0x00, 4), MLXSW_AFK_ELEMENT_INST_U32(IP_PROTO, 0x08, 0, 8), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 8), + MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { @@ -149,7 +149,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_4[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5[] = { MLXSW_AFK_ELEMENT_INST_U32(VID, 0x04, 16, 12), - MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x04, 0, 8), /* RX_ACL_SYSTEM_PORT */ + MLXSW_AFK_ELEMENT_INST_EXT_U32(SRC_SYS_PORT, 0x04, 0, 8, -1, true), /* RX_ACL_SYSTEM_PORT */ }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_0[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 96b23c856f4d..202e9a246019 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -120,8 +120,51 @@ static int 
mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei, + struct flow_cls_offload *f, + struct mlxsw_sp_acl_block *block) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct mlxsw_sp_port *mlxsw_sp_port; + struct net_device *ingress_dev; + struct flow_match_meta match; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) + return 0; + + flow_rule_match_meta(rule, &match); + if (match.mask->ingress_ifindex != 0xFFFFFFFF) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Unsupported ingress ifindex mask"); + return -EINVAL; + } + + ingress_dev = __dev_get_by_index(block->net, + match.key->ingress_ifindex); + if (!ingress_dev) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't find specified ingress port to match on"); + return -EINVAL; + } + + if (!mlxsw_sp_port_dev_check(ingress_dev)) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on non-mlxsw ingress port"); + return -EINVAL; + } + + mlxsw_sp_port = netdev_priv(ingress_dev); + if (mlxsw_sp_port->mlxsw_sp != block->mlxsw_sp) { + NL_SET_ERR_MSG_MOD(f->common.extack, "Can't match on a port from different device"); + return -EINVAL; + } + + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_SRC_SYS_PORT, + mlxsw_sp_port->local_port, + 0xFFFFFFFF); + return 0; +} + static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct flow_match_ipv4_addrs match; @@ -136,7 +179,7 @@ static void mlxsw_sp_flower_parse_ipv4(struct mlxsw_sp_acl_rule_info *rulei, } static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct flow_match_ipv6_addrs match; @@ -170,10 +213,10 @@ static void mlxsw_sp_flower_parse_ipv6(struct mlxsw_sp_acl_rule_info *rulei, static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u8 ip_proto) { - const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_match_ports match; if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) @@ -197,10 +240,10 @@ static int mlxsw_sp_flower_parse_ports(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u8 ip_proto) { - const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_match_tcp match; if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) @@ -222,10 +265,10 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f, + struct flow_cls_offload *f, u16 n_proto) { - const struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + const struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_match_ip match; if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) @@ -256,9 +299,9 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_acl_rule_info *rulei, - struct tc_cls_flower_offload *f) + struct 
flow_cls_offload *f) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 n_proto_mask = 0; u16 n_proto_key = 0; @@ -267,7 +310,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, int err; if (dissector->used_keys & - ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + ~(BIT(FLOW_DISSECTOR_KEY_META) | + BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | @@ -283,6 +327,10 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); + err = mlxsw_sp_flower_parse_meta(rulei, f, block); + if (err) + return err; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_match_control match; @@ -378,7 +426,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_rule_info *rulei; struct mlxsw_sp_acl_ruleset *ruleset; @@ -425,7 +473,7 @@ err_rule_create: void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule *rule; @@ -447,7 +495,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule *rule; @@ -483,7 +531,7 @@ err_rule_get_stats: int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule_info rulei; @@ -504,7 +552,7 @@ int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp, void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct mlxsw_sp_acl_ruleset *ruleset; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c new file mode 100644 index 000000000000..bd9c2bc2d5d6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -0,0 +1,1111 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#include <linux/ptp_clock_kernel.h> +#include <linux/clocksource.h> +#include <linux/timecounter.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/rhashtable.h> +#include <linux/ptp_classify.h> +#include <linux/if_ether.h> +#include <linux/if_vlan.h> +#include <linux/net_tstamp.h> + +#include "spectrum.h" +#include "spectrum_ptp.h" +#include "core.h" + +#define MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT 29 +#define MLXSW_SP1_PTP_CLOCK_FREQ_KHZ 156257 /* 6.4nSec */ +#define MLXSW_SP1_PTP_CLOCK_MASK 64 + +#define MLXSW_SP1_PTP_HT_GC_INTERVAL 500 /* ms */ + +/* How long, approximately, should the unmatched entries stay in the hash table + * before they are collected. Should be evenly divisible by the GC interval. 
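An aside on the clock constants above: the cyclecounter turns free-running-counter deltas into nanoseconds as ns = (cycles * mult) >> shift. With shift = 29 and a 156257 kHz clock, clocksource_khz2mult() picks mult so that mult / 2^29 is roughly 6.4 ns per cycle. A hypothetical helper making the conversion explicit:

#include <linux/types.h>

/* Convert an FRC cycle delta to nanoseconds using the cyclecounter's
 * mult/shift pair. Accurate only while cycles * mult fits in 64 bits,
 * which is why the driver wraps this in a timecounter and periodically
 * reads it from the overflow work item below.
 */
static u64 frc_delta_to_ns(u64 cycles, u32 mult, u32 shift)
{
	return (cycles * mult) >> shift;
}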
+ */ +#define MLXSW_SP1_PTP_HT_GC_TIMEOUT 1000 /* ms */ + +struct mlxsw_sp_ptp_state { + struct mlxsw_sp *mlxsw_sp; + struct rhashtable unmatched_ht; + spinlock_t unmatched_lock; /* protects the HT */ + struct delayed_work ht_gc_dw; + u32 gc_cycle; +}; + +struct mlxsw_sp1_ptp_key { + u8 local_port; + u8 message_type; + u16 sequence_id; + u8 domain_number; + bool ingress; +}; + +struct mlxsw_sp1_ptp_unmatched { + struct mlxsw_sp1_ptp_key key; + struct rhash_head ht_node; + struct rcu_head rcu; + struct sk_buff *skb; + u64 timestamp; + u32 gc_cycle; +}; + +static const struct rhashtable_params mlxsw_sp1_ptp_unmatched_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp1_ptp_unmatched, key), + .key_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, key), + .head_offset = offsetof(struct mlxsw_sp1_ptp_unmatched, ht_node), +}; + +struct mlxsw_sp_ptp_clock { + struct mlxsw_core *core; + spinlock_t lock; /* protect this structure */ + struct cyclecounter cycles; + struct timecounter tc; + u32 nominal_c_mult; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + unsigned long overflow_period; + struct delayed_work overflow_work; +}; + +static u64 __mlxsw_sp1_ptp_read_frc(struct mlxsw_sp_ptp_clock *clock, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_core *mlxsw_core = clock->core; + u32 frc_h1, frc_h2, frc_l; + + frc_h1 = mlxsw_core_read_frc_h(mlxsw_core); + ptp_read_system_prets(sts); + frc_l = mlxsw_core_read_frc_l(mlxsw_core); + ptp_read_system_postts(sts); + frc_h2 = mlxsw_core_read_frc_h(mlxsw_core); + + if (frc_h1 != frc_h2) { + /* wrap around */ + ptp_read_system_prets(sts); + frc_l = mlxsw_core_read_frc_l(mlxsw_core); + ptp_read_system_postts(sts); + } + + return (u64) frc_l | (u64) frc_h2 << 32; +} + +static u64 mlxsw_sp1_ptp_read_frc(const struct cyclecounter *cc) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(cc, struct mlxsw_sp_ptp_clock, cycles); + + return __mlxsw_sp1_ptp_read_frc(clock, NULL) & cc->mask; +} + +static int +mlxsw_sp1_ptp_phc_adjfreq(struct mlxsw_sp_ptp_clock *clock, int freq_adj) +{ + struct mlxsw_core *mlxsw_core = clock->core; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + + mlxsw_reg_mtutc_pack(mtutc_pl, MLXSW_REG_MTUTC_OPERATION_ADJUST_FREQ, + freq_adj, 0); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static u64 mlxsw_sp1_ptp_ns2cycles(const struct timecounter *tc, u64 nsec) +{ + u64 cycles = (u64) nsec; + + cycles <<= tc->cc->shift; + cycles = div_u64(cycles, tc->cc->mult); + + return cycles; +} + +static int +mlxsw_sp1_ptp_phc_settime(struct mlxsw_sp_ptp_clock *clock, u64 nsec) +{ + struct mlxsw_core *mlxsw_core = clock->core; + u64 next_sec, next_sec_in_nsec, cycles; + char mtutc_pl[MLXSW_REG_MTUTC_LEN]; + char mtpps_pl[MLXSW_REG_MTPPS_LEN]; + int err; + + next_sec = div_u64(nsec, NSEC_PER_SEC) + 1; + next_sec_in_nsec = next_sec * NSEC_PER_SEC; + + spin_lock_bh(&clock->lock); + cycles = mlxsw_sp1_ptp_ns2cycles(&clock->tc, next_sec_in_nsec); + spin_unlock_bh(&clock->lock); + + mlxsw_reg_mtpps_vpin_pack(mtpps_pl, cycles); + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtpps), mtpps_pl); + if (err) + return err; + + mlxsw_reg_mtutc_pack(mtutc_pl, + MLXSW_REG_MTUTC_OPERATION_SET_TIME_AT_NEXT_SEC, + 0, next_sec); + return mlxsw_reg_write(mlxsw_core, MLXSW_REG(mtutc), mtutc_pl); +} + +static int mlxsw_sp1_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + int neg_adj = 0; + u32 diff; + u64 adj; + s32 ppb; + + ppb = 
scaled_ppm_to_ppb(scaled_ppm); + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + + adj = clock->nominal_c_mult; + adj *= ppb; + diff = div_u64(adj, NSEC_PER_SEC); + + spin_lock_bh(&clock->lock); + timecounter_read(&clock->tc); + clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff : + clock->nominal_c_mult + diff; + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_adjfreq(clock, neg_adj ? -ppb : ppb); +} + +static int mlxsw_sp1_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec; + + spin_lock_bh(&clock->lock); + timecounter_adjtime(&clock->tc, delta); + nsec = timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_settime(clock, nsec); +} + +static int mlxsw_sp1_ptp_gettimex(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 cycles, nsec; + + spin_lock_bh(&clock->lock); + cycles = __mlxsw_sp1_ptp_read_frc(clock, sts); + nsec = timecounter_cyc2time(&clock->tc, cycles); + spin_unlock_bh(&clock->lock); + + *ts = ns_to_timespec64(nsec); + + return 0; +} + +static int mlxsw_sp1_ptp_settime(struct ptp_clock_info *ptp, + const struct timespec64 *ts) +{ + struct mlxsw_sp_ptp_clock *clock = + container_of(ptp, struct mlxsw_sp_ptp_clock, ptp_info); + u64 nsec = timespec64_to_ns(ts); + + spin_lock_bh(&clock->lock); + timecounter_init(&clock->tc, &clock->cycles, nsec); + nsec = timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + + return mlxsw_sp1_ptp_phc_settime(clock, nsec); +} + +static const struct ptp_clock_info mlxsw_sp1_ptp_clock_info = { + .owner = THIS_MODULE, + .name = "mlxsw_sp_clock", + .max_adj = 100000000, + .adjfine = mlxsw_sp1_ptp_adjfine, + .adjtime = mlxsw_sp1_ptp_adjtime, + .gettimex64 = mlxsw_sp1_ptp_gettimex, + .settime64 = mlxsw_sp1_ptp_settime, +}; + +static void mlxsw_sp1_ptp_clock_overflow(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_ptp_clock *clock; + + clock = container_of(dwork, struct mlxsw_sp_ptp_clock, overflow_work); + + spin_lock_bh(&clock->lock); + timecounter_read(&clock->tc); + spin_unlock_bh(&clock->lock); + mlxsw_core_schedule_dw(&clock->overflow_work, clock->overflow_period); +} + +struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + u64 overflow_cycles, nsec, frac = 0; + struct mlxsw_sp_ptp_clock *clock; + int err; + + clock = kzalloc(sizeof(*clock), GFP_KERNEL); + if (!clock) + return ERR_PTR(-ENOMEM); + + spin_lock_init(&clock->lock); + clock->cycles.read = mlxsw_sp1_ptp_read_frc; + clock->cycles.shift = MLXSW_SP1_PTP_CLOCK_CYCLES_SHIFT; + clock->cycles.mult = clocksource_khz2mult(MLXSW_SP1_PTP_CLOCK_FREQ_KHZ, + clock->cycles.shift); + clock->nominal_c_mult = clock->cycles.mult; + clock->cycles.mask = CLOCKSOURCE_MASK(MLXSW_SP1_PTP_CLOCK_MASK); + clock->core = mlxsw_sp->core; + + timecounter_init(&clock->tc, &clock->cycles, + ktime_to_ns(ktime_get_real())); + + /* Calculate period in seconds to call the overflow watchdog - to make + * sure counter is checked at least twice every wrap around. + * The period is calculated as the minimum between max HW cycles count + * (The clock source mask) and max amount of cycles that can be + * multiplied by clock multiplier where the result doesn't exceed + * 64bits. 
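+ * That is, overflow_cycles = min((2^63 - 1) / mult, mask / 3), which is + * converted below first to nanoseconds and then to jiffies.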
+ */ + overflow_cycles = div64_u64(~0ULL >> 1, clock->cycles.mult); + overflow_cycles = min(overflow_cycles, div_u64(clock->cycles.mask, 3)); + + nsec = cyclecounter_cyc2ns(&clock->cycles, overflow_cycles, 0, &frac); + clock->overflow_period = nsecs_to_jiffies(nsec); + + INIT_DELAYED_WORK(&clock->overflow_work, mlxsw_sp1_ptp_clock_overflow); + mlxsw_core_schedule_dw(&clock->overflow_work, 0); + + clock->ptp_info = mlxsw_sp1_ptp_clock_info; + clock->ptp = ptp_clock_register(&clock->ptp_info, dev); + if (IS_ERR(clock->ptp)) { + err = PTR_ERR(clock->ptp); + dev_err(dev, "ptp_clock_register failed %d\n", err); + goto err_ptp_clock_register; + } + + return clock; + +err_ptp_clock_register: + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ + ptp_clock_unregister(clock->ptp); + cancel_delayed_work_sync(&clock->overflow_work); + kfree(clock); +} + +static int mlxsw_sp_ptp_parse(struct sk_buff *skb, + u8 *p_domain_number, + u8 *p_message_type, + u16 *p_sequence_id) +{ + unsigned int offset = 0; + unsigned int ptp_class; + u8 *data; + + data = skb_mac_header(skb); + ptp_class = ptp_classify_raw(skb); + + switch (ptp_class & PTP_CLASS_VMASK) { + case PTP_CLASS_V1: + case PTP_CLASS_V2: + break; + default: + return -ERANGE; + } + + if (ptp_class & PTP_CLASS_VLAN) + offset += VLAN_HLEN; + + switch (ptp_class & PTP_CLASS_PMASK) { + case PTP_CLASS_IPV4: + offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN; + break; + case PTP_CLASS_IPV6: + offset += ETH_HLEN + IP6_HLEN + UDP_HLEN; + break; + case PTP_CLASS_L2: + offset += ETH_HLEN; + break; + default: + return -ERANGE; + } + + /* PTP header is 34 bytes. */ + if (skb->len < offset + 34) + return -EINVAL; + + *p_message_type = data[offset] & 0x0f; + *p_domain_number = data[offset + 4]; + *p_sequence_id = (u16)(data[offset + 30]) << 8 | data[offset + 31]; + return 0; +} + +/* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on + * error. + */ +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL; + struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state; + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp1_ptp_unmatched *conflict; + + unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC); + if (!unmatched) + return ERR_PTR(-ENOMEM); + + unmatched->key = key; + unmatched->skb = skb; + unmatched->timestamp = timestamp; + unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles; + + conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + if (conflict) + kfree(unmatched); + + return conflict; +} + +static struct mlxsw_sp1_ptp_unmatched * +mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key) +{ + return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +static int +mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); +} + +/* This function is called in the following scenarios: + * + * 1) When a packet is matched with its timestamp. 
+ * 2) In several situations when it is necessary to immediately pass on + * an SKB without a timestamp. + * 3) From GC indirectly through mlxsw_sp1_ptp_unmatched_finish(). + * This case is similar to 2) above. + */ +static void mlxsw_sp1_ptp_packet_finish(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port, + bool ingress, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + + /* Between capturing the packet and finishing it, there is a window of + * opportunity for the originating port to go away (e.g. due to a + * split). Also make sure the SKB device reference is still valid. + */ + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!(mlxsw_sp_port && (!skb->dev || skb->dev == mlxsw_sp_port->dev))) { + dev_kfree_skb_any(skb); + return; + } + + if (ingress) { + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); + } else { + /* skb_tstamp_tx() allows hwtstamps to be NULL. */ + skb_tstamp_tx(skb, hwtstamps); + dev_kfree_skb_any(skb); + } +} + +static void mlxsw_sp1_packet_timestamp(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, + u64 timestamp) +{ + struct skb_shared_hwtstamps hwtstamps; + u64 nsec; + + spin_lock_bh(&mlxsw_sp->clock->lock); + nsec = timecounter_cyc2time(&mlxsw_sp->clock->tc, timestamp); + spin_unlock_bh(&mlxsw_sp->clock->lock); + + hwtstamps.hwtstamp = ns_to_ktime(nsec); + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, key.ingress, &hwtstamps); +} + +static void +mlxsw_sp1_ptp_unmatched_finish(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + if (unmatched->skb && unmatched->timestamp) + mlxsw_sp1_packet_timestamp(mlxsw_sp, unmatched->key, + unmatched->skb, + unmatched->timestamp); + else if (unmatched->skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, unmatched->skb, + unmatched->key.local_port, + unmatched->key.ingress, NULL); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_unmatched_free_fn(void *ptr, void *arg) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched = ptr; + + /* This is invoked at a point where the ports are gone already. Nothing + * to do with whatever is left in the HT but to free it. + */ + if (unmatched->skb) + dev_kfree_skb_any(unmatched->skb); + kfree_rcu(unmatched, rcu); +} + +static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp1_ptp_key key, + struct sk_buff *skb, u64 timestamp) +{ + struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict; + int err; + + rcu_read_lock(); + + unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key); + + spin_lock(&mlxsw_sp->ptp_state->unmatched_lock); + + if (unmatched) { + /* There was an unmatched entry when we looked, but it may have + * been removed before we took the lock. + */ + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched); + if (err) + unmatched = NULL; + } + + if (!unmatched) { + /* We have no unmatched entry, but one may have been added after + * we looked and before we took the lock. + */ + unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + if (IS_ERR(unmatched)) { + if (skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + unmatched = NULL; + } else if (unmatched) { + /* Save just told us, under lock, that the entry is + * there, so this has to work. 
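+ * (A non-NULL, non-error return from the save above is the conflicting + * entry already present in the hash table, so the removal below must + * succeed.)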
+ */ + err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, + unmatched); + WARN_ON_ONCE(err); + } + } + + /* If unmatched is non-NULL here, it comes either from the lookup, or + * from the save attempt above. In either case the entry was removed + * from the hash table. If unmatched is NULL, a new unmatched entry was + * added to the hash table, and there was no conflict. + */ + + if (skb && unmatched && unmatched->timestamp) { + unmatched->skb = skb; + } else if (timestamp && unmatched && unmatched->skb) { + unmatched->timestamp = timestamp; + } else if (unmatched) { + /* unmatched holds an older entry of the same type: either an + * skb if we are handling skb, or a timestamp if we are handling + * timestamp. We can't match that up, so save what we have. + */ + conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key, + skb, timestamp); + if (IS_ERR(conflict)) { + if (skb) + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, + key.local_port, + key.ingress, NULL); + } else { + /* Above, we removed an object with this key from the + * hash table, under lock, so conflict can not be a + * valid pointer. + */ + WARN_ON_ONCE(conflict); + } + } + + spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock); + + if (unmatched) + mlxsw_sp1_ptp_unmatched_finish(mlxsw_sp, unmatched); + + rcu_read_unlock(); +} + +static void mlxsw_sp1_ptp_got_packet(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port, + bool ingress) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + int err; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + goto immediate; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + if (!types) + goto immediate; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.ingress = ingress; + + err = mlxsw_sp_ptp_parse(skb, &key.domain_number, &key.message_type, + &key.sequence_id); + if (err) + goto immediate; + + /* For packets whose timestamping was not enabled on this port, don't + * bother trying to match the timestamp. + */ + if (!((1 << key.message_type) & types)) + goto immediate; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, skb, 0); + return; + +immediate: + mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb, local_port, ingress, NULL); +} + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp) +{ + struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_sp1_ptp_key key; + u8 types; + + mlxsw_sp_port = mlxsw_sp->ports[local_port]; + if (!mlxsw_sp_port) + return; + + types = ingress ? mlxsw_sp_port->ptp.ing_types : + mlxsw_sp_port->ptp.egr_types; + + /* For message types whose timestamping was not enabled on this port, + * don't bother with the timestamp. 
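+ * (types is a bitmask in which bit N corresponds to PTP messageType N.)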
+ */ + if (!((1 << message_type) & types)) + return; + + memset(&key, 0, sizeof(key)); + key.local_port = local_port; + key.domain_number = domain_number; + key.message_type = message_type; + key.sequence_id = sequence_id; + key.ingress = ingress; + + mlxsw_sp1_ptp_got_piece(mlxsw_sp, key, NULL, timestamp); +} + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port) +{ + skb_reset_mac_header(skb); + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, true); +} + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp1_ptp_got_packet(mlxsw_sp, skb, local_port, false); +} + +static void +mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state, + struct mlxsw_sp1_ptp_unmatched *unmatched) +{ + int err; + + /* If an unmatched entry has an SKB, it has to be handed over to the + * networking stack. This is usually done from a trap handler, which is + * invoked in a softirq context. Here we are going to do it in process + * context. If that were to be interrupted by a softirq, it could cause + * a deadlock when an attempt is made to take an already-taken lock + * somewhere along the sending path. Disable softirqs to prevent this. + */ + local_bh_disable(); + + spin_lock(&ptp_state->unmatched_lock); + err = rhashtable_remove_fast(&ptp_state->unmatched_ht, + &unmatched->ht_node, + mlxsw_sp1_ptp_unmatched_ht_params); + spin_unlock(&ptp_state->unmatched_lock); + + if (err) + /* The packet was matched with timestamp during the walk. */ + goto out; + + /* mlxsw_sp1_ptp_unmatched_finish() invokes netif_receive_skb(). While + * the comment at that function states that it can only be called in + * soft IRQ context, this pattern of local_bh_disable() + + * netif_receive_skb(), in process context, is seen elsewhere in the + * kernel, notably in pktgen. 
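+ * The local_bh_disable() at the top of this function provides that + * softirq-disabled context here.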
+ */ + mlxsw_sp1_ptp_unmatched_finish(ptp_state->mlxsw_sp, unmatched); + +out: + local_bh_enable(); +} + +static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp1_ptp_unmatched *unmatched; + struct mlxsw_sp_ptp_state *ptp_state; + struct rhashtable_iter iter; + u32 gc_cycle; + void *obj; + + ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw); + gc_cycle = ptp_state->gc_cycle++; + + rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter); + rhashtable_walk_start(&iter); + while ((obj = rhashtable_walk_next(&iter))) { + if (IS_ERR(obj)) + continue; + + unmatched = obj; + if (unmatched->gc_cycle <= gc_cycle) + mlxsw_sp1_ptp_ht_gc_collect(ptp_state, unmatched); + } + rhashtable_walk_stop(&iter); + rhashtable_walk_exit(&iter); + + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); +} + +static int mlxsw_sp_ptp_mtptpt_set(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_mtptpt_trap_id trap_id, + u16 message_type) +{ + char mtptpt_pl[MLXSW_REG_MTPTPT_LEN]; + + mlxsw_reg_mtptptp_pack(mtptpt_pl, trap_id, message_type); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtptpt), mtptpt_pl); +} + +static int mlxsw_sp1_ptp_set_fifo_clr_on_trap(struct mlxsw_sp *mlxsw_sp, + bool clr) +{ + char mogcr_pl[MLXSW_REG_MOGCR_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); + if (err) + return err; + + mlxsw_reg_mogcr_ptp_iftc_set(mogcr_pl, clr); + mlxsw_reg_mogcr_ptp_eftc_set(mogcr_pl, clr); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mogcr), mogcr_pl); +} + +static int mlxsw_sp1_ptp_mtpppc_set(struct mlxsw_sp *mlxsw_sp, + u16 ing_types, u16 egr_types) +{ + char mtpppc_pl[MLXSW_REG_MTPPPC_LEN]; + + mlxsw_reg_mtpppc_pack(mtpppc_pl, ing_types, egr_types); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mtpppc), mtpppc_pl); +} + +struct mlxsw_sp1_ptp_shaper_params { + u32 ethtool_speed; + enum mlxsw_reg_qpsc_port_speed port_speed; + u8 shaper_time_exp; + u8 shaper_time_mantissa; + u8 shaper_inc; + u8 shaper_bs; + u8 port_to_shaper_credits; + int ing_timestamp_inc; + int egr_timestamp_inc; +}; + +static const struct mlxsw_sp1_ptp_shaper_params +mlxsw_sp1_ptp_shaper_params[] = { + { + .ethtool_speed = SPEED_100, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_100M, + .shaper_time_exp = 4, + .shaper_time_mantissa = 12, + .shaper_inc = 9, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -313, + .egr_timestamp_inc = 313, + }, + { + .ethtool_speed = SPEED_1000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_1G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 12, + .shaper_inc = 6, + .shaper_bs = 0, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -35, + .egr_timestamp_inc = 35, + }, + { + .ethtool_speed = SPEED_10000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_10G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 2, + .shaper_inc = 14, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -11, + .egr_timestamp_inc = 11, + }, + { + .ethtool_speed = SPEED_25000, + .port_speed = MLXSW_REG_QPSC_PORT_SPEED_25G, + .shaper_time_exp = 0, + .shaper_time_mantissa = 0, + .shaper_inc = 11, + .shaper_bs = 1, + .port_to_shaper_credits = 1, + .ing_timestamp_inc = -14, + .egr_timestamp_inc = 14, + }, +}; + +#define MLXSW_SP1_PTP_SHAPER_PARAMS_LEN ARRAY_SIZE(mlxsw_sp1_ptp_shaper_params) + +static int mlxsw_sp1_ptp_shaper_params_set(struct mlxsw_sp *mlxsw_sp) +{ + const struct mlxsw_sp1_ptp_shaper_params *params; + char 
qpsc_pl[MLXSW_REG_QPSC_LEN]; + int i, err; + + for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { + params = &mlxsw_sp1_ptp_shaper_params[i]; + mlxsw_reg_qpsc_pack(qpsc_pl, params->port_speed, + params->shaper_time_exp, + params->shaper_time_mantissa, + params->shaper_inc, params->shaper_bs, + params->port_to_shaper_credits, + params->ing_timestamp_inc, + params->egr_timestamp_inc); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpsc), qpsc_pl); + if (err) + return err; + } + + return 0; +} + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_ptp_state *ptp_state; + u16 message_type; + int err; + + err = mlxsw_sp1_ptp_shaper_params_set(mlxsw_sp); + if (err) + return ERR_PTR(err); + + ptp_state = kzalloc(sizeof(*ptp_state), GFP_KERNEL); + if (!ptp_state) + return ERR_PTR(-ENOMEM); + ptp_state->mlxsw_sp = mlxsw_sp; + + spin_lock_init(&ptp_state->unmatched_lock); + + err = rhashtable_init(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_ht_params); + if (err) + goto err_hashtable_init; + + /* Deliver these message types as PTP0. */ + message_type = BIT(MLXSW_SP_PTP_MESSAGE_TYPE_SYNC) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ) | + BIT(MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP); + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, + message_type); + if (err) + goto err_mtptpt_set; + + /* Everything else is PTP1. */ + message_type = ~message_type; + err = mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, + message_type); + if (err) + goto err_mtptpt1_set; + + err = mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, true); + if (err) + goto err_fifo_clr; + + INIT_DELAYED_WORK(&ptp_state->ht_gc_dw, mlxsw_sp1_ptp_ht_gc); + mlxsw_core_schedule_dw(&ptp_state->ht_gc_dw, + MLXSW_SP1_PTP_HT_GC_INTERVAL); + return ptp_state; + +err_fifo_clr: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); +err_mtptpt1_set: + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); +err_mtptpt_set: + rhashtable_destroy(&ptp_state->unmatched_ht); +err_hashtable_init: + kfree(ptp_state); + return ERR_PTR(err); +} + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ + struct mlxsw_sp *mlxsw_sp = ptp_state->mlxsw_sp; + + cancel_delayed_work_sync(&ptp_state->ht_gc_dw); + mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp, 0, 0); + mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0); + mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0); + rhashtable_free_and_destroy(&ptp_state->unmatched_ht, + &mlxsw_sp1_ptp_unmatched_free_fn, NULL); + kfree(ptp_state); +} + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + *config = mlxsw_sp_port->ptp.hwtstamp_config; + return 0; +} + +static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config, + u16 *p_ing_types, u16 *p_egr_types, + enum hwtstamp_rx_filters *p_rx_filter) +{ + enum hwtstamp_rx_filters rx_filter = config->rx_filter; + enum hwtstamp_tx_types tx_type = config->tx_type; + u16 ing_types = 0x00; + u16 egr_types = 0x00; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + egr_types = 0x00; + break; + case HWTSTAMP_TX_ON: + egr_types = 0xff; + break; + case HWTSTAMP_TX_ONESTEP_SYNC: + return -ERANGE; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + ing_types = 0x00; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + 
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + ing_types = 0x01; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + ing_types = 0x02; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + ing_types = 0x0f; + break; + case HWTSTAMP_FILTER_ALL: + ing_types = 0xff; + break; + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_NTP_ALL: + return -ERANGE; + } + + *p_ing_types = ing_types; + *p_egr_types = egr_types; + *p_rx_filter = rx_filter; + return 0; +} + +static int mlxsw_sp1_ptp_mtpppc_update(struct mlxsw_sp_port *mlxsw_sp_port, + u16 ing_types, u16 egr_types) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_port *tmp; + int i; + + /* MTPPPC configures timestamping globally, not per port. Find the + * configuration that contains all configured timestamping requests. + */ + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) { + tmp = mlxsw_sp->ports[i]; + if (tmp && tmp != mlxsw_sp_port) { + ing_types |= tmp->ptp.ing_types; + egr_types |= tmp->ptp.egr_types; + } + } + + return mlxsw_sp1_ptp_mtpppc_set(mlxsw_sp_port->mlxsw_sp, + ing_types, egr_types); +} + +static bool mlxsw_sp1_ptp_hwtstamp_enabled(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->ptp.ing_types || mlxsw_sp_port->ptp.egr_types; +} + +static int +mlxsw_sp1_ptp_port_shaper_set(struct mlxsw_sp_port *mlxsw_sp_port, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_ptps_pack(qeec_pl, mlxsw_sp_port->local_port, enable); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +static int mlxsw_sp1_ptp_port_shaper_check(struct mlxsw_sp_port *mlxsw_sp_port) +{ + const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops; + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u32 eth_proto_oper, speed; + bool ptps = false; + int err, i; + + if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) + return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, false); + + port_type_speed_ops = mlxsw_sp->port_type_speed_ops; + port_type_speed_ops->reg_ptys_eth_pack(mlxsw_sp, ptys_pl, + mlxsw_sp_port->local_port, 0, + false); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + port_type_speed_ops->reg_ptys_eth_unpack(mlxsw_sp, ptys_pl, NULL, NULL, + &eth_proto_oper); + + speed = port_type_speed_ops->from_ptys_speed(mlxsw_sp, eth_proto_oper); + for (i = 0; i < MLXSW_SP1_PTP_SHAPER_PARAMS_LEN; i++) { + if (mlxsw_sp1_ptp_shaper_params[i].ethtool_speed == speed) { + ptps = true; + break; + } + } + + return mlxsw_sp1_ptp_port_shaper_set(mlxsw_sp_port, ptps); +} + +void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct mlxsw_sp_port *mlxsw_sp_port; + int err; + + mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port, + ptp.shaper_dw); + + if (!mlxsw_sp1_ptp_hwtstamp_enabled(mlxsw_sp_port)) + return; + + err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port); + if (err) + netdev_err(mlxsw_sp_port->dev, "Failed to set up PTP shaper\n"); +} + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + enum hwtstamp_rx_filters rx_filter; + u16 ing_types; + 
u16 egr_types; + int err; + + err = mlxsw_sp_ptp_get_message_types(config, &ing_types, &egr_types, + &rx_filter); + if (err) + return err; + + err = mlxsw_sp1_ptp_mtpppc_update(mlxsw_sp_port, ing_types, egr_types); + if (err) + return err; + + mlxsw_sp_port->ptp.hwtstamp_config = *config; + mlxsw_sp_port->ptp.ing_types = ing_types; + mlxsw_sp_port->ptp.egr_types = egr_types; + + err = mlxsw_sp1_ptp_port_shaper_check(mlxsw_sp_port); + if (err) + return err; + + /* Notify the ioctl caller what we are actually timestamping. */ + config->rx_filter = rx_filter; + + return 0; +} + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + info->phc_index = ptp_clock_index(mlxsw_sp->clock->ptp); + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | + BIT(HWTSTAMP_TX_ON); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_ALL); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h new file mode 100644 index 000000000000..72e55f6926b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2019 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_PTP_H +#define _MLXSW_SPECTRUM_PTP_H + +#include <linux/device.h> +#include <linux/rhashtable.h> + +struct mlxsw_sp; +struct mlxsw_sp_port; +struct mlxsw_sp_ptp_clock; + +enum { + MLXSW_SP_PTP_MESSAGE_TYPE_SYNC, + MLXSW_SP_PTP_MESSAGE_TYPE_DELAY_REQ, + MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_REQ, + MLXSW_SP_PTP_MESSAGE_TYPE_PDELAY_RESP, +}; + +static inline int mlxsw_sp_ptp_get_ts_info_noptp(struct ethtool_ts_info *info) +{ + info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + info->phc_index = -1; + return 0; +} + +#if IS_REACHABLE(CONFIG_PTP_1588_CLOCK) + +struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev); + +void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock); + +struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp); + +void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state); + +void mlxsw_sp1_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb, + u8 local_port); + +void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port); + +void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, u16 sequence_id, + u64 timestamp); + +int mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +int mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config); + +void mlxsw_sp1_ptp_shaper_work(struct work_struct *work); + +int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info); + +#else + +static inline struct mlxsw_sp_ptp_clock * +mlxsw_sp1_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + return NULL; +} + +static inline void mlxsw_sp1_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ +} + +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp1_ptp_receive(struct 
mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp1_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline void +mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, + u8 local_port, u8 message_type, + u8 domain_number, + u16 sequence_id, u64 timestamp) +{ +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp1_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline void mlxsw_sp1_ptp_shaper_work(struct work_struct *work) +{ +} + +static inline int mlxsw_sp1_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + +#endif + +static inline struct mlxsw_sp_ptp_clock * +mlxsw_sp2_ptp_clock_init(struct mlxsw_sp *mlxsw_sp, struct device *dev) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_clock_fini(struct mlxsw_sp_ptp_clock *clock) +{ +} + +static inline struct mlxsw_sp_ptp_state * +mlxsw_sp2_ptp_init(struct mlxsw_sp *mlxsw_sp) +{ + return NULL; +} + +static inline void mlxsw_sp2_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state) +{ +} + +static inline void mlxsw_sp2_ptp_receive(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + mlxsw_sp_rx_listener_no_mark_func(skb, local_port, mlxsw_sp); +} + +static inline void mlxsw_sp2_ptp_transmitted(struct mlxsw_sp *mlxsw_sp, + struct sk_buff *skb, u8 local_port) +{ + dev_kfree_skb_any(skb); +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_get(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline int +mlxsw_sp2_ptp_hwtstamp_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct hwtstamp_config *config) +{ + return -EOPNOTSUPP; +} + +static inline void mlxsw_sp2_ptp_shaper_work(struct work_struct *work) +{ +} + +static inline int mlxsw_sp2_ptp_get_ts_info(struct mlxsw_sp *mlxsw_sp, + struct ethtool_ts_info *info) +{ + return mlxsw_sp_ptp_get_ts_info_noptp(info); +} + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index ef554739dd54..e618be7ce6c6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -21,6 +21,7 @@ #include <net/arp.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> +#include <net/nexthop.h> #include <net/fib_rules.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> @@ -2887,7 +2888,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, return false; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; struct in6_addr *gw; int ifindex, weight; @@ -2959,7 +2960,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev; + dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev; val ^= dev->ifindex; } @@ -3883,23 +3884,25 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } static bool mlxsw_sp_fi_is_gateway(const struct 
mlxsw_sp *mlxsw_sp, - const struct fib_info *fi) + struct fib_info *fi) { - return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK || - mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); + const struct fib_nh *nh = fib_info_nh(fi, 0); + + return nh->fib_nh_scope == RT_SCOPE_LINK || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL); } static struct mlxsw_sp_nexthop_group * mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { + unsigned int nhs = fib_info_num_path(fi); struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; struct fib_nh *fib_nh; int i; int err; - nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs), - GFP_KERNEL); + nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); nh_grp->priv = fi; @@ -3907,11 +3910,11 @@ mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) nh_grp->neigh_tbl = &arp_tbl; nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); - nh_grp->count = fi->fib_nhs; + nh_grp->count = nhs; fib_info_hold(fi); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - fib_nh = &fi->fib_nh[i]; + fib_nh = fib_info_nh(fi, i); err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) goto err_nexthop4_init; @@ -4027,9 +4030,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev && + if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->fib6_nh.fib_nh_gw6)) + &rt->fib6_nh->fib_nh_gw6)) return nh; continue; } @@ -4089,13 +4092,13 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD; + list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; return; } list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; - struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; + struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh; struct mlxsw_sp_nexthop *nh; nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); @@ -4117,7 +4120,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct fib6_info *rt = mlxsw_sp_rt6->rt; - rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD; + rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4349,9 +4352,9 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { + struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); - struct net_device *dev = fen_info->fi->fib_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4995,7 +4998,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) { /* RTF_CACHE routes are ignored */ - return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family; + return !(rt->fib6_flags & RTF_ADDRCONF) && + rt->fib6_nh->fib_nh_gw_family; } static struct 
fib6_info * @@ -5054,8 +5058,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->fib6_nh.fib_nh_dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret); + return rt->fib6_nh->fib_nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret); } static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, @@ -5065,7 +5069,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->fib6_nh.fib_nh_dev; + struct net_device *dev = rt->fib6_nh->fib_nh_dev; struct mlxsw_sp_rif *rif; int err; @@ -5108,11 +5112,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, const struct fib6_info *rt) { - struct net_device *dev = rt->fib6_nh.fib_nh_dev; + struct net_device *dev = rt->fib6_nh->fib_nh_dev; nh->nh_grp = nh_grp; - nh->nh_weight = rt->fib6_nh.fib_nh_weight; - memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr)); + nh->nh_weight = rt->fib6_nh->fib_nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -5135,7 +5139,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { - return rt->fib6_nh.fib_nh_gw_family || + return rt->fib6_nh->fib_nh_gw_family || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } @@ -5274,17 +5278,21 @@ err_nexthop6_group_get: static int mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) - return PTR_ERR(mlxsw_sp_rt6); + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6++; + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + } err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); if (err) @@ -5293,27 +5301,38 @@ mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, return 0; err_nexthop6_group_update: - fib6_entry->nrt6--; - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + i = nrt6; +err_rt6_create: + for (i--; i >= 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } return err; } static void mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int i; - mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); - if (WARN_ON(!mlxsw_sp_rt6)) - return; + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, + rt_arr[i]); + if (WARN_ON_ONCE(!mlxsw_sp_rt6)) + continue; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } - fib6_entry->nrt6--; 
- list_del(&mlxsw_sp_rt6->list); mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); } static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, @@ -5354,29 +5373,32 @@ mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) static struct mlxsw_sp_fib6_entry * mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, - struct fib6_info *rt) + struct fib6_info **rt_arr, unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_rt6 *mlxsw_sp_rt6; - int err; + int err, i; fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); if (!fib6_entry) return ERR_PTR(-ENOMEM); fib_entry = &fib6_entry->common; - mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); - if (IS_ERR(mlxsw_sp_rt6)) { - err = PTR_ERR(mlxsw_sp_rt6); - goto err_rt6_create; + INIT_LIST_HEAD(&fib6_entry->rt6_list); + + for (i = 0; i < nrt6; i++) { + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; } - mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]); - INIT_LIST_HEAD(&fib6_entry->rt6_list); - list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); - fib6_entry->nrt6 = 1; err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); if (err) goto err_nexthop6_group_get; @@ -5386,9 +5408,15 @@ mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, return fib6_entry; err_nexthop6_group_get: - list_del(&mlxsw_sp_rt6->list); - mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + i = nrt6; err_rt6_create: + for (i--; i >= 0; i--) { + fib6_entry->nrt6--; + mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } kfree(fib6_entry); return ERR_PTR(err); } @@ -5431,16 +5459,16 @@ mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, static int mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, - bool replace) + bool *p_replace) { struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); struct mlxsw_sp_fib6_entry *fib6_entry; - fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace); - if (replace && WARN_ON(!fib6_entry)) - return -EINVAL; + if (*p_replace && !fib6_entry) + *p_replace = false; if (fib6_entry) { list_add_tail(&new6_entry->common.list, @@ -5475,11 +5503,11 @@ mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib6_entry *fib6_entry, - bool replace) + bool *p_replace) { int err; - err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); + err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace); if (err) return err; @@ -5552,10 +5580,12 @@ static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt, bool replace) + struct fib6_info **rt_arr, + unsigned int nrt6, bool replace) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; int err; if (mlxsw_sp->router->aborted) @@ -5580,19 +5610,21 @@ static int 
mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, */ fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); if (fib6_entry) { - err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, + rt_arr, nrt6); if (err) goto err_fib6_entry_nexthop_add; return 0; } - fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr, + nrt6); if (IS_ERR(fib6_entry)) { err = PTR_ERR(fib6_entry); goto err_fib6_entry_create; } - err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace); if (err) goto err_fib6_node_entry_link; @@ -5609,10 +5641,12 @@ err_fib6_entry_nexthop_add: } static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, - struct fib6_info *rt) + struct fib6_info **rt_arr, + unsigned int nrt6) { struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; + struct fib6_info *rt = rt_arr[0]; if (mlxsw_sp->router->aborted) return; @@ -5624,11 +5658,12 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, if (WARN_ON(!fib6_entry)) return; - /* If route is part of a multipath entry, but not the last one - * removed, then only reduce its nexthop group. + /* If not all the nexthops are deleted, then only reduce the nexthop + * group. */ - if (!list_is_singular(&fib6_entry->rt6_list)) { - mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); + if (nrt6 != fib6_entry->nrt6) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr, + nrt6); return; } @@ -5889,10 +5924,15 @@ static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } +struct mlxsw_sp_fib6_event_work { + struct fib6_info **rt_arr; + unsigned int nrt6; +}; + struct mlxsw_sp_fib_event_work { struct work_struct work; union { - struct fib6_entry_notifier_info fen6_info; + struct mlxsw_sp_fib6_event_work fib6_work; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -5903,6 +5943,54 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; +static int +mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work, + struct fib6_entry_notifier_info *fen6_info) +{ + struct fib6_info *rt = fen6_info->rt; + struct fib6_info **rt_arr; + struct fib6_info *iter; + unsigned int nrt6; + int i = 0; + + nrt6 = fen6_info->nsiblings + 1; + + rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); + if (!rt_arr) + return -ENOMEM; + + fib6_work->rt_arr = rt_arr; + fib6_work->nrt6 = nrt6; + + rt_arr[0] = rt; + fib6_info_hold(rt); + + if (!fen6_info->nsiblings) + return 0; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + if (i == fen6_info->nsiblings) + break; + + rt_arr[i + 1] = iter; + fib6_info_hold(iter); + i++; + } + WARN_ON_ONCE(i != fen6_info->nsiblings); + + return 0; +} + +static void +mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work) +{ + int i; + + for (i = 0; i < fib6_work->nrt6; i++) + mlxsw_sp_rt6_release(fib6_work->rt_arr[i]); + kfree(fib6_work->rt_arr); +} + static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = @@ -5961,18 +6049,21 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case 
FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; err = mlxsw_sp_router_fib6_add(mlxsw_sp, - fib_work->fen6_info.rt, replace); + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6, + replace); if (err) mlxsw_sp_router_fib_abort(mlxsw_sp); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_ENTRY_DEL: - mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); - mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + mlxsw_sp_router_fib6_del(mlxsw_sp, + fib_work->fib6_work.rt_arr, + fib_work->fib6_work.nrt6); + mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work); break; case FIB_EVENT_RULE_ADD: /* if we get here, a rule was added that we do not support. @@ -6061,22 +6152,26 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, } } -static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, - struct fib_notifier_info *info) +static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) { struct fib6_entry_notifier_info *fen6_info; + int err; switch (fib_work->event) { case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ case FIB_EVENT_ENTRY_ADD: /* fall through */ case FIB_EVENT_ENTRY_DEL: fen6_info = container_of(info, struct fib6_entry_notifier_info, info); - fib_work->fen6_info = *fen6_info; - fib6_info_hold(fib_work->fen6_info.rt); + err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work, + fen6_info); + if (err) + return err; break; } + + return 0; } static void @@ -6185,6 +6280,20 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); return notifier_from_errno(-EINVAL); } + if (fen_info->fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } + } else if (info->family == AF_INET6) { + struct fib6_entry_notifier_info *fen6_info; + + fen6_info = container_of(info, + struct fib6_entry_notifier_info, + info); + if (fen6_info->rt->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } } break; } @@ -6203,7 +6312,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, break; case AF_INET6: INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); - mlxsw_sp_router_fib6_event(fib_work, info); + err = mlxsw_sp_router_fib6_event(fib_work, info); + if (err) + goto err_fib_event; break; case RTNL_FAMILY_IP6MR: case RTNL_FAMILY_IPMR: @@ -6215,6 +6326,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, mlxsw_core_schedule_work(&fib_work->work); return NOTIFY_DONE; + +err_fib_event: + kfree(fib_work); + return NOTIFY_BAD; } struct mlxsw_sp_rif * diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index fc4f19167262..bdab96f5bc70 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -299,6 +299,8 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, u64 len; int err; + memset(skb->cb, 0, sizeof(struct mlxsw_skb_cb)); + if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info)) return NETDEV_TX_BUSY; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h 
b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 451216dd7f6b..19202bdb5105 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -17,6 +17,8 @@ enum { MLXSW_TRAP_ID_MVRP = 0x15, MLXSW_TRAP_ID_RPVST = 0x16, MLXSW_TRAP_ID_DHCP = 0x19, + MLXSW_TRAP_ID_PTP0 = 0x28, + MLXSW_TRAP_ID_PTP1 = 0x29, MLXSW_TRAP_ID_IGMP_QUERY = 0x30, MLXSW_TRAP_ID_IGMP_V1_REPORT = 0x31, MLXSW_TRAP_ID_IGMP_V2_REPORT = 0x32, @@ -76,6 +78,10 @@ enum { enum mlxsw_event_trap_id { /* Port Up/Down event generated by hardware */ MLXSW_TRAP_ID_PUDE = 0x8, + /* PTP Ingress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_ING_FIFO = 0x2D, + /* PTP Egress FIFO has a new entry */ + MLXSW_TRAP_ID_PTP_EGR_FIFO = 0x2E, }; #endif /* _MLXSW_TRAP_H */ diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile index cb52a3b128ae..9a36c26095c8 100644 --- a/drivers/net/ethernet/mscc/Makefile +++ b/drivers/net/ethernet/mscc/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: (GPL-2.0 OR MIT) obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o mscc_ocelot_common-y := ocelot.o ocelot_io.o -mscc_ocelot_common-y += ocelot_regs.o +mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o ocelot_ace.o ocelot_flower.o obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 02ad11e0b0d8..b71e4ecbe469 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -22,6 +22,7 @@ #include <net/switchdev.h> #include "ocelot.h" +#include "ocelot_ace.h" #define TABLE_UPDATE_SLEEP_US 10 #define TABLE_UPDATE_TIMEOUT_US 100000 @@ -130,6 +131,13 @@ static void ocelot_mact_init(struct ocelot *ocelot) ocelot_write(ocelot, MACACCESS_CMD_INIT, ANA_TABLES_MACACCESS); } +static void ocelot_vcap_enable(struct ocelot *ocelot, struct ocelot_port *port) +{ + ocelot_write_gix(ocelot, ANA_PORT_VCAP_S2_CFG_S2_ENA | + ANA_PORT_VCAP_S2_CFG_S2_IP6_CFG(0xa), + ANA_PORT_VCAP_S2_CFG, port->chip_port); +} + static inline u32 ocelot_vlant_read_vlanaccess(struct ocelot *ocelot) { return ocelot_read(ocelot, ANA_TABLES_VLANACCESS); @@ -884,6 +892,13 @@ static int ocelot_set_features(struct net_device *dev, struct ocelot_port *port = netdev_priv(dev); netdev_features_t changed = dev->features ^ features; + if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) && + port->tc.offload_cnt) { + netdev_err(dev, + "Cannot disable HW TC offload while offloads active\n"); + return -EBUSY; + } + if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) ocelot_vlan_mode(port, features); @@ -917,6 +932,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = { .ndo_vlan_rx_kill_vid = ocelot_vlan_rx_kill_vid, .ndo_set_features = ocelot_set_features, .ndo_get_port_parent_id = ocelot_get_port_parent_id, + .ndo_setup_tc = ocelot_setup_tc, }; static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data) @@ -1636,8 +1652,9 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port, dev->netdev_ops = &ocelot_port_netdev_ops; dev->ethtool_ops = &ocelot_ethtool_ops; - dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS; - dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS | + NETIF_F_HW_TC; + dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC; memcpy(dev->dev_addr, ocelot->base_mac, ETH_ALEN); dev->dev_addr[ETH_ALEN - 1] += port; @@ -1653,6 +1670,9 @@ int ocelot_probe_port(struct ocelot *ocelot, 
u8 port, /* Basic L2 initialization */ ocelot_vlan_port_apply(ocelot, ocelot_port); + /* Enable vcap lookups */ + ocelot_vcap_enable(ocelot, ocelot_port); + return 0; err_register_netdev: @@ -1687,6 +1707,7 @@ int ocelot_init(struct ocelot *ocelot) ocelot_mact_init(ocelot); ocelot_vlan_init(ocelot); + ocelot_ace_init(ocelot); for (port = 0; port < ocelot->num_phys_ports; port++) { /* Clear all counters (5 groups) */ @@ -1799,6 +1820,7 @@ void ocelot_deinit(struct ocelot *ocelot) { destroy_workqueue(ocelot->stats_queue); mutex_destroy(&ocelot->stats_lock); + ocelot_ace_deinit(); } EXPORT_SYMBOL(ocelot_deinit); diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h index 541fe41e60b0..f7eeb4806897 100644 --- a/drivers/net/ethernet/mscc/ocelot.h +++ b/drivers/net/ethernet/mscc/ocelot.h @@ -22,6 +22,7 @@ #include "ocelot_rew.h" #include "ocelot_sys.h" #include "ocelot_qs.h" +#include "ocelot_tc.h" #define PGID_AGGR 64 #define PGID_SRC 80 @@ -68,6 +69,7 @@ enum ocelot_target { QSYS, REW, SYS, + S2, HSIO, TARGET_MAX, }; @@ -334,6 +336,13 @@ enum ocelot_reg { SYS_CM_DATA_RD, SYS_CM_OP, SYS_CM_DATA, + S2_CORE_UPDATE_CTRL = S2 << TARGET_OFFSET, + S2_CORE_MV_CFG, + S2_CACHE_ENTRY_DAT, + S2_CACHE_MASK_DAT, + S2_CACHE_ACTION_DAT, + S2_CACHE_CNT_DAT, + S2_CACHE_TG_DAT, }; enum ocelot_regfield { @@ -454,6 +463,8 @@ struct ocelot_port { phy_interface_t phy_mode; struct phy *serdes; + + struct ocelot_port_tc tc; }; u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset); diff --git a/drivers/net/ethernet/mscc/ocelot_ace.c b/drivers/net/ethernet/mscc/ocelot_ace.c new file mode 100644 index 000000000000..39aca1ab4687 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_ace.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Microsemi Ocelot Switch driver + * Copyright (c) 2019 Microsemi Corporation + */ + +#include <linux/iopoll.h> +#include <linux/proc_fs.h> + +#include "ocelot_ace.h" +#include "ocelot_vcap.h" +#include "ocelot_s2.h" + +#define OCELOT_POLICER_DISCARD 0x17f + +static struct ocelot_acl_block *acl_block; + +struct vcap_props { + const char *name; /* Symbolic name */ + u16 tg_width; /* Type-group width (in bits) */ + u16 sw_count; /* Sub word count */ + u16 entry_count; /* Entry count */ + u16 entry_words; /* Number of entry words */ + u16 entry_width; /* Entry width (in bits) */ + u16 action_count; /* Action count */ + u16 action_words; /* Number of action words */ + u16 action_width; /* Action width (in bits) */ + u16 action_type_width; /* Action type width (in bits) */ + struct { + u16 width; /* Action type width (in bits) */ + u16 count; /* Action type sub word count */ + } action_table[2]; + u16 counter_words; /* Number of counter words */ + u16 counter_width; /* Counter width (in bits) */ +}; + +#define ENTRY_WIDTH 32 +#define BITS_TO_32BIT(x) (1 + (((x) - 1) / ENTRY_WIDTH)) + +static const struct vcap_props vcap_is2 = { + .name = "IS2", + .tg_width = 2, + .sw_count = 4, + .entry_count = VCAP_IS2_CNT, + .entry_words = BITS_TO_32BIT(VCAP_IS2_ENTRY_WIDTH), + .entry_width = VCAP_IS2_ENTRY_WIDTH, + .action_count = (VCAP_IS2_CNT + VCAP_PORT_CNT + 2), + .action_words = BITS_TO_32BIT(VCAP_IS2_ACTION_WIDTH), + .action_width = (VCAP_IS2_ACTION_WIDTH), + .action_type_width = 1, + .action_table = { + { + .width = (IS2_AO_ACL_ID + IS2_AL_ACL_ID), + .count = 2 + }, + { + .width = 6, + .count = 4 + }, + }, + .counter_words = BITS_TO_32BIT(4 * ENTRY_WIDTH), + .counter_width = ENTRY_WIDTH, +}; + +enum vcap_sel { + VCAP_SEL_ENTRY = 0x1, + 
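+ /* These selector values are one-hot bits and may be OR'ed together; + * VCAP_SEL_ALL (0x7) addresses the entry, action and counter caches + * in a single command. + */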
VCAP_SEL_ACTION = 0x2, + VCAP_SEL_COUNTER = 0x4, + VCAP_SEL_ALL = 0x7, +}; + +enum vcap_cmd { + VCAP_CMD_WRITE = 0, /* Copy from Cache to TCAM */ + VCAP_CMD_READ = 1, /* Copy from TCAM to Cache */ + VCAP_CMD_MOVE_UP = 2, /* Move <count> up */ + VCAP_CMD_MOVE_DOWN = 3, /* Move <count> down */ + VCAP_CMD_INITIALIZE = 4, /* Write all (from cache) */ +}; + +#define VCAP_ENTRY_WIDTH 12 /* Max entry width (32bit words) */ +#define VCAP_COUNTER_WIDTH 4 /* Max counter width (32bit words) */ + +struct vcap_data { + u32 entry[VCAP_ENTRY_WIDTH]; /* ENTRY_DAT */ + u32 mask[VCAP_ENTRY_WIDTH]; /* MASK_DAT */ + u32 action[VCAP_ENTRY_WIDTH]; /* ACTION_DAT */ + u32 counter[VCAP_COUNTER_WIDTH]; /* CNT_DAT */ + u32 tg; /* TG_DAT */ + u32 type; /* Action type */ + u32 tg_sw; /* Current type-group */ + u32 cnt; /* Current counter */ + u32 key_offset; /* Current entry offset */ + u32 action_offset; /* Current action offset */ + u32 counter_offset; /* Current counter offset */ + u32 tg_value; /* Current type-group value */ + u32 tg_mask; /* Current type-group mask */ +}; + +static u32 vcap_s2_read_update_ctrl(struct ocelot *oc) +{ + return ocelot_read(oc, S2_CORE_UPDATE_CTRL); +} + +static void vcap_cmd(struct ocelot *oc, u16 ix, int cmd, int sel) +{ + u32 value = (S2_CORE_UPDATE_CTRL_UPDATE_CMD(cmd) | + S2_CORE_UPDATE_CTRL_UPDATE_ADDR(ix) | + S2_CORE_UPDATE_CTRL_UPDATE_SHOT); + + if ((sel & VCAP_SEL_ENTRY) && ix >= vcap_is2.entry_count) + return; + + if (!(sel & VCAP_SEL_ENTRY)) + value |= S2_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS; + + if (!(sel & VCAP_SEL_ACTION)) + value |= S2_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS; + + if (!(sel & VCAP_SEL_COUNTER)) + value |= S2_CORE_UPDATE_CTRL_UPDATE_CNT_DIS; + + ocelot_write(oc, value, S2_CORE_UPDATE_CTRL); + readx_poll_timeout(vcap_s2_read_update_ctrl, oc, value, + (value & S2_CORE_UPDATE_CTRL_UPDATE_SHOT) == 0, + 10, 100000); +} + +/* Convert from 0-based row to VCAP entry row and run command */ +static void vcap_row_cmd(struct ocelot *oc, u32 row, int cmd, int sel) +{ + vcap_cmd(oc, vcap_is2.entry_count - row - 1, cmd, sel); +} + +static void vcap_entry2cache(struct ocelot *oc, struct vcap_data *data) +{ + u32 i; + + for (i = 0; i < vcap_is2.entry_words; i++) { + ocelot_write_rix(oc, data->entry[i], S2_CACHE_ENTRY_DAT, i); + ocelot_write_rix(oc, ~data->mask[i], S2_CACHE_MASK_DAT, i); + } + ocelot_write(oc, data->tg, S2_CACHE_TG_DAT); +} + +static void vcap_cache2entry(struct ocelot *oc, struct vcap_data *data) +{ + u32 i; + + for (i = 0; i < vcap_is2.entry_words; i++) { + data->entry[i] = ocelot_read_rix(oc, S2_CACHE_ENTRY_DAT, i); + // Invert mask + data->mask[i] = ~ocelot_read_rix(oc, S2_CACHE_MASK_DAT, i); + } + data->tg = ocelot_read(oc, S2_CACHE_TG_DAT); +} + +static void vcap_action2cache(struct ocelot *oc, struct vcap_data *data) +{ + u32 i, width, mask; + + /* Encode action type */ + width = vcap_is2.action_type_width; + if (width) { + mask = GENMASK(width, 0); + data->action[0] = ((data->action[0] & ~mask) | data->type); + } + + for (i = 0; i < vcap_is2.action_words; i++) + ocelot_write_rix(oc, data->action[i], S2_CACHE_ACTION_DAT, i); + + for (i = 0; i < vcap_is2.counter_words; i++) + ocelot_write_rix(oc, data->counter[i], S2_CACHE_CNT_DAT, i); +} + +static void vcap_cache2action(struct ocelot *oc, struct vcap_data *data) +{ + u32 i, width; + + for (i = 0; i < vcap_is2.action_words; i++) + data->action[i] = ocelot_read_rix(oc, S2_CACHE_ACTION_DAT, i); + + for (i = 0; i < vcap_is2.counter_words; i++) + data->counter[i] = ocelot_read_rix(oc, S2_CACHE_CNT_DAT, i); + + 
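+ /* The S2 cache registers read above hold the result of the last + * VCAP_CMD_READ that the caller issued through vcap_row_cmd(). + */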
/* Extract action type */ + width = vcap_is2.action_type_width; + data->type = (width ? (data->action[0] & GENMASK(width, 0)) : 0); +} + +/* Calculate offsets for entry */ +static void is2_data_get(struct vcap_data *data, int ix) +{ + u32 i, col, offset, count, cnt, base, width = vcap_is2.tg_width; + + count = (data->tg_sw == VCAP_TG_HALF ? 2 : 4); + col = (ix % 2); + cnt = (vcap_is2.sw_count / count); + base = (vcap_is2.sw_count - col * cnt - cnt); + data->tg_value = 0; + data->tg_mask = 0; + for (i = 0; i < cnt; i++) { + offset = ((base + i) * width); + data->tg_value |= (data->tg_sw << offset); + data->tg_mask |= GENMASK(offset + width - 1, offset); + } + + /* Calculate key/action/counter offsets */ + col = (count - col - 1); + data->key_offset = (base * vcap_is2.entry_width) / vcap_is2.sw_count; + data->counter_offset = (cnt * col * vcap_is2.counter_width); + i = data->type; + width = vcap_is2.action_table[i].width; + cnt = vcap_is2.action_table[i].count; + data->action_offset = + (((cnt * col * width) / count) + vcap_is2.action_type_width); +} + +static void vcap_data_set(u32 *data, u32 offset, u32 len, u32 value) +{ + u32 i, v, m; + + for (i = 0; i < len; i++, offset++) { + v = data[offset / ENTRY_WIDTH]; + m = (1 << (offset % ENTRY_WIDTH)); + if (value & (1 << i)) + v |= m; + else + v &= ~m; + data[offset / ENTRY_WIDTH] = v; + } +} + +static u32 vcap_data_get(u32 *data, u32 offset, u32 len) +{ + u32 i, v, m, value = 0; + + for (i = 0; i < len; i++, offset++) { + v = data[offset / ENTRY_WIDTH]; + m = (1 << (offset % ENTRY_WIDTH)); + if (v & m) + value |= (1 << i); + } + return value; +} + +static void vcap_key_set(struct vcap_data *data, u32 offset, u32 width, + u32 value, u32 mask) +{ + vcap_data_set(data->entry, offset + data->key_offset, width, value); + vcap_data_set(data->mask, offset + data->key_offset, width, mask); +} + +static void vcap_key_bytes_set(struct vcap_data *data, u32 offset, u8 *val, + u8 *msk, u32 count) +{ + u32 i, j, n = 0, value = 0, mask = 0; + + /* Data wider than 32 bits are split up in chunks of maximum 32 bits. + * The 32 LSB of the data are written to the 32 MSB of the TCAM. + */ + offset += (count * 8); + for (i = 0; i < count; i++) { + j = (count - i - 1); + value += (val[j] << n); + mask += (msk[j] << n); + n += 8; + if (n == ENTRY_WIDTH || (i + 1) == count) { + offset -= n; + vcap_key_set(data, offset, n, value, mask); + n = 0; + value = 0; + mask = 0; + } + } +} + +static void vcap_key_l4_port_set(struct vcap_data *data, u32 offset, + struct ocelot_vcap_udp_tcp *port) +{ + vcap_key_set(data, offset, 16, port->value, port->mask); +} + +static void vcap_key_bit_set(struct vcap_data *data, u32 offset, + enum ocelot_vcap_bit val) +{ + vcap_key_set(data, offset, 1, val == OCELOT_VCAP_BIT_1 ? 1 : 0, + val == OCELOT_VCAP_BIT_ANY ? 
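+ /* This encodes a tri-state match: OCELOT_VCAP_BIT_ANY gets mask 0 + * (don't care), while BIT_0/BIT_1 get mask 1 with the value bit + * cleared or set. + */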
0 : 1); +} + +#define VCAP_KEY_SET(fld, val, msk) \ + vcap_key_set(&data, IS2_HKO_##fld, IS2_HKL_##fld, val, msk) +#define VCAP_KEY_ANY_SET(fld) \ + vcap_key_set(&data, IS2_HKO_##fld, IS2_HKL_##fld, 0, 0) +#define VCAP_KEY_BIT_SET(fld, val) vcap_key_bit_set(&data, IS2_HKO_##fld, val) +#define VCAP_KEY_BYTES_SET(fld, val, msk) \ + vcap_key_bytes_set(&data, IS2_HKO_##fld, val, msk, IS2_HKL_##fld / 8) + +static void vcap_action_set(struct vcap_data *data, u32 offset, u32 width, + u32 value) +{ + vcap_data_set(data->action, offset + data->action_offset, width, value); +} + +#define VCAP_ACT_SET(fld, val) \ + vcap_action_set(data, IS2_AO_##fld, IS2_AL_##fld, val) + +static void is2_action_set(struct vcap_data *data, + enum ocelot_ace_action action) +{ + switch (action) { + case OCELOT_ACL_ACTION_DROP: + VCAP_ACT_SET(PORT_MASK, 0x0); + VCAP_ACT_SET(MASK_MODE, 0x1); + VCAP_ACT_SET(POLICE_ENA, 0x1); + VCAP_ACT_SET(POLICE_IDX, OCELOT_POLICER_DISCARD); + VCAP_ACT_SET(CPU_QU_NUM, 0x0); + VCAP_ACT_SET(CPU_COPY_ENA, 0x0); + break; + case OCELOT_ACL_ACTION_TRAP: + VCAP_ACT_SET(PORT_MASK, 0x0); + VCAP_ACT_SET(MASK_MODE, 0x0); + VCAP_ACT_SET(POLICE_ENA, 0x0); + VCAP_ACT_SET(POLICE_IDX, 0x0); + VCAP_ACT_SET(CPU_QU_NUM, 0x0); + VCAP_ACT_SET(CPU_COPY_ENA, 0x1); + break; + } +} + +static void is2_entry_set(struct ocelot *ocelot, int ix, + struct ocelot_ace_rule *ace) +{ + u32 val, msk, type, type_mask = 0xf, i, count; + struct ocelot_ace_vlan *tag = &ace->vlan; + struct ocelot_vcap_u64 payload; + struct vcap_data data; + int row = (ix / 2); + + memset(&payload, 0, sizeof(payload)); + memset(&data, 0, sizeof(data)); + + /* Read row */ + vcap_row_cmd(ocelot, row, VCAP_CMD_READ, VCAP_SEL_ALL); + vcap_cache2entry(ocelot, &data); + vcap_cache2action(ocelot, &data); + + data.tg_sw = VCAP_TG_HALF; + is2_data_get(&data, ix); + data.tg = (data.tg & ~data.tg_mask); + if (ace->prio != 0) + data.tg |= data.tg_value; + + data.type = IS2_ACTION_TYPE_NORMAL; + + VCAP_KEY_ANY_SET(PAG); + VCAP_KEY_SET(IGR_PORT_MASK, 0, ~BIT(ace->chip_port)); + VCAP_KEY_BIT_SET(FIRST, OCELOT_VCAP_BIT_1); + VCAP_KEY_BIT_SET(HOST_MATCH, OCELOT_VCAP_BIT_ANY); + VCAP_KEY_BIT_SET(L2_MC, ace->dmac_mc); + VCAP_KEY_BIT_SET(L2_BC, ace->dmac_bc); + VCAP_KEY_BIT_SET(VLAN_TAGGED, tag->tagged); + VCAP_KEY_SET(VID, tag->vid.value, tag->vid.mask); + VCAP_KEY_SET(PCP, tag->pcp.value[0], tag->pcp.mask[0]); + VCAP_KEY_BIT_SET(DEI, tag->dei); + + switch (ace->type) { + case OCELOT_ACE_TYPE_ETYPE: { + struct ocelot_ace_frame_etype *etype = &ace->frame.etype; + + type = IS2_TYPE_ETYPE; + VCAP_KEY_BYTES_SET(L2_DMAC, etype->dmac.value, + etype->dmac.mask); + VCAP_KEY_BYTES_SET(L2_SMAC, etype->smac.value, + etype->smac.mask); + VCAP_KEY_BYTES_SET(MAC_ETYPE_ETYPE, etype->etype.value, + etype->etype.mask); + VCAP_KEY_ANY_SET(MAC_ETYPE_L2_PAYLOAD); // Clear unused bits + vcap_key_bytes_set(&data, IS2_HKO_MAC_ETYPE_L2_PAYLOAD, + etype->data.value, etype->data.mask, 2); + break; + } + case OCELOT_ACE_TYPE_LLC: { + struct ocelot_ace_frame_llc *llc = &ace->frame.llc; + + type = IS2_TYPE_LLC; + VCAP_KEY_BYTES_SET(L2_DMAC, llc->dmac.value, llc->dmac.mask); + VCAP_KEY_BYTES_SET(L2_SMAC, llc->smac.value, llc->smac.mask); + for (i = 0; i < 4; i++) { + payload.value[i] = llc->llc.value[i]; + payload.mask[i] = llc->llc.mask[i]; + } + VCAP_KEY_BYTES_SET(MAC_LLC_L2_LLC, payload.value, payload.mask); + break; + } + case OCELOT_ACE_TYPE_SNAP: { + struct ocelot_ace_frame_snap *snap = &ace->frame.snap; + + type = IS2_TYPE_SNAP; + VCAP_KEY_BYTES_SET(L2_DMAC, snap->dmac.value, 
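+ /* The VCAP_KEY_*_SET() wrappers above derive each field's offset + * (IS2_HKO_*) and width (IS2_HKL_*) from ocelot_vcap.h, so call + * sites only supply the value/mask pair. + */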
snap->dmac.mask); + VCAP_KEY_BYTES_SET(L2_SMAC, snap->smac.value, snap->smac.mask); + VCAP_KEY_BYTES_SET(MAC_SNAP_L2_SNAP, + ace->frame.snap.snap.value, + ace->frame.snap.snap.mask); + break; + } + case OCELOT_ACE_TYPE_ARP: { + struct ocelot_ace_frame_arp *arp = &ace->frame.arp; + + type = IS2_TYPE_ARP; + VCAP_KEY_BYTES_SET(MAC_ARP_L2_SMAC, arp->smac.value, + arp->smac.mask); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_ADDR_SPACE_OK, arp->ethernet); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_PROTO_SPACE_OK, arp->ip); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_LEN_OK, arp->length); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_TGT_MATCH, arp->dmac_match); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_SENDER_MATCH, arp->smac_match); + VCAP_KEY_BIT_SET(MAC_ARP_ARP_OPCODE_UNKNOWN, arp->unknown); + + /* OPCODE is inverse, bit 0 is reply flag, bit 1 is RARP flag */ + val = ((arp->req == OCELOT_VCAP_BIT_0 ? 1 : 0) | + (arp->arp == OCELOT_VCAP_BIT_0 ? 2 : 0)); + msk = ((arp->req == OCELOT_VCAP_BIT_ANY ? 0 : 1) | + (arp->arp == OCELOT_VCAP_BIT_ANY ? 0 : 2)); + VCAP_KEY_SET(MAC_ARP_ARP_OPCODE, val, msk); + vcap_key_bytes_set(&data, IS2_HKO_MAC_ARP_L3_IP4_DIP, + arp->dip.value.addr, arp->dip.mask.addr, 4); + vcap_key_bytes_set(&data, IS2_HKO_MAC_ARP_L3_IP4_SIP, + arp->sip.value.addr, arp->sip.mask.addr, 4); + VCAP_KEY_ANY_SET(MAC_ARP_DIP_EQ_SIP); + break; + } + case OCELOT_ACE_TYPE_IPV4: + case OCELOT_ACE_TYPE_IPV6: { + enum ocelot_vcap_bit sip_eq_dip, sport_eq_dport, seq_zero, tcp; + enum ocelot_vcap_bit ttl, fragment, options, tcp_ack, tcp_urg; + enum ocelot_vcap_bit tcp_fin, tcp_syn, tcp_rst, tcp_psh; + struct ocelot_ace_frame_ipv4 *ipv4 = NULL; + struct ocelot_ace_frame_ipv6 *ipv6 = NULL; + struct ocelot_vcap_udp_tcp *sport, *dport; + struct ocelot_vcap_ipv4 sip, dip; + struct ocelot_vcap_u8 proto, ds; + struct ocelot_vcap_u48 *ip_data; + + if (ace->type == OCELOT_ACE_TYPE_IPV4) { + ipv4 = &ace->frame.ipv4; + ttl = ipv4->ttl; + fragment = ipv4->fragment; + options = ipv4->options; + proto = ipv4->proto; + ds = ipv4->ds; + ip_data = &ipv4->data; + sip = ipv4->sip; + dip = ipv4->dip; + sport = &ipv4->sport; + dport = &ipv4->dport; + tcp_fin = ipv4->tcp_fin; + tcp_syn = ipv4->tcp_syn; + tcp_rst = ipv4->tcp_rst; + tcp_psh = ipv4->tcp_psh; + tcp_ack = ipv4->tcp_ack; + tcp_urg = ipv4->tcp_urg; + sip_eq_dip = ipv4->sip_eq_dip; + sport_eq_dport = ipv4->sport_eq_dport; + seq_zero = ipv4->seq_zero; + } else { + ipv6 = &ace->frame.ipv6; + ttl = ipv6->ttl; + fragment = OCELOT_VCAP_BIT_ANY; + options = OCELOT_VCAP_BIT_ANY; + proto = ipv6->proto; + ds = ipv6->ds; + ip_data = &ipv6->data; + for (i = 0; i < 8; i++) { + val = ipv6->sip.value[i + 8]; + msk = ipv6->sip.mask[i + 8]; + if (i < 4) { + dip.value.addr[i] = val; + dip.mask.addr[i] = msk; + } else { + sip.value.addr[i - 4] = val; + sip.mask.addr[i - 4] = msk; + } + } + sport = &ipv6->sport; + dport = &ipv6->dport; + tcp_fin = ipv6->tcp_fin; + tcp_syn = ipv6->tcp_syn; + tcp_rst = ipv6->tcp_rst; + tcp_psh = ipv6->tcp_psh; + tcp_ack = ipv6->tcp_ack; + tcp_urg = ipv6->tcp_urg; + sip_eq_dip = ipv6->sip_eq_dip; + sport_eq_dport = ipv6->sport_eq_dport; + seq_zero = ipv6->seq_zero; + } + + VCAP_KEY_BIT_SET(IP4, + ipv4 ? 
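+ /* Only the low 64 bits of an IPv6 source address are matched: the + * loop above folded bytes 8-11 into the IPv4 "dip" field and bytes + * 12-15 into "sip". + */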
OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); + VCAP_KEY_BIT_SET(L3_FRAGMENT, fragment); + VCAP_KEY_ANY_SET(L3_FRAG_OFS_GT0); + VCAP_KEY_BIT_SET(L3_OPTIONS, options); + VCAP_KEY_BIT_SET(L3_TTL_GT0, ttl); + VCAP_KEY_BYTES_SET(L3_TOS, ds.value, ds.mask); + vcap_key_bytes_set(&data, IS2_HKO_L3_IP4_DIP, dip.value.addr, + dip.mask.addr, 4); + vcap_key_bytes_set(&data, IS2_HKO_L3_IP4_SIP, sip.value.addr, + sip.mask.addr, 4); + VCAP_KEY_BIT_SET(DIP_EQ_SIP, sip_eq_dip); + val = proto.value[0]; + msk = proto.mask[0]; + type = IS2_TYPE_IP_UDP_TCP; + if (msk == 0xff && (val == 6 || val == 17)) { + /* UDP/TCP protocol match */ + tcp = (val == 6 ? + OCELOT_VCAP_BIT_1 : OCELOT_VCAP_BIT_0); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_TCP, tcp); + vcap_key_l4_port_set(&data, + IS2_HKO_IP4_TCP_UDP_L4_DPORT, + dport); + vcap_key_l4_port_set(&data, + IS2_HKO_IP4_TCP_UDP_L4_SPORT, + sport); + VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_RNG); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_SPORT_EQ_DPORT, + sport_eq_dport); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_SEQUENCE_EQ0, seq_zero); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_FIN, tcp_fin); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_SYN, tcp_syn); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_RST, tcp_rst); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_PSH, tcp_psh); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_ACK, tcp_ack); + VCAP_KEY_BIT_SET(IP4_TCP_UDP_L4_URG, tcp_urg); + VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_1588_DOM); + VCAP_KEY_ANY_SET(IP4_TCP_UDP_L4_1588_VER); + } else { + if (msk == 0) { + /* Any IP protocol match */ + type_mask = IS2_TYPE_MASK_IP_ANY; + } else { + /* Non-UDP/TCP protocol match */ + type = IS2_TYPE_IP_OTHER; + for (i = 0; i < 6; i++) { + payload.value[i] = ip_data->value[i]; + payload.mask[i] = ip_data->mask[i]; + } + } + VCAP_KEY_BYTES_SET(IP4_OTHER_L3_PROTO, proto.value, + proto.mask); + VCAP_KEY_BYTES_SET(IP4_OTHER_L3_PAYLOAD, payload.value, + payload.mask); + } + break; + } + case OCELOT_ACE_TYPE_ANY: + default: + type = 0; + type_mask = 0; + count = (vcap_is2.entry_width / 2); + for (i = (IS2_HKO_PCP + IS2_HKL_PCP); i < count; + i += ENTRY_WIDTH) { + /* Clear entry data */ + vcap_key_set(&data, i, min(32u, count - i), 0, 0); + } + break; + } + + VCAP_KEY_SET(TYPE, type, type_mask); + is2_action_set(&data, ace->action); + vcap_data_set(data.counter, data.counter_offset, vcap_is2.counter_width, + ace->stats.pkts); + + /* Write row */ + vcap_entry2cache(ocelot, &data); + vcap_action2cache(ocelot, &data); + vcap_row_cmd(ocelot, row, VCAP_CMD_WRITE, VCAP_SEL_ALL); +} + +static void is2_entry_get(struct ocelot_ace_rule *rule, int ix) +{ + struct ocelot *op = rule->port->ocelot; + struct vcap_data data; + int row = (ix / 2); + u32 cnt; + + vcap_row_cmd(op, row, VCAP_CMD_READ, VCAP_SEL_COUNTER); + vcap_cache2action(op, &data); + data.tg_sw = VCAP_TG_HALF; + is2_data_get(&data, ix); + cnt = vcap_data_get(data.counter, data.counter_offset, + vcap_is2.counter_width); + + rule->stats.pkts = cnt; +} + +static void ocelot_ace_rule_add(struct ocelot_acl_block *block, + struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *tmp; + struct list_head *pos, *n; + + block->count++; + + if (list_empty(&block->rules)) { + list_add(&rule->list, &block->rules); + return; + } + + list_for_each_safe(pos, n, &block->rules) { + tmp = list_entry(pos, struct ocelot_ace_rule, list); + if (rule->prio < tmp->prio) + break; + } + list_add(&rule->list, pos->prev); +} + +static int ocelot_ace_rule_get_index_id(struct ocelot_acl_block *block, + struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *tmp; + int index = -1; + + list_for_each_entry(tmp, &block->rules, 
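+ /* The rule list is kept sorted by priority (see + * ocelot_ace_rule_add), so a rule's position in the list is also + * its entry position in the IS2 TCAM. + */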
list) { + ++index; + if (rule->id == tmp->id) + break; + } + return index; +} + +static struct ocelot_ace_rule* +ocelot_ace_rule_get_rule_index(struct ocelot_acl_block *block, int index) +{ + struct ocelot_ace_rule *tmp; + int i = 0; + + list_for_each_entry(tmp, &block->rules, list) { + if (i == index) + return tmp; + ++i; + } + + return NULL; +} + +int ocelot_ace_rule_offload_add(struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *ace; + int i, index; + + /* Add rule to the linked list */ + ocelot_ace_rule_add(acl_block, rule); + + /* Get the index of the inserted rule */ + index = ocelot_ace_rule_get_index_id(acl_block, rule); + + /* Move down the rules to make room for the new rule */ + for (i = acl_block->count - 1; i > index; i--) { + ace = ocelot_ace_rule_get_rule_index(acl_block, i); + is2_entry_set(rule->port->ocelot, i, ace); + } + + /* Now insert the new rule */ + is2_entry_set(rule->port->ocelot, index, rule); + return 0; +} + +static void ocelot_ace_rule_del(struct ocelot_acl_block *block, + struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *tmp; + struct list_head *pos, *q; + + list_for_each_safe(pos, q, &block->rules) { + tmp = list_entry(pos, struct ocelot_ace_rule, list); + if (tmp->id == rule->id) { + list_del(pos); + kfree(tmp); + } + } + + block->count--; +} + +int ocelot_ace_rule_offload_del(struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule del_ace; + struct ocelot_ace_rule *ace; + int i, index; + + memset(&del_ace, 0, sizeof(del_ace)); + + /* Get the index of the rule */ + index = ocelot_ace_rule_get_index_id(acl_block, rule); + + /* Delete the rule */ + ocelot_ace_rule_del(acl_block, rule); + + /* Move up all the rules above the deleted rule */ + for (i = index; i < acl_block->count; i++) { + ace = ocelot_ace_rule_get_rule_index(acl_block, i); + is2_entry_set(rule->port->ocelot, i, ace); + } + + /* Now delete the last rule, because it is duplicated */ + is2_entry_set(rule->port->ocelot, acl_block->count, &del_ace); + + return 0; +} + +int ocelot_ace_rule_stats_update(struct ocelot_ace_rule *rule) +{ + struct ocelot_ace_rule *tmp; + int index; + + index = ocelot_ace_rule_get_index_id(acl_block, rule); + is2_entry_get(rule, index); + + /* After we get the result we need to clear the counters */ + tmp = ocelot_ace_rule_get_rule_index(acl_block, index); + tmp->stats.pkts = 0; + is2_entry_set(rule->port->ocelot, index, tmp); + + return 0; +} + +static struct ocelot_acl_block *ocelot_acl_block_create(struct ocelot *ocelot) +{ + struct ocelot_acl_block *block; + + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return NULL; + + INIT_LIST_HEAD(&block->rules); + block->count = 0; + block->ocelot = ocelot; + + return block; +} + +static void ocelot_acl_block_destroy(struct ocelot_acl_block *block) +{ + kfree(block); +} + +int ocelot_ace_init(struct ocelot *ocelot) +{ + struct vcap_data data; + + memset(&data, 0, sizeof(data)); + vcap_entry2cache(ocelot, &data); + ocelot_write(ocelot, vcap_is2.entry_count, S2_CORE_MV_CFG); + vcap_cmd(ocelot, 0, VCAP_CMD_INITIALIZE, VCAP_SEL_ENTRY); + + vcap_action2cache(ocelot, &data); + ocelot_write(ocelot, vcap_is2.action_count, S2_CORE_MV_CFG); + vcap_cmd(ocelot, 0, VCAP_CMD_INITIALIZE, + VCAP_SEL_ACTION | VCAP_SEL_COUNTER); + + /* Create a policer that will drop frames destined for the CPU. + * This policer is used as the action in ACL rules that drop + * frames.
+ */ + ocelot_write_gix(ocelot, 0x299, ANA_POL_MODE_CFG, + OCELOT_POLICER_DISCARD); + ocelot_write_gix(ocelot, 0x1, ANA_POL_PIR_CFG, + OCELOT_POLICER_DISCARD); + ocelot_write_gix(ocelot, 0x3fffff, ANA_POL_PIR_STATE, + OCELOT_POLICER_DISCARD); + ocelot_write_gix(ocelot, 0x0, ANA_POL_CIR_CFG, + OCELOT_POLICER_DISCARD); + ocelot_write_gix(ocelot, 0x3fffff, ANA_POL_CIR_STATE, + OCELOT_POLICER_DISCARD); + + acl_block = ocelot_acl_block_create(ocelot); + + return 0; +} + +void ocelot_ace_deinit(void) +{ + ocelot_acl_block_destroy(acl_block); +} diff --git a/drivers/net/ethernet/mscc/ocelot_ace.h b/drivers/net/ethernet/mscc/ocelot_ace.h new file mode 100644 index 000000000000..e98944c87259 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_ace.h @@ -0,0 +1,232 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Microsemi Ocelot Switch driver + * Copyright (c) 2019 Microsemi Corporation + */ + +#ifndef _MSCC_OCELOT_ACE_H_ +#define _MSCC_OCELOT_ACE_H_ + +#include "ocelot.h" +#include <net/sch_generic.h> +#include <net/pkt_cls.h> + +struct ocelot_ipv4 { + u8 addr[4]; +}; + +enum ocelot_vcap_bit { + OCELOT_VCAP_BIT_ANY, + OCELOT_VCAP_BIT_0, + OCELOT_VCAP_BIT_1 +}; + +struct ocelot_vcap_u8 { + u8 value[1]; + u8 mask[1]; +}; + +struct ocelot_vcap_u16 { + u8 value[2]; + u8 mask[2]; +}; + +struct ocelot_vcap_u24 { + u8 value[3]; + u8 mask[3]; +}; + +struct ocelot_vcap_u32 { + u8 value[4]; + u8 mask[4]; +}; + +struct ocelot_vcap_u40 { + u8 value[5]; + u8 mask[5]; +}; + +struct ocelot_vcap_u48 { + u8 value[6]; + u8 mask[6]; +}; + +struct ocelot_vcap_u64 { + u8 value[8]; + u8 mask[8]; +}; + +struct ocelot_vcap_u128 { + u8 value[16]; + u8 mask[16]; +}; + +struct ocelot_vcap_vid { + u16 value; + u16 mask; +}; + +struct ocelot_vcap_ipv4 { + struct ocelot_ipv4 value; + struct ocelot_ipv4 mask; +}; + +struct ocelot_vcap_udp_tcp { + u16 value; + u16 mask; +}; + +enum ocelot_ace_type { + OCELOT_ACE_TYPE_ANY, + OCELOT_ACE_TYPE_ETYPE, + OCELOT_ACE_TYPE_LLC, + OCELOT_ACE_TYPE_SNAP, + OCELOT_ACE_TYPE_ARP, + OCELOT_ACE_TYPE_IPV4, + OCELOT_ACE_TYPE_IPV6 +}; + +struct ocelot_ace_vlan { + struct ocelot_vcap_vid vid; /* VLAN ID (12 bit) */ + struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ + enum ocelot_vcap_bit dei; /* DEI */ + enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ +}; + +struct ocelot_ace_frame_etype { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + struct ocelot_vcap_u16 etype; + struct ocelot_vcap_u16 data; /* MAC data */ +}; + +struct ocelot_ace_frame_llc { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + + /* LLC header: DSAP at byte 0, SSAP at byte 1, Control at byte 2 */ + struct ocelot_vcap_u32 llc; +}; + +struct ocelot_ace_frame_snap { + struct ocelot_vcap_u48 dmac; + struct ocelot_vcap_u48 smac; + + /* SNAP header: Organization Code at byte 0, Type at byte 3 */ + struct ocelot_vcap_u40 snap; +}; + +struct ocelot_ace_frame_arp { + struct ocelot_vcap_u48 smac; + enum ocelot_vcap_bit arp; /* Opcode ARP/RARP */ + enum ocelot_vcap_bit req; /* Opcode request/reply */ + enum ocelot_vcap_bit unknown; /* Opcode unknown */ + enum ocelot_vcap_bit smac_match; /* Sender MAC matches SMAC */ + enum ocelot_vcap_bit dmac_match; /* Target MAC matches DMAC */ + + /**< Protocol addr. 
length 4, hardware length 6 */ + enum ocelot_vcap_bit length; + + enum ocelot_vcap_bit ip; /* Protocol address type IP */ + enum ocelot_vcap_bit ethernet; /* Hardware address type Ethernet */ + struct ocelot_vcap_ipv4 sip; /* Sender IP address */ + struct ocelot_vcap_ipv4 dip; /* Target IP address */ +}; + +struct ocelot_ace_frame_ipv4 { + enum ocelot_vcap_bit ttl; /* TTL zero */ + enum ocelot_vcap_bit fragment; /* Fragment */ + enum ocelot_vcap_bit options; /* Header options */ + struct ocelot_vcap_u8 ds; + struct ocelot_vcap_u8 proto; /* Protocol */ + struct ocelot_vcap_ipv4 sip; /* Source IP address */ + struct ocelot_vcap_ipv4 dip; /* Destination IP address */ + struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ + struct ocelot_vcap_udp_tcp sport; /* UDP/TCP: Source port */ + struct ocelot_vcap_udp_tcp dport; /* UDP/TCP: Destination port */ + enum ocelot_vcap_bit tcp_fin; + enum ocelot_vcap_bit tcp_syn; + enum ocelot_vcap_bit tcp_rst; + enum ocelot_vcap_bit tcp_psh; + enum ocelot_vcap_bit tcp_ack; + enum ocelot_vcap_bit tcp_urg; + enum ocelot_vcap_bit sip_eq_dip; /* SIP equals DIP */ + enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT */ + enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ +}; + +struct ocelot_ace_frame_ipv6 { + struct ocelot_vcap_u8 proto; /* IPv6 protocol */ + struct ocelot_vcap_u128 sip; /* IPv6 source (byte 0-7 ignored) */ + enum ocelot_vcap_bit ttl; /* TTL zero */ + struct ocelot_vcap_u8 ds; + struct ocelot_vcap_u48 data; /* Not UDP/TCP: IP data */ + struct ocelot_vcap_udp_tcp sport; + struct ocelot_vcap_udp_tcp dport; + enum ocelot_vcap_bit tcp_fin; + enum ocelot_vcap_bit tcp_syn; + enum ocelot_vcap_bit tcp_rst; + enum ocelot_vcap_bit tcp_psh; + enum ocelot_vcap_bit tcp_ack; + enum ocelot_vcap_bit tcp_urg; + enum ocelot_vcap_bit sip_eq_dip; /* SIP equals DIP */ + enum ocelot_vcap_bit sport_eq_dport; /* SPORT equals DPORT */ + enum ocelot_vcap_bit seq_zero; /* TCP sequence number is zero */ +}; + +enum ocelot_ace_action { + OCELOT_ACL_ACTION_DROP, + OCELOT_ACL_ACTION_TRAP, +}; + +struct ocelot_ace_stats { + u64 bytes; + u64 pkts; + u64 used; +}; + +struct ocelot_ace_rule { + struct list_head list; + struct ocelot_port *port; + + u16 prio; + u32 id; + + enum ocelot_ace_action action; + struct ocelot_ace_stats stats; + int chip_port; + + enum ocelot_vcap_bit dmac_mc; + enum ocelot_vcap_bit dmac_bc; + struct ocelot_ace_vlan vlan; + + enum ocelot_ace_type type; + union { + /* OCELOT_ACE_TYPE_ANY: No specific fields */ + struct ocelot_ace_frame_etype etype; + struct ocelot_ace_frame_llc llc; + struct ocelot_ace_frame_snap snap; + struct ocelot_ace_frame_arp arp; + struct ocelot_ace_frame_ipv4 ipv4; + struct ocelot_ace_frame_ipv6 ipv6; + } frame; +}; + +struct ocelot_acl_block { + struct list_head rules; + struct ocelot *ocelot; + int count; +}; + +int ocelot_ace_rule_offload_add(struct ocelot_ace_rule *rule); +int ocelot_ace_rule_offload_del(struct ocelot_ace_rule *rule); +int ocelot_ace_rule_stats_update(struct ocelot_ace_rule *rule); + +int ocelot_ace_init(struct ocelot *ocelot); +void ocelot_ace_deinit(void); + +int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port, + struct flow_block_offload *f); +void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port, + struct flow_block_offload *f); + +#endif /* _MSCC_OCELOT_ACE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c index e7f90101d2e0..58bde1a9eacb 100644 --- a/drivers/net/ethernet/mscc/ocelot_board.c +++ 
b/drivers/net/ethernet/mscc/ocelot_board.c @@ -188,6 +188,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) { QSYS, "qsys" }, { ANA, "ana" }, { QS, "qs" }, + { S2, "s2" }, }; if (!np && !pdev->dev.platform_data) diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c new file mode 100644 index 000000000000..7aaddc09c185 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Microsemi Ocelot Switch driver + * Copyright (c) 2019 Microsemi Corporation + */ + +#include <net/pkt_cls.h> +#include <net/tc_act/tc_gact.h> + +#include "ocelot_ace.h" + +struct ocelot_port_block { + struct ocelot_acl_block *block; + struct ocelot_port *port; +}; + +static u16 get_prio(u32 prio) +{ + /* prio starts from 0x10000 while the ids start from 0 */ + return prio >> 16; +} + +static int ocelot_flower_parse_action(struct flow_cls_offload *f, + struct ocelot_ace_rule *rule) +{ + const struct flow_action_entry *a; + int i; + + if (f->rule->action.num_entries != 1) + return -EOPNOTSUPP; + + flow_action_for_each(i, a, &f->rule->action) { + switch (a->id) { + case FLOW_ACTION_DROP: + rule->action = OCELOT_ACL_ACTION_DROP; + break; + case FLOW_ACTION_TRAP: + rule->action = OCELOT_ACL_ACTION_TRAP; + break; + default: + return -EOPNOTSUPP; + } + } + + return 0; +} + +static int ocelot_flower_parse(struct flow_cls_offload *f, + struct ocelot_ace_rule *ocelot_rule) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS))) { + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + u16 proto = ntohs(f->common.protocol); + + /* The hardware supports MAC matches only for the MAC_ETYPE key, + * so if any other matches (port, TCP flags, etc.) are added, + * just bail out + */ + if ((dissector->used_keys & + (BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL))) != + (BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_CONTROL))) + return -EOPNOTSUPP; + + if (proto == ETH_P_IP || + proto == ETH_P_IPV6 || + proto == ETH_P_ARP) + return -EOPNOTSUPP; + + flow_rule_match_eth_addrs(rule, &match); + ocelot_rule->type = OCELOT_ACE_TYPE_ETYPE; + ether_addr_copy(ocelot_rule->frame.etype.dmac.value, + match.key->dst); + ether_addr_copy(ocelot_rule->frame.etype.smac.value, + match.key->src); + ether_addr_copy(ocelot_rule->frame.etype.dmac.mask, + match.mask->dst); + ether_addr_copy(ocelot_rule->frame.etype.smac.mask, + match.mask->src); + goto finished_key_parsing; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + if (ntohs(match.key->n_proto) == ETH_P_IP) { + ocelot_rule->type = OCELOT_ACE_TYPE_IPV4; + ocelot_rule->frame.ipv4.proto.value[0] = + match.key->ip_proto; + ocelot_rule->frame.ipv4.proto.mask[0] = + match.mask->ip_proto; + } + if (ntohs(match.key->n_proto) ==
ETH_P_IPV6) { + ocelot_rule->type = OCELOT_ACE_TYPE_IPV6; + ocelot_rule->frame.ipv6.proto.value[0] = + match.key->ip_proto; + ocelot_rule->frame.ipv6.proto.mask[0] = + match.mask->ip_proto; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) && + ntohs(f->common.protocol) == ETH_P_IP) { + struct flow_match_ipv4_addrs match; + u8 *tmp; + + flow_rule_match_ipv4_addrs(rule, &match); + tmp = &ocelot_rule->frame.ipv4.sip.value.addr[0]; + memcpy(tmp, &match.key->src, 4); + + tmp = &ocelot_rule->frame.ipv4.sip.mask.addr[0]; + memcpy(tmp, &match.mask->src, 4); + + tmp = &ocelot_rule->frame.ipv4.dip.value.addr[0]; + memcpy(tmp, &match.key->dst, 4); + + tmp = &ocelot_rule->frame.ipv4.dip.mask.addr[0]; + memcpy(tmp, &match.mask->dst, 4); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) && + ntohs(f->common.protocol) == ETH_P_IPV6) { + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + ocelot_rule->frame.ipv4.sport.value = ntohs(match.key->src); + ocelot_rule->frame.ipv4.sport.mask = ntohs(match.mask->src); + ocelot_rule->frame.ipv4.dport.value = ntohs(match.key->dst); + ocelot_rule->frame.ipv4.dport.mask = ntohs(match.mask->dst); + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + ocelot_rule->type = OCELOT_ACE_TYPE_ANY; + ocelot_rule->vlan.vid.value = match.key->vlan_id; + ocelot_rule->vlan.vid.mask = match.mask->vlan_id; + ocelot_rule->vlan.pcp.value[0] = match.key->vlan_priority; + ocelot_rule->vlan.pcp.mask[0] = match.mask->vlan_priority; + } + +finished_key_parsing: + ocelot_rule->prio = get_prio(f->common.prio); + ocelot_rule->id = f->cookie; + return ocelot_flower_parse_action(f, ocelot_rule); +} + +static +struct ocelot_ace_rule *ocelot_ace_rule_create(struct flow_cls_offload *f, + struct ocelot_port_block *block) +{ + struct ocelot_ace_rule *rule; + + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return NULL; + + rule->port = block->port; + rule->chip_port = block->port->chip_port; + return rule; +} + +static int ocelot_flower_replace(struct flow_cls_offload *f, + struct ocelot_port_block *port_block) +{ + struct ocelot_ace_rule *rule; + int ret; + + rule = ocelot_ace_rule_create(f, port_block); + if (!rule) + return -ENOMEM; + + ret = ocelot_flower_parse(f, rule); + if (ret) { + kfree(rule); + return ret; + } + + ret = ocelot_ace_rule_offload_add(rule); + if (ret) + return ret; + + port_block->port->tc.offload_cnt++; + return 0; +} + +static int ocelot_flower_destroy(struct flow_cls_offload *f, + struct ocelot_port_block *port_block) +{ + struct ocelot_ace_rule rule; + int ret; + + rule.prio = get_prio(f->common.prio); + rule.port = port_block->port; + rule.id = f->cookie; + + ret = ocelot_ace_rule_offload_del(&rule); + if (ret) + return ret; + + port_block->port->tc.offload_cnt--; + return 0; +} + +static int ocelot_flower_stats_update(struct flow_cls_offload *f, + struct ocelot_port_block *port_block) +{ + struct ocelot_ace_rule rule; + int ret; + + rule.prio = get_prio(f->common.prio); + rule.port = port_block->port; + rule.id = f->cookie; + ret = ocelot_ace_rule_stats_update(&rule); + if (ret) + return ret; + + flow_stats_update(&f->stats, 0x0, rule.stats.pkts, 0x0); + return 0; +} + +static int ocelot_setup_tc_cls_flower(struct flow_cls_offload *f, + struct ocelot_port_block *port_block) +{ + switch (f->command) { + case 
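+ /* A rule is identified by its tc cookie and priority; REPLACE, + * DESTROY and STATS map onto ocelot_ace_rule_offload_add(), + * ocelot_ace_rule_offload_del() and ocelot_ace_rule_stats_update(). + */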
FLOW_CLS_REPLACE: + return ocelot_flower_replace(f, port_block); + case FLOW_CLS_DESTROY: + return ocelot_flower_destroy(f, port_block); + case FLOW_CLS_STATS: + return ocelot_flower_stats_update(f, port_block); + default: + return -EOPNOTSUPP; + } +} + +static int ocelot_setup_tc_block_cb_flower(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct ocelot_port_block *port_block = cb_priv; + + if (!tc_cls_can_offload_and_chain0(port_block->port->dev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ocelot_setup_tc_cls_flower(type_data, cb_priv); + case TC_SETUP_CLSMATCHALL: + return 0; + default: + return -EOPNOTSUPP; + } +} + +static struct ocelot_port_block* +ocelot_port_block_create(struct ocelot_port *port) +{ + struct ocelot_port_block *port_block; + + port_block = kzalloc(sizeof(*port_block), GFP_KERNEL); + if (!port_block) + return NULL; + + port_block->port = port; + + return port_block; +} + +static void ocelot_port_block_destroy(struct ocelot_port_block *block) +{ + kfree(block); +} + +static void ocelot_tc_block_unbind(void *cb_priv) +{ + struct ocelot_port_block *port_block = cb_priv; + + ocelot_port_block_destroy(port_block); +} + +int ocelot_setup_tc_block_flower_bind(struct ocelot_port *port, + struct flow_block_offload *f) +{ + struct ocelot_port_block *port_block; + struct flow_block_cb *block_cb; + int ret; + + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + return -EOPNOTSUPP; + + block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower, + port); + if (!block_cb) { + port_block = ocelot_port_block_create(port); + if (!port_block) + return -ENOMEM; + + block_cb = flow_block_cb_alloc(f->net, + ocelot_setup_tc_block_cb_flower, + port, port_block, + ocelot_tc_block_unbind); + if (IS_ERR(block_cb)) { + ret = PTR_ERR(block_cb); + goto err_cb_register; + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, f->driver_block_list); + } else { + port_block = flow_block_cb_priv(block_cb); + } + + flow_block_cb_incref(block_cb); + return 0; + +err_cb_register: + ocelot_port_block_destroy(port_block); + + return ret; +} + +void ocelot_setup_tc_block_flower_unbind(struct ocelot_port *port, + struct flow_block_offload *f) +{ + struct flow_block_cb *block_cb; + + block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower, + port); + if (!block_cb) + return; + + if (!flow_block_cb_decref(block_cb)) { + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + } +} diff --git a/drivers/net/ethernet/mscc/ocelot_police.c b/drivers/net/ethernet/mscc/ocelot_police.c new file mode 100644 index 000000000000..701e82dd749a --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_police.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Microsemi Ocelot Switch driver + * + * Copyright (c) 2019 Microsemi Corporation + */ + +#include "ocelot_police.h" + +enum mscc_qos_rate_mode { + MSCC_QOS_RATE_MODE_DISABLED, /* Policer/shaper disabled */ + MSCC_QOS_RATE_MODE_LINE, /* Measure line rate in kbps incl. IPG */ + MSCC_QOS_RATE_MODE_DATA, /* Measures data rate in kbps excl. IPG */ + MSCC_QOS_RATE_MODE_FRAME, /* Measures frame rate in fps */ + __MSCC_QOS_RATE_MODE_END, + NUM_MSCC_QOS_RATE_MODE = __MSCC_QOS_RATE_MODE_END, + MSCC_QOS_RATE_MODE_MAX = __MSCC_QOS_RATE_MODE_END - 1, +}; + +/* Types for ANA:POL[0-192]:POL_MODE_CFG.FRM_MODE */ +#define POL_MODE_LINERATE 0 /* Incl IPG. Unit: 33 1/3 kbps, 4096 bytes */ +#define POL_MODE_DATARATE 1 /* Excl IPG. 
Unit: 33 1/3 kbps, 4096 bytes */ +#define POL_MODE_FRMRATE_HI 2 /* Unit: 33 1/3 fps, 32.8 frames */ +#define POL_MODE_FRMRATE_LO 3 /* Unit: 1/3 fps, 0.3 frames */ + +/* Policer indexes */ +#define POL_IX_PORT 0 /* 0-11 : Port policers */ +#define POL_IX_QUEUE 32 /* 32-127 : Queue policers */ + +/* Default policer order */ +#define POL_ORDER 0x1d3 /* Ocelot policer order: Serial (QoS -> Port -> VCAP) */ + +struct qos_policer_conf { + enum mscc_qos_rate_mode mode; + bool dlb; /* Enable DLB (dual leaky bucket) mode */ + bool cf; /* Coupling flag (ignored in SLB mode) */ + u32 cir; /* CIR in kbps/fps (ignored in SLB mode) */ + u32 cbs; /* CBS in bytes/frames (ignored in SLB mode) */ + u32 pir; /* PIR in kbps/fps */ + u32 pbs; /* PBS in bytes/frames */ + u8 ipg; /* Size of IPG when MSCC_QOS_RATE_MODE_LINE is chosen */ +}; + +static int qos_policer_conf_set(struct ocelot_port *port, u32 pol_ix, + struct qos_policer_conf *conf) +{ + u32 cf = 0, cir_ena = 0, frm_mode = POL_MODE_LINERATE; + u32 cir = 0, cbs = 0, pir = 0, pbs = 0; + bool cir_discard = 0, pir_discard = 0; + struct ocelot *ocelot = port->ocelot; + u32 pbs_max = 0, cbs_max = 0; + u8 ipg = 20; + u32 value; + + pir = conf->pir; + pbs = conf->pbs; + + switch (conf->mode) { + case MSCC_QOS_RATE_MODE_LINE: + case MSCC_QOS_RATE_MODE_DATA: + if (conf->mode == MSCC_QOS_RATE_MODE_LINE) { + frm_mode = POL_MODE_LINERATE; + ipg = min_t(u8, GENMASK(4, 0), conf->ipg); + } else { + frm_mode = POL_MODE_DATARATE; + } + if (conf->dlb) { + cir_ena = 1; + cir = conf->cir; + cbs = conf->cbs; + if (cir == 0 && cbs == 0) { + /* Discard CIR frames */ + cir_discard = 1; + } else { + cir = DIV_ROUND_UP(cir, 100); + cir *= 3; /* 33 1/3 kbps */ + cbs = DIV_ROUND_UP(cbs, 4096); + cbs = (cbs ? cbs : 1); /* No zero burst size */ + cbs_max = 60; /* Limit burst size */ + cf = conf->cf; + if (cf) + pir += conf->cir; + } + } + if (pir == 0 && pbs == 0) { + /* Discard PIR frames */ + pir_discard = 1; + } else { + pir = DIV_ROUND_UP(pir, 100); + pir *= 3; /* 33 1/3 kbps */ + pbs = DIV_ROUND_UP(pbs, 4096); + pbs = (pbs ? pbs : 1); /* No zero burst size */ + pbs_max = 60; /* Limit burst size */ + } + break; + case MSCC_QOS_RATE_MODE_FRAME: + if (pir >= 100) { + frm_mode = POL_MODE_FRMRATE_HI; + pir = DIV_ROUND_UP(pir, 100); + pir *= 3; /* 33 1/3 fps */ + pbs = (pbs * 10) / 328; /* 32.8 frames */ + pbs = (pbs ? pbs : 1); /* No zero burst size */ + pbs_max = GENMASK(6, 0); /* Limit burst size */ + } else { + frm_mode = POL_MODE_FRMRATE_LO; + if (pir == 0 && pbs == 0) { + /* Discard all frames */ + pir_discard = 1; + cir_discard = 1; + } else { + pir *= 3; /* 1/3 fps */ + pbs = (pbs * 10) / 3; /* 0.3 frames */ + pbs = (pbs ? pbs : 1); /* No zero burst size */ + pbs_max = 61; /* Limit burst size */ + } + } + break; + default: /* MSCC_QOS_RATE_MODE_DISABLED */ + /* Disable policer using maximum rate and zero burst */ + pir = GENMASK(15, 0); + pbs = 0; + break; + } + + /* Check limits */ + if (pir > GENMASK(15, 0)) { + netdev_err(port->dev, "Invalid pir\n"); + return -EINVAL; + } + + if (cir > GENMASK(15, 0)) { + netdev_err(port->dev, "Invalid cir\n"); + return -EINVAL; + } + + if (pbs > pbs_max) { + netdev_err(port->dev, "Invalid pbs\n"); + return -EINVAL; + } + + if (cbs > cbs_max) { + netdev_err(port->dev, "Invalid cbs\n"); + return -EINVAL; + } + + value = (ANA_POL_MODE_CFG_IPG_SIZE(ipg) | + ANA_POL_MODE_CFG_FRM_MODE(frm_mode) | + (cf ? ANA_POL_MODE_CFG_DLB_COUPLED : 0) | + (cir_ena ?
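+ /* A worked example of the encoding above: pir = 100000 kbps becomes + * DIV_ROUND_UP(100000, 100) * 3 = 3000 units of 33 1/3 kbps, and a + * pbs of 12288 bytes becomes DIV_ROUND_UP(12288, 4096) = 3 units of + * 4096 bytes. + */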
ANA_POL_MODE_CFG_CIR_ENA : 0) | + ANA_POL_MODE_CFG_OVERSHOOT_ENA); + + ocelot_write_gix(ocelot, value, ANA_POL_MODE_CFG, pol_ix); + + ocelot_write_gix(ocelot, + ANA_POL_PIR_CFG_PIR_RATE(pir) | + ANA_POL_PIR_CFG_PIR_BURST(pbs), + ANA_POL_PIR_CFG, pol_ix); + + ocelot_write_gix(ocelot, + (pir_discard ? GENMASK(22, 0) : 0), + ANA_POL_PIR_STATE, pol_ix); + + ocelot_write_gix(ocelot, + ANA_POL_CIR_CFG_CIR_RATE(cir) | + ANA_POL_CIR_CFG_CIR_BURST(cbs), + ANA_POL_CIR_CFG, pol_ix); + + ocelot_write_gix(ocelot, + (cir_discard ? GENMASK(22, 0) : 0), + ANA_POL_CIR_STATE, pol_ix); + + return 0; +} + +int ocelot_port_policer_add(struct ocelot_port *port, + struct ocelot_policer *pol) +{ + struct ocelot *ocelot = port->ocelot; + struct qos_policer_conf pp = { 0 }; + int err; + + if (!pol) + return -EINVAL; + + pp.mode = MSCC_QOS_RATE_MODE_DATA; + pp.pir = pol->rate; + pp.pbs = pol->burst; + + netdev_dbg(port->dev, + "%s: port %u pir %u kbps, pbs %u bytes\n", + __func__, port->chip_port, pp.pir, pp.pbs); + + err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp); + if (err) + return err; + + ocelot_rmw_gix(ocelot, + ANA_PORT_POL_CFG_PORT_POL_ENA | + ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER), + ANA_PORT_POL_CFG_PORT_POL_ENA | + ANA_PORT_POL_CFG_POL_ORDER_M, + ANA_PORT_POL_CFG, port->chip_port); + + return 0; +} + +int ocelot_port_policer_del(struct ocelot_port *port) +{ + struct ocelot *ocelot = port->ocelot; + struct qos_policer_conf pp = { 0 }; + int err; + + netdev_dbg(port->dev, "%s: port %u\n", __func__, port->chip_port); + + pp.mode = MSCC_QOS_RATE_MODE_DISABLED; + + err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp); + if (err) + return err; + + ocelot_rmw_gix(ocelot, + ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER), + ANA_PORT_POL_CFG_PORT_POL_ENA | + ANA_PORT_POL_CFG_POL_ORDER_M, + ANA_PORT_POL_CFG, port->chip_port); + + return 0; +} diff --git a/drivers/net/ethernet/mscc/ocelot_police.h b/drivers/net/ethernet/mscc/ocelot_police.h new file mode 100644 index 000000000000..d1137f79efda --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_police.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Microsemi Ocelot Switch driver + * + * Copyright (c) 2019 Microsemi Corporation + */ + +#ifndef _MSCC_OCELOT_POLICE_H_ +#define _MSCC_OCELOT_POLICE_H_ + +#include "ocelot.h" + +struct ocelot_policer { + u32 rate; /* kilobit per second */ + u32 burst; /* bytes */ +}; + +int ocelot_port_policer_add(struct ocelot_port *port, + struct ocelot_policer *pol); + +int ocelot_port_policer_del(struct ocelot_port *port); + +#endif /* _MSCC_OCELOT_POLICE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_regs.c b/drivers/net/ethernet/mscc/ocelot_regs.c index 9271af18b93b..6c387f994ec5 100644 --- a/drivers/net/ethernet/mscc/ocelot_regs.c +++ b/drivers/net/ethernet/mscc/ocelot_regs.c @@ -224,12 +224,23 @@ static const u32 ocelot_sys_regmap[] = { REG(SYS_PTP_CFG, 0x0006c4), }; +static const u32 ocelot_s2_regmap[] = { + REG(S2_CORE_UPDATE_CTRL, 0x000000), + REG(S2_CORE_MV_CFG, 0x000004), + REG(S2_CACHE_ENTRY_DAT, 0x000008), + REG(S2_CACHE_MASK_DAT, 0x000108), + REG(S2_CACHE_ACTION_DAT, 0x000208), + REG(S2_CACHE_CNT_DAT, 0x000308), + REG(S2_CACHE_TG_DAT, 0x000388), +}; + static const u32 *ocelot_regmap[] = { [ANA] = ocelot_ana_regmap, [QS] = ocelot_qs_regmap, [QSYS] = ocelot_qsys_regmap, [REW] = ocelot_rew_regmap, [SYS] = ocelot_sys_regmap, + [S2] = ocelot_s2_regmap, }; static const struct reg_field ocelot_regfields[] = { diff --git a/drivers/net/ethernet/mscc/ocelot_s2.h 
b/drivers/net/ethernet/mscc/ocelot_s2.h new file mode 100644 index 000000000000..80107bec2e45 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_s2.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Microsemi Ocelot Switch driver + * Copyright (c) 2018 Microsemi Corporation + */ + +#ifndef _OCELOT_S2_CORE_H_ +#define _OCELOT_S2_CORE_H_ + +#define S2_CORE_UPDATE_CTRL_UPDATE_CMD(x) (((x) << 22) & GENMASK(24, 22)) +#define S2_CORE_UPDATE_CTRL_UPDATE_CMD_M GENMASK(24, 22) +#define S2_CORE_UPDATE_CTRL_UPDATE_CMD_X(x) (((x) & GENMASK(24, 22)) >> 22) +#define S2_CORE_UPDATE_CTRL_UPDATE_ENTRY_DIS BIT(21) +#define S2_CORE_UPDATE_CTRL_UPDATE_ACTION_DIS BIT(20) +#define S2_CORE_UPDATE_CTRL_UPDATE_CNT_DIS BIT(19) +#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR(x) (((x) << 3) & GENMASK(18, 3)) +#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR_M GENMASK(18, 3) +#define S2_CORE_UPDATE_CTRL_UPDATE_ADDR_X(x) (((x) & GENMASK(18, 3)) >> 3) +#define S2_CORE_UPDATE_CTRL_UPDATE_SHOT BIT(2) +#define S2_CORE_UPDATE_CTRL_CLEAR_CACHE BIT(1) +#define S2_CORE_UPDATE_CTRL_MV_TRAFFIC_IGN BIT(0) + +#define S2_CORE_MV_CFG_MV_NUM_POS(x) (((x) << 16) & GENMASK(31, 16)) +#define S2_CORE_MV_CFG_MV_NUM_POS_M GENMASK(31, 16) +#define S2_CORE_MV_CFG_MV_NUM_POS_X(x) (((x) & GENMASK(31, 16)) >> 16) +#define S2_CORE_MV_CFG_MV_SIZE(x) ((x) & GENMASK(15, 0)) +#define S2_CORE_MV_CFG_MV_SIZE_M GENMASK(15, 0) + +#define S2_CACHE_ENTRY_DAT_RSZ 0x4 + +#define S2_CACHE_MASK_DAT_RSZ 0x4 + +#define S2_CACHE_ACTION_DAT_RSZ 0x4 + +#define S2_CACHE_CNT_DAT_RSZ 0x4 + +#define S2_STICKY_VCAP_ROW_DELETED_STICKY BIT(0) + +#define S2_BIST_CTRL_TCAM_BIST BIT(1) +#define S2_BIST_CTRL_TCAM_INIT BIT(0) + +#define S2_BIST_CFG_TCAM_BIST_SOE_ENA BIT(8) +#define S2_BIST_CFG_TCAM_HCG_DIS BIT(7) +#define S2_BIST_CFG_TCAM_CG_DIS BIT(6) +#define S2_BIST_CFG_TCAM_BIAS(x) ((x) & GENMASK(5, 0)) +#define S2_BIST_CFG_TCAM_BIAS_M GENMASK(5, 0) + +#define S2_BIST_STAT_BIST_RT_ERR BIT(15) +#define S2_BIST_STAT_BIST_PENC_ERR BIT(14) +#define S2_BIST_STAT_BIST_COMP_ERR BIT(13) +#define S2_BIST_STAT_BIST_ADDR_ERR BIT(12) +#define S2_BIST_STAT_BIST_BL1E_ERR BIT(11) +#define S2_BIST_STAT_BIST_BL1_ERR BIT(10) +#define S2_BIST_STAT_BIST_BL0E_ERR BIT(9) +#define S2_BIST_STAT_BIST_BL0_ERR BIT(8) +#define S2_BIST_STAT_BIST_PH1_ERR BIT(7) +#define S2_BIST_STAT_BIST_PH0_ERR BIT(6) +#define S2_BIST_STAT_BIST_PV1_ERR BIT(5) +#define S2_BIST_STAT_BIST_PV0_ERR BIT(4) +#define S2_BIST_STAT_BIST_RUN BIT(3) +#define S2_BIST_STAT_BIST_ERR BIT(2) +#define S2_BIST_STAT_BIST_BUSY BIT(1) +#define S2_BIST_STAT_TCAM_RDY BIT(0) + +#endif /* _OCELOT_S2_CORE_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c new file mode 100644 index 000000000000..9e6464ffae5d --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_tc.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Microsemi Ocelot Switch TC driver + * + * Copyright (c) 2019 Microsemi Corporation + */ + +#include "ocelot_tc.h" +#include "ocelot_police.h" +#include "ocelot_ace.h" +#include <net/pkt_cls.h> + +static int ocelot_setup_tc_cls_matchall(struct ocelot_port *port, + struct tc_cls_matchall_offload *f, + bool ingress) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct ocelot_policer pol = { 0 }; + struct flow_action_entry *action; + int err; + + netdev_dbg(port->dev, "%s: port %u command %d cookie %lu\n", + __func__, port->chip_port, f->command, f->cookie); + + if (!ingress) { + NL_SET_ERR_MSG_MOD(extack, "Only ingress is supported"); + return 
-EOPNOTSUPP; + } + + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: + if (!flow_offload_has_one_action(&f->rule->action)) { + NL_SET_ERR_MSG_MOD(extack, + "Only one action is supported"); + return -EOPNOTSUPP; + } + + if (port->tc.block_shared) { + NL_SET_ERR_MSG_MOD(extack, + "Rate limit is not supported on shared blocks"); + return -EOPNOTSUPP; + } + + action = &f->rule->action.entries[0]; + + if (action->id != FLOW_ACTION_POLICE) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported action"); + return -EOPNOTSUPP; + } + + if (port->tc.police_id && port->tc.police_id != f->cookie) { + NL_SET_ERR_MSG_MOD(extack, + "Only one policer per port is supported"); + return -EEXIST; + } + + pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8; + pol.burst = (u32)div_u64(action->police.rate_bytes_ps * + PSCHED_NS2TICKS(action->police.burst), + PSCHED_TICKS_PER_SEC); + + err = ocelot_port_policer_add(port, &pol); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Could not add policer"); + return err; + } + + port->tc.police_id = f->cookie; + port->tc.offload_cnt++; + return 0; + case TC_CLSMATCHALL_DESTROY: + if (port->tc.police_id != f->cookie) + return -ENOENT; + + err = ocelot_port_policer_del(port); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Could not delete policer"); + return err; + } + port->tc.police_id = 0; + port->tc.offload_cnt--; + return 0; + case TC_CLSMATCHALL_STATS: /* fall through */ + default: + return -EOPNOTSUPP; + } +} + +static int ocelot_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, + void *cb_priv, bool ingress) +{ + struct ocelot_port *port = cb_priv; + + if (!tc_cls_can_offload_and_chain0(port->dev, type_data)) + return -EOPNOTSUPP; + + switch (type) { + case TC_SETUP_CLSMATCHALL: + netdev_dbg(port->dev, "tc_block_cb: TC_SETUP_CLSMATCHALL %s\n", + ingress ? "ingress" : "egress"); + + return ocelot_setup_tc_cls_matchall(port, type_data, ingress); + case TC_SETUP_CLSFLOWER: + return 0; + default: + netdev_dbg(port->dev, "tc_block_cb: type %d %s\n", + type, + ingress ?
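+ /* TC_SETUP_CLSFLOWER returned 0 above because flower rules are + * delivered through the separate block callback registered by + * ocelot_setup_tc_block_flower_bind(). + */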
"ingress" : "egress"); + + return -EOPNOTSUPP; + } +} + +static int ocelot_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, + void *cb_priv) +{ + return ocelot_setup_tc_block_cb(type, type_data, + cb_priv, true); +} + +static int ocelot_setup_tc_block_cb_eg(enum tc_setup_type type, + void *type_data, + void *cb_priv) +{ + return ocelot_setup_tc_block_cb(type, type_data, + cb_priv, false); +} + +static LIST_HEAD(ocelot_block_cb_list); + +static int ocelot_setup_tc_block(struct ocelot_port *port, + struct flow_block_offload *f) +{ + struct flow_block_cb *block_cb; + tc_setup_cb_t *cb; + int err; + + netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n", + f->command, f->binder_type); + + if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) { + cb = ocelot_setup_tc_block_cb_ig; + port->tc.block_shared = f->block_shared; + } else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) { + cb = ocelot_setup_tc_block_cb_eg; + } else { + return -EOPNOTSUPP; + } + + f->driver_block_list = &ocelot_block_cb_list; + + switch (f->command) { + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(cb, port, &ocelot_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(f->net, cb, port, port, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + err = ocelot_setup_tc_block_flower_bind(port, f); + if (err < 0) { + flow_block_cb_free(block_cb); + return err; + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, f->driver_block_list); + return 0; + case FLOW_BLOCK_UNBIND: + block_cb = flow_block_cb_lookup(f, cb, port); + if (!block_cb) + return -ENOENT; + + ocelot_setup_tc_block_flower_unbind(port, f); + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); + return 0; + default: + return -EOPNOTSUPP; + } +} + +int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct ocelot_port *port = netdev_priv(dev); + + switch (type) { + case TC_SETUP_BLOCK: + return ocelot_setup_tc_block(port, type_data); + default: + return -EOPNOTSUPP; + } + return 0; +} diff --git a/drivers/net/ethernet/mscc/ocelot_tc.h b/drivers/net/ethernet/mscc/ocelot_tc.h new file mode 100644 index 000000000000..61757c2250a6 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_tc.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* Microsemi Ocelot Switch driver + * + * Copyright (c) 2019 Microsemi Corporation + */ + +#ifndef _MSCC_OCELOT_TC_H_ +#define _MSCC_OCELOT_TC_H_ + +#include <linux/netdevice.h> + +struct ocelot_port_tc { + bool block_shared; + unsigned long offload_cnt; + + unsigned long police_id; +}; + +int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data); + +#endif /* _MSCC_OCELOT_TC_H_ */ diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.h b/drivers/net/ethernet/mscc/ocelot_vcap.h new file mode 100644 index 000000000000..e22eac1da783 --- /dev/null +++ b/drivers/net/ethernet/mscc/ocelot_vcap.h @@ -0,0 +1,403 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) + * Microsemi Ocelot Switch driver + * Copyright (c) 2019 Microsemi Corporation + */ + +#ifndef _OCELOT_VCAP_H_ +#define _OCELOT_VCAP_H_ + +/* ================================================================= + * VCAP Common + * ================================================================= + */ + +/* VCAP Type-Group values */ +#define VCAP_TG_NONE 0 /* Entry is invalid */ +#define VCAP_TG_FULL 1 /* Full entry */ +#define VCAP_TG_HALF 2 /* Half entry */ +#define VCAP_TG_QUARTER 3 /* 
Quarter entry */ + +/* ================================================================= + * VCAP IS2 + * ================================================================= + */ + +#define VCAP_IS2_CNT 64 +#define VCAP_IS2_ENTRY_WIDTH 376 +#define VCAP_IS2_ACTION_WIDTH 99 +#define VCAP_PORT_CNT 11 + +/* IS2 half key types */ +#define IS2_TYPE_ETYPE 0 +#define IS2_TYPE_LLC 1 +#define IS2_TYPE_SNAP 2 +#define IS2_TYPE_ARP 3 +#define IS2_TYPE_IP_UDP_TCP 4 +#define IS2_TYPE_IP_OTHER 5 +#define IS2_TYPE_IPV6 6 +#define IS2_TYPE_OAM 7 +#define IS2_TYPE_SMAC_SIP6 8 +#define IS2_TYPE_ANY 100 /* Pseudo type */ + +/* IS2 half key type mask for matching any IP */ +#define IS2_TYPE_MASK_IP_ANY 0xe + +/* IS2 action types */ +#define IS2_ACTION_TYPE_NORMAL 0 +#define IS2_ACTION_TYPE_SMAC_SIP 1 + +/* IS2 MASK_MODE values */ +#define IS2_ACT_MASK_MODE_NONE 0 +#define IS2_ACT_MASK_MODE_FILTER 1 +#define IS2_ACT_MASK_MODE_POLICY 2 +#define IS2_ACT_MASK_MODE_REDIR 3 + +/* IS2 REW_OP values */ +#define IS2_ACT_REW_OP_NONE 0 +#define IS2_ACT_REW_OP_PTP_ONE 2 +#define IS2_ACT_REW_OP_PTP_TWO 3 +#define IS2_ACT_REW_OP_SPECIAL 8 +#define IS2_ACT_REW_OP_PTP_ORG 9 +#define IS2_ACT_REW_OP_PTP_ONE_SUB_DELAY_1 (IS2_ACT_REW_OP_PTP_ONE | (1 << 3)) +#define IS2_ACT_REW_OP_PTP_ONE_SUB_DELAY_2 (IS2_ACT_REW_OP_PTP_ONE | (2 << 3)) +#define IS2_ACT_REW_OP_PTP_ONE_ADD_DELAY (IS2_ACT_REW_OP_PTP_ONE | (1 << 5)) +#define IS2_ACT_REW_OP_PTP_ONE_ADD_SUB BIT(7) + +#define VCAP_PORT_WIDTH 4 + +/* IS2 quarter key - SMAC_SIP4 */ +#define IS2_QKO_IGR_PORT 0 +#define IS2_QKL_IGR_PORT VCAP_PORT_WIDTH +#define IS2_QKO_L2_SMAC (IS2_QKO_IGR_PORT + IS2_QKL_IGR_PORT) +#define IS2_QKL_L2_SMAC 48 +#define IS2_QKO_L3_IP4_SIP (IS2_QKO_L2_SMAC + IS2_QKL_L2_SMAC) +#define IS2_QKL_L3_IP4_SIP 32 + +/* IS2 half key - common */ +#define IS2_HKO_TYPE 0 +#define IS2_HKL_TYPE 4 +#define IS2_HKO_FIRST (IS2_HKO_TYPE + IS2_HKL_TYPE) +#define IS2_HKL_FIRST 1 +#define IS2_HKO_PAG (IS2_HKO_FIRST + IS2_HKL_FIRST) +#define IS2_HKL_PAG 8 +#define IS2_HKO_IGR_PORT_MASK (IS2_HKO_PAG + IS2_HKL_PAG) +#define IS2_HKL_IGR_PORT_MASK (VCAP_PORT_CNT + 1) +#define IS2_HKO_SERVICE_FRM (IS2_HKO_IGR_PORT_MASK + IS2_HKL_IGR_PORT_MASK) +#define IS2_HKL_SERVICE_FRM 1 +#define IS2_HKO_HOST_MATCH (IS2_HKO_SERVICE_FRM + IS2_HKL_SERVICE_FRM) +#define IS2_HKL_HOST_MATCH 1 +#define IS2_HKO_L2_MC (IS2_HKO_HOST_MATCH + IS2_HKL_HOST_MATCH) +#define IS2_HKL_L2_MC 1 +#define IS2_HKO_L2_BC (IS2_HKO_L2_MC + IS2_HKL_L2_MC) +#define IS2_HKL_L2_BC 1 +#define IS2_HKO_VLAN_TAGGED (IS2_HKO_L2_BC + IS2_HKL_L2_BC) +#define IS2_HKL_VLAN_TAGGED 1 +#define IS2_HKO_VID (IS2_HKO_VLAN_TAGGED + IS2_HKL_VLAN_TAGGED) +#define IS2_HKL_VID 12 +#define IS2_HKO_DEI (IS2_HKO_VID + IS2_HKL_VID) +#define IS2_HKL_DEI 1 +#define IS2_HKO_PCP (IS2_HKO_DEI + IS2_HKL_DEI) +#define IS2_HKL_PCP 3 + +/* IS2 half key - MAC_ETYPE/MAC_LLC/MAC_SNAP/OAM common */ +#define IS2_HKO_L2_DMAC (IS2_HKO_PCP + IS2_HKL_PCP) +#define IS2_HKL_L2_DMAC 48 +#define IS2_HKO_L2_SMAC (IS2_HKO_L2_DMAC + IS2_HKL_L2_DMAC) +#define IS2_HKL_L2_SMAC 48 + +/* IS2 half key - MAC_ETYPE */ +#define IS2_HKO_MAC_ETYPE_ETYPE (IS2_HKO_L2_SMAC + IS2_HKL_L2_SMAC) +#define IS2_HKL_MAC_ETYPE_ETYPE 16 +#define IS2_HKO_MAC_ETYPE_L2_PAYLOAD \ + (IS2_HKO_MAC_ETYPE_ETYPE + IS2_HKL_MAC_ETYPE_ETYPE) +#define IS2_HKL_MAC_ETYPE_L2_PAYLOAD 27 + +/* IS2 half key - MAC_LLC */ +#define IS2_HKO_MAC_LLC_L2_LLC IS2_HKO_MAC_ETYPE_ETYPE +#define IS2_HKL_MAC_LLC_L2_LLC 40 + +/* IS2 half key - MAC_SNAP */ +#define IS2_HKO_MAC_SNAP_L2_SNAP IS2_HKO_MAC_ETYPE_ETYPE +#define 
IS2_HKL_MAC_SNAP_L2_SNAP 40 + +/* IS2 half key - ARP */ +#define IS2_HKO_MAC_ARP_L2_SMAC IS2_HKO_L2_DMAC +#define IS2_HKL_MAC_ARP_L2_SMAC 48 +#define IS2_HKO_MAC_ARP_ARP_ADDR_SPACE_OK \ + (IS2_HKO_MAC_ARP_L2_SMAC + IS2_HKL_MAC_ARP_L2_SMAC) +#define IS2_HKL_MAC_ARP_ARP_ADDR_SPACE_OK 1 +#define IS2_HKO_MAC_ARP_ARP_PROTO_SPACE_OK \ + (IS2_HKO_MAC_ARP_ARP_ADDR_SPACE_OK + IS2_HKL_MAC_ARP_ARP_ADDR_SPACE_OK) +#define IS2_HKL_MAC_ARP_ARP_PROTO_SPACE_OK 1 +#define IS2_HKO_MAC_ARP_ARP_LEN_OK \ + (IS2_HKO_MAC_ARP_ARP_PROTO_SPACE_OK + \ + IS2_HKL_MAC_ARP_ARP_PROTO_SPACE_OK) +#define IS2_HKL_MAC_ARP_ARP_LEN_OK 1 +#define IS2_HKO_MAC_ARP_ARP_TGT_MATCH \ + (IS2_HKO_MAC_ARP_ARP_LEN_OK + IS2_HKL_MAC_ARP_ARP_LEN_OK) +#define IS2_HKL_MAC_ARP_ARP_TGT_MATCH 1 +#define IS2_HKO_MAC_ARP_ARP_SENDER_MATCH \ + (IS2_HKO_MAC_ARP_ARP_TGT_MATCH + IS2_HKL_MAC_ARP_ARP_TGT_MATCH) +#define IS2_HKL_MAC_ARP_ARP_SENDER_MATCH 1 +#define IS2_HKO_MAC_ARP_ARP_OPCODE_UNKNOWN \ + (IS2_HKO_MAC_ARP_ARP_SENDER_MATCH + IS2_HKL_MAC_ARP_ARP_SENDER_MATCH) +#define IS2_HKL_MAC_ARP_ARP_OPCODE_UNKNOWN 1 +#define IS2_HKO_MAC_ARP_ARP_OPCODE \ + (IS2_HKO_MAC_ARP_ARP_OPCODE_UNKNOWN + \ + IS2_HKL_MAC_ARP_ARP_OPCODE_UNKNOWN) +#define IS2_HKL_MAC_ARP_ARP_OPCODE 2 +#define IS2_HKO_MAC_ARP_L3_IP4_DIP \ + (IS2_HKO_MAC_ARP_ARP_OPCODE + IS2_HKL_MAC_ARP_ARP_OPCODE) +#define IS2_HKL_MAC_ARP_L3_IP4_DIP 32 +#define IS2_HKO_MAC_ARP_L3_IP4_SIP \ + (IS2_HKO_MAC_ARP_L3_IP4_DIP + IS2_HKL_MAC_ARP_L3_IP4_DIP) +#define IS2_HKL_MAC_ARP_L3_IP4_SIP 32 +#define IS2_HKO_MAC_ARP_DIP_EQ_SIP \ + (IS2_HKO_MAC_ARP_L3_IP4_SIP + IS2_HKL_MAC_ARP_L3_IP4_SIP) +#define IS2_HKL_MAC_ARP_DIP_EQ_SIP 1 + +/* IS2 half key - IP4_TCP_UDP/IP4_OTHER common */ +#define IS2_HKO_IP4 IS2_HKO_L2_DMAC +#define IS2_HKL_IP4 1 +#define IS2_HKO_L3_FRAGMENT (IS2_HKO_IP4 + IS2_HKL_IP4) +#define IS2_HKL_L3_FRAGMENT 1 +#define IS2_HKO_L3_FRAG_OFS_GT0 (IS2_HKO_L3_FRAGMENT + IS2_HKL_L3_FRAGMENT) +#define IS2_HKL_L3_FRAG_OFS_GT0 1 +#define IS2_HKO_L3_OPTIONS (IS2_HKO_L3_FRAG_OFS_GT0 + IS2_HKL_L3_FRAG_OFS_GT0) +#define IS2_HKL_L3_OPTIONS 1 +#define IS2_HKO_L3_TTL_GT0 (IS2_HKO_L3_OPTIONS + IS2_HKL_L3_OPTIONS) +#define IS2_HKL_L3_TTL_GT0 1 +#define IS2_HKO_L3_TOS (IS2_HKO_L3_TTL_GT0 + IS2_HKL_L3_TTL_GT0) +#define IS2_HKL_L3_TOS 8 +#define IS2_HKO_L3_IP4_DIP (IS2_HKO_L3_TOS + IS2_HKL_L3_TOS) +#define IS2_HKL_L3_IP4_DIP 32 +#define IS2_HKO_L3_IP4_SIP (IS2_HKO_L3_IP4_DIP + IS2_HKL_L3_IP4_DIP) +#define IS2_HKL_L3_IP4_SIP 32 +#define IS2_HKO_DIP_EQ_SIP (IS2_HKO_L3_IP4_SIP + IS2_HKL_L3_IP4_SIP) +#define IS2_HKL_DIP_EQ_SIP 1 + +/* IS2 half key - IP4_TCP_UDP */ +#define IS2_HKO_IP4_TCP_UDP_TCP (IS2_HKO_DIP_EQ_SIP + IS2_HKL_DIP_EQ_SIP) +#define IS2_HKL_IP4_TCP_UDP_TCP 1 +#define IS2_HKO_IP4_TCP_UDP_L4_DPORT \ + (IS2_HKO_IP4_TCP_UDP_TCP + IS2_HKL_IP4_TCP_UDP_TCP) +#define IS2_HKL_IP4_TCP_UDP_L4_DPORT 16 +#define IS2_HKO_IP4_TCP_UDP_L4_SPORT \ + (IS2_HKO_IP4_TCP_UDP_L4_DPORT + IS2_HKL_IP4_TCP_UDP_L4_DPORT) +#define IS2_HKL_IP4_TCP_UDP_L4_SPORT 16 +#define IS2_HKO_IP4_TCP_UDP_L4_RNG \ + (IS2_HKO_IP4_TCP_UDP_L4_SPORT + IS2_HKL_IP4_TCP_UDP_L4_SPORT) +#define IS2_HKL_IP4_TCP_UDP_L4_RNG 8 +#define IS2_HKO_IP4_TCP_UDP_SPORT_EQ_DPORT \ + (IS2_HKO_IP4_TCP_UDP_L4_RNG + IS2_HKL_IP4_TCP_UDP_L4_RNG) +#define IS2_HKL_IP4_TCP_UDP_SPORT_EQ_DPORT 1 +#define IS2_HKO_IP4_TCP_UDP_SEQUENCE_EQ0 \ + (IS2_HKO_IP4_TCP_UDP_SPORT_EQ_DPORT + \ + IS2_HKL_IP4_TCP_UDP_SPORT_EQ_DPORT) +#define IS2_HKL_IP4_TCP_UDP_SEQUENCE_EQ0 1 +#define IS2_HKO_IP4_TCP_UDP_L4_FIN \ + (IS2_HKO_IP4_TCP_UDP_SEQUENCE_EQ0 + IS2_HKL_IP4_TCP_UDP_SEQUENCE_EQ0) +#define 
IS2_HKL_IP4_TCP_UDP_L4_FIN 1 +#define IS2_HKO_IP4_TCP_UDP_L4_SYN \ + (IS2_HKO_IP4_TCP_UDP_L4_FIN + IS2_HKL_IP4_TCP_UDP_L4_FIN) +#define IS2_HKL_IP4_TCP_UDP_L4_SYN 1 +#define IS2_HKO_IP4_TCP_UDP_L4_RST \ + (IS2_HKO_IP4_TCP_UDP_L4_SYN + IS2_HKL_IP4_TCP_UDP_L4_SYN) +#define IS2_HKL_IP4_TCP_UDP_L4_RST 1 +#define IS2_HKO_IP4_TCP_UDP_L4_PSH \ + (IS2_HKO_IP4_TCP_UDP_L4_RST + IS2_HKL_IP4_TCP_UDP_L4_RST) +#define IS2_HKL_IP4_TCP_UDP_L4_PSH 1 +#define IS2_HKO_IP4_TCP_UDP_L4_ACK \ + (IS2_HKO_IP4_TCP_UDP_L4_PSH + IS2_HKL_IP4_TCP_UDP_L4_PSH) +#define IS2_HKL_IP4_TCP_UDP_L4_ACK 1 +#define IS2_HKO_IP4_TCP_UDP_L4_URG \ + (IS2_HKO_IP4_TCP_UDP_L4_ACK + IS2_HKL_IP4_TCP_UDP_L4_ACK) +#define IS2_HKL_IP4_TCP_UDP_L4_URG 1 +#define IS2_HKO_IP4_TCP_UDP_L4_1588_DOM \ + (IS2_HKO_IP4_TCP_UDP_L4_URG + IS2_HKL_IP4_TCP_UDP_L4_URG) +#define IS2_HKL_IP4_TCP_UDP_L4_1588_DOM 8 +#define IS2_HKO_IP4_TCP_UDP_L4_1588_VER \ + (IS2_HKO_IP4_TCP_UDP_L4_1588_DOM + IS2_HKL_IP4_TCP_UDP_L4_1588_DOM) +#define IS2_HKL_IP4_TCP_UDP_L4_1588_VER 4 + +/* IS2 half key - IP4_OTHER */ +#define IS2_HKO_IP4_OTHER_L3_PROTO IS2_HKO_IP4_TCP_UDP_TCP +#define IS2_HKL_IP4_OTHER_L3_PROTO 8 +#define IS2_HKO_IP4_OTHER_L3_PAYLOAD \ + (IS2_HKO_IP4_OTHER_L3_PROTO + IS2_HKL_IP4_OTHER_L3_PROTO) +#define IS2_HKL_IP4_OTHER_L3_PAYLOAD 56 + +/* IS2 half key - IP6_STD */ +#define IS2_HKO_IP6_STD_L3_TTL_GT0 IS2_HKO_L2_DMAC +#define IS2_HKL_IP6_STD_L3_TTL_GT0 1 +#define IS2_HKO_IP6_STD_L3_IP6_SIP \ + (IS2_HKO_IP6_STD_L3_TTL_GT0 + IS2_HKL_IP6_STD_L3_TTL_GT0) +#define IS2_HKL_IP6_STD_L3_IP6_SIP 128 +#define IS2_HKO_IP6_STD_L3_PROTO \ + (IS2_HKO_IP6_STD_L3_IP6_SIP + IS2_HKL_IP6_STD_L3_IP6_SIP) +#define IS2_HKL_IP6_STD_L3_PROTO 8 + +/* IS2 half key - OAM */ +#define IS2_HKO_OAM_OAM_MEL_FLAGS IS2_HKO_MAC_ETYPE_ETYPE +#define IS2_HKL_OAM_OAM_MEL_FLAGS 7 +#define IS2_HKO_OAM_OAM_VER \ + (IS2_HKO_OAM_OAM_MEL_FLAGS + IS2_HKL_OAM_OAM_MEL_FLAGS) +#define IS2_HKL_OAM_OAM_VER 5 +#define IS2_HKO_OAM_OAM_OPCODE (IS2_HKO_OAM_OAM_VER + IS2_HKL_OAM_OAM_VER) +#define IS2_HKL_OAM_OAM_OPCODE 8 +#define IS2_HKO_OAM_OAM_FLAGS (IS2_HKO_OAM_OAM_OPCODE + IS2_HKL_OAM_OAM_OPCODE) +#define IS2_HKL_OAM_OAM_FLAGS 8 +#define IS2_HKO_OAM_OAM_MEPID (IS2_HKO_OAM_OAM_FLAGS + IS2_HKL_OAM_OAM_FLAGS) +#define IS2_HKL_OAM_OAM_MEPID 16 +#define IS2_HKO_OAM_OAM_CCM_CNTS_EQ0 \ + (IS2_HKO_OAM_OAM_MEPID + IS2_HKL_OAM_OAM_MEPID) +#define IS2_HKL_OAM_OAM_CCM_CNTS_EQ0 1 + +/* IS2 half key - SMAC_SIP6 */ +#define IS2_HKO_SMAC_SIP6_IGR_PORT IS2_HKL_TYPE +#define IS2_HKL_SMAC_SIP6_IGR_PORT VCAP_PORT_WIDTH +#define IS2_HKO_SMAC_SIP6_L2_SMAC \ + (IS2_HKO_SMAC_SIP6_IGR_PORT + IS2_HKL_SMAC_SIP6_IGR_PORT) +#define IS2_HKL_SMAC_SIP6_L2_SMAC 48 +#define IS2_HKO_SMAC_SIP6_L3_IP6_SIP \ + (IS2_HKO_SMAC_SIP6_L2_SMAC + IS2_HKL_SMAC_SIP6_L2_SMAC) +#define IS2_HKL_SMAC_SIP6_L3_IP6_SIP 128 + +/* IS2 full key - common */ +#define IS2_FKO_TYPE 0 +#define IS2_FKL_TYPE 2 +#define IS2_FKO_FIRST (IS2_FKO_TYPE + IS2_FKL_TYPE) +#define IS2_FKL_FIRST 1 +#define IS2_FKO_PAG (IS2_FKO_FIRST + IS2_FKL_FIRST) +#define IS2_FKL_PAG 8 +#define IS2_FKO_IGR_PORT_MASK (IS2_FKO_PAG + IS2_FKL_PAG) +#define IS2_FKL_IGR_PORT_MASK (VCAP_PORT_CNT + 1) +#define IS2_FKO_SERVICE_FRM (IS2_FKO_IGR_PORT_MASK + IS2_FKL_IGR_PORT_MASK) +#define IS2_FKL_SERVICE_FRM 1 +#define IS2_FKO_HOST_MATCH (IS2_FKO_SERVICE_FRM + IS2_FKL_SERVICE_FRM) +#define IS2_FKL_HOST_MATCH 1 +#define IS2_FKO_L2_MC (IS2_FKO_HOST_MATCH + IS2_FKL_HOST_MATCH) +#define IS2_FKL_L2_MC 1 +#define IS2_FKO_L2_BC (IS2_FKO_L2_MC + IS2_FKL_L2_MC) +#define IS2_FKL_L2_BC 1 +#define IS2_FKO_VLAN_TAGGED 
(IS2_FKO_L2_BC + IS2_FKL_L2_BC) +#define IS2_FKL_VLAN_TAGGED 1 +#define IS2_FKO_VID (IS2_FKO_VLAN_TAGGED + IS2_FKL_VLAN_TAGGED) +#define IS2_FKL_VID 12 +#define IS2_FKO_DEI (IS2_FKO_VID + IS2_FKL_VID) +#define IS2_FKL_DEI 1 +#define IS2_FKO_PCP (IS2_FKO_DEI + IS2_FKL_DEI) +#define IS2_FKL_PCP 3 + +/* IS2 full key - IP6_TCP_UDP/IP6_OTHER common */ +#define IS2_FKO_L3_TTL_GT0 (IS2_FKO_PCP + IS2_FKL_PCP) +#define IS2_FKL_L3_TTL_GT0 1 +#define IS2_FKO_L3_TOS (IS2_FKO_L3_TTL_GT0 + IS2_FKL_L3_TTL_GT0) +#define IS2_FKL_L3_TOS 8 +#define IS2_FKO_L3_IP6_DIP (IS2_FKO_L3_TOS + IS2_FKL_L3_TOS) +#define IS2_FKL_L3_IP6_DIP 128 +#define IS2_FKO_L3_IP6_SIP (IS2_FKO_L3_IP6_DIP + IS2_FKL_L3_IP6_DIP) +#define IS2_FKL_L3_IP6_SIP 128 +#define IS2_FKO_DIP_EQ_SIP (IS2_FKO_L3_IP6_SIP + IS2_FKL_L3_IP6_SIP) +#define IS2_FKL_DIP_EQ_SIP 1 + +/* IS2 full key - IP6_TCP_UDP */ +#define IS2_FKO_IP6_TCP_UDP_TCP (IS2_FKO_DIP_EQ_SIP + IS2_FKL_DIP_EQ_SIP) +#define IS2_FKL_IP6_TCP_UDP_TCP 1 +#define IS2_FKO_IP6_TCP_UDP_L4_DPORT \ + (IS2_FKO_IP6_TCP_UDP_TCP + IS2_FKL_IP6_TCP_UDP_TCP) +#define IS2_FKL_IP6_TCP_UDP_L4_DPORT 16 +#define IS2_FKO_IP6_TCP_UDP_L4_SPORT \ + (IS2_FKO_IP6_TCP_UDP_L4_DPORT + IS2_FKL_IP6_TCP_UDP_L4_DPORT) +#define IS2_FKL_IP6_TCP_UDP_L4_SPORT 16 +#define IS2_FKO_IP6_TCP_UDP_L4_RNG \ + (IS2_FKO_IP6_TCP_UDP_L4_SPORT + IS2_FKL_IP6_TCP_UDP_L4_SPORT) +#define IS2_FKL_IP6_TCP_UDP_L4_RNG 8 +#define IS2_FKO_IP6_TCP_UDP_SPORT_EQ_DPORT \ + (IS2_FKO_IP6_TCP_UDP_L4_RNG + IS2_FKL_IP6_TCP_UDP_L4_RNG) +#define IS2_FKL_IP6_TCP_UDP_SPORT_EQ_DPORT 1 +#define IS2_FKO_IP6_TCP_UDP_SEQUENCE_EQ0 \ + (IS2_FKO_IP6_TCP_UDP_SPORT_EQ_DPORT + \ + IS2_FKL_IP6_TCP_UDP_SPORT_EQ_DPORT) +#define IS2_FKL_IP6_TCP_UDP_SEQUENCE_EQ0 1 +#define IS2_FKO_IP6_TCP_UDP_L4_FIN \ + (IS2_FKO_IP6_TCP_UDP_SEQUENCE_EQ0 + IS2_FKL_IP6_TCP_UDP_SEQUENCE_EQ0) +#define IS2_FKL_IP6_TCP_UDP_L4_FIN 1 +#define IS2_FKO_IP6_TCP_UDP_L4_SYN \ + (IS2_FKO_IP6_TCP_UDP_L4_FIN + IS2_FKL_IP6_TCP_UDP_L4_FIN) +#define IS2_FKL_IP6_TCP_UDP_L4_SYN 1 +#define IS2_FKO_IP6_TCP_UDP_L4_RST \ + (IS2_FKO_IP6_TCP_UDP_L4_SYN + IS2_FKL_IP6_TCP_UDP_L4_SYN) +#define IS2_FKL_IP6_TCP_UDP_L4_RST 1 +#define IS2_FKO_IP6_TCP_UDP_L4_PSH \ + (IS2_FKO_IP6_TCP_UDP_L4_RST + IS2_FKL_IP6_TCP_UDP_L4_RST) +#define IS2_FKL_IP6_TCP_UDP_L4_PSH 1 +#define IS2_FKO_IP6_TCP_UDP_L4_ACK \ + (IS2_FKO_IP6_TCP_UDP_L4_PSH + IS2_FKL_IP6_TCP_UDP_L4_PSH) +#define IS2_FKL_IP6_TCP_UDP_L4_ACK 1 +#define IS2_FKO_IP6_TCP_UDP_L4_URG \ + (IS2_FKO_IP6_TCP_UDP_L4_ACK + IS2_FKL_IP6_TCP_UDP_L4_ACK) +#define IS2_FKL_IP6_TCP_UDP_L4_URG 1 +#define IS2_FKO_IP6_TCP_UDP_L4_1588_DOM \ + (IS2_FKO_IP6_TCP_UDP_L4_URG + IS2_FKL_IP6_TCP_UDP_L4_URG) +#define IS2_FKL_IP6_TCP_UDP_L4_1588_DOM 8 +#define IS2_FKO_IP6_TCP_UDP_L4_1588_VER \ + (IS2_FKO_IP6_TCP_UDP_L4_1588_DOM + IS2_FKL_IP6_TCP_UDP_L4_1588_DOM) +#define IS2_FKL_IP6_TCP_UDP_L4_1588_VER 4 + +/* IS2 full key - IP6_OTHER */ +#define IS2_FKO_IP6_OTHER_L3_PROTO IS2_FKO_IP6_TCP_UDP_TCP +#define IS2_FKL_IP6_OTHER_L3_PROTO 8 +#define IS2_FKO_IP6_OTHER_L3_PAYLOAD \ + (IS2_FKO_IP6_OTHER_L3_PROTO + IS2_FKL_IP6_OTHER_L3_PROTO) +#define IS2_FKL_IP6_OTHER_L3_PAYLOAD 56 + +/* IS2 full key - CUSTOM */ +#define IS2_FKO_CUSTOM_CUSTOM_TYPE IS2_FKO_L3_TTL_GT0 +#define IS2_FKL_CUSTOM_CUSTOM_TYPE 1 +#define IS2_FKO_CUSTOM_CUSTOM \ + (IS2_FKO_CUSTOM_CUSTOM_TYPE + IS2_FKL_CUSTOM_CUSTOM_TYPE) +#define IS2_FKL_CUSTOM_CUSTOM 320 + +/* IS2 action - BASE_TYPE */ +#define IS2_AO_HIT_ME_ONCE 0 +#define IS2_AL_HIT_ME_ONCE 1 +#define IS2_AO_CPU_COPY_ENA (IS2_AO_HIT_ME_ONCE + IS2_AL_HIT_ME_ONCE) +#define 
IS2_AL_CPU_COPY_ENA 1 +#define IS2_AO_CPU_QU_NUM (IS2_AO_CPU_COPY_ENA + IS2_AL_CPU_COPY_ENA) +#define IS2_AL_CPU_QU_NUM 3 +#define IS2_AO_MASK_MODE (IS2_AO_CPU_QU_NUM + IS2_AL_CPU_QU_NUM) +#define IS2_AL_MASK_MODE 2 +#define IS2_AO_MIRROR_ENA (IS2_AO_MASK_MODE + IS2_AL_MASK_MODE) +#define IS2_AL_MIRROR_ENA 1 +#define IS2_AO_LRN_DIS (IS2_AO_MIRROR_ENA + IS2_AL_MIRROR_ENA) +#define IS2_AL_LRN_DIS 1 +#define IS2_AO_POLICE_ENA (IS2_AO_LRN_DIS + IS2_AL_LRN_DIS) +#define IS2_AL_POLICE_ENA 1 +#define IS2_AO_POLICE_IDX (IS2_AO_POLICE_ENA + IS2_AL_POLICE_ENA) +#define IS2_AL_POLICE_IDX 9 +#define IS2_AO_POLICE_VCAP_ONLY (IS2_AO_POLICE_IDX + IS2_AL_POLICE_IDX) +#define IS2_AL_POLICE_VCAP_ONLY 1 +#define IS2_AO_PORT_MASK (IS2_AO_POLICE_VCAP_ONLY + IS2_AL_POLICE_VCAP_ONLY) +#define IS2_AL_PORT_MASK VCAP_PORT_CNT +#define IS2_AO_REW_OP (IS2_AO_PORT_MASK + IS2_AL_PORT_MASK) +#define IS2_AL_REW_OP 9 +#define IS2_AO_LM_CNT_DIS (IS2_AO_REW_OP + IS2_AL_REW_OP) +#define IS2_AL_LM_CNT_DIS 1 +#define IS2_AO_ISDX_ENA \ + (IS2_AO_LM_CNT_DIS + IS2_AL_LM_CNT_DIS + 1) /* Reserved bit */ +#define IS2_AL_ISDX_ENA 1 +#define IS2_AO_ACL_ID (IS2_AO_ISDX_ENA + IS2_AL_ISDX_ENA) +#define IS2_AL_ACL_ID 6 + +/* IS2 action - SMAC_SIP */ +#define IS2_AO_SMAC_SIP_CPU_COPY_ENA 0 +#define IS2_AL_SMAC_SIP_CPU_COPY_ENA 1 +#define IS2_AO_SMAC_SIP_CPU_QU_NUM 1 +#define IS2_AL_SMAC_SIP_CPU_QU_NUM 3 +#define IS2_AO_SMAC_SIP_FWD_KILL_ENA 4 +#define IS2_AL_SMAC_SIP_FWD_KILL_ENA 1 +#define IS2_AO_SMAC_SIP_HOST_MATCH 5 +#define IS2_AL_SMAC_SIP_HOST_MATCH 1 + +#endif /* _OCELOT_VCAP_H_ */ diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index 4ad5109059e0..bac5be4d4f43 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -20,6 +20,7 @@ config NFP tristate "Netronome(R) NFP4000/NFP6000 NIC driver" depends on PCI && PCI_MSI depends on VXLAN || VXLAN=n + depends on TLS && TLS_DEVICE || TLS_DEVICE=n select NET_DEVLINK ---help--- This driver supports the Netronome(R) NFP4000/NFP6000 based diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 87bf784f8e8f..2805641965f3 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -16,6 +16,7 @@ nfp-objs := \ nfpcore/nfp_rtsym.o \ nfpcore/nfp_target.o \ ccm.o \ + ccm_mbox.o \ nfp_asm.o \ nfp_app.o \ nfp_app_nic.o \ @@ -34,6 +35,11 @@ nfp-objs := \ nfp_shared_buf.o \ nic/main.o +ifeq ($(CONFIG_TLS_DEVICE),y) +nfp-objs += \ + crypto/tls.o +endif + ifeq ($(CONFIG_NFP_APP_FLOWER),y) nfp-objs += \ flower/action.o \ diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c index ff3913085665..23ebddfb9532 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/cls.c +++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c @@ -262,22 +262,12 @@ static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type, } } +static LIST_HEAD(nfp_abm_block_cb_list); + int nfp_abm_setup_cls_block(struct net_device *netdev, struct nfp_repr *repr, - struct tc_block_offload *f) + struct flow_block_offload *f) { - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - nfp_abm_setup_tc_block_cb, - repr, repr, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, nfp_abm_setup_tc_block_cb, - repr); - return 0; - default: - return -EOPNOTSUPP; - } + return 
flow_block_cb_setup_simple(f, &nfp_abm_block_cb_list, + nfp_abm_setup_tc_block_cb, + repr, repr, true); } diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.h b/drivers/net/ethernet/netronome/nfp/abm/main.h index 49749c60885e..48746c9c6224 100644 --- a/drivers/net/ethernet/netronome/nfp/abm/main.h +++ b/drivers/net/ethernet/netronome/nfp/abm/main.h @@ -247,7 +247,7 @@ int nfp_abm_setup_tc_mq(struct net_device *netdev, struct nfp_abm_link *alink, int nfp_abm_setup_tc_gred(struct net_device *netdev, struct nfp_abm_link *alink, struct tc_gred_qopt_offload *opt); int nfp_abm_setup_cls_block(struct net_device *netdev, struct nfp_repr *repr, - struct tc_block_offload *opt); + struct flow_block_offload *opt); int nfp_abm_ctrl_read_params(struct nfp_abm_link *alink); int nfp_abm_ctrl_find_addrs(struct nfp_abm *abm); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index d4bf0e694541..4054b70d7719 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -623,6 +623,13 @@ static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm) } static void +wrp_zext(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst) +{ + if (meta->flags & FLAG_INSN_DO_ZEXT) + wrp_immed(nfp_prog, reg_both(dst + 1), 0); +} + +static void wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm, enum nfp_relo_type relo) { @@ -858,7 +865,8 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) } static int -data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) +data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, swreg offset, + u8 dst_gpr, int size) { unsigned int i; u16 shift, sz; @@ -881,14 +889,15 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); if (i < 2) - wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } static int -data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, - swreg lreg, swreg rreg, int size, enum cmd_mode mode) +data_ld_host_order(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 dst_gpr, swreg lreg, swreg rreg, int size, + enum cmd_mode mode) { unsigned int i; u8 mask, sz; @@ -911,33 +920,34 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i)); if (i < 2) - wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } static int -data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, - u8 dst_gpr, u8 size) +data_ld_host_order_addr32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) { - return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset, - size, CMD_MODE_32b); + return data_ld_host_order(nfp_prog, meta, dst_gpr, reg_a(src_gpr), + offset, size, CMD_MODE_32b); } static int -data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, - u8 dst_gpr, u8 size) +data_ld_host_order_addr40(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u8 src_gpr, swreg offset, u8 dst_gpr, u8 size) { swreg rega, regb; addr40_offset(nfp_prog, src_gpr, offset, ®a, ®b); - return data_ld_host_order(nfp_prog, dst_gpr, rega, regb, + return data_ld_host_order(nfp_prog, meta, dst_gpr, rega, regb, size, CMD_MODE_40b_BA); } static int -construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 
src, u8 size) +construct_data_ind_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u16 offset, u16 src, u8 size) { swreg tmp_reg; @@ -953,10 +963,12 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ - return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); + return data_ld(nfp_prog, meta, imm_b(nfp_prog), 0, size); } -static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +static int +construct_data_ld(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + u16 offset, u8 size) { swreg tmp_reg; @@ -967,7 +979,7 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) /* Load data */ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); - return data_ld(nfp_prog, tmp_reg, 0, size); + return data_ld(nfp_prog, meta, tmp_reg, 0, size); } static int @@ -1204,7 +1216,7 @@ mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, } if (clr_gpr && size < 8) - wrp_immed(nfp_prog, reg_both(gpr + 1), 0); + wrp_zext(nfp_prog, meta, gpr); while (size) { u32 slice_end; @@ -1305,9 +1317,10 @@ wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, enum alu_op alu_op) { const struct bpf_insn *insn = &meta->insn; + u8 dst = insn->dst_reg * 2; - wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); - wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + wrp_alu_imm(nfp_prog, dst, alu_op, insn->imm); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -1319,7 +1332,7 @@ wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2396,12 +2409,14 @@ static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u8 dst = meta->insn.dst_reg * 2; emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); - wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } -static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +static int +__ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) { if (shift_amt) { /* Set signedness bit (MSB of result). 
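 * The shifter takes its sign from the MSB of the previous ALU * result (see the NOTE in ashr_reg() below), so dst is first run * through the ALU. 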
*/ @@ -2410,7 +2425,7 @@ static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, reg_b(dst), SHF_SC_R_SHF, shift_amt); } - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2425,7 +2440,7 @@ static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) umin = meta->umin_src; umax = meta->umax_src; if (umin == umax) - return __ashr_imm(nfp_prog, dst, umin); + return __ashr_imm(nfp_prog, meta, dst, umin); src = insn->src_reg * 2; /* NOTE: the first insn will set both indirect shift amount (source A) @@ -2434,7 +2449,7 @@ static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst)); emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR, reg_b(dst), SHF_SC_R_SHF); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2444,15 +2459,17 @@ static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) const struct bpf_insn *insn = &meta->insn; u8 dst = insn->dst_reg * 2; - return __ashr_imm(nfp_prog, dst, insn->imm); + return __ashr_imm(nfp_prog, meta, dst, insn->imm); } -static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +static int +__shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) { if (shift_amt) emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, reg_b(dst), SHF_SC_R_SHF, shift_amt); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2461,7 +2478,7 @@ static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) const struct bpf_insn *insn = &meta->insn; u8 dst = insn->dst_reg * 2; - return __shr_imm(nfp_prog, dst, insn->imm); + return __shr_imm(nfp_prog, meta, dst, insn->imm); } static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -2474,22 +2491,24 @@ static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) umin = meta->umin_src; umax = meta->umax_src; if (umin == umax) - return __shr_imm(nfp_prog, dst, umin); + return __shr_imm(nfp_prog, meta, dst, umin); src = insn->src_reg * 2; emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0)); emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, reg_b(dst), SHF_SC_R_SHF); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } -static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt) +static int +__shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 dst, + u8 shift_amt) { if (shift_amt) emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE, reg_b(dst), SHF_SC_L_SHF, shift_amt); - wrp_immed(nfp_prog, reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2498,7 +2517,7 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) const struct bpf_insn *insn = &meta->insn; u8 dst = insn->dst_reg * 2; - return __shl_imm(nfp_prog, dst, insn->imm); + return __shl_imm(nfp_prog, meta, dst, insn->imm); } static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) @@ -2511,11 +2530,11 @@ static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) umin = meta->umin_src; umax = meta->umax_src; if (umin == umax) - return __shl_imm(nfp_prog, dst, umin); + return __shl_imm(nfp_prog, meta, dst, umin); src = insn->src_reg * 2; shl_reg64_lt32_low(nfp_prog, dst, src); - wrp_immed(nfp_prog, 
reg_both(dst + 1), 0); + wrp_zext(nfp_prog, meta, dst); return 0; } @@ -2577,34 +2596,34 @@ static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 1); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 1); } static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 2); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 2); } static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ld(nfp_prog, meta->insn.imm, 4); + return construct_data_ld(nfp_prog, meta, meta->insn.imm, 4); } static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 1); } static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 2); } static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - return construct_data_ind_ld(nfp_prog, meta->insn.imm, + return construct_data_ind_ld(nfp_prog, meta, meta->insn.imm, meta->insn.src_reg * 2, 4); } @@ -2682,7 +2701,7 @@ mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); - return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2, + return data_ld_host_order_addr32(nfp_prog, meta, meta->insn.src_reg * 2, tmp_reg, meta->insn.dst_reg * 2, size); } @@ -2694,7 +2713,7 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); - return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2, + return data_ld_host_order_addr40(nfp_prog, meta, meta->insn.src_reg * 2, tmp_reg, meta->insn.dst_reg * 2, size); } @@ -2755,7 +2774,7 @@ mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); if (!len_mid) { - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); return 0; } @@ -2763,7 +2782,7 @@ mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, if (size <= REG_WIDTH) { wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else { swreg src_hi = reg_xfer(idx + 2); @@ -2794,10 +2813,10 @@ mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog, if (size < REG_WIDTH) { wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else if (size == REG_WIDTH) { wrp_mov(nfp_prog, dst_lo, src_lo); - wrp_immed(nfp_prog, dst_hi, 0); + wrp_zext(nfp_prog, meta, dst_gpr); } else { swreg src_hi = reg_xfer(idx + 1); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 9c136da25221..1c9fb11470df 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -160,35 +160,19 @@ static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, return 0; } -static int nfp_bpf_setup_tc_block(struct net_device *netdev, - struct tc_block_offload *f) -{ - struct nfp_net *nn 
= netdev_priv(netdev); - - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - nfp_bpf_setup_tc_block_cb, - nn, nn, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, - nfp_bpf_setup_tc_block_cb, - nn); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(nfp_bpf_block_cb_list); static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data) { + struct nfp_net *nn = netdev_priv(netdev); + switch (type) { case TC_SETUP_BLOCK: - return nfp_bpf_setup_tc_block(netdev, type_data); + return flow_block_cb_setup_simple(type_data, + &nfp_bpf_block_cb_list, + nfp_bpf_setup_tc_block_cb, + nn, nn, true); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index e54d1ac84df2..57d6ff51e980 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -238,6 +238,8 @@ struct nfp_bpf_reg_state { #define FLAG_INSN_SKIP_PREC_DEPENDENT BIT(4) /* Instruction is optimized by the verifier */ #define FLAG_INSN_SKIP_VERIFIER_OPT BIT(5) +/* Instruction needs to zero extend to high 32-bit */ +#define FLAG_INSN_DO_ZEXT BIT(6) #define FLAG_INSN_SKIP_MASK (FLAG_INSN_SKIP_NOOP | \ FLAG_INSN_SKIP_PREC_DEPENDENT | \ diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 36f56eb4cbe2..e92ee510fd52 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -744,6 +744,17 @@ continue_subprog: goto continue_subprog; } +static void nfp_bpf_insn_flag_zext(struct nfp_prog *nfp_prog, + struct bpf_insn_aux_data *aux) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (aux[meta->n].zext_dst) + meta->flags |= FLAG_INSN_DO_ZEXT; + } +} + int nfp_bpf_finalize(struct bpf_verifier_env *env) { struct bpf_subprog_info *info; @@ -784,6 +795,7 @@ int nfp_bpf_finalize(struct bpf_verifier_env *env) return -EOPNOTSUPP; } + nfp_bpf_insn_flag_zext(nfp_prog, env->insn_aux_data); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/ccm.c b/drivers/net/ethernet/netronome/nfp/ccm.c index 94476e41e261..71afd111bae3 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.c +++ b/drivers/net/ethernet/netronome/nfp/ccm.c @@ -7,9 +7,6 @@ #include "nfp_app.h" #include "nfp_net.h" -#define NFP_CCM_TYPE_REPLY_BIT 7 -#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req)) - #define ccm_warn(app, msg...) 
nn_dp_warn(&(app)->ctrl->dp, msg) #define NFP_CCM_TAG_ALLOC_SPAN (U16_MAX / 4) diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h index ac963b128203..a460c75522be 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm.h +++ b/drivers/net/ethernet/netronome/nfp/ccm.h @@ -9,6 +9,7 @@ #include <linux/wait.h> struct nfp_app; +struct nfp_net; /* Firmware ABI */ @@ -21,15 +22,27 @@ enum nfp_ccm_type { NFP_CCM_TYPE_BPF_MAP_GETNEXT = 6, NFP_CCM_TYPE_BPF_MAP_GETFIRST = 7, NFP_CCM_TYPE_BPF_BPF_EVENT = 8, + NFP_CCM_TYPE_CRYPTO_RESET = 9, + NFP_CCM_TYPE_CRYPTO_ADD = 10, + NFP_CCM_TYPE_CRYPTO_DEL = 11, + NFP_CCM_TYPE_CRYPTO_UPDATE = 12, __NFP_CCM_TYPE_MAX, }; #define NFP_CCM_ABI_VERSION 1 +#define NFP_CCM_TYPE_REPLY_BIT 7 +#define __NFP_CCM_REPLY(req) (BIT(NFP_CCM_TYPE_REPLY_BIT) | (req)) + struct nfp_ccm_hdr { - u8 type; - u8 ver; - __be16 tag; + union { + struct { + u8 type; + u8 ver; + __be16 tag; + }; + __be32 raw; + }; }; static inline u8 nfp_ccm_get_type(struct sk_buff *skb) @@ -41,15 +54,31 @@ static inline u8 nfp_ccm_get_type(struct sk_buff *skb) return hdr->type; } -static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb) +static inline __be16 __nfp_ccm_get_tag(struct sk_buff *skb) { struct nfp_ccm_hdr *hdr; hdr = (struct nfp_ccm_hdr *)skb->data; - return be16_to_cpu(hdr->tag); + return hdr->tag; +} + +static inline unsigned int nfp_ccm_get_tag(struct sk_buff *skb) +{ + return be16_to_cpu(__nfp_ccm_get_tag(skb)); } +#define NFP_NET_MBOX_TLV_TYPE GENMASK(31, 16) +#define NFP_NET_MBOX_TLV_LEN GENMASK(15, 0) + +enum nfp_ccm_mbox_tlv_type { + NFP_NET_MBOX_TLV_TYPE_UNKNOWN = 0, + NFP_NET_MBOX_TLV_TYPE_END = 1, + NFP_NET_MBOX_TLV_TYPE_MSG = 2, + NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP = 3, + NFP_NET_MBOX_TLV_TYPE_RESV = 4, +}; + /* Implementation */ /** @@ -71,7 +100,7 @@ struct nfp_ccm { u16 tag_alloc_last; struct sk_buff_head replies; - struct wait_queue_head wq; + wait_queue_head_t wq; }; int nfp_ccm_init(struct nfp_ccm *ccm, struct nfp_app *app); @@ -80,4 +109,23 @@ void nfp_ccm_rx(struct nfp_ccm *ccm, struct sk_buff *skb); struct sk_buff * nfp_ccm_communicate(struct nfp_ccm *ccm, struct sk_buff *skb, enum nfp_ccm_type type, unsigned int reply_size); + +int nfp_ccm_mbox_alloc(struct nfp_net *nn); +void nfp_ccm_mbox_free(struct nfp_net *nn); +int nfp_ccm_mbox_init(struct nfp_net *nn); +void nfp_ccm_mbox_clean(struct nfp_net *nn); +bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size); +struct sk_buff * +nfp_ccm_mbox_msg_alloc(struct nfp_net *nn, unsigned int req_size, + unsigned int reply_size, gfp_t flags); +int __nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size, bool critical); +int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size); +int nfp_ccm_mbox_post(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, unsigned int max_reply_size); #endif diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c new file mode 100644 index 000000000000..f0783aa9e66e --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c @@ -0,0 +1,743 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#include <linux/bitfield.h> +#include <linux/io.h> +#include <linux/skbuff.h> + +#include "ccm.h" +#include "nfp_net.h" + +/* CCM messages via the mailbox. CMSGs get wrapped into simple TLVs + * and copied into the mailbox. Multiple messages can be copied to + * form a batch. Threads come in with CMSG formed in an skb, then + * enqueue that skb onto the request queue. If a thread's skb is first + * in the queue, that thread handles the mailbox operation. It copies + * up to 64 messages into the mailbox (making sure that both requests + * and replies will fit). After FW is done processing the batch it + * copies the data out and wakes waiting threads. + * If a thread is waiting, it either gets its message completed + * (response is copied into the same skb as the request, overwriting + * it), or becomes the first in queue. + * Completions and next-to-run are signaled via the control buffer + * to limit potential cache line bounces. + */ + +#define NFP_CCM_MBOX_BATCH_LIMIT 64 +#define NFP_CCM_TIMEOUT (NFP_NET_POLL_TIMEOUT * 1000) +#define NFP_CCM_MAX_QLEN 1024 + +enum nfp_net_mbox_cmsg_state { + NFP_NET_MBOX_CMSG_STATE_QUEUED, + NFP_NET_MBOX_CMSG_STATE_NEXT, + NFP_NET_MBOX_CMSG_STATE_BUSY, + NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND, + NFP_NET_MBOX_CMSG_STATE_DONE, +}; + +/** + * struct nfp_ccm_mbox_cmsg_cb - CCM mailbox specific info + * @state: processing state (/stage) of the message + * @err: error encountered during processing if any + * @max_len: max(request_len, reply_len) + * @exp_reply: expected reply length (0 means don't validate) + * @posted: the message was posted and nobody waits for the reply + */ +struct nfp_ccm_mbox_cmsg_cb { + enum nfp_net_mbox_cmsg_state state; + int err; + unsigned int max_len; + unsigned int exp_reply; + bool posted; +}; + +static u32 nfp_ccm_mbox_max_msg(struct nfp_net *nn) +{ + return round_down(nn->tlv_caps.mbox_len, 4) - + NFP_NET_CFG_MBOX_SIMPLE_VAL - /* common mbox command header */ + 4 * 2; /* Msg TLV plus End TLV headers */ +} + +static void +nfp_ccm_mbox_msg_init(struct sk_buff *skb, unsigned int exp_reply, int max_len) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->state = NFP_NET_MBOX_CMSG_STATE_QUEUED; + cb->err = 0; + cb->max_len = max_len; + cb->exp_reply = exp_reply; + cb->posted = false; +} + +static int nfp_ccm_mbox_maxlen(const struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->max_len; +} + +static bool nfp_ccm_mbox_done(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state == NFP_NET_MBOX_CMSG_STATE_DONE; +} + +static bool nfp_ccm_mbox_in_progress(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->state != NFP_NET_MBOX_CMSG_STATE_QUEUED && + cb->state != NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void nfp_ccm_mbox_set_busy(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->state = NFP_NET_MBOX_CMSG_STATE_BUSY; +} + +static bool nfp_ccm_mbox_is_posted(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + return cb->posted; +} + +static void nfp_ccm_mbox_mark_posted(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + cb->posted = true; +} + +static bool nfp_ccm_mbox_is_first(struct nfp_net *nn, struct sk_buff *skb) +{ + return skb_queue_is_first(&nn->mbox_cmsg.queue, skb); +} + +static bool nfp_ccm_mbox_should_run(struct nfp_net *nn, struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + 
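+ /* NEXT is set on the queue head via nfp_ccm_mbox_mark_next_runner() + * when the previous batch completes (or its runner gives up); only + * that skb's thread may start the next batch. + */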
+ return cb->state == NFP_NET_MBOX_CMSG_STATE_NEXT; +} + +static void nfp_ccm_mbox_mark_next_runner(struct nfp_net *nn) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + struct sk_buff *skb; + + skb = skb_peek(&nn->mbox_cmsg.queue); + if (!skb) + return; + + cb = (void *)skb->cb; + cb->state = NFP_NET_MBOX_CMSG_STATE_NEXT; + if (cb->posted) + queue_work(nn->mbox_cmsg.workq, &nn->mbox_cmsg.runq_work); +} + +static void +nfp_ccm_mbox_write_tlv(struct nfp_net *nn, u32 off, u32 type, u32 len) +{ + nn_writel(nn, off, + FIELD_PREP(NFP_NET_MBOX_TLV_TYPE, type) | + FIELD_PREP(NFP_NET_MBOX_TLV_LEN, len)); +} + +static void nfp_ccm_mbox_copy_in(struct nfp_net *nn, struct sk_buff *last) +{ + struct sk_buff *skb; + int reserve, i, cnt; + __be32 *data; + u32 off, len; + + off = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + skb = __skb_peek(&nn->mbox_cmsg.queue); + while (true) { + nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_MSG, + skb->len); + off += 4; + + /* Write data word by word, skb->data should be aligned */ + data = (__be32 *)skb->data; + cnt = skb->len / 4; + for (i = 0 ; i < cnt; i++) { + nn_writel(nn, off, be32_to_cpu(data[i])); + off += 4; + } + if (skb->len & 3) { + __be32 tmp = 0; + + memcpy(&tmp, &data[i], skb->len & 3); + nn_writel(nn, off, be32_to_cpu(tmp)); + off += 4; + } + + /* Reserve space if reply is bigger */ + len = round_up(skb->len, 4); + reserve = nfp_ccm_mbox_maxlen(skb) - len; + if (reserve > 0) { + nfp_ccm_mbox_write_tlv(nn, off, + NFP_NET_MBOX_TLV_TYPE_RESV, + reserve); + off += 4 + reserve; + } + + if (skb == last) + break; + skb = skb_queue_next(&nn->mbox_cmsg.queue, skb); + } + + nfp_ccm_mbox_write_tlv(nn, off, NFP_NET_MBOX_TLV_TYPE_END, 0); +} + +static struct sk_buff * +nfp_ccm_mbox_find_req(struct nfp_net *nn, __be16 tag, struct sk_buff *last) +{ + struct sk_buff *skb; + + skb = __skb_peek(&nn->mbox_cmsg.queue); + while (true) { + if (__nfp_ccm_get_tag(skb) == tag) + return skb; + + if (skb == last) + return NULL; + skb = skb_queue_next(&nn->mbox_cmsg.queue, skb); + } +} + +static void nfp_ccm_mbox_copy_out(struct nfp_net *nn, struct sk_buff *last) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + u8 __iomem *data, *end; + struct sk_buff *skb; + + data = nn->dp.ctrl_bar + nn->tlv_caps.mbox_off + + NFP_NET_CFG_MBOX_SIMPLE_VAL; + end = data + nn->tlv_caps.mbox_len; + + while (true) { + unsigned int length, offset, type; + struct nfp_ccm_hdr hdr; + u32 tlv_hdr; + + tlv_hdr = readl(data); + type = FIELD_GET(NFP_NET_MBOX_TLV_TYPE, tlv_hdr); + length = FIELD_GET(NFP_NET_MBOX_TLV_LEN, tlv_hdr); + offset = data - nn->dp.ctrl_bar; + + /* Advance past the header */ + data += 4; + + if (data + length > end) { + nn_dp_warn(&nn->dp, "mailbox oversized TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + if (type == NFP_NET_MBOX_TLV_TYPE_END) + break; + if (type == NFP_NET_MBOX_TLV_TYPE_RESV) + goto next_tlv; + if (type != NFP_NET_MBOX_TLV_TYPE_MSG && + type != NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) { + nn_dp_warn(&nn->dp, "mailbox unknown TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + if (length < 4) { + nn_dp_warn(&nn->dp, "mailbox msg too short to contain header TLV type:%d offset:%u len:%u\n", + type, offset, length); + break; + } + + hdr.raw = cpu_to_be32(readl(data)); + + skb = nfp_ccm_mbox_find_req(nn, hdr.tag, last); + if (!skb) { + nn_dp_warn(&nn->dp, "mailbox request not found:%u\n", + be16_to_cpu(hdr.tag)); + break; + } + cb = (void *)skb->cb; + + if (type == NFP_NET_MBOX_TLV_TYPE_MSG_NOSUP) { + nn_dp_warn(&nn->dp, + "mailbox 
msg not supported type:%d\n", + nfp_ccm_get_type(skb)); + cb->err = -EIO; + goto next_tlv; + } + + if (hdr.type != __NFP_CCM_REPLY(nfp_ccm_get_type(skb))) { + nn_dp_warn(&nn->dp, "mailbox msg reply wrong type:%u expected:%lu\n", + hdr.type, + __NFP_CCM_REPLY(nfp_ccm_get_type(skb))); + cb->err = -EIO; + goto next_tlv; + } + if (cb->exp_reply && length != cb->exp_reply) { + nn_dp_warn(&nn->dp, "mailbox msg reply wrong size type:%u expected:%u have:%u\n", + hdr.type, length, cb->exp_reply); + cb->err = -EIO; + goto next_tlv; + } + if (length > cb->max_len) { + nn_dp_warn(&nn->dp, "mailbox msg oversized reply type:%u max:%u have:%u\n", + hdr.type, cb->max_len, length); + cb->err = -EIO; + goto next_tlv; + } + + if (!cb->posted) { + __be32 *skb_data; + int i, cnt; + + if (length <= skb->len) + __skb_trim(skb, length); + else + skb_put(skb, length - skb->len); + + /* We overcopy here slightly, but that's okay, + * the skb is large enough, and the garbage will + * be ignored (beyond skb->len). + */ + skb_data = (__be32 *)skb->data; + memcpy(skb_data, &hdr, 4); + + cnt = DIV_ROUND_UP(length, 4); + for (i = 1 ; i < cnt; i++) + skb_data[i] = cpu_to_be32(readl(data + i * 4)); + } + + cb->state = NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND; +next_tlv: + data += round_up(length, 4); + if (data + 4 > end) { + nn_dp_warn(&nn->dp, + "reached end of MBOX without END TLV\n"); + break; + } + } + + smp_wmb(); /* order the skb->data vs. cb->state */ + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + do { + skb = __skb_dequeue(&nn->mbox_cmsg.queue); + cb = (void *)skb->cb; + + if (cb->state != NFP_NET_MBOX_CMSG_STATE_REPLY_FOUND) { + cb->err = -ENOENT; + smp_wmb(); /* order the cb->err vs. cb->state */ + } + cb->state = NFP_NET_MBOX_CMSG_STATE_DONE; + + if (cb->posted) { + if (cb->err) + nn_dp_warn(&nn->dp, + "mailbox posted msg failed type:%u err:%d\n", + nfp_ccm_get_type(skb), cb->err); + dev_consume_skb_any(skb); + } + } while (skb != last); + + nfp_ccm_mbox_mark_next_runner(nn); + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +} + +static void +nfp_ccm_mbox_mark_all_err(struct nfp_net *nn, struct sk_buff *last, int err) +{ + struct nfp_ccm_mbox_cmsg_cb *cb; + struct sk_buff *skb; + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + do { + skb = __skb_dequeue(&nn->mbox_cmsg.queue); + cb = (void *)skb->cb; + + cb->err = err; + smp_wmb(); /* order the cb->err vs. cb->state */ + cb->state = NFP_NET_MBOX_CMSG_STATE_DONE; + } while (skb != last); + + nfp_ccm_mbox_mark_next_runner(nn); + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +} + +static void nfp_ccm_mbox_run_queue_unlock(struct nfp_net *nn) + __releases(&nn->mbox_cmsg.queue.lock) +{ + int space = nn->tlv_caps.mbox_len - NFP_NET_CFG_MBOX_SIMPLE_VAL; + struct sk_buff *skb, *last; + int cnt, err; + + space -= 4; /* for End TLV */ + + /* First skb must fit, because it's ours and we checked it fits */ + cnt = 1; + last = skb = __skb_peek(&nn->mbox_cmsg.queue); + space -= 4 + nfp_ccm_mbox_maxlen(skb); + + while (!skb_queue_is_last(&nn->mbox_cmsg.queue, last)) { + skb = skb_queue_next(&nn->mbox_cmsg.queue, last); + space -= 4 + nfp_ccm_mbox_maxlen(skb); + if (space < 0) + break; + last = skb; + nfp_ccm_mbox_set_busy(skb); + cnt++; + if (cnt == NFP_CCM_MBOX_BATCH_LIMIT) + break; + } + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + /* Now we own all skb's marked in progress, new requests may arrive + * at the end of the queue. 
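+ * The batch ends at "last"; anything queued behind it will be + * handled by the next runner.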
+ */ + + nn_ctrl_bar_lock(nn); + + nfp_ccm_mbox_copy_in(nn, last); + + err = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_TLV_CMSG); + if (!err) + nfp_ccm_mbox_copy_out(nn, last); + else + nfp_ccm_mbox_mark_all_err(nn, last, -EIO); + + nn_ctrl_bar_unlock(nn); + + wake_up_all(&nn->mbox_cmsg.wq); +} + +static int nfp_ccm_mbox_skb_return(struct sk_buff *skb) +{ + struct nfp_ccm_mbox_cmsg_cb *cb = (void *)skb->cb; + + if (cb->err) + dev_kfree_skb_any(skb); + return cb->err; +} + +/* If the wait timed out but the command is already in progress we have + * to wait until it finishes. Runners have ownership of the skbs marked + * as busy. + */ +static int +nfp_ccm_mbox_unlink_unlock(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type) + __releases(&nn->mbox_cmsg.queue.lock) +{ + bool was_first; + + if (nfp_ccm_mbox_in_progress(skb)) { + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + wait_event(nn->mbox_cmsg.wq, nfp_ccm_mbox_done(skb)); + smp_rmb(); /* pairs with smp_wmb() after data is written */ + return nfp_ccm_mbox_skb_return(skb); + } + + was_first = nfp_ccm_mbox_should_run(nn, skb); + __skb_unlink(skb, &nn->mbox_cmsg.queue); + if (was_first) + nfp_ccm_mbox_mark_next_runner(nn); + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + if (was_first) + wake_up_all(&nn->mbox_cmsg.wq); + + nn_dp_warn(&nn->dp, "time out waiting for mbox response to 0x%02x\n", + type); + return -ETIMEDOUT; +} + +static int +nfp_ccm_mbox_msg_prepare(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, unsigned int max_reply_size, + gfp_t flags) +{ + const unsigned int mbox_max = nfp_ccm_mbox_max_msg(nn); + unsigned int max_len; + ssize_t undersize; + int err; + + if (unlikely(!(nn->tlv_caps.mbox_cmsg_types & BIT(type)))) { + nn_dp_warn(&nn->dp, + "message type %d not supported by mailbox\n", type); + return -EINVAL; + } + + /* If the reply size is unknown assume it will take the entire + * mailbox; the callers should do their best for this to never + * happen. + */ + if (!max_reply_size) + max_reply_size = mbox_max; + max_reply_size = round_up(max_reply_size, 4); + + /* Make sure we can fit the entire reply into the skb, + * and that we don't have to slow down the mbox handler + * with allocations.
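+ * Any necessary tail room is allocated here, in the caller's + * context, so the runner never allocates while copying out.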
+ */ + undersize = max_reply_size - (skb_end_pointer(skb) - skb->data); + if (undersize > 0) { + err = pskb_expand_head(skb, 0, undersize, flags); + if (err) { + nn_dp_warn(&nn->dp, + "can't allocate reply buffer for mailbox\n"); + return err; + } + } + + /* Make sure that request and response both fit into the mailbox */ + max_len = max(max_reply_size, round_up(skb->len, 4)); + if (max_len > mbox_max) { + nn_dp_warn(&nn->dp, + "message too big for the mailbox: %u/%u vs %u\n", + skb->len, max_reply_size, mbox_max); + return -EMSGSIZE; + } + + nfp_ccm_mbox_msg_init(skb, reply_size, max_len); + + return 0; +} + +static int +nfp_ccm_mbox_msg_enqueue(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, bool critical) +{ + struct nfp_ccm_hdr *hdr; + + assert_spin_locked(&nn->mbox_cmsg.queue.lock); + + if (!critical && nn->mbox_cmsg.queue.qlen >= NFP_CCM_MAX_QLEN) { + nn_dp_warn(&nn->dp, "mailbox request queue too long\n"); + return -EBUSY; + } + + hdr = (void *)skb->data; + hdr->ver = NFP_CCM_ABI_VERSION; + hdr->type = type; + hdr->tag = cpu_to_be16(nn->mbox_cmsg.tag++); + + __skb_queue_tail(&nn->mbox_cmsg.queue, skb); + + return 0; +} + +int __nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size, bool critical) +{ + int err; + + err = nfp_ccm_mbox_msg_prepare(nn, skb, type, reply_size, + max_reply_size, GFP_KERNEL); + if (err) + goto err_free_skb; + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + err = nfp_ccm_mbox_msg_enqueue(nn, skb, type, critical); + if (err) + goto err_unlock; + + /* First in queue takes the mailbox lock and processes the batch */ + if (!nfp_ccm_mbox_is_first(nn, skb)) { + bool to; + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + to = !wait_event_timeout(nn->mbox_cmsg.wq, + nfp_ccm_mbox_done(skb) || + nfp_ccm_mbox_should_run(nn, skb), + msecs_to_jiffies(NFP_CCM_TIMEOUT)); + + /* fast path for those completed by another thread */ + if (nfp_ccm_mbox_done(skb)) { + smp_rmb(); /* pairs with wmb after data is written */ + return nfp_ccm_mbox_skb_return(skb); + } + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + if (!nfp_ccm_mbox_is_first(nn, skb)) { + WARN_ON(!to); + + err = nfp_ccm_mbox_unlink_unlock(nn, skb, type); + if (err) + goto err_free_skb; + return 0; + } + } + + /* run queue expects the lock held */ + nfp_ccm_mbox_run_queue_unlock(nn); + return nfp_ccm_mbox_skb_return(skb); + +err_unlock: + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +err_free_skb: + dev_kfree_skb_any(skb); + return err; +} + +int nfp_ccm_mbox_communicate(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, + unsigned int reply_size, + unsigned int max_reply_size) +{ + return __nfp_ccm_mbox_communicate(nn, skb, type, reply_size, + max_reply_size, false); +} + +static void nfp_ccm_mbox_post_runq_work(struct work_struct *work) +{ + struct sk_buff *skb; + struct nfp_net *nn; + + nn = container_of(work, struct nfp_net, mbox_cmsg.runq_work); + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + skb = __skb_peek(&nn->mbox_cmsg.queue); + if (WARN_ON(!skb || !nfp_ccm_mbox_is_posted(skb) || + !nfp_ccm_mbox_should_run(nn, skb))) { + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + return; + } + + nfp_ccm_mbox_run_queue_unlock(nn); +} + +static void nfp_ccm_mbox_post_wait_work(struct work_struct *work) +{ + struct sk_buff *skb; + struct nfp_net *nn; + int err; + + nn = container_of(work, struct nfp_net, mbox_cmsg.wait_work); + + skb = skb_peek(&nn->mbox_cmsg.queue); + if (WARN_ON(!skb ||
!nfp_ccm_mbox_is_posted(skb))) + /* Should never happen so it's unclear what to do here.. */ + goto exit_unlock_wake; + + err = nfp_net_mbox_reconfig_wait_posted(nn); + if (!err) + nfp_ccm_mbox_copy_out(nn, skb); + else + nfp_ccm_mbox_mark_all_err(nn, skb, -EIO); +exit_unlock_wake: + nn_ctrl_bar_unlock(nn); + wake_up_all(&nn->mbox_cmsg.wq); +} + +int nfp_ccm_mbox_post(struct nfp_net *nn, struct sk_buff *skb, + enum nfp_ccm_type type, unsigned int max_reply_size) +{ + int err; + + err = nfp_ccm_mbox_msg_prepare(nn, skb, type, 0, max_reply_size, + GFP_ATOMIC); + if (err) + goto err_free_skb; + + nfp_ccm_mbox_mark_posted(skb); + + spin_lock_bh(&nn->mbox_cmsg.queue.lock); + + err = nfp_ccm_mbox_msg_enqueue(nn, skb, type, false); + if (err) + goto err_unlock; + + if (nfp_ccm_mbox_is_first(nn, skb)) { + if (nn_ctrl_bar_trylock(nn)) { + nfp_ccm_mbox_copy_in(nn, skb); + nfp_net_mbox_reconfig_post(nn, + NFP_NET_CFG_MBOX_CMD_TLV_CMSG); + queue_work(nn->mbox_cmsg.workq, + &nn->mbox_cmsg.wait_work); + } else { + nfp_ccm_mbox_mark_next_runner(nn); + } + } + + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); + + return 0; + +err_unlock: + spin_unlock_bh(&nn->mbox_cmsg.queue.lock); +err_free_skb: + dev_kfree_skb_any(skb); + return err; +} + +struct sk_buff * +nfp_ccm_mbox_msg_alloc(struct nfp_net *nn, unsigned int req_size, + unsigned int reply_size, gfp_t flags) +{ + unsigned int max_size; + struct sk_buff *skb; + + if (!reply_size) + max_size = nfp_ccm_mbox_max_msg(nn); + else + max_size = max(req_size, reply_size); + max_size = round_up(max_size, 4); + + skb = alloc_skb(max_size, flags); + if (!skb) + return NULL; + + skb_put(skb, req_size); + + return skb; +} + +bool nfp_ccm_mbox_fits(struct nfp_net *nn, unsigned int size) +{ + return nfp_ccm_mbox_max_msg(nn) >= size; +} + +int nfp_ccm_mbox_init(struct nfp_net *nn) +{ + return 0; +} + +void nfp_ccm_mbox_clean(struct nfp_net *nn) +{ + drain_workqueue(nn->mbox_cmsg.workq); +} + +int nfp_ccm_mbox_alloc(struct nfp_net *nn) +{ + skb_queue_head_init(&nn->mbox_cmsg.queue); + init_waitqueue_head(&nn->mbox_cmsg.wq); + INIT_WORK(&nn->mbox_cmsg.wait_work, nfp_ccm_mbox_post_wait_work); + INIT_WORK(&nn->mbox_cmsg.runq_work, nfp_ccm_mbox_post_runq_work); + + nn->mbox_cmsg.workq = alloc_workqueue("nfp-ccm-mbox", WQ_UNBOUND, 0); + if (!nn->mbox_cmsg.workq) + return -ENOMEM; + return 0; +} + +void nfp_ccm_mbox_free(struct nfp_net *nn) +{ + destroy_workqueue(nn->mbox_cmsg.workq); + WARN_ON(!skb_queue_empty(&nn->mbox_cmsg.queue)); +} diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h new file mode 100644 index 000000000000..60372ddf69f0 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#ifndef NFP_CRYPTO_H +#define NFP_CRYPTO_H 1 + +struct nfp_net_tls_offload_ctx { + __be32 fw_handle[2]; + + u8 rx_end[0]; + /* Tx only fields follow - Rx side does not have enough driver state + * to fit these + */ + + u32 next_seq; +}; + +#ifdef CONFIG_TLS_DEVICE +int nfp_net_tls_init(struct nfp_net *nn); +#else +static inline int nfp_net_tls_init(struct nfp_net *nn) +{ + return 0; +} +#endif + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h new file mode 100644 index 000000000000..67413d946c4a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (C) 2019 Netronome Systems, Inc. */ + +#ifndef NFP_CRYPTO_FW_H +#define NFP_CRYPTO_FW_H 1 + +#include "../ccm.h" + +#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0 +#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1 + +struct nfp_crypto_reply_simple { + struct nfp_ccm_hdr hdr; + __be32 error; +}; + +struct nfp_crypto_req_reset { + struct nfp_ccm_hdr hdr; + __be32 ep_id; +}; + +#define NFP_NET_TLS_IPVER GENMASK(15, 12) +#define NFP_NET_TLS_VLAN GENMASK(11, 0) +#define NFP_NET_TLS_VLAN_UNUSED 4095 + +struct nfp_crypto_req_add_front { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + u8 resv[3]; + u8 opcode; + u8 key_len; + __be16 ipver_vlan __packed; + u8 l4_proto; +#define NFP_NET_TLS_NON_ADDR_KEY_LEN 8 + u8 l3_addrs[0]; +}; + +struct nfp_crypto_req_add_back { + __be16 src_port; + __be16 dst_port; + __be32 key[8]; + __be32 salt; + __be32 iv[2]; + __be32 counter; + __be32 rec_no[2]; + __be32 tcp_seq; +}; + +struct nfp_crypto_req_add_v4 { + struct nfp_crypto_req_add_front front; + __be32 src_ip; + __be32 dst_ip; + struct nfp_crypto_req_add_back back; +}; + +struct nfp_crypto_req_add_v6 { + struct nfp_crypto_req_add_front front; + __be32 src_ip[4]; + __be32 dst_ip[4]; + struct nfp_crypto_req_add_back back; +}; + +struct nfp_crypto_reply_add { + struct nfp_ccm_hdr hdr; + __be32 error; + __be32 handle[2]; +}; + +struct nfp_crypto_req_del { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + __be32 handle[2]; +}; + +struct nfp_crypto_req_update { + struct nfp_ccm_hdr hdr; + __be32 ep_id; + u8 resv[3]; + u8 opcode; + __be32 handle[2]; + __be32 rec_no[2]; + __be32 tcp_seq; +}; +#endif diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c new file mode 100644 index 000000000000..96a96b35c0ca --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Netronome Systems, Inc. 
*/ + +#include <linux/bitfield.h> +#include <linux/ipv6.h> +#include <linux/skbuff.h> +#include <linux/string.h> +#include <net/tls.h> + +#include "../ccm.h" +#include "../nfp_net.h" +#include "crypto.h" +#include "fw.h" + +#define NFP_NET_TLS_CCM_MBOX_OPS_MASK \ + (BIT(NFP_CCM_TYPE_CRYPTO_RESET) | \ + BIT(NFP_CCM_TYPE_CRYPTO_ADD) | \ + BIT(NFP_CCM_TYPE_CRYPTO_DEL) | \ + BIT(NFP_CCM_TYPE_CRYPTO_UPDATE)) + +#define NFP_NET_TLS_OPCODE_MASK_RX \ + BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC) + +#define NFP_NET_TLS_OPCODE_MASK_TX \ + BIT(NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC) + +#define NFP_NET_TLS_OPCODE_MASK \ + (NFP_NET_TLS_OPCODE_MASK_RX | NFP_NET_TLS_OPCODE_MASK_TX) + +static void nfp_net_crypto_set_op(struct nfp_net *nn, u8 opcode, bool on) +{ + u32 off, val; + + off = nn->tlv_caps.crypto_enable_off + round_down(opcode / 8, 4); + + val = nn_readl(nn, off); + if (on) + val |= BIT(opcode & 31); + else + val &= ~BIT(opcode & 31); + nn_writel(nn, off, val); +} + +static bool +__nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add, + enum tls_offload_ctx_dir direction) +{ + u8 opcode; + int cnt; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + nn->ktls_tx_conn_cnt += add; + cnt = nn->ktls_tx_conn_cnt; + nn->dp.ktls_tx = !!nn->ktls_tx_conn_cnt; + } else { + opcode = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + nn->ktls_rx_conn_cnt += add; + cnt = nn->ktls_rx_conn_cnt; + } + + /* Care only about 0 -> 1 and 1 -> 0 transitions */ + if (cnt > 1) + return false; + + nfp_net_crypto_set_op(nn, opcode, cnt); + return true; +} + +static int +nfp_net_tls_conn_cnt_changed(struct nfp_net *nn, int add, + enum tls_offload_ctx_dir direction) +{ + int ret = 0; + + /* Use the BAR lock to protect the connection counts */ + nn_ctrl_bar_lock(nn); + if (__nfp_net_tls_conn_cnt_changed(nn, add, direction)) { + ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO); + /* Undo the cnt adjustment if failed */ + if (ret) + __nfp_net_tls_conn_cnt_changed(nn, -add, direction); + } + nn_ctrl_bar_unlock(nn); + + return ret; +} + +static int +nfp_net_tls_conn_add(struct nfp_net *nn, enum tls_offload_ctx_dir direction) +{ + return nfp_net_tls_conn_cnt_changed(nn, 1, direction); +} + +static int +nfp_net_tls_conn_remove(struct nfp_net *nn, enum tls_offload_ctx_dir direction) +{ + return nfp_net_tls_conn_cnt_changed(nn, -1, direction); +} + +static struct sk_buff * +nfp_net_tls_alloc_simple(struct nfp_net *nn, size_t req_sz, gfp_t flags) +{ + return nfp_ccm_mbox_msg_alloc(nn, req_sz, + sizeof(struct nfp_crypto_reply_simple), + flags); +} + +static int +nfp_net_tls_communicate_simple(struct nfp_net *nn, struct sk_buff *skb, + const char *name, enum nfp_ccm_type type) +{ + struct nfp_crypto_reply_simple *reply; + int err; + + err = __nfp_ccm_mbox_communicate(nn, skb, type, + sizeof(*reply), sizeof(*reply), + type == NFP_CCM_TYPE_CRYPTO_DEL); + if (err) { + nn_dp_warn(&nn->dp, "failed to %s TLS: %d\n", name, err); + return err; + } + + reply = (void *)skb->data; + err = -be32_to_cpu(reply->error); + if (err) + nn_dp_warn(&nn->dp, "failed to %s TLS, fw replied: %d\n", + name, err); + dev_consume_skb_any(skb); + + return err; +} + +static void nfp_net_tls_del_fw(struct nfp_net *nn, __be32 *fw_handle) +{ + struct nfp_crypto_req_del *req; + struct sk_buff *skb; + + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL); + if (!skb) + return; + + req = (void *)skb->data; + req->ep_id = 0; + memcpy(req->handle, fw_handle, sizeof(req->handle)); + + 
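/* Fire-and-forget: this helper returns void, so a failed delete is
+ * only logged inside nfp_net_tls_communicate_simple() below.
+ */
+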
nfp_net_tls_communicate_simple(nn, skb, "delete", + NFP_CCM_TYPE_CRYPTO_DEL); +} + +static void +nfp_net_tls_set_ipver_vlan(struct nfp_crypto_req_add_front *front, u8 ipver) +{ + front->ipver_vlan = cpu_to_be16(FIELD_PREP(NFP_NET_TLS_IPVER, ipver) | + FIELD_PREP(NFP_NET_TLS_VLAN, + NFP_NET_TLS_VLAN_UNUSED)); +} + +static void +nfp_net_tls_assign_conn_id(struct nfp_net *nn, + struct nfp_crypto_req_add_front *front) +{ + u32 len; + u64 id; + + id = atomic64_inc_return(&nn->ktls_conn_id_gen); + len = front->key_len - NFP_NET_TLS_NON_ADDR_KEY_LEN; + + memcpy(front->l3_addrs, &id, sizeof(id)); + memset(front->l3_addrs + sizeof(id), 0, len - sizeof(id)); +} + +static struct nfp_crypto_req_add_back * +nfp_net_tls_set_ipv4(struct nfp_net *nn, struct nfp_crypto_req_add_v4 *req, + struct sock *sk, int direction) +{ + struct inet_sock *inet = inet_sk(sk); + + req->front.key_len += sizeof(__be32) * 2; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + nfp_net_tls_assign_conn_id(nn, &req->front); + } else { + req->src_ip = inet->inet_daddr; + req->dst_ip = inet->inet_saddr; + } + + return &req->back; +} + +static struct nfp_crypto_req_add_back * +nfp_net_tls_set_ipv6(struct nfp_net *nn, struct nfp_crypto_req_add_v6 *req, + struct sock *sk, int direction) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6_pinfo *np = inet6_sk(sk); + + req->front.key_len += sizeof(struct in6_addr) * 2; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + nfp_net_tls_assign_conn_id(nn, &req->front); + } else { + memcpy(req->src_ip, &sk->sk_v6_daddr, sizeof(req->src_ip)); + memcpy(req->dst_ip, &np->saddr, sizeof(req->dst_ip)); + } + +#endif + return &req->back; +} + +static void +nfp_net_tls_set_l4(struct nfp_crypto_req_add_front *front, + struct nfp_crypto_req_add_back *back, struct sock *sk, + int direction) +{ + struct inet_sock *inet = inet_sk(sk); + + front->l4_proto = IPPROTO_TCP; + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + back->src_port = 0; + back->dst_port = 0; + } else { + back->src_port = inet->inet_dport; + back->dst_port = inet->inet_sport; + } +} + +static u8 nfp_tls_1_2_dir_to_opcode(enum tls_offload_ctx_dir direction) +{ + switch (direction) { + case TLS_OFFLOAD_CTX_DIR_TX: + return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + case TLS_OFFLOAD_CTX_DIR_RX: + return NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static bool +nfp_net_cipher_supported(struct nfp_net *nn, u16 cipher_type, + enum tls_offload_ctx_dir direction) +{ + u8 bit; + + switch (cipher_type) { + case TLS_CIPHER_AES_GCM_128: + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC; + else + bit = NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC; + break; + default: + return false; + } + + return nn->tlv_caps.crypto_ops & BIT(bit); +} + +static int +nfp_net_tls_add(struct net_device *netdev, struct sock *sk, + enum tls_offload_ctx_dir direction, + struct tls_crypto_info *crypto_info, + u32 start_offload_tcp_sn) +{ + struct tls12_crypto_info_aes_gcm_128 *tls_ci; + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_crypto_req_add_front *front; + struct nfp_net_tls_offload_ctx *ntls; + struct nfp_crypto_req_add_back *back; + struct nfp_crypto_reply_add *reply; + struct sk_buff *skb; + size_t req_sz; + void *req; + bool ipv6; + int err; + + BUILD_BUG_ON(sizeof(struct nfp_net_tls_offload_ctx) > + TLS_DRIVER_STATE_SIZE_TX); + BUILD_BUG_ON(offsetof(struct nfp_net_tls_offload_ctx, rx_end) > + TLS_DRIVER_STATE_SIZE_RX); + + if (!nfp_net_cipher_supported(nn, 
crypto_info->cipher_type, direction)) + return -EOPNOTSUPP; + + switch (sk->sk_family) { +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + if (sk->sk_ipv6only || + ipv6_addr_type(&sk->sk_v6_daddr) != IPV6_ADDR_MAPPED) { + req_sz = sizeof(struct nfp_crypto_req_add_v6); + ipv6 = true; + break; + } +#endif + /* fall through */ + case AF_INET: + req_sz = sizeof(struct nfp_crypto_req_add_v4); + ipv6 = false; + break; + default: + return -EOPNOTSUPP; + } + + err = nfp_net_tls_conn_add(nn, direction); + if (err) + return err; + + skb = nfp_ccm_mbox_msg_alloc(nn, req_sz, sizeof(*reply), GFP_KERNEL); + if (!skb) { + err = -ENOMEM; + goto err_conn_remove; + } + + front = (void *)skb->data; + front->ep_id = 0; + front->key_len = NFP_NET_TLS_NON_ADDR_KEY_LEN; + front->opcode = nfp_tls_1_2_dir_to_opcode(direction); + memset(front->resv, 0, sizeof(front->resv)); + + nfp_net_tls_set_ipver_vlan(front, ipv6 ? 6 : 4); + + req = (void *)skb->data; + if (ipv6) + back = nfp_net_tls_set_ipv6(nn, req, sk, direction); + else + back = nfp_net_tls_set_ipv4(nn, req, sk, direction); + + nfp_net_tls_set_l4(front, back, sk, direction); + + back->counter = 0; + back->tcp_seq = cpu_to_be32(start_offload_tcp_sn); + + tls_ci = (struct tls12_crypto_info_aes_gcm_128 *)crypto_info; + memcpy(back->key, tls_ci->key, TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memset(&back->key[TLS_CIPHER_AES_GCM_128_KEY_SIZE / 4], 0, + sizeof(back->key) - TLS_CIPHER_AES_GCM_128_KEY_SIZE); + memcpy(back->iv, tls_ci->iv, TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(&back->salt, tls_ci->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(back->rec_no, tls_ci->rec_seq, sizeof(tls_ci->rec_seq)); + + /* Get an extra ref on the skb so we can wipe the key after */ + skb_get(skb); + + err = nfp_ccm_mbox_communicate(nn, skb, NFP_CCM_TYPE_CRYPTO_ADD, + sizeof(*reply), sizeof(*reply)); + reply = (void *)skb->data; + + /* We depend on CCM MBOX code not reallocating skb we sent + * so we can clear the key material out of the memory. 
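+ * (the WARN_ON_ONCE() bounds checks below merely confirm that 'back'
+ * still sits inside the skb data before memzero_explicit() wipes it)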
+ */ + if (!WARN_ON_ONCE((u8 *)back < skb->head || + (u8 *)back > skb_end_pointer(skb)) && + !WARN_ON_ONCE((u8 *)&reply[1] > (u8 *)back)) + memzero_explicit(back, sizeof(*back)); + dev_consume_skb_any(skb); /* the extra ref from skb_get() above */ + + if (err) { + nn_dp_warn(&nn->dp, "failed to add TLS: %d (%d)\n", + err, direction == TLS_OFFLOAD_CTX_DIR_TX); + /* communicate frees skb on error */ + goto err_conn_remove; + } + + err = -be32_to_cpu(reply->error); + if (err) { + if (err == -ENOSPC) { + if (!atomic_fetch_inc(&nn->ktls_no_space)) + nn_info(nn, "HW TLS table full\n"); + } else { + nn_dp_warn(&nn->dp, + "failed to add TLS, FW replied: %d\n", err); + } + goto err_free_skb; + } + + if (!reply->handle[0] && !reply->handle[1]) { + nn_dp_warn(&nn->dp, "FW returned NULL handle\n"); + err = -EINVAL; + goto err_fw_remove; + } + + ntls = tls_driver_ctx(sk, direction); + memcpy(ntls->fw_handle, reply->handle, sizeof(ntls->fw_handle)); + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + ntls->next_seq = start_offload_tcp_sn; + dev_consume_skb_any(skb); + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) + return 0; + + tls_offload_rx_resync_set_type(sk, + TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT); + return 0; + +err_fw_remove: + nfp_net_tls_del_fw(nn, reply->handle); +err_free_skb: + dev_consume_skb_any(skb); +err_conn_remove: + nfp_net_tls_conn_remove(nn, direction); + return err; +} + +static void +nfp_net_tls_del(struct net_device *netdev, struct tls_context *tls_ctx, + enum tls_offload_ctx_dir direction) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + + nfp_net_tls_conn_remove(nn, direction); + + ntls = __tls_driver_ctx(tls_ctx, direction); + nfp_net_tls_del_fw(nn, ntls->fw_handle); +} + +static int +nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq, + u8 *rcd_sn, enum tls_offload_ctx_dir direction) +{ + struct nfp_net *nn = netdev_priv(netdev); + struct nfp_net_tls_offload_ctx *ntls; + struct nfp_crypto_req_update *req; + struct sk_buff *skb; + gfp_t flags; + int err; + + flags = direction == TLS_OFFLOAD_CTX_DIR_TX ? 
GFP_KERNEL : GFP_ATOMIC; + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), flags); + if (!skb) + return -ENOMEM; + + ntls = tls_driver_ctx(sk, direction); + req = (void *)skb->data; + req->ep_id = 0; + req->opcode = nfp_tls_1_2_dir_to_opcode(direction); + memset(req->resv, 0, sizeof(req->resv)); + memcpy(req->handle, ntls->fw_handle, sizeof(ntls->fw_handle)); + req->tcp_seq = cpu_to_be32(seq); + memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no)); + + if (direction == TLS_OFFLOAD_CTX_DIR_TX) { + err = nfp_net_tls_communicate_simple(nn, skb, "sync", + NFP_CCM_TYPE_CRYPTO_UPDATE); + if (err) + return err; + ntls->next_seq = seq; + } else { + nfp_ccm_mbox_post(nn, skb, NFP_CCM_TYPE_CRYPTO_UPDATE, + sizeof(struct nfp_crypto_reply_simple)); + } + + return 0; +} + +static const struct tlsdev_ops nfp_net_tls_ops = { + .tls_dev_add = nfp_net_tls_add, + .tls_dev_del = nfp_net_tls_del, + .tls_dev_resync = nfp_net_tls_resync, +}; + +static int nfp_net_tls_reset(struct nfp_net *nn) +{ + struct nfp_crypto_req_reset *req; + struct sk_buff *skb; + + skb = nfp_net_tls_alloc_simple(nn, sizeof(*req), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + req = (void *)skb->data; + req->ep_id = 0; + + return nfp_net_tls_communicate_simple(nn, skb, "reset", + NFP_CCM_TYPE_CRYPTO_RESET); +} + +int nfp_net_tls_init(struct nfp_net *nn) +{ + struct net_device *netdev = nn->dp.netdev; + int err; + + if (!(nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK)) + return 0; + + if ((nn->tlv_caps.mbox_cmsg_types & NFP_NET_TLS_CCM_MBOX_OPS_MASK) != + NFP_NET_TLS_CCM_MBOX_OPS_MASK) + return 0; + + if (!nfp_ccm_mbox_fits(nn, sizeof(struct nfp_crypto_req_add_v6))) { + nn_warn(nn, "disabling TLS offload - mbox too small: %d\n", + nn->tlv_caps.mbox_len); + return 0; + } + + err = nfp_net_tls_reset(nn); + if (err) + return err; + + nn_ctrl_bar_lock(nn); + nn_writel(nn, nn->tlv_caps.crypto_enable_off, 0); + err = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_CRYPTO); + nn_ctrl_bar_unlock(nn); + if (err) + return err; + + if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_RX) { + netdev->hw_features |= NETIF_F_HW_TLS_RX; + netdev->features |= NETIF_F_HW_TLS_RX; + } + if (nn->tlv_caps.crypto_ops & NFP_NET_TLS_OPCODE_MASK_TX) { + netdev->hw_features |= NETIF_F_HW_TLS_TX; + netdev->features |= NETIF_F_HW_TLS_TX; + } + + netdev->tlsdev_ops = &nfp_net_tls_ops; + + return 0; +} diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index c56e31d9f8a4..5a54fe848de4 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -54,7 +54,8 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan, static int nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, - struct nfp_fl_payload *nfp_flow, int act_len) + struct nfp_fl_payload *nfp_flow, int act_len, + struct netlink_ext_ack *extack) { size_t act_size = sizeof(struct nfp_fl_pre_lag); struct nfp_fl_pre_lag *pre_lag; @@ -65,8 +66,10 @@ nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, if (!out_dev || !netif_is_lag_master(out_dev)) return 0; - if (act_len + act_size > NFP_FL_MAX_A_SIZ) + if (act_len + act_size > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at LAG action"); return -EOPNOTSUPP; + } /* Pre_lag action must be first on action list. * If other actions already exist they need pushed forward. 
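* (any existing action_data is shifted up by the pre_lag size so the
* new entry lands at offset zero)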
@@ -76,7 +79,7 @@ nfp_fl_pre_lag(struct nfp_app *app, const struct flow_action_entry *act, nfp_flow->action_data, act_len); pre_lag = (struct nfp_fl_pre_lag *)nfp_flow->action_data; - err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag); + err = nfp_flower_lag_populate_pre_action(app, out_dev, pre_lag, extack); if (err) return err; @@ -93,7 +96,8 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, const struct flow_action_entry *act, struct nfp_fl_payload *nfp_flow, bool last, struct net_device *in_dev, - enum nfp_flower_tun_type tun_type, int *tun_out_cnt) + enum nfp_flower_tun_type tun_type, int *tun_out_cnt, + struct netlink_ext_ack *extack) { size_t act_size = sizeof(struct nfp_fl_output); struct nfp_flower_priv *priv = app->priv; @@ -104,18 +108,24 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, output->head.len_lw = act_size >> NFP_FL_LW_SIZ; out_dev = act->dev; - if (!out_dev) + if (!out_dev) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid egress interface for mirred action"); return -EOPNOTSUPP; + } tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0; if (tun_type) { /* Verify the egress netdev matches the tunnel type. */ - if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) + if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface does not match the required tunnel type"); return -EOPNOTSUPP; + } - if (*tun_out_cnt) + if (*tun_out_cnt) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: cannot offload more than one tunnel mirred output per filter"); return -EOPNOTSUPP; + } (*tun_out_cnt)++; output->flags = cpu_to_be16(tmp_flags | @@ -127,8 +137,10 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, output->flags = cpu_to_be16(tmp_flags); gid = nfp_flower_lag_get_output_id(app, out_dev); - if (gid < 0) + if (gid < 0) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot find group id for LAG action"); return gid; + } output->port = cpu_to_be32(NFP_FL_LAG_OUT | gid); } else { /* Set action output parameters. */ @@ -136,29 +148,58 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output, if (nfp_netdev_is_nfp_repr(in_dev)) { /* Confirm ingress and egress are on same device. 
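* (i.e. netdev_port_same_parent_id() must report the same switchdev
* parent id for both netdevs)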
*/ - if (!netdev_port_same_parent_id(in_dev, out_dev)) + if (!netdev_port_same_parent_id(in_dev, out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress and egress interfaces are on different devices"); return -EOPNOTSUPP; + } } - if (!nfp_netdev_is_nfp_repr(out_dev)) + if (!nfp_netdev_is_nfp_repr(out_dev)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: egress interface is not an nfp port"); return -EOPNOTSUPP; + } output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev)); - if (!output->port) + if (!output->port) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid port id for egress interface"); return -EOPNOTSUPP; + } } nfp_flow->meta.shortcut = output->port; return 0; } +static bool +nfp_flower_tun_is_gre(struct flow_cls_offload *flow, int start_idx) +{ + struct flow_action_entry *act = flow->rule->action.entries; + int num_act = flow->rule->action.num_entries; + int act_idx; + + /* Preparse action list for next mirred or redirect action */ + for (act_idx = start_idx + 1; act_idx < num_act; act_idx++) + if (act[act_idx].id == FLOW_ACTION_REDIRECT || + act[act_idx].id == FLOW_ACTION_MIRRED) + return netif_is_gretap(act[act_idx].dev); + + return false; +} + static enum nfp_flower_tun_type -nfp_fl_get_tun_from_act_l4_port(struct nfp_app *app, - const struct flow_action_entry *act) +nfp_fl_get_tun_from_act(struct nfp_app *app, + struct flow_cls_offload *flow, + const struct flow_action_entry *act, int act_idx) { const struct ip_tunnel_info *tun = act->tunnel; struct nfp_flower_priv *priv = app->priv; + /* Determine the tunnel type based on the egress netdev + * in the mirred action for tunnels without l4. + */ + if (nfp_flower_tun_is_gre(flow, act_idx)) + return NFP_FL_TUNNEL_GRE; + switch (tun->key.tp_dst) { case htons(IANA_VXLAN_UDP_PORT): return NFP_FL_TUNNEL_VXLAN; @@ -194,7 +235,8 @@ static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len) static int nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, - const struct flow_action_entry *act) + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) { struct ip_tunnel_info *ip_tun = (struct ip_tunnel_info *)act->tunnel; int opt_len, opt_cnt, act_start, tot_push_len; @@ -212,20 +254,26 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, struct geneve_opt *opt = (struct geneve_opt *)src; opt_cnt++; - if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) + if (opt_cnt > NFP_FL_MAX_GENEVE_OPT_CNT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed number of geneve options exceeded"); return -EOPNOTSUPP; + } tot_push_len += sizeof(struct nfp_fl_push_geneve) + opt->length * 4; - if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) + if (tot_push_len > NFP_FL_MAX_GENEVE_OPT_ACT) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options"); return -EOPNOTSUPP; + } opt_len -= sizeof(struct geneve_opt) + opt->length * 4; src += sizeof(struct geneve_opt) + opt->length * 4; } - if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) + if (*list_len + tot_push_len > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push geneve options"); return -EOPNOTSUPP; + } act_start = *list_len; *list_len += tot_push_len; @@ -256,14 +304,13 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len, } static int -nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, - struct nfp_fl_set_ipv4_udp_tun *set_tun, - const struct 
flow_action_entry *act, - struct nfp_fl_pre_tunnel *pre_tun, - enum nfp_flower_tun_type tun_type, - struct net_device *netdev) +nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun, + const struct flow_action_entry *act, + struct nfp_fl_pre_tunnel *pre_tun, + enum nfp_flower_tun_type tun_type, + struct net_device *netdev, struct netlink_ext_ack *extack) { - size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); + size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun); const struct ip_tunnel_info *ip_tun = act->tunnel; struct nfp_flower_priv *priv = app->priv; u32 tmp_set_ip_tun_type_index = 0; @@ -275,8 +322,10 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT); if (ip_tun->options_len && (tun_type != NFP_FL_TUNNEL_GENEVE || - !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) + !(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve options offload"); return -EOPNOTSUPP; + } set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -316,8 +365,10 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_app *app, set_tun->tos = ip_tun->key.tos; if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) || - ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) + ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload"); return -EOPNOTSUPP; + } set_tun->tun_flags = ip_tun->key.tun_flags; if (tun_type == NFP_FL_TUNNEL_GENEVE) { @@ -345,18 +396,22 @@ static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask) static int nfp_fl_set_eth(const struct flow_action_entry *act, u32 off, - struct nfp_fl_set_eth *set_eth) + struct nfp_fl_set_eth *set_eth, struct netlink_ext_ack *extack) { u32 exact, mask; - if (off + 4 > ETH_ALEN * 2) + if (off + 4 > ETH_ALEN * 2) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action"); return -EOPNOTSUPP; + } mask = ~act->mangle.mask; exact = act->mangle.val; - if (exact & ~mask) + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit ethernet action"); return -EOPNOTSUPP; + } nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off], &set_eth->eth_addr_mask[off]); @@ -377,7 +432,8 @@ struct ipv4_ttl_word { static int nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_ip4_addrs *set_ip_addr, - struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos) + struct nfp_fl_set_ip4_ttl_tos *set_ip_ttl_tos, + struct netlink_ext_ack *extack) { struct ipv4_ttl_word *ttl_word_mask; struct ipv4_ttl_word *ttl_word; @@ -389,8 +445,10 @@ nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, mask = (__force __be32)~act->mangle.mask; exact = (__force __be32)act->mangle.val; - if (exact & ~mask) + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 action"); return -EOPNOTSUPP; + } switch (off) { case offsetof(struct iphdr, daddr): @@ -413,8 +471,10 @@ nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, ttl_word_mask = (struct ipv4_ttl_word *)&mask; ttl_word = (struct ipv4_ttl_word *)&exact; - if (ttl_word_mask->protocol || ttl_word_mask->check) + if (ttl_word_mask->protocol || ttl_word_mask->check) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 ttl action"); return -EOPNOTSUPP; + } 
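/* Fold the pedit word into the combined ttl/tos set action: record
* the masked bits, then replace the corresponding value bits.
*/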
set_ip_ttl_tos->ipv4_ttl_mask |= ttl_word_mask->ttl; set_ip_ttl_tos->ipv4_ttl &= ~ttl_word_mask->ttl; @@ -429,8 +489,10 @@ nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, tos_word = (struct iphdr *)&exact; if (tos_word_mask->version || tos_word_mask->ihl || - tos_word_mask->tot_len) + tos_word_mask->tot_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv4 tos action"); return -EOPNOTSUPP; + } set_ip_ttl_tos->ipv4_tos_mask |= tos_word_mask->tos; set_ip_ttl_tos->ipv4_tos &= ~tos_word_mask->tos; @@ -441,6 +503,7 @@ nfp_fl_set_ip4(const struct flow_action_entry *act, u32 off, NFP_FL_LW_SIZ; break; default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv4 header"); return -EOPNOTSUPP; } @@ -468,7 +531,8 @@ struct ipv6_hop_limit_word { static int nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, - struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl) + struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl, + struct netlink_ext_ack *extack) { struct ipv6_hop_limit_word *fl_hl_mask; struct ipv6_hop_limit_word *fl_hl; @@ -478,8 +542,10 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, fl_hl_mask = (struct ipv6_hop_limit_word *)&mask; fl_hl = (struct ipv6_hop_limit_word *)&exact; - if (fl_hl_mask->nexthdr || fl_hl_mask->payload_len) + if (fl_hl_mask->nexthdr || fl_hl_mask->payload_len) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 hop limit action"); return -EOPNOTSUPP; + } ip_hl_fl->ipv6_hop_limit_mask |= fl_hl_mask->hop_limit; ip_hl_fl->ipv6_hop_limit &= ~fl_hl_mask->hop_limit; @@ -488,8 +554,10 @@ nfp_fl_set_ip6_hop_limit_flow_label(u32 off, __be32 exact, __be32 mask, break; case round_down(offsetof(struct ipv6hdr, flow_lbl), 4): if (mask & ~IPV6_FLOW_LABEL_MASK || - exact & ~IPV6_FLOW_LABEL_MASK) + exact & ~IPV6_FLOW_LABEL_MASK) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 flow label action"); return -EOPNOTSUPP; + } ip_hl_fl->ipv6_label_mask |= mask; ip_hl_fl->ipv6_label &= ~mask; @@ -507,7 +575,8 @@ static int nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, struct nfp_fl_set_ipv6_addr *ip_dst, struct nfp_fl_set_ipv6_addr *ip_src, - struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl) + struct nfp_fl_set_ipv6_tc_hl_fl *ip_hl_fl, + struct netlink_ext_ack *extack) { __be32 exact, mask; int err = 0; @@ -517,12 +586,14 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, mask = (__force __be32)~act->mangle.mask; exact = (__force __be32)act->mangle.val; - if (exact & ~mask) + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit IPv6 action"); return -EOPNOTSUPP; + } if (off < offsetof(struct ipv6hdr, saddr)) { err = nfp_fl_set_ip6_hop_limit_flow_label(off, exact, mask, - ip_hl_fl); + ip_hl_fl, extack); } else if (off < offsetof(struct ipv6hdr, daddr)) { word = (off - offsetof(struct ipv6hdr, saddr)) / sizeof(exact); nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, word, @@ -533,6 +604,7 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, word, exact, mask, ip_dst); } else { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of IPv6 header"); return -EOPNOTSUPP; } @@ -541,18 +613,23 @@ nfp_fl_set_ip6(const struct flow_action_entry *act, u32 off, static int nfp_fl_set_tport(const struct flow_action_entry *act, u32 off, - struct nfp_fl_set_tport *set_tport, int opcode) + struct nfp_fl_set_tport 
*set_tport, int opcode, + struct netlink_ext_ack *extack) { u32 exact, mask; - if (off) + if (off) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported section of L4 header"); return -EOPNOTSUPP; + } mask = ~act->mangle.mask; exact = act->mangle.val; - if (exact & ~mask) + if (exact & ~mask) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid pedit L4 action"); return -EOPNOTSUPP; + } nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val, set_tport->tp_port_mask); @@ -592,11 +669,11 @@ struct nfp_flower_pedit_acts { }; static int -nfp_fl_commit_mangle(struct tc_cls_flower_offload *flow, char *nfp_action, +nfp_fl_commit_mangle(struct flow_cls_offload *flow, char *nfp_action, int *a_len, struct nfp_flower_pedit_acts *set_act, u32 *csum_updated) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); size_t act_size = 0; u8 ip_proto = 0; @@ -694,8 +771,9 @@ nfp_fl_commit_mangle(struct tc_cls_flower_offload *flow, char *nfp_action, static int nfp_fl_pedit(const struct flow_action_entry *act, - struct tc_cls_flower_offload *flow, char *nfp_action, int *a_len, - u32 *csum_updated, struct nfp_flower_pedit_acts *set_act) + struct flow_cls_offload *flow, char *nfp_action, int *a_len, + u32 *csum_updated, struct nfp_flower_pedit_acts *set_act, + struct netlink_ext_ack *extack) { enum flow_action_mangle_base htype; u32 offset; @@ -705,21 +783,22 @@ nfp_fl_pedit(const struct flow_action_entry *act, switch (htype) { case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH: - return nfp_fl_set_eth(act, offset, &set_act->set_eth); + return nfp_fl_set_eth(act, offset, &set_act->set_eth, extack); case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4: return nfp_fl_set_ip4(act, offset, &set_act->set_ip_addr, - &set_act->set_ip_ttl_tos); + &set_act->set_ip_ttl_tos, extack); case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6: return nfp_fl_set_ip6(act, offset, &set_act->set_ip6_dst, &set_act->set_ip6_src, - &set_act->set_ip6_tc_hl_fl); + &set_act->set_ip6_tc_hl_fl, extack); case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP: return nfp_fl_set_tport(act, offset, &set_act->set_tport, - NFP_FL_ACTION_OPCODE_SET_TCP); + NFP_FL_ACTION_OPCODE_SET_TCP, extack); case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP: return nfp_fl_set_tport(act, offset, &set_act->set_tport, - NFP_FL_ACTION_OPCODE_SET_UDP); + NFP_FL_ACTION_OPCODE_SET_UDP, extack); default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: pedit on unsupported header"); return -EOPNOTSUPP; } } @@ -730,7 +809,8 @@ nfp_flower_output_action(struct nfp_app *app, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, bool last, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, - int *out_cnt, u32 *csum_updated) + int *out_cnt, u32 *csum_updated, + struct netlink_ext_ack *extack) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_output *output; @@ -739,15 +819,19 @@ nfp_flower_output_action(struct nfp_app *app, /* If csum_updated has not been reset by now, it means HW will * incorrectly update csums when they are not requested. 
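* Each set/pedit action records the checksums the firmware will
* rewrite in *csum_updated, and an explicit csum action acknowledges
* and clears those bits; a non-zero value at output time therefore
* means e.g. an IPv4 rewrite was requested without a matching csum
* update action.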
*/ - if (*csum_updated) + if (*csum_updated) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: set actions without updating checksums are not supported"); return -EOPNOTSUPP; + } - if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) + if (*a_len + sizeof(struct nfp_fl_output) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: mirred output increases action list size beyond the allowed maximum"); return -EOPNOTSUPP; + } output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len]; err = nfp_fl_output(app, output, act, nfp_fl, last, netdev, *tun_type, - tun_out_cnt); + tun_out_cnt, extack); if (err) return err; @@ -757,11 +841,13 @@ nfp_flower_output_action(struct nfp_app *app, /* nfp_fl_pre_lag returns -err or size of prelag action added. * This will be 0 if it is not egressing to a lag dev. */ - prelag_size = nfp_fl_pre_lag(app, act, nfp_fl, *a_len); - if (prelag_size < 0) + prelag_size = nfp_fl_pre_lag(app, act, nfp_fl, *a_len, extack); + if (prelag_size < 0) { return prelag_size; - else if (prelag_size > 0 && (!last || *out_cnt)) + } else if (prelag_size > 0 && (!last || *out_cnt)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: LAG action has to be last action in action list"); return -EOPNOTSUPP; + } *a_len += prelag_size; } @@ -772,14 +858,15 @@ nfp_flower_output_action(struct nfp_app *app, static int nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_payload *nfp_fl, int *a_len, struct net_device *netdev, enum nfp_flower_tun_type *tun_type, int *tun_out_cnt, int *out_cnt, u32 *csum_updated, - struct nfp_flower_pedit_acts *set_act) + struct nfp_flower_pedit_acts *set_act, + struct netlink_ext_ack *extack, int act_idx) { - struct nfp_fl_set_ipv4_udp_tun *set_tun; + struct nfp_fl_set_ipv4_tun *set_tun; struct nfp_fl_pre_tunnel *pre_tun; struct nfp_fl_push_vlan *psh_v; struct nfp_fl_pop_vlan *pop_v; @@ -792,20 +879,23 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, case FLOW_ACTION_REDIRECT: err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, true, tun_type, tun_out_cnt, - out_cnt, csum_updated); + out_cnt, csum_updated, extack); if (err) return err; break; case FLOW_ACTION_MIRRED: err = nfp_flower_output_action(app, act, nfp_fl, a_len, netdev, false, tun_type, tun_out_cnt, - out_cnt, csum_updated); + out_cnt, csum_updated, extack); if (err) return err; break; case FLOW_ACTION_VLAN_POP: - if (*a_len + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) + if (*a_len + + sizeof(struct nfp_fl_pop_vlan) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at pop vlan"); return -EOPNOTSUPP; + } pop_v = (struct nfp_fl_pop_vlan *)&nfp_fl->action_data[*a_len]; nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_POPV); @@ -814,8 +904,11 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, *a_len += sizeof(struct nfp_fl_pop_vlan); break; case FLOW_ACTION_VLAN_PUSH: - if (*a_len + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) + if (*a_len + + sizeof(struct nfp_fl_push_vlan) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at push vlan"); return -EOPNOTSUPP; + } psh_v = (struct nfp_fl_push_vlan *)&nfp_fl->action_data[*a_len]; nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); @@ -826,35 +919,41 @@ nfp_flower_loop_action(struct nfp_app 
*app, const struct flow_action_entry *act, case FLOW_ACTION_TUNNEL_ENCAP: { const struct ip_tunnel_info *ip_tun = act->tunnel; - *tun_type = nfp_fl_get_tun_from_act_l4_port(app, act); - if (*tun_type == NFP_FL_TUNNEL_NONE) + *tun_type = nfp_fl_get_tun_from_act(app, flow, act, act_idx); + if (*tun_type == NFP_FL_TUNNEL_NONE) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel type in action list"); return -EOPNOTSUPP; + } - if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS) + if (ip_tun->mode & ~NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported tunnel flags in action list"); return -EOPNOTSUPP; + } /* Pre-tunnel action is required for tunnel encap. * This checks for next hop entries on NFP. * If none, the packet falls back before applying other actions. */ if (*a_len + sizeof(struct nfp_fl_pre_tunnel) + - sizeof(struct nfp_fl_set_ipv4_udp_tun) > NFP_FL_MAX_A_SIZ) + sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap"); return -EOPNOTSUPP; + } pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len); nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL); *a_len += sizeof(struct nfp_fl_pre_tunnel); - err = nfp_fl_push_geneve_options(nfp_fl, a_len, act); + err = nfp_fl_push_geneve_options(nfp_fl, a_len, act, extack); if (err) return err; set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(app, set_tun, act, pre_tun, - *tun_type, netdev); + err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun, + *tun_type, netdev, extack); if (err) return err; - *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); + *a_len += sizeof(struct nfp_fl_set_ipv4_tun); } break; case FLOW_ACTION_TUNNEL_DECAP: @@ -862,13 +961,15 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, return 0; case FLOW_ACTION_MANGLE: if (nfp_fl_pedit(act, flow, &nfp_fl->action_data[*a_len], - a_len, csum_updated, set_act)) + a_len, csum_updated, set_act, extack)) return -EOPNOTSUPP; break; case FLOW_ACTION_CSUM: /* csum action requests recalc of something we have not fixed */ - if (act->csum_flags & ~*csum_updated) + if (act->csum_flags & ~*csum_updated) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported csum update action in action list"); return -EOPNOTSUPP; + } /* If we will correctly fix the csum we can remove it from the * csum update list. Which will later be used to check support. */ @@ -876,6 +977,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act, break; default: /* Currently we do not handle any other actions. 
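* They are rejected with -EOPNOTSUPP so the flow is not offloaded.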
*/ + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unsupported action in action list"); return -EOPNOTSUPP; } @@ -919,9 +1021,10 @@ static bool nfp_fl_check_mangle_end(struct flow_action *flow_act, } int nfp_flower_compile_action(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct net_device *netdev, - struct nfp_fl_payload *nfp_flow) + struct nfp_fl_payload *nfp_flow, + struct netlink_ext_ack *extack) { int act_len, act_cnt, err, tun_out_cnt, out_cnt, i; struct nfp_flower_pedit_acts set_act; @@ -942,7 +1045,8 @@ int nfp_flower_compile_action(struct nfp_app *app, memset(&set_act, 0, sizeof(set_act)); err = nfp_flower_loop_action(app, act, flow, nfp_flow, &act_len, netdev, &tun_type, &tun_out_cnt, - &out_cnt, &csum_updated, &set_act); + &out_cnt, &csum_updated, + &set_act, extack, i); if (err) return err; act_cnt++; diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index 537f7fc19584..0f1706ae5bfc 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -8,6 +8,7 @@ #include <linux/skbuff.h> #include <linux/types.h> #include <net/geneve.h> +#include <net/gre.h> #include <net/vxlan.h> #include "../nfp_app.h" @@ -22,6 +23,7 @@ #define NFP_FLOWER_LAYER_CT BIT(6) #define NFP_FLOWER_LAYER_VXLAN BIT(7) +#define NFP_FLOWER_LAYER2_GRE BIT(0) #define NFP_FLOWER_LAYER2_GENEVE BIT(5) #define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6) @@ -37,6 +39,9 @@ #define NFP_FL_IP_FRAG_FIRST BIT(7) #define NFP_FL_IP_FRAGMENTED BIT(6) +/* GRE Tunnel flags */ +#define NFP_FL_GRE_FLAG_KEY BIT(2) + /* Compressed HW representation of TCP Flags */ #define NFP_FL_TCP_FLAG_URG BIT(4) #define NFP_FL_TCP_FLAG_PSH BIT(3) @@ -107,6 +112,7 @@ enum nfp_flower_tun_type { NFP_FL_TUNNEL_NONE = 0, + NFP_FL_TUNNEL_GRE = 1, NFP_FL_TUNNEL_VXLAN = 2, NFP_FL_TUNNEL_GENEVE = 4, }; @@ -203,7 +209,7 @@ struct nfp_fl_pre_tunnel { __be32 extra[3]; }; -struct nfp_fl_set_ipv4_udp_tun { +struct nfp_fl_set_ipv4_tun { struct nfp_fl_act_head head; __be16 reserved; __be64 tun_id __packed; @@ -354,6 +360,16 @@ struct nfp_flower_ipv6 { struct in6_addr ipv6_dst; }; +struct nfp_flower_tun_ipv4 { + __be32 src; + __be32 dst; +}; + +struct nfp_flower_tun_ip_ext { + u8 tos; + u8 ttl; +}; + /* Flow Frame IPv4 UDP TUNNEL --> Tunnel details (4W/16B) * ----------------------------------------------------------------- * 3 2 1 @@ -371,15 +387,42 @@ struct nfp_flower_ipv6 { * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct nfp_flower_ipv4_udp_tun { - __be32 ip_src; - __be32 ip_dst; + struct nfp_flower_tun_ipv4 ipv4; __be16 reserved1; - u8 tos; - u8 ttl; + struct nfp_flower_tun_ip_ext ip_ext; __be32 reserved2; __be32 tun_id; }; +/* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B) + * ----------------------------------------------------------------- + * 3 2 1 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_src | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ipv4_addr_dst | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | tun_flags | tos | ttl | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | Ethertype | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Reserved | + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +struct nfp_flower_ipv4_gre_tun { + struct nfp_flower_tun_ipv4 ipv4; + __be16 tun_flags; + struct nfp_flower_tun_ip_ext ip_ext; + __be16 reserved1; + __be16 ethertype; + __be32 tun_key; + __be32 reserved2; +}; + struct nfp_flower_geneve_options { u8 data[NFP_FL_MAX_GENEVE_OPT_KEY]; }; @@ -530,6 +573,8 @@ nfp_fl_netdev_is_tunnel_type(struct net_device *netdev, { if (netif_is_vxlan(netdev)) return tun_type == NFP_FL_TUNNEL_VXLAN; + if (netif_is_gretap(netdev)) + return tun_type == NFP_FL_TUNNEL_GRE; if (netif_is_geneve(netdev)) return tun_type == NFP_FL_TUNNEL_GENEVE; @@ -546,6 +591,8 @@ static inline bool nfp_fl_is_netdev_to_offload(struct net_device *netdev) return true; if (netif_is_geneve(netdev)) return true; + if (netif_is_gretap(netdev)) + return true; return false; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c index 5db838f45694..63907aeb3884 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/lag_conf.c @@ -156,7 +156,8 @@ nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag, int nfp_flower_lag_populate_pre_action(struct nfp_app *app, struct net_device *master, - struct nfp_fl_pre_lag *pre_act) + struct nfp_fl_pre_lag *pre_act, + struct netlink_ext_ack *extack) { struct nfp_flower_priv *priv = app->priv; struct nfp_fl_lag_group *group = NULL; @@ -167,6 +168,7 @@ int nfp_flower_lag_populate_pre_action(struct nfp_app *app, master); if (!group) { mutex_unlock(&priv->nfp_lag.lock); + NL_SET_ERR_MSG_MOD(extack, "invalid entry: group does not exist for LAG action"); return -ENOENT; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h index 40957a8dbfe6..af9441d5787f 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.h +++ b/drivers/net/ethernet/netronome/nfp/flower/main.h @@ -343,19 +343,22 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow1, struct nfp_fl_payload *sub_flow2); int nfp_flower_compile_flow_match(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, - enum nfp_flower_tun_type tun_type); + enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack); int nfp_flower_compile_action(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct net_device *netdev, - struct nfp_fl_payload *nfp_flow); + struct nfp_fl_payload *nfp_flow, + struct netlink_ext_ack *extack); int nfp_compile_flow_metadata(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_payload *nfp_flow, - struct net_device *netdev); + struct net_device *netdev, + struct netlink_ext_ack *extack); void __nfp_modify_flow_metadata(struct nfp_flower_priv *priv, struct nfp_fl_payload *nfp_flow); int nfp_modify_flow_metadata(struct nfp_app *app, @@ -389,7 +392,8 @@ int nfp_flower_lag_netdev_event(struct nfp_flower_priv *priv, bool nfp_flower_lag_unprocessed_msg(struct nfp_app *app, struct sk_buff *skb); int nfp_flower_lag_populate_pre_action(struct nfp_app *app, struct net_device *master, - struct nfp_fl_pre_lag *pre_act); + struct nfp_fl_pre_lag *pre_act, + struct netlink_ext_ack *extack); int nfp_flower_lag_get_output_id(struct nfp_app *app, struct net_device 
*master); void nfp_flower_qos_init(struct nfp_app *app); diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c index bfa4bf34911d..9cc3ba17ff69 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/match.c +++ b/drivers/net/ethernet/netronome/nfp/flower/match.c @@ -10,9 +10,9 @@ static void nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext, struct nfp_flower_meta_tci *msk, - struct tc_cls_flower_offload *flow, u8 key_type) + struct flow_cls_offload *flow, u8 key_type) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); u16 tmp_tci; memset(ext, 0, sizeof(struct nfp_flower_meta_tci)); @@ -54,7 +54,8 @@ nfp_flower_compile_ext_meta(struct nfp_flower_ext_meta *frame, u32 key_ext) static int nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, - bool mask_version, enum nfp_flower_tun_type tun_type) + bool mask_version, enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack) { if (mask_version) { frame->in_port = cpu_to_be32(~0); @@ -64,8 +65,10 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, if (tun_type) { frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type); } else { - if (!cmsg_port) + if (!cmsg_port) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: invalid ingress interface for match offload"); return -EOPNOTSUPP; + } frame->in_port = cpu_to_be32(cmsg_port); } @@ -75,9 +78,9 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port, static void nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, struct nfp_flower_mac_mpls *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); memset(ext, 0, sizeof(struct nfp_flower_mac_mpls)); memset(msk, 0, sizeof(struct nfp_flower_mac_mpls)); @@ -127,9 +130,9 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext, static void nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, struct nfp_flower_tp_ports *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); memset(ext, 0, sizeof(struct nfp_flower_tp_ports)); memset(msk, 0, sizeof(struct nfp_flower_tp_ports)); @@ -148,9 +151,9 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext, static void nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, struct nfp_flower_ip_ext *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { struct flow_match_basic match; @@ -222,9 +225,9 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext, static void nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, struct nfp_flower_ipv4 *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_match_ipv4_addrs match; memset(ext, 0, sizeof(struct nfp_flower_ipv4)); @@ -244,9 +247,9 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext, static void nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, struct 
nfp_flower_ipv6 *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); memset(ext, 0, sizeof(struct nfp_flower_ipv6)); memset(msk, 0, sizeof(struct nfp_flower_ipv6)); @@ -266,7 +269,7 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext, static int nfp_flower_compile_geneve_opt(void *ext, void *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { struct flow_match_enc_opts match; @@ -278,11 +281,76 @@ nfp_flower_compile_geneve_opt(void *ext, void *msk, } static void +nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext, + struct nfp_flower_tun_ipv4 *msk, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + ext->src = match.key->src; + ext->dst = match.key->dst; + msk->src = match.mask->src; + msk->dst = match.mask->dst; + } +} + +static void +nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext, + struct nfp_flower_tun_ip_ext *msk, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + ext->tos = match.key->tos; + ext->ttl = match.key->ttl; + msk->tos = match.mask->tos; + msk->ttl = match.mask->ttl; + } +} + +static void +nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext, + struct nfp_flower_ipv4_gre_tun *msk, + struct flow_cls_offload *flow) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); + + memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun)); + + /* NVGRE is the only supported GRE tunnel type */ + ext->ethertype = cpu_to_be16(ETH_P_TEB); + msk->ethertype = cpu_to_be16(~0); + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid match; + + flow_rule_match_enc_keyid(rule, &match); + ext->tun_key = match.key->keyid; + msk->tun_key = match.mask->keyid; + + ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY); + } + + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); +} + +static void nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, struct nfp_flower_ipv4_udp_tun *msk, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun)); @@ -298,33 +366,17 @@ nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext, msk->tun_id = cpu_to_be32(temp_vni); } - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { - struct flow_match_ipv4_addrs match; - - flow_rule_match_enc_ipv4_addrs(rule, &match); - ext->ip_src = match.key->src; - ext->ip_dst = match.key->dst; - msk->ip_src = match.mask->src; - msk->ip_dst = match.mask->dst; - } - - if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_match_ip match; - - flow_rule_match_enc_ip(rule, &match); - ext->tos 
= match.key->tos; - ext->ttl = match.key->ttl; - msk->tos = match.mask->tos; - msk->ttl = match.mask->ttl; - } + nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow); + nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow); } int nfp_flower_compile_flow_match(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_key_ls *key_ls, struct net_device *netdev, struct nfp_fl_payload *nfp_flow, - enum nfp_flower_tun_type tun_type) + enum nfp_flower_tun_type tun_type, + struct netlink_ext_ack *extack) { u32 port_id; int err; @@ -357,13 +409,13 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, /* Populate Exact Port data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext, - port_id, false, tun_type); + port_id, false, tun_type, extack); if (err) return err; /* Populate Mask Port Data. */ err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk, - port_id, true, tun_type); + port_id, true, tun_type, extack); if (err) return err; @@ -402,12 +454,27 @@ int nfp_flower_compile_flow_match(struct nfp_app *app, msk += sizeof(struct nfp_flower_ipv6); } + if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) { + __be32 tun_dst; + + nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow); + tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst; + ext += sizeof(struct nfp_flower_ipv4_gre_tun); + msk += sizeof(struct nfp_flower_ipv4_gre_tun); + + /* Store the tunnel destination in the rule data. + * This must be present and be an exact match. + */ + nfp_flow->nfp_tun_ipv4_addr = tun_dst; + nfp_tunnel_add_ipv4_off(app, tun_dst); + } + if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN || key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) { __be32 tun_dst; nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow); - tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ip_dst; + tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst; ext += sizeof(struct nfp_flower_ipv4_udp_tun); msk += sizeof(struct nfp_flower_ipv4_udp_tun); diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 3d326efdc814..7c4a15e967df 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -290,9 +290,10 @@ nfp_check_mask_remove(struct nfp_app *app, char *mask_data, u32 mask_len, } int nfp_compile_flow_metadata(struct nfp_app *app, - struct tc_cls_flower_offload *flow, + struct flow_cls_offload *flow, struct nfp_fl_payload *nfp_flow, - struct net_device *netdev) + struct net_device *netdev, + struct netlink_ext_ack *extack) { struct nfp_fl_stats_ctx_to_flow *ctx_entry; struct nfp_flower_priv *priv = app->priv; @@ -302,8 +303,10 @@ int nfp_compile_flow_metadata(struct nfp_app *app, int err; err = nfp_get_stats_entry(app, &stats_cxt); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate new stats context"); return err; + } nfp_flow->meta.host_ctx_id = cpu_to_be32(stats_cxt); nfp_flow->meta.host_cookie = cpu_to_be64(flow->cookie); @@ -328,6 +331,12 @@ int nfp_compile_flow_metadata(struct nfp_app *app, if (!nfp_check_mask_add(app, nfp_flow->mask_data, nfp_flow->meta.mask_len, &nfp_flow->meta.flags, &new_mask_id)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id"); + if (nfp_release_stats_entry(app, stats_cxt)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release stats context"); + err = -EINVAL; + goto err_remove_rhash; + 
} err = -ENOENT; goto err_remove_rhash; } @@ -343,6 +352,21 @@ int nfp_compile_flow_metadata(struct nfp_app *app, check_entry = nfp_flower_search_fl_table(app, flow->cookie, netdev); if (check_entry) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot offload duplicate flow entry"); + if (nfp_release_stats_entry(app, stats_cxt)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release stats context"); + err = -EINVAL; + goto err_remove_mask; + } + + if (!nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + NULL, &new_mask_id)) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release mask id"); + err = -EINVAL; + goto err_remove_mask; + } + err = -EEXIST; goto err_remove_mask; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 1fbfeb43c538..7e725fa60347 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -52,8 +52,7 @@ #define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \ - BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \ - BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)) + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) #define NFP_FLOWER_MERGE_FIELDS \ (NFP_FLOWER_LAYER_PORT | \ @@ -122,9 +121,9 @@ nfp_flower_xmit_flow(struct nfp_app *app, struct nfp_fl_payload *nfp_flow, return 0; } -static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) +static bool nfp_flower_check_higher_than_mac(struct flow_cls_offload *f) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_rule *rule = flow_cls_offload_flow_rule(f); return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) || flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) || @@ -132,14 +131,25 @@ static bool nfp_flower_check_higher_than_mac(struct tc_cls_flower_offload *f) flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); } +static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + + return flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP); +} + static int -nfp_flower_calc_opt_layer(struct flow_match_enc_opts *enc_opts, - u32 *key_layer_two, int *key_size) +nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts, + u32 *key_layer_two, int *key_size, + struct netlink_ext_ack *extack) { - if (enc_opts->key->len > NFP_FL_MAX_GENEVE_OPT_KEY) + if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length"); return -EOPNOTSUPP; + } - if (enc_opts->key->len > 0) { + if (enc_opts->len > 0) { *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE_OP; *key_size += sizeof(struct nfp_flower_geneve_options); } @@ -148,13 +158,65 @@ nfp_flower_calc_opt_layer(struct flow_match_enc_opts *enc_opts, } static int +nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports, + struct flow_dissector_key_enc_opts *enc_op, + u32 *key_layer_two, u8 *key_layer, int *key_size, + struct nfp_flower_priv *priv, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack) +{ + int err; + + switch (enc_ports->dst) { + case htons(IANA_VXLAN_UDP_PORT): + *tun_type = NFP_FL_TUNNEL_VXLAN; + *key_layer |= NFP_FLOWER_LAYER_VXLAN; + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (enc_op) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan 
tunnels"); + return -EOPNOTSUPP; + } + break; + case htons(GENEVE_UDP_PORT): + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve offload"); + return -EOPNOTSUPP; + } + *tun_type = NFP_FL_TUNNEL_GENEVE; + *key_layer |= NFP_FLOWER_LAYER_EXT_META; + *key_size += sizeof(struct nfp_flower_ext_meta); + *key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; + *key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + + if (!enc_op) + break; + if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload"); + return -EOPNOTSUPP; + } + err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, + key_size, extack); + if (err) + return err; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel type unknown"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int nfp_flower_calculate_key_layers(struct nfp_app *app, struct net_device *netdev, struct nfp_fl_key_ls *ret_key_ls, - struct tc_cls_flower_offload *flow, - enum nfp_flower_tun_type *tun_type) + struct flow_cls_offload *flow, + enum nfp_flower_tun_type *tun_type, + struct netlink_ext_ack *extack) { - struct flow_rule *rule = tc_cls_flower_offload_flow_rule(flow); + struct flow_rule *rule = flow_cls_offload_flow_rule(flow); struct flow_dissector *dissector = rule->match.dissector; struct flow_match_basic basic = { NULL, NULL}; struct nfp_flower_priv *priv = app->priv; @@ -163,14 +225,18 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, int key_size; int err; - if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) + if (dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match not supported"); return -EOPNOTSUPP; + } /* If any tun dissector is used then the required set must be used. */ if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR && (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) - != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) + != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported"); return -EOPNOTSUPP; + } key_layer_two = 0; key_layer = NFP_FLOWER_LAYER_PORT; @@ -188,8 +254,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, flow_rule_match_vlan(rule, &vlan); if (!(priv->flower_ext_feats & NFP_FL_FEATS_VLAN_PCP) && - vlan.key->vlan_priority) + vlan.key->vlan_priority) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support VLAN PCP offload"); return -EOPNOTSUPP; + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { @@ -200,56 +268,68 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, flow_rule_match_enc_control(rule, &enc_ctl); - if (enc_ctl.mask->addr_type != 0xffff || - enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) + if (enc_ctl.mask->addr_type != 0xffff) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported"); + return -EOPNOTSUPP; + } + if (enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only IPv4 tunnels are supported"); return -EOPNOTSUPP; + } /* These fields are already verified as used. 
*/ flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs); - if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) - return -EOPNOTSUPP; - - flow_rule_match_enc_ports(rule, &enc_ports); - if (enc_ports.mask->dst != cpu_to_be16(~0)) + if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported"); return -EOPNOTSUPP; + } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS)) flow_rule_match_enc_opts(rule, &enc_op); - switch (enc_ports.key->dst) { - case htons(IANA_VXLAN_UDP_PORT): - *tun_type = NFP_FL_TUNNEL_VXLAN; - key_layer |= NFP_FLOWER_LAYER_VXLAN; - key_size += sizeof(struct nfp_flower_ipv4_udp_tun); - if (enc_op.key) + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + /* check if GRE, which has no enc_ports */ + if (netif_is_gretap(netdev)) { + *tun_type = NFP_FL_TUNNEL_GRE; + key_layer |= NFP_FLOWER_LAYER_EXT_META; + key_size += sizeof(struct nfp_flower_ext_meta); + key_layer_two |= NFP_FLOWER_LAYER2_GRE; + key_size += + sizeof(struct nfp_flower_ipv4_gre_tun); + + if (enc_op.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels"); + return -EOPNOTSUPP; + } + } else { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels"); return -EOPNOTSUPP; - break; - case htons(GENEVE_UDP_PORT): - if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE)) + } + } else { + flow_rule_match_enc_ports(rule, &enc_ports); + if (enc_ports.mask->dst != cpu_to_be16(~0)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match L4 destination port is supported"); return -EOPNOTSUPP; - *tun_type = NFP_FL_TUNNEL_GENEVE; - key_layer |= NFP_FLOWER_LAYER_EXT_META; - key_size += sizeof(struct nfp_flower_ext_meta); - key_layer_two |= NFP_FLOWER_LAYER2_GENEVE; - key_size += sizeof(struct nfp_flower_ipv4_udp_tun); + } - if (!enc_op.key) - break; - if (!(priv->flower_ext_feats & NFP_FL_FEATS_GENEVE_OPT)) - return -EOPNOTSUPP; - err = nfp_flower_calc_opt_layer(&enc_op, &key_layer_two, - &key_size); + err = nfp_flower_calc_udp_tun_layer(enc_ports.key, + enc_op.key, + &key_layer_two, + &key_layer, + &key_size, priv, + tun_type, extack); if (err) return err; - break; - default: - return -EOPNOTSUPP; - } - /* Ensure the ingress netdev matches the expected tun type. */ - if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) - return -EOPNOTSUPP; + /* Ensure the ingress netdev matches the expected + * tun type. + */ + if (!nfp_fl_netdev_is_tunnel_type(netdev, *tun_type)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ingress netdev does not match the expected tunnel type"); + return -EOPNOTSUPP; + } + } } if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) @@ -272,6 +352,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, * because we rely on it to get to the host. */ case cpu_to_be16(ETH_P_ARP): + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: ARP not supported"); return -EOPNOTSUPP; case cpu_to_be16(ETH_P_MPLS_UC): @@ -290,14 +371,15 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, /* Other ethtype - we need to check the masks for the * remainder of the key to ensure we can offload. 
*/ - if (nfp_flower_check_higher_than_mac(flow)) + if (nfp_flower_check_higher_than_mac(flow)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: non IPv4/IPv6 offload with L3/L4 matches not supported"); return -EOPNOTSUPP; + } break; } } if (basic.mask && basic.mask->ip_proto) { - /* Ethernet type is present in the key. */ switch (basic.key->ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: @@ -311,7 +393,11 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, /* Other ip proto - we need to check the masks for the * remainder of the key to ensure we can offload. */ - return -EOPNOTSUPP; + if (nfp_flower_check_higher_than_l3(flow)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: unknown IP protocol with L4 matches not supported"); + return -EOPNOTSUPP; + } + break; } } @@ -322,22 +408,28 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, flow_rule_match_tcp(rule, &tcp); tcp_flags = be16_to_cpu(tcp.key->flags); - if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) + if (tcp_flags & ~NFP_FLOWER_SUPPORTED_TCPFLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: no match support for selected TCP flags"); return -EOPNOTSUPP; + } /* We only support PSH and URG flags when either * FIN, SYN or RST is present as well. */ if ((tcp_flags & (TCPHDR_PSH | TCPHDR_URG)) && - !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) + !(tcp_flags & (TCPHDR_FIN | TCPHDR_SYN | TCPHDR_RST))) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: PSH and URG is only supported when used with FIN, SYN or RST"); return -EOPNOTSUPP; + } /* We need to store TCP flags in either the IPv4 or IPv6 key * space, thus we need to ensure we include an IPv4/IPv6 key * layer if we have not done so already. */ - if (!basic.key) + if (!basic.key) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on L3 protocol"); return -EOPNOTSUPP; + } if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && !(key_layer & NFP_FLOWER_LAYER_IPV6)) { @@ -353,6 +445,7 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, break; default: + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on TCP flags requires a match on IPv4/IPv6"); return -EOPNOTSUPP; } } @@ -362,8 +455,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, struct flow_match_control ctl; flow_rule_match_control(rule, &ctl); - if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) + if (ctl.key->flags & ~NFP_FLOWER_SUPPORTED_CTLFLAGS) { + NL_SET_ERR_MSG_MOD(extack, "unsupported offload: match on unknown control flag"); return -EOPNOTSUPP; + } } ret_key_ls->key_layer = key_layer; @@ -771,14 +866,16 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, struct nfp_fl_payload *sub_flow1, struct nfp_fl_payload *sub_flow2) { - struct tc_cls_flower_offload merge_tc_off; + struct flow_cls_offload merge_tc_off; struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *merge_flow; struct nfp_fl_key_ls merge_key_ls; int err; ASSERT_RTNL(); + extack = merge_tc_off.common.extack; if (sub_flow1 == sub_flow2 || nfp_flower_is_merge_flow(sub_flow1) || nfp_flower_is_merge_flow(sub_flow2)) @@ -816,7 +913,7 @@ int nfp_flower_merge_offloaded_flows(struct nfp_app *app, merge_tc_off.cookie = merge_flow->tc_flower_cookie; err = nfp_compile_flow_metadata(app, &merge_tc_off, merge_flow, - merge_flow->ingress_dev); + merge_flow->ingress_dev, extack); if (err) goto err_unlink_sub_flow2; @@ -865,15 +962,17 @@ err_destroy_merge_flow: */ static int nfp_flower_add_offload(struct nfp_app *app, struct net_device 
*netdev, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE; struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *flow_pay; struct nfp_fl_key_ls *key_layer; struct nfp_port *port = NULL; int err; + extack = flow->common.extack; if (nfp_netdev_is_nfp_repr(netdev)) port = nfp_port_from_netdev(netdev); @@ -882,7 +981,7 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, return -ENOMEM; err = nfp_flower_calculate_key_layers(app, netdev, key_layer, flow, - &tun_type); + &tun_type, extack); if (err) goto err_free_key_ls; @@ -893,23 +992,25 @@ nfp_flower_add_offload(struct nfp_app *app, struct net_device *netdev, } err = nfp_flower_compile_flow_match(app, flow, key_layer, netdev, - flow_pay, tun_type); + flow_pay, tun_type, extack); if (err) goto err_destroy_flow; - err = nfp_flower_compile_action(app, flow, netdev, flow_pay); + err = nfp_flower_compile_action(app, flow, netdev, flow_pay, extack); if (err) goto err_destroy_flow; - err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev); + err = nfp_compile_flow_metadata(app, flow, flow_pay, netdev, extack); if (err) goto err_destroy_flow; flow_pay->tc_flower_cookie = flow->cookie; err = rhashtable_insert_fast(&priv->flow_table, &flow_pay->fl_node, nfp_flower_table_params); - if (err) + if (err) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot insert flow into tables for offloads"); goto err_release_metadata; + } err = nfp_flower_xmit_flow(app, flow_pay, NFP_FLOWER_CMSG_TYPE_FLOW_ADD); @@ -1024,19 +1125,23 @@ nfp_flower_del_linked_merge_flows(struct nfp_app *app, */ static int nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *nfp_flow; struct nfp_port *port = NULL; int err; + extack = flow->common.extack; if (nfp_netdev_is_nfp_repr(netdev)) port = nfp_port_from_netdev(netdev); nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); - if (!nfp_flow) + if (!nfp_flow) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot remove flow that does not exist"); return -ENOENT; + } err = nfp_modify_flow_metadata(app, nfp_flow); if (err) @@ -1127,15 +1232,19 @@ nfp_flower_update_merge_stats(struct nfp_app *app, */ static int nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, - struct tc_cls_flower_offload *flow) + struct flow_cls_offload *flow) { struct nfp_flower_priv *priv = app->priv; + struct netlink_ext_ack *extack = NULL; struct nfp_fl_payload *nfp_flow; u32 ctx_id; + extack = flow->common.extack; nfp_flow = nfp_flower_search_fl_table(app, flow->cookie, netdev); - if (!nfp_flow) + if (!nfp_flow) { + NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot dump stats for flow that does not exist"); return -EINVAL; + } ctx_id = be32_to_cpu(nfp_flow->meta.host_ctx_id); @@ -1156,17 +1265,17 @@ nfp_flower_get_stats(struct nfp_app *app, struct net_device *netdev, static int nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev, - struct tc_cls_flower_offload *flower) + struct flow_cls_offload *flower) { if (!eth_proto_is_802_3(flower->common.protocol)) return -EOPNOTSUPP; switch (flower->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return nfp_flower_add_offload(app, netdev, flower); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return 
nfp_flower_del_offload(app, netdev, flower); - case TC_CLSFLOWER_STATS: + case FLOW_CLS_STATS: return nfp_flower_get_stats(app, netdev, flower); default: return -EOPNOTSUPP; @@ -1193,27 +1302,45 @@ static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type, } } +static LIST_HEAD(nfp_block_cb_list); + static int nfp_flower_setup_tc_block(struct net_device *netdev, - struct tc_block_offload *f) + struct flow_block_offload *f) { struct nfp_repr *repr = netdev_priv(netdev); struct nfp_flower_repr_priv *repr_priv; + struct flow_block_cb *block_cb; - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; repr_priv = repr->app_priv; - repr_priv->block_shared = tcf_block_shared(f->block); + repr_priv->block_shared = f->block_shared; + f->driver_block_list = &nfp_block_cb_list; switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - nfp_flower_setup_tc_block_cb, - repr, repr, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, - nfp_flower_setup_tc_block_cb, - repr); + case FLOW_BLOCK_BIND: + if (flow_block_cb_is_busy(nfp_flower_setup_tc_block_cb, repr, + &nfp_block_cb_list)) + return -EBUSY; + + block_cb = flow_block_cb_alloc(f->net, + nfp_flower_setup_tc_block_cb, + repr, repr, NULL); + if (IS_ERR(block_cb)) + return PTR_ERR(block_cb); + + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: + block_cb = flow_block_cb_lookup(f, nfp_flower_setup_tc_block_cb, + repr); + if (!block_cb) + return -ENOENT; + + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; @@ -1258,7 +1385,7 @@ static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { struct nfp_flower_indr_block_cb_priv *priv = cb_priv; - struct tc_cls_flower_offload *flower = type_data; + struct flow_cls_offload *flower = type_data; if (flower->common.chain_index) return -EOPNOTSUPP; @@ -1272,21 +1399,29 @@ static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, } } +static void nfp_flower_setup_indr_tc_release(void *cb_priv) +{ + struct nfp_flower_indr_block_cb_priv *priv = cb_priv; + + list_del(&priv->list); + kfree(priv); +} + static int nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, - struct tc_block_offload *f) + struct flow_block_offload *f) { struct nfp_flower_indr_block_cb_priv *cb_priv; struct nfp_flower_priv *priv = app->priv; - int err; + struct flow_block_cb *block_cb; - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS && - !(f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS && + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS && + !(f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS && nfp_flower_internal_port_can_offload(app, netdev))) return -EOPNOTSUPP; switch (f->command) { - case TC_BLOCK_BIND: + case FLOW_BLOCK_BIND: cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL); if (!cb_priv) return -ENOMEM; @@ -1295,26 +1430,32 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app, cb_priv->app = app; list_add(&cb_priv->list, &priv->indr_block_cb_priv); - err = tcf_block_cb_register(f->block, - nfp_flower_setup_indr_block_cb, - cb_priv, cb_priv, f->extack); - if (err) { + block_cb = flow_block_cb_alloc(f->net, + nfp_flower_setup_indr_block_cb, + cb_priv, cb_priv, + nfp_flower_setup_indr_tc_release); + if (IS_ERR(block_cb)) { 
list_del(&cb_priv->list); kfree(cb_priv); + return PTR_ERR(block_cb); } - return err; - case TC_BLOCK_UNBIND: + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &nfp_block_cb_list); + return 0; + case FLOW_BLOCK_UNBIND: cb_priv = nfp_flower_indr_block_cb_priv_lookup(app, netdev); if (!cb_priv) return -ENOENT; - tcf_block_cb_unregister(f->block, - nfp_flower_setup_indr_block_cb, - cb_priv); - list_del(&cb_priv->list); - kfree(cb_priv); + block_cb = flow_block_cb_lookup(f, + nfp_flower_setup_indr_block_cb, + cb_priv); + if (!block_cb) + return -ENOENT; + flow_block_cb_remove(block_cb, f); + list_del(&block_cb->driver_list); return 0; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 8c67505865a4..a7a80f4b722a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -162,8 +162,7 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb) } pay_len = nfp_flower_cmsg_get_data_len(skb); - if (pay_len != sizeof(struct nfp_tun_active_tuns) + - sizeof(struct route_ip_info) * count) { + if (pay_len != struct_size(payload, tun_info, count)) { nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n"); return; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 948d1a4b4643..60e57f08de80 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -596,6 +596,10 @@ static int nfp_pci_probe(struct pci_dev *pdev, struct nfp_pf *pf; int err; + if (pdev->vendor == PCI_VENDOR_ID_NETRONOME && + pdev->device == PCI_DEVICE_ID_NETRONOME_NFP6000_VF) + dev_warn(&pdev->dev, "Binding NFP VF device to the NFP PF driver, the VF driver is called 'nfp_netvf'\n"); + err = pci_enable_device(pdev); if (err < 0) return err; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index df9aff2684ed..5d6c3738b494 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -12,11 +12,14 @@ #ifndef _NFP_NET_H_ #define _NFP_NET_H_ +#include <linux/atomic.h> #include <linux/interrupt.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <linux/io-64-nonatomic-hi-lo.h> +#include <linux/semaphore.h> +#include <linux/workqueue.h> #include <net/xdp.h> #include "nfp_net_ctrl.h" @@ -238,7 +241,7 @@ struct nfp_net_tx_ring { #define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) #define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) #define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) -#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_DECRYPTED cpu_to_le16(BIT(8)) #define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) #define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) #define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) @@ -365,6 +368,7 @@ struct nfp_net_rx_ring { * @hw_csum_rx_inner_ok: Counter of packets where the inner HW checksum was OK * @hw_csum_rx_complete: Counter of packets with CHECKSUM_COMPLETE reported * @hw_csum_rx_error: Counter of packets with bad checksums + * @hw_tls_rx: Number of packets with TLS decrypted by hardware * @tx_sync: Seqlock for atomic updates of TX stats * @tx_pkts: Number of Transmitted packets * @tx_bytes: Number of Transmitted bytes @@ -372,6 +376,11 @@ struct nfp_net_rx_ring { * @hw_csum_tx_inner: Counter of inner 
TX checksum offload requests * @tx_gather: Counter of packets with Gather DMA * @tx_lso: Counter of LSO packets sent + * @hw_tls_tx: Counter of TLS packets sent with crypto offloaded to HW + * @tls_tx_fallback: Counter of TLS packets sent which had to be encrypted + * by the fallback path because packets came out of order + * @tls_tx_no_fallback: Counter of TLS packets not sent because the fallback + * path could not encrypt them * @tx_errors: How many TX errors were encountered * @tx_busy: How often was TX busy (no space)? * @rx_replace_buf_alloc_fail: Counter of RX buffer allocation failures @@ -392,7 +401,7 @@ struct nfp_net_r_vector { struct { struct tasklet_struct tasklet; struct sk_buff_head queue; - struct spinlock lock; + spinlock_t lock; }; }; @@ -408,22 +417,30 @@ struct nfp_net_r_vector { u64 hw_csum_rx_ok; u64 hw_csum_rx_inner_ok; u64 hw_csum_rx_complete; + u64 hw_tls_rx; + + u64 hw_csum_rx_error; + u64 rx_replace_buf_alloc_fail; struct nfp_net_tx_ring *xdp_ring; struct u64_stats_sync tx_sync; u64 tx_pkts; u64 tx_bytes; - u64 hw_csum_tx; + + u64 ____cacheline_aligned_in_smp hw_csum_tx; u64 hw_csum_tx_inner; u64 tx_gather; u64 tx_lso; + u64 hw_tls_tx; - u64 hw_csum_rx_error; - u64 rx_replace_buf_alloc_fail; + u64 tls_tx_fallback; + u64 tls_tx_no_fallback; u64 tx_errors; u64 tx_busy; + /* Cold data follows */ + u32 irq_vector; irq_handler_t handler; char name[IFNAMSIZ + 8]; @@ -458,6 +475,7 @@ struct nfp_stat_pair { * @netdev: Backpointer to net_device structure * @is_vf: Is the driver attached to a VF? * @chained_metadata_format: Firmware will use new metadata format + * @ktls_tx: Is kTLS TX enabled? * @rx_dma_dir: Mapping direction for RX buffers * @rx_dma_off: Offset at which DMA packets (for XDP headroom) * @rx_offset: Offset in the RX buffers where packet data starts @@ -482,6 +500,7 @@ struct nfp_net_dp { u8 is_vf:1; u8 chained_metadata_format:1; + u8 ktls_tx:1; u8 rx_dma_dir; u8 rx_offset; @@ -549,7 +568,7 @@ struct nfp_net_dp { * @reconfig_timer: Timer for async reading of reconfig results * @reconfig_in_progress_update: Update FW is processing now (debug only) * @bar_lock: vNIC config BAR access lock, protects: update, - * mailbox area + * mailbox area, crypto TLV * @link_up: Is the link up? 
* @link_status_lock: Protects @link_* and ensures atomicity with BAR reading * @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter @@ -562,6 +581,18 @@ struct nfp_net_dp { * @tx_bar: Pointer to mapped TX queues * @rx_bar: Pointer to mapped FL/RX queues * @tlv_caps: Parsed TLV capabilities + * @ktls_tx_conn_cnt: Number of offloaded kTLS TX connections + * @ktls_rx_conn_cnt: Number of offloaded kTLS RX connections + * @ktls_conn_id_gen: Trivial generator for kTLS connection ids (for TX) + * @ktls_no_space: Counter of firmware rejecting kTLS connection due to + * lack of space + * @mbox_cmsg: Common Control Message via vNIC mailbox state + * @mbox_cmsg.queue: CCM mbox queue of pending messages + * @mbox_cmsg.wq: CCM mbox wait queue of waiting processes + * @mbox_cmsg.workq: CCM mbox work queue for @wait_work and @runq_work + * @mbox_cmsg.wait_work: CCM mbox posted msg reconfig wait work + * @mbox_cmsg.runq_work: CCM mbox posted msg queue runner work + * @mbox_cmsg.tag: CCM mbox message tag allocator * @debugfs_dir: Device directory in debugfs * @vnic_list: Entry on device vNIC list * @pdev: Backpointer to PCI device @@ -620,7 +651,7 @@ struct nfp_net { struct timer_list reconfig_timer; u32 reconfig_in_progress_update; - struct mutex bar_lock; + struct semaphore bar_lock; u32 rx_coalesce_usecs; u32 rx_coalesce_max_frames; @@ -637,6 +668,22 @@ struct nfp_net { struct nfp_net_tlv_caps tlv_caps; + unsigned int ktls_tx_conn_cnt; + unsigned int ktls_rx_conn_cnt; + + atomic64_t ktls_conn_id_gen; + + atomic_t ktls_no_space; + + struct { + struct sk_buff_head queue; + wait_queue_head_t wq; + struct workqueue_struct *workq; + struct work_struct wait_work; + struct work_struct runq_work; + u16 tag; + } mbox_cmsg; + struct dentry *debugfs_dir; struct list_head vnic_list; @@ -848,12 +895,17 @@ static inline void nfp_ctrl_unlock(struct nfp_net *nn) static inline void nn_ctrl_bar_lock(struct nfp_net *nn) { - mutex_lock(&nn->bar_lock); + down(&nn->bar_lock); +} + +static inline bool nn_ctrl_bar_trylock(struct nfp_net *nn) +{ + return !down_trylock(&nn->bar_lock); } static inline void nn_ctrl_bar_unlock(struct nfp_net *nn) { - mutex_unlock(&nn->bar_lock); + up(&nn->bar_lock); } /* Globals */ @@ -883,6 +935,7 @@ void nfp_ctrl_close(struct nfp_net *nn); void nfp_net_set_ethtool_ops(struct net_device *netdev); void nfp_net_info(struct nfp_net *nn); +int __nfp_net_reconfig(struct nfp_net *nn, u32 update); int nfp_net_reconfig(struct nfp_net *nn, u32 update); unsigned int nfp_net_rss_key_sz(struct nfp_net *nn); void nfp_net_rss_write_itbl(struct nfp_net *nn); @@ -891,6 +944,8 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn); int nfp_net_mbox_lock(struct nfp_net *nn, unsigned int data_size); int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd); int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd); +void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 update); +int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn); unsigned int nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 36a3bd30cfd9..9903805717da 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -23,7 +23,6 @@ #include <linux/interrupt.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/lockdep.h> #include <linux/mm.h> #include <linux/overflow.h> #include <linux/page_ref.h> 
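A note on the bar_lock conversion visible above (mutex_lock/mutex_unlock replaced by down/up): a kernel mutex must be unlocked by the task that locked it, while a semaphore has no owner, so it can be taken when a posted mailbox command is submitted and released later from a completion work item running in a different context. The following is a minimal sketch of that pattern only; the names dev_state, post_config and config_done are hypothetical and not taken from the nfp driver.

#include <linux/kernel.h>
#include <linux/semaphore.h>
#include <linux/workqueue.h>

struct dev_state {
	struct semaphore cfg_lock;	/* no owner: any context may up() it */
	struct work_struct done_work;
};

/* Completion side: runs from a workqueue, i.e. a different task than the
 * submitter. Releasing a mutex here would be invalid; releasing a
 * semaphore is fine.
 */
static void config_done(struct work_struct *work)
{
	struct dev_state *ds = container_of(work, struct dev_state, done_work);

	up(&ds->cfg_lock);
}

static void dev_state_init(struct dev_state *ds)
{
	sema_init(&ds->cfg_lock, 1);	/* count of 1: behaves as a lock */
	INIT_WORK(&ds->done_work, config_done);
}

/* Submission side: take the lock, post the command, return without
 * waiting; the lock is dropped when the completion work fires.
 */
static void post_config(struct dev_state *ds)
{
	down(&ds->cfg_lock);
	/* ... write the command to the device mailbox here ... */
	schedule_work(&ds->done_work);
}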
@@ -37,14 +36,17 @@ #include <linux/vmalloc.h> #include <linux/ktime.h> +#include <net/tls.h> #include <net/vxlan.h> #include "nfpcore/nfp_nsp.h" +#include "ccm.h" #include "nfp_app.h" #include "nfp_net_ctrl.h" #include "nfp_net.h" #include "nfp_net_sriov.h" #include "nfp_port.h" +#include "crypto/crypto.h" /** * nfp_net_get_fw_version() - Read and parse the FW version @@ -228,6 +230,7 @@ static void nfp_net_reconfig_sync_enter(struct nfp_net *nn) spin_lock_bh(&nn->reconfig_lock); + WARN_ON(nn->reconfig_sync_present); nn->reconfig_sync_present = true; if (nn->reconfig_timer_active) { @@ -271,12 +274,10 @@ static void nfp_net_reconfig_wait_posted(struct nfp_net *nn) * * Return: Negative errno on error, 0 on success */ -static int __nfp_net_reconfig(struct nfp_net *nn, u32 update) +int __nfp_net_reconfig(struct nfp_net *nn, u32 update) { int ret; - lockdep_assert_held(&nn->bar_lock); - nfp_net_reconfig_sync_enter(nn); nfp_net_reconfig_start(nn, update); @@ -331,7 +332,6 @@ int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd) u32 mbox = nn->tlv_caps.mbox_off; int ret; - lockdep_assert_held(&nn->bar_lock); nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd); ret = __nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MBOX); @@ -343,6 +343,24 @@ int nfp_net_mbox_reconfig(struct nfp_net *nn, u32 mbox_cmd) return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET); } +void nfp_net_mbox_reconfig_post(struct nfp_net *nn, u32 mbox_cmd) +{ + u32 mbox = nn->tlv_caps.mbox_off; + + nn_writeq(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_CMD, mbox_cmd); + + nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_MBOX); +} + +int nfp_net_mbox_reconfig_wait_posted(struct nfp_net *nn) +{ + u32 mbox = nn->tlv_caps.mbox_off; + + nfp_net_reconfig_wait_posted(nn); + + return -nn_readl(nn, mbox + NFP_NET_CFG_MBOX_SIMPLE_RET); +} + int nfp_net_mbox_reconfig_and_unlock(struct nfp_net *nn, u32 mbox_cmd) { int ret; @@ -804,6 +822,99 @@ static void nfp_net_tx_csum(struct nfp_net_dp *dp, u64_stats_update_end(&r_vec->tx_sync); } +static struct sk_buff * +nfp_net_tls_tx(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec, + struct sk_buff *skb, u64 *tls_handle, int *nr_frags) +{ +#ifdef CONFIG_TLS_DEVICE + struct nfp_net_tls_offload_ctx *ntls; + struct sk_buff *nskb; + bool resync_pending; + u32 datalen, seq; + + if (likely(!dp->ktls_tx)) + return skb; + if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk)) + return skb; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + seq = ntohl(tcp_hdr(skb)->seq); + ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + resync_pending = tls_offload_tx_resync_pending(skb->sk); + if (unlikely(resync_pending || ntls->next_seq != seq)) { + /* Pure ACK out of order already */ + if (!datalen) + return skb; + + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tls_tx_fallback++; + u64_stats_update_end(&r_vec->tx_sync); + + nskb = tls_encrypt_skb(skb); + if (!nskb) { + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tls_tx_no_fallback++; + u64_stats_update_end(&r_vec->tx_sync); + return NULL; + } + /* encryption wasn't necessary */ + if (nskb == skb) + return skb; + /* we don't re-check ring space */ + if (unlikely(skb_is_nonlinear(nskb))) { + nn_dp_warn(dp, "tls_encrypt_skb() produced fragmented frame\n"); + u64_stats_update_begin(&r_vec->tx_sync); + r_vec->tx_errors++; + u64_stats_update_end(&r_vec->tx_sync); + dev_kfree_skb_any(nskb); + return NULL; + } + + /* jump forward, a TX may have gotten lost, need to sync TX */ + if (!resync_pending && seq - ntls->next_seq < U32_MAX 
/ 4) + tls_offload_tx_resync_request(nskb->sk); + + *nr_frags = 0; + return nskb; + } + + if (datalen) { + u64_stats_update_begin(&r_vec->tx_sync); + if (!skb_is_gso(skb)) + r_vec->hw_tls_tx++; + else + r_vec->hw_tls_tx += skb_shinfo(skb)->gso_segs; + u64_stats_update_end(&r_vec->tx_sync); + } + + memcpy(tls_handle, ntls->fw_handle, sizeof(ntls->fw_handle)); + ntls->next_seq += datalen; +#endif + return skb; +} + +static void nfp_net_tls_tx_undo(struct sk_buff *skb, u64 tls_handle) +{ +#ifdef CONFIG_TLS_DEVICE + struct nfp_net_tls_offload_ctx *ntls; + u32 datalen, seq; + + if (!tls_handle) + return; + if (WARN_ON_ONCE(!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))) + return; + + datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb)); + seq = ntohl(tcp_hdr(skb)->seq); + + ntls = tls_driver_ctx(skb->sk, TLS_OFFLOAD_CTX_DIR_TX); + if (ntls->next_seq == seq + datalen) + ntls->next_seq = seq; + else + WARN_ON_ONCE(1); +#endif +} + static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) { wmb(); @@ -811,24 +922,47 @@ static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring) tx_ring->wr_ptr_add = 0; } -static int nfp_net_prep_port_id(struct sk_buff *skb) +static int nfp_net_prep_tx_meta(struct sk_buff *skb, u64 tls_handle) { struct metadata_dst *md_dst = skb_metadata_dst(skb); unsigned char *data; + u32 meta_id = 0; + int md_bytes; - if (likely(!md_dst)) - return 0; - if (unlikely(md_dst->type != METADATA_HW_PORT_MUX)) + if (likely(!md_dst && !tls_handle)) return 0; + if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX)) { + if (!tls_handle) + return 0; + md_dst = NULL; + } + + md_bytes = 4 + !!md_dst * 4 + !!tls_handle * 8; - if (unlikely(skb_cow_head(skb, 8))) + if (unlikely(skb_cow_head(skb, md_bytes))) return -ENOMEM; - data = skb_push(skb, 8); - put_unaligned_be32(NFP_NET_META_PORTID, data); - put_unaligned_be32(md_dst->u.port_info.port_id, data + 4); + meta_id = 0; + data = skb_push(skb, md_bytes) + md_bytes; + if (md_dst) { + data -= 4; + put_unaligned_be32(md_dst->u.port_info.port_id, data); + meta_id = NFP_NET_META_PORTID; + } + if (tls_handle) { + /* conn handle is opaque, we just use u64 to be able to quickly + * compare it to zero + */ + data -= 8; + memcpy(data, &tls_handle, sizeof(tls_handle)); + meta_id <<= NFP_NET_META_FIELD_SIZE; + meta_id |= NFP_NET_META_CONN_HANDLE; + } + + data -= 4; + put_unaligned_be32(meta_id, data); - return 8; + return md_bytes; } /** @@ -851,6 +985,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) struct nfp_net_dp *dp; dma_addr_t dma_addr; unsigned int fsize; + u64 tls_handle = 0; u16 qidx; dp = &nn->dp; @@ -872,18 +1007,21 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - md_bytes = nfp_net_prep_port_id(skb); - if (unlikely(md_bytes < 0)) { + skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags); + if (unlikely(!skb)) { nfp_net_tx_xmit_more_flush(tx_ring); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; } + md_bytes = nfp_net_prep_tx_meta(skb, tls_handle); + if (unlikely(md_bytes < 0)) + goto err_flush; + /* Start with the head skbuf */ dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(dp->dev, dma_addr)) - goto err_free; + goto err_dma_err; wr_idx = D_IDX(tx_ring, tx_ring->wr_p); @@ -979,12 +1117,14 @@ err_unmap: tx_ring->txbufs[wr_idx].skb = NULL; tx_ring->txbufs[wr_idx].dma_addr = 0; tx_ring->txbufs[wr_idx].fidx = -2; -err_free: +err_dma_err: nn_dp_warn(dp, 
"Failed to map DMA TX buffer\n"); +err_flush: nfp_net_tx_xmit_more_flush(tx_ring); u64_stats_update_begin(&r_vec->tx_sync); r_vec->tx_errors++; u64_stats_update_end(&r_vec->tx_sync); + nfp_net_tls_tx_undo(skb, tls_handle); dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -1857,6 +1997,15 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb); +#ifdef CONFIG_TLS_DEVICE + if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) { + skb->decrypted = true; + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->hw_tls_rx++; + u64_stats_update_end(&r_vec->rx_sync); + } +#endif + if (rxd->rxd.flags & PCIE_DESC_RX_VLAN) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(rxd->rxd.vlan)); @@ -3705,7 +3854,7 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT; nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; - mutex_init(&nn->bar_lock); + sema_init(&nn->bar_lock, 1); spin_lock_init(&nn->reconfig_lock); spin_lock_init(&nn->link_status_lock); @@ -3717,6 +3866,10 @@ nfp_net_alloc(struct pci_dev *pdev, void __iomem *ctrl_bar, bool needs_netdev, if (err) goto err_free_nn; + err = nfp_ccm_mbox_alloc(nn); + if (err) + goto err_free_nn; + return nn; err_free_nn: @@ -3734,8 +3887,7 @@ err_free_nn: void nfp_net_free(struct nfp_net *nn) { WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted); - - mutex_destroy(&nn->bar_lock); + nfp_ccm_mbox_free(nn); if (nn->dp.netdev) free_netdev(nn->dp.netdev); @@ -4010,14 +4162,27 @@ int nfp_net_init(struct nfp_net *nn) if (err) return err; - if (nn->dp.netdev) + if (nn->dp.netdev) { nfp_net_netdev_init(nn); + err = nfp_ccm_mbox_init(nn); + if (err) + return err; + + err = nfp_net_tls_init(nn); + if (err) + goto err_clean_mbox; + } + nfp_net_vecs_init(nn); if (!nn->dp.netdev) return 0; return register_netdev(nn->dp.netdev); + +err_clean_mbox: + nfp_ccm_mbox_clean(nn); + return err; } /** @@ -4030,5 +4195,6 @@ void nfp_net_clean(struct nfp_net *nn) return; unregister_netdev(nn->dp.netdev); + nfp_ccm_mbox_clean(nn); nfp_net_reconfig_wait_posted(nn); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c index 6d5213b5bcb0..d835c14b7257 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c @@ -99,6 +99,21 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, caps->repr_cap = readl(data); break; + case NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES: + if (length >= 4) + caps->mbox_cmsg_types = readl(data); + break; + case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: + if (length < 32) { + dev_err(dev, + "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n", + length, offset); + return -EINVAL; + } + + caps->crypto_ops = readl(data); + caps->crypto_enable_off = data - ctrl_mem + 16; + break; default: if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr)) break; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 25919e338071..ee6b24e4eacd 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -44,6 +44,7 @@ #define NFP_NET_META_MARK 2 #define NFP_NET_META_PORTID 5 #define NFP_NET_META_CSUM 6 /* checksum complete type */ +#define NFP_NET_META_CONN_HANDLE 7 #define NFP_META_PORT_ID_CTRL ~0U @@ -135,6 +136,7 @@ #define NFP_NET_CFG_UPDATE_MACADDR (0x1 << 11) /* MAC address change */ #define 
NFP_NET_CFG_UPDATE_MBOX (0x1 << 12) /* Mailbox update */ #define NFP_NET_CFG_UPDATE_VF (0x1 << 13) /* VF settings change */ +#define NFP_NET_CFG_UPDATE_CRYPTO (0x1 << 14) /* Crypto on/off */ #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* An error occurred */ #define NFP_NET_CFG_TXRS_ENABLE 0x0008 #define NFP_NET_CFG_RXRS_ENABLE 0x0010 @@ -394,6 +396,7 @@ #define NFP_NET_CFG_MBOX_CMD_CTAG_FILTER_KILL 2 #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 +#define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 /** * VLAN filtering using general use mailbox @@ -466,6 +469,16 @@ * %NFP_NET_CFG_TLV_TYPE_REPR_CAP: * Single word, equivalent of %NFP_NET_CFG_CAP for representors, features which * can be used on representors. + * + * %NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES: + * Variable, bitmap of control message types supported by the mailbox handler. + * Bit 0 corresponds to message type 0, bit 1 to 1, etc. Control messages are + * encapsulated into simple TLVs, terminated with an end TLV, and written to the Mailbox. + * + * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS: + * 8 words, bitmaps of supported and enabled crypto operations. + * The first 16B (4 words) contain a bitmap of supported crypto operations, + * and the next 16B contain the enabled operations. */ #define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0 #define NFP_NET_CFG_TLV_TYPE_RESERVED 1 @@ -475,6 +488,8 @@ #define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL0 5 #define NFP_NET_CFG_TLV_TYPE_EXPERIMENTAL1 6 #define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7 +#define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10 +#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */ struct device; @@ -484,12 +499,18 @@ struct device; * @mbox_off: vNIC mailbox area offset * @mbox_len: vNIC mailbox area length * @repr_cap: capabilities for representors + * @mbox_cmsg_types: cmsgs which can be passed through the mailbox + * @crypto_ops: supported crypto operations + * @crypto_enable_off: offset of crypto ops enable region */ struct nfp_net_tlv_caps { u32 me_freq_mhz; unsigned int mbox_off; unsigned int mbox_len; u32 repr_cap; + u32 mbox_cmsg_types; + u32 crypto_ops; + unsigned int crypto_enable_off; }; int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 851e31e0ba8e..d9cbe84ac6ad 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -150,8 +150,9 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = { #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) #define NN_ET_SWITCH_STATS_LEN 9 -#define NN_RVEC_GATHER_STATS 9 +#define NN_RVEC_GATHER_STATS 13 #define NN_RVEC_PER_Q_STATS 3 +#define NN_CTRL_PATH_STATS 1 #define SFP_SFF_REV_COMPLIANCE 1 @@ -423,7 +424,8 @@ static unsigned int nfp_vnic_get_sw_stats_count(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); - return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS; + return NN_RVEC_GATHER_STATS + nn->max_r_vecs * NN_RVEC_PER_Q_STATS + + NN_CTRL_PATH_STATS; } static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) @@ -442,10 +444,16 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data) data = nfp_pr_et(data, "hw_rx_csum_complete"); data = nfp_pr_et(data, "hw_rx_csum_err"); data = nfp_pr_et(data, "rx_replace_buf_alloc_fail"); + data = nfp_pr_et(data, "rx_tls_decrypted"); data = nfp_pr_et(data, "hw_tx_csum"); data = nfp_pr_et(data, "hw_tx_inner_csum"); data = nfp_pr_et(data, 
"tx_gather"); data = nfp_pr_et(data, "tx_lso"); + data = nfp_pr_et(data, "tx_tls_encrypted"); + data = nfp_pr_et(data, "tx_tls_ooo"); + data = nfp_pr_et(data, "tx_tls_drop_no_sync_data"); + + data = nfp_pr_et(data, "hw_tls_no_space"); return data; } @@ -468,16 +476,20 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) tmp[2] = nn->r_vecs[i].hw_csum_rx_complete; tmp[3] = nn->r_vecs[i].hw_csum_rx_error; tmp[4] = nn->r_vecs[i].rx_replace_buf_alloc_fail; + tmp[5] = nn->r_vecs[i].hw_tls_rx; } while (u64_stats_fetch_retry(&nn->r_vecs[i].rx_sync, start)); do { start = u64_stats_fetch_begin(&nn->r_vecs[i].tx_sync); data[1] = nn->r_vecs[i].tx_pkts; data[2] = nn->r_vecs[i].tx_busy; - tmp[5] = nn->r_vecs[i].hw_csum_tx; - tmp[6] = nn->r_vecs[i].hw_csum_tx_inner; - tmp[7] = nn->r_vecs[i].tx_gather; - tmp[8] = nn->r_vecs[i].tx_lso; + tmp[6] = nn->r_vecs[i].hw_csum_tx; + tmp[7] = nn->r_vecs[i].hw_csum_tx_inner; + tmp[8] = nn->r_vecs[i].tx_gather; + tmp[9] = nn->r_vecs[i].tx_lso; + tmp[10] = nn->r_vecs[i].hw_tls_tx; + tmp[11] = nn->r_vecs[i].tls_tx_fallback; + tmp[12] = nn->r_vecs[i].tls_tx_no_fallback; } while (u64_stats_fetch_retry(&nn->r_vecs[i].tx_sync, start)); data += NN_RVEC_PER_Q_STATS; @@ -489,6 +501,8 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data) for (j = 0; j < NN_RVEC_GATHER_STATS; j++) *data++ = gathered_stats[j]; + *data++ = atomic_read(&nn->ktls_no_space); + return data; } diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c index 42cf4fd875ea..9a08623c325d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_nsp.c @@ -241,11 +241,16 @@ static int nfp_nsp_check(struct nfp_nsp *state) state->ver.major = FIELD_GET(NSP_STATUS_MAJOR, reg); state->ver.minor = FIELD_GET(NSP_STATUS_MINOR, reg); - if (state->ver.major != NSP_MAJOR || state->ver.minor < NSP_MINOR) { + if (state->ver.major != NSP_MAJOR) { nfp_err(cpp, "Unsupported ABI %hu.%hu\n", state->ver.major, state->ver.minor); return -EINVAL; } + if (state->ver.minor < NSP_MINOR) { + nfp_err(cpp, "ABI too old to support NIC operation (%u.%hu < %u.%u), please update the management FW on the flash\n", + NSP_MAJOR, state->ver.minor, NSP_MAJOR, NSP_MINOR); + return -EINVAL; + } if (reg & NSP_STATUS_BUSY) { nfp_err(cpp, "Service processor busy!\n"); diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 96f7a9818294..0b384f97d2fd 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -990,7 +990,7 @@ static void nixge_ethtools_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *ed) { strlcpy(ed->driver, "nixge", sizeof(ed->driver)); - strlcpy(ed->bus_info, "platform", sizeof(ed->driver)); + strlcpy(ed->bus_info, "platform", sizeof(ed->bus_info)); } static int nixge_ethtools_get_coalesce(struct net_device *ndev, diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index bf5a7bca0298..be6660128b55 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1042,7 +1042,6 @@ static int pasemi_mac_phy_init(struct net_device *dev) dn = pci_device_to_OF_node(mac->pdev); phy_dn = of_parse_phandle(dn, "phy-handle", 0); - of_node_put(phy_dn); mac->link = 0; mac->speed = 0; @@ -1051,6 +1050,7 @@ static int pasemi_mac_phy_init(struct net_device *dev) phydev = of_phy_connect(dev, phy_dn, &pasemi_adjust_link, 0, 
PHY_INTERFACE_MODE_SGMII); + of_node_put(phy_dn); if (!phydev) { printk(KERN_ERR "%s: Could not attach to phy\n", dev->name); return -ENODEV; diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index fdbb3ce00e20..a391cf6ee4b2 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -87,6 +87,7 @@ config QED depends on PCI select ZLIB_INFLATE select CRC8 + select NET_DEVLINK ---help--- This enables the support for ... diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 84cb62434556..58e2eaf77014 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -3248,6 +3248,7 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, struct net_device *dev, unsigned long event) { struct in_device *indev; + struct in_ifaddr *ifa; if (!netxen_destip_supported(adapter)) return; @@ -3256,7 +3257,8 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, if (!indev) return; - for_ifa(indev) { + rcu_read_lock(); + in_dev_for_each_ifa_rcu(ifa, indev) { switch (event) { case NETDEV_UP: netxen_list_config_ip(adapter, ifa, NX_IP_UP); @@ -3267,8 +3269,8 @@ netxen_config_indev_addr(struct netxen_adapter *adapter, default: break; } - } endfor_ifa(indev); - + } + rcu_read_unlock(); in_dev_put(indev); } diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index c5e96ce20f59..89fe091c958d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -140,6 +140,7 @@ struct qed_cxt_mngr; struct qed_sb_sp_info; struct qed_ll2_info; struct qed_mcp_info; +struct qed_llh_info; struct qed_rt_data { u32 *init_val; @@ -741,6 +742,7 @@ struct qed_dev { #define QED_DEV_ID_MASK 0xff00 #define QED_DEV_ID_MASK_BB 0x1600 #define QED_DEV_ID_MASK_AH 0x8000 +#define QED_IS_E4(dev) (QED_IS_BB(dev) || QED_IS_AH(dev)) u16 chip_num; #define CHIP_NUM_MASK 0xffff @@ -801,6 +803,11 @@ struct qed_dev { u8 num_hwfns; struct qed_hwfn hwfns[MAX_HWFNS_PER_DEVICE]; + /* Engine affinity */ + u8 l2_affin_hint; + u8 fir_affin; + u8 iwarp_affin; + /* SRIOV */ struct qed_hw_sriov_info *p_iov_info; #define IS_QED_SRIOV(cdev) (!!(cdev)->p_iov_info) @@ -815,6 +822,10 @@ struct qed_dev { /* Recovery */ bool recov_in_prog; + /* LLH info */ + u8 ppfid_bitmap; + struct qed_llh_info *p_llh_info; + /* Linux specific here */ struct qede_dev *edev; struct pci_dev *pdev; @@ -852,6 +863,9 @@ struct qed_dev { u32 rdma_max_inline; u32 rdma_max_srq_sge; u16 tunn_feature_mask; + + struct devlink *dl; + bool iwarp_cmt; }; #define NUM_OF_VFS(dev) (QED_IS_BB(dev) ? MAX_NUM_VFS_BB \ @@ -904,6 +918,14 @@ void qed_set_fw_mac_addr(__le16 *fw_msb, __le16 *fw_mid, __le16 *fw_lsb, u8 *mac); #define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) +#define QED_IS_CMT(dev) ((dev)->num_hwfns > 1) +/* Macros for getting the engine-affinitized hwfn (FIR: fcoe,iscsi,roce) */ +#define QED_FIR_AFFIN_HWFN(dev) (&(dev)->hwfns[dev->fir_affin]) +#define QED_IWARP_AFFIN_HWFN(dev) (&(dev)->hwfns[dev->iwarp_affin]) +#define QED_AFFIN_HWFN(dev) \ + (QED_IS_IWARP_PERSONALITY(QED_LEADING_HWFN(dev)) ? \ + QED_IWARP_AFFIN_HWFN(dev) : QED_FIR_AFFIN_HWFN(dev)) +#define QED_AFFIN_HWFN_IDX(dev) (IS_LEAD_HWFN(QED_AFFIN_HWFN(dev)) ? 
0 : 1) /* Flags for indication of required queues */ #define PQ_FLAGS_RLS (BIT(0)) @@ -923,8 +945,6 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf); u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc); u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc); -#define QED_LEADING_HWFN(dev) (&dev->hwfns[0]) - /* doorbell recovery mechanism */ void qed_db_recovery_dp(struct qed_hwfn *p_hwfn); void qed_db_recovery_execute(struct qed_hwfn *p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index e61d1d905415..8e1bdf58b9e7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -2351,7 +2351,8 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, /* Write via DMAE since the PSWRQ2_REG_ILT_MEMORY line is a wide-bus */ qed_dmae_host2grc(p_hwfn, p_ptt, (u64) (uintptr_t)&ilt_hw_entry, - reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), 0); + reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), + NULL); if (elem_type == QED_ELEM_CXT) { u32 last_cid_allocated = (1 + (iid / elems_per_p)) * @@ -2457,7 +2458,7 @@ qed_cxt_free_ilt_range(struct qed_hwfn *p_hwfn, (u64) (uintptr_t) &ilt_hw_entry, reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), - 0); + NULL); } qed_ptt_release(p_hwfn, p_ptt); diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index ab8cacbdee3e..5ea6c4fc6050 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -2534,7 +2534,7 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn, (len >= s_platform_defs[dev_data->platform_id].dmae_thresh || wide_bus)) { if (!qed_dmae_grc2host(p_hwfn, p_ptt, DWORDS_TO_BYTES(addr), - (u64)(uintptr_t)(dump_buf), len, 0)) + (u64)(uintptr_t)(dump_buf), len, NULL)) return len; dev_data->use_dmae = 0; DP_VERBOSE(p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index fccdb06fc5c5..a1ebc2b1ca0b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -361,6 +361,927 @@ void qed_db_recovery_execute(struct qed_hwfn *p_hwfn) /******************** Doorbell Recovery end ****************/ +/********************************** NIG LLH ***********************************/ + +enum qed_llh_filter_type { + QED_LLH_FILTER_TYPE_MAC, + QED_LLH_FILTER_TYPE_PROTOCOL, +}; + +struct qed_llh_mac_filter { + u8 addr[ETH_ALEN]; +}; + +struct qed_llh_protocol_filter { + enum qed_llh_prot_filter_type_t type; + u16 source_port_or_eth_type; + u16 dest_port; +}; + +union qed_llh_filter { + struct qed_llh_mac_filter mac; + struct qed_llh_protocol_filter protocol; +}; + +struct qed_llh_filter_info { + bool b_enabled; + u32 ref_cnt; + enum qed_llh_filter_type type; + union qed_llh_filter filter; +}; + +struct qed_llh_info { + /* Number of LLH filter banks */ + u8 num_ppfid; + +#define MAX_NUM_PPFID 8 + u8 ppfid_array[MAX_NUM_PPFID]; + + /* Array of filter arrays: + * "num_ppfid" filter banks, where each is an array of + * "NIG_REG_LLH_FUNC_FILTER_EN_SIZE" filters. 
+ */ + struct qed_llh_filter_info **pp_filters; +}; + +static void qed_llh_free(struct qed_dev *cdev) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + u32 i; + + if (p_llh_info) { + if (p_llh_info->pp_filters) + for (i = 0; i < p_llh_info->num_ppfid; i++) + kfree(p_llh_info->pp_filters[i]); + + kfree(p_llh_info->pp_filters); + } + + kfree(p_llh_info); + cdev->p_llh_info = NULL; +} + +static int qed_llh_alloc(struct qed_dev *cdev) +{ + struct qed_llh_info *p_llh_info; + u32 size, i; + + p_llh_info = kzalloc(sizeof(*p_llh_info), GFP_KERNEL); + if (!p_llh_info) + return -ENOMEM; + cdev->p_llh_info = p_llh_info; + + for (i = 0; i < MAX_NUM_PPFID; i++) { + if (!(cdev->ppfid_bitmap & (0x1 << i))) + continue; + + p_llh_info->ppfid_array[p_llh_info->num_ppfid] = i; + DP_VERBOSE(cdev, QED_MSG_SP, "ppfid_array[%d] = %hhd\n", + p_llh_info->num_ppfid, i); + p_llh_info->num_ppfid++; + } + + size = p_llh_info->num_ppfid * sizeof(*p_llh_info->pp_filters); + p_llh_info->pp_filters = kzalloc(size, GFP_KERNEL); + if (!p_llh_info->pp_filters) + return -ENOMEM; + + size = NIG_REG_LLH_FUNC_FILTER_EN_SIZE * + sizeof(**p_llh_info->pp_filters); + for (i = 0; i < p_llh_info->num_ppfid; i++) { + p_llh_info->pp_filters[i] = kzalloc(size, GFP_KERNEL); + if (!p_llh_info->pp_filters[i]) + return -ENOMEM; + } + + return 0; +} + +static int qed_llh_shadow_sanity(struct qed_dev *cdev, + u8 ppfid, u8 filter_idx, const char *action) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + + if (ppfid >= p_llh_info->num_ppfid) { + DP_NOTICE(cdev, + "LLH shadow [%s]: using ppfid %d while only %d ppfids are available\n", + action, ppfid, p_llh_info->num_ppfid); + return -EINVAL; + } + + if (filter_idx >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { + DP_NOTICE(cdev, + "LLH shadow [%s]: using filter_idx %d while only %d filters are available\n", + action, filter_idx, NIG_REG_LLH_FUNC_FILTER_EN_SIZE); + return -EINVAL; + } + + return 0; +} + +#define QED_LLH_INVALID_FILTER_IDX 0xff + +static int +qed_llh_shadow_search_filter(struct qed_dev *cdev, + u8 ppfid, + union qed_llh_filter *p_filter, u8 *p_filter_idx) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + struct qed_llh_filter_info *p_filters; + int rc; + u8 i; + + rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "search"); + if (rc) + return rc; + + *p_filter_idx = QED_LLH_INVALID_FILTER_IDX; + + p_filters = p_llh_info->pp_filters[ppfid]; + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + if (!memcmp(p_filter, &p_filters[i].filter, + sizeof(*p_filter))) { + *p_filter_idx = i; + break; + } + } + + return 0; +} + +static int +qed_llh_shadow_get_free_idx(struct qed_dev *cdev, u8 ppfid, u8 *p_filter_idx) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + struct qed_llh_filter_info *p_filters; + int rc; + u8 i; + + rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "get_free_idx"); + if (rc) + return rc; + + *p_filter_idx = QED_LLH_INVALID_FILTER_IDX; + + p_filters = p_llh_info->pp_filters[ppfid]; + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + if (!p_filters[i].b_enabled) { + *p_filter_idx = i; + break; + } + } + + return 0; +} + +static int +__qed_llh_shadow_add_filter(struct qed_dev *cdev, + u8 ppfid, + u8 filter_idx, + enum qed_llh_filter_type type, + union qed_llh_filter *p_filter, u32 *p_ref_cnt) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + struct qed_llh_filter_info *p_filters; + int rc; + + rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "add"); + if (rc) + return rc; + + p_filters = p_llh_info->pp_filters[ppfid]; + if 
(!p_filters[filter_idx].ref_cnt) { + p_filters[filter_idx].b_enabled = true; + p_filters[filter_idx].type = type; + memcpy(&p_filters[filter_idx].filter, p_filter, + sizeof(p_filters[filter_idx].filter)); + } + + *p_ref_cnt = ++p_filters[filter_idx].ref_cnt; + + return 0; +} + +static int +qed_llh_shadow_add_filter(struct qed_dev *cdev, + u8 ppfid, + enum qed_llh_filter_type type, + union qed_llh_filter *p_filter, + u8 *p_filter_idx, u32 *p_ref_cnt) +{ + int rc; + + /* Check if the same filter already exists */ + rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx); + if (rc) + return rc; + + /* Find a new entry in case of a new filter */ + if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) { + rc = qed_llh_shadow_get_free_idx(cdev, ppfid, p_filter_idx); + if (rc) + return rc; + } + + /* No free entry was found */ + if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) { + DP_NOTICE(cdev, + "Failed to find an empty LLH filter to utilize [ppfid %d]\n", + ppfid); + return -EINVAL; + } + + return __qed_llh_shadow_add_filter(cdev, ppfid, *p_filter_idx, type, + p_filter, p_ref_cnt); +} + +static int +__qed_llh_shadow_remove_filter(struct qed_dev *cdev, + u8 ppfid, u8 filter_idx, u32 *p_ref_cnt) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + struct qed_llh_filter_info *p_filters; + int rc; + + rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "remove"); + if (rc) + return rc; + + p_filters = p_llh_info->pp_filters[ppfid]; + if (!p_filters[filter_idx].ref_cnt) { + DP_NOTICE(cdev, + "LLH shadow: trying to remove a filter with ref_cnt=0\n"); + return -EINVAL; + } + + *p_ref_cnt = --p_filters[filter_idx].ref_cnt; + if (!p_filters[filter_idx].ref_cnt) + memset(&p_filters[filter_idx], + 0, sizeof(p_filters[filter_idx])); + + return 0; +} + +static int +qed_llh_shadow_remove_filter(struct qed_dev *cdev, + u8 ppfid, + union qed_llh_filter *p_filter, + u8 *p_filter_idx, u32 *p_ref_cnt) +{ + int rc; + + rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx); + if (rc) + return rc; + + /* No matching filter was found */ + if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) { + DP_NOTICE(cdev, "Failed to find a filter in the LLH shadow\n"); + return -EINVAL; + } + + return __qed_llh_shadow_remove_filter(cdev, ppfid, *p_filter_idx, + p_ref_cnt); +} + +static int qed_llh_abs_ppfid(struct qed_dev *cdev, u8 ppfid, u8 *p_abs_ppfid) +{ + struct qed_llh_info *p_llh_info = cdev->p_llh_info; + + if (ppfid >= p_llh_info->num_ppfid) { + DP_NOTICE(cdev, + "ppfid %d is not valid, available indices are 0..%hhd\n", + ppfid, p_llh_info->num_ppfid - 1); + *p_abs_ppfid = 0; + return -EINVAL; + } + + *p_abs_ppfid = p_llh_info->ppfid_array[ppfid]; + + return 0; +} + +static int +qed_llh_set_engine_affin(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + enum qed_eng eng; + u8 ppfid; + int rc; + + rc = qed_mcp_get_engine_config(p_hwfn, p_ptt); + if (rc != 0 && rc != -EOPNOTSUPP) { + DP_NOTICE(p_hwfn, + "Failed to get the engine affinity configuration\n"); + return rc; + } + + /* RoCE PF is bound to a single engine */ + if (QED_IS_ROCE_PERSONALITY(p_hwfn)) { + eng = cdev->fir_affin ? 
+
+static int
+qed_llh_set_engine_affin(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ struct qed_dev *cdev = p_hwfn->cdev;
+ enum qed_eng eng;
+ u8 ppfid;
+ int rc;
+
+ rc = qed_mcp_get_engine_config(p_hwfn, p_ptt);
+ if (rc != 0 && rc != -EOPNOTSUPP) {
+ DP_NOTICE(p_hwfn,
+ "Failed to get the engine affinity configuration\n");
+ return rc;
+ }
+
+ /* RoCE PF is bound to a single engine */
+ if (QED_IS_ROCE_PERSONALITY(p_hwfn)) {
+ eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+ rc = qed_llh_set_roce_affinity(cdev, eng);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Failed to set the RoCE engine affinity\n");
+ return rc;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Set the engine affinity of RoCE packets as %d\n",
+ eng);
+ }
+
+ /* Storage PF is bound to a single engine while L2 PF uses both */
+ if (QED_IS_FCOE_PERSONALITY(p_hwfn) || QED_IS_ISCSI_PERSONALITY(p_hwfn))
+ eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+ else /* L2_PERSONALITY */
+ eng = QED_BOTH_ENG;
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Failed to set the engine affinity of ppfid %d\n",
+ ppfid);
+ return rc;
+ }
+ }
+
+ DP_VERBOSE(cdev, QED_MSG_SP,
+ "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+ eng);
+
+ return 0;
+}
+
+static int qed_llh_hw_init_pf(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt)
+{
+ struct qed_dev *cdev = p_hwfn->cdev;
+ u8 ppfid, abs_ppfid;
+ int rc;
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ u32 addr;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ return rc;
+
+ addr = NIG_REG_LLH_PPFID2PFID_TBL_0 + abs_ppfid * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_hwfn->rel_pf_id);
+ }
+
+ if (test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) &&
+ !QED_IS_FCOE_PERSONALITY(p_hwfn)) {
+ rc = qed_llh_add_mac_filter(cdev, 0,
+ p_hwfn->hw_info.hw_mac_addr);
+ if (rc)
+ DP_NOTICE(cdev,
+ "Failed to add an LLH filter with the primary MAC\n");
+ }
+
+ if (QED_IS_CMT(cdev)) {
+ rc = qed_llh_set_engine_affin(p_hwfn, p_ptt);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+u8 qed_llh_get_num_ppfid(struct qed_dev *cdev)
+{
+ return cdev->p_llh_info->num_ppfid;
+}
+
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_MASK 0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_SHIFT 0
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_MASK 0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_SHIFT 2
+
+int qed_llh_set_ppfid_affinity(struct qed_dev *cdev, u8 ppfid, enum qed_eng eng)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u32 addr, val, eng_sel;
+ u8 abs_ppfid;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!QED_IS_CMT(cdev))
+ goto out;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto out;
+
+ switch (eng) {
+ case QED_ENG0:
+ eng_sel = 0;
+ break;
+ case QED_ENG1:
+ eng_sel = 1;
+ break;
+ case QED_BOTH_ENG:
+ eng_sel = 2;
+ break;
+ default:
+ DP_NOTICE(cdev, "Invalid affinity value for ppfid [%d]\n", eng);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+ val = qed_rd(p_hwfn, p_ptt, addr);
+ SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE, eng_sel);
+ qed_wr(p_hwfn, p_ptt, addr, val);
+
+ /* The iWARP affinity is set as the affinity of ppfid 0 */
+ if (!ppfid && QED_IS_IWARP_PERSONALITY(p_hwfn))
+ cdev->iwarp_affin = (eng == QED_ENG1) ? 1 : 0;
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u32 addr, val, eng_sel;
+ u8 ppfid, abs_ppfid;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!QED_IS_CMT(cdev))
+ goto out;
+
+ switch (eng) {
+ case QED_ENG0:
+ eng_sel = 0;
+ break;
+ case QED_ENG1:
+ eng_sel = 1;
+ break;
+ case QED_BOTH_ENG:
+ eng_sel = 2;
+ qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL,
+ 0xf); /* QP bit 15 */
+ break;
+ default:
+ DP_NOTICE(cdev, "Invalid affinity value for RoCE [%d]\n", eng);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto out;
+
+ addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+ val = qed_rd(p_hwfn, p_ptt, addr);
+ SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_ROCE, eng_sel);
+ qed_wr(p_hwfn, p_ptt, addr, val);
+ }
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+struct qed_llh_filter_details {
+ u64 value;
+ u32 mode;
+ u32 protocol_type;
+ u32 hdr_sel;
+ u32 enable;
+};
+
+static int
+qed_llh_access_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 abs_ppfid,
+ u8 filter_idx,
+ struct qed_llh_filter_details *p_details)
+{
+ struct qed_dmae_params params = {0};
+ u32 addr;
+ u8 pfid;
+ int rc;
+
+ /* The NIG/LLH registers that are accessed in this function have only 16
+ * rows which are exposed to a PF. I.e. only the 16 filters of its
+ * default ppfid. Accessing filters of other ppfids requires pretending
+ * to be other PFs.
+ * The calculation of PPFID->PFID in AH is based on the relative index
+ * of a PF on its port.
+ * For BB the pfid is actually the abs_ppfid.
+ */
+ if (QED_IS_BB(p_hwfn->cdev))
+ pfid = abs_ppfid;
+ else
+ pfid = abs_ppfid * p_hwfn->cdev->num_ports_in_engine +
+ MFW_PORT(p_hwfn);
+
+ /* Filter enable - should be done first when removing a filter */
+ if (!p_details->enable) {
+ qed_fid_pretend(p_hwfn, p_ptt,
+ pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ p_hwfn->rel_pf_id <<
+ PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+ }
+
+ /* Filter value */
+ addr = NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * filter_idx * 0x4;
+
+ params.flags = QED_DMAE_FLAG_PF_DST;
+ params.dst_pfid = pfid;
+ rc = qed_dmae_host2grc(p_hwfn,
+ p_ptt,
+ (u64)(uintptr_t)&p_details->value,
+ addr, 2 /* size_in_dwords */,
+ &params);
+ if (rc)
+ return rc;
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ /* Filter mode */
+ addr = NIG_REG_LLH_FUNC_FILTER_MODE + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->mode);
+
+ /* Filter protocol type */
+ addr = NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->protocol_type);
+
+ /* Filter header select */
+ addr = NIG_REG_LLH_FUNC_FILTER_HDR_SEL + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->hdr_sel);
+
+ /* Filter enable - should be done last when adding a filter */
+ if (p_details->enable) {
+ addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+ }
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ p_hwfn->rel_pf_id <<
+ PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ return 0;
+}
+
+static int
+qed_llh_add_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 abs_ppfid,
+ u8 filter_idx, u8 filter_prot_type, u32 high, u32 low)
+{
+ struct qed_llh_filter_details filter_details;
+
+ filter_details.enable = 1;
+ filter_details.value = ((u64)high << 32) | low;
+ filter_details.hdr_sel = 0;
+ filter_details.protocol_type = filter_prot_type;
+ /* Mode: 0: MAC-address classification 1: protocol classification */
+ filter_details.mode = filter_prot_type ? 1 : 0;
+
+ return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ &filter_details);
+}
+
+static int
+qed_llh_remove_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 abs_ppfid, u8 filter_idx)
+{
+ struct qed_llh_filter_details filter_details = {0};
+
+ return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ &filter_details);
+}
+
+int qed_llh_add_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ union qed_llh_filter filter = {};
+ u8 filter_idx, abs_ppfid;
+ u32 high, low, ref_cnt;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+ goto out;
+
+ memcpy(filter.mac.addr, mac_addr, ETH_ALEN);
+ rc = qed_llh_shadow_add_filter(cdev, ppfid,
+ QED_LLH_FILTER_TYPE_MAC,
+ &filter, &filter_idx, &ref_cnt);
+ if (rc)
+ goto err;
+
+ /* Configure the LLH only in case of a new filter */
+ if (ref_cnt == 1) {
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ high = mac_addr[1] | (mac_addr[0] << 8);
+ low = mac_addr[5] | (mac_addr[4] << 8) | (mac_addr[3] << 16) |
+ (mac_addr[2] << 24);
+ rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ 0, high, low);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Added MAC filter [%pM] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(cdev,
+ "LLH: Failed to add MAC filter [%pM] to ppfid %hhd\n",
+ mac_addr, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+static int
+qed_llh_protocol_filter_stringify(struct qed_dev *cdev,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type,
+ u16 dest_port, u8 *str, size_t str_len)
+{
+ switch (type) {
+ case QED_LLH_FILTER_ETHERTYPE:
+ snprintf(str, str_len, "Ethertype 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_TCP_SRC_PORT:
+ snprintf(str, str_len, "TCP src port 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_UDP_SRC_PORT:
+ snprintf(str, str_len, "UDP src port 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_TCP_DEST_PORT:
+ snprintf(str, str_len, "TCP dst port 0x%04x", dest_port);
+ break;
+ case QED_LLH_FILTER_UDP_DEST_PORT:
+ snprintf(str, str_len, "UDP dst port 0x%04x", dest_port);
+ break;
+ case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+ snprintf(str, str_len, "TCP src/dst ports 0x%04x/0x%04x",
+ source_port_or_eth_type, dest_port);
+ break;
+ case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+ snprintf(str, str_len, "UDP src/dst ports 0x%04x/0x%04x",
+ source_port_or_eth_type, dest_port);
+ break;
+ default:
+ DP_NOTICE(cdev,
+ "Non valid LLH protocol filter type %d\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
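/* [Editor's note] A small standalone illustration of the high/low packing
 * that qed_llh_protocol_filter_to_hilo() below performs: a TCP/UDP
 * src+dst filter packs both ports into the low dword, while an Ethertype
 * filter uses only the high dword. The sample values are made up.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t src_port = 0x1234, dst_port = 0x0050, eth_type = 0x8906;
	uint32_t high, low;

	/* src+dst port filter: src in low[31:16], dst in low[15:0] */
	high = 0;
	low = ((uint32_t)src_port << 16) | dst_port;
	printf("src/dst ports: high=0x%08x low=0x%08x\n",
	       (unsigned)high, (unsigned)low);

	/* Ethertype filter: the type goes into the high dword */
	high = eth_type;
	low = 0;
	printf("ethertype:     high=0x%08x low=0x%08x\n",
	       (unsigned)high, (unsigned)low);
	return 0;
}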
+
+static int
+qed_llh_protocol_filter_to_hilo(struct qed_dev *cdev,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type,
+ u16 dest_port, u32 *p_high, u32 *p_low)
+{
+ *p_high = 0;
+ *p_low = 0;
+
+ switch (type) {
+ case QED_LLH_FILTER_ETHERTYPE:
+ *p_high = source_port_or_eth_type;
+ break;
+ case QED_LLH_FILTER_TCP_SRC_PORT:
+ case QED_LLH_FILTER_UDP_SRC_PORT:
+ *p_low = source_port_or_eth_type << 16;
+ break;
+ case QED_LLH_FILTER_TCP_DEST_PORT:
+ case QED_LLH_FILTER_UDP_DEST_PORT:
+ *p_low = dest_port;
+ break;
+ case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+ case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+ *p_low = (source_port_or_eth_type << 16) | dest_port;
+ break;
+ default:
+ DP_NOTICE(cdev,
+ "Non valid LLH protocol filter type %d\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+qed_llh_add_protocol_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type, u16 dest_port)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u8 filter_idx, abs_ppfid, str[32], type_bitmap;
+ union qed_llh_filter filter = {};
+ u32 high, low, ref_cnt;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+ goto out;
+
+ rc = qed_llh_protocol_filter_stringify(cdev, type,
+ source_port_or_eth_type,
+ dest_port, str, sizeof(str));
+ if (rc)
+ goto err;
+
+ filter.protocol.type = type;
+ filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+ filter.protocol.dest_port = dest_port;
+ rc = qed_llh_shadow_add_filter(cdev,
+ ppfid,
+ QED_LLH_FILTER_TYPE_PROTOCOL,
+ &filter, &filter_idx, &ref_cnt);
+ if (rc)
+ goto err;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ /* Configure the LLH only in case of a new filter */
+ if (ref_cnt == 1) {
+ rc = qed_llh_protocol_filter_to_hilo(cdev, type,
+ source_port_or_eth_type,
+ dest_port, &high, &low);
+ if (rc)
+ goto err;
+
+ type_bitmap = 0x1 << type;
+ rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid,
+ filter_idx, type_bitmap, high, low);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Added protocol filter [%s] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(p_hwfn,
+ "LLH: Failed to add protocol filter [%s] to ppfid %hhd\n",
+ str, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ union qed_llh_filter filter = {};
+ u8 filter_idx, abs_ppfid;
+ int rc = 0;
+ u32 ref_cnt;
+
+ if (!p_ptt)
+ return;
+
+ if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+ goto out;
+
+ ether_addr_copy(filter.mac.addr, mac_addr);
+ rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
+ &ref_cnt);
+ if (rc)
+ goto err;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ /* Remove from the LLH in case the filter is not in use */
+ if (!ref_cnt) {
+ rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+ filter_idx);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Removed MAC filter [%pM] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(cdev,
+ "LLH: Failed to remove MAC filter [%pM] from ppfid %hhd\n",
+ mac_addr, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+}
+
+void qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type, u16 dest_port)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u8 filter_idx, abs_ppfid, str[32];
+ union qed_llh_filter filter = {};
+ int rc = 0;
+ u32 ref_cnt;
+
+ if (!p_ptt)
+ return;
+
+ if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+ goto out;
+
+ rc = qed_llh_protocol_filter_stringify(cdev, type,
+ source_port_or_eth_type,
+ dest_port, str, sizeof(str));
+ if (rc)
+
goto err; + + filter.protocol.type = type; + filter.protocol.source_port_or_eth_type = source_port_or_eth_type; + filter.protocol.dest_port = dest_port; + rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx, + &ref_cnt); + if (rc) + goto err; + + rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid); + if (rc) + goto err; + + /* Remove from the LLH in case the filter is not in use */ + if (!ref_cnt) { + rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid, + filter_idx); + if (rc) + goto err; + } + + DP_VERBOSE(cdev, + QED_MSG_SP, + "LLH: Removed protocol filter [%s] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n", + str, ppfid, abs_ppfid, filter_idx, ref_cnt); + + goto out; + +err: DP_NOTICE(cdev, + "LLH: Failed to remove protocol filter [%s] from ppfid %hhd\n", + str, ppfid); +out: + qed_ptt_release(p_hwfn, p_ptt); +} + +/******************************* NIG LLH - End ********************************/ + #define QED_MIN_DPIS (4) #define QED_MIN_PWM_REGION (QED_WID_SIZE * QED_MIN_DPIS) @@ -461,6 +1382,8 @@ void qed_resc_free(struct qed_dev *cdev) kfree(cdev->reset_stats); cdev->reset_stats = NULL; + qed_llh_free(cdev); + for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -1428,6 +2351,13 @@ int qed_resc_alloc(struct qed_dev *cdev) goto alloc_err; } + rc = qed_llh_alloc(cdev); + if (rc) { + DP_NOTICE(cdev, + "Failed to allocate memory for the llh_info structure\n"); + goto alloc_err; + } + cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL); if (!cdev->reset_stats) goto alloc_no_mem; @@ -1879,6 +2809,10 @@ static int qed_hw_init_port(struct qed_hwfn *p_hwfn, { int rc = 0; + /* In CMT the gate should be cleared by the 2nd hwfn */ + if (!QED_IS_CMT(p_hwfn->cdev) || !IS_LEAD_HWFN(p_hwfn)) + STORE_RT_REG(p_hwfn, NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET, 0); + rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode); if (rc) return rc; @@ -1964,6 +2898,13 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, if (rc) return rc; + /* Use the leading hwfn since in CMT only NIG #0 is operational */ + if (IS_LEAD_HWFN(p_hwfn)) { + rc = qed_llh_hw_init_pf(p_hwfn, p_ptt); + if (rc) + return rc; + } + if (b_hw_start) { /* enable interrupts */ qed_int_igu_enable(p_hwfn, p_ptt, int_mode); @@ -2393,6 +3334,12 @@ int qed_hw_stop(struct qed_dev *cdev) qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0); qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0); + if (IS_LEAD_HWFN(p_hwfn) && + test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) && + !QED_IS_FCOE_PERSONALITY(p_hwfn)) + qed_llh_remove_mac_filter(cdev, 0, + p_hwfn->hw_info.hw_mac_addr); + if (!cdev->recov_in_prog) { rc = qed_mcp_unload_done(p_hwfn, p_ptt); if (rc) { @@ -2868,6 +3815,36 @@ static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn) return 0; } +static int qed_hw_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct qed_dev *cdev = p_hwfn->cdev; + u8 native_ppfid_idx; + int rc; + + /* Calculation of BB/AH is different for native_ppfid_idx */ + if (QED_IS_BB(cdev)) + native_ppfid_idx = p_hwfn->rel_pf_id; + else + native_ppfid_idx = p_hwfn->rel_pf_id / + cdev->num_ports_in_engine; + + rc = qed_mcp_get_ppfid_bitmap(p_hwfn, p_ptt); + if (rc != 0 && rc != -EOPNOTSUPP) + return rc; + else if (rc == -EOPNOTSUPP) + cdev->ppfid_bitmap = 0x1 << native_ppfid_idx; + + if (!(cdev->ppfid_bitmap & (0x1 << native_ppfid_idx))) { + DP_INFO(p_hwfn, + "Fix the PPFID bitmap to include the native PPFID [native_ppfid_idx %hhd, orig_bitmap 0x%hhx]\n", + native_ppfid_idx, cdev->ppfid_bitmap); + 
cdev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+ }
+
+ return 0;
+}
+
 static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 struct qed_resc_unlock_params resc_unlock_params;
@@ -2925,6 +3902,13 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 "Failed to release the resource lock for the resource allocation commands\n");
 }
+ /* PPFID bitmap */
+ if (IS_LEAD_HWFN(p_hwfn)) {
+ rc = qed_hw_get_ppfid_bitmap(p_hwfn, p_ptt);
+ if (rc)
+ return rc;
+ }
+
 /* Sanity for ILT */
 if ((b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_K2)) ||
 (!b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB))) {
@@ -3443,6 +4427,7 @@ static void qed_nvm_info_free(struct qed_hwfn *p_hwfn)
 static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 void __iomem *p_regview,
 void __iomem *p_doorbells,
+ u64 db_phys_addr,
 enum qed_pci_personality personality)
 {
 struct qed_dev *cdev = p_hwfn->cdev;
@@ -3451,6 +4436,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
 /* Split PCI bars evenly between hwfns */
 p_hwfn->regview = p_regview;
 p_hwfn->doorbells = p_doorbells;
+ p_hwfn->db_phys_addr = db_phys_addr;
 if (IS_VF(p_hwfn->cdev))
 return qed_vf_hw_prepare(p_hwfn);
@@ -3546,7 +4532,9 @@ int qed_hw_prepare(struct qed_dev *cdev,
 /* Initialize the first hwfn - will learn number of hwfns */
 rc = qed_hw_prepare_single(p_hwfn,
 cdev->regview,
- cdev->doorbells, personality);
+ cdev->doorbells,
+ cdev->db_phys_addr,
+ personality);
 if (rc)
 return rc;
@@ -3555,22 +4543,25 @@ int qed_hw_prepare(struct qed_dev *cdev,
 /* Initialize the rest of the hwfns */
 if (cdev->num_hwfns > 1) {
 void __iomem *p_regview, *p_doorbell;
- u8 __iomem *addr;
+ u64 db_phys_addr;
+ u32 offset;
 /* adjust bar offset for second engine */
- addr = cdev->regview +
- qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
- BAR_ID_0) / 2;
- p_regview = addr;
+ offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+ BAR_ID_0) / 2;
+ p_regview = cdev->regview + offset;
+
+ offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+ BAR_ID_1) / 2;
- addr = cdev->doorbells +
- qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
- BAR_ID_1) / 2;
- p_doorbell = addr;
+ p_doorbell = cdev->doorbells + offset;
+
+ db_phys_addr = cdev->db_phys_addr + offset;
 /* prepare second hw function */
 rc = qed_hw_prepare_single(&cdev->hwfns[1], p_regview,
- p_doorbell, personality);
+ p_doorbell, db_phys_addr,
+ personality);
 /* in case of error, need to free the previously
 * initialized hwfn 0.
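/* [Editor's note] A standalone sketch of the native-PPFID fallback in
 * qed_hw_get_ppfid_bitmap() above: when the MFW does not support the
 * GET_PPFID_BITMAP command, the bitmap degenerates to just the native
 * PPFID, whose index is derived differently for BB and AH. This is not
 * driver code, and the input values are made up.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint8_t native_ppfid_idx(bool is_bb, uint8_t rel_pf_id,
				uint8_t num_ports_in_engine)
{
	/* BB uses the PF id directly; AH divides by the engine port count */
	return is_bb ? rel_pf_id : rel_pf_id / num_ports_in_engine;
}

int main(void)
{
	uint8_t idx = native_ppfid_idx(false /* AH */, 3, 2); /* -> 1 */
	uint8_t bitmap = 0x0; /* as if the MFW query returned -EOPNOTSUPP */

	if (!bitmap)
		bitmap = 1u << idx; /* fall back to the native PPFID only */

	if (!(bitmap & (1u << idx)))
		bitmap = 1u << idx; /* a bitmap missing the native PPFID is reset */

	printf("native idx %u, ppfid bitmap 0x%02x\n",
	       (unsigned)idx, (unsigned)bitmap);
	return 0;
}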
@@ -3951,269 +4942,6 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id) return 0; } -static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low, - u8 *p_filter) -{ - *p_high = p_filter[1] | (p_filter[0] << 8); - *p_low = p_filter[5] | (p_filter[4] << 8) | - (p_filter[3] << 16) | (p_filter[2] << 24); -} - -int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_filter) -{ - u32 high = 0, low = 0, en; - int i; - - if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits)) - return 0; - - qed_llh_mac_to_filter(&high, &low, p_filter); - - /* Find a free entry and utilize it */ - for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { - en = qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)); - if (en) - continue; - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - 2 * i * sizeof(u32), low); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32), high); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + - i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1); - break; - } - if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { - DP_NOTICE(p_hwfn, - "Failed to find an empty LLH filter to utilize\n"); - return -EINVAL; - } - - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "mac: %pM is added at %d\n", - p_filter, i); - - return 0; -} - -void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_filter) -{ - u32 high = 0, low = 0; - int i; - - if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits)) - return; - - qed_llh_mac_to_filter(&high, &low, p_filter); - - /* Find the entry and clean it */ - for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { - if (qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - 2 * i * sizeof(u32)) != low) - continue; - if (qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32)) != high) - continue; - - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32), 0); - - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "mac: %pM is removed from %d\n", - p_filter, i); - break; - } - if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) - DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n"); -} - -int -qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 source_port_or_eth_type, - u16 dest_port, enum qed_llh_port_filter_type_t type) -{ - u32 high = 0, low = 0, en; - int i; - - if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits)) - return 0; - - switch (type) { - case QED_LLH_FILTER_ETHERTYPE: - high = source_port_or_eth_type; - break; - case QED_LLH_FILTER_TCP_SRC_PORT: - case QED_LLH_FILTER_UDP_SRC_PORT: - low = source_port_or_eth_type << 16; - break; - case QED_LLH_FILTER_TCP_DEST_PORT: - case QED_LLH_FILTER_UDP_DEST_PORT: - low = dest_port; - break; - case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT: - case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT: - low = (source_port_or_eth_type << 16) | dest_port; - break; - default: - DP_NOTICE(p_hwfn, - "Non valid LLH protocol filter type %d\n", type); - return -EINVAL; - } - /* Find a free entry and utilize it */ - for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { - en = qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)); - if (en) 
- continue; - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - 2 * i * sizeof(u32), low); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32), high); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 1); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + - i * sizeof(u32), 1 << type); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1); - break; - } - if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { - DP_NOTICE(p_hwfn, - "Failed to find an empty LLH filter to utilize\n"); - return -EINVAL; - } - switch (type) { - case QED_LLH_FILTER_ETHERTYPE: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "ETH type %x is added at %d\n", - source_port_or_eth_type, i); - break; - case QED_LLH_FILTER_TCP_SRC_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "TCP src port %x is added at %d\n", - source_port_or_eth_type, i); - break; - case QED_LLH_FILTER_UDP_SRC_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "UDP src port %x is added at %d\n", - source_port_or_eth_type, i); - break; - case QED_LLH_FILTER_TCP_DEST_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "TCP dst port %x is added at %d\n", dest_port, i); - break; - case QED_LLH_FILTER_UDP_DEST_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "UDP dst port %x is added at %d\n", dest_port, i); - break; - case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "TCP src/dst ports %x/%x are added at %d\n", - source_port_or_eth_type, dest_port, i); - break; - case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT: - DP_VERBOSE(p_hwfn, NETIF_MSG_HW, - "UDP src/dst ports %x/%x are added at %d\n", - source_port_or_eth_type, dest_port, i); - break; - } - return 0; -} - -void -qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 source_port_or_eth_type, - u16 dest_port, - enum qed_llh_port_filter_type_t type) -{ - u32 high = 0, low = 0; - int i; - - if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits)) - return; - - switch (type) { - case QED_LLH_FILTER_ETHERTYPE: - high = source_port_or_eth_type; - break; - case QED_LLH_FILTER_TCP_SRC_PORT: - case QED_LLH_FILTER_UDP_SRC_PORT: - low = source_port_or_eth_type << 16; - break; - case QED_LLH_FILTER_TCP_DEST_PORT: - case QED_LLH_FILTER_UDP_DEST_PORT: - low = dest_port; - break; - case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT: - case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT: - low = (source_port_or_eth_type << 16) | dest_port; - break; - default: - DP_NOTICE(p_hwfn, - "Non valid LLH protocol filter type %d\n", type); - return; - } - - for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { - if (!qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32))) - continue; - if (!qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32))) - continue; - if (!(qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + - i * sizeof(u32)) & BIT(type))) - continue; - if (qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - 2 * i * sizeof(u32)) != low) - continue; - if (qed_rd(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32)) != high) - continue; - - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + - i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0); - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_FUNC_FILTER_VALUE + - (2 * i + 1) * sizeof(u32), 0); - 
break; - } - - if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) - DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n"); -} - static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 hw_addr, void *p_eth_qzone, size_t eth_qzone_size, u8 timeset) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index e4b4e3b78e8a..47376d4d071f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -241,11 +241,17 @@ enum qed_dmae_address_type_t { #define QED_DMAE_FLAG_VF_SRC 0x00000002 #define QED_DMAE_FLAG_VF_DST 0x00000004 #define QED_DMAE_FLAG_COMPLETION_DST 0x00000008 +#define QED_DMAE_FLAG_PORT 0x00000010 +#define QED_DMAE_FLAG_PF_SRC 0x00000020 +#define QED_DMAE_FLAG_PF_DST 0x00000040 struct qed_dmae_params { u32 flags; /* consists of QED_DMAE_FLAG_* values */ u8 src_vfid; u8 dst_vfid; + u8 port_id; + u8 src_pfid; + u8 dst_pfid; }; /** @@ -257,7 +263,7 @@ struct qed_dmae_params { * @param source_addr * @param grc_addr (dmae_data_offset) * @param size_in_dwords - * @param flags (one of the flags defined above) + * @param p_params (default parameters will be used in case of NULL) */ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn, @@ -265,7 +271,7 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn, u64 source_addr, u32 grc_addr, u32 size_in_dwords, - u32 flags); + struct qed_dmae_params *p_params); /** * @brief qed_dmae_grc2host - Read data from dmae data offset @@ -275,11 +281,11 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn, * @param grc_addr (dmae_data_offset) * @param dest_addr * @param size_in_dwords - * @param flags - one of the flags defined above + * @param p_params (default parameters will be used in case of NULL) */ int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr, dma_addr_t dest_addr, u32 size_in_dwords, - u32 flags); + struct qed_dmae_params *p_params); /** * @brief qed_dmae_host2host - copy data from to source address @@ -290,7 +296,7 @@ int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, * @param source_addr * @param dest_addr * @param size_in_dwords - * @param params + * @param p_params (default parameters will be used in case of NULL) */ int qed_dmae_host2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, @@ -368,26 +374,66 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 *dst_id); /** - * @brief qed_llh_add_mac_filter - configures a MAC filter in llh + * @brief qed_llh_get_num_ppfid - Return the allocated number of LLH filter + * banks that are allocated to the PF. * - * @param p_hwfn - * @param p_ptt - * @param p_filter - MAC to add + * @param cdev + * + * @return u8 - Number of LLH filter banks */ -int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_filter); +u8 qed_llh_get_num_ppfid(struct qed_dev *cdev); + +enum qed_eng { + QED_ENG0, + QED_ENG1, + QED_BOTH_ENG, +}; /** - * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh + * @brief qed_llh_set_ppfid_affinity - Set the engine affinity for the given + * LLH filter bank. + * + * @param cdev + * @param ppfid - relative within the allocated ppfids ('0' is the default one). 
+ * @param eng + * + * @return int + */ +int qed_llh_set_ppfid_affinity(struct qed_dev *cdev, + u8 ppfid, enum qed_eng eng); + +/** + * @brief qed_llh_set_roce_affinity - Set the RoCE engine affinity + * + * @param cdev + * @param eng + * + * @return int + */ +int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng); + +/** + * @brief qed_llh_add_mac_filter - Add a LLH MAC filter into the given filter + * bank. + * + * @param cdev + * @param ppfid - relative within the allocated ppfids ('0' is the default one). + * @param mac_addr - MAC to add + */ +int qed_llh_add_mac_filter(struct qed_dev *cdev, + u8 ppfid, u8 mac_addr[ETH_ALEN]); + +/** + * @brief qed_llh_remove_mac_filter - Remove a LLH MAC filter from the given + * filter bank. * - * @param p_hwfn * @param p_ptt * @param p_filter - MAC to remove */ -void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_filter); +void qed_llh_remove_mac_filter(struct qed_dev *cdev, + u8 ppfid, u8 mac_addr[ETH_ALEN]); -enum qed_llh_port_filter_type_t { +enum qed_llh_prot_filter_type_t { QED_LLH_FILTER_ETHERTYPE, QED_LLH_FILTER_TCP_SRC_PORT, QED_LLH_FILTER_TCP_DEST_PORT, @@ -398,36 +444,37 @@ enum qed_llh_port_filter_type_t { }; /** - * @brief qed_llh_add_protocol_filter - configures a protocol filter in llh + * @brief qed_llh_add_protocol_filter - Add a LLH protocol filter into the + * given filter bank. * - * @param p_hwfn - * @param p_ptt + * @param cdev + * @param ppfid - relative within the allocated ppfids ('0' is the default one). + * @param type - type of filters and comparing * @param source_port_or_eth_type - source port or ethertype to add * @param dest_port - destination port to add * @param type - type of filters and comparing */ int -qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 source_port_or_eth_type, - u16 dest_port, - enum qed_llh_port_filter_type_t type); +qed_llh_add_protocol_filter(struct qed_dev *cdev, + u8 ppfid, + enum qed_llh_prot_filter_type_t type, + u16 source_port_or_eth_type, u16 dest_port); /** - * @brief qed_llh_remove_protocol_filter - remove a protocol filter in llh + * @brief qed_llh_remove_protocol_filter - Remove a LLH protocol filter from + * the given filter bank. * - * @param p_hwfn - * @param p_ptt + * @param cdev + * @param ppfid - relative within the allocated ppfids ('0' is the default one). 
+ * @param type - type of filters and comparing * @param source_port_or_eth_type - source port or ethertype to add * @param dest_port - destination port to add - * @param type - type of filters and comparing */ void -qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 source_port_or_eth_type, - u16 dest_port, - enum qed_llh_port_filter_type_t type); +qed_llh_remove_protocol_filter(struct qed_dev *cdev, + u8 ppfid, + enum qed_llh_prot_filter_type_t type, + u16 source_port_or_eth_type, u16 dest_port); /** * *@brief Cleanup of previous driver remains prior to load diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c index 46dc93d3b9b5..de31a382f58e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c +++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c @@ -745,7 +745,7 @@ struct qed_hash_fcoe_con { static int qed_fill_fcoe_dev_info(struct qed_dev *cdev, struct qed_dev_fcoe_info *info) { - struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev); int rc; memset(info, 0, sizeof(*info)); @@ -806,15 +806,15 @@ static int qed_fcoe_stop(struct qed_dev *cdev) return -EINVAL; } - p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + p_ptt = qed_ptt_acquire(QED_AFFIN_HWFN(cdev)); if (!p_ptt) return -EAGAIN; /* Stop the fcoe */ - rc = qed_sp_fcoe_func_stop(QED_LEADING_HWFN(cdev), p_ptt, + rc = qed_sp_fcoe_func_stop(QED_AFFIN_HWFN(cdev), p_ptt, QED_SPQ_MODE_EBLOCK, NULL); cdev->flags &= ~QED_FLAG_STORAGE_STARTED; - qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + qed_ptt_release(QED_AFFIN_HWFN(cdev), p_ptt); return rc; } @@ -828,8 +828,8 @@ static int qed_fcoe_start(struct qed_dev *cdev, struct qed_fcoe_tid *tasks) return 0; } - rc = qed_sp_fcoe_func_start(QED_LEADING_HWFN(cdev), - QED_SPQ_MODE_EBLOCK, NULL); + rc = qed_sp_fcoe_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK, + NULL); if (rc) { DP_NOTICE(cdev, "Failed to start fcoe\n"); return rc; @@ -849,7 +849,7 @@ static int qed_fcoe_start(struct qed_dev *cdev, struct qed_fcoe_tid *tasks) return -ENOMEM; } - rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev), tid_info); + rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info); if (rc) { DP_NOTICE(cdev, "Failed to gather task information\n"); qed_fcoe_stop(cdev); @@ -884,7 +884,7 @@ static int qed_fcoe_acquire_conn(struct qed_dev *cdev, } /* Acquire the connection */ - rc = qed_fcoe_acquire_connection(QED_LEADING_HWFN(cdev), NULL, + rc = qed_fcoe_acquire_connection(QED_AFFIN_HWFN(cdev), NULL, &hash_con->con); if (rc) { DP_NOTICE(cdev, "Failed to acquire Connection\n"); @@ -898,7 +898,7 @@ static int qed_fcoe_acquire_conn(struct qed_dev *cdev, hash_add(cdev->connections, &hash_con->node, *handle); if (p_doorbell) - *p_doorbell = qed_fcoe_get_db_addr(QED_LEADING_HWFN(cdev), + *p_doorbell = qed_fcoe_get_db_addr(QED_AFFIN_HWFN(cdev), *handle); return 0; @@ -916,7 +916,7 @@ static int qed_fcoe_release_conn(struct qed_dev *cdev, u32 handle) } hlist_del(&hash_con->node); - qed_fcoe_release_connection(QED_LEADING_HWFN(cdev), hash_con->con); + qed_fcoe_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con); kfree(hash_con); return 0; @@ -971,7 +971,7 @@ static int qed_fcoe_offload_conn(struct qed_dev *cdev, con->d_id.addr_mid = conn_info->d_id.addr_mid; con->d_id.addr_lo = conn_info->d_id.addr_lo; - return qed_sp_fcoe_conn_offload(QED_LEADING_HWFN(cdev), con, + return qed_sp_fcoe_conn_offload(QED_AFFIN_HWFN(cdev), con, QED_SPQ_MODE_EBLOCK, NULL); } @@ -992,13 +992,13 @@ static 
int qed_fcoe_destroy_conn(struct qed_dev *cdev, con = hash_con->con; con->terminate_params = terminate_params; - return qed_sp_fcoe_conn_destroy(QED_LEADING_HWFN(cdev), con, + return qed_sp_fcoe_conn_destroy(QED_AFFIN_HWFN(cdev), con, QED_SPQ_MODE_EBLOCK, NULL); } static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats) { - return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats); + return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats); } void qed_get_protocol_stats_fcoe(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 37edaa847512..e054f6c69e3a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -12612,8 +12612,10 @@ struct public_drv_mb { #define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 -#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000 +#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000 #define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000 +#define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000 +#define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000 #define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F #define RESOURCE_CMD_REQ_RESC_SHIFT 0 @@ -12802,6 +12804,18 @@ struct public_drv_mb { #define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR (1 << 0) +#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_MASK 0x00000001 +#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_SHIFT 0 +#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_MASK 0x00000002 +#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_SHIFT 1 +#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_MASK 0x00000004 +#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_SHIFT 2 +#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_MASK 0x00000008 +#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_SHIFT 3 + +#define FW_MB_PARAM_PPFID_BITMAP_MASK 0xFF +#define FW_MB_PARAM_PPFID_BITMAP_SHIFT 0 + u32 drv_pulse_mb; #define DRV_PULSE_SEQ_MASK 0x00007fff #define DRV_PULSE_SYSTEM_TIME_MASK 0xffff0000 diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index 72ec1c6bdf70..a4de9e3ef72c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -392,11 +392,15 @@ u32 qed_vfid_to_concrete(struct qed_hwfn *p_hwfn, u8 vfid) } /* DMAE */ +#define QED_DMAE_FLAGS_IS_SET(params, flag) \ + ((params) != NULL && ((params)->flags & QED_DMAE_FLAG_##flag)) + static void qed_dmae_opcode(struct qed_hwfn *p_hwfn, const u8 is_src_type_grc, const u8 is_dst_type_grc, struct qed_dmae_params *p_params) { + u8 src_pfid, dst_pfid, port_id; u16 opcode_b = 0; u32 opcode = 0; @@ -407,14 +411,18 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn, opcode |= (is_src_type_grc ? DMAE_CMD_SRC_MASK_GRC : DMAE_CMD_SRC_MASK_PCIE) << DMAE_CMD_SRC_SHIFT; - opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_SRC_PF_ID_MASK) << + src_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_SRC) ? + p_params->src_pfid : p_hwfn->rel_pf_id; + opcode |= ((src_pfid & DMAE_CMD_SRC_PF_ID_MASK) << DMAE_CMD_SRC_PF_ID_SHIFT); /* The destination of the DMA can be: 0-None 1-PCIe 2-GRC 3-None */ opcode |= (is_dst_type_grc ? DMAE_CMD_DST_MASK_GRC : DMAE_CMD_DST_MASK_PCIE) << DMAE_CMD_DST_SHIFT; - opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_DST_PF_ID_MASK) << + dst_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_DST) ? 
+ p_params->dst_pfid : p_hwfn->rel_pf_id;
+ opcode |= ((dst_pfid & DMAE_CMD_DST_PF_ID_MASK) <<
 DMAE_CMD_DST_PF_ID_SHIFT);
 /* Whether to write a completion word to the completion destination:
@@ -425,12 +433,14 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
 DMAE_CMD_SRC_ADDR_RESET_SHIFT);
- if (p_params->flags & QED_DMAE_FLAG_COMPLETION_DST)
+ if (QED_DMAE_FLAGS_IS_SET(p_params, COMPLETION_DST))
 opcode |= (1 << DMAE_CMD_COMP_FUNC_SHIFT);
 opcode |= (DMAE_CMD_ENDIANITY << DMAE_CMD_ENDIANITY_MODE_SHIFT);
- opcode |= ((p_hwfn->port_id) << DMAE_CMD_PORT_ID_SHIFT);
+ port_id = (QED_DMAE_FLAGS_IS_SET(p_params, PORT)) ?
+ p_params->port_id : p_hwfn->port_id;
+ opcode |= (port_id << DMAE_CMD_PORT_ID_SHIFT);
 /* reset source address in next go */
 opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
@@ -441,7 +451,7 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 DMAE_CMD_DST_ADDR_RESET_SHIFT);
 /* SRC/DST VFID: all 1's - pf, otherwise VF id */
- if (p_params->flags & QED_DMAE_FLAG_VF_SRC) {
+ if (QED_DMAE_FLAGS_IS_SET(p_params, VF_SRC)) {
 opcode |= 1 << DMAE_CMD_SRC_VF_ID_VALID_SHIFT;
 opcode_b |= p_params->src_vfid << DMAE_CMD_SRC_VF_ID_SHIFT;
 } else {
@@ -449,7 +459,7 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
 DMAE_CMD_SRC_VF_ID_SHIFT;
 }
- if (p_params->flags & QED_DMAE_FLAG_VF_DST) {
+ if (QED_DMAE_FLAGS_IS_SET(p_params, VF_DST)) {
 opcode |= 1 << DMAE_CMD_DST_VF_ID_VALID_SHIFT;
 opcode_b |= p_params->dst_vfid << DMAE_CMD_DST_VF_ID_SHIFT;
 } else {
@@ -733,7 +743,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 for (i = 0; i <= cnt_split; i++) {
 offset = length_limit * i;
- if (!(p_params->flags & QED_DMAE_FLAG_RW_REPL_SRC)) {
+ if (!QED_DMAE_FLAGS_IS_SET(p_params, RW_REPL_SRC)) {
 if (src_type == QED_DMAE_ADDRESS_GRC)
 src_addr_split = src_addr + offset;
 else
@@ -771,14 +781,12 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 struct qed_ptt *p_ptt,
- u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags)
+ u64 source_addr, u32 grc_addr, u32 size_in_dwords,
+ struct qed_dmae_params *p_params)
 {
 u32 grc_addr_in_dw = grc_addr / sizeof(u32);
- struct qed_dmae_params params;
 int rc;
- memset(&params, 0, sizeof(struct qed_dmae_params));
- params.flags = flags;
 mutex_lock(&p_hwfn->dmae_info.mutex);
@@ -786,7 +794,7 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 grc_addr_in_dw,
 QED_DMAE_ADDRESS_HOST_VIRT,
 QED_DMAE_ADDRESS_GRC,
- size_in_dwords, &params);
+ size_in_dwords, p_params);
 mutex_unlock(&p_hwfn->dmae_info.mutex);
@@ -796,21 +804,19 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr,
- dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+ dma_addr_t dest_addr, u32 size_in_dwords,
+ struct qed_dmae_params *p_params)
 {
 u32 grc_addr_in_dw = grc_addr / sizeof(u32);
- struct qed_dmae_params params;
 int rc;
- memset(&params, 0, sizeof(struct qed_dmae_params));
- params.flags = flags;
 mutex_lock(&p_hwfn->dmae_info.mutex);
 rc = qed_dmae_execute_command(p_hwfn, p_ptt, grc_addr_in_dw,
 dest_addr, QED_DMAE_ADDRESS_GRC,
 QED_DMAE_ADDRESS_HOST_VIRT,
- size_in_dwords, &params);
+ size_in_dwords, p_params);
 mutex_unlock(&p_hwfn->dmae_info.mutex);
@@ -842,7 +848,6 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 struct qed_ptt *p_ptt, const char *phase)
 {
 u32 size = PAGE_SIZE / 2, val;
- struct qed_dmae_params params;
 int rc = 0;
 dma_addr_t p_phys;
 void *p_virt;
@@ -875,9 +880,8 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 (u64)p_phys, p_virt, (u64)(p_phys + size), (u8 *)p_virt + size, size);
- memset(&params, 0, sizeof(params));
 rc = qed_dmae_host2host(p_hwfn, p_ptt, p_phys, p_phys + size,
- size / 4 /* size_in_dwords */, &params);
+ size / 4, NULL);
 if (rc) {
 DP_NOTICE(p_hwfn,
 "DMAE sanity [%s]: qed_dmae_host2host() failed. rc = %d.\n",
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index 34193c2f1699..a868d7f88601 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -131,7 +131,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn,
 rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 (uintptr_t)(p_init_val + i),
- addr + (i << 2), segment, 0);
+ addr + (i << 2), segment, NULL);
 if (rc)
 return rc;
@@ -194,7 +194,7 @@ static int qed_init_array_dmae(struct qed_hwfn *p_hwfn,
 } else {
 rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 (uintptr_t)(buf + dmae_data_offset),
- addr, size, 0);
+ addr, size, NULL);
 }
 return rc;
@@ -205,6 +205,7 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 u32 addr, u32 fill, u32 fill_count)
 {
 static u32 zero_buffer[DMAE_MAX_RW_SIZE];
+ struct qed_dmae_params params = {};
 memset(zero_buffer, 0, sizeof(u32) * DMAE_MAX_RW_SIZE);
@@ -214,10 +215,10 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
 * 3. p_hwfn->temp_data,
 * 4. fill_count
 */
-
+ params.flags = QED_DMAE_FLAG_RW_REPL_SRC;
 return qed_dmae_host2grc(p_hwfn, p_ptt,
 (uintptr_t)(&zero_buffer[0]),
- addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC);
+ addr, fill_count, &params);
 }
 static void qed_init_fill(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index fdfedbc8e431..4e8118a08654 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1508,10 +1508,10 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
 qed_dmae_host2grc(p_hwfn, p_ptt,
 (u64)(uintptr_t)&phys_addr,
 CAU_REG_SB_ADDR_MEMORY +
- igu_sb_id * sizeof(u64), 2, 0);
+ igu_sb_id * sizeof(u64), 2, NULL);
 qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&sb_entry,
 CAU_REG_SB_VAR_MEMORY +
- igu_sb_id * sizeof(u64), 2, 0);
+ igu_sb_id * sizeof(u64), 2, NULL);
 } else {
 /* Initialize Status Block Address */
 STORE_RT_REG_AGG(p_hwfn,
@@ -2362,7 +2362,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 rc = qed_dmae_grc2host(p_hwfn, p_ptt,
 CAU_REG_SB_VAR_MEMORY +
 sb_id * sizeof(u64),
- (u64)(uintptr_t)&sb_entry, 2, 0);
+ (u64)(uintptr_t)&sb_entry, 2, NULL);
 if (rc) {
 DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
 return rc;
@@ -2376,7 +2376,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 rc = qed_dmae_host2grc(p_hwfn, p_ptt,
 (u64)(uintptr_t)&sb_entry,
 CAU_REG_SB_VAR_MEMORY +
- sb_id * sizeof(u64), 2, 0);
+ sb_id * sizeof(u64), 2, NULL);
 if (rc) {
 DP_ERR(p_hwfn, "dmae_host2grc failed %d\n", rc);
 return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 4f8a685d1a55..5585c18053ec 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -1082,7 +1082,7 @@ struct qed_hash_iscsi_con {
 static int qed_fill_iscsi_dev_info(struct qed_dev *cdev,
 struct qed_dev_iscsi_info *info)
 {
- struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
 int rc;
@@ -1141,8 +1141,8 @@ static int qed_iscsi_stop(struct qed_dev *cdev)
 }
 /* Stop the iscsi */
-
rc = qed_sp_iscsi_func_stop(QED_LEADING_HWFN(cdev), - QED_SPQ_MODE_EBLOCK, NULL); + rc = qed_sp_iscsi_func_stop(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK, + NULL); cdev->flags &= ~QED_FLAG_STORAGE_STARTED; return rc; @@ -1161,9 +1161,8 @@ static int qed_iscsi_start(struct qed_dev *cdev, return 0; } - rc = qed_sp_iscsi_func_start(QED_LEADING_HWFN(cdev), - QED_SPQ_MODE_EBLOCK, NULL, event_context, - async_event_cb); + rc = qed_sp_iscsi_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK, + NULL, event_context, async_event_cb); if (rc) { DP_NOTICE(cdev, "Failed to start iscsi\n"); return rc; @@ -1182,8 +1181,7 @@ static int qed_iscsi_start(struct qed_dev *cdev, return -ENOMEM; } - rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev), - tid_info); + rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info); if (rc) { DP_NOTICE(cdev, "Failed to gather task information\n"); qed_iscsi_stop(cdev); @@ -1215,7 +1213,7 @@ static int qed_iscsi_acquire_conn(struct qed_dev *cdev, return -ENOMEM; /* Acquire the connection */ - rc = qed_iscsi_acquire_connection(QED_LEADING_HWFN(cdev), NULL, + rc = qed_iscsi_acquire_connection(QED_AFFIN_HWFN(cdev), NULL, &hash_con->con); if (rc) { DP_NOTICE(cdev, "Failed to acquire Connection\n"); @@ -1229,7 +1227,7 @@ static int qed_iscsi_acquire_conn(struct qed_dev *cdev, hash_add(cdev->connections, &hash_con->node, *handle); if (p_doorbell) - *p_doorbell = qed_iscsi_get_db_addr(QED_LEADING_HWFN(cdev), + *p_doorbell = qed_iscsi_get_db_addr(QED_AFFIN_HWFN(cdev), *handle); return 0; @@ -1247,7 +1245,7 @@ static int qed_iscsi_release_conn(struct qed_dev *cdev, u32 handle) } hlist_del(&hash_con->node); - qed_iscsi_release_connection(QED_LEADING_HWFN(cdev), hash_con->con); + qed_iscsi_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con); kfree(hash_con); return 0; @@ -1324,7 +1322,7 @@ static int qed_iscsi_offload_conn(struct qed_dev *cdev, /* Set default values on other connection fields */ con->offl_flags = 0x1; - return qed_sp_iscsi_conn_offload(QED_LEADING_HWFN(cdev), con, + return qed_sp_iscsi_conn_offload(QED_AFFIN_HWFN(cdev), con, QED_SPQ_MODE_EBLOCK, NULL); } @@ -1351,7 +1349,7 @@ static int qed_iscsi_update_conn(struct qed_dev *cdev, con->first_seq_length = conn_info->first_seq_length; con->exp_stat_sn = conn_info->exp_stat_sn; - return qed_sp_iscsi_conn_update(QED_LEADING_HWFN(cdev), con, + return qed_sp_iscsi_conn_update(QED_AFFIN_HWFN(cdev), con, QED_SPQ_MODE_EBLOCK, NULL); } @@ -1366,8 +1364,7 @@ static int qed_iscsi_clear_conn_sq(struct qed_dev *cdev, u32 handle) return -EINVAL; } - return qed_sp_iscsi_conn_clear_sq(QED_LEADING_HWFN(cdev), - hash_con->con, + return qed_sp_iscsi_conn_clear_sq(QED_AFFIN_HWFN(cdev), hash_con->con, QED_SPQ_MODE_EBLOCK, NULL); } @@ -1385,14 +1382,13 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev, hash_con->con->abortive_dsconnect = abrt_conn; - return qed_sp_iscsi_conn_terminate(QED_LEADING_HWFN(cdev), - hash_con->con, + return qed_sp_iscsi_conn_terminate(QED_AFFIN_HWFN(cdev), hash_con->con, QED_SPQ_MODE_EBLOCK, NULL); } static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats) { - return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats); + return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats); } static int qed_iscsi_change_mac(struct qed_dev *cdev, @@ -1407,8 +1403,7 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev, return -EINVAL; } - return qed_sp_iscsi_mac_update(QED_LEADING_HWFN(cdev), - hash_con->con, + return qed_sp_iscsi_mac_update(QED_AFFIN_HWFN(cdev), hash_con->con, 
QED_SPQ_MODE_EBLOCK, NULL);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ded556b7bab5..f380fae8799d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -63,7 +63,12 @@ struct mpa_v2_hdr {
 #define MPA_REV2(_mpa_rev) ((_mpa_rev) == MPA_NEGOTIATION_TYPE_ENHANCED)
 #define QED_IWARP_INVALID_TCP_CID 0xffffffff
-#define QED_IWARP_RCV_WND_SIZE_DEF (256 * 1024)
+
+#define QED_IWARP_RCV_WND_SIZE_DEF_BB_2P (200 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_BB_4P (100 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_AH_2P (150 * 1024)
+#define QED_IWARP_RCV_WND_SIZE_DEF_AH_4P (90 * 1024)
+
 #define QED_IWARP_RCV_WND_SIZE_MIN (0xffff)
 #define TIMESTAMP_HEADER_SIZE (12)
 #define QED_IWARP_MAX_FIN_RT_DEFAULT (2)
@@ -532,7 +537,8 @@ int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp)
 /* Make sure ep is closed before returning and freeing memory. */
 if (ep) {
- while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200)
+ while (READ_ONCE(ep->state) != QED_IWARP_EP_CLOSED &&
+ wait_count++ < 200)
 msleep(100);
 if (ep->state != QED_IWARP_EP_CLOSED)
@@ -1022,8 +1028,6 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
 params.ep_context = ep;
- ep->state = QED_IWARP_EP_CLOSED;
 switch (fw_return_code) {
 case RDMA_RETURN_OK:
 ep->qp->max_rd_atomic_req = ep->cm_info.ord;
@@ -1083,6 +1087,10 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn,
 break;
 }
+ if (fw_return_code != RDMA_RETURN_OK)
+ /* paired with READ_ONCE in destroy_qp */
+ smp_store_release(&ep->state, QED_IWARP_EP_CLOSED);
+
 ep->event_cb(ep->cb_context, &params);
 /* on passive side, if there is no associated QP (REJECT) we need to
@@ -2528,7 +2536,7 @@ qed_iwarp_ll2_slowpath(void *cxt,
 memset(fpdu, 0, sizeof(*fpdu));
 }
-static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn)
 {
 struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
 int rc = 0;
@@ -2563,8 +2571,9 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
 }
- qed_llh_remove_mac_filter(p_hwfn,
- p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
+ qed_llh_remove_mac_filter(p_hwfn->cdev, 0,
+ p_hwfn->p_rdma_info->iwarp.mac_addr);
+
 return rc;
 }
@@ -2609,7 +2618,7 @@ qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn,
 static int
 qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 struct qed_rdma_start_in_params *params,
- struct qed_ptt *p_ptt)
+ u32 rcv_wnd_size)
 {
 struct qed_iwarp_info *iwarp_info;
 struct qed_ll2_acquire_data data;
@@ -2628,7 +2637,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 ether_addr_copy(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr);
- rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+ rc = qed_llh_add_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
 if (rc)
 return rc;
@@ -2637,6 +2646,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 cbs.rx_release_cb = qed_iwarp_ll2_rel_rx_pkt;
 cbs.tx_comp_cb = qed_iwarp_ll2_comp_tx_pkt;
 cbs.tx_release_cb = qed_iwarp_ll2_rel_tx_pkt;
+ cbs.slowpath_cb = NULL;
 cbs.cookie = p_hwfn;
 memset(&data, 0, sizeof(data));
@@ -2653,7 +2663,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
 rc = qed_ll2_acquire_connection(p_hwfn, &data);
 if (rc) {
 DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n");
- qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+ qed_llh_remove_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
 return rc;
 }
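/* [Editor's note] The hunk below scales the number of OOO Rx buffers with
 * the receive window, which is now chosen per chip and port count instead
 * of the old fixed 256KB default. A small standalone sketch of that sizing
 * math; QED_IWARP_MAX_OOO and the clamp value are assumptions made only
 * for illustration, not values confirmed by this patch.
 */
#include <stdint.h>
#include <stdio.h>

#define QED_IWARP_MAX_OOO 16                /* assumed for illustration */
#define QED_IWARP_LL2_OOO_MAX_RX_SIZE 16384 /* assumed clamp */

int main(void)
{
	uint32_t rcv_wnd_size = 100 * 1024; /* e.g. BB with 4 ports */
	uint32_t max_mtu = 9000;
	uint32_t n_ooo_bufs = (QED_IWARP_MAX_OOO * rcv_wnd_size) / max_mtu;

	if (n_ooo_bufs > QED_IWARP_LL2_OOO_MAX_RX_SIZE)
		n_ooo_bufs = QED_IWARP_LL2_OOO_MAX_RX_SIZE;

	/* 16 * 102400 / 9000 = 182 buffers for this window/MTU pair */
	printf("n_ooo_bufs = %u\n", (unsigned)n_ooo_bufs);
	return 0;
}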
@@ -2675,7 +2685,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, data.input.conn_type = QED_LL2_TYPE_OOO; data.input.mtu = params->max_mtu; - n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) / + n_ooo_bufs = (QED_IWARP_MAX_OOO * rcv_wnd_size) / iwarp_info->max_mtu; n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE); @@ -2708,6 +2718,8 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, data.input.rx_num_desc = n_ooo_bufs * 2; data.input.tx_num_desc = data.input.rx_num_desc; data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU; + data.input.tx_tc = PKT_LB_TC; + data.input.tx_dest = QED_LL2_TX_DEST_LB; data.p_connection_handle = &iwarp_info->ll2_mpa_handle; data.input.secondary_queue = true; data.cbs = &cbs; @@ -2757,21 +2769,35 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn, &iwarp_info->mpa_buf_list); return rc; err: - qed_iwarp_ll2_stop(p_hwfn, p_ptt); + qed_iwarp_ll2_stop(p_hwfn); return rc; } -int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, +static struct { + u32 two_ports; + u32 four_ports; +} qed_iwarp_rcv_wnd_size[MAX_CHIP_IDS] = { + {QED_IWARP_RCV_WND_SIZE_DEF_BB_2P, QED_IWARP_RCV_WND_SIZE_DEF_BB_4P}, + {QED_IWARP_RCV_WND_SIZE_DEF_AH_2P, QED_IWARP_RCV_WND_SIZE_DEF_AH_4P} +}; + +int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_rdma_start_in_params *params) { + struct qed_dev *cdev = p_hwfn->cdev; struct qed_iwarp_info *iwarp_info; + enum chip_ids chip_id; u32 rcv_wnd_size; iwarp_info = &p_hwfn->p_rdma_info->iwarp; iwarp_info->tcp_flags = QED_IWARP_TS_EN; - rcv_wnd_size = QED_IWARP_RCV_WND_SIZE_DEF; + + chip_id = QED_IS_BB(cdev) ? CHIP_BB : CHIP_K2; + rcv_wnd_size = (qed_device_num_ports(cdev) == 4) ? + qed_iwarp_rcv_wnd_size[chip_id].four_ports : + qed_iwarp_rcv_wnd_size[chip_id].two_ports; /* value 0 is used for ilog2(QED_IWARP_RCV_WND_SIZE_MIN) */ iwarp_info->rcv_wnd_scale = ilog2(rcv_wnd_size) - @@ -2794,10 +2820,10 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, qed_iwarp_async_event); qed_ooo_setup(p_hwfn); - return qed_iwarp_ll2_start(p_hwfn, params, p_ptt); + return qed_iwarp_ll2_start(p_hwfn, params, rcv_wnd_size); } -int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +int qed_iwarp_stop(struct qed_hwfn *p_hwfn) { int rc; @@ -2808,7 +2834,7 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP); - return qed_iwarp_ll2_stop(p_hwfn, p_ptt); + return qed_iwarp_ll2_stop(p_hwfn); } static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn, @@ -2825,7 +2851,9 @@ static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn, params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ? 
0 : -ECONNRESET; - ep->state = QED_IWARP_EP_CLOSED; + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); list_del(&ep->list_entry); spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); @@ -2914,7 +2942,8 @@ qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn, params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE; params.ep_context = ep; params.cm_info = &ep->cm_info; - ep->state = QED_IWARP_EP_CLOSED; + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); switch (fw_return_code) { case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET: diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h index 7ac959038324..c1b2057d23b8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h @@ -183,13 +183,13 @@ struct qed_iwarp_listener { int qed_iwarp_alloc(struct qed_hwfn *p_hwfn); -int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, +int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_rdma_start_in_params *params); void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn, struct iwarp_init_func_ramrod_data *p_ramrod); -int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +int qed_iwarp_stop(struct qed_hwfn *p_hwfn); void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 57641728df69..9f36e7948222 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -2111,7 +2111,7 @@ int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn, rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY + p_cid->sb_igu_id * sizeof(u64), - (u64)(uintptr_t)&sb_entry, 2, 0); + (u64)(uintptr_t)&sb_entry, 2, NULL); if (rc) { DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc); return rc; @@ -2144,7 +2144,7 @@ int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn, rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY + p_cid->sb_igu_id * sizeof(u64), - (u64)(uintptr_t)&sb_entry, 2, 0); + (u64)(uintptr_t)&sb_entry, 2, NULL); if (rc) { DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc); return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index b5f419b71287..19a1a58d60f8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -239,9 +239,8 @@ out_post1: buffer->phys_addr = new_phys_addr; out_post: - rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle, - buffer->phys_addr, 0, buffer, 1); - + rc = qed_ll2_post_rx_buffer(p_hwfn, cdev->ll2->handle, + buffer->phys_addr, 0, buffer, 1); if (rc) qed_ll2_dealloc_buffer(cdev, buffer); } @@ -926,16 +925,15 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) return 0; } -static void qed_ll2_stop_ooo(struct qed_dev *cdev) +static void qed_ll2_stop_ooo(struct qed_hwfn *p_hwfn) { - struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); - u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; + u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; - DP_VERBOSE(cdev, QED_MSG_STORAGE, "Stopping LL2 OOO queue [%02x]\n", - *handle); + DP_VERBOSE(p_hwfn, (QED_MSG_STORAGE | QED_MSG_LL2), + "Stopping LL2 OOO queue [%02x]\n", *handle); - qed_ll2_terminate_connection(hwfn, *handle); - qed_ll2_release_connection(hwfn, *handle); + 
qed_ll2_terminate_connection(p_hwfn, *handle); + qed_ll2_release_connection(p_hwfn, *handle); *handle = QED_LL2_UNUSED_HANDLE; } @@ -1574,12 +1572,12 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle) if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) { if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits)) - qed_llh_add_protocol_filter(p_hwfn, p_ptt, - ETH_P_FCOE, 0, - QED_LLH_FILTER_ETHERTYPE); - qed_llh_add_protocol_filter(p_hwfn, p_ptt, - ETH_P_FIP, 0, - QED_LLH_FILTER_ETHERTYPE); + qed_llh_add_protocol_filter(p_hwfn->cdev, 0, + QED_LLH_FILTER_ETHERTYPE, + ETH_P_FCOE, 0); + qed_llh_add_protocol_filter(p_hwfn->cdev, 0, + QED_LLH_FILTER_ETHERTYPE, + ETH_P_FIP, 0); } out: @@ -1980,12 +1978,12 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle) if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) { if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits)) - qed_llh_remove_protocol_filter(p_hwfn, p_ptt, - ETH_P_FCOE, 0, - QED_LLH_FILTER_ETHERTYPE); - qed_llh_remove_protocol_filter(p_hwfn, p_ptt, - ETH_P_FIP, 0, - QED_LLH_FILTER_ETHERTYPE); + qed_llh_remove_protocol_filter(p_hwfn->cdev, 0, + QED_LLH_FILTER_ETHERTYPE, + ETH_P_FCOE, 0); + qed_llh_remove_protocol_filter(p_hwfn->cdev, 0, + QED_LLH_FILTER_ETHERTYPE, + ETH_P_FIP, 0); } out: @@ -2086,12 +2084,12 @@ static void _qed_ll2_get_port_stats(struct qed_hwfn *p_hwfn, TSTORM_LL2_PORT_STAT_OFFSET(MFW_PORT(p_hwfn)), sizeof(port_stats)); - p_stats->gsi_invalid_hdr = HILO_64_REGPAIR(port_stats.gsi_invalid_hdr); - p_stats->gsi_invalid_pkt_length = + p_stats->gsi_invalid_hdr += HILO_64_REGPAIR(port_stats.gsi_invalid_hdr); + p_stats->gsi_invalid_pkt_length += HILO_64_REGPAIR(port_stats.gsi_invalid_pkt_length); - p_stats->gsi_unsupported_pkt_typ = + p_stats->gsi_unsupported_pkt_typ += HILO_64_REGPAIR(port_stats.gsi_unsupported_pkt_typ); - p_stats->gsi_crcchksm_error = + p_stats->gsi_crcchksm_error += HILO_64_REGPAIR(port_stats.gsi_crcchksm_error); } @@ -2109,9 +2107,9 @@ static void _qed_ll2_get_tstats(struct qed_hwfn *p_hwfn, CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid); qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats)); - p_stats->packet_too_big_discard = + p_stats->packet_too_big_discard += HILO_64_REGPAIR(tstats.packet_too_big_discard); - p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard); + p_stats->no_buff_discard += HILO_64_REGPAIR(tstats.no_buff_discard); } static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn, @@ -2128,12 +2126,12 @@ static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn, CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid); qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats)); - p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes); - p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes); - p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes); - p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts); - p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts); - p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts); + p_stats->rcv_ucast_bytes += HILO_64_REGPAIR(ustats.rcv_ucast_bytes); + p_stats->rcv_mcast_bytes += HILO_64_REGPAIR(ustats.rcv_mcast_bytes); + p_stats->rcv_bcast_bytes += HILO_64_REGPAIR(ustats.rcv_bcast_bytes); + p_stats->rcv_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts); + p_stats->rcv_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts); + p_stats->rcv_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts); } static void _qed_ll2_get_pstats(struct 
qed_hwfn *p_hwfn, @@ -2150,23 +2148,21 @@ static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn, CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id); qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats)); - p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes); - p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes); - p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes); - p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts); - p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts); - p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts); + p_stats->sent_ucast_bytes += HILO_64_REGPAIR(pstats.sent_ucast_bytes); + p_stats->sent_mcast_bytes += HILO_64_REGPAIR(pstats.sent_mcast_bytes); + p_stats->sent_bcast_bytes += HILO_64_REGPAIR(pstats.sent_bcast_bytes); + p_stats->sent_ucast_pkts += HILO_64_REGPAIR(pstats.sent_ucast_pkts); + p_stats->sent_mcast_pkts += HILO_64_REGPAIR(pstats.sent_mcast_pkts); + p_stats->sent_bcast_pkts += HILO_64_REGPAIR(pstats.sent_bcast_pkts); } -int qed_ll2_get_stats(void *cxt, - u8 connection_handle, struct qed_ll2_stats *p_stats) +static int __qed_ll2_get_stats(void *cxt, u8 connection_handle, + struct qed_ll2_stats *p_stats) { struct qed_hwfn *p_hwfn = cxt; struct qed_ll2_info *p_ll2_conn = NULL; struct qed_ptt *p_ptt; - memset(p_stats, 0, sizeof(*p_stats)); - if ((connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) || !p_hwfn->p_ll2_info) return -EINVAL; @@ -2181,15 +2177,26 @@ int qed_ll2_get_stats(void *cxt, if (p_ll2_conn->input.gsi_enable) _qed_ll2_get_port_stats(p_hwfn, p_ptt, p_stats); + _qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + _qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + if (p_ll2_conn->tx_stats_en) _qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); qed_ptt_release(p_hwfn, p_ptt); + return 0; } +int qed_ll2_get_stats(void *cxt, + u8 connection_handle, struct qed_ll2_stats *p_stats) +{ + memset(p_stats, 0, sizeof(*p_stats)); + return __qed_ll2_get_stats(cxt, connection_handle, p_stats); +} + static void qed_ll2b_release_rx_packet(void *cxt, u8 connection_handle, void *cookie, @@ -2216,7 +2223,7 @@ struct qed_ll2_cbs ll2_cbs = { .tx_release_cb = &qed_ll2b_complete_tx_packet, }; -static void qed_ll2_set_conn_data(struct qed_dev *cdev, +static void qed_ll2_set_conn_data(struct qed_hwfn *p_hwfn, struct qed_ll2_acquire_data *data, struct qed_ll2_params *params, enum qed_ll2_conn_type conn_type, @@ -2232,7 +2239,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev, data->input.tx_num_desc = QED_LL2_TX_SIZE; data->p_connection_handle = handle; data->cbs = &ll2_cbs; - ll2_cbs.cookie = QED_LEADING_HWFN(cdev); + ll2_cbs.cookie = p_hwfn; if (lb) { data->input.tx_tc = PKT_LB_TC; @@ -2243,74 +2250,102 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev, } } -static int qed_ll2_start_ooo(struct qed_dev *cdev, +static int qed_ll2_start_ooo(struct qed_hwfn *p_hwfn, struct qed_ll2_params *params) { - struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); - u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; + u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id; struct qed_ll2_acquire_data data; int rc; - qed_ll2_set_conn_data(cdev, &data, params, + qed_ll2_set_conn_data(p_hwfn, &data, params, QED_LL2_TYPE_OOO, handle, true); - rc = qed_ll2_acquire_connection(hwfn, &data); + rc = qed_ll2_acquire_connection(p_hwfn, &data); if (rc) { - DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n"); + DP_INFO(p_hwfn, 
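/*
 * The switch from '=' to '+=' in the stats helpers above lets one
 * structure accumulate counters across engines: the caller zeroes it
 * once (the new qed_ll2_get_stats() wrapper) and each per-engine pass
 * through __qed_ll2_get_stats() adds its contribution. HILO_64_REGPAIR
 * combines a {hi, lo} register pair into one 64-bit value; a sketch of
 * both ideas with made-up register contents:
 */
#include <stdint.h>
#include <stdio.h>

struct regpair { uint32_t lo, hi; };

static uint64_t hilo64(struct regpair r)
{
        return ((uint64_t)r.hi << 32) | r.lo;
}

int main(void)
{
        struct regpair eng0 = { .lo = 100, .hi = 0 };
        struct regpair eng1 = { .lo = 23,  .hi = 0 };
        uint64_t rcv_ucast_pkts = 0;    /* zeroed once by the wrapper */

        rcv_ucast_pkts += hilo64(eng0); /* engine 0 pass */
        rcv_ucast_pkts += hilo64(eng1); /* engine 1 pass */
        printf("%llu\n", (unsigned long long)rcv_ucast_pkts);
        return 0;
}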
"Failed to acquire LL2 OOO connection\n"); goto out; } - rc = qed_ll2_establish_connection(hwfn, *handle); + rc = qed_ll2_establish_connection(p_hwfn, *handle); if (rc) { - DP_INFO(cdev, "Failed to establist LL2 OOO connection\n"); + DP_INFO(p_hwfn, "Failed to establish LL2 OOO connection\n"); goto fail; } return 0; fail: - qed_ll2_release_connection(hwfn, *handle); + qed_ll2_release_connection(p_hwfn, *handle); out: *handle = QED_LL2_UNUSED_HANDLE; return rc; } -static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) +static bool qed_ll2_is_storage_eng1(struct qed_dev *cdev) { - struct qed_ll2_buffer *buffer, *tmp_buffer; - enum qed_ll2_conn_type conn_type; - struct qed_ll2_acquire_data data; - struct qed_ptt *p_ptt; - int rc, i; + return (QED_IS_FCOE_PERSONALITY(QED_LEADING_HWFN(cdev)) || + QED_IS_ISCSI_PERSONALITY(QED_LEADING_HWFN(cdev))) && + (QED_AFFIN_HWFN(cdev) != QED_LEADING_HWFN(cdev)); +} +static int __qed_ll2_stop(struct qed_hwfn *p_hwfn) +{ + struct qed_dev *cdev = p_hwfn->cdev; + int rc; - /* Initialize LL2 locks & lists */ - INIT_LIST_HEAD(&cdev->ll2->list); - spin_lock_init(&cdev->ll2->lock); - cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + - L1_CACHE_BYTES + params->mtu; + rc = qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle); + if (rc) + DP_INFO(cdev, "Failed to terminate LL2 connection\n"); - /*Allocate memory for LL2 */ - DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n", - cdev->ll2->rx_size); - for (i = 0; i < QED_LL2_RX_SIZE; i++) { - buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); - if (!buffer) { - DP_INFO(cdev, "Failed to allocate LL2 buffers\n"); - goto fail; - } + qed_ll2_release_connection(p_hwfn, cdev->ll2->handle); - rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data, - &buffer->phys_addr); - if (rc) { - kfree(buffer); - goto fail; - } + return rc; +} - list_add_tail(&buffer->list, &cdev->ll2->list); +static int qed_ll2_stop(struct qed_dev *cdev) +{ + bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev); + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); + int rc = 0, rc2 = 0; + + if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE) + return 0; + + qed_llh_remove_mac_filter(cdev, 0, cdev->ll2_mac_address); + eth_zero_addr(cdev->ll2_mac_address); + + if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) + qed_ll2_stop_ooo(p_hwfn); + + /* In CMT mode, LL2 is always started on engine 0 for a storage PF */ + if (b_is_storage_eng1) { + rc2 = __qed_ll2_stop(QED_LEADING_HWFN(cdev)); + if (rc2) + DP_NOTICE(QED_LEADING_HWFN(cdev), + "Failed to stop LL2 on engine 0\n"); } - switch (QED_LEADING_HWFN(cdev)->hw_info.personality) { + rc = __qed_ll2_stop(p_hwfn); + if (rc) + DP_NOTICE(p_hwfn, "Failed to stop LL2\n"); + + qed_ll2_kill_buffers(cdev); + + cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; + + return rc | rc2; +} + +static int __qed_ll2_start(struct qed_hwfn *p_hwfn, + struct qed_ll2_params *params) +{ + struct qed_ll2_buffer *buffer, *tmp_buffer; + struct qed_dev *cdev = p_hwfn->cdev; + enum qed_ll2_conn_type conn_type; + struct qed_ll2_acquire_data data; + int rc, rx_cnt; + + switch (p_hwfn->hw_info.personality) { case QED_PCI_FCOE: conn_type = QED_LL2_TYPE_FCOE; break; @@ -2321,33 +2356,34 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) conn_type = QED_LL2_TYPE_ROCE; break; default: + conn_type = QED_LL2_TYPE_TEST; } - qed_ll2_set_conn_data(cdev, &data, params, conn_type, + qed_ll2_set_conn_data(p_hwfn, &data, params, conn_type, &cdev->ll2->handle, false); - rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), 
&data); + rc = qed_ll2_acquire_connection(p_hwfn, &data); if (rc) { - DP_INFO(cdev, "Failed to acquire LL2 connection\n"); - goto fail; + DP_INFO(p_hwfn, "Failed to acquire LL2 connection\n"); + return rc; } - rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev), - cdev->ll2->handle); + rc = qed_ll2_establish_connection(p_hwfn, cdev->ll2->handle); if (rc) { - DP_INFO(cdev, "Failed to establish LL2 connection\n"); - goto release_fail; + DP_INFO(p_hwfn, "Failed to establish LL2 connection\n"); + goto release_conn; } /* Post all Rx buffers to FW */ spin_lock_bh(&cdev->ll2->lock); + rx_cnt = cdev->ll2->rx_cnt; list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) { - rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), + rc = qed_ll2_post_rx_buffer(p_hwfn, cdev->ll2->handle, buffer->phys_addr, 0, buffer, 1); if (rc) { - DP_INFO(cdev, + DP_INFO(p_hwfn, "Failed to post an Rx buffer; Deleting it\n"); dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, cdev->ll2->rx_size, DMA_FROM_DEVICE); @@ -2355,100 +2391,127 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) list_del(&buffer->list); kfree(buffer); } else { - cdev->ll2->rx_cnt++; + rx_cnt++; } } spin_unlock_bh(&cdev->ll2->lock); - if (!cdev->ll2->rx_cnt) { - DP_INFO(cdev, "Failed passing even a single Rx buffer\n"); - goto release_terminate; + if (rx_cnt == cdev->ll2->rx_cnt) { + DP_NOTICE(p_hwfn, "Failed passing even a single Rx buffer\n"); + goto terminate_conn; } + cdev->ll2->rx_cnt = rx_cnt; + + return 0; + +terminate_conn: + qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle); +release_conn: + qed_ll2_release_connection(p_hwfn, cdev->ll2->handle); + return rc; +} + +static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) +{ + bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev); + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); + struct qed_ll2_buffer *buffer; + int rx_num_desc, i, rc; if (!is_valid_ether_addr(params->ll2_mac_address)) { - DP_INFO(cdev, "Invalid Ethernet address\n"); - goto release_terminate; + DP_NOTICE(cdev, "Invalid Ethernet address\n"); + return -EINVAL; } - if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI) { - DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n"); - rc = qed_ll2_start_ooo(cdev, params); + WARN_ON(!cdev->ll2->cbs); + + /* Initialize LL2 locks & lists */ + INIT_LIST_HEAD(&cdev->ll2->list); + spin_lock_init(&cdev->ll2->lock); + + cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + L1_CACHE_BYTES + params->mtu; + + /* Allocate memory for LL2. + * In CMT mode, in case of a storage PF which is affintized to engine 1, + * LL2 is started also on engine 0 and thus we need twofold buffers. + */ + rx_num_desc = QED_LL2_RX_SIZE * (b_is_storage_eng1 ? 
2 : 1); + DP_INFO(cdev, "Allocating %d LL2 buffers of size %08x bytes\n", + rx_num_desc, cdev->ll2->rx_size); + for (i = 0; i < rx_num_desc; i++) { + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + if (!buffer) { + DP_INFO(cdev, "Failed to allocate LL2 buffers\n"); + rc = -ENOMEM; + goto err0; + } + + rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data, + &buffer->phys_addr); if (rc) { - DP_INFO(cdev, - "Failed to initialize the OOO LL2 queue\n"); - goto release_terminate; + kfree(buffer); + goto err0; } - } - p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); - if (!p_ptt) { - DP_INFO(cdev, "Failed to acquire PTT\n"); - goto release_terminate; + list_add_tail(&buffer->list, &cdev->ll2->list); } - rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, - params->ll2_mac_address); - qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + rc = __qed_ll2_start(p_hwfn, params); if (rc) { - DP_ERR(cdev, "Failed to allocate LLH filter\n"); - goto release_terminate_all; + DP_NOTICE(cdev, "Failed to start LL2\n"); + goto err0; } - ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); - return 0; - -release_terminate_all: - -release_terminate: - qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); -release_fail: - qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); -fail: - qed_ll2_kill_buffers(cdev); - cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; - return -EINVAL; -} - -static int qed_ll2_stop(struct qed_dev *cdev) -{ - struct qed_ptt *p_ptt; - int rc; - - if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE) - return 0; + /* In CMT mode, always need to start LL2 on engine 0 for a storage PF, + * since broadcast/mutlicast packets are routed to engine 0. + */ + if (b_is_storage_eng1) { + rc = __qed_ll2_start(QED_LEADING_HWFN(cdev), params); + if (rc) { + DP_NOTICE(QED_LEADING_HWFN(cdev), + "Failed to start LL2 on engine 0\n"); + goto err1; + } + } - p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); - if (!p_ptt) { - DP_INFO(cdev, "Failed to acquire PTT\n"); - goto fail; + if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) { + DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n"); + rc = qed_ll2_start_ooo(p_hwfn, params); + if (rc) { + DP_NOTICE(cdev, "Failed to start OOO LL2\n"); + goto err2; + } } - qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, - cdev->ll2_mac_address); - qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); - eth_zero_addr(cdev->ll2_mac_address); + rc = qed_llh_add_mac_filter(cdev, 0, params->ll2_mac_address); + if (rc) { + DP_NOTICE(cdev, "Failed to add an LLH filter\n"); + goto err3; + } - if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI) - qed_ll2_stop_ooo(cdev); + ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); - rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), - cdev->ll2->handle); - if (rc) - DP_INFO(cdev, "Failed to terminate LL2 connection\n"); + return 0; +err3: + if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) + qed_ll2_stop_ooo(p_hwfn); +err2: + if (b_is_storage_eng1) + __qed_ll2_stop(QED_LEADING_HWFN(cdev)); +err1: + __qed_ll2_stop(p_hwfn); +err0: qed_ll2_kill_buffers(cdev); - - qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; - return rc; -fail: - return -EINVAL; } static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, unsigned long xmit_flags) { + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); struct qed_ll2_tx_pkt_info pkt; const skb_frag_t *frag; u8 flags = 0, nr_frags; @@ -2506,7 +2569,7 @@ static int 
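/*
 * The err0..err3 labels above follow the kernel's one-label-per-
 * resource unwind idiom: each failure jumps to the label that releases
 * everything acquired so far, in reverse order. A condensed,
 * self-contained model of the shape (the step names are illustrative,
 * not the driver's):
 */
#include <stdio.h>

static int step(const char *name, int fail)
{
        printf("%s\n", name);
        return fail ? -1 : 0;
}

static int start(int fail_at)
{
        int rc;

        rc = step("alloc buffers", fail_at == 0);
        if (rc)
                goto err0;
        rc = step("start engine", fail_at == 1);
        if (rc)
                goto err1;
        rc = step("add MAC filter", fail_at == 2);
        if (rc)
                goto err2;
        return 0;

err2:
        printf("stop engine\n");
err1:
        printf("free buffers\n");
err0:
        return rc;
}

int main(void)
{
        return start(2) ? 1 : 0;
}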
qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, * routine may run and free the SKB, so no dereferencing the SKB * beyond this point unless skb has any fragments. */ - rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle, + rc = qed_ll2_prepare_tx_packet(p_hwfn, cdev->ll2->handle, &pkt, 1); if (rc) goto err; @@ -2524,13 +2587,13 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, goto err; } - rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev), + rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, cdev->ll2->handle, mapping, skb_frag_size(frag)); /* if failed not much to do here, partial packet has been posted - * we can't free memory, will need to wait for completion. + * we can't free memory, will need to wait for completion */ if (rc) goto err2; @@ -2540,18 +2603,37 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb, err: dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE); - err2: return rc; } static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats) { + bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev); + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); + int rc; + if (!cdev->ll2) return -EINVAL; - return qed_ll2_get_stats(QED_LEADING_HWFN(cdev), - cdev->ll2->handle, stats); + rc = qed_ll2_get_stats(p_hwfn, cdev->ll2->handle, stats); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to get LL2 stats\n"); + return rc; + } + + /* In CMT mode, LL2 is always started on engine 0 for a storage PF */ + if (b_is_storage_eng1) { + rc = __qed_ll2_get_stats(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, stats); + if (rc) { + DP_NOTICE(QED_LEADING_HWFN(cdev), + "Failed to get LL2 stats on engine 0\n"); + return rc; + } + } + + return 0; } const struct qed_ll2_ops qed_ll2_ops_pass = { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6de23b56b294..829dd60ab937 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -48,6 +48,7 @@ #include <linux/crc32.h> #include <linux/qed/qed_if.h> #include <linux/qed/qed_ll2_if.h> +#include <net/devlink.h> #include "qed.h" #include "qed_sriov.h" @@ -342,6 +343,107 @@ static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state) return 0; } +struct qed_devlink { + struct qed_dev *cdev; +}; + +enum qed_devlink_param_id { + QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + QED_DEVLINK_PARAM_ID_IWARP_CMT, +}; + +static int qed_dl_param_get(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct qed_devlink *qed_dl; + struct qed_dev *cdev; + + qed_dl = devlink_priv(dl); + cdev = qed_dl->cdev; + ctx->val.vbool = cdev->iwarp_cmt; + + return 0; +} + +static int qed_dl_param_set(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct qed_devlink *qed_dl; + struct qed_dev *cdev; + + qed_dl = devlink_priv(dl); + cdev = qed_dl->cdev; + cdev->iwarp_cmt = ctx->val.vbool; + + return 0; +} + +static const struct devlink_param qed_devlink_params[] = { + DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT, + "iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + qed_dl_param_get, qed_dl_param_set, NULL), +}; + +static const struct devlink_ops qed_dl_ops; + +static int qed_devlink_register(struct qed_dev *cdev) +{ + union devlink_param_value value; + struct qed_devlink *qed_dl; + struct devlink *dl; + int rc; + + dl = devlink_alloc(&qed_dl_ops, sizeof(*qed_dl)); + if (!dl) + return 
-ENOMEM; + + qed_dl = devlink_priv(dl); + + cdev->dl = dl; + qed_dl->cdev = cdev; + + rc = devlink_register(dl, &cdev->pdev->dev); + if (rc) + goto err_free; + + rc = devlink_params_register(dl, qed_devlink_params, + ARRAY_SIZE(qed_devlink_params)); + if (rc) + goto err_unregister; + + value.vbool = false; + devlink_param_driverinit_value_set(dl, + QED_DEVLINK_PARAM_ID_IWARP_CMT, + value); + + devlink_params_publish(dl); + cdev->iwarp_cmt = false; + + return 0; + +err_unregister: + devlink_unregister(dl); + +err_free: + cdev->dl = NULL; + devlink_free(dl); + + return rc; +} + +static void qed_devlink_unregister(struct qed_dev *cdev) +{ + if (!cdev->dl) + return; + + devlink_params_unregister(cdev->dl, qed_devlink_params, + ARRAY_SIZE(qed_devlink_params)); + + devlink_unregister(cdev->dl); + devlink_free(cdev->dl); +} + /* probing */ static struct qed_dev *qed_probe(struct pci_dev *pdev, struct qed_probe_params *params) @@ -370,6 +472,12 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev, } DP_INFO(cdev, "PCI init completed successfully\n"); + rc = qed_devlink_register(cdev); + if (rc) { + DP_INFO(cdev, "Failed to register devlink.\n"); + goto err2; + } + rc = qed_hw_prepare(cdev, QED_PCI_DEFAULT); if (rc) { DP_ERR(cdev, "hw prepare failed\n"); @@ -399,6 +507,8 @@ static void qed_remove(struct qed_dev *cdev) qed_set_power_state(cdev, PCI_D3hot); + qed_devlink_unregister(cdev); + qed_free_cdev(cdev); } @@ -1301,26 +1411,21 @@ static u32 qed_sb_init(struct qed_dev *cdev, { struct qed_hwfn *p_hwfn; struct qed_ptt *p_ptt; - int hwfn_index; u16 rel_sb_id; - u8 n_hwfns; u32 rc; - /* RoCE uses single engine and CMT uses two engines. When using both - * we force only a single engine. Storage uses only engine 0 too. - */ - if (type == QED_SB_TYPE_L2_QUEUE) - n_hwfns = cdev->num_hwfns; - else - n_hwfns = 1; - - hwfn_index = sb_id % n_hwfns; - p_hwfn = &cdev->hwfns[hwfn_index]; - rel_sb_id = sb_id / n_hwfns; + /* RoCE/Storage use a single engine in CMT mode while L2 uses both */ + if (type == QED_SB_TYPE_L2_QUEUE) { + p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns]; + rel_sb_id = sb_id / cdev->num_hwfns; + } else { + p_hwfn = QED_AFFIN_HWFN(cdev); + rel_sb_id = sb_id; + } DP_VERBOSE(cdev, NETIF_MSG_INTR, "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n", - hwfn_index, rel_sb_id, sb_id); + IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id); if (IS_PF(p_hwfn->cdev)) { p_ptt = qed_ptt_acquire(p_hwfn); @@ -1339,20 +1444,26 @@ static u32 qed_sb_init(struct qed_dev *cdev, } static u32 qed_sb_release(struct qed_dev *cdev, - struct qed_sb_info *sb_info, u16 sb_id) + struct qed_sb_info *sb_info, + u16 sb_id, + enum qed_sb_type type) { struct qed_hwfn *p_hwfn; - int hwfn_index; u16 rel_sb_id; u32 rc; - hwfn_index = sb_id % cdev->num_hwfns; - p_hwfn = &cdev->hwfns[hwfn_index]; - rel_sb_id = sb_id / cdev->num_hwfns; + /* RoCE/Storage use a single engine in CMT mode while L2 uses both */ + if (type == QED_SB_TYPE_L2_QUEUE) { + p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns]; + rel_sb_id = sb_id / cdev->num_hwfns; + } else { + p_hwfn = QED_AFFIN_HWFN(cdev); + rel_sb_id = sb_id; + } DP_VERBOSE(cdev, NETIF_MSG_INTR, "hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n", - hwfn_index, rel_sb_id, sb_id); + IS_LEAD_HWFN(p_hwfn) ? 
0 : 1, rel_sb_id, sb_id); rc = qed_int_sb_release(p_hwfn, sb_info, rel_sb_id); @@ -2372,6 +2483,11 @@ static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf, return rc; } +static u8 qed_get_affin_hwfn_idx(struct qed_dev *cdev) +{ + return QED_AFFIN_HWFN_IDX(cdev); +} + static struct qed_selftest_ops qed_selftest_ops_pass = { .selftest_memory = &qed_selftest_memory, .selftest_interrupt = &qed_selftest_interrupt, @@ -2419,6 +2535,7 @@ const struct qed_common_ops qed_common_ops_pass = { .db_recovery_add = &qed_db_recovery_add, .db_recovery_del = &qed_db_recovery_del, .read_module_eeprom = &qed_read_module_eeprom, + .get_affin_hwfn_idx = &qed_get_affin_hwfn_idx, }; void qed_get_protocol_stats(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index cc27fd60d689..758702c1ce9c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -3685,3 +3685,68 @@ int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT, features, &mcp_resp, &mcp_param); } + +int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_mcp_mb_params mb_params = {0}; + struct qed_dev *cdev = p_hwfn->cdev; + u8 fir_valid, l2_valid; + int rc; + + mb_params.cmd = DRV_MSG_CODE_GET_ENGINE_CONFIG; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) { + DP_INFO(p_hwfn, + "The get_engine_config command is unsupported by the MFW\n"); + return -EOPNOTSUPP; + } + + fir_valid = QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID); + if (fir_valid) + cdev->fir_affin = + QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE); + + l2_valid = QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID); + if (l2_valid) + cdev->l2_affin_hint = + QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE); + + DP_INFO(p_hwfn, + "Engine affinity config: FIR={valid %hhd, value %hhd}, L2_hint={valid %hhd, value %hhd}\n", + fir_valid, cdev->fir_affin, l2_valid, cdev->l2_affin_hint); + + return 0; +} + +int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + struct qed_mcp_mb_params mb_params = {0}; + struct qed_dev *cdev = p_hwfn->cdev; + int rc; + + mb_params.cmd = DRV_MSG_CODE_GET_PPFID_BITMAP; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) { + DP_INFO(p_hwfn, + "The get_ppfid_bitmap command is unsupported by the MFW\n"); + return -EOPNOTSUPP; + } + + cdev->ppfid_bitmap = QED_MFW_GET_FIELD(mb_params.mcp_param, + FW_MB_PARAM_PPFID_BITMAP); + + DP_VERBOSE(p_hwfn, QED_MSG_SP, "PPFID bitmap 0x%hhx\n", + cdev->ppfid_bitmap); + + return 0; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 261c1a392e2c..e4f8fe4bd062 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -1186,4 +1186,20 @@ void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); */ int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn); +/** + * @brief Get the engine affinity configuration. 
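/*
 * QED_MFW_GET_FIELD() in qed_mcp_get_engine_config() above extracts a
 * named field from the mailbox parameter via that field's _MASK/_SHIFT
 * pair, with separate "valid" and "value" bits per setting. A
 * userspace model of the accessor; the macro name, masks and shifts
 * below are made up for illustration:
 */
#include <stdint.h>
#include <stdio.h>

#define ENG_CFG_FIR_AFFIN_VALID_MASK  0x00000001
#define ENG_CFG_FIR_AFFIN_VALID_SHIFT 0
#define ENG_CFG_FIR_AFFIN_VALUE_MASK  0x00000002
#define ENG_CFG_FIR_AFFIN_VALUE_SHIFT 1

#define GET_MFW_FIELD(val, name) \
        (((val) & name##_MASK) >> name##_SHIFT)

int main(void)
{
        uint32_t mcp_param = 0x3;       /* valid = 1, value = 1 */

        printf("valid=%u value=%u\n",
               (unsigned)GET_MFW_FIELD(mcp_param, ENG_CFG_FIR_AFFIN_VALID),
               (unsigned)GET_MFW_FIELD(mcp_param, ENG_CFG_FIR_AFFIN_VALUE));
        return 0;
}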
+ * + * @param p_hwfn + * @param p_ptt + */ +int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +/** + * @brief Get the PPFID bitmap. + * + * @param p_hwfn + * @param p_ptt + */ +int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c index 1302b308bd87..0dacf2c18c09 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c @@ -44,6 +44,8 @@ /* Add/subtract the Adjustment_Value when making a Drift adjustment */ #define QED_DRIFT_CNTR_DIRECTION_SHIFT 31 #define QED_TIMESTAMP_MASK BIT(16) +/* Param mask for Hardware to detect/timestamp the unicast PTP packets */ +#define QED_PTP_UCAST_PARAM_MASK 0xF static enum qed_resc_lock qed_ptcdev_to_resc(struct qed_hwfn *p_hwfn) { @@ -157,7 +159,8 @@ static int qed_ptp_hw_read_tx_ts(struct qed_dev *cdev, u64 *timestamp) *timestamp = 0; val = qed_rd(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_SEQID); if (!(val & QED_TIMESTAMP_MASK)) { - DP_INFO(p_hwfn, "Invalid Tx timestamp, buf_seqid = %d\n", val); + DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, + "Invalid Tx timestamp, buf_seqid = %08x\n", val); return -EINVAL; } @@ -242,7 +245,8 @@ static int qed_ptp_hw_cfg_filters(struct qed_dev *cdev, return -EINVAL; } - qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_PARAM_MASK, + QED_PTP_UCAST_PARAM_MASK); qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_PTP_RULE_MASK, rule_mask); qed_wr(p_hwfn, p_ptt, NIG_REG_RX_PTP_EN, enable_cfg); @@ -252,7 +256,8 @@ static int qed_ptp_hw_cfg_filters(struct qed_dev *cdev, qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, 0x3FFF); } else { qed_wr(p_hwfn, p_ptt, NIG_REG_TX_PTP_EN, enable_cfg); - qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, 0); + qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_PARAM_MASK, + QED_PTP_UCAST_PARAM_MASK); qed_wr(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_RULE_MASK, rule_mask); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index 7873d6dfd91f..f900fde448db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -700,7 +700,7 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn, return rc; if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { - rc = qed_iwarp_setup(p_hwfn, p_ptt, params); + rc = qed_iwarp_setup(p_hwfn, params); if (rc) return rc; } else { @@ -742,7 +742,7 @@ static int qed_rdma_stop(void *rdma_cxt) (ll2_ethertype_en & 0xFFFE)); if (QED_IS_IWARP_PERSONALITY(p_hwfn)) { - rc = qed_iwarp_stop(p_hwfn, p_ptt); + rc = qed_iwarp_stop(p_hwfn); if (rc) { qed_ptt_release(p_hwfn, p_ptt); return rc; @@ -803,7 +803,7 @@ static int qed_rdma_add_user(void *rdma_cxt, dpi_start_offset + ((out_params->dpi) * p_hwfn->dpi_size)); - out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr + + out_params->dpi_phys_addr = p_hwfn->db_phys_addr + dpi_start_offset + ((out_params->dpi) * p_hwfn->dpi_size); @@ -818,14 +818,17 @@ static struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port; + struct qed_mcp_link_state *p_link_output; DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n"); - /* Link may have changed */ - p_port->port_state = p_hwfn->mcp_info->link_output.link_up ? 
- QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN; + /* The link state is saved only for the leading hwfn */ + p_link_output = &QED_LEADING_HWFN(p_hwfn->cdev)->mcp_info->link_output; - p_port->link_speed = p_hwfn->mcp_info->link_output.speed; + p_port->port_state = p_link_output->link_up ? QED_RDMA_PORT_UP + : QED_RDMA_PORT_DOWN; + + p_port->link_speed = p_link_output->speed; p_port->max_msg_size = RDMA_MAX_DATA_SIZE_IN_WQE; @@ -870,7 +873,7 @@ static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) static int qed_fill_rdma_dev_info(struct qed_dev *cdev, struct qed_dev_rdma_info *info) { - struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); + struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev); memset(info, 0, sizeof(*info)); @@ -889,9 +892,9 @@ static int qed_rdma_get_sb_start(struct qed_dev *cdev) int feat_num; if (cdev->num_hwfns > 1) - feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE); + feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE); else - feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) * + feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE) * cdev->num_hwfns; return feat_num; @@ -899,7 +902,7 @@ static int qed_rdma_get_sb_start(struct qed_dev *cdev) static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev) { - int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ); + int n_cnq = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_RDMA_CNQ); int n_msix = cdev->int_params.rdma_msix_cnt; return min_t(int, n_cnq, n_msix); @@ -1653,7 +1656,7 @@ static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) { - return QED_LEADING_HWFN(cdev); + return QED_AFFIN_HWFN(cdev); } static int qed_rdma_modify_srq(void *rdma_cxt, @@ -1881,7 +1884,7 @@ err: static int qed_rdma_init(struct qed_dev *cdev, struct qed_rdma_start_in_params *params) { - return qed_rdma_start(QED_LEADING_HWFN(cdev), params); + return qed_rdma_start(QED_AFFIN_HWFN(cdev), params); } static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) @@ -1899,23 +1902,12 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, u8 *old_mac_address, u8 *new_mac_address) { - struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev); - struct qed_ptt *p_ptt; int rc = 0; - p_ptt = qed_ptt_acquire(p_hwfn); - if (!p_ptt) { - DP_ERR(cdev, - "qed roce ll2 mac filter set: failed to acquire PTT\n"); - return -EINVAL; - } - if (old_mac_address) - qed_llh_remove_mac_filter(p_hwfn, p_ptt, old_mac_address); + qed_llh_remove_mac_filter(cdev, 0, old_mac_address); if (new_mac_address) - rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, new_mac_address); - - qed_ptt_release(p_hwfn, p_ptt); + rc = qed_llh_add_mac_filter(cdev, 0, new_mac_address); if (rc) DP_ERR(cdev, @@ -1924,6 +1916,36 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, return rc; } +static int qed_iwarp_set_engine_affin(struct qed_dev *cdev, bool b_reset) +{ + enum qed_eng eng; + u8 ppfid = 0; + int rc; + + /* Make sure iwarp cmt mode is enabled before setting affinity */ + if (!cdev->iwarp_cmt) + return -EINVAL; + + if (b_reset) + eng = QED_BOTH_ENG; + else + eng = cdev->l2_affin_hint ? 
QED_ENG1 : QED_ENG0; + + rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng); + if (rc) { + DP_NOTICE(cdev, + "Failed to set the engine affinity of ppfid %d\n", + ppfid); + return rc; + } + + DP_VERBOSE(cdev, (QED_MSG_RDMA | QED_MSG_SP), + "LLH: Set the engine affinity of non-RoCE packets as %d\n", + eng); + + return 0; +} + static const struct qed_rdma_ops qed_rdma_ops_pass = { .common = &qed_common_ops_pass, .fill_dev_info = &qed_fill_rdma_dev_info, @@ -1963,6 +1985,7 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = { .ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet, .ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter, .ll2_get_stats = &qed_ll2_get_stats, + .iwarp_set_engine_affin = &qed_iwarp_set_engine_affin, .iwarp_connect = &qed_iwarp_connect, .iwarp_create_listen = &qed_iwarp_create_listen, .iwarp_destroy_listen = &qed_iwarp_destroy_listen, diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index 5ce825ca5f24..60f850c3bdd6 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -254,6 +254,10 @@ 0x500840UL #define NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR \ 0x50196cUL +#define NIG_REG_LLH_PPFID2PFID_TBL_0 \ + 0x501970UL +#define NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL \ + 0x50 #define NIG_REG_LLH_CLS_TYPE_DUALMODE \ 0x501964UL #define NIG_REG_LLH_FUNC_TAG_EN 0x5019b0UL @@ -1626,6 +1630,8 @@ #define PHY_PCIE_REG_PHY1_K2_E5 \ 0x624000UL #define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL +#define NIG_REG_PPF_TO_ENGINE_SEL 0x508900UL +#define NIG_REG_PPF_TO_ENGINE_SEL_SIZE 8 #define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL #define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL #define DORQ_REG_PF_DPM_ENABLE 0x100510UL diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index 5a495fda9e9d..7e0b795230b2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -588,7 +588,7 @@ int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn) { struct qed_spq_entry *p_ent = NULL; struct qed_sp_init_data init_data; - int rc = -EINVAL; + int rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 2f318aaf2b05..78f77b712b10 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -917,10 +917,11 @@ static u8 qed_iov_alloc_vf_igu_sbs(struct qed_hwfn *p_hwfn, /* Configure igu sb in CAU which were marked valid */ qed_init_cau_sb_entry(p_hwfn, &sb_entry, p_hwfn->rel_pf_id, vf->abs_vf_id, 1); + qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&sb_entry, CAU_REG_SB_VAR_MEMORY + - p_block->igu_sb_id * sizeof(u64), 2, 0); + p_block->igu_sb_id * sizeof(u64), 2, NULL); } vf->num_sbs = (u8) num_rx_queues; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 92fe226980fd..0e931c04fecf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -92,6 +92,7 @@ struct qede_stats_common { u64 non_coalesced_pkts; u64 coalesced_bytes; u64 link_change_count; + u64 ptp_skip_txts; /* port */ u64 rx_64_byte_packets; @@ -189,6 +190,7 @@ struct qede_dev { const struct qed_eth_ops *ops; struct qede_ptp *ptp; + u64 ptp_skip_txts; struct qed_dev_eth_info dev_info; #define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues) @@ -549,7 
+551,7 @@ int qede_txq_has_work(struct qede_tx_queue *txq); void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count); void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq); int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, - struct tc_cls_flower_offload *f); + struct flow_cls_offload *f); #define RX_RING_SIZE_POW 13 #define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 8911a97ab0ca..e85f9fef930c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -174,6 +174,7 @@ static const struct { QEDE_STAT(coalesced_bytes), QEDE_STAT(link_change_count), + QEDE_STAT(ptp_skip_txts), }; #define QEDE_NUM_STATS ARRAY_SIZE(qede_stats_arr) diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index add922b93d2c..9a6a9a008714 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1943,7 +1943,7 @@ qede_parse_flow_attr(struct qede_dev *edev, __be16 proto, } int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto, - struct tc_cls_flower_offload *f) + struct flow_cls_offload *f) { struct qede_arfs_fltr_node *n; int min_hlen, rc = -EINVAL; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 02a97c659e29..8d1c208f778f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -390,6 +390,7 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) p_common->brb_discards = stats.common.brb_discards; p_common->tx_mac_ctrl_frames = stats.common.tx_mac_ctrl_frames; p_common->link_change_count = stats.common.link_change_count; + p_common->ptp_skip_txts = edev->ptp_skip_txts; if (QEDE_IS_BB(edev)) { struct qede_stats_bb *p_bb = &edev->stats.bb; @@ -547,13 +548,13 @@ static int qede_setup_tc(struct net_device *ndev, u8 num_tc) } static int -qede_set_flower(struct qede_dev *edev, struct tc_cls_flower_offload *f, +qede_set_flower(struct qede_dev *edev, struct flow_cls_offload *f, __be16 proto) { switch (f->command) { - case TC_CLSFLOWER_REPLACE: + case FLOW_CLS_REPLACE: return qede_add_tc_flower_fltr(edev, proto, f); - case TC_CLSFLOWER_DESTROY: + case FLOW_CLS_DESTROY: return qede_delete_flow_filter(edev, f->cookie); default: return -EOPNOTSUPP; @@ -563,7 +564,7 @@ qede_set_flower(struct qede_dev *edev, struct tc_cls_flower_offload *f, static int qede_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { - struct tc_cls_flower_offload *f; + struct flow_cls_offload *f; struct qede_dev *edev = cb_priv; if (!tc_cls_can_offload_and_chain0(edev->ndev, type_data)) @@ -578,24 +579,7 @@ static int qede_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -static int qede_setup_tc_block(struct qede_dev *edev, - struct tc_block_offload *f) -{ - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, - qede_setup_tc_block_cb, - edev, edev, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, qede_setup_tc_block_cb, edev); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(qede_block_cb_list); static int qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type, @@ -606,7 +590,10 @@ 
qede_setup_tc_offload(struct net_device *dev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: - return qede_setup_tc_block(edev, type_data); + return flow_block_cb_setup_simple(type_data, + &qede_block_cb_list, + qede_setup_tc_block_cb, + edev, edev, true); case TC_SETUP_QDISC_MQPRIO: mqprio = type_data; @@ -959,13 +946,13 @@ void __qede_unlock(struct qede_dev *edev) /* This version of the lock should be used when acquiring the RTNL lock is also * needed in addition to the internal qede lock. */ -void qede_lock(struct qede_dev *edev) +static void qede_lock(struct qede_dev *edev) { rtnl_lock(); __qede_lock(edev); } -void qede_unlock(struct qede_dev *edev) +static void qede_unlock(struct qede_dev *edev) { __qede_unlock(edev); rtnl_unlock(); @@ -1306,7 +1293,8 @@ static void qede_free_mem_sb(struct qede_dev *edev, struct qed_sb_info *sb_info, u16 sb_id) { if (sb_info->sb_virt) { - edev->ops->common->sb_release(edev->cdev, sb_info, sb_id); + edev->ops->common->sb_release(edev->cdev, sb_info, sb_id, + QED_SB_TYPE_L2_QUEUE); dma_free_coherent(&edev->pdev->dev, sizeof(*sb_info->sb_virt), (void *)sb_info->sb_virt, sb_info->sb_phys); memset(sb_info, 0, sizeof(*sb_info)); @@ -2231,6 +2219,8 @@ out: if (mode != QEDE_UNLOAD_RECOVERY) DP_NOTICE(edev, "Link is down\n"); + edev->ptp_skip_txts = 0; + DP_INFO(edev, "Ending qede unload\n"); } diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c index bddb2b5982dc..f815435cf106 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c @@ -30,6 +30,7 @@ * SOFTWARE. */ #include "qede_ptp.h" +#define QEDE_PTP_TX_TIMEOUT (2 * HZ) struct qede_ptp { const struct qed_eth_ptp_ops *ops; @@ -38,6 +39,7 @@ struct qede_ptp { struct timecounter tc; struct ptp_clock *clock; struct work_struct work; + unsigned long ptp_tx_start; struct qede_dev *edev; struct sk_buff *tx_skb; @@ -160,18 +162,30 @@ static void qede_ptp_task(struct work_struct *work) struct qede_dev *edev; struct qede_ptp *ptp; u64 timestamp, ns; + bool timedout; int rc; ptp = container_of(work, struct qede_ptp, work); edev = ptp->edev; + timedout = time_is_before_jiffies(ptp->ptp_tx_start + + QEDE_PTP_TX_TIMEOUT); /* Read Tx timestamp registers */ spin_lock_bh(&ptp->lock); rc = ptp->ops->read_tx_ts(edev->cdev, ×tamp); spin_unlock_bh(&ptp->lock); if (rc) { - /* Reschedule to keep checking for a valid timestamp value */ - schedule_work(&ptp->work); + if (unlikely(timedout)) { + DP_INFO(edev, "Tx timestamp is not recorded\n"); + dev_kfree_skb_any(ptp->tx_skb); + ptp->tx_skb = NULL; + clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, + &edev->flags); + edev->ptp_skip_txts++; + } else { + /* Reschedule to keep checking for a valid TS value */ + schedule_work(&ptp->work); + } return; } @@ -514,19 +528,28 @@ void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb) if (!ptp) return; - if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags)) + if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, + &edev->flags)) { + DP_ERR(edev, "Timestamping in progress\n"); + edev->ptp_skip_txts++; return; + } if (unlikely(!test_bit(QEDE_FLAGS_TX_TIMESTAMPING_EN, &edev->flags))) { - DP_NOTICE(edev, - "Tx timestamping was not enabled, this packet will not be timestamped\n"); + DP_ERR(edev, + "Tx timestamping was not enabled, this packet will not be timestamped\n"); + clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags); + edev->ptp_skip_txts++; } else if (unlikely(ptp->tx_skb)) { - 
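/*
 * The PTP change above records jiffies when a Tx timestamp request is
 * armed (ptp_tx_start) and declares the timestamp lost once 2*HZ have
 * elapsed, via time_is_before_jiffies(). A userspace model of the
 * underlying wrap-safe comparison; the signed subtraction is what
 * keeps the check correct when the tick counter wraps:
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long jiffies_t;

/* true if a is after b, correct across counter wrap */
static bool time_after(jiffies_t a, jiffies_t b)
{
        return (long)(b - a) < 0;
}

int main(void)
{
        const jiffies_t HZ = 250;               /* hypothetical tick rate */
        jiffies_t jiffies = (jiffies_t)-100;    /* near wrap on purpose */
        jiffies_t tx_start = jiffies;

        jiffies += 3 * HZ;                      /* time passes; counter wraps */
        printf("timed out: %d\n", time_after(jiffies, tx_start + 2 * HZ));
        return 0;
}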
DP_NOTICE(edev, - "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n"); + DP_ERR(edev, + "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n"); + clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags); + edev->ptp_skip_txts++; } else { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* schedule check for Tx timestamp */ ptp->tx_skb = skb_get(skb); + ptp->ptp_tx_start = jiffies; schedule_work(&ptp->work); } } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 7a873002e626..c07438db30ba 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -4119,13 +4119,14 @@ static void qlcnic_config_indev_addr(struct qlcnic_adapter *adapter, struct net_device *dev, unsigned long event) { + const struct in_ifaddr *ifa; struct in_device *indev; indev = in_dev_get(dev); if (!indev) return; - for_ifa(indev) { + in_dev_for_each_ifa_rtnl(ifa, indev) { switch (event) { case NETDEV_UP: qlcnic_config_ipaddr(adapter, @@ -4138,7 +4139,7 @@ qlcnic_config_indev_addr(struct qlcnic_adapter *adapter, default: break; } - } endfor_ifa(indev); + } in_dev_put(indev); } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index af3b037fa442..5632da05145a 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1066,7 +1066,7 @@ static int qlcnic_sriov_pf_cfg_ip_cmd(struct qlcnic_bc_trans *trans, { struct qlcnic_vf_info *vf = trans->vf; struct qlcnic_adapter *adapter = vf->adapter; - int err = -EIO; + int err; cmd->req.arg[1] |= vf->vp->handle << 16; cmd->req.arg[1] |= BIT_31; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h index 4bf20d0651c4..576501db2a0b 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h @@ -4,6 +4,7 @@ #ifndef _RMNET_MAP_H_ #define _RMNET_MAP_H_ +#include <linux/if_rmnet.h> struct rmnet_map_control_command { u8 command_name; @@ -31,30 +32,6 @@ enum rmnet_map_commands { RMNET_MAP_COMMAND_ENUM_LENGTH }; -struct rmnet_map_header { - u8 pad_len:6; - u8 reserved_bit:1; - u8 cd_bit:1; - u8 mux_id; - __be16 pkt_len; -} __aligned(1); - -struct rmnet_map_dl_csum_trailer { - u8 reserved1; - u8 valid:1; - u8 reserved2:7; - u16 csum_start_offset; - u16 csum_length; - __be16 csum_value; -} __aligned(1); - -struct rmnet_map_ul_csum_header { - __be16 csum_start_offset; - u16 csum_insert_offset:14; - u16 udp_ip4_ind:1; - u16 csum_enabled:1; -} __aligned(1); - #define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \ (Y)->data)->mux_id) #define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \ diff --git a/drivers/net/ethernet/realtek/Makefile b/drivers/net/ethernet/realtek/Makefile index 33be8c5ad0c9..d5304bad2372 100644 --- a/drivers/net/ethernet/realtek/Makefile +++ b/drivers/net/ethernet/realtek/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_8139CP) += 8139cp.o obj-$(CONFIG_8139TOO) += 8139too.o obj-$(CONFIG_ATP) += atp.o +r8169-objs += r8169_main.o r8169_firmware.o obj-$(CONFIG_R8169) += r8169.o diff --git a/drivers/net/ethernet/realtek/r8169_firmware.c b/drivers/net/ethernet/realtek/r8169_firmware.c new file mode 100644 index 000000000000..8f54a2c832eb --- /dev/null +++ 
b/drivers/net/ethernet/realtek/r8169_firmware.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* r8169_firmware.c: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw> + * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com> + * Copyright (c) a lot of people too. Please respect their work. + * + * See MAINTAINERS file for support contact information. + */ + +#include <linux/delay.h> +#include <linux/firmware.h> + +#include "r8169_firmware.h" + +enum rtl_fw_opcode { + PHY_READ = 0x0, + PHY_DATA_OR = 0x1, + PHY_DATA_AND = 0x2, + PHY_BJMPN = 0x3, + PHY_MDIO_CHG = 0x4, + PHY_CLEAR_READCOUNT = 0x7, + PHY_WRITE = 0x8, + PHY_READCOUNT_EQ_SKIP = 0x9, + PHY_COMP_EQ_SKIPN = 0xa, + PHY_COMP_NEQ_SKIPN = 0xb, + PHY_WRITE_PREVIOUS = 0xc, + PHY_SKIPN = 0xd, + PHY_DELAY_MS = 0xe, +}; + +struct fw_info { + u32 magic; + char version[RTL_VER_SIZE]; + __le32 fw_start; + __le32 fw_len; + u8 chksum; +} __packed; + +#define FW_OPCODE_SIZE sizeof(typeof(*((struct rtl_fw_phy_action *)0)->code)) + +static bool rtl_fw_format_ok(struct rtl_fw *rtl_fw) +{ + const struct firmware *fw = rtl_fw->fw; + struct fw_info *fw_info = (struct fw_info *)fw->data; + struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; + + if (fw->size < FW_OPCODE_SIZE) + return false; + + if (!fw_info->magic) { + size_t i, size, start; + u8 checksum = 0; + + if (fw->size < sizeof(*fw_info)) + return false; + + for (i = 0; i < fw->size; i++) + checksum += fw->data[i]; + if (checksum != 0) + return false; + + start = le32_to_cpu(fw_info->fw_start); + if (start > fw->size) + return false; + + size = le32_to_cpu(fw_info->fw_len); + if (size > (fw->size - start) / FW_OPCODE_SIZE) + return false; + + strscpy(rtl_fw->version, fw_info->version, RTL_VER_SIZE); + + pa->code = (__le32 *)(fw->data + start); + pa->size = size; + } else { + if (fw->size % FW_OPCODE_SIZE) + return false; + + strscpy(rtl_fw->version, rtl_fw->fw_name, RTL_VER_SIZE); + + pa->code = (__le32 *)fw->data; + pa->size = fw->size / FW_OPCODE_SIZE; + } + + return true; +} + +static bool rtl_fw_data_ok(struct rtl_fw *rtl_fw) +{ + struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; + size_t index; + + for (index = 0; index < pa->size; index++) { + u32 action = le32_to_cpu(pa->code[index]); + u32 regno = (action & 0x0fff0000) >> 16; + + switch (action >> 28) { + case PHY_READ: + case PHY_DATA_OR: + case PHY_DATA_AND: + case PHY_MDIO_CHG: + case PHY_CLEAR_READCOUNT: + case PHY_WRITE: + case PHY_WRITE_PREVIOUS: + case PHY_DELAY_MS: + break; + + case PHY_BJMPN: + if (regno > index) + goto out; + break; + case PHY_READCOUNT_EQ_SKIP: + if (index + 2 >= pa->size) + goto out; + break; + case PHY_COMP_EQ_SKIPN: + case PHY_COMP_NEQ_SKIPN: + case PHY_SKIPN: + if (index + 1 + regno >= pa->size) + goto out; + break; + + default: + dev_err(rtl_fw->dev, "Invalid action 0x%08x\n", action); + return false; + } + } + + return true; +out: + dev_err(rtl_fw->dev, "Out of range of firmware\n"); + return false; +} + +void rtl_fw_write_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) +{ + struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; + rtl_fw_write_t fw_write = rtl_fw->phy_write; + rtl_fw_read_t fw_read = rtl_fw->phy_read; + int predata = 0, count = 0; + size_t index; + + for (index = 0; index < pa->size; index++) { + u32 action = le32_to_cpu(pa->code[index]); + u32 data = action & 0x0000ffff; + u32 regno = (action & 0x0fff0000) >> 16; + enum rtl_fw_opcode opcode = action >> 28; + + if (!action) + break; + + switch (opcode) { 
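/*
 * rtl_fw_format_ok() above accepts a structured image only if the
 * byte-wise sum of the whole blob is zero (the header carries the
 * complementing checksum byte) and fw_start/fw_len stay inside the
 * file. A userspace sketch of the same validation on a made-up
 * 8-byte blob:
 */
#include <stdint.h>
#include <stdio.h>

static int blob_ok(const uint8_t *data, size_t size,
                   uint32_t start, uint32_t len, size_t opcode_size)
{
        uint8_t sum = 0;
        size_t i;

        for (i = 0; i < size; i++)
                sum += data[i];
        if (sum != 0)                   /* checksum byte must cancel the rest */
                return 0;
        if (start > size)               /* action code must begin in-file */
                return 0;
        return len <= (size - start) / opcode_size;
}

int main(void)
{
        uint8_t blob[8] = { 0x10, 0x20, 0x30, 0 };

        blob[3] = (uint8_t)-(0x10 + 0x20 + 0x30);       /* checksum byte */
        printf("ok=%d\n", blob_ok(blob, sizeof(blob), 4, 1, 4));
        return 0;
}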
+ case PHY_READ: + predata = fw_read(tp, regno); + count++; + break; + case PHY_DATA_OR: + predata |= data; + break; + case PHY_DATA_AND: + predata &= data; + break; + case PHY_BJMPN: + index -= (regno + 1); + break; + case PHY_MDIO_CHG: + if (data == 0) { + fw_write = rtl_fw->phy_write; + fw_read = rtl_fw->phy_read; + } else if (data == 1) { + fw_write = rtl_fw->mac_mcu_write; + fw_read = rtl_fw->mac_mcu_read; + } + + break; + case PHY_CLEAR_READCOUNT: + count = 0; + break; + case PHY_WRITE: + fw_write(tp, regno, data); + break; + case PHY_READCOUNT_EQ_SKIP: + if (count == data) + index++; + break; + case PHY_COMP_EQ_SKIPN: + if (predata == data) + index += regno; + break; + case PHY_COMP_NEQ_SKIPN: + if (predata != data) + index += regno; + break; + case PHY_WRITE_PREVIOUS: + fw_write(tp, regno, predata); + break; + case PHY_SKIPN: + index += regno; + break; + case PHY_DELAY_MS: + mdelay(data); + break; + } + } +} + +void rtl_fw_release_firmware(struct rtl_fw *rtl_fw) +{ + release_firmware(rtl_fw->fw); +} + +int rtl_fw_request_firmware(struct rtl_fw *rtl_fw) +{ + int rc; + + rc = request_firmware(&rtl_fw->fw, rtl_fw->fw_name, rtl_fw->dev); + if (rc < 0) + goto out; + + if (!rtl_fw_format_ok(rtl_fw) || !rtl_fw_data_ok(rtl_fw)) { + release_firmware(rtl_fw->fw); + rc = -EINVAL; + goto out; + } + + return 0; +out: + dev_err(rtl_fw->dev, "Unable to load firmware %s (%d)\n", + rtl_fw->fw_name, rc); + return rc; +} diff --git a/drivers/net/ethernet/realtek/r8169_firmware.h b/drivers/net/ethernet/realtek/r8169_firmware.h new file mode 100644 index 000000000000..7dc348ed8345 --- /dev/null +++ b/drivers/net/ethernet/realtek/r8169_firmware.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* r8169_firmware.h: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen <shuchen@realtek.com.tw> + * Copyright (c) 2003 - 2007 Francois Romieu <romieu@fr.zoreil.com> + * Copyright (c) a lot of people too. Please respect their work. + * + * See MAINTAINERS file for support contact information. 
+ */ + +#include <linux/device.h> +#include <linux/firmware.h> + +struct rtl8169_private; +typedef void (*rtl_fw_write_t)(struct rtl8169_private *tp, int reg, int val); +typedef int (*rtl_fw_read_t)(struct rtl8169_private *tp, int reg); + +#define RTL_VER_SIZE 32 + +struct rtl_fw { + rtl_fw_write_t phy_write; + rtl_fw_read_t phy_read; + rtl_fw_write_t mac_mcu_write; + rtl_fw_read_t mac_mcu_read; + const struct firmware *fw; + const char *fw_name; + struct device *dev; + + char version[RTL_VER_SIZE]; + + struct rtl_fw_phy_action { + __le32 *code; + size_t size; + } phy_action; +}; + +int rtl_fw_request_firmware(struct rtl_fw *rtl_fw); +void rtl_fw_release_firmware(struct rtl_fw *rtl_fw); +void rtl_fw_write_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169_main.c index d06a61f00e78..efef5453b94f 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -27,12 +27,13 @@ #include <linux/interrupt.h> #include <linux/dma-mapping.h> #include <linux/pm_runtime.h> -#include <linux/firmware.h> #include <linux/prefetch.h> #include <linux/pci-aspm.h> #include <linux/ipv6.h> #include <net/ip6_checksum.h> +#include "r8169_firmware.h" + #define MODULENAME "r8169" #define FIRMWARE_8168D_1 "rtl_nic/rtl8168d-1.fw" @@ -72,6 +73,8 @@ static const int multicast_filter_limit = 32; #define R8169_TX_RING_BYTES (NUM_TX_DESC * sizeof(struct TxDesc)) #define R8169_RX_RING_BYTES (NUM_RX_DESC * sizeof(struct RxDesc)) +#define RTL_CFG_NO_GBIT 1 + /* write/read MMIO register */ #define RTL_W8(tp, reg, val8) writeb((val8), tp->mmio_addr + (reg)) #define RTL_W16(tp, reg, val16) writew((val16), tp->mmio_addr + (reg)) @@ -81,7 +84,7 @@ static const int multicast_filter_limit = 32; #define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg)) enum mac_version { - RTL_GIGA_MAC_VER_01 = 0, + /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */ RTL_GIGA_MAC_VER_02, RTL_GIGA_MAC_VER_03, RTL_GIGA_MAC_VER_04, @@ -132,7 +135,7 @@ enum mac_version { RTL_GIGA_MAC_VER_49, RTL_GIGA_MAC_VER_50, RTL_GIGA_MAC_VER_51, - RTL_GIGA_MAC_NONE = 0xff, + RTL_GIGA_MAC_NONE }; #define JUMBO_1K ETH_DATA_LEN @@ -146,7 +149,6 @@ static const struct { const char *fw_name; } rtl_chip_infos[] = { /* PCI devices. */ - [RTL_GIGA_MAC_VER_01] = {"RTL8169" }, [RTL_GIGA_MAC_VER_02] = {"RTL8169s" }, [RTL_GIGA_MAC_VER_03] = {"RTL8110s" }, [RTL_GIGA_MAC_VER_04] = {"RTL8169sb/8110sb" }, @@ -155,7 +157,7 @@ static const struct { /* PCI-E devices. 
*/ [RTL_GIGA_MAC_VER_07] = {"RTL8102e" }, [RTL_GIGA_MAC_VER_08] = {"RTL8102e" }, - [RTL_GIGA_MAC_VER_09] = {"RTL8102e" }, + [RTL_GIGA_MAC_VER_09] = {"RTL8102e/RTL8103e" }, [RTL_GIGA_MAC_VER_10] = {"RTL8101e" }, [RTL_GIGA_MAC_VER_11] = {"RTL8168b/8111b" }, [RTL_GIGA_MAC_VER_12] = {"RTL8168b/8111b" }, @@ -188,9 +190,9 @@ static const struct { [RTL_GIGA_MAC_VER_39] = {"RTL8106e", FIRMWARE_8106E_1}, [RTL_GIGA_MAC_VER_40] = {"RTL8168g/8111g", FIRMWARE_8168G_2}, [RTL_GIGA_MAC_VER_41] = {"RTL8168g/8111g" }, - [RTL_GIGA_MAC_VER_42] = {"RTL8168g/8111g", FIRMWARE_8168G_3}, - [RTL_GIGA_MAC_VER_43] = {"RTL8106e", FIRMWARE_8106E_2}, - [RTL_GIGA_MAC_VER_44] = {"RTL8411", FIRMWARE_8411_2 }, + [RTL_GIGA_MAC_VER_42] = {"RTL8168gu/8111gu", FIRMWARE_8168G_3}, + [RTL_GIGA_MAC_VER_43] = {"RTL8106eus", FIRMWARE_8106E_2}, + [RTL_GIGA_MAC_VER_44] = {"RTL8411b", FIRMWARE_8411_2 }, [RTL_GIGA_MAC_VER_45] = {"RTL8168h/8111h", FIRMWARE_8168H_1}, [RTL_GIGA_MAC_VER_46] = {"RTL8168h/8111h", FIRMWARE_8168H_2}, [RTL_GIGA_MAC_VER_47] = {"RTL8107e", FIRMWARE_8107E_1}, @@ -200,32 +202,24 @@ static const struct { [RTL_GIGA_MAC_VER_51] = {"RTL8168ep/8111ep" }, }; -enum cfg_version { - RTL_CFG_0 = 0x00, - RTL_CFG_1, - RTL_CFG_2 -}; - static const struct pci_device_id rtl8169_pci_tbl[] = { - { PCI_VDEVICE(REALTEK, 0x2502), RTL_CFG_1 }, - { PCI_VDEVICE(REALTEK, 0x2600), RTL_CFG_1 }, - { PCI_VDEVICE(REALTEK, 0x8129), RTL_CFG_0 }, - { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_2 }, - { PCI_VDEVICE(REALTEK, 0x8161), RTL_CFG_1 }, - { PCI_VDEVICE(REALTEK, 0x8167), RTL_CFG_0 }, - { PCI_VDEVICE(REALTEK, 0x8168), RTL_CFG_1 }, - { PCI_VDEVICE(NCUBE, 0x8168), RTL_CFG_1 }, - { PCI_VDEVICE(REALTEK, 0x8169), RTL_CFG_0 }, + { PCI_VDEVICE(REALTEK, 0x2502) }, + { PCI_VDEVICE(REALTEK, 0x2600) }, + { PCI_VDEVICE(REALTEK, 0x8129) }, + { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT }, + { PCI_VDEVICE(REALTEK, 0x8161) }, + { PCI_VDEVICE(REALTEK, 0x8167) }, + { PCI_VDEVICE(REALTEK, 0x8168) }, + { PCI_VDEVICE(NCUBE, 0x8168) }, + { PCI_VDEVICE(REALTEK, 0x8169) }, { PCI_VENDOR_ID_DLINK, 0x4300, - PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0, RTL_CFG_1 }, - { PCI_VDEVICE(DLINK, 0x4300), RTL_CFG_0 }, - { PCI_VDEVICE(DLINK, 0x4302), RTL_CFG_0 }, - { PCI_VDEVICE(AT, 0xc107), RTL_CFG_0 }, - { PCI_VDEVICE(USR, 0x0116), RTL_CFG_0 }, - { PCI_VENDOR_ID_LINKSYS, 0x1032, - PCI_ANY_ID, 0x0024, 0, 0, RTL_CFG_0 }, - { 0x0001, 0x8168, - PCI_ANY_ID, 0x2410, 0, 0, RTL_CFG_2 }, + PCI_VENDOR_ID_DLINK, 0x4b10, 0, 0 }, + { PCI_VDEVICE(DLINK, 0x4300) }, + { PCI_VDEVICE(DLINK, 0x4302) }, + { PCI_VDEVICE(AT, 0xc107) }, + { PCI_VDEVICE(USR, 0x0116) }, + { PCI_VENDOR_ID_LINKSYS, 0x1032, PCI_ANY_ID, 0x0024 }, + { 0x0001, 0x8168, PCI_ANY_ID, 0x2410 }, {} }; @@ -406,8 +400,6 @@ enum rtl_register_content { RxOK = 0x0001, /* RxStatusDesc */ - RxBOVF = (1 << 24), - RxFOVF = (1 << 23), RxRWT = (1 << 22), RxRES = (1 << 21), RxRUNT = (1 << 20), @@ -492,6 +484,7 @@ enum rtl_register_content { PCIDAC = (1 << 4), PCIMulRW = (1 << 3), #define INTT_MASK GENMASK(1, 0) +#define CPCMD_MASK (Normal_mode | RxVlan | RxChkSum | INTT_MASK) /* rtl8169_PHYstatus */ TBI_Enable = 0x80, @@ -503,9 +496,6 @@ enum rtl_register_content { LinkStatus = 0x02, FullDup = 0x01, - /* _TBICSRBit */ - TBILinkOK = 0x02000000, - /* ResetCounterCommand */ CounterReset = 0x1, @@ -578,7 +568,6 @@ enum rtl_rx_desc_bit { }; #define RsvdMask 0x3fffc000 -#define CPCMD_QUIRK_MASK (Normal_mode | RxVlan | RxChkSum | INTT_MASK) struct TxDesc { __le32 opts1; @@ -639,7 +628,7 @@ struct rtl8169_private { struct phy_device *phydev; struct napi_struct 
napi; u32 msg_enable; - u16 mac_version; + enum mac_version mac_version; u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */ u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */ u32 dirty_tx; @@ -652,24 +641,9 @@ struct rtl8169_private { void *Rx_databuff[NUM_RX_DESC]; /* Rx data buffers */ struct ring_info tx_skb[NUM_TX_DESC]; /* Tx data buffers */ u16 cp_cmd; - u16 irq_mask; - const struct rtl_coalesce_info *coalesce_info; struct clk *clk; - struct mdio_ops { - void (*write)(struct rtl8169_private *, int, int); - int (*read)(struct rtl8169_private *, int); - } mdio_ops; - - struct jumbo_ops { - void (*enable)(struct rtl8169_private *); - void (*disable)(struct rtl8169_private *); - } jumbo_ops; - - void (*hw_start)(struct rtl8169_private *tp); - bool (*tso_csum)(struct rtl8169_private *, struct sk_buff *, u32 *); - struct { DECLARE_BITMAP(flags, RTL_FLAG_MAX); struct mutex mutex; @@ -678,24 +652,14 @@ struct rtl8169_private { unsigned irq_enabled:1; unsigned supports_gmii:1; + unsigned aspm_manageable:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; struct rtl8169_tc_offsets tc_offset; u32 saved_wolopts; const char *fw_name; - struct rtl_fw { - const struct firmware *fw; - -#define RTL_VER_SIZE 32 - - char version[RTL_VER_SIZE]; - - struct rtl_fw_phy_action { - __le32 *code; - size_t size; - } phy_action; - } *rtl_fw; + struct rtl_fw *rtl_fw; u32 ocp_base; }; @@ -759,6 +723,12 @@ static void rtl_tx_performance_tweak(struct rtl8169_private *tp, u16 force) PCI_EXP_DEVCTL_READRQ, force); } +static bool rtl_is_8168evl_up(struct rtl8169_private *tp) +{ + return tp->mac_version >= RTL_GIGA_MAC_VER_34 && + tp->mac_version != RTL_GIGA_MAC_VER_39; +} + struct rtl_cond { bool (*check)(struct rtl8169_private *); const char *msg; @@ -847,7 +817,7 @@ static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10); } -static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) +static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) { if (rtl_ocp_reg_failure(tp, reg)) return 0; @@ -855,7 +825,7 @@ static u16 r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg) RTL_W32(tp, GPHY_OCP, reg << 15); return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ? - (RTL_R32(tp, GPHY_OCP) & 0xffff) : ~0; + (RTL_R32(tp, GPHY_OCP) & 0xffff) : -ETIMEDOUT; } static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data) @@ -938,7 +908,7 @@ static int r8169_mdio_read(struct rtl8169_private *tp, int reg) RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16); value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ? - RTL_R32(tp, PHYAR) & 0xffff : ~0; + RTL_R32(tp, PHYAR) & 0xffff : -ETIMEDOUT; /* * According to hardware specs a 20us delay is required after read @@ -978,7 +948,7 @@ static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg) RTL_W32(tp, EPHY_RXER_NUM, 0); return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ? 
- RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : ~0; + RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : -ETIMEDOUT; } #define R8168DP_1_MDIO_ACCESS_BIT 0x00020000 @@ -1015,14 +985,38 @@ static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg) return value; } -static void rtl_writephy(struct rtl8169_private *tp, int location, u32 val) +static void rtl_writephy(struct rtl8169_private *tp, int location, int val) { - tp->mdio_ops.write(tp, location, val); + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_27: + r8168dp_1_mdio_write(tp, location, val); + break; + case RTL_GIGA_MAC_VER_28: + case RTL_GIGA_MAC_VER_31: + r8168dp_2_mdio_write(tp, location, val); + break; + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + r8168g_mdio_write(tp, location, val); + break; + default: + r8169_mdio_write(tp, location, val); + break; + } } static int rtl_readphy(struct rtl8169_private *tp, int location) { - return tp->mdio_ops.read(tp, location); + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_27: + return r8168dp_1_mdio_read(tp, location); + case RTL_GIGA_MAC_VER_28: + case RTL_GIGA_MAC_VER_31: + return r8168dp_2_mdio_read(tp, location); + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + return r8168g_mdio_read(tp, location); + default: + return r8169_mdio_read(tp, location); + } } static void rtl_patchphy(struct rtl8169_private *tp, int reg_addr, int value) @@ -1400,9 +1394,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) rtl_unlock_config_regs(tp); - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: + if (rtl_is_8168evl_up(tp)) { tmp = ARRAY_SIZE(cfg) - 1; if (wolopts & WAKE_MAGIC) rtl_eri_set_bits(tp, 0x0dc, ERIAR_MASK_0100, @@ -1410,10 +1402,8 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) else rtl_eri_clear_bits(tp, 0x0dc, ERIAR_MASK_0100, MagicPacket_v2); - break; - default: + } else { tmp = ARRAY_SIZE(cfg); - break; } for (i = 0; i < tmp; i++) { @@ -1424,7 +1414,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) } switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17: + case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_17: options = RTL_R8(tp, Config1) & ~PMEnable; if (wolopts) options |= PMEnable; @@ -1794,18 +1784,16 @@ static const struct rtl_coalesce_info rtl_coalesce_info_8168_8136[] = { static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); - struct ethtool_link_ksettings ecmd; const struct rtl_coalesce_info *ci; - int rc; - rc = phy_ethtool_get_link_ksettings(dev, &ecmd); - if (rc < 0) - return ERR_PTR(rc); + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) + ci = rtl_coalesce_info_8169; + else + ci = rtl_coalesce_info_8168_8136; - for (ci = tp->coalesce_info; ci->speed != 0; ci++) { - if (ecmd.base.speed == ci->speed) { + for (; ci->speed; ci++) { + if (tp->phydev->speed == ci->speed) return ci; - } } return ERR_PTR(-ELNRNG); @@ -1954,9 +1942,7 @@ static int rtl_get_eee_supp(struct rtl8169_private *tp) ret = phy_read_mmd(phydev, MDIO_MMD_PCS, MDIO_PCS_EEE_ABLE); break; case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_51: - phy_write(phydev, 0x1f, 0x0a5c); - ret = phy_read(phydev, 0x12); - phy_write(phydev, 0x1f, 0x0000); + ret = phy_read_paged(phydev, 0x0a5c, 0x12); break; default: ret = -EPROTONOSUPPORT; @@ -1979,9 +1965,7 @@ static int rtl_get_eee_lpadv(struct rtl8169_private *tp) ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_LPABLE); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - phy_write(phydev, 0x1f, 0x0a5d); - ret = phy_read(phydev, 0x11); - phy_write(phydev, 0x1f, 0x0000); + ret = phy_read_paged(phydev, 0x0a5d, 0x11); break; default: ret = -EPROTONOSUPPORT; @@ -2004,9 +1988,7 @@ static int rtl_get_eee_adv(struct rtl8169_private *tp) ret = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - phy_write(phydev, 0x1f, 0x0a5d); - ret = phy_read(phydev, 0x10); - phy_write(phydev, 0x1f, 0x0000); + ret = phy_read_paged(phydev, 0x0a5d, 0x10); break; default: ret = -EPROTONOSUPPORT; @@ -2029,9 +2011,7 @@ static int rtl_set_eee_adv(struct rtl8169_private *tp, int val) ret = phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, val); break; case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - phy_write(phydev, 0x1f, 0x0a5d); - phy_write(phydev, 0x10, val); - phy_write(phydev, 0x1f, 0x0000); + phy_write_paged(phydev, 0x0a5d, 0x10, val); break; default: ret = -EPROTONOSUPPORT; @@ -2252,7 +2232,6 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp) { 0xfc8, 0x100, RTL_GIGA_MAC_VER_04 }, { 0xfc8, 0x040, RTL_GIGA_MAC_VER_03 }, { 0xfc8, 0x008, RTL_GIGA_MAC_VER_02 }, - { 0xfc8, 0x000, RTL_GIGA_MAC_VER_01 }, /* Catch-all */ { 0x000, 0x000, RTL_GIGA_MAC_NONE } @@ -2292,246 +2271,10 @@ static void __rtl_writephy_batch(struct rtl8169_private *tp, #define rtl_writephy_batch(tp, a) __rtl_writephy_batch(tp, a, ARRAY_SIZE(a)) -#define PHY_READ 0x00000000 -#define PHY_DATA_OR 0x10000000 -#define PHY_DATA_AND 0x20000000 -#define PHY_BJMPN 0x30000000 -#define PHY_MDIO_CHG 0x40000000 -#define PHY_CLEAR_READCOUNT 0x70000000 -#define PHY_WRITE 0x80000000 -#define PHY_READCOUNT_EQ_SKIP 0x90000000 -#define PHY_COMP_EQ_SKIPN 0xa0000000 -#define PHY_COMP_NEQ_SKIPN 0xb0000000 -#define PHY_WRITE_PREVIOUS 0xc0000000 -#define PHY_SKIPN 0xd0000000 -#define PHY_DELAY_MS 0xe0000000 - -struct fw_info { - u32 magic; - char version[RTL_VER_SIZE]; - __le32 fw_start; - __le32 fw_len; - u8 chksum; -} __packed; - -#define FW_OPCODE_SIZE sizeof(typeof(*((struct rtl_fw_phy_action *)0)->code)) - -static bool rtl_fw_format_ok(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) -{ - const struct firmware *fw = rtl_fw->fw; - struct fw_info *fw_info = (struct fw_info *)fw->data; - struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; - char *version = rtl_fw->version; - bool rc = false; - - if (fw->size < FW_OPCODE_SIZE) - goto out; - - if (!fw_info->magic) { - size_t i, size, start; - u8 checksum = 0; - - if (fw->size < sizeof(*fw_info)) - goto out; - - for (i = 0; i < fw->size; i++) - checksum += fw->data[i]; - if (checksum != 0) - goto out; - - start = le32_to_cpu(fw_info->fw_start); - if (start > fw->size) - goto out; - - size = le32_to_cpu(fw_info->fw_len); - if (size > (fw->size - start) / FW_OPCODE_SIZE) - goto out; - - memcpy(version, fw_info->version, RTL_VER_SIZE); - - pa->code = (__le32 *)(fw->data + start); - pa->size = size; - } else { - if (fw->size % FW_OPCODE_SIZE) - goto out; - - strlcpy(version, tp->fw_name, RTL_VER_SIZE); - - pa->code = (__le32 *)fw->data; - pa->size = fw->size / FW_OPCODE_SIZE; - } - version[RTL_VER_SIZE - 1] = 0; - 
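
The PHY firmware opcode table and its format/sanity checking removed in this hunk (together with rtl_phy_write_fw below) move out of the main driver into a dedicated firmware unit: the driver now only fills a struct rtl_fw with its own register accessors and calls rtl_fw_request_firmware()/rtl_fw_write_firmware()/rtl_fw_release_firmware(), as the rtl_request_firmware() and rtl_release_firmware() hunks later in this patch show. A sketch of the interface those call sites imply — the function names and the fields assigned in this diff are taken from the patch itself, the accessor typedefs and remaining layout are assumptions:

    /* Sketch of the split-out firmware interface. The accessor typedefs are
     * inferred from rtl_writephy()/rtl_readphy(), which the driver plugs in.
     */
    typedef void (*rtl_fw_write_t)(struct rtl8169_private *tp, int reg, int val);
    typedef int  (*rtl_fw_read_t)(struct rtl8169_private *tp, int reg);

    struct rtl_fw {
            rtl_fw_write_t phy_write;       /* set to rtl_writephy */
            rtl_fw_read_t  phy_read;        /* set to rtl_readphy */
            rtl_fw_write_t mac_mcu_write;   /* set to mac_mcu_write */
            rtl_fw_read_t  mac_mcu_read;    /* set to mac_mcu_read */
            const char *fw_name;
            struct device *dev;
            /* parsed opcode stream, assumed to stay private to the new unit */
    };

    int  rtl_fw_request_firmware(struct rtl_fw *rtl_fw);   /* nonzero on failure */
    void rtl_fw_write_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw);
    void rtl_fw_release_firmware(struct rtl_fw *rtl_fw);

With the accessors injected this way, the PHY_MDIO_CHG opcode no longer has to patch tp->mdio_ops at run time, which is what lets the rest of the patch delete that indirection and dispatch MDIO accesses by mac_version instead.
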
- rc = true; -out: - return rc; -} - -static bool rtl_fw_data_ok(struct rtl8169_private *tp, struct net_device *dev, - struct rtl_fw_phy_action *pa) -{ - bool rc = false; - size_t index; - - for (index = 0; index < pa->size; index++) { - u32 action = le32_to_cpu(pa->code[index]); - u32 regno = (action & 0x0fff0000) >> 16; - - switch(action & 0xf0000000) { - case PHY_READ: - case PHY_DATA_OR: - case PHY_DATA_AND: - case PHY_MDIO_CHG: - case PHY_CLEAR_READCOUNT: - case PHY_WRITE: - case PHY_WRITE_PREVIOUS: - case PHY_DELAY_MS: - break; - - case PHY_BJMPN: - if (regno > index) { - netif_err(tp, ifup, tp->dev, - "Out of range of firmware\n"); - goto out; - } - break; - case PHY_READCOUNT_EQ_SKIP: - if (index + 2 >= pa->size) { - netif_err(tp, ifup, tp->dev, - "Out of range of firmware\n"); - goto out; - } - break; - case PHY_COMP_EQ_SKIPN: - case PHY_COMP_NEQ_SKIPN: - case PHY_SKIPN: - if (index + 1 + regno >= pa->size) { - netif_err(tp, ifup, tp->dev, - "Out of range of firmware\n"); - goto out; - } - break; - - default: - netif_err(tp, ifup, tp->dev, - "Invalid action 0x%08x\n", action); - goto out; - } - } - rc = true; -out: - return rc; -} - -static int rtl_check_firmware(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) -{ - struct net_device *dev = tp->dev; - int rc = -EINVAL; - - if (!rtl_fw_format_ok(tp, rtl_fw)) { - netif_err(tp, ifup, dev, "invalid firmware\n"); - goto out; - } - - if (rtl_fw_data_ok(tp, dev, &rtl_fw->phy_action)) - rc = 0; -out: - return rc; -} - -static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw) -{ - struct rtl_fw_phy_action *pa = &rtl_fw->phy_action; - struct mdio_ops org, *ops = &tp->mdio_ops; - u32 predata, count; - size_t index; - - predata = count = 0; - org.write = ops->write; - org.read = ops->read; - - for (index = 0; index < pa->size; ) { - u32 action = le32_to_cpu(pa->code[index]); - u32 data = action & 0x0000ffff; - u32 regno = (action & 0x0fff0000) >> 16; - - if (!action) - break; - - switch(action & 0xf0000000) { - case PHY_READ: - predata = rtl_readphy(tp, regno); - count++; - index++; - break; - case PHY_DATA_OR: - predata |= data; - index++; - break; - case PHY_DATA_AND: - predata &= data; - index++; - break; - case PHY_BJMPN: - index -= regno; - break; - case PHY_MDIO_CHG: - if (data == 0) { - ops->write = org.write; - ops->read = org.read; - } else if (data == 1) { - ops->write = mac_mcu_write; - ops->read = mac_mcu_read; - } - - index++; - break; - case PHY_CLEAR_READCOUNT: - count = 0; - index++; - break; - case PHY_WRITE: - rtl_writephy(tp, regno, data); - index++; - break; - case PHY_READCOUNT_EQ_SKIP: - index += (count == data) ? 2 : 1; - break; - case PHY_COMP_EQ_SKIPN: - if (predata == data) - index += regno; - index++; - break; - case PHY_COMP_NEQ_SKIPN: - if (predata != data) - index += regno; - index++; - break; - case PHY_WRITE_PREVIOUS: - rtl_writephy(tp, regno, predata); - index++; - break; - case PHY_SKIPN: - index += regno + 1; - break; - case PHY_DELAY_MS: - mdelay(data); - index++; - break; - - default: - BUG(); - } - } - - ops->write = org.write; - ops->read = org.read; -} - static void rtl_release_firmware(struct rtl8169_private *tp) { if (tp->rtl_fw) { - release_firmware(tp->rtl_fw->fw); + rtl_fw_release_firmware(tp->rtl_fw); kfree(tp->rtl_fw); tp->rtl_fw = NULL; } @@ -2539,9 +2282,9 @@ static void rtl_release_firmware(struct rtl8169_private *tp) static void rtl_apply_firmware(struct rtl8169_private *tp) { - /* TODO: release firmware once rtl_phy_write_fw signals failures. 
*/ + /* TODO: release firmware if rtl_fw_write_firmware signals failure. */ if (tp->rtl_fw) - rtl_phy_write_fw(tp, tp->rtl_fw); + rtl_fw_write_firmware(tp, tp->rtl_fw); } static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val) @@ -2578,9 +2321,7 @@ static void rtl8168f_config_eee_phy(struct rtl8169_private *tp) static void rtl8168g_config_eee_phy(struct rtl8169_private *tp) { - phy_write(tp->phydev, 0x1f, 0x0a43); - phy_set_bits(tp->phydev, 0x11, BIT(4)); - phy_write(tp->phydev, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a43, 0x11, 0, BIT(4)); } static void rtl8169s_hw_phy_config(struct rtl8169_private *tp) @@ -2910,50 +2651,59 @@ static void rtl8168c_4_hw_phy_config(struct rtl8169_private *tp) rtl8168c_3_hw_phy_config(tp); } -static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) -{ - static const struct phy_reg phy_reg_init_0[] = { - /* Channel Estimation */ - { 0x1f, 0x0001 }, - { 0x06, 0x4064 }, - { 0x07, 0x2863 }, - { 0x08, 0x059c }, - { 0x09, 0x26b4 }, - { 0x0a, 0x6a19 }, - { 0x0b, 0xdcc8 }, - { 0x10, 0xf06d }, - { 0x14, 0x7f68 }, - { 0x18, 0x7fd9 }, - { 0x1c, 0xf0ff }, - { 0x1d, 0x3d9c }, - { 0x1f, 0x0003 }, - { 0x12, 0xf49f }, - { 0x13, 0x070b }, - { 0x1a, 0x05ad }, - { 0x14, 0x94c0 }, +static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = { + /* Channel Estimation */ + { 0x1f, 0x0001 }, + { 0x06, 0x4064 }, + { 0x07, 0x2863 }, + { 0x08, 0x059c }, + { 0x09, 0x26b4 }, + { 0x0a, 0x6a19 }, + { 0x0b, 0xdcc8 }, + { 0x10, 0xf06d }, + { 0x14, 0x7f68 }, + { 0x18, 0x7fd9 }, + { 0x1c, 0xf0ff }, + { 0x1d, 0x3d9c }, + { 0x1f, 0x0003 }, + { 0x12, 0xf49f }, + { 0x13, 0x070b }, + { 0x1a, 0x05ad }, + { 0x14, 0x94c0 }, - /* - * Tx Error Issue - * Enhance line driver power - */ - { 0x1f, 0x0002 }, - { 0x06, 0x5561 }, - { 0x1f, 0x0005 }, - { 0x05, 0x8332 }, - { 0x06, 0x5561 }, + /* + * Tx Error Issue + * Enhance line driver power + */ + { 0x1f, 0x0002 }, + { 0x06, 0x5561 }, + { 0x1f, 0x0005 }, + { 0x05, 0x8332 }, + { 0x06, 0x5561 }, - /* - * Can not link to 1Gbps with bad cable - * Decrease SNR threshold form 21.07dB to 19.04dB - */ - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, + /* + * Can not link to 1Gbps with bad cable + * Decrease SNR threshold form 21.07dB to 19.04dB + */ + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, - { 0x1f, 0x0000 }, - { 0x0d, 0xf880 } - }; + { 0x1f, 0x0000 }, + { 0x0d, 0xf880 } +}; - rtl_writephy_batch(tp, phy_reg_init_0); +static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = { + { 0x1f, 0x0002 }, + { 0x05, 0x669a }, + { 0x1f, 0x0005 }, + { 0x05, 0x8330 }, + { 0x06, 0x669a }, + { 0x1f, 0x0002 } +}; + +static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) +{ + rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0); /* * Rx Error Issue @@ -2964,17 +2714,9 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) rtl_w0w1_phy(tp, 0x0c, 0xa200, 0x5d00); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - { 0x1f, 0x0002 } - }; int val; - rtl_writephy_batch(tp, phy_reg_init); + rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1); val = rtl_readphy(tp, 0x0d); @@ -3023,62 +2765,12 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp) static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp) { - static const struct phy_reg phy_reg_init_0[] = { - /* Channel Estimation */ - { 0x1f, 0x0001 }, - { 0x06, 0x4064 }, - { 0x07, 0x2863 }, - { 0x08, 0x059c }, - { 0x09, 0x26b4 }, - 
{ 0x0a, 0x6a19 }, - { 0x0b, 0xdcc8 }, - { 0x10, 0xf06d }, - { 0x14, 0x7f68 }, - { 0x18, 0x7fd9 }, - { 0x1c, 0xf0ff }, - { 0x1d, 0x3d9c }, - { 0x1f, 0x0003 }, - { 0x12, 0xf49f }, - { 0x13, 0x070b }, - { 0x1a, 0x05ad }, - { 0x14, 0x94c0 }, - - /* - * Tx Error Issue - * Enhance line driver power - */ - { 0x1f, 0x0002 }, - { 0x06, 0x5561 }, - { 0x1f, 0x0005 }, - { 0x05, 0x8332 }, - { 0x06, 0x5561 }, - - /* - * Can not link to 1Gbps with bad cable - * Decrease SNR threshold form 21.07dB to 19.04dB - */ - { 0x1f, 0x0001 }, - { 0x17, 0x0cc0 }, - - { 0x1f, 0x0000 }, - { 0x0d, 0xf880 } - }; - - rtl_writephy_batch(tp, phy_reg_init_0); + rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0); if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) { - static const struct phy_reg phy_reg_init[] = { - { 0x1f, 0x0002 }, - { 0x05, 0x669a }, - { 0x1f, 0x0005 }, - { 0x05, 0x8330 }, - { 0x06, 0x669a }, - - { 0x1f, 0x0002 } - }; int val; - rtl_writephy_batch(tp, phy_reg_init); + rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1); val = rtl_readphy(tp, 0x0d); if ((val & 0x00ff) != 0x006c) { @@ -3528,20 +3220,15 @@ static void rtl8411_hw_phy_config(struct rtl8169_private *tp) static void rtl8168g_disable_aldps(struct rtl8169_private *tp) { - phy_write(tp->phydev, 0x1f, 0x0a43); - phy_clear_bits(tp->phydev, 0x10, BIT(2)); + phy_modify_paged(tp->phydev, 0x0a43, 0x10, BIT(2), 0); } static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp) { struct phy_device *phydev = tp->phydev; - phy_write(phydev, 0x1f, 0x0bcc); - phy_clear_bits(phydev, 0x14, BIT(8)); - - phy_write(phydev, 0x1f, 0x0a44); - phy_set_bits(phydev, 0x11, BIT(7) | BIT(6)); - + phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0); + phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6)); phy_write(phydev, 0x1f, 0x0a43); phy_write(phydev, 0x13, 0x8084); phy_clear_bits(phydev, 0x14, BIT(14) | BIT(13)); @@ -3552,43 +3239,36 @@ static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp) static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) { + int ret; + rtl_apply_firmware(tp); - rtl_writephy(tp, 0x1f, 0x0a46); - if (rtl_readphy(tp, 0x10) & 0x0100) { - rtl_writephy(tp, 0x1f, 0x0bcc); - rtl_w0w1_phy(tp, 0x12, 0x0000, 0x8000); - } else { - rtl_writephy(tp, 0x1f, 0x0bcc); - rtl_w0w1_phy(tp, 0x12, 0x8000, 0x0000); - } + ret = phy_read_paged(tp->phydev, 0x0a46, 0x10); + if (ret & BIT(8)) + phy_modify_paged(tp->phydev, 0x0bcc, 0x12, BIT(15), 0); + else + phy_modify_paged(tp->phydev, 0x0bcc, 0x12, 0, BIT(15)); - rtl_writephy(tp, 0x1f, 0x0a46); - if (rtl_readphy(tp, 0x13) & 0x0100) { - rtl_writephy(tp, 0x1f, 0x0c41); - rtl_w0w1_phy(tp, 0x15, 0x0002, 0x0000); - } else { - rtl_writephy(tp, 0x1f, 0x0c41); - rtl_w0w1_phy(tp, 0x15, 0x0000, 0x0002); - } + ret = phy_read_paged(tp->phydev, 0x0a46, 0x13); + if (ret & BIT(8)) + phy_modify_paged(tp->phydev, 0x0c41, 0x12, 0, BIT(1)); + else + phy_modify_paged(tp->phydev, 0x0c41, 0x12, BIT(1), 0); /* Enable PHY auto speed down */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x000c, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); rtl8168g_phy_adjust_10m_aldps(tp); /* EEE auto-fallback function */ - rtl_writephy(tp, 0x1f, 0x0a4b); - rtl_w0w1_phy(tp, 0x11, 0x0004, 0x0000); + phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2)); /* Enable UC LPF tune function */ rtl_writephy(tp, 0x1f, 0x0a43); rtl_writephy(tp, 0x13, 0x8012); rtl_w0w1_phy(tp, 0x14, 0x8000, 0x0000); - rtl_writephy(tp, 0x1f, 0x0c42); - rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000); + phy_modify_paged(tp->phydev, 
0x0c42, 0x11, BIT(13), BIT(14)); /* Improve SWR Efficiency */ rtl_writephy(tp, 0x1f, 0x0bcd); @@ -3600,6 +3280,7 @@ static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x14, 0x1065); rtl_writephy(tp, 0x14, 0x9065); rtl_writephy(tp, 0x14, 0x1065); + rtl_writephy(tp, 0x1f, 0x0000); rtl8168g_disable_aldps(tp); rtl8168g_config_eee_phy(tp); @@ -3684,14 +3365,10 @@ static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* enable GPHY 10M */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x0800, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); /* SAR ADC performance */ - rtl_writephy(tp, 0x1f, 0x0bca); - rtl_w0w1_phy(tp, 0x17, 0x4000, 0x3000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14)); rtl_writephy(tp, 0x1f, 0x0a43); rtl_writephy(tp, 0x13, 0x803f); @@ -3711,9 +3388,7 @@ static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* disable phy pfm mode */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x0000, 0x0080); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0); rtl8168g_disable_aldps(tp); rtl8168g_config_eee_phy(tp); @@ -3743,9 +3418,7 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* enable GPHY 10M */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x0800, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11)); r8168_mac_ocp_write(tp, 0xdd02, 0x807d); data = r8168_mac_ocp_read(tp, 0xdd02); @@ -3781,9 +3454,7 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* disable phy pfm mode */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x0000, 0x0080); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0); rtl8168g_disable_aldps(tp); rtl8168g_config_eee_phy(tp); @@ -3793,16 +3464,12 @@ static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp) static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) { /* Enable PHY auto speed down */ - rtl_writephy(tp, 0x1f, 0x0a44); - rtl_w0w1_phy(tp, 0x11, 0x000c, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2)); rtl8168g_phy_adjust_10m_aldps(tp); /* Enable EEE auto-fallback function */ - rtl_writephy(tp, 0x1f, 0x0a4b); - rtl_w0w1_phy(tp, 0x11, 0x0004, 0x0000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2)); /* Enable UC LPF tune function */ rtl_writephy(tp, 0x1f, 0x0a43); @@ -3811,9 +3478,7 @@ static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* set rg_sel_sdm_rate */ - rtl_writephy(tp, 0x1f, 0x0c42); - rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14)); rtl8168g_disable_aldps(tp); rtl8168g_config_eee_phy(tp); @@ -3831,9 +3496,7 @@ static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp) rtl_writephy(tp, 0x1f, 0x0000); /* Set rg_sel_sdm_rate */ - rtl_writephy(tp, 0x1f, 0x0c42); - rtl_w0w1_phy(tp, 0x11, 0x4000, 0x2000); - rtl_writephy(tp, 0x1f, 0x0000); + phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14)); /* Channel estimation parameters */ rtl_writephy(tp, 0x1f, 0x0a43); @@ -3985,7 +3648,6 @@ static void 
rtl_hw_phy_config(struct net_device *dev) { static const rtl_generic_fct phy_configs[] = { /* PCI devices. */ - [RTL_GIGA_MAC_VER_01] = NULL, [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config, [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config, [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config, @@ -4050,12 +3712,6 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag) schedule_work(&tp->wk.work); } -static bool rtl_tbi_enabled(struct rtl8169_private *tp) -{ - return (tp->mac_version == RTL_GIGA_MAC_VER_01) && - (RTL_R8(tp, PHYstatus) & TBI_Enable); -} - static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp) { rtl_hw_phy_config(dev); @@ -4124,31 +3780,6 @@ static int rtl8169_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return phy_mii_ioctl(tp->phydev, ifr, cmd); } -static void rtl_init_mdio_ops(struct rtl8169_private *tp) -{ - struct mdio_ops *ops = &tp->mdio_ops; - - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_27: - ops->write = r8168dp_1_mdio_write; - ops->read = r8168dp_1_mdio_read; - break; - case RTL_GIGA_MAC_VER_28: - case RTL_GIGA_MAC_VER_31: - ops->write = r8168dp_2_mdio_write; - ops->read = r8168dp_2_mdio_read; - break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: - ops->write = r8168g_mdio_write; - ops->read = r8168g_mdio_read; - break; - default: - ops->write = r8169_mdio_write; - ops->read = r8169_mdio_read; - break; - } -} - static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) { switch (tp->mac_version) { @@ -4168,7 +3799,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp) } } -static void r8168_pll_power_down(struct rtl8169_private *tp) +static void rtl_pll_power_down(struct rtl8169_private *tp) { if (r8168_check_dash(tp)) return; @@ -4203,10 +3834,12 @@ static void r8168_pll_power_down(struct rtl8169_private *tp) rtl_eri_clear_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000); RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80); break; + default: + break; } } -static void r8168_pll_power_up(struct rtl8169_private *tp) +static void rtl_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: @@ -4230,6 +3863,8 @@ static void r8168_pll_power_up(struct rtl8169_private *tp) RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0); rtl_eri_set_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000); break; + default: + break; } phy_resume(tp->phydev); @@ -4237,32 +3872,10 @@ static void r8168_pll_power_up(struct rtl8169_private *tp) msleep(20); } -static void rtl_pll_power_down(struct rtl8169_private *tp) -{ - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: - case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15: - break; - default: - r8168_pll_power_down(tp); - } -} - -static void rtl_pll_power_up(struct rtl8169_private *tp) -{ - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: - case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15: - break; - default: - r8168_pll_power_up(tp); - } -} - static void rtl_init_rxcfg(struct rtl8169_private *tp) { switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: case RTL_GIGA_MAC_VER_10 ... 
RTL_GIGA_MAC_VER_17: RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST); break; @@ -4285,24 +3898,6 @@ static void rtl8169_init_ring_indexes(struct rtl8169_private *tp) tp->dirty_tx = tp->cur_tx = tp->cur_rx = 0; } -static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) -{ - if (tp->jumbo_ops.enable) { - rtl_unlock_config_regs(tp); - tp->jumbo_ops.enable(tp); - rtl_lock_config_regs(tp); - } -} - -static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) -{ - if (tp->jumbo_ops.disable) { - rtl_unlock_config_regs(tp); - tp->jumbo_ops.disable(tp); - rtl_lock_config_regs(tp); - } -} - static void r8168c_hw_jumbo_enable(struct rtl8169_private *tp) { RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0); @@ -4369,55 +3964,56 @@ static void r8168b_1_hw_jumbo_disable(struct rtl8169_private *tp) RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0)); } -static void rtl_init_jumbo_ops(struct rtl8169_private *tp) +static void rtl_hw_jumbo_enable(struct rtl8169_private *tp) { - struct jumbo_ops *ops = &tp->jumbo_ops; - + rtl_unlock_config_regs(tp); switch (tp->mac_version) { case RTL_GIGA_MAC_VER_11: - ops->disable = r8168b_0_hw_jumbo_disable; - ops->enable = r8168b_0_hw_jumbo_enable; + r8168b_0_hw_jumbo_enable(tp); break; case RTL_GIGA_MAC_VER_12: case RTL_GIGA_MAC_VER_17: - ops->disable = r8168b_1_hw_jumbo_disable; - ops->enable = r8168b_1_hw_jumbo_enable; + r8168b_1_hw_jumbo_enable(tp); break; - case RTL_GIGA_MAC_VER_18: /* Wild guess. Needs info from Realtek. */ - case RTL_GIGA_MAC_VER_19: - case RTL_GIGA_MAC_VER_20: - case RTL_GIGA_MAC_VER_21: /* Wild guess. Needs info from Realtek. */ - case RTL_GIGA_MAC_VER_22: - case RTL_GIGA_MAC_VER_23: - case RTL_GIGA_MAC_VER_24: - case RTL_GIGA_MAC_VER_25: - case RTL_GIGA_MAC_VER_26: - ops->disable = r8168c_hw_jumbo_disable; - ops->enable = r8168c_hw_jumbo_enable; + case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26: + r8168c_hw_jumbo_enable(tp); break; - case RTL_GIGA_MAC_VER_27: - case RTL_GIGA_MAC_VER_28: - ops->disable = r8168dp_hw_jumbo_disable; - ops->enable = r8168dp_hw_jumbo_enable; + case RTL_GIGA_MAC_VER_27 ... RTL_GIGA_MAC_VER_28: + r8168dp_hw_jumbo_enable(tp); break; - case RTL_GIGA_MAC_VER_31: /* Wild guess. Needs info from Realtek. */ - case RTL_GIGA_MAC_VER_32: - case RTL_GIGA_MAC_VER_33: - case RTL_GIGA_MAC_VER_34: - ops->disable = r8168e_hw_jumbo_disable; - ops->enable = r8168e_hw_jumbo_enable; + case RTL_GIGA_MAC_VER_31 ... RTL_GIGA_MAC_VER_34: + r8168e_hw_jumbo_enable(tp); break; + default: + break; + } + rtl_lock_config_regs(tp); +} - /* - * No action needed for jumbo frames with 8169. - * No jumbo for 810x at all. - */ - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_51: +static void rtl_hw_jumbo_disable(struct rtl8169_private *tp) +{ + rtl_unlock_config_regs(tp); + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_11: + r8168b_0_hw_jumbo_disable(tp); + break; + case RTL_GIGA_MAC_VER_12: + case RTL_GIGA_MAC_VER_17: + r8168b_1_hw_jumbo_disable(tp); + break; + case RTL_GIGA_MAC_VER_18 ... RTL_GIGA_MAC_VER_26: + r8168c_hw_jumbo_disable(tp); + break; + case RTL_GIGA_MAC_VER_27 ... RTL_GIGA_MAC_VER_28: + r8168dp_hw_jumbo_disable(tp); + break; + case RTL_GIGA_MAC_VER_31 ... 
RTL_GIGA_MAC_VER_34: + r8168e_hw_jumbo_disable(tp); + break; default: - ops->disable = NULL; - ops->enable = NULL; break; } + rtl_lock_config_regs(tp); } DECLARE_RTL_COND(rtl_chipcmd_cond) @@ -4435,35 +4031,28 @@ static void rtl_hw_reset(struct rtl8169_private *tp) static void rtl_request_firmware(struct rtl8169_private *tp) { struct rtl_fw *rtl_fw; - int rc = -ENOMEM; /* firmware loaded already or no firmware available */ if (tp->rtl_fw || !tp->fw_name) return; rtl_fw = kzalloc(sizeof(*rtl_fw), GFP_KERNEL); - if (!rtl_fw) - goto err_warn; - - rc = request_firmware(&rtl_fw->fw, tp->fw_name, tp_to_dev(tp)); - if (rc < 0) - goto err_free; - - rc = rtl_check_firmware(tp, rtl_fw); - if (rc < 0) - goto err_release_firmware; - - tp->rtl_fw = rtl_fw; + if (!rtl_fw) { + netif_warn(tp, ifup, tp->dev, "Unable to load firmware, out of memory\n"); + return; + } - return; + rtl_fw->phy_write = rtl_writephy; + rtl_fw->phy_read = rtl_readphy; + rtl_fw->mac_mcu_write = mac_mcu_write; + rtl_fw->mac_mcu_read = mac_mcu_read; + rtl_fw->fw_name = tp->fw_name; + rtl_fw->dev = tp_to_dev(tp); -err_release_firmware: - release_firmware(rtl_fw->fw); -err_free: - kfree(rtl_fw); -err_warn: - netif_warn(tp, ifup, tp->dev, "unable to load firmware patch %s (%d)\n", - tp->fw_name, rc); + if (rtl_fw_request_firmware(rtl_fw)) + kfree(rtl_fw); + else + tp->rtl_fw = rtl_fw; } static void rtl_rx_close(struct rtl8169_private *tp) @@ -4513,8 +4102,7 @@ static void rtl_set_tx_config_registers(struct rtl8169_private *tp) u32 val = TX_DMA_BURST << TxDMAShift | InterFrameGap << TxInterFrameGapShift; - if (tp->mac_version >= RTL_GIGA_MAC_VER_34 && - tp->mac_version != RTL_GIGA_MAC_VER_39) + if (rtl_is_8168evl_up(tp)) val |= TXCFG_AUTO_FIFO; RTL_W32(tp, TxConfig, val); @@ -4608,53 +4196,6 @@ static void rtl_set_rx_mode(struct net_device *dev) RTL_W32(tp, RxConfig, tmp); } -static void rtl_hw_start(struct rtl8169_private *tp) -{ - rtl_unlock_config_regs(tp); - - tp->hw_start(tp); - - rtl_set_rx_max_size(tp); - rtl_set_rx_tx_desc_registers(tp); - rtl_lock_config_regs(tp); - - /* disable interrupt coalescing */ - RTL_W16(tp, IntrMitigate, 0x0000); - /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ - RTL_R8(tp, IntrMask); - RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); - rtl_init_rxcfg(tp); - rtl_set_tx_config_registers(tp); - - rtl_set_rx_mode(tp->dev); - /* no early-rx interrupts */ - RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); - rtl_irq_enable(tp); -} - -static void rtl_hw_start_8169(struct rtl8169_private *tp) -{ - if (tp->mac_version == RTL_GIGA_MAC_VER_05) - pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); - - RTL_W8(tp, EarlyTxThres, NoEarlyTx); - - tp->cp_cmd |= PCIMulRW; - - if (tp->mac_version == RTL_GIGA_MAC_VER_02 || - tp->mac_version == RTL_GIGA_MAC_VER_03) { - netif_dbg(tp, drv, tp->dev, - "Set MAC Reg C+CR Offset 0xe0. 
Bit 3 and Bit 14 MUST be 1\n"); - tp->cp_cmd |= (1 << 14); - } - - RTL_W16(tp, CPlusCmd, tp->cp_cmd); - - rtl8169_set_magic_reg(tp, tp->mac_version); - - RTL_W32(tp, RxMissed, 0); -} - DECLARE_RTL_COND(rtl_csiar_cond) { return RTL_R32(tp, CSIAR) & CSIAR_FLAG; @@ -4746,7 +4287,8 @@ static void rtl_pcie_state_l2l3_disable(struct rtl8169_private *tp) static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) { - if (enable) { + /* Don't enable ASPM in the chip if OS can't control ASPM */ + if (enable && tp->aspm_manageable) { RTL_W8(tp, Config5, RTL_R8(tp, Config5) | ASPM_en); RTL_W8(tp, Config2, RTL_R8(tp, Config2) | ClkReqEn); } else { @@ -4779,9 +4321,6 @@ static void rtl_hw_start_8168bb(struct rtl8169_private *tp) { RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Beacon_en); - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); - if (tp->dev->mtu <= ETH_DATA_LEN) { rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B | PCI_EXP_DEVCTL_NOSNOOP_EN); @@ -4792,8 +4331,6 @@ static void rtl_hw_start_8168bef(struct rtl8169_private *tp) { rtl_hw_start_8168bb(tp); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~(1 << 0)); } @@ -4807,9 +4344,6 @@ static void __rtl_hw_start_8168cp(struct rtl8169_private *tp) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); rtl_disable_clock_request(tp); - - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168cp_1(struct rtl8169_private *tp) @@ -4837,9 +4371,6 @@ static void rtl_hw_start_8168cp_2(struct rtl8169_private *tp) if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) @@ -4851,13 +4382,8 @@ static void rtl_hw_start_8168cp_3(struct rtl8169_private *tp) /* Magic. 
*/ RTL_W8(tp, DBG_REG, 0x20); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168c_1(struct rtl8169_private *tp) @@ -4909,13 +4435,8 @@ static void rtl_hw_start_8168d(struct rtl8169_private *tp) rtl_disable_clock_request(tp); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - - tp->cp_cmd &= CPCMD_QUIRK_MASK; - RTL_W16(tp, CPlusCmd, tp->cp_cmd); } static void rtl_hw_start_8168dp(struct rtl8169_private *tp) @@ -4925,8 +4446,6 @@ static void rtl_hw_start_8168dp(struct rtl8169_private *tp) if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_disable_clock_request(tp); } @@ -4942,8 +4461,6 @@ static void rtl_hw_start_8168d_4(struct rtl8169_private *tp) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_ephy_init(tp, e_info_8168d_4); rtl_enable_clock_request(tp); @@ -4974,8 +4491,6 @@ static void rtl_hw_start_8168e_1(struct rtl8169_private *tp) if (tp->dev->mtu <= ETH_DATA_LEN) rtl_tx_performance_tweak(tp, PCI_EXP_DEVCTL_READRQ_4096B); - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); - rtl_disable_clock_request(tp); /* Reset tx FIFO pointer */ @@ -5007,8 +4522,6 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp) rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_0001, BIT(4)); rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00); - RTL_W8(tp, MaxTxPacketSize, EarlySize); - rtl_disable_clock_request(tp); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); @@ -5037,8 +4550,6 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp) rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050); rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060); - RTL_W8(tp, MaxTxPacketSize, EarlySize); - rtl_disable_clock_request(tp); RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB); @@ -5095,7 +4606,6 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp) rtl_eri_write(tp, 0x2f8, ERIAR_MASK_0011, 0x1d8f); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN); - RTL_W8(tp, MaxTxPacketSize, EarlySize); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); @@ -5193,7 +4703,6 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN); - RTL_W8(tp, MaxTxPacketSize, EarlySize); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); @@ -5269,7 +4778,6 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp) rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87); RTL_W32(tp, MISC, RTL_R32(tp, MISC) & ~RXDV_GATED_EN); - RTL_W8(tp, MaxTxPacketSize, EarlySize); rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000); rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000); @@ -5536,33 +5044,70 @@ static void rtl_hw_config(struct rtl8169_private *tp) static void rtl_hw_start_8168(struct rtl8169_private *tp) { - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); + if (tp->mac_version == RTL_GIGA_MAC_VER_13 || + tp->mac_version == RTL_GIGA_MAC_VER_16) + pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_NOSNOOP_EN); - /* Workaround for RxFIFO overflow. 
*/ - if (tp->mac_version == RTL_GIGA_MAC_VER_11) { - tp->irq_mask |= RxFIFOOver; - tp->irq_mask &= ~RxOverflow; - } + if (rtl_is_8168evl_up(tp)) + RTL_W8(tp, MaxTxPacketSize, EarlySize); + else + RTL_W8(tp, MaxTxPacketSize, TxPacketMax); rtl_hw_config(tp); } -static void rtl_hw_start_8101(struct rtl8169_private *tp) +static void rtl_hw_start_8169(struct rtl8169_private *tp) { - if (tp->mac_version >= RTL_GIGA_MAC_VER_30) - tp->irq_mask &= ~RxFIFOOver; + if (tp->mac_version == RTL_GIGA_MAC_VER_05) + pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08); - if (tp->mac_version == RTL_GIGA_MAC_VER_13 || - tp->mac_version == RTL_GIGA_MAC_VER_16) - pcie_capability_set_word(tp->pci_dev, PCI_EXP_DEVCTL, - PCI_EXP_DEVCTL_NOSNOOP_EN); + RTL_W8(tp, EarlyTxThres, NoEarlyTx); + + tp->cp_cmd |= PCIMulRW; - RTL_W8(tp, MaxTxPacketSize, TxPacketMax); + if (tp->mac_version == RTL_GIGA_MAC_VER_02 || + tp->mac_version == RTL_GIGA_MAC_VER_03) { + netif_dbg(tp, drv, tp->dev, + "Set MAC Reg C+CR Offset 0xe0. Bit 3 and Bit 14 MUST be 1\n"); + tp->cp_cmd |= (1 << 14); + } - tp->cp_cmd &= CPCMD_QUIRK_MASK; RTL_W16(tp, CPlusCmd, tp->cp_cmd); - rtl_hw_config(tp); + rtl8169_set_magic_reg(tp, tp->mac_version); + + RTL_W32(tp, RxMissed, 0); +} + +static void rtl_hw_start(struct rtl8169_private *tp) +{ + rtl_unlock_config_regs(tp); + + tp->cp_cmd &= CPCMD_MASK; + RTL_W16(tp, CPlusCmd, tp->cp_cmd); + + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) + rtl_hw_start_8169(tp); + else + rtl_hw_start_8168(tp); + + rtl_set_rx_max_size(tp); + rtl_set_rx_tx_desc_registers(tp); + rtl_lock_config_regs(tp); + + /* disable interrupt coalescing */ + RTL_W16(tp, IntrMitigate, 0x0000); + /* Initially a 10 us delay. Turned it into a PCI commit. - FR */ + RTL_R8(tp, IntrMask); + RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb); + rtl_init_rxcfg(tp); + rtl_set_tx_config_registers(tp); + + rtl_set_rx_mode(tp->dev); + /* no early-rx interrupts */ + RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000); + rtl_irq_enable(tp); } static int rtl8169_change_mtu(struct net_device *dev, int new_mtu) @@ -5834,7 +5379,7 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, static void r8169_csum_workaround(struct rtl8169_private *tp, struct sk_buff *skb) { - if (skb_shinfo(skb)->gso_size) { + if (skb_is_gso(skb)) { netdev_features_t features = tp->dev->features; struct sk_buff *segs, *nskb; @@ -5857,11 +5402,8 @@ static void r8169_csum_workaround(struct rtl8169_private *tp, rtl8169_start_xmit(skb, tp->dev); } else { - struct net_device_stats *stats; - drop: - stats = &tp->dev->stats; - stats->tx_dropped++; + tp->dev->stats.tx_dropped++; dev_kfree_skb_any(skb); } } @@ -5889,8 +5431,7 @@ static int msdn_giant_send_check(struct sk_buff *skb) return ret; } -static bool rtl8169_tso_csum_v1(struct rtl8169_private *tp, - struct sk_buff *skb, u32 *opts) +static void rtl8169_tso_csum_v1(struct sk_buff *skb, u32 *opts) { u32 mss = skb_shinfo(skb)->gso_size; @@ -5907,8 +5448,6 @@ static bool rtl8169_tso_csum_v1(struct rtl8169_private *tp, else WARN_ON_ONCE(1); } - - return true; } static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp, @@ -5998,6 +5537,18 @@ static bool rtl_tx_slots_avail(struct rtl8169_private *tp, return slots_avail > nr_frags; } +/* Versions RTL8102e and from RTL8168c onwards support csum_v2 */ +static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp) +{ + switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_10 ... 
RTL_GIGA_MAC_VER_17: + return false; + default: + return true; + } +} + static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -6017,12 +5568,16 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(le32_to_cpu(txd->opts1) & DescOwn)) goto err_stop_0; - opts[1] = cpu_to_le32(rtl8169_tx_vlan_tag(skb)); + opts[1] = rtl8169_tx_vlan_tag(skb); opts[0] = DescOwn; - if (!tp->tso_csum(tp, skb, opts)) { - r8169_csum_workaround(tp, skb); - return NETDEV_TX_OK; + if (rtl_chip_supports_csum_v2(tp)) { + if (!rtl8169_tso_csum_v2(tp, skb, opts)) { + r8169_csum_workaround(tp, skb); + return NETDEV_TX_OK; + } + } else { + rtl8169_tso_csum_v1(skb, opts); } len = skb_headlen(skb); @@ -6229,7 +5784,6 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data, skb = napi_alloc_skb(&tp->napi, pkt_size); if (skb) skb_copy_to_linear_data(skb, data, pkt_size); - dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE); return skb; } @@ -6264,14 +5818,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget dev->stats.rx_length_errors++; if (status & RxCRC) dev->stats.rx_crc_errors++; - /* RxFOVF is a reserved bit on later chip versions */ - if (tp->mac_version == RTL_GIGA_MAC_VER_01 && - status & RxFOVF) { - rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); - dev->stats.rx_fifo_errors++; - } else if (status & (RxRUNT | RxCRC) && - !(status & RxRWT) && - dev->features & NETIF_F_RXALL) { + if (status & (RxRUNT | RxCRC) && !(status & RxRWT) && + dev->features & NETIF_F_RXALL) { goto process_pkt; } } else { @@ -6451,7 +5999,10 @@ static int r8169_phy_connect(struct rtl8169_private *tp) if (ret) return ret; - if (!tp->supports_gmii) + if (tp->supports_gmii) + phy_remove_link_mode(phydev, + ETHTOOL_LINK_MODE_1000baseT_Half_BIT); + else phy_set_max_speed(phydev, SPEED_100); phy_support_asym_pause(phydev); @@ -6884,30 +6435,18 @@ static const struct net_device_ops rtl_netdev_ops = { }; -static const struct rtl_cfg_info { - void (*hw_start)(struct rtl8169_private *tp); - u16 irq_mask; - unsigned int has_gmii:1; - const struct rtl_coalesce_info *coalesce_info; -} rtl_cfg_infos [] = { - [RTL_CFG_0] = { - .hw_start = rtl_hw_start_8169, - .irq_mask = SYSErr | LinkChg | RxOverflow | RxFIFOOver, - .has_gmii = 1, - .coalesce_info = rtl_coalesce_info_8169, - }, - [RTL_CFG_1] = { - .hw_start = rtl_hw_start_8168, - .irq_mask = LinkChg | RxOverflow, - .has_gmii = 1, - .coalesce_info = rtl_coalesce_info_8168_8136, - }, - [RTL_CFG_2] = { - .hw_start = rtl_hw_start_8101, - .irq_mask = LinkChg | RxOverflow | RxFIFOOver, - .coalesce_info = rtl_coalesce_info_8168_8136, - } -}; +static void rtl_set_irq_mask(struct rtl8169_private *tp) +{ + tp->irq_mask = RTL_EVENT_NAPI | LinkChg; + + if (tp->mac_version <= RTL_GIGA_MAC_VER_06) + tp->irq_mask |= SYSErr | RxOverflow | RxFIFOOver; + else if (tp->mac_version == RTL_GIGA_MAC_VER_11) + /* special workaround needed */ + tp->irq_mask |= RxFIFOOver; + else + tp->irq_mask |= RxOverflow; +} static int rtl_alloc_irq(struct rtl8169_private *tp) { @@ -6928,13 +6467,10 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) static void rtl_read_mac_address(struct rtl8169_private *tp, u8 mac_addr[ETH_ALEN]) { - u32 value; - /* Get MAC address */ - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_35 ... RTL_GIGA_MAC_VER_38: - case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_51: - value = rtl_eri_read(tp, 0xe0); + if (rtl_is_8168evl_up(tp) && tp->mac_version != RTL_GIGA_MAC_VER_34) { + u32 value = rtl_eri_read(tp, 0xe0); + mac_addr[0] = (value >> 0) & 0xff; mac_addr[1] = (value >> 8) & 0xff; mac_addr[2] = (value >> 16) & 0xff; @@ -6943,9 +6479,6 @@ static void rtl_read_mac_address(struct rtl8169_private *tp, value = rtl_eri_read(tp, 0xe4); mac_addr[4] = (value >> 0) & 0xff; mac_addr[5] = (value >> 8) & 0xff; - break; - default: - break; } } @@ -7046,42 +6579,23 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp) data |= (1 << 15); r8168_mac_ocp_write(tp, 0xe8de, data); - if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42)) - return; -} - -static void rtl_hw_init_8168ep(struct rtl8169_private *tp) -{ - rtl8168ep_stop_cmac(tp); - rtl_hw_init_8168g(tp); + rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42); } static void rtl_hw_initialize(struct rtl8169_private *tp) { switch (tp->mac_version) { + case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51: + rtl8168ep_stop_cmac(tp); + /* fall through */ case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48: rtl_hw_init_8168g(tp); break; - case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51: - rtl_hw_init_8168ep(tp); - break; default: break; } } -/* Versions RTL8102e and from RTL8168c onwards support csum_v2 */ -static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp) -{ - switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: - case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17: - return false; - default: - return true; - } -} - static int rtl_jumbo_max(struct rtl8169_private *tp) { /* Non-GBit versions don't support jumbo frames */ @@ -7090,7 +6604,7 @@ static int rtl_jumbo_max(struct rtl8169_private *tp) switch (tp->mac_version) { /* RTL8169 */ - case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06: + case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06: return JUMBO_7K; /* RTL8168b */ case RTL_GIGA_MAC_VER_11: @@ -7136,14 +6650,36 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp) return rc; } +static void rtl_init_mac_address(struct rtl8169_private *tp) +{ + struct net_device *dev = tp->dev; + u8 *mac_addr = dev->dev_addr; + int rc, i; + + rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr); + if (!rc) + goto done; + + rtl_read_mac_address(tp, mac_addr); + if (is_valid_ether_addr(mac_addr)) + goto done; + + for (i = 0; i < ETH_ALEN; i++) + mac_addr[i] = RTL_R8(tp, MAC0 + i); + if (is_valid_ether_addr(mac_addr)) + goto done; + + eth_hw_addr_random(dev); + dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n"); +done: + rtl_rar_set(tp, mac_addr); +} + static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { - const struct rtl_cfg_info *cfg = rtl_cfg_infos + ent->driver_data; - /* align to u16 for is_valid_ether_addr() */ - u8 mac_addr[ETH_ALEN] __aligned(2) = {}; struct rtl8169_private *tp; struct net_device *dev; - int chipset, region, i; + int chipset, region; int jumbo_max, rc; dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp)); @@ -7156,7 +6692,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) tp->dev = dev; tp->pci_dev = pdev; tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT); - tp->supports_gmii = cfg->has_gmii; + tp->supports_gmii = ent->driver_data == RTL_CFG_NO_GBIT ? 
0 : 1; /* Get the *optional* external "ether_clk" used on some boards */ rc = rtl_get_ether_clk(tp); @@ -7166,7 +6702,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disable ASPM completely as that cause random device stop working * problems as well as full system hangs for some PCIe devices users. */ - pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1); + rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | + PCIE_LINK_STATE_L1); + tp->aspm_manageable = !rc; /* enable device (incl. PCI PM wakeup and hotplug setup) */ rc = pcim_enable_device(pdev); @@ -7204,23 +6742,11 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (tp->mac_version == RTL_GIGA_MAC_NONE) return -ENODEV; - if (rtl_tbi_enabled(tp)) { - dev_err(&pdev->dev, "TBI fiber mode not supported\n"); - return -ENODEV; - } - tp->cp_cmd = RTL_R16(tp, CPlusCmd); if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 && - !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) { + !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) dev->features |= NETIF_F_HIGHDMA; - } else { - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (rc < 0) { - dev_err(&pdev->dev, "DMA configuration failed\n"); - return rc; - } - } rtl_init_rxcfg(tp); @@ -7232,9 +6758,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); - rtl_init_mdio_ops(tp); - rtl_init_jumbo_ops(tp); - chipset = tp->mac_version; rc = rtl_alloc_irq(tp); @@ -7248,16 +6771,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); - /* get MAC address */ - rc = eth_platform_get_mac_address(&pdev->dev, mac_addr); - if (rc) - rtl_read_mac_address(tp, mac_addr); - - if (is_valid_ether_addr(mac_addr)) - rtl_rar_set(tp, mac_addr); - - for (i = 0; i < ETH_ALEN; i++) - dev->dev_addr[i] = RTL_R8(tp, MAC0 + i); + rtl_init_mac_address(tp); dev->ethtool_ops = &rtl8169_ethtool_ops; @@ -7285,12 +6799,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disallow toggling */ dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX; - if (rtl_chip_supports_csum_v2(tp)) { - tp->tso_csum = rtl8169_tso_csum_v2; + if (rtl_chip_supports_csum_v2(tp)) dev->hw_features |= NETIF_F_IPV6_CSUM | NETIF_F_TSO6; - } else { - tp->tso_csum = rtl8169_tso_csum_v1; - } dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; @@ -7300,9 +6810,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) jumbo_max = rtl_jumbo_max(tp); dev->max_mtu = jumbo_max; - tp->hw_start = cfg->hw_start; - tp->irq_mask = RTL_EVENT_NAPI | cfg->irq_mask; - tp->coalesce_info = cfg->coalesce_info; + rtl_set_irq_mask(tp); tp->fw_name = rtl_chip_infos[chipset].fw_name; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 3e5bc1fc3c46..079f459c73a5 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2210,6 +2210,10 @@ static int rocker_router_fib_event(struct notifier_block *nb, NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported"); return notifier_from_errno(-EINVAL); } + if (fen_info->fi->nh) { + NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); + return notifier_from_errno(-EINVAL); + } } memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info)); diff 
--git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index bdfa6a19d620..7072b249c8bd 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -18,6 +18,7 @@ #include <net/neighbour.h> #include <net/switchdev.h> #include <net/ip_fib.h> +#include <net/nexthop.h> #include <net/arp.h> #include "rocker.h" @@ -2282,8 +2283,8 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, __be32 dst, /* XXX support ECMP */ - nh = fi->fib_nh; - nh_on_port = (fi->fib_dev == ofdpa_port->dev); + nh = fib_info_nh(fi, 0); + nh_on_port = (nh->fib_nh_dev == ofdpa_port->dev); has_gw = !!nh->fib_nh_gw4; if (has_gw && nh_on_port) { @@ -2733,11 +2734,13 @@ static int ofdpa_fib4_add(struct rocker *rocker, { struct ofdpa *ofdpa = rocker->wpriv; struct ofdpa_port *ofdpa_port; + struct fib_nh *nh; int err; if (ofdpa->fib_aborted) return 0; - ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + nh = fib_info_nh(fen_info->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) return 0; err = ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst), @@ -2745,7 +2748,7 @@ static int ofdpa_fib4_add(struct rocker *rocker, fen_info->tb_id, 0); if (err) return err; - fen_info->fi->fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; + nh->fib_nh_flags |= RTNH_F_OFFLOAD; return 0; } @@ -2754,13 +2757,15 @@ static int ofdpa_fib4_del(struct rocker *rocker, { struct ofdpa *ofdpa = rocker->wpriv; struct ofdpa_port *ofdpa_port; + struct fib_nh *nh; if (ofdpa->fib_aborted) return 0; - ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + nh = fib_info_nh(fen_info->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) return 0; - fen_info->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst), fen_info->dst_len, fen_info->fi, fen_info->tb_id, OFDPA_OP_FLAG_REMOVE); @@ -2780,14 +2785,16 @@ static void ofdpa_fib4_abort(struct rocker *rocker) spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags); hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) { + struct fib_nh *nh; + if (flow_entry->key.tbl_id != ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) continue; - ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev, - rocker); + nh = fib_info_nh(flow_entry->fi, 0); + ofdpa_port = ofdpa_port_dev_lower_find(nh->fib_nh_dev, rocker); if (!ofdpa_port) continue; - flow_entry->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; + nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE, flow_entry); } diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 53b726bfe945..ab58b837df47 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3614,11 +3614,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev, netif_warn(efx, probe, efx->net_dev, "failed to create MTDs (%d)\n", rc); - rc = pci_enable_pcie_error_reporting(pci_dev); - if (rc && rc != -EINVAL) - netif_notice(efx, probe, efx->net_dev, - "PCIE error reporting unavailable (%d).\n", - rc); + (void)pci_enable_pcie_error_reporting(pci_dev); if (efx->type->udp_tnl_push_ports) efx->type->udp_tnl_push_ports(efx); diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 9b036c857b1d..aba6eea72f15 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -360,7 
+360,7 @@ static int sis635_get_mac_addr(struct pci_dev *pci_dev, * SiS962 or SiS963 model, use EEPROM to store MAC address. And EEPROM * is shared by * LAN and 1394. When access EEPROM, send EEREQ signal to hardware first - * and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be access + * and wait for EEGNT. If EEGNT is ON, EEPROM is permitted to be accessed * by LAN, otherwise is not. After MAC address is read from EEPROM, send * EEDONE signal to refuse EEPROM access by LAN. * The EEPROM map of SiS962 or SiS963 is different to SiS900. @@ -882,7 +882,7 @@ static void mdio_reset(struct sis900_private *sp) * mdio_read - read MII PHY register * @net_dev: the net device to read * @phy_id: the phy address to read - * @location: the phy regiester id to read + * @location: the phy register id to read * * Read MII registers through MDIO and MDC * using MDIO management frame structure and protocol(defined by ISO/IEC). @@ -926,7 +926,7 @@ static int mdio_read(struct net_device *net_dev, int phy_id, int location) * mdio_write - write MII PHY register * @net_dev: the net device to write * @phy_id: the phy address to write - * @location: the phy regiester id to write + * @location: the phy register id to write * @value: the register value to write with * * Write MII registers with @value through MDIO and MDC @@ -1057,7 +1057,7 @@ sis900_open(struct net_device *net_dev) sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC); sw32(cr, RxENA | sr32(cr)); sw32(ier, IE); @@ -1101,7 +1101,7 @@ sis900_init_rxfilter (struct net_device * net_dev) sw32(rfdr, w); if (netif_msg_hw(sis_priv)) { - printk(KERN_DEBUG "%s: Receive Filter Addrss[%d]=%x\n", + printk(KERN_DEBUG "%s: Receive Filter Address[%d]=%x\n", net_dev->name, i, sr32(rfdr)); } } @@ -1148,7 +1148,7 @@ sis900_init_tx_ring(struct net_device *net_dev) * @net_dev: the net device to initialize for * * Initialize the Rx descriptor ring, - * and pre-allocate recevie buffers (socket buffer) + * and pre-allocate receive buffers (socket buffer) */ static void @@ -1578,7 +1578,7 @@ static void sis900_tx_timeout(struct net_device *net_dev) sw32(txdp, sis_priv->tx_ring_dma); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC); } /** @@ -1674,8 +1674,8 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance) do { status = sr32(isr); - if ((status & (HIBERR|TxURN|TxERR|TxIDLE|TxDESC|RxORN|RxERR|RxOK)) == 0) - /* nothing intresting happened */ + if ((status & (HIBERR|TxURN|TxERR|TxDESC|RxORN|RxERR|RxOK)) == 0) + /* nothing interesting happened */ break; handled = 1; @@ -1684,7 +1684,7 @@ static irqreturn_t sis900_interrupt(int irq, void *dev_instance) /* Rx interrupt */ sis900_rx(net_dev); - if (status & (TxURN | TxERR | TxIDLE | TxDESC)) + if (status & (TxURN | TxERR | TxDESC)) /* Tx interrupt */ sis900_finish_xmit(net_dev); @@ -1897,7 +1897,7 @@ static void sis900_finish_xmit (struct net_device *net_dev) if (tx_status & OWN) { /* The packet is not transmitted yet (owned by hardware) ! 
* Note: this is an almost impossible condition - * in case of TxDESC ('descriptor interrupt') */ + * on TxDESC interrupt ('descriptor interrupt') */ break; } @@ -2473,7 +2473,7 @@ static int sis900_resume(struct pci_dev *pci_dev) sis900_set_mode(sis_priv, HW_SPEED_10_MBPS, FDX_CAPABLE_HALF_SELECTED); /* Enable all known interrupts by setting the interrupt mask. */ - sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxIDLE | TxDESC); + sw32(imr, RxSOVR | RxORN | RxERR | RxOK | TxURN | TxERR | TxDESC); sw32(cr, RxENA | sr32(cr)); sw32(ier, IE); diff --git a/drivers/net/ethernet/socionext/Kconfig b/drivers/net/ethernet/socionext/Kconfig index 25f18be27423..95e99baf3f45 100644 --- a/drivers/net/ethernet/socionext/Kconfig +++ b/drivers/net/ethernet/socionext/Kconfig @@ -26,6 +26,7 @@ config SNI_NETSEC tristate "Socionext NETSEC ethernet support" depends on (ARCH_SYNQUACER || COMPILE_TEST) && OF select PHYLIB + select PAGE_POOL select MII ---help--- Enable to add support for the SocioNext NetSec Gigabit Ethernet diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index cba5881b2746..1502fe8b0456 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -9,8 +9,12 @@ #include <linux/etherdevice.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/netlink.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <net/tcp.h> +#include <net/page_pool.h> #include <net/ip6_checksum.h> #define NETSEC_REG_SOFT_RST 0x104 @@ -235,22 +239,41 @@ #define DESC_NUM 256 #define NETSEC_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#define NETSEC_RX_BUF_SZ 1536 +#define NETSEC_RXBUF_HEADROOM (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \ + NET_IP_ALIGN) +#define NETSEC_RX_BUF_NON_DATA (NETSEC_RXBUF_HEADROOM + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define DESC_SZ sizeof(struct netsec_de) #define NETSEC_F_NETSEC_VER_MAJOR_NUM(x) ((x) & 0xffff0000) +#define NETSEC_XDP_PASS 0 +#define NETSEC_XDP_CONSUMED BIT(0) +#define NETSEC_XDP_TX BIT(1) +#define NETSEC_XDP_REDIR BIT(2) +#define NETSEC_XDP_RX_OK (NETSEC_XDP_PASS | NETSEC_XDP_TX | NETSEC_XDP_REDIR) + enum ring_id { NETSEC_RING_TX = 0, NETSEC_RING_RX }; +enum buf_type { + TYPE_NETSEC_SKB = 0, + TYPE_NETSEC_XDP_TX, + TYPE_NETSEC_XDP_NDO, +}; + struct netsec_desc { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct xdp_frame *xdpf; + }; dma_addr_t dma_addr; void *addr; u16 len; + u8 buf_type; }; struct netsec_desc_ring { @@ -258,11 +281,17 @@ struct netsec_desc_ring { struct netsec_desc *desc; void *vaddr; u16 head, tail; + u16 xdp_xmit; /* netsec_xdp_xmit packets */ + bool is_xdp; + struct page_pool *page_pool; + struct xdp_rxq_info xdp_rxq; + spinlock_t lock; /* XDP tx queue locking */ }; struct netsec_priv { struct netsec_desc_ring desc_ring[NETSEC_RING_MAX]; struct ethtool_coalesce et_coalesce; + struct bpf_prog *xdp_prog; spinlock_t reglock; /* protect reg access */ struct napi_struct napi; phy_interface_t phy_interface; @@ -600,12 +629,14 @@ static void netsec_set_rx_de(struct netsec_priv *priv, static bool netsec_clean_tx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; - unsigned int pkts, bytes; struct netsec_de *entry; int tail = dring->tail; + unsigned int bytes; int cnt = 0; - pkts = 0; + if (dring->is_xdp) + spin_lock(&dring->lock); + bytes = 0; entry = dring->vaddr + DESC_SZ * tail; @@ -618,13 +649,23 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) eop = 
(entry->attr >> NETSEC_TX_LAST) & 1; dma_rmb(); - dma_unmap_single(priv->dev, desc->dma_addr, desc->len, - DMA_TO_DEVICE); - if (eop) { - pkts++; + /* if buf_type is either TYPE_NETSEC_SKB or + * TYPE_NETSEC_XDP_NDO we mapped it + */ + if (desc->buf_type != TYPE_NETSEC_XDP_TX) + dma_unmap_single(priv->dev, desc->dma_addr, desc->len, + DMA_TO_DEVICE); + + if (!eop) + goto next; + + if (desc->buf_type == TYPE_NETSEC_SKB) { bytes += desc->skb->len; dev_kfree_skb(desc->skb); + } else { + xdp_return_frame(desc->xdpf); } +next: /* clean up so netsec_uninit_pkt_dring() won't free the skb * again */ @@ -641,6 +682,8 @@ static bool netsec_clean_tx_dring(struct netsec_priv *priv) entry = dring->vaddr + DESC_SZ * tail; cnt++; } + if (dring->is_xdp) + spin_unlock(&dring->lock); if (!cnt) return false; @@ -673,33 +716,31 @@ static void netsec_process_tx(struct netsec_priv *priv) } static void *netsec_alloc_rx_data(struct netsec_priv *priv, - dma_addr_t *dma_handle, u16 *desc_len, - bool napi) + dma_addr_t *dma_handle, u16 *desc_len) + { - size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - size_t payload_len = NETSEC_RX_BUF_SZ; - dma_addr_t mapping; - void *buf; - total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD); + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; + enum dma_data_direction dma_dir; + struct page *page; - buf = napi ? napi_alloc_frag(total_len) : netdev_alloc_frag(total_len); - if (!buf) + page = page_pool_dev_alloc_pages(dring->page_pool); + if (!page) return NULL; - mapping = dma_map_single(priv->dev, buf + NETSEC_SKB_PAD, payload_len, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(priv->dev, mapping))) - goto err_out; - - *dma_handle = mapping; - *desc_len = payload_len; - - return buf; + /* We allocate the same buffer length for XDP and non-XDP cases. 
+ * page_pool API will map the whole page, skip what's needed for + * network payloads and/or XDP + */ + *dma_handle = page_pool_get_dma_addr(page) + NETSEC_RXBUF_HEADROOM; + /* Make sure the incoming payload fits in the page for XDP and non-XDP + * cases and reserve enough space for headroom + skb_shared_info + */ + *desc_len = PAGE_SIZE - NETSEC_RX_BUF_NON_DATA; + dma_dir = page_pool_get_dma_dir(dring->page_pool); + dma_sync_single_for_device(priv->dev, *dma_handle, *desc_len, dma_dir); -err_out: - skb_free_frag(buf); - return NULL; + return page_address(page); } static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num) @@ -716,22 +757,201 @@ static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num) } } +static void netsec_xdp_ring_tx_db(struct netsec_priv *priv, u16 pkts) +{ + if (likely(pkts)) + netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, pkts); +} + +static void netsec_finalize_xdp_rx(struct netsec_priv *priv, u32 xdp_res, + u16 pkts) +{ + if (xdp_res & NETSEC_XDP_REDIR) + xdp_do_flush_map(); + + if (xdp_res & NETSEC_XDP_TX) + netsec_xdp_ring_tx_db(priv, pkts); +} + +static void netsec_set_tx_de(struct netsec_priv *priv, + struct netsec_desc_ring *dring, + const struct netsec_tx_pkt_ctrl *tx_ctrl, + const struct netsec_desc *desc, void *buf) +{ + int idx = dring->head; + struct netsec_de *de; + u32 attr; + + de = dring->vaddr + (DESC_SZ * idx); + + attr = (1 << NETSEC_TX_SHIFT_OWN_FIELD) | + (1 << NETSEC_TX_SHIFT_PT_FIELD) | + (NETSEC_RING_GMAC << NETSEC_TX_SHIFT_TDRID_FIELD) | + (1 << NETSEC_TX_SHIFT_FS_FIELD) | + (1 << NETSEC_TX_LAST) | + (tx_ctrl->cksum_offload_flag << NETSEC_TX_SHIFT_CO) | + (tx_ctrl->tcp_seg_offload_flag << NETSEC_TX_SHIFT_SO) | + (1 << NETSEC_TX_SHIFT_TRS_FIELD); + if (idx == DESC_NUM - 1) + attr |= (1 << NETSEC_TX_SHIFT_LD_FIELD); + + de->data_buf_addr_up = upper_32_bits(desc->dma_addr); + de->data_buf_addr_lw = lower_32_bits(desc->dma_addr); + de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len; + de->attr = attr; + /* under spin_lock if using XDP */ + if (!dring->is_xdp) + dma_wmb(); + + dring->desc[idx] = *desc; + if (desc->buf_type == TYPE_NETSEC_SKB) + dring->desc[idx].skb = buf; + else if (desc->buf_type == TYPE_NETSEC_XDP_TX || + desc->buf_type == TYPE_NETSEC_XDP_NDO) + dring->desc[idx].xdpf = buf; + + /* move head ahead */ + dring->head = (dring->head + 1) % DESC_NUM; +} + +/* The current driver only supports 1 Txq, this should run under spin_lock() */ +static u32 netsec_xdp_queue_one(struct netsec_priv *priv, + struct xdp_frame *xdpf, bool is_ndo) + +{ + struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX]; + struct page *page = virt_to_page(xdpf->data); + struct netsec_tx_pkt_ctrl tx_ctrl = {}; + struct netsec_desc tx_desc; + dma_addr_t dma_handle; + u16 filled; + + if (tx_ring->head >= tx_ring->tail) + filled = tx_ring->head - tx_ring->tail; + else + filled = tx_ring->head + DESC_NUM - tx_ring->tail; + + if (DESC_NUM - filled <= 1) + return NETSEC_XDP_CONSUMED; + + if (is_ndo) { + /* this is for ndo_xdp_xmit, the buffer needs mapping before + * sending + */ + dma_handle = dma_map_single(priv->dev, xdpf->data, xdpf->len, + DMA_TO_DEVICE); + if (dma_mapping_error(priv->dev, dma_handle)) + return NETSEC_XDP_CONSUMED; + tx_desc.buf_type = TYPE_NETSEC_XDP_NDO; + } else { + /* This is the device Rx buffer from page_pool. 
No need to remap + * just sync and send it + */ + struct netsec_desc_ring *rx_ring = + &priv->desc_ring[NETSEC_RING_RX]; + enum dma_data_direction dma_dir = + page_pool_get_dma_dir(rx_ring->page_pool); + + dma_handle = page_pool_get_dma_addr(page) + + NETSEC_RXBUF_HEADROOM; + dma_sync_single_for_device(priv->dev, dma_handle, xdpf->len, + dma_dir); + tx_desc.buf_type = TYPE_NETSEC_XDP_TX; + } + + tx_desc.dma_addr = dma_handle; + tx_desc.addr = xdpf->data; + tx_desc.len = xdpf->len; + + netsec_set_tx_de(priv, tx_ring, &tx_ctrl, &tx_desc, xdpf); + + return NETSEC_XDP_TX; +} + +static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp) +{ + struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX]; + struct xdp_frame *xdpf = convert_to_xdp_frame(xdp); + u32 ret; + + if (unlikely(!xdpf)) + return NETSEC_XDP_CONSUMED; + + spin_lock(&tx_ring->lock); + ret = netsec_xdp_queue_one(priv, xdpf, false); + spin_unlock(&tx_ring->lock); + + return ret; +} + +static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + u32 ret = NETSEC_XDP_PASS; + int err; + u32 act; + + act = bpf_prog_run_xdp(prog, xdp); + + switch (act) { + case XDP_PASS: + ret = NETSEC_XDP_PASS; + break; + case XDP_TX: + ret = netsec_xdp_xmit_back(priv, xdp); + if (ret != NETSEC_XDP_TX) + xdp_return_buff(xdp); + break; + case XDP_REDIRECT: + err = xdp_do_redirect(priv->ndev, xdp, prog); + if (!err) { + ret = NETSEC_XDP_REDIR; + } else { + ret = NETSEC_XDP_CONSUMED; + xdp_return_buff(xdp); + } + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(priv->ndev, prog, act); + /* fall through -- handle aborts by dropping packet */ + case XDP_DROP: + ret = NETSEC_XDP_CONSUMED; + xdp_return_buff(xdp); + break; + } + + return ret; +} + static int netsec_process_rx(struct netsec_priv *priv, int budget) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; struct net_device *ndev = priv->ndev; struct netsec_rx_pkt_info rx_info; - struct sk_buff *skb; + enum dma_data_direction dma_dir; + struct bpf_prog *xdp_prog; + struct sk_buff *skb = NULL; + u16 xdp_xmit = 0; + u32 xdp_act = 0; int done = 0; + rcu_read_lock(); + xdp_prog = READ_ONCE(priv->xdp_prog); + dma_dir = page_pool_get_dma_dir(dring->page_pool); + while (done < budget) { u16 idx = dring->tail; struct netsec_de *de = dring->vaddr + (DESC_SZ * idx); struct netsec_desc *desc = &dring->desc[idx]; + struct page *page = virt_to_page(desc->addr); + u32 xdp_result = XDP_PASS; u16 pkt_len, desc_len; dma_addr_t dma_handle; + struct xdp_buff xdp; void *buf_addr; - u32 truesize; if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) { /* reading the register clears the irq */ @@ -766,53 +986,71 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) /* allocate a fresh buffer and map it to the hardware. 
* This will eventually replace the old buffer in the hardware */ - buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len, - true); + buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len); + if (unlikely(!buf_addr)) break; dma_sync_single_for_cpu(priv->dev, desc->dma_addr, pkt_len, - DMA_FROM_DEVICE); + dma_dir); prefetch(desc->addr); - truesize = SKB_DATA_ALIGN(desc->len + NETSEC_SKB_PAD) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - skb = build_skb(desc->addr, truesize); + xdp.data_hard_start = desc->addr; + xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM; + xdp_set_data_meta_invalid(&xdp); + xdp.data_end = xdp.data + pkt_len; + xdp.rxq = &dring->xdp_rxq; + + if (xdp_prog) { + xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp); + if (xdp_result != NETSEC_XDP_PASS) { + xdp_act |= xdp_result; + if (xdp_result == NETSEC_XDP_TX) + xdp_xmit++; + goto next; + } + } + skb = build_skb(desc->addr, desc->len + NETSEC_RX_BUF_NON_DATA); + if (unlikely(!skb)) { - /* free the newly allocated buffer, we are not going to - * use it + /* If skb fails recycle_direct will either unmap and + * free the page or refill the cache depending on the + * cache state. Since we paid the allocation cost if + * building an skb fails try to put the page into cache */ - dma_unmap_single(priv->dev, dma_handle, desc_len, - DMA_FROM_DEVICE); - skb_free_frag(buf_addr); + page_pool_recycle_direct(dring->page_pool, page); netif_err(priv, drv, priv->ndev, "rx failed to build skb\n"); break; } - dma_unmap_single_attrs(priv->dev, desc->dma_addr, desc->len, - DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); - - /* Update the descriptor with the new buffer we allocated */ - desc->len = desc_len; - desc->dma_addr = dma_handle; - desc->addr = buf_addr; + page_pool_release_page(dring->page_pool, page); - skb_reserve(skb, NETSEC_SKB_PAD); - skb_put(skb, pkt_len); + skb_reserve(skb, xdp.data - xdp.data_hard_start); + skb_put(skb, xdp.data_end - xdp.data); skb->protocol = eth_type_trans(skb, priv->ndev); if (priv->rx_cksum_offload_flag && rx_info.rx_cksum_result == NETSEC_RX_CKSUM_OK) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) { +next: + if ((skb && napi_gro_receive(&priv->napi, skb) != GRO_DROP) || + xdp_result & NETSEC_XDP_RX_OK) { ndev->stats.rx_packets++; - ndev->stats.rx_bytes += pkt_len; + ndev->stats.rx_bytes += xdp.data_end - xdp.data; } + /* Update the descriptor with fresh buffers */ + desc->len = desc_len; + desc->dma_addr = dma_handle; + desc->addr = buf_addr; + netsec_rx_fill(priv, idx, 1); dring->tail = (dring->tail + 1) % DESC_NUM; } + netsec_finalize_xdp_rx(priv, xdp_act, xdp_xmit); + + rcu_read_unlock(); return done; } @@ -820,19 +1058,12 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget) static int netsec_napi_poll(struct napi_struct *napi, int budget) { struct netsec_priv *priv; - int rx, done, todo; + int done; priv = container_of(napi, struct netsec_priv, napi); netsec_process_tx(priv); - - todo = budget; - do { - rx = netsec_process_rx(priv, todo); - todo -= rx; - } while (rx); - - done = budget - todo; + done = netsec_process_rx(priv, budget); if (done < budget && napi_complete_done(napi, done)) { unsigned long flags; @@ -846,41 +1077,6 @@ static int netsec_napi_poll(struct napi_struct *napi, int budget) return done; } -static void netsec_set_tx_de(struct netsec_priv *priv, - struct netsec_desc_ring *dring, - const struct netsec_tx_pkt_ctrl *tx_ctrl, - const struct netsec_desc *desc, - struct sk_buff *skb) -{ - int idx = dring->head; 
- struct netsec_de *de; - u32 attr; - - de = dring->vaddr + (DESC_SZ * idx); - - attr = (1 << NETSEC_TX_SHIFT_OWN_FIELD) | - (1 << NETSEC_TX_SHIFT_PT_FIELD) | - (NETSEC_RING_GMAC << NETSEC_TX_SHIFT_TDRID_FIELD) | - (1 << NETSEC_TX_SHIFT_FS_FIELD) | - (1 << NETSEC_TX_LAST) | - (tx_ctrl->cksum_offload_flag << NETSEC_TX_SHIFT_CO) | - (tx_ctrl->tcp_seg_offload_flag << NETSEC_TX_SHIFT_SO) | - (1 << NETSEC_TX_SHIFT_TRS_FIELD); - if (idx == DESC_NUM - 1) - attr |= (1 << NETSEC_TX_SHIFT_LD_FIELD); - - de->data_buf_addr_up = upper_32_bits(desc->dma_addr); - de->data_buf_addr_lw = lower_32_bits(desc->dma_addr); - de->buf_len_info = (tx_ctrl->tcp_seg_len << 16) | desc->len; - de->attr = attr; - dma_wmb(); - - dring->desc[idx] = *desc; - dring->desc[idx].skb = skb; - - /* move head ahead */ - dring->head = (dring->head + 1) % DESC_NUM; -} static int netsec_desc_used(struct netsec_desc_ring *dring) { @@ -927,8 +1123,12 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, u16 tso_seg_len = 0; int filled; + if (dring->is_xdp) + spin_lock_bh(&dring->lock); filled = netsec_desc_used(dring); if (netsec_check_stop_tx(priv, filled)) { + if (dring->is_xdp) + spin_unlock_bh(&dring->lock); net_warn_ratelimited("%s %s Tx queue full\n", dev_name(priv->dev), ndev->name); return NETDEV_TX_BUSY; @@ -961,6 +1161,8 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, tx_desc.dma_addr = dma_map_single(priv->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); if (dma_mapping_error(priv->dev, tx_desc.dma_addr)) { + if (dring->is_xdp) + spin_unlock_bh(&dring->lock); netif_err(priv, drv, priv->ndev, "%s: DMA mapping failed\n", __func__); ndev->stats.tx_dropped++; @@ -969,11 +1171,14 @@ static netdev_tx_t netsec_netdev_start_xmit(struct sk_buff *skb, } tx_desc.addr = skb->data; tx_desc.len = skb_headlen(skb); + tx_desc.buf_type = TYPE_NETSEC_SKB; skb_tx_timestamp(skb); netdev_sent_queue(priv->ndev, skb->len); netsec_set_tx_de(priv, dring, &tx_ctrl, &tx_desc, skb); + if (dring->is_xdp) + spin_unlock_bh(&dring->lock); netsec_write(priv, NETSEC_REG_NRM_TX_PKTCNT, 1); /* submit another tx */ return NETDEV_TX_OK; @@ -987,19 +1192,27 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id) if (!dring->vaddr || !dring->desc) return; - for (idx = 0; idx < DESC_NUM; idx++) { desc = &dring->desc[idx]; if (!desc->addr) continue; - dma_unmap_single(priv->dev, desc->dma_addr, desc->len, - id == NETSEC_RING_RX ? 
DMA_FROM_DEVICE : - DMA_TO_DEVICE); - if (id == NETSEC_RING_RX) - skb_free_frag(desc->addr); - else if (id == NETSEC_RING_TX) + if (id == NETSEC_RING_RX) { + struct page *page = virt_to_page(desc->addr); + + page_pool_put_page(dring->page_pool, page, false); + } else if (id == NETSEC_RING_TX) { + dma_unmap_single(priv->dev, desc->dma_addr, desc->len, + DMA_TO_DEVICE); dev_kfree_skb(desc->skb); + } + } + + /* Rx is currently using page_pool */ + if (id == NETSEC_RING_RX) { + if (xdp_rxq_info_is_reg(&dring->xdp_rxq)) + xdp_rxq_info_unreg(&dring->xdp_rxq); + page_pool_destroy(dring->page_pool); } memset(dring->desc, 0, sizeof(struct netsec_desc) * DESC_NUM); @@ -1029,7 +1242,6 @@ static void netsec_free_dring(struct netsec_priv *priv, int id) static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) { struct netsec_desc_ring *dring = &priv->desc_ring[id]; - int i; dring->vaddr = dma_alloc_coherent(priv->dev, DESC_SZ * DESC_NUM, &dring->desc_dma, GFP_KERNEL); @@ -1040,19 +1252,6 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id) if (!dring->desc) goto err; - if (id == NETSEC_RING_TX) { - for (i = 0; i < DESC_NUM; i++) { - struct netsec_de *de; - - de = dring->vaddr + (DESC_SZ * i); - /* de->attr is not going to be accessed by the NIC - * until netsec_set_tx_de() is called. - * No need for a dma_wmb() here - */ - de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; - } - } - return 0; err: netsec_free_dring(priv, id); @@ -1060,10 +1259,60 @@ err: return -ENOMEM; } +static void netsec_setup_tx_dring(struct netsec_priv *priv) +{ + struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_TX]; + struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog); + int i; + + for (i = 0; i < DESC_NUM; i++) { + struct netsec_de *de; + + de = dring->vaddr + (DESC_SZ * i); + /* de->attr is not going to be accessed by the NIC + * until netsec_set_tx_de() is called. + * No need for a dma_wmb() here + */ + de->attr = 1U << NETSEC_TX_SHIFT_OWN_FIELD; + } + + if (xdp_prog) + dring->is_xdp = true; + else + dring->is_xdp = false; + +} + static int netsec_setup_rx_dring(struct netsec_priv *priv) { struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX]; - int i; + struct bpf_prog *xdp_prog = READ_ONCE(priv->xdp_prog); + struct page_pool_params pp_params = { 0 }; + int i, err; + + pp_params.order = 0; + /* internal DMA mapping in page_pool */ + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = DESC_NUM; + pp_params.nid = cpu_to_node(0); + pp_params.dev = priv->dev; + pp_params.dma_dir = xdp_prog ? 
DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; + + dring->page_pool = page_pool_create(&pp_params); + if (IS_ERR(dring->page_pool)) { + err = PTR_ERR(dring->page_pool); + dring->page_pool = NULL; + goto err_out; + } + + err = xdp_rxq_info_reg(&dring->xdp_rxq, priv->ndev, 0); + if (err) + goto err_out; + + err = xdp_rxq_info_reg_mem_model(&dring->xdp_rxq, MEM_TYPE_PAGE_POOL, + dring->page_pool); + if (err) + goto err_out; for (i = 0; i < DESC_NUM; i++) { struct netsec_desc *desc = &dring->desc[i]; @@ -1071,10 +1320,10 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv) void *buf; u16 len; - buf = netsec_alloc_rx_data(priv, &dma_handle, &len, - false); + buf = netsec_alloc_rx_data(priv, &dma_handle, &len); + if (!buf) { - netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); + err = -ENOMEM; goto err_out; } desc->dma_addr = dma_handle; @@ -1087,7 +1336,8 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv) return 0; err_out: - return -ENOMEM; + netsec_uninit_pkt_dring(priv, NETSEC_RING_RX); + return err; } static int netsec_netdev_load_ucode_region(struct netsec_priv *priv, u32 reg, @@ -1361,6 +1611,7 @@ static int netsec_netdev_open(struct net_device *ndev) pm_runtime_get_sync(priv->dev); + netsec_setup_tx_dring(priv); ret = netsec_setup_rx_dring(priv); if (ret) { netif_err(priv, probe, priv->ndev, @@ -1466,6 +1717,9 @@ static int netsec_netdev_init(struct net_device *ndev) if (ret) goto err2; + spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock); + spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock); + return 0; err2: netsec_free_dring(priv, NETSEC_RING_RX); @@ -1498,6 +1752,81 @@ static int netsec_netdev_ioctl(struct net_device *ndev, struct ifreq *ifr, return phy_mii_ioctl(ndev->phydev, ifr, cmd); } +static int netsec_xdp_xmit(struct net_device *ndev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct netsec_priv *priv = netdev_priv(ndev); + struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX]; + int drops = 0; + int i; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + spin_lock(&tx_ring->lock); + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + int err; + + err = netsec_xdp_queue_one(priv, xdpf, true); + if (err != NETSEC_XDP_TX) { + xdp_return_frame_rx_napi(xdpf); + drops++; + } else { + tx_ring->xdp_xmit++; + } + } + spin_unlock(&tx_ring->lock); + + if (unlikely(flags & XDP_XMIT_FLUSH)) { + netsec_xdp_ring_tx_db(priv, tx_ring->xdp_xmit); + tx_ring->xdp_xmit = 0; + } + + return n - drops; +} + +static int netsec_xdp_setup(struct netsec_priv *priv, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = priv->ndev; + struct bpf_prog *old_prog; + + /* For now just support only the usual MTU sized frames */ + if (prog && dev->mtu > 1500) { + NL_SET_ERR_MSG_MOD(extack, "Jumbo frames not supported on XDP"); + return -EOPNOTSUPP; + } + + if (netif_running(dev)) + netsec_netdev_stop(dev); + + /* Detach old prog, if any */ + old_prog = xchg(&priv->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + + if (netif_running(dev)) + netsec_netdev_open(dev); + + return 0; +} + +static int netsec_xdp(struct net_device *ndev, struct netdev_bpf *xdp) +{ + struct netsec_priv *priv = netdev_priv(ndev); + + switch (xdp->command) { + case XDP_SETUP_PROG: + return netsec_xdp_setup(priv, xdp->prog, xdp->extack); + case XDP_QUERY_PROG: + xdp->prog_id = priv->xdp_prog ? 
priv->xdp_prog->aux->id : 0; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops netsec_netdev_ops = { .ndo_init = netsec_netdev_init, .ndo_uninit = netsec_netdev_uninit, @@ -1508,6 +1837,8 @@ static const struct net_device_ops netsec_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = netsec_netdev_ioctl, + .ndo_xdp_xmit = netsec_xdp_xmit, + .ndo_bpf = netsec_xdp, }; static int netsec_of_probe(struct platform_device *pdev, diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 06545d7399fc..2325b40dff6e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only config STMMAC_ETH - tristate "STMicroelectronics 10/100/1000/EQOS Ethernet driver" + tristate "STMicroelectronics Multi-Gigabit Ethernet driver" depends on HAS_IOMEM && HAS_DMA select MII - select PHYLIB + select PAGE_POOL + select PHYLINK select CRC32 imply PTP_1588_CLOCK select RESET_CONTROLLER @@ -13,6 +14,16 @@ config STMMAC_ETH if STMMAC_ETH +config STMMAC_SELFTESTS + bool "Support for STMMAC Selftests" + depends on INET + depends on STMMAC_ETH + default n + ---help--- + This adds support for STMMAC Selftests using ethtool. Enable this + feature if you are facing problems with your HW and submit the test + results to the netdev Mailing List. + config STMMAC_PLATFORM tristate "STMMAC Platform bus support" depends on STMMAC_ETH @@ -31,7 +42,6 @@ if STMMAC_PLATFORM config DWMAC_DWC_QOS_ETH tristate "Support for snps,dwc-qos-ethernet.txt DT binding." - select PHYLIB select CRC32 select MII depends on OF && HAS_DMA diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index c529c21e9bdd..c59926d96bcc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -8,6 +8,8 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \ stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \ $(stmmac-y) +stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o + # Ordering matters. Generic driver must be last. 
obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o obj-$(CONFIG_DWMAC_ANARION) += dwmac-anarion.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index ceb0d23f5041..ed872eed1cab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -246,12 +246,13 @@ struct stmmac_safety_stats { /* Max/Min RI Watchdog Timer count value */ #define MAX_DMA_RIWT 0xff -#define MIN_DMA_RIWT 0x20 +#define MIN_DMA_RIWT 0x10 /* Tx coalesce parameters */ #define STMMAC_COAL_TX_TIMER 1000 #define STMMAC_MAX_COAL_TX_TICK 100000 #define STMMAC_TX_MAX_FRAMES 256 -#define STMMAC_TX_FRAMES 25 +#define STMMAC_TX_FRAMES 1 +#define STMMAC_RX_FRAMES 25 /* Packets types */ enum packets_types { @@ -325,6 +326,7 @@ struct dma_features { /* 802.3az - Energy-Efficient Ethernet (EEE) */ unsigned int eee; unsigned int av; + unsigned int hash_tb_sz; unsigned int tsoen; /* TX and RX csum */ unsigned int tx_coe; @@ -351,6 +353,7 @@ struct dma_features { unsigned int frpsel; unsigned int frpbs; unsigned int frpes; + unsigned int addr64; }; /* GMAC TX FIFO is 8K, Rx FIFO is 16K */ @@ -392,8 +395,12 @@ struct mac_link { u32 speed100; u32 speed1000; u32 speed2500; - u32 speed10000; u32 duplex; + struct { + u32 speed2500; + u32 speed5000; + u32 speed10000; + } xgmii; }; struct mii_regs { @@ -414,12 +421,13 @@ struct mac_device_info { const struct stmmac_mode_ops *mode; const struct stmmac_hwtimestamp *ptp; const struct stmmac_tc_ops *tc; + const struct stmmac_mmc_ops *mmc; struct mii_regs mii; /* MII register Addresses */ struct mac_link link; void __iomem *pcsr; /* vpointer to device CSRs */ - int multicast_filter_bins; - int unicast_filter_entries; - int mcast_bits_log2; + unsigned int multicast_filter_bins; + unsigned int unicast_filter_entries; + unsigned int mcast_bits_log2; unsigned int rx_csum; unsigned int pcs; unsigned int pmt; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 126b66bb73a6..79f2ee37afed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -9,6 +9,7 @@ #include <linux/of.h> #include <linux/of_device.h> #include <linux/of_net.h> +#include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/stmmac.h> @@ -298,6 +299,9 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) return ret; } + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + return 0; } @@ -307,6 +311,9 @@ static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv) const struct mediatek_dwmac_variant *variant = plat->variant; clk_bulk_disable_unprepare(variant->num_clks, plat->clks); + + pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); } static int mediatek_dwmac_probe(struct platform_device *pdev) @@ -349,6 +356,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) plat_dat->has_gmac4 = 1; plat_dat->has_gmac = 0; plat_dat->pmt = 0; + plat_dat->riwt_off = 1; plat_dat->maxmtu = ETH_DATA_LEN; plat_dat->bsp_priv = priv_plat; plat_dat->init = mediatek_dwmac_init; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 8bdbddeec117..c141fe783e87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -27,9 +27,12 @@ #define SYSMGR_EMACGRP_CTRL_PHYSEL_WIDTH 2 #define 
SYSMGR_EMACGRP_CTRL_PHYSEL_MASK 0x00000003 #define SYSMGR_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000010 +#define SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK 0x00000100 #define SYSMGR_FPGAGRP_MODULE_REG 0x00000028 #define SYSMGR_FPGAGRP_MODULE_EMAC 0x00000004 +#define SYSMGR_FPGAINTF_EMAC_REG 0x00000070 +#define SYSMGR_FPGAINTF_EMAC_BIT 0x1 #define EMAC_SPLITTER_CTRL_REG 0x0 #define EMAC_SPLITTER_CTRL_SPEED_MASK 0x3 @@ -37,6 +40,11 @@ #define EMAC_SPLITTER_CTRL_SPEED_100 0x3 #define EMAC_SPLITTER_CTRL_SPEED_1000 0x0 +struct socfpga_dwmac; +struct socfpga_dwmac_ops { + int (*set_phy_mode)(struct socfpga_dwmac *dwmac_priv); +}; + struct socfpga_dwmac { int interface; u32 reg_offset; @@ -48,6 +56,7 @@ struct socfpga_dwmac { void __iomem *splitter_base; bool f2h_ptp_ref_clk; struct tse_pcs pcs; + const struct socfpga_dwmac_ops *ops; }; static void socfpga_dwmac_fix_mac_speed(void *priv, unsigned int speed) @@ -222,25 +231,36 @@ err_node_put: return ret; } -static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) +static int socfpga_set_phy_mode_common(int phymode, u32 *val) { - struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; - int phymode = dwmac->interface; - u32 reg_offset = dwmac->reg_offset; - u32 reg_shift = dwmac->reg_shift; - u32 ctrl, val, module; - switch (phymode) { case PHY_INTERFACE_MODE_RGMII: case PHY_INTERFACE_MODE_RGMII_ID: - val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII; + *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RGMII; break; case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_GMII: case PHY_INTERFACE_MODE_SGMII: - val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; + *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; + break; + case PHY_INTERFACE_MODE_RMII: + *val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_RMII; break; default: + return -EINVAL; + } + return 0; +} + +static int socfpga_gen5_set_phy_mode(struct socfpga_dwmac *dwmac) +{ + struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; + int phymode = dwmac->interface; + u32 reg_offset = dwmac->reg_offset; + u32 reg_shift = dwmac->reg_shift; + u32 ctrl, val, module; + + if (socfpga_set_phy_mode_common(phymode, &val)) { dev_err(dwmac->dev, "bad phy mode %d\n", phymode); return -EINVAL; } @@ -291,6 +311,62 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) return 0; } +static int socfpga_gen10_set_phy_mode(struct socfpga_dwmac *dwmac) +{ + struct regmap *sys_mgr_base_addr = dwmac->sys_mgr_base_addr; + int phymode = dwmac->interface; + u32 reg_offset = dwmac->reg_offset; + u32 reg_shift = dwmac->reg_shift; + u32 ctrl, val, module; + + if (socfpga_set_phy_mode_common(phymode, &val)) + return -EINVAL; + + /* Overwrite val to GMII if splitter core is enabled. The phymode here + * is the actual phy mode on phy hardware, but phy interface from + * EMAC core is GMII. 
+ */ + if (dwmac->splitter_base) + val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; + + /* Assert reset to the enet controller before changing the phy mode */ + reset_control_assert(dwmac->stmmac_ocp_rst); + reset_control_assert(dwmac->stmmac_rst); + + regmap_read(sys_mgr_base_addr, reg_offset, &ctrl); + ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK); + ctrl |= val; + + if (dwmac->f2h_ptp_ref_clk || + phymode == PHY_INTERFACE_MODE_MII || + phymode == PHY_INTERFACE_MODE_GMII || + phymode == PHY_INTERFACE_MODE_SGMII) { + ctrl |= SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK; + regmap_read(sys_mgr_base_addr, SYSMGR_FPGAINTF_EMAC_REG, + &module); + module |= (SYSMGR_FPGAINTF_EMAC_BIT << reg_shift); + regmap_write(sys_mgr_base_addr, SYSMGR_FPGAINTF_EMAC_REG, + module); + } else { + ctrl &= ~SYSMGR_GEN10_EMACGRP_CTRL_PTP_REF_CLK_MASK; + } + + regmap_write(sys_mgr_base_addr, reg_offset, ctrl); + + /* Deassert reset for the phy configuration to be sampled by + * the enet controller, and operation to start in requested mode + */ + reset_control_deassert(dwmac->stmmac_ocp_rst); + reset_control_deassert(dwmac->stmmac_rst); + if (phymode == PHY_INTERFACE_MODE_SGMII) { + if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) { + dev_err(dwmac->dev, "Unable to initialize TSE PCS"); + return -EINVAL; + } + } + return 0; +} + static int socfpga_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -300,6 +376,13 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) struct socfpga_dwmac *dwmac; struct net_device *ndev; struct stmmac_priv *stpriv; + const struct socfpga_dwmac_ops *ops; + + ops = device_get_match_data(&pdev->dev); + if (!ops) { + dev_err(&pdev->dev, "no of match data provided\n"); + return -EINVAL; + } ret = stmmac_get_platform_resources(pdev, &stmmac_res); if (ret) @@ -330,6 +413,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } + dwmac->ops = ops; plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed; @@ -346,7 +430,7 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) */ dwmac->stmmac_rst = stpriv->plat->stmmac_rst; - ret = socfpga_dwmac_set_phy_mode(dwmac); + ret = ops->set_phy_mode(dwmac); if (ret) goto err_dvr_remove; @@ -365,8 +449,9 @@ static int socfpga_dwmac_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); + struct socfpga_dwmac *dwmac_priv = get_stmmac_bsp_priv(dev); - socfpga_dwmac_set_phy_mode(priv->plat->bsp_priv); + dwmac_priv->ops->set_phy_mode(priv->plat->bsp_priv); /* Before the enet controller is suspended, the phy is suspended. * This causes the phy clock to be gated. 
The enet controller is @@ -393,8 +478,17 @@ static int socfpga_dwmac_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend, socfpga_dwmac_resume); +static const struct socfpga_dwmac_ops socfpga_gen5_ops = { + .set_phy_mode = socfpga_gen5_set_phy_mode, +}; + +static const struct socfpga_dwmac_ops socfpga_gen10_ops = { + .set_phy_mode = socfpga_gen10_set_phy_mode, +}; + static const struct of_device_id socfpga_dwmac_match[] = { - { .compatible = "altr,socfpga-stmmac" }, + { .compatible = "altr,socfpga-stmmac", .data = &socfpga_gen5_ops }, + { .compatible = "altr,socfpga-stmmac-a10-s10", .data = &socfpga_gen10_ops }, { } }; MODULE_DEVICE_TABLE(of, socfpga_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index a69c34f605b1..2856f3fe5266 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -138,6 +138,20 @@ static const struct emac_variant emac_variant_a64 = { .tx_delay_max = 7, }; +static const struct emac_variant emac_variant_h6 = { + .default_syscon_value = 0x50000, + .syscon_field = &sun8i_syscon_reg_field, + /* The "Internal PHY" of H6 is not on the die. It's on the + * co-packaged AC200 chip instead. + */ + .soc_has_internal_phy = false, + .support_mii = true, + .support_rmii = true, + .support_rgmii = true, + .rx_delay_max = 31, + .tx_delay_max = 7, +}; + #define EMAC_BASIC_CTL0 0x00 #define EMAC_BASIC_CTL1 0x04 #define EMAC_INT_STA 0x08 @@ -275,18 +289,18 @@ static void sun8i_dwmac_dma_init(void __iomem *ioaddr, static void sun8i_dwmac_dma_init_rx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t dma_rx_phy, u32 chan) { /* Write RX descriptors address */ - writel(dma_rx_phy, ioaddr + EMAC_RX_DESC_LIST); + writel(lower_32_bits(dma_rx_phy), ioaddr + EMAC_RX_DESC_LIST); } static void sun8i_dwmac_dma_init_tx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t dma_tx_phy, u32 chan) { /* Write TX descriptors address */ - writel(dma_tx_phy, ioaddr + EMAC_TX_DESC_LIST); + writel(lower_32_bits(dma_tx_phy), ioaddr + EMAC_TX_DESC_LIST); } /* sun8i_dwmac_dump_regs() - Dump EMAC address space @@ -884,6 +898,11 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) * address. No need to mask it again. */ reg |= 1 << H3_EPHY_ADDR_SHIFT; + } else { + /* For SoCs without internal PHY the PHY selection bit should be + * set to 0 (external PHY). 
+ */ + reg &= ~H3_EPHY_SELECT; } if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { @@ -977,6 +996,18 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) regulator_disable(gmac->regulator); } +static void sun8i_dwmac_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + EMAC_BASIC_CTL0); + + if (enable) + value |= EMAC_LOOPBACK; + else + value &= ~EMAC_LOOPBACK; + + writel(value, ioaddr + EMAC_BASIC_CTL0); +} + static const struct stmmac_ops sun8i_dwmac_ops = { .core_init = sun8i_dwmac_core_init, .set_mac = sun8i_dwmac_set_mac, @@ -986,6 +1017,7 @@ static const struct stmmac_ops sun8i_dwmac_ops = { .flow_ctrl = sun8i_dwmac_flow_ctrl, .set_umac_addr = sun8i_dwmac_set_umac_addr, .get_umac_addr = sun8i_dwmac_get_umac_addr, + .set_mac_loopback = sun8i_dwmac_set_mac_loopback, }; static struct mac_device_info *sun8i_dwmac_setup(void *ppriv) @@ -1203,6 +1235,8 @@ static const struct of_device_id sun8i_dwmac_match[] = { .data = &emac_variant_r40 }, { .compatible = "allwinner,sun50i-a64-emac", .data = &emac_variant_a64 }, + { .compatible = "allwinner,sun50i-h6-emac", + .data = &emac_variant_h6 }, { } }; MODULE_DEVICE_TABLE(of, sun8i_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h index b83d3a98f5f1..b70d44ac0990 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h @@ -136,6 +136,7 @@ enum inter_frame_gap { #define GMAC_FRAME_FILTER_DAIF 0x00000008 /* DA Inverse Filtering */ #define GMAC_FRAME_FILTER_PM 0x00000010 /* Pass all multicast */ #define GMAC_FRAME_FILTER_DBF 0x00000020 /* Disable Broadcast frames */ +#define GMAC_FRAME_FILTER_PCF 0x00000080 /* Pass Control frames */ #define GMAC_FRAME_FILTER_SAIF 0x00000100 /* Inverse Filtering */ #define GMAC_FRAME_FILTER_SAF 0x00000200 /* Source Address Filter */ #define GMAC_FRAME_FILTER_HPF 0x00000400 /* Hash or perfect Filter */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 9fff81170163..3d69da112625 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -162,7 +162,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, memset(mc_filter, 0, sizeof(mc_filter)); if (dev->flags & IFF_PROMISC) { - value = GMAC_FRAME_FILTER_PR; + value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF; } else if (dev->flags & IFF_ALLMULTI) { value = GMAC_FRAME_FILTER_PM; /* pass all multi */ } else if (!netdev_mc_empty(dev)) { @@ -188,6 +188,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, } } + value |= GMAC_FRAME_FILTER_HPF; dwmac1000_set_mchash(ioaddr, mc_filter, mcbitslog2); /* Handle multiple unicast addresses (perfect filtering) */ @@ -206,6 +207,12 @@ static void dwmac1000_set_filter(struct mac_device_info *hw, GMAC_ADDR_LOW(reg)); reg++; } + + while (reg <= perfect_addr_number) { + writel(0, ioaddr + GMAC_ADDR_HIGH(reg)); + writel(0, ioaddr + GMAC_ADDR_LOW(reg)); + reg++; + } } #ifdef FRAME_FILTER_DEBUG @@ -489,6 +496,18 @@ static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x, x->mac_gmii_rx_proto_engine++; } +static void dwmac1000_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + GMAC_CONTROL); + + if (enable) + value |= GMAC_CONTROL_LM; + else + value &= ~GMAC_CONTROL_LM; + + writel(value, ioaddr + GMAC_CONTROL); +} + const 
struct stmmac_ops dwmac1000_ops = { .core_init = dwmac1000_core_init, .set_mac = stmmac_set_mac, @@ -508,6 +527,7 @@ const struct stmmac_ops dwmac1000_ops = { .pcs_ctrl_ane = dwmac1000_ctrl_ane, .pcs_rane = dwmac1000_rane, .pcs_get_adv_lp = dwmac1000_get_adv_lp, + .set_mac_loopback = dwmac1000_set_mac_loopback, }; int dwmac1000_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 1fdedf77678f..2bac49b49f73 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -112,18 +112,18 @@ static void dwmac1000_dma_init(void __iomem *ioaddr, static void dwmac1000_dma_init_rx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t dma_rx_phy, u32 chan) { /* RX descriptor base address list must be written into DMA CSR3 */ - writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR); } static void dwmac1000_dma_init_tx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t dma_tx_phy, u32 chan) { /* TX descriptor base address list must be written into DMA CSR4 */ - writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_TX_BASE_ADDR); } static u32 dwmac1000_configure_fc(u32 csr6, int rxfifosz) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c index 8842f6627cb8..ebcad8dd99db 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c @@ -150,6 +150,18 @@ static void dwmac100_pmt(struct mac_device_info *hw, unsigned long mode) return; } +static void dwmac100_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + MAC_CONTROL); + + if (enable) + value |= MAC_CONTROL_OM; + else + value &= ~MAC_CONTROL_OM; + + writel(value, ioaddr + MAC_CONTROL); +} + const struct stmmac_ops dwmac100_ops = { .core_init = dwmac100_core_init, .set_mac = stmmac_set_mac, @@ -161,6 +173,7 @@ const struct stmmac_ops dwmac100_ops = { .pmt = dwmac100_pmt, .set_umac_addr = dwmac100_set_umac_addr, .get_umac_addr = dwmac100_get_umac_addr, + .set_mac_loopback = dwmac100_set_mac_loopback, }; int dwmac100_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c index c980cc7360a4..8f0d9bc7cab5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c @@ -31,18 +31,18 @@ static void dwmac100_dma_init(void __iomem *ioaddr, static void dwmac100_dma_init_rx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t dma_rx_phy, u32 chan) { /* RX descriptor base addr lists must be written into DMA CSR3 */ - writel(dma_rx_phy, ioaddr + DMA_RCV_BASE_ADDR); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_RCV_BASE_ADDR); } static void dwmac100_dma_init_tx(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t dma_tx_phy, u32 chan) { /* TX descriptor base addr lists must be written into DMA CSR4 */ - writel(dma_tx_phy, ioaddr + DMA_TX_BASE_ADDR); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_TX_BASE_ADDR); } /* Store and Forward capability is not used at all. 
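The dwmac100 and dwmac1000 hunks above, like the sun8i and dwxgmac2 hunks elsewhere in this series, all make the same change: the DMA init callbacks take a dma_addr_t instead of a u32 and write only the relevant 32-bit halves into the descriptor base registers, so 32-bit-only blocks keep working unchanged while 64-bit-capable ones can also program the high word. A minimal sketch of the idiom follows; the EXAMPLE_* register offsets are invented for illustration and do not correspond to any real DWMAC block.

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/types.h>

#define EXAMPLE_RX_BASE_HI	0x18	/* hypothetical high-word register */
#define EXAMPLE_RX_BASE_LO	0x1c	/* hypothetical low-word register */

static void example_dma_init_rx(void __iomem *ioaddr, dma_addr_t rx_phy)
{
	/* upper_32_bits() evaluates to 0 when dma_addr_t is 32 bits wide,
	 * so the same helpers are safe on both configurations; blocks with
	 * no high-word register simply omit the first write.
	 */
	writel(upper_32_bits(rx_phy), ioaddr + EXAMPLE_RX_BASE_HI);
	writel(lower_32_bits(rx_phy), ioaddr + EXAMPLE_RX_BASE_LO);
}
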
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index 80234f12bf7f..2ed11a581d80 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -15,8 +15,7 @@ /* MAC registers */ #define GMAC_CONFIG 0x00000000 #define GMAC_PACKET_FILTER 0x00000008 -#define GMAC_HASH_TAB_0_31 0x00000010 -#define GMAC_HASH_TAB_32_63 0x00000014 +#define GMAC_HASH_TAB(x) (0x10 + (x) * 4) #define GMAC_RX_FLOW_CTRL 0x00000090 #define GMAC_QX_TX_FLOW_CTRL(x) (0x70 + x * 4) #define GMAC_TXQ_PRTY_MAP0 0x98 @@ -61,6 +60,8 @@ #define GMAC_PACKET_FILTER_PR BIT(0) #define GMAC_PACKET_FILTER_HMC BIT(2) #define GMAC_PACKET_FILTER_PM BIT(4) +#define GMAC_PACKET_FILTER_PCF BIT(7) +#define GMAC_PACKET_FILTER_HPF BIT(10) #define GMAC_MAX_PERFECT_ADDRESSES 128 @@ -157,6 +158,7 @@ enum power_event { #define GMAC_CONFIG_PS BIT(15) #define GMAC_CONFIG_FES BIT(14) #define GMAC_CONFIG_DM BIT(13) +#define GMAC_CONFIG_LM BIT(12) #define GMAC_CONFIG_DCRS BIT(9) #define GMAC_CONFIG_TE BIT(1) #define GMAC_CONFIG_RE BIT(0) @@ -178,6 +180,7 @@ enum power_event { #define GMAC_HW_FEAT_MIISEL BIT(0) /* MAC HW features1 bitmap */ +#define GMAC_HW_HASH_TB_SZ GENMASK(25, 24) #define GMAC_HW_FEAT_AVSEL BIT(20) #define GMAC_HW_TSOEN BIT(18) #define GMAC_HW_TXFIFOSIZE GENMASK(10, 6) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 99d772517242..01c2e2d83e76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -400,57 +400,74 @@ static void dwmac4_set_filter(struct mac_device_info *hw, struct net_device *dev) { void __iomem *ioaddr = (void __iomem *)dev->base_addr; - unsigned int value = 0; + int numhashregs = (hw->multicast_filter_bins >> 5); + int mcbitslog2 = hw->mcast_bits_log2; + unsigned int value; + int i; + value = readl(ioaddr + GMAC_PACKET_FILTER); + value &= ~GMAC_PACKET_FILTER_HMC; + value &= ~GMAC_PACKET_FILTER_HPF; + value &= ~GMAC_PACKET_FILTER_PCF; + value &= ~GMAC_PACKET_FILTER_PM; + value &= ~GMAC_PACKET_FILTER_PR; if (dev->flags & IFF_PROMISC) { - value = GMAC_PACKET_FILTER_PR; + value = GMAC_PACKET_FILTER_PR | GMAC_PACKET_FILTER_PCF; } else if ((dev->flags & IFF_ALLMULTI) || - (netdev_mc_count(dev) > HASH_TABLE_SIZE)) { + (netdev_mc_count(dev) > hw->multicast_filter_bins)) { /* Pass all multi */ - value = GMAC_PACKET_FILTER_PM; - /* Set the 64 bits of the HASH tab. To be updated if taller - * hash table is used - */ - writel(0xffffffff, ioaddr + GMAC_HASH_TAB_0_31); - writel(0xffffffff, ioaddr + GMAC_HASH_TAB_32_63); + value |= GMAC_PACKET_FILTER_PM; + /* Set all the bits of the HASH tab */ + for (i = 0; i < numhashregs; i++) + writel(0xffffffff, ioaddr + GMAC_HASH_TAB(i)); } else if (!netdev_mc_empty(dev)) { - u32 mc_filter[2]; struct netdev_hw_addr *ha; + u32 mc_filter[8]; /* Hash filter for multicast */ - value = GMAC_PACKET_FILTER_HMC; + value |= GMAC_PACKET_FILTER_HMC; memset(mc_filter, 0, sizeof(mc_filter)); netdev_for_each_mc_addr(ha, dev) { - /* The upper 6 bits of the calculated CRC are used to - * index the content of the Hash Table Reg 0 and 1. + /* The upper n bits of the calculated CRC are used to + * index the contents of the hash table. The number of + * bits used depends on the hardware configuration + * selected at core configuration time. 
*/ - int bit_nr = - (bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26); - /* The most significant bit determines the register - * to use while the other 5 bits determines the bit - * within the selected register + int bit_nr = bitrev32(~crc32_le(~0, ha->addr, + ETH_ALEN)) >> (32 - mcbitslog2); + /* The most significant bit determines the register to + * use (H/L) while the other 5 bits determine the bit + * within the register. */ - mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1F)); + mc_filter[bit_nr >> 5] |= (1 << (bit_nr & 0x1f)); } - writel(mc_filter[0], ioaddr + GMAC_HASH_TAB_0_31); - writel(mc_filter[1], ioaddr + GMAC_HASH_TAB_32_63); + for (i = 0; i < numhashregs; i++) + writel(mc_filter[i], ioaddr + GMAC_HASH_TAB(i)); } + value |= GMAC_PACKET_FILTER_HPF; + /* Handle multiple unicast addresses */ if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) { /* Switch to promiscuous mode if more than 128 addrs * are required */ value |= GMAC_PACKET_FILTER_PR; - } else if (!netdev_uc_empty(dev)) { - int reg = 1; + } else { struct netdev_hw_addr *ha; + int reg = 1; netdev_for_each_uc_addr(ha, dev) { dwmac4_set_umac_addr(hw, ha->addr, reg); reg++; } + + while (reg < GMAC_MAX_PERFECT_ADDRESSES) { + writel(0, ioaddr + GMAC_ADDR_HIGH(reg)); + writel(0, ioaddr + GMAC_ADDR_LOW(reg)); + reg++; + } } writel(value, ioaddr + GMAC_PACKET_FILTER); @@ -468,8 +485,9 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, if (fc & FLOW_RX) { pr_debug("\tReceive Flow-Control ON\n"); flow |= GMAC_RX_FLOW_CTRL_RFE; - writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); } + writel(flow, ioaddr + GMAC_RX_FLOW_CTRL); + if (fc & FLOW_TX) { pr_debug("\tTransmit Flow-Control ON\n"); @@ -477,7 +495,7 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, pr_debug("\tduplex mode: PAUSE %d\n", pause_time); for (queue = 0; queue < tx_cnt; queue++) { - flow |= GMAC_TX_FLOW_CTRL_TFE; + flow = GMAC_TX_FLOW_CTRL_TFE; if (duplex) flow |= @@ -485,6 +503,9 @@ static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex, writel(flow, ioaddr + GMAC_QX_TX_FLOW_CTRL(queue)); } + } else { + for (queue = 0; queue < tx_cnt; queue++) + writel(0, ioaddr + GMAC_QX_TX_FLOW_CTRL(queue)); } } @@ -700,6 +721,18 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x, x->mac_gmii_rx_proto_engine++; } +static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + GMAC_CONFIG); + + if (enable) + value |= GMAC_CONFIG_LM; + else + value &= ~GMAC_CONFIG_LM; + + writel(value, ioaddr + GMAC_CONFIG); +} + const struct stmmac_ops dwmac4_ops = { .core_init = dwmac4_core_init, .set_mac = stmmac_set_mac, @@ -729,6 +762,7 @@ const struct stmmac_ops dwmac4_ops = { .pcs_get_adv_lp = dwmac4_get_adv_lp, .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, + .set_mac_loopback = dwmac4_set_mac_loopback, }; const struct stmmac_ops dwmac410_ops = { @@ -760,6 +794,7 @@ const struct stmmac_ops dwmac410_ops = { .pcs_get_adv_lp = dwmac4_get_adv_lp, .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, + .set_mac_loopback = dwmac4_set_mac_loopback, }; const struct stmmac_ops dwmac510_ops = { @@ -796,6 +831,7 @@ const struct stmmac_ops dwmac510_ops = { .safety_feat_dump = dwmac5_safety_feat_dump, .rxp_config = dwmac5_rxp_config, .flex_pps_config = dwmac5_flex_pps_config, + .set_mac_loopback = dwmac4_set_mac_loopback, }; int dwmac4_setup(struct stmmac_priv *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c 
b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index cf6436d3d6c7..dbde23e7e169 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -443,6 +443,15 @@ static void dwmac4_clear(struct dma_desc *p) p->des3 = 0; } +static int set_16kib_bfsize(int mtu) +{ + int ret = 0; + + if (unlikely(mtu >= BUF_SIZE_8KiB)) + ret = BUF_SIZE_16KiB; + return ret; +} + const struct stmmac_desc_ops dwmac4_desc_ops = { .tx_status = dwmac4_wrback_get_tx_status, .rx_status = dwmac4_wrback_get_rx_status, @@ -469,4 +478,6 @@ const struct stmmac_desc_ops dwmac4_desc_ops = { .clear = dwmac4_clear, }; -const struct stmmac_mode_ops dwmac4_ring_mode_ops = { }; +const struct stmmac_mode_ops dwmac4_ring_mode_ops = { + .set_16kib_bfsize = set_16kib_bfsize, +}; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 0f208e13da9f..3ed5508586ef 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -70,7 +70,7 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t dma_rx_phy, u32 chan) { u32 value; u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl; @@ -79,12 +79,12 @@ static void dwmac4_dma_init_rx_chan(void __iomem *ioaddr, value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT); writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan)); - writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(chan)); + writel(lower_32_bits(dma_rx_phy), ioaddr + DMA_CHAN_RX_BASE_ADDR(chan)); } static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t dma_tx_phy, u32 chan) { u32 value; u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl; @@ -97,7 +97,7 @@ static void dwmac4_dma_init_tx_chan(void __iomem *ioaddr, writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); - writel(dma_tx_phy, ioaddr + DMA_CHAN_TX_BASE_ADDR(chan)); + writel(lower_32_bits(dma_tx_phy), ioaddr + DMA_CHAN_TX_BASE_ADDR(chan)); } static void dwmac4_dma_init_channel(void __iomem *ioaddr, @@ -351,6 +351,7 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature1 */ hw_cap = readl(ioaddr + GMAC_HW_FEATURE1); + dma_cap->hash_tb_sz = (hw_cap & GMAC_HW_HASH_TB_SZ) >> 24; dma_cap->av = (hw_cap & GMAC_HW_FEAT_AVSEL) >> 20; dma_cap->tsoen = (hw_cap & GMAC_HW_TSOEN) >> 18; /* RX and TX FIFO sizes are encoded as log2(n / 128). 
Undo that by diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 85826524683c..f2a29a90e085 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -85,10 +85,6 @@ void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan) value &= ~DMA_CONTROL_SR; writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan)); - - value = readl(ioaddr + GMAC_CONFIG); - value &= ~GMAC_CONFIG_RE; - writel(value, ioaddr + GMAC_CONFIG); } void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 085b700a4994..7f86dffb264d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -15,10 +15,14 @@ /* MAC Registers */ #define XGMAC_TX_CONFIG 0x00000000 #define XGMAC_CONFIG_SS_OFF 29 -#define XGMAC_CONFIG_SS_MASK GENMASK(30, 29) +#define XGMAC_CONFIG_SS_MASK GENMASK(31, 29) #define XGMAC_CONFIG_SS_10000 (0x0 << XGMAC_CONFIG_SS_OFF) -#define XGMAC_CONFIG_SS_2500 (0x2 << XGMAC_CONFIG_SS_OFF) -#define XGMAC_CONFIG_SS_1000 (0x3 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_2500_GMII (0x2 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_1000_GMII (0x3 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_100_MII (0x4 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_5000 (0x5 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_2500 (0x6 << XGMAC_CONFIG_SS_OFF) +#define XGMAC_CONFIG_SS_10_MII (0x7 << XGMAC_CONFIG_SS_OFF) #define XGMAC_CONFIG_SARC GENMASK(22, 20) #define XGMAC_CONFIG_SARC_SHIFT 20 #define XGMAC_CONFIG_JD BIT(16) @@ -29,6 +33,7 @@ #define XGMAC_CONFIG_GPSL GENMASK(29, 16) #define XGMAC_CONFIG_GPSL_SHIFT 16 #define XGMAC_CONFIG_S2KP BIT(11) +#define XGMAC_CONFIG_LM BIT(10) #define XGMAC_CONFIG_IPC BIT(9) #define XGMAC_CONFIG_JE BIT(8) #define XGMAC_CONFIG_WD BIT(7) @@ -39,6 +44,7 @@ #define XGMAC_CORE_INIT_RX 0 #define XGMAC_PACKET_FILTER 0x00000008 #define XGMAC_FILTER_RA BIT(31) +#define XGMAC_FILTER_PCF BIT(7) #define XGMAC_FILTER_PM BIT(4) #define XGMAC_FILTER_HMC BIT(2) #define XGMAC_FILTER_PR BIT(0) @@ -81,6 +87,7 @@ #define XGMAC_HWFEAT_GMIISEL BIT(1) #define XGMAC_HW_FEATURE1 0x00000120 #define XGMAC_HWFEAT_TSOEN BIT(18) +#define XGMAC_HWFEAT_ADDR64 GENMASK(15, 14) #define XGMAC_HWFEAT_TXFIFOSIZE GENMASK(10, 6) #define XGMAC_HWFEAT_RXFIFOSIZE GENMASK(4, 0) #define XGMAC_HW_FEATURE2 0x00000124 @@ -166,6 +173,7 @@ #define XGMAC_EN_LPI BIT(15) #define XGMAC_LPI_XIT_PKT BIT(14) #define XGMAC_AAL BIT(12) +#define XGMAC_EAME BIT(11) #define XGMAC_BLEN GENMASK(7, 1) #define XGMAC_BLEN256 BIT(7) #define XGMAC_BLEN128 BIT(6) @@ -175,6 +183,10 @@ #define XGMAC_BLEN8 BIT(2) #define XGMAC_BLEN4 BIT(1) #define XGMAC_UNDEF BIT(0) +#define XGMAC_TX_EDMA_CTRL 0x00003040 +#define XGMAC_TDPS GENMASK(29, 0) +#define XGMAC_RX_EDMA_CTRL 0x00003044 +#define XGMAC_RDPS GENMASK(29, 0) #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x))) #define XGMAC_PBLx8 BIT(16) #define XGMAC_DMA_CH_TX_CONTROL(x) (0x00003104 + (0x80 * (x))) @@ -187,7 +199,9 @@ #define XGMAC_RxPBL GENMASK(21, 16) #define XGMAC_RxPBL_SHIFT 16 #define XGMAC_RXST BIT(0) +#define XGMAC_DMA_CH_TxDESC_HADDR(x) (0x00003110 + (0x80 * (x))) #define XGMAC_DMA_CH_TxDESC_LADDR(x) (0x00003114 + (0x80 * (x))) +#define XGMAC_DMA_CH_RxDESC_HADDR(x) (0x00003118 + (0x80 * (x))) #define XGMAC_DMA_CH_RxDESC_LADDR(x) (0x0000311c + (0x80 * (x))) #define 
XGMAC_DMA_CH_TxDESC_TAIL_LPTR(x) (0x00003124 + (0x80 * (x))) #define XGMAC_DMA_CH_RxDESC_TAIL_LPTR(x) (0x0000312c + (0x80 * (x))) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 64b8cb88ea45..0a32c96a7854 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -36,7 +36,7 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, switch (hw->ps) { case SPEED_10000: - tx |= hw->link.speed10000; + tx |= hw->link.xgmii.speed10000; break; case SPEED_2500: tx |= hw->link.speed2500; @@ -310,7 +310,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, u32 value = XGMAC_FILTER_RA; if (dev->flags & IFF_PROMISC) { - value |= XGMAC_FILTER_PR; + value |= XGMAC_FILTER_PR | XGMAC_FILTER_PCF; } else if ((dev->flags & IFF_ALLMULTI) || (netdev_mc_count(dev) > HASH_TABLE_SIZE)) { value |= XGMAC_FILTER_PM; @@ -321,6 +321,18 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw, writel(value, ioaddr + XGMAC_PACKET_FILTER); } +static void dwxgmac2_set_mac_loopback(void __iomem *ioaddr, bool enable) +{ + u32 value = readl(ioaddr + XGMAC_RX_CONFIG); + + if (enable) + value |= XGMAC_CONFIG_LM; + else + value &= ~XGMAC_CONFIG_LM; + + writel(value, ioaddr + XGMAC_RX_CONFIG); +} + const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, .set_mac = dwxgmac2_set_mac, @@ -350,6 +362,7 @@ const struct stmmac_ops dwxgmac210_ops = { .pcs_get_adv_lp = NULL, .debug = NULL, .set_filter = dwxgmac2_set_filter, + .set_mac_loopback = dwxgmac2_set_mac_loopback, }; int dwxgmac2_setup(struct stmmac_priv *priv) @@ -368,11 +381,13 @@ int dwxgmac2_setup(struct stmmac_priv *priv) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); mac->link.duplex = 0; - mac->link.speed10 = 0; - mac->link.speed100 = 0; - mac->link.speed1000 = XGMAC_CONFIG_SS_1000; - mac->link.speed2500 = XGMAC_CONFIG_SS_2500; - mac->link.speed10000 = XGMAC_CONFIG_SS_10000; + mac->link.speed10 = XGMAC_CONFIG_SS_10_MII; + mac->link.speed100 = XGMAC_CONFIG_SS_100_MII; + mac->link.speed1000 = XGMAC_CONFIG_SS_1000_GMII; + mac->link.speed2500 = XGMAC_CONFIG_SS_2500_GMII; + mac->link.xgmii.speed2500 = XGMAC_CONFIG_SS_2500; + mac->link.xgmii.speed5000 = XGMAC_CONFIG_SS_5000; + mac->link.xgmii.speed10000 = XGMAC_CONFIG_SS_10000; mac->link.speed_mask = XGMAC_CONFIG_SS_MASK; mac->mii.addr = XGMAC_MDIO_ADDR; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 98fa471da7c0..c4c45402b8f8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -242,8 +242,8 @@ static void dwxgmac2_get_addr(struct dma_desc *p, unsigned int *addr) static void dwxgmac2_set_addr(struct dma_desc *p, dma_addr_t addr) { - p->des0 = cpu_to_le32(addr); - p->des1 = 0; + p->des0 = cpu_to_le32(lower_32_bits(addr)); + p->des1 = cpu_to_le32(upper_32_bits(addr)); } static void dwxgmac2_clear(struct dma_desc *p) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index e79037f511e1..a4f236e3593e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -27,7 +27,7 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr, if (dma_cfg->aal) value |= XGMAC_AAL; - writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); + writel(value | XGMAC_EAME, ioaddr + 
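
A minimal sketch of the 64-bit addressing idiom these XGMAC hunks adopt: with XGMAC_EAME (Enhanced Address Mode Enable) set in the sysbus-mode register, the DMA consumes 64-bit descriptor and ring-base addresses, so every programming site splits the dma_addr_t across a HADDR/LADDR register pair. On a 32-bit mapping upper_32_bits() is simply 0 and the high write is harmless (the offsets are parameters here, standing in for the new *_HADDR/*_LADDR defines):

#include <linux/io.h>
#include <linux/kernel.h>	/* upper_32_bits() / lower_32_bits() */

static void sketch_write_ring_base(void __iomem *ioaddr, dma_addr_t phy,
				   u32 haddr_off, u32 laddr_off)
{
	writel(upper_32_bits(phy), ioaddr + haddr_off);
	writel(lower_32_bits(phy), ioaddr + laddr_off);
}
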
XGMAC_DMA_SYSBUS_MODE); } static void dwxgmac2_dma_init_chan(void __iomem *ioaddr, @@ -44,7 +44,7 @@ static void dwxgmac2_dma_init_chan(void __iomem *ioaddr, static void dwxgmac2_dma_init_rx_chan(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan) + dma_addr_t phy, u32 chan) { u32 rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl; u32 value; @@ -54,12 +54,13 @@ static void dwxgmac2_dma_init_rx_chan(void __iomem *ioaddr, value |= (rxpbl << XGMAC_RxPBL_SHIFT) & XGMAC_RxPBL; writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); - writel(dma_rx_phy, ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); + writel(upper_32_bits(phy), ioaddr + XGMAC_DMA_CH_RxDESC_HADDR(chan)); + writel(lower_32_bits(phy), ioaddr + XGMAC_DMA_CH_RxDESC_LADDR(chan)); } static void dwxgmac2_dma_init_tx_chan(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan) + dma_addr_t phy, u32 chan) { u32 txpbl = dma_cfg->txpbl ?: dma_cfg->pbl; u32 value; @@ -70,7 +71,8 @@ static void dwxgmac2_dma_init_tx_chan(void __iomem *ioaddr, value |= XGMAC_OSP; writel(value, ioaddr + XGMAC_DMA_CH_TX_CONTROL(chan)); - writel(dma_tx_phy, ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); + writel(upper_32_bits(phy), ioaddr + XGMAC_DMA_CH_TxDESC_HADDR(chan)); + writel(lower_32_bits(phy), ioaddr + XGMAC_DMA_CH_TxDESC_LADDR(chan)); } static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) @@ -91,11 +93,11 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) value |= (axi->axi_rd_osr_lmt << XGMAC_RD_OSR_LMT_SHIFT) & XGMAC_RD_OSR_LMT; + if (!axi->axi_fb) + value |= XGMAC_UNDEF; + value &= ~XGMAC_BLEN; for (i = 0; i < AXI_BLEN; i++) { - if (axi->axi_blen[i]) - value &= ~XGMAC_UNDEF; - switch (axi->axi_blen[i]) { case 256: value |= XGMAC_BLEN256; @@ -122,6 +124,8 @@ static void dwxgmac2_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi) } writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); + writel(XGMAC_TDPS, ioaddr + XGMAC_TX_EDMA_CTRL); + writel(XGMAC_RDPS, ioaddr + XGMAC_RX_EDMA_CTRL); } static void dwxgmac2_dma_rx_mode(void __iomem *ioaddr, int mode, @@ -299,10 +303,6 @@ static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan) value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); value &= ~XGMAC_RXST; writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan)); - - value = readl(ioaddr + XGMAC_RX_CONFIG); - value &= ~XGMAC_CONFIG_RE; - writel(value, ioaddr + XGMAC_RX_CONFIG); } static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, @@ -363,6 +363,23 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, /* MAC HW feature 1 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1); dma_cap->tsoen = (hw_cap & XGMAC_HWFEAT_TSOEN) >> 18; + + dma_cap->addr64 = (hw_cap & XGMAC_HWFEAT_ADDR64) >> 14; + switch (dma_cap->addr64) { + case 0: + dma_cap->addr64 = 32; + break; + case 1: + dma_cap->addr64 = 40; + break; + case 2: + dma_cap->addr64 = 48; + break; + default: + dma_cap->addr64 = 32; + break; + } + dma_cap->tx_fifo_size = 128 << ((hw_cap & XGMAC_HWFEAT_TXFIFOSIZE) >> 6); dma_cap->rx_fifo_size = diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 81b966a8261b..6c61b753b55e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -81,6 +81,7 @@ static const struct stmmac_hwif_entry { const void *hwtimestamp; const void *mode; const void *tc; + const void *mmc; int (*setup)(struct stmmac_priv *priv); int (*quirks)(struct stmmac_priv *priv); } stmmac_hw[] = { @@ -100,6 +101,7 
@@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = NULL, .tc = NULL, + .mmc = &dwmac_mmc_ops, .setup = dwmac100_setup, .quirks = stmmac_dwmac1_quirks, }, { @@ -117,6 +119,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = NULL, .tc = NULL, + .mmc = &dwmac_mmc_ops, .setup = dwmac1000_setup, .quirks = stmmac_dwmac1_quirks, }, { @@ -134,6 +137,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = NULL, .tc = &dwmac510_tc_ops, + .mmc = &dwmac_mmc_ops, .setup = dwmac4_setup, .quirks = stmmac_dwmac4_quirks, }, { @@ -151,6 +155,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = &dwmac4_ring_mode_ops, .tc = &dwmac510_tc_ops, + .mmc = &dwmac_mmc_ops, .setup = dwmac4_setup, .quirks = NULL, }, { @@ -168,6 +173,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = &dwmac4_ring_mode_ops, .tc = &dwmac510_tc_ops, + .mmc = &dwmac_mmc_ops, .setup = dwmac4_setup, .quirks = NULL, }, { @@ -185,6 +191,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = &dwmac4_ring_mode_ops, .tc = &dwmac510_tc_ops, + .mmc = &dwmac_mmc_ops, .setup = dwmac4_setup, .quirks = NULL, }, { @@ -202,6 +209,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .mode = NULL, .tc = &dwmac510_tc_ops, + .mmc = NULL, .setup = dwxgmac2_setup, .quirks = NULL, }, @@ -267,6 +275,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv) mac->ptp = mac->ptp ? : entry->hwtimestamp; mac->mode = mac->mode ? : entry->mode; mac->tc = mac->tc ? : entry->tc; + mac->mmc = mac->mmc ? : entry->mmc; priv->hw = mac; priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 5bb00234d961..278c0dbec9d9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -6,6 +6,7 @@ #define __STMMAC_HWIF_H__ #include <linux/netdevice.h> +#include <linux/stmmac.h> #define stmmac_do_void_callback(__priv, __module, __cname, __arg0, __args...) \ ({ \ @@ -149,10 +150,10 @@ struct stmmac_dma_ops { struct stmmac_dma_cfg *dma_cfg, u32 chan); void (*init_rx_chan)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_rx_phy, u32 chan); + dma_addr_t phy, u32 chan); void (*init_tx_chan)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, - u32 dma_tx_phy, u32 chan); + dma_addr_t phy, u32 chan); /* Configure the AXI Bus Mode Register */ void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi); /* Dump DMA registers */ @@ -324,6 +325,8 @@ struct stmmac_ops { int (*flex_pps_config)(void __iomem *ioaddr, int index, struct stmmac_pps_cfg *cfg, bool enable, u32 sub_second_inc, u32 systime_flags); + /* Loopback for selftests */ + void (*set_mac_loopback)(void __iomem *ioaddr, bool enable); }; #define stmmac_core_init(__priv, __args...) \ @@ -392,6 +395,8 @@ struct stmmac_ops { stmmac_do_callback(__priv, mac, rxp_config, __args) #define stmmac_flex_pps_config(__priv, __args...) \ stmmac_do_callback(__priv, mac, flex_pps_config, __args) +#define stmmac_set_mac_loopback(__priv, __args...) \ + stmmac_do_void_callback(__priv, mac, set_mac_loopback, __args) /* PTP and HW Timer helpers */ struct stmmac_hwtimestamp { @@ -464,6 +469,21 @@ struct stmmac_tc_ops { #define stmmac_tc_setup_cbs(__priv, __args...) 
\ stmmac_do_callback(__priv, tc, setup_cbs, __args) +struct stmmac_counters; + +struct stmmac_mmc_ops { + void (*ctrl)(void __iomem *ioaddr, unsigned int mode); + void (*intr_all_mask)(void __iomem *ioaddr); + void (*read)(void __iomem *ioaddr, struct stmmac_counters *mmc); +}; + +#define stmmac_mmc_ctrl(__priv, __args...) \ + stmmac_do_void_callback(__priv, mmc, ctrl, __args) +#define stmmac_mmc_intr_all_mask(__priv, __args...) \ + stmmac_do_void_callback(__priv, mmc, intr_all_mask, __args) +#define stmmac_mmc_read(__priv, __args...) \ + stmmac_do_void_callback(__priv, mmc, read, __args) + struct stmmac_regs_off { u32 ptp_off; u32 mmc_off; @@ -482,6 +502,7 @@ extern const struct stmmac_tc_ops dwmac510_tc_ops; extern const struct stmmac_ops dwxgmac210_ops; extern const struct stmmac_dma_ops dwxgmac210_dma_ops; extern const struct stmmac_desc_ops dwxgmac210_desc_ops; +extern const struct stmmac_mmc_ops dwmac_mmc_ops; #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ #define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */ diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h index 6c8fdee3b25a..3587ceb9faf5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc.h +++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h @@ -118,8 +118,4 @@ struct stmmac_counters { unsigned int mmc_rx_icmp_err_octets; }; -void dwmac_mmc_ctrl(void __iomem *ioaddr, unsigned int mode); -void dwmac_mmc_intr_all_mask(void __iomem *ioaddr); -void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc); - #endif /* __MMC_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c index 1d967b8f91a0..a471db6d7b11 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/io.h> +#include "hwif.h" #include "mmc.h" /* MAC Management Counters register offset */ @@ -118,7 +119,7 @@ #define MMC_RX_ICMP_GD_OCTETS 0x180 #define MMC_RX_ICMP_ERR_OCTETS 0x184 -void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) +static void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) { u32 value = readl(mmcaddr + MMC_CNTRL); @@ -131,7 +132,7 @@ void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode) } /* To mask all all interrupts.*/ -void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr) +static void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr) { writel(MMC_DEFAULT_MASK, mmcaddr + MMC_RX_INTR_MASK); writel(MMC_DEFAULT_MASK, mmcaddr + MMC_TX_INTR_MASK); @@ -143,7 +144,7 @@ void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr) * counter after a read. So all the field of the mmc struct * have to be incremented. 
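
Moving the MMC accessors behind struct stmmac_mmc_ops puts them on the same footing as the other hwif modules: each stmmac_mmc_*() wrapper expands to a NULL-checked indirect call, so a core that leaves .mmc unset (the XGMAC entry in the table above) silently skips counter handling instead of crashing. Roughly what stmmac_do_void_callback() produces for stmmac_mmc_read() (a sketch, not the literal macro expansion):

static inline int sketch_mmc_read(struct stmmac_priv *priv,
				  void __iomem *mmcaddr,
				  struct stmmac_counters *counters)
{
	if (!priv->hw->mmc || !priv->hw->mmc->read)
		return -EINVAL;

	priv->hw->mmc->read(mmcaddr, counters);
	return 0;
}
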
*/ -void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) +static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) { mmc->mmc_tx_octetcount_gb += readl(mmcaddr + MMC_TX_OCTETCOUNT_GB); mmc->mmc_tx_framecount_gb += readl(mmcaddr + MMC_TX_FRAMECOUNT_GB); @@ -256,3 +257,9 @@ void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc) mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS); mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS); } + +const struct stmmac_mmc_ops dwmac_mmc_ops = { + .ctrl = dwmac_mmc_ctrl, + .intr_all_mask = dwmac_mmc_intr_all_mask, + .read = dwmac_mmc_read, +}; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 62a64356ad22..5cd966c154f3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -14,12 +14,13 @@ #include <linux/clk.h> #include <linux/stmmac.h> -#include <linux/phy.h> +#include <linux/phylink.h> #include <linux/pci.h> #include "common.h" #include <linux/ptp_clock_kernel.h> #include <linux/net_tstamp.h> #include <linux/reset.h> +#include <net/page_pool.h> struct stmmac_resources { void __iomem *addr; @@ -54,13 +55,19 @@ struct stmmac_tx_queue { u32 mss; }; +struct stmmac_rx_buffer { + struct page *page; + dma_addr_t addr; +}; + struct stmmac_rx_queue { + u32 rx_count_frames; u32 queue_index; + struct page_pool *page_pool; + struct stmmac_rx_buffer *buf_pool; struct stmmac_priv *priv_data; struct dma_extended_desc *dma_erx; struct dma_desc *dma_rx ____cacheline_aligned_in_smp; - struct sk_buff **rx_skbuff; - dma_addr_t *rx_skbuff_dma; unsigned int cur_rx; unsigned int dirty_rx; u32 rx_zeroc_thresh; @@ -110,6 +117,7 @@ struct stmmac_priv { /* Frequently used values are kept adjacent for cache effect */ u32 tx_coal_frames; u32 tx_coal_timer; + u32 rx_coal_frames; int tx_coalesce; int hwts_tx_en; @@ -137,14 +145,15 @@ struct stmmac_priv { /* Generic channel for NAPI */ struct stmmac_channel channel[STMMAC_CH_MAX]; - bool oldlink; int speed; - int oldduplex; unsigned int flow_ctrl; unsigned int pause; struct mii_bus *mii; int mii_irq[PHY_MAX_ADDR]; + struct phylink_config phylink_config; + struct phylink *phylink; + struct stmmac_extra_stats xstats ____cacheline_aligned_in_smp; struct stmmac_safety_stats sstats; struct plat_stmmacenet_data *plat; @@ -219,4 +228,26 @@ int stmmac_dvr_probe(struct device *device, void stmmac_disable_eee_mode(struct stmmac_priv *priv); bool stmmac_eee_init(struct stmmac_priv *priv); +#if IS_ENABLED(CONFIG_STMMAC_SELFTESTS) +void stmmac_selftest_run(struct net_device *dev, + struct ethtool_test *etest, u64 *buf); +void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data); +int stmmac_selftest_get_count(struct stmmac_priv *priv); +#else +static inline void stmmac_selftest_run(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + /* Not enabled */ +} +static inline void stmmac_selftest_get_strings(struct stmmac_priv *priv, + u8 *data) +{ + /* Not enabled */ +} +static inline int stmmac_selftest_get_count(struct stmmac_priv *priv) +{ + return -EOPNOTSUPP; +} +#endif /* CONFIG_STMMAC_SELFTESTS */ + #endif /* __STMMAC_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index e7af3dc3dd8f..6efb66820d4c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -12,7 
+12,7 @@ #include <linux/ethtool.h> #include <linux/interrupt.h> #include <linux/mii.h> -#include <linux/phy.h> +#include <linux/phylink.h> #include <linux/net_tstamp.h> #include <asm/io.h> @@ -264,7 +264,6 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *cmd) { struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phy = dev->phydev; if (priv->hw->pcs & STMMAC_PCS_RGMII || priv->hw->pcs & STMMAC_PCS_SGMII) { @@ -343,18 +342,7 @@ static int stmmac_ethtool_get_link_ksettings(struct net_device *dev, return 0; } - if (phy == NULL) { - pr_err("%s: %s: PHY is not registered\n", - __func__, dev->name); - return -ENODEV; - } - if (!netif_running(dev)) { - pr_err("%s: interface is disabled: we cannot track " - "link speed / duplex setting\n", dev->name); - return -EBUSY; - } - phy_ethtool_ksettings_get(phy, cmd); - return 0; + return phylink_ethtool_ksettings_get(priv->phylink, cmd); } static int @@ -362,8 +350,6 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd) { struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phy = dev->phydev; - int rc; if (priv->hw->pcs & STMMAC_PCS_RGMII || priv->hw->pcs & STMMAC_PCS_SGMII) { @@ -387,9 +373,7 @@ stmmac_ethtool_set_link_ksettings(struct net_device *dev, return 0; } - rc = phy_ethtool_ksettings_set(phy, cmd); - - return rc; + return phylink_ethtool_ksettings_set(priv->phylink, cmd); } static u32 stmmac_ethtool_getmsglevel(struct net_device *dev) @@ -433,6 +417,13 @@ static void stmmac_ethtool_gregs(struct net_device *dev, NUM_DWMAC1000_DMA_REGS * 4); } +static int stmmac_nway_reset(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + return phylink_ethtool_nway_reset(priv->phylink); +} + static void stmmac_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) @@ -440,28 +431,13 @@ stmmac_get_pauseparam(struct net_device *netdev, struct stmmac_priv *priv = netdev_priv(netdev); struct rgmii_adv adv_lp; - pause->rx_pause = 0; - pause->tx_pause = 0; - if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) { pause->autoneg = 1; if (!adv_lp.pause) return; } else { - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - netdev->phydev->supported) || - !linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - netdev->phydev->supported)) - return; + phylink_ethtool_get_pauseparam(priv->phylink, pause); } - - pause->autoneg = netdev->phydev->autoneg; - - if (priv->flow_ctrl & FLOW_RX) - pause->rx_pause = 1; - if (priv->flow_ctrl & FLOW_TX) - pause->tx_pause = 1; - } static int @@ -469,39 +445,16 @@ stmmac_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct stmmac_priv *priv = netdev_priv(netdev); - u32 tx_cnt = priv->plat->tx_queues_to_use; - struct phy_device *phy = netdev->phydev; - int new_pause = FLOW_OFF; struct rgmii_adv adv_lp; if (priv->hw->pcs && !stmmac_pcs_get_adv_lp(priv, priv->ioaddr, &adv_lp)) { pause->autoneg = 1; if (!adv_lp.pause) return -EOPNOTSUPP; + return 0; } else { - if (!linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phy->supported) || - !linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phy->supported)) - return -EOPNOTSUPP; - } - - if (pause->rx_pause) - new_pause |= FLOW_RX; - if (pause->tx_pause) - new_pause |= FLOW_TX; - - priv->flow_ctrl = new_pause; - phy->autoneg = pause->autoneg; - - if (phy->autoneg) { - if (netif_running(netdev)) - return phy_start_aneg(phy); + return 
phylink_ethtool_set_pauseparam(priv->phylink, pause); } - - stmmac_flow_ctrl(priv, priv->hw, phy->duplex, priv->flow_ctrl, - priv->pause, tx_cnt); - return 0; } static void stmmac_get_ethtool_stats(struct net_device *dev, @@ -527,7 +480,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, if (ret) { /* If supported, for new GMAC chips expose the MMC counters */ if (priv->dma_cap.rmon) { - dwmac_mmc_read(priv->mmcaddr, &priv->mmc); + stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc); for (i = 0; i < STMMAC_MMC_STATS_LEN; i++) { char *p; @@ -539,7 +492,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev, } } if (priv->eee_enabled) { - int val = phy_get_eee_err(dev->phydev); + int val = phylink_get_eee_err(priv->phylink); if (val) priv->xstats.phy_eee_wakeup_error_n = val; } @@ -579,6 +532,8 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset) } return len; + case ETH_SS_TEST: + return stmmac_selftest_get_count(priv); default: return -EOPNOTSUPP; } @@ -615,6 +570,9 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data) p += ETH_GSTRING_LEN; } break; + case ETH_SS_TEST: + stmmac_selftest_get_strings(priv, p); + break; default: WARN_ON(1); break; @@ -679,7 +637,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, edata->eee_active = priv->eee_active; edata->tx_lpi_timer = priv->tx_lpi_timer; - return phy_ethtool_get_eee(dev->phydev, edata); + return phylink_ethtool_get_eee(priv->phylink, edata); } static int stmmac_ethtool_op_set_eee(struct net_device *dev, @@ -700,7 +658,7 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, return -EOPNOTSUPP; } - ret = phy_ethtool_set_eee(dev->phydev, edata); + ret = phylink_ethtool_set_eee(priv->phylink, edata); if (ret) return ret; @@ -743,8 +701,10 @@ static int stmmac_get_coalesce(struct net_device *dev, ec->tx_coalesce_usecs = priv->tx_coal_timer; ec->tx_max_coalesced_frames = priv->tx_coal_frames; - if (priv->use_riwt) + if (priv->use_riwt) { + ec->rx_max_coalesced_frames = priv->rx_coal_frames; ec->rx_coalesce_usecs = stmmac_riwt2usec(priv->rx_riwt, priv); + } return 0; } @@ -757,7 +717,7 @@ static int stmmac_set_coalesce(struct net_device *dev, unsigned int rx_riwt; /* Check not supported parameters */ - if ((ec->rx_max_coalesced_frames) || (ec->rx_coalesce_usecs_irq) || + if ((ec->rx_coalesce_usecs_irq) || (ec->rx_max_coalesced_frames_irq) || (ec->tx_coalesce_usecs_irq) || (ec->use_adaptive_rx_coalesce) || (ec->use_adaptive_tx_coalesce) || (ec->pkt_rate_low) || (ec->rx_coalesce_usecs_low) || @@ -791,6 +751,7 @@ static int stmmac_set_coalesce(struct net_device *dev, /* Only copy relevant parameters, ignore all others. 
*/ priv->tx_coal_frames = ec->tx_max_coalesced_frames; priv->tx_coal_timer = ec->tx_coalesce_usecs; + priv->rx_coal_frames = ec->rx_max_coalesced_frames; priv->rx_riwt = rx_riwt; stmmac_rx_watchdog(priv, priv->ioaddr, priv->rx_riwt, rx_cnt); @@ -877,9 +838,10 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .get_regs = stmmac_ethtool_gregs, .get_regs_len = stmmac_ethtool_get_regs_len, .get_link = ethtool_op_get_link, - .nway_reset = phy_ethtool_nway_reset, + .nway_reset = stmmac_nway_reset, .get_pauseparam = stmmac_get_pauseparam, .set_pauseparam = stmmac_set_pauseparam, + .self_test = stmmac_selftest_run, .get_ethtool_stats = stmmac_get_ethtool_stats, .get_strings = stmmac_get_strings, .get_wol = stmmac_get_wol, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 06358fe5b245..c7c9e5f162e6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -35,6 +35,7 @@ #include <linux/seq_file.h> #endif /* CONFIG_DEBUG_FS */ #include <linux/net_tstamp.h> +#include <linux/phylink.h> #include <net/pkt_cls.h> #include "stmmac_ptp.h" #include "stmmac.h" @@ -318,21 +319,6 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue) } /** - * stmmac_hw_fix_mac_speed - callback for speed selection - * @priv: driver private structure - * Description: on some platforms (e.g. ST), some HW system configuration - * registers have to be set according to the link speed negotiated. - */ -static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv) -{ - struct net_device *ndev = priv->dev; - struct phy_device *phydev = ndev->phydev; - - if (likely(priv->plat->fix_mac_speed)) - priv->plat->fix_mac_speed(priv->plat->bsp_priv, phydev->speed); -} - -/** * stmmac_enable_eee_mode - check and enter in LPI mode * @priv: driver private structure * Description: this function is to verify and enter in LPI mode in case of @@ -395,14 +381,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) */ bool stmmac_eee_init(struct stmmac_priv *priv) { - struct net_device *ndev = priv->dev; - int interface = priv->plat->interface; - bool ret = false; - - if ((interface != PHY_INTERFACE_MODE_MII) && - (interface != PHY_INTERFACE_MODE_GMII) && - !phy_interface_mode_is_rgmii(interface)) - goto out; + int tx_lpi_timer = priv->tx_lpi_timer; /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. @@ -410,52 +389,35 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if ((priv->hw->pcs == STMMAC_PCS_RGMII) || (priv->hw->pcs == STMMAC_PCS_TBI) || (priv->hw->pcs == STMMAC_PCS_RTBI)) - goto out; - - /* MAC core supports the EEE feature. */ - if (priv->dma_cap.eee) { - int tx_lpi_timer = priv->tx_lpi_timer; - - /* Check if the PHY supports EEE */ - if (phy_init_eee(ndev->phydev, 1)) { - /* To manage at run-time if the EEE cannot be supported - * anymore (for example because the lp caps have been - * changed). - * In that case the driver disable own timers. 
- */ - mutex_lock(&priv->lock); - if (priv->eee_active) { - netdev_dbg(priv->dev, "disable EEE\n"); - del_timer_sync(&priv->eee_ctrl_timer); - stmmac_set_eee_timer(priv, priv->hw, 0, - tx_lpi_timer); - } - priv->eee_active = 0; - mutex_unlock(&priv->lock); - goto out; - } - /* Activate the EEE and start timers */ - mutex_lock(&priv->lock); - if (!priv->eee_active) { - priv->eee_active = 1; - timer_setup(&priv->eee_ctrl_timer, - stmmac_eee_ctrl_timer, 0); - mod_timer(&priv->eee_ctrl_timer, - STMMAC_LPI_T(eee_timer)); - - stmmac_set_eee_timer(priv, priv->hw, - STMMAC_DEFAULT_LIT_LS, tx_lpi_timer); - } - /* Set HW EEE according to the speed */ - stmmac_set_eee_pls(priv, priv->hw, ndev->phydev->link); + return false; + + /* Check if MAC core supports the EEE feature. */ + if (!priv->dma_cap.eee) + return false; - ret = true; + mutex_lock(&priv->lock); + + /* Check if it needs to be deactivated */ + if (!priv->eee_active) { + if (priv->eee_enabled) { + netdev_dbg(priv->dev, "disable EEE\n"); + del_timer_sync(&priv->eee_ctrl_timer); + stmmac_set_eee_timer(priv, priv->hw, 0, tx_lpi_timer); + } mutex_unlock(&priv->lock); + return false; + } - netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); + if (priv->eee_active && !priv->eee_enabled) { + timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0); + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); + stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, + tx_lpi_timer); } -out: - return ret; + + mutex_unlock(&priv->lock); + netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); + return true; } /* stmmac_get_tx_hwtstamp - get HW TX timestamps @@ -838,97 +800,171 @@ static void stmmac_mac_flow_ctrl(struct stmmac_priv *priv, u32 duplex) priv->pause, tx_cnt); } -/** - * stmmac_adjust_link - adjusts the link parameters - * @dev: net device structure - * Description: this is the helper called by the physical abstraction layer - * drivers to communicate the phy link status. According the speed and duplex - * this driver can invoke registered glue-logic as well. - * It also invoke the eee initialization because it could happen when switch - * on different networks (that are eee capable). 
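
The rewritten stmmac_eee_init() is now a pure "converge to priv->eee_active" helper: the phylink link callbacks set the target state, and the function arms or cancels the LPI timer to match, returning whether EEE ended up enabled. Condensed flow under that reading (field and helper names as in the patch):

/* mac_link_down(): stop LPI */
priv->eee_active = false;
stmmac_eee_init(priv);		/* cancels eee_ctrl_timer, LPI timer off */

/* mac_link_up(): probe the PHY, then arm */
priv->eee_active = phy_init_eee(phy, 1) >= 0;
priv->eee_enabled = stmmac_eee_init(priv);	/* arms the LPI timer */
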
- */ -static void stmmac_adjust_link(struct net_device *dev) +static void stmmac_validate(struct phylink_config *config, + unsigned long *supported, + struct phylink_link_state *state) { - struct stmmac_priv *priv = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - bool new_state = false; - - if (!phydev) - return; + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + __ETHTOOL_DECLARE_LINK_MODE_MASK(mac_supported) = { 0, }; + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; + int tx_cnt = priv->plat->tx_queues_to_use; + int max_speed = priv->plat->max_speed; - mutex_lock(&priv->lock); + phylink_set(mac_supported, 10baseT_Half); + phylink_set(mac_supported, 10baseT_Full); + phylink_set(mac_supported, 100baseT_Half); + phylink_set(mac_supported, 100baseT_Full); + + phylink_set(mac_supported, Autoneg); + phylink_set(mac_supported, Pause); + phylink_set(mac_supported, Asym_Pause); + phylink_set_port_modes(mac_supported); + + if (priv->plat->has_gmac || + priv->plat->has_gmac4 || + priv->plat->has_xgmac) { + phylink_set(mac_supported, 1000baseT_Half); + phylink_set(mac_supported, 1000baseT_Full); + phylink_set(mac_supported, 1000baseKX_Full); + } + + /* Cut down 1G if asked to */ + if ((max_speed > 0) && (max_speed < 1000)) { + phylink_set(mask, 1000baseT_Full); + phylink_set(mask, 1000baseX_Full); + } else if (priv->plat->has_xgmac) { + phylink_set(mac_supported, 2500baseT_Full); + phylink_set(mac_supported, 5000baseT_Full); + phylink_set(mac_supported, 10000baseSR_Full); + phylink_set(mac_supported, 10000baseLR_Full); + phylink_set(mac_supported, 10000baseER_Full); + phylink_set(mac_supported, 10000baseLRM_Full); + phylink_set(mac_supported, 10000baseT_Full); + phylink_set(mac_supported, 10000baseKX4_Full); + phylink_set(mac_supported, 10000baseKR_Full); + } + + /* Half-Duplex can only work with single queue */ + if (tx_cnt > 1) { + phylink_set(mask, 10baseT_Half); + phylink_set(mask, 100baseT_Half); + phylink_set(mask, 1000baseT_Half); + } + + bitmap_and(supported, supported, mac_supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_andnot(supported, supported, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_and(state->advertising, state->advertising, mac_supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); + bitmap_andnot(state->advertising, state->advertising, mask, + __ETHTOOL_LINK_MODE_MASK_NBITS); +} - if (phydev->link) { - u32 ctrl = readl(priv->ioaddr + MAC_CTRL_REG); +static int stmmac_mac_link_state(struct phylink_config *config, + struct phylink_link_state *state) +{ + return -EOPNOTSUPP; +} - /* Now we make sure that we can be in full duplex mode. - * If not, we operate in half-duplex mode. 
*/ - if (phydev->duplex != priv->oldduplex) { - new_state = true; - if (!phydev->duplex) - ctrl &= ~priv->hw->link.duplex; - else - ctrl |= priv->hw->link.duplex; - priv->oldduplex = phydev->duplex; - } - /* Flow Control operation */ - if (phydev->pause) - stmmac_mac_flow_ctrl(priv, phydev->duplex); - - if (phydev->speed != priv->speed) { - new_state = true; - ctrl &= ~priv->hw->link.speed_mask; - switch (phydev->speed) { - case SPEED_1000: - ctrl |= priv->hw->link.speed1000; - break; - case SPEED_100: - ctrl |= priv->hw->link.speed100; - break; - case SPEED_10: - ctrl |= priv->hw->link.speed10; - break; - default: - netif_warn(priv, link, priv->dev, - "broken speed: %d\n", phydev->speed); - phydev->speed = SPEED_UNKNOWN; - break; - } - if (phydev->speed != SPEED_UNKNOWN) - stmmac_hw_fix_mac_speed(priv); - priv->speed = phydev->speed; - } +static void stmmac_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + u32 ctrl; - writel(ctrl, priv->ioaddr + MAC_CTRL_REG); + ctrl = readl(priv->ioaddr + MAC_CTRL_REG); + ctrl &= ~priv->hw->link.speed_mask; - if (!priv->oldlink) { - new_state = true; - priv->oldlink = true; + if (state->interface == PHY_INTERFACE_MODE_USXGMII) { + switch (state->speed) { + case SPEED_10000: + ctrl |= priv->hw->link.xgmii.speed10000; + break; + case SPEED_5000: + ctrl |= priv->hw->link.xgmii.speed5000; + break; + case SPEED_2500: + ctrl |= priv->hw->link.xgmii.speed2500; + break; + default: + return; + } + } else { + switch (state->speed) { + case SPEED_2500: + ctrl |= priv->hw->link.speed2500; + break; + case SPEED_1000: + ctrl |= priv->hw->link.speed1000; + break; + case SPEED_100: + ctrl |= priv->hw->link.speed100; + break; + case SPEED_10: + ctrl |= priv->hw->link.speed10; + break; + default: + return; } - } else if (priv->oldlink) { - new_state = true; - priv->oldlink = false; - priv->speed = SPEED_UNKNOWN; - priv->oldduplex = DUPLEX_UNKNOWN; } - if (new_state && netif_msg_link(priv)) - phy_print_status(phydev); + priv->speed = state->speed; - mutex_unlock(&priv->lock); + if (priv->plat->fix_mac_speed) + priv->plat->fix_mac_speed(priv->plat->bsp_priv, state->speed); - if (phydev->is_pseudo_fixed_link) - /* Stop PHY layer to call the hook to adjust the link in case - * of a switch is attached to the stmmac driver. - */ - phydev->irq = PHY_IGNORE_INTERRUPT; + if (!state->duplex) + ctrl &= ~priv->hw->link.duplex; else - /* At this stage, init the EEE if supported. - * Never called in case of fixed_link. 
- */ + ctrl |= priv->hw->link.duplex; + + /* Flow Control operation */ + if (state->pause) + stmmac_mac_flow_ctrl(priv, state->duplex); + + writel(ctrl, priv->ioaddr + MAC_CTRL_REG); +} + +static void stmmac_mac_an_restart(struct phylink_config *config) +{ + /* Not Supported */ +} + +static void stmmac_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) +{ + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + + stmmac_mac_set(priv, priv->ioaddr, false); + priv->eee_active = false; + stmmac_eee_init(priv); + stmmac_set_eee_pls(priv, priv->hw, false); +} + +static void stmmac_mac_link_up(struct phylink_config *config, + unsigned int mode, phy_interface_t interface, + struct phy_device *phy) +{ + struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + + stmmac_mac_set(priv, priv->ioaddr, true); + if (phy && priv->dma_cap.eee) { + priv->eee_active = phy_init_eee(phy, 1) >= 0; priv->eee_enabled = stmmac_eee_init(priv); + stmmac_set_eee_pls(priv, priv->hw, true); + } } +static const struct phylink_mac_ops stmmac_phylink_mac_ops = { + .validate = stmmac_validate, + .mac_link_state = stmmac_mac_link_state, + .mac_config = stmmac_mac_config, + .mac_an_restart = stmmac_mac_an_restart, + .mac_link_down = stmmac_mac_link_down, + .mac_link_up = stmmac_mac_link_up, +}; + /** * stmmac_check_pcs_mode - verify if RGMII/SGMII is supported * @priv: driver private structure @@ -965,79 +1001,48 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); - u32 tx_cnt = priv->plat->tx_queues_to_use; - struct phy_device *phydev; - char phy_id_fmt[MII_BUS_ID_SIZE + 3]; - char bus_id[MII_BUS_ID_SIZE]; - int interface = priv->plat->interface; - int max_speed = priv->plat->max_speed; - priv->oldlink = false; - priv->speed = SPEED_UNKNOWN; - priv->oldduplex = DUPLEX_UNKNOWN; + struct device_node *node; + int ret; - if (priv->plat->phy_node) { - phydev = of_phy_connect(dev, priv->plat->phy_node, - &stmmac_adjust_link, 0, interface); - } else { - snprintf(bus_id, MII_BUS_ID_SIZE, "stmmac-%x", - priv->plat->bus_id); + node = priv->plat->phylink_node; - snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id, - priv->plat->phy_addr); - netdev_dbg(priv->dev, "%s: trying to attach to %s\n", __func__, - phy_id_fmt); + if (node) + ret = phylink_of_phy_connect(priv->phylink, node, 0); - phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link, - interface); - } + /* Some DT bindings do not set-up the PHY handle. 
Let's try to + * manually parse it + */ + if (!node || ret) { + int addr = priv->plat->phy_addr; + struct phy_device *phydev; - if (IS_ERR_OR_NULL(phydev)) { - netdev_err(priv->dev, "Could not attach to PHY\n"); - if (!phydev) + phydev = mdiobus_get_phy(priv->mii, addr); + if (!phydev) { + netdev_err(priv->dev, "no phy at addr %d\n", addr); return -ENODEV; + } - return PTR_ERR(phydev); + ret = phylink_connect_phy(priv->phylink, phydev); } - /* Stop Advertising 1000BASE Capability if interface is not GMII */ - if ((interface == PHY_INTERFACE_MODE_MII) || - (interface == PHY_INTERFACE_MODE_RMII) || - (max_speed < 1000 && max_speed > 0)) - phy_set_max_speed(phydev, SPEED_100); + return ret; +} - /* - * Half-duplex mode not supported with multiqueue - * half-duplex can only works with single queue - */ - if (tx_cnt > 1) { - phy_remove_link_mode(phydev, - ETHTOOL_LINK_MODE_10baseT_Half_BIT); - phy_remove_link_mode(phydev, - ETHTOOL_LINK_MODE_100baseT_Half_BIT); - phy_remove_link_mode(phydev, - ETHTOOL_LINK_MODE_1000baseT_Half_BIT); - } +static int stmmac_phy_setup(struct stmmac_priv *priv) +{ + struct fwnode_handle *fwnode = of_fwnode_handle(priv->plat->phylink_node); + int mode = priv->plat->interface; + struct phylink *phylink; - /* - * Broken HW is sometimes missing the pull-up resistor on the - * MDIO line, which results in reads to non-existent devices returning - * 0 rather than 0xffff. Catch this here and treat 0 as a non-existent - * device as well. - * Note: phydev->phy_id is the result of reading the UID PHY registers. - */ - if (!priv->plat->phy_node && phydev->phy_id == 0) { - phy_disconnect(phydev); - return -ENODEV; - } + priv->phylink_config.dev = &priv->dev->dev; + priv->phylink_config.type = PHYLINK_NETDEV; - /* stmmac_adjust_link will change this to PHY_IGNORE_INTERRUPT to avoid - * subsequent PHY polling, make sure we force a link transition if - * we have a UP/DOWN/UP transition - */ - if (phydev->is_pseudo_fixed_link) - phydev->irq = PHY_POLL; + phylink = phylink_create(&priv->phylink_config, fwnode, + mode, &stmmac_phylink_mac_ops); + if (IS_ERR(phylink)) + return PTR_ERR(phylink); - phy_attached_info(phydev); + priv->phylink = phylink; return 0; } @@ -1192,26 +1197,14 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, int i, gfp_t flags, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - struct sk_buff *skb; + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - skb = __netdev_alloc_skb_ip_align(priv->dev, priv->dma_buf_sz, flags); - if (!skb) { - netdev_err(priv->dev, - "%s: Rx init fails; skb is NULL\n", __func__); + buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + if (!buf->page) return -ENOMEM; - } - rx_q->rx_skbuff[i] = skb; - rx_q->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data, - priv->dma_buf_sz, - DMA_FROM_DEVICE); - if (dma_mapping_error(priv->device, rx_q->rx_skbuff_dma[i])) { - netdev_err(priv->dev, "%s: DMA mapping error\n", __func__); - dev_kfree_skb_any(skb); - return -EINVAL; - } - - stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[i]); + buf->addr = page_pool_get_dma_addr(buf->page); + stmmac_set_desc_addr(priv, p, buf->addr); if (priv->dma_buf_sz == BUF_SIZE_16KiB) stmmac_init_desc3(priv, p); @@ -1227,13 +1220,11 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, static void stmmac_free_rx_buffer(struct stmmac_priv *priv, u32 queue, int i) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; - if 
(rx_q->rx_skbuff[i]) { - dma_unmap_single(priv->device, rx_q->rx_skbuff_dma[i], - priv->dma_buf_sz, DMA_FROM_DEVICE); - dev_kfree_skb_any(rx_q->rx_skbuff[i]); - } - rx_q->rx_skbuff[i] = NULL; + if (buf->page) + page_pool_put_page(rx_q->page_pool, buf->page, false); + buf->page = NULL; } /** @@ -1316,10 +1307,6 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags) queue); if (ret) goto err_init_rx_buffers; - - netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n", - rx_q->rx_skbuff[i], rx_q->rx_skbuff[i]->data, - (unsigned int)rx_q->rx_skbuff_dma[i]); } rx_q->cur_rx = 0; @@ -1493,8 +1480,11 @@ static void free_dma_rx_desc_resources(struct stmmac_priv *priv) sizeof(struct dma_extended_desc), rx_q->dma_erx, rx_q->dma_rx_phy); - kfree(rx_q->rx_skbuff_dma); - kfree(rx_q->rx_skbuff); + kfree(rx_q->buf_pool); + if (rx_q->page_pool) { + page_pool_request_shutdown(rx_q->page_pool); + page_pool_destroy(rx_q->page_pool); + } } } @@ -1546,20 +1536,29 @@ static int alloc_dma_rx_desc_resources(struct stmmac_priv *priv) /* RX queues buffers and DMA */ for (queue = 0; queue < rx_count; queue++) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; + struct page_pool_params pp_params = { 0 }; rx_q->queue_index = queue; rx_q->priv_data = priv; - rx_q->rx_skbuff_dma = kmalloc_array(DMA_RX_SIZE, - sizeof(dma_addr_t), - GFP_KERNEL); - if (!rx_q->rx_skbuff_dma) + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = DMA_RX_SIZE; + pp_params.order = DIV_ROUND_UP(priv->dma_buf_sz, PAGE_SIZE); + pp_params.nid = dev_to_node(priv->device); + pp_params.dev = priv->device; + pp_params.dma_dir = DMA_FROM_DEVICE; + + rx_q->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rx_q->page_pool)) { + ret = PTR_ERR(rx_q->page_pool); + rx_q->page_pool = NULL; goto err_dma; + } - rx_q->rx_skbuff = kmalloc_array(DMA_RX_SIZE, - sizeof(struct sk_buff *), - GFP_KERNEL); - if (!rx_q->rx_skbuff) + rx_q->buf_pool = kmalloc_array(DMA_RX_SIZE, + sizeof(*rx_q->buf_pool), + GFP_KERNEL); + if (!rx_q->buf_pool) goto err_dma; if (priv->extend_desc) { @@ -2049,14 +2048,15 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) struct stmmac_channel *ch = &priv->channel[chan]; if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); - napi_schedule_irqoff(&ch->rx_napi); + if (napi_schedule_prep(&ch->rx_napi)) { + stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + __napi_schedule_irqoff(&ch->rx_napi); + status |= handle_tx; + } } - if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) { - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); + if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) napi_schedule_irqoff(&ch->tx_napi); - } return status; } @@ -2118,10 +2118,10 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv) unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET | MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET; - dwmac_mmc_intr_all_mask(priv->mmcaddr); + stmmac_mmc_intr_all_mask(priv, priv->mmcaddr); if (priv->dma_cap.rmon) { - dwmac_mmc_ctrl(priv->mmcaddr, mode); + stmmac_mmc_ctrl(priv, priv->mmcaddr, mode); memset(&priv->mmc, 0, sizeof(struct stmmac_counters)); } else netdev_info(priv->dev, "No MAC Management Counters available\n"); @@ -2154,8 +2154,8 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv) stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0); if (!is_valid_ether_addr(priv->dev->dev_addr)) eth_hw_addr_random(priv->dev); - netdev_info(priv->dev, 
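
The interrupt path above also picks up the napi_schedule_prep() idiom: only the context that wins NAPI_STATE_SCHED masks the channel's DMA interrupt, so the mask cannot race with a poller that is concurrently completing and re-enabling it. The hunk also folds status |= handle_tx into the winning branch, apparently so TX cleanup is scheduled alongside while the shared channel interrupt stays masked. The shape of the idiom, isolated:

/* only the schedule winner touches the IRQ mask */
if (napi_schedule_prep(&ch->rx_napi)) {
	stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
	__napi_schedule_irqoff(&ch->rx_napi);
}
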
"device MAC address %pM\n", - priv->dev->dev_addr); + dev_info(priv->device, "device MAC address %pM\n", + priv->dev->dev_addr); } } @@ -2262,20 +2262,21 @@ static void stmmac_tx_timer(struct timer_list *t) } /** - * stmmac_init_tx_coalesce - init tx mitigation options. + * stmmac_init_coalesce - init mitigation options. * @priv: driver private structure * Description: - * This inits the transmit coalesce parameters: i.e. timer rate, + * This inits the coalesce parameters: i.e. timer rate, * timer handler and default threshold used for enabling the * interrupt on completion bit. */ -static void stmmac_init_tx_coalesce(struct stmmac_priv *priv) +static void stmmac_init_coalesce(struct stmmac_priv *priv) { u32 tx_channel_count = priv->plat->tx_queues_to_use; u32 chan; priv->tx_coal_frames = STMMAC_TX_FRAMES; priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; + priv->rx_coal_frames = STMMAC_RX_FRAMES; for (chan = 0; chan < tx_channel_count; chan++) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; @@ -2561,9 +2562,9 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; if (priv->use_riwt) { - ret = stmmac_rx_watchdog(priv, priv->ioaddr, MAX_DMA_RIWT, rx_cnt); + ret = stmmac_rx_watchdog(priv, priv->ioaddr, MIN_DMA_RIWT, rx_cnt); if (!ret) - priv->rx_riwt = MAX_DMA_RIWT; + priv->rx_riwt = MIN_DMA_RIWT; } if (priv->hw->pcs) @@ -2645,10 +2646,9 @@ static int stmmac_open(struct net_device *dev) goto init_error; } - stmmac_init_tx_coalesce(priv); + stmmac_init_coalesce(priv); - if (dev->phydev) - phy_start(dev->phydev); + phylink_start(priv->phylink); /* Request the IRQ lines */ ret = request_irq(dev->irq, stmmac_interrupt, @@ -2695,8 +2695,7 @@ lpiirq_error: wolirq_error: free_irq(dev->irq, dev); irq_error: - if (dev->phydev) - phy_stop(dev->phydev); + phylink_stop(priv->phylink); for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) del_timer_sync(&priv->tx_queue[chan].txtimer); @@ -2705,9 +2704,7 @@ irq_error: init_error: free_dma_desc_resources(priv); dma_desc_error: - if (dev->phydev) - phy_disconnect(dev->phydev); - + phylink_disconnect_phy(priv->phylink); return ret; } @@ -2726,10 +2723,8 @@ static int stmmac_release(struct net_device *dev) del_timer_sync(&priv->eee_ctrl_timer); /* Stop and disconnect the PHY */ - if (dev->phydev) { - phy_stop(dev->phydev); - phy_disconnect(dev->phydev); - } + phylink_stop(priv->phylink); + phylink_disconnect_phy(priv->phylink); stmmac_stop_all_queues(priv); @@ -2772,7 +2767,7 @@ static int stmmac_release(struct net_device *dev) * This function fills descriptor and request new descriptors according to * buffer length to fill */ -static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des, +static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, int total_len, bool last_segment, u32 queue) { struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; @@ -2783,11 +2778,18 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des, tmp_len = total_len; while (tmp_len > 0) { + dma_addr_t curr_addr; + tx_q->cur_tx = STMMAC_GET_ENTRY(tx_q->cur_tx, DMA_TX_SIZE); WARN_ON(tx_q->tx_skbuff[tx_q->cur_tx]); desc = tx_q->dma_tx + tx_q->cur_tx; - desc->des0 = cpu_to_le32(des + (total_len - tmp_len)); + curr_addr = des + (total_len - tmp_len); + if (priv->dma_cap.addr64 <= 32) + desc->des0 = cpu_to_le32(curr_addr); + else + stmmac_set_desc_addr(priv, desc, curr_addr); + buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ? 
TSO_MAX_BUFF_SIZE : tmp_len; @@ -2833,11 +2835,12 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); int nfrags = skb_shinfo(skb)->nr_frags; u32 queue = skb_get_queue_mapping(skb); - unsigned int first_entry, des; + unsigned int first_entry; struct stmmac_tx_queue *tx_q; int tmp_pay_len = 0; u32 pay_len, mss; u8 proto_hdr_len; + dma_addr_t des; int i; tx_q = &priv->tx_queue[queue]; @@ -2894,14 +2897,19 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) tx_q->tx_skbuff_dma[first_entry].buf = des; tx_q->tx_skbuff_dma[first_entry].len = skb_headlen(skb); - first->des0 = cpu_to_le32(des); + if (priv->dma_cap.addr64 <= 32) { + first->des0 = cpu_to_le32(des); - /* Fill start of payload in buff2 of first descriptor */ - if (pay_len) - first->des1 = cpu_to_le32(des + proto_hdr_len); + /* Fill start of payload in buff2 of first descriptor */ + if (pay_len) + first->des1 = cpu_to_le32(des + proto_hdr_len); - /* If needed take extra descriptors to fill the remaining payload */ - tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE; + /* If needed take extra descriptors to fill the remaining payload */ + tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE; + } else { + stmmac_set_desc_addr(priv, first, des); + tmp_pay_len = pay_len; + } stmmac_tso_allocator(priv, des, tmp_pay_len, (nfrags == 0), queue); @@ -3031,12 +3039,12 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) int i, csum_insertion = 0, is_jumbo = 0; u32 queue = skb_get_queue_mapping(skb); int nfrags = skb_shinfo(skb)->nr_frags; - int entry; - unsigned int first_entry; struct dma_desc *desc, *first; struct stmmac_tx_queue *tx_q; + unsigned int first_entry; unsigned int enh_desc; - unsigned int des; + dma_addr_t des; + int entry; tx_q = &priv->tx_queue[queue]; @@ -3045,17 +3053,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { - /* - * There is no way to determine the number of TSO - * capable Queues. Let's use always the Queue 0 - * because if TSO is supported then at least this - * one will be capable. - */ - skb_set_queue_mapping(skb, 0); - + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) return stmmac_tso_xmit(skb, dev); - } } if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { @@ -3281,59 +3280,38 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) int dirty = stmmac_rx_dirty(priv, queue); unsigned int entry = rx_q->dirty_rx; - int bfsize = priv->dma_buf_sz; - while (dirty-- > 0) { + struct stmmac_rx_buffer *buf = &rx_q->buf_pool[entry]; struct dma_desc *p; + bool use_rx_wd; if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); else p = rx_q->dma_rx + entry; - if (likely(!rx_q->rx_skbuff[entry])) { - struct sk_buff *skb; - - skb = netdev_alloc_skb_ip_align(priv->dev, bfsize); - if (unlikely(!skb)) { - /* so for a while no zero-copy! 
*/ - rx_q->rx_zeroc_thresh = STMMAC_RX_THRESH; - if (unlikely(net_ratelimit())) - dev_err(priv->device, - "fail to alloc skb entry %d\n", - entry); + if (!buf->page) { + buf->page = page_pool_dev_alloc_pages(rx_q->page_pool); + if (!buf->page) break; - } - - rx_q->rx_skbuff[entry] = skb; - rx_q->rx_skbuff_dma[entry] = - dma_map_single(priv->device, skb->data, bfsize, - DMA_FROM_DEVICE); - if (dma_mapping_error(priv->device, - rx_q->rx_skbuff_dma[entry])) { - netdev_err(priv->dev, "Rx DMA map failed\n"); - dev_kfree_skb(skb); - break; - } - - stmmac_set_desc_addr(priv, p, rx_q->rx_skbuff_dma[entry]); - stmmac_refill_desc3(priv, rx_q, p); - - if (rx_q->rx_zeroc_thresh > 0) - rx_q->rx_zeroc_thresh--; - - netif_dbg(priv, rx_status, priv->dev, - "refill entry #%d\n", entry); } - dma_wmb(); - stmmac_set_rx_owner(priv, p, priv->use_riwt); + buf->addr = page_pool_get_dma_addr(buf->page); + stmmac_set_desc_addr(priv, p, buf->addr); + stmmac_refill_desc3(priv, rx_q, p); + + rx_q->rx_count_frames++; + rx_q->rx_count_frames %= priv->rx_coal_frames; + use_rx_wd = priv->use_riwt && rx_q->rx_count_frames; dma_wmb(); + stmmac_set_rx_owner(priv, p, use_rx_wd); entry = STMMAC_GET_ENTRY(entry, DMA_RX_SIZE); } rx_q->dirty_rx = entry; + rx_q->rx_tail_addr = rx_q->dma_rx_phy + + (rx_q->dirty_rx * sizeof(struct dma_desc)); stmmac_set_rx_tail_ptr(priv, priv->ioaddr, rx_q->rx_tail_addr, queue); } @@ -3352,9 +3330,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) unsigned int next_entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; unsigned int count = 0; - bool xmac; - - xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; if (netif_msg_rx_status(priv)) { void *rx_head; @@ -3368,11 +3343,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_display_ring(priv, rx_head, DMA_RX_SIZE, true); } while (count < limit) { + struct stmmac_rx_buffer *buf; + struct dma_desc *np, *p; int entry, status; - struct dma_desc *p; - struct dma_desc *np; entry = next_entry; + buf = &rx_q->buf_pool[entry]; if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); @@ -3402,20 +3378,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) stmmac_rx_extended_status(priv, &priv->dev->stats, &priv->xstats, rx_q->dma_erx + entry); if (unlikely(status == discard_frame)) { + page_pool_recycle_direct(rx_q->page_pool, buf->page); priv->dev->stats.rx_errors++; - if (priv->hwts_rx_en && !priv->extend_desc) { - /* DESC2 & DESC3 will be overwritten by device - * with timestamp value, hence reinitialize - * them in stmmac_rx_refill() function so that - * device can reuse it. - */ - dev_kfree_skb_any(rx_q->rx_skbuff[entry]); - rx_q->rx_skbuff[entry] = NULL; - dma_unmap_single(priv->device, - rx_q->rx_skbuff_dma[entry], - priv->dma_buf_sz, - DMA_FROM_DEVICE); - } + buf->page = NULL; } else { struct sk_buff *skb; int frame_len; @@ -3455,58 +3420,20 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) frame_len, status); } - /* The zero-copy is always used for all the sizes - * in case of GMAC4 because it needs - * to refill the used descriptors, always. 
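
rx_count_frames layers RX frame-count coalescing on top of the existing RIWT watchdog: assuming stmmac_set_rx_owner()'s third argument suppresses the per-descriptor interrupt-on-completion bit, the refill loop leaves IOC set only on every rx_coal_frames-th descriptor, so the counter caps the IRQ rate while the watchdog timer backstops latency. The decision, isolated with comments:

rx_q->rx_count_frames++;
rx_q->rx_count_frames %= priv->rx_coal_frames;	/* wraps to 0 periodically */

/* nonzero = suppress IOC: at most one RX IRQ per rx_coal_frames frames */
use_rx_wd = priv->use_riwt && rx_q->rx_count_frames;
stmmac_set_rx_owner(priv, p, use_rx_wd);
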
- */ - if (unlikely(!xmac && - ((frame_len < priv->rx_copybreak) || - stmmac_rx_threshold_count(rx_q)))) { - skb = netdev_alloc_skb_ip_align(priv->dev, - frame_len); - if (unlikely(!skb)) { - if (net_ratelimit()) - dev_warn(priv->device, - "packet dropped\n"); - priv->dev->stats.rx_dropped++; - continue; - } - - dma_sync_single_for_cpu(priv->device, - rx_q->rx_skbuff_dma - [entry], frame_len, - DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, - rx_q-> - rx_skbuff[entry]->data, - frame_len); - - skb_put(skb, frame_len); - dma_sync_single_for_device(priv->device, - rx_q->rx_skbuff_dma - [entry], frame_len, - DMA_FROM_DEVICE); - } else { - skb = rx_q->rx_skbuff[entry]; - if (unlikely(!skb)) { - if (net_ratelimit()) - netdev_err(priv->dev, - "%s: Inconsistent Rx chain\n", - priv->dev->name); - priv->dev->stats.rx_dropped++; - continue; - } - prefetch(skb->data - NET_IP_ALIGN); - rx_q->rx_skbuff[entry] = NULL; - rx_q->rx_zeroc_thresh++; - - skb_put(skb, frame_len); - dma_unmap_single(priv->device, - rx_q->rx_skbuff_dma[entry], - priv->dma_buf_sz, - DMA_FROM_DEVICE); + skb = netdev_alloc_skb_ip_align(priv->dev, frame_len); + if (unlikely(!skb)) { + priv->dev->stats.rx_dropped++; + continue; } + dma_sync_single_for_cpu(priv->device, buf->addr, + frame_len, DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, page_address(buf->page), + frame_len); + skb_put(skb, frame_len); + dma_sync_single_for_device(priv->device, buf->addr, + frame_len, DMA_FROM_DEVICE); + if (netif_msg_pktdata(priv)) { netdev_dbg(priv->dev, "frame received (%dbytes)", frame_len); @@ -3526,6 +3453,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) napi_gro_receive(&ch->rx_napi, skb); + /* Data payload copied into SKB, page ready for recycle */ + page_pool_recycle_direct(rx_q->page_pool, buf->page); + buf->page = NULL; + priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; } @@ -3568,8 +3499,8 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget) work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan); work_done = min(work_done, budget); - if (work_done < budget && napi_complete_done(napi, work_done)) - stmmac_enable_dma_irq(priv, priv->ioaddr, chan); + if (work_done < budget) + napi_complete_done(napi, work_done); /* Force transmission restart */ tx_q = &priv->tx_queue[chan]; @@ -3792,6 +3723,7 @@ static void stmmac_poll_controller(struct net_device *dev) */ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { + struct stmmac_priv *priv = netdev_priv (dev); int ret = -EOPNOTSUPP; if (!netif_running(dev)) @@ -3801,9 +3733,7 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - if (!dev->phydev) - return -EINVAL; - ret = phy_mii_ioctl(dev->phydev, rq, cmd); + ret = phylink_mii_ioctl(priv->phylink, rq, cmd); break; case SIOCSHWTSTAMP: ret = stmmac_hwtstamp_set(dev, rq); @@ -3839,23 +3769,7 @@ static int stmmac_setup_tc_block_cb(enum tc_setup_type type, void *type_data, return ret; } -static int stmmac_setup_tc_block(struct stmmac_priv *priv, - struct tc_block_offload *f) -{ - if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) - return -EOPNOTSUPP; - - switch (f->command) { - case TC_BLOCK_BIND: - return tcf_block_cb_register(f->block, stmmac_setup_tc_block_cb, - priv, priv, f->extack); - case TC_BLOCK_UNBIND: - tcf_block_cb_unregister(f->block, stmmac_setup_tc_block_cb, priv); - return 0; - default: - return -EOPNOTSUPP; - } -} +static LIST_HEAD(stmmac_block_cb_list); 
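
stmmac_block_cb_list plus flow_block_cb_setup_simple() replaces the hand-rolled bind/unbind switch deleted above. The trailing true in the call that follows is the helper's ingress_only argument; inside the core helper it reinstates the binder-type check the driver no longer open-codes, roughly:

/* sketch of the check flow_block_cb_setup_simple() performs */
if (ingress_only &&
    f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
	return -EOPNOTSUPP;
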
static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data) @@ -3864,7 +3778,10 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, switch (type) { case TC_SETUP_BLOCK: - return stmmac_setup_tc_block(priv, type_data); + return flow_block_cb_setup_simple(type_data, + &stmmac_block_cb_list, + stmmac_setup_tc_block_cb, + priv, priv, true); case TC_SETUP_QDISC_CBS: return stmmac_tc_setup_cbs(priv, priv, type_data); default: @@ -3872,6 +3789,22 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type, } } +static u16 stmmac_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + return 0; + } + + return netdev_pick_tx(dev, skb, NULL) % dev->real_num_tx_queues; +} + static int stmmac_set_mac_address(struct net_device *ndev, void *addr) { struct stmmac_priv *priv = netdev_priv(ndev); @@ -4088,6 +4021,7 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_tx_timeout = stmmac_tx_timeout, .ndo_do_ioctl = stmmac_ioctl, .ndo_setup_tc = stmmac_setup_tc, + .ndo_select_queue = stmmac_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = stmmac_poll_controller, #endif @@ -4160,6 +4094,12 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->enh_desc = priv->dma_cap.enh_desc; priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up; priv->hw->pmt = priv->plat->pmt; + if (priv->dma_cap.hash_tb_sz) { + priv->hw->multicast_filter_bins = + (BIT(priv->dma_cap.hash_tb_sz) << 5); + priv->hw->mcast_bits_log2 = + ilog2(priv->hw->multicast_filter_bins); + } /* TXCOE doesn't work in thresh DMA mode */ if (priv->plat->force_thresh_dma_mode) @@ -4237,9 +4177,8 @@ int stmmac_dvr_probe(struct device *device, u32 queue, maxq; int ret = 0; - ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv), - MTL_MAX_TX_QUEUES, - MTL_MAX_RX_QUEUES); + ndev = devm_alloc_etherdev_mqs(device, sizeof(struct stmmac_priv), + MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES); if (!ndev) return -ENOMEM; @@ -4271,8 +4210,7 @@ int stmmac_dvr_probe(struct device *device, priv->wq = create_singlethread_workqueue("stmmac_wq"); if (!priv->wq) { dev_err(priv->device, "failed to create workqueue\n"); - ret = -ENOMEM; - goto error_wq; + return -ENOMEM; } INIT_WORK(&priv->service_task, stmmac_service_task); @@ -4319,6 +4257,24 @@ int stmmac_dvr_probe(struct device *device, priv->tso = true; dev_info(priv->device, "TSO feature enabled\n"); } + + if (priv->dma_cap.addr64) { + ret = dma_set_mask_and_coherent(device, + DMA_BIT_MASK(priv->dma_cap.addr64)); + if (!ret) { + dev_info(priv->device, "Using %d bits DMA width\n", + priv->dma_cap.addr64); + } else { + ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(32)); + if (ret) { + dev_err(priv->device, "Failed to set DMA Mask\n"); + goto error_hw_init; + } + + priv->dma_cap.addr64 = 32; + } + } + ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA; ndev->watchdog_timeo = msecs_to_jiffies(watchdog); #ifdef STMMAC_VLAN_TAG_USED @@ -4396,6 +4352,12 @@ int stmmac_dvr_probe(struct device *device, } } + ret = stmmac_phy_setup(priv); + if (ret) { + netdev_err(ndev, "failed to setup phy (%d)\n", ret); + goto error_phy_setup; + } + ret = register_netdev(ndev); if (ret) { dev_err(priv->device, "%s: ERROR %i registering the 
device\n", @@ -4413,6 +4375,8 @@ int stmmac_dvr_probe(struct device *device, return ret; error_netdev_register: + phylink_destroy(priv->phylink); +error_phy_setup: if (priv->hw->pcs != STMMAC_PCS_RGMII && priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) @@ -4428,8 +4392,6 @@ error_mdio_register: } error_hw_init: destroy_workqueue(priv->wq); -error_wq: - free_netdev(ndev); return ret; } @@ -4456,6 +4418,7 @@ int stmmac_dvr_remove(struct device *dev) stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); unregister_netdev(ndev); + phylink_destroy(priv->phylink); if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); clk_disable_unprepare(priv->plat->pclk); @@ -4466,7 +4429,6 @@ int stmmac_dvr_remove(struct device *dev) stmmac_mdio_unregister(ndev); destroy_workqueue(priv->wq); mutex_destroy(&priv->lock); - free_netdev(ndev); return 0; } @@ -4487,8 +4449,7 @@ int stmmac_suspend(struct device *dev) if (!ndev || !netif_running(ndev)) return 0; - if (ndev->phydev) - phy_stop(ndev->phydev); + phylink_stop(priv->phylink); mutex_lock(&priv->lock); @@ -4513,9 +4474,7 @@ int stmmac_suspend(struct device *dev) } mutex_unlock(&priv->lock); - priv->oldlink = false; priv->speed = SPEED_UNKNOWN; - priv->oldduplex = DUPLEX_UNKNOWN; return 0; } EXPORT_SYMBOL_GPL(stmmac_suspend); @@ -4590,7 +4549,7 @@ int stmmac_resume(struct device *dev) stmmac_clear_descriptors(priv); stmmac_hw_setup(ndev, false); - stmmac_init_tx_coalesce(priv); + stmmac_init_coalesce(priv); stmmac_set_rx_mode(ndev); stmmac_enable_all_queues(priv); @@ -4599,8 +4558,7 @@ int stmmac_resume(struct device *dev) mutex_unlock(&priv->lock); - if (ndev->phydev) - phy_start(ndev->phydev); + phylink_start(priv->phylink); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 1341bb5f693c..4304c1abc5d1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -10,13 +10,13 @@ Maintainer: Giuseppe Cavallaro <peppe.cavallaro@st.com> *******************************************************************************/ +#include <linux/gpio/consumer.h> #include <linux/io.h> #include <linux/iopoll.h> #include <linux/mii.h> -#include <linux/of.h> -#include <linux/of_gpio.h> #include <linux/of_mdio.h> #include <linux/phy.h> +#include <linux/property.h> #include <linux/slab.h> #include "dwxgmac2.h" @@ -24,11 +24,14 @@ #define MII_BUSY 0x00000001 #define MII_WRITE 0x00000002 +#define MII_DATA_MASK GENMASK(15, 0) /* GMAC4 defines */ #define MII_GMAC4_GOC_SHIFT 2 +#define MII_GMAC4_REG_ADDR_SHIFT 16 #define MII_GMAC4_WRITE (1 << MII_GMAC4_GOC_SHIFT) #define MII_GMAC4_READ (3 << MII_GMAC4_GOC_SHIFT) +#define MII_GMAC4_C45E BIT(1) /* XGMAC defines */ #define MII_XGMAC_SADDR BIT(18) @@ -155,22 +158,34 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 v; - int data; u32 value = MII_BUSY; + int data = 0; + u32 v; value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) & priv->hw->mii.clk_csr_mask; - if (priv->plat->has_gmac4) + if (priv->plat->has_gmac4) { value |= MII_GMAC4_READ; + if (phyreg & MII_ADDR_C45) { + value |= MII_GMAC4_C45E; + value &= 
~priv->hw->mii.reg_mask; + value |= ((phyreg >> MII_DEVADDR_C45_SHIFT) << + priv->hw->mii.reg_shift) & + priv->hw->mii.reg_mask; + + data |= (phyreg & MII_REGADDR_C45_MASK) << + MII_GMAC4_REG_ADDR_SHIFT; + } + } if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), 100, 10000)) return -EBUSY; + writel(data, priv->ioaddr + mii_data); writel(value, priv->ioaddr + mii_address); if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), @@ -178,7 +193,7 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) return -EBUSY; /* Read the data from the MII data register */ - data = (int)readl(priv->ioaddr + mii_data); + data = (int)readl(priv->ioaddr + mii_data) & MII_DATA_MASK; return data; } @@ -198,8 +213,9 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 v; u32 value = MII_BUSY; + int data = phydata; + u32 v; value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; @@ -207,10 +223,21 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) & priv->hw->mii.clk_csr_mask; - if (priv->plat->has_gmac4) + if (priv->plat->has_gmac4) { value |= MII_GMAC4_WRITE; - else + if (phyreg & MII_ADDR_C45) { + value |= MII_GMAC4_C45E; + value &= ~priv->hw->mii.reg_mask; + value |= ((phyreg >> MII_DEVADDR_C45_SHIFT) << + priv->hw->mii.reg_shift) & + priv->hw->mii.reg_mask; + + data |= (phyreg & MII_REGADDR_C45_MASK) << + MII_GMAC4_REG_ADDR_SHIFT; + } + } else { value |= MII_WRITE; + } /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), @@ -218,7 +245,7 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, return -EBUSY; /* Set the MII address register to write */ - writel(phydata, priv->ioaddr + mii_data); + writel(data, priv->ioaddr + mii_data); writel(value, priv->ioaddr + mii_address); /* Wait until any existing MII operation is complete */ @@ -237,51 +264,35 @@ int stmmac_mdio_reset(struct mii_bus *bus) struct net_device *ndev = bus->priv; struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; - struct stmmac_mdio_bus_data *data = priv->plat->mdio_bus_data; #ifdef CONFIG_OF if (priv->device->of_node) { - if (data->reset_gpio < 0) { - struct device_node *np = priv->device->of_node; + struct gpio_desc *reset_gpio; + u32 delays[3] = { 0, 0, 0 }; - if (!np) - return 0; + reset_gpio = devm_gpiod_get_optional(priv->device, + "snps,reset", + GPIOD_OUT_LOW); + if (IS_ERR(reset_gpio)) + return PTR_ERR(reset_gpio); - data->reset_gpio = of_get_named_gpio(np, - "snps,reset-gpio", 0); - if (data->reset_gpio < 0) - return 0; + device_property_read_u32_array(priv->device, + "snps,reset-delays-us", + delays, ARRAY_SIZE(delays)); - data->active_low = of_property_read_bool(np, - "snps,reset-active-low"); - of_property_read_u32_array(np, - "snps,reset-delays-us", data->delays, 3); + if (delays[0]) + msleep(DIV_ROUND_UP(delays[0], 1000)); - if (devm_gpio_request(priv->device, data->reset_gpio, - "mdio-reset")) - return 0; - } - - gpio_direction_output(data->reset_gpio, - data->active_low ? 
1 : 0); - if (data->delays[0]) - msleep(DIV_ROUND_UP(data->delays[0], 1000)); + gpiod_set_value_cansleep(reset_gpio, 1); + if (delays[1]) + msleep(DIV_ROUND_UP(delays[1], 1000)); - gpio_set_value(data->reset_gpio, data->active_low ? 0 : 1); - if (data->delays[1]) - msleep(DIV_ROUND_UP(data->delays[1], 1000)); - - gpio_set_value(data->reset_gpio, data->active_low ? 1 : 0); - if (data->delays[2]) - msleep(DIV_ROUND_UP(data->delays[2], 1000)); + gpiod_set_value_cansleep(reset_gpio, 0); + if (delays[2]) + msleep(DIV_ROUND_UP(delays[2], 1000)); } #endif - if (data->phy_reset) { - netdev_dbg(ndev, "stmmac_mdio_reset: calling phy_reset\n"); - data->phy_reset(priv->plat->bsp_priv); - } - /* This is a workaround for problems with the STE101P PHY. * It doesn't complete its reset until at least one clock cycle * on MDC, so perform a dummy mdio read. To be updated for GMAC4 @@ -318,11 +329,6 @@ int stmmac_mdio_register(struct net_device *ndev) if (mdio_bus_data->irqs) memcpy(new_bus->irq, mdio_bus_data->irqs, sizeof(new_bus->irq)); -#ifdef CONFIG_OF - if (priv->device->of_node) - mdio_bus_data->reset_gpio = -1; -#endif - new_bus->name = "stmmac"; if (priv->plat->has_xgmac) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 0bd72739a071..86f9c07a38cf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -63,7 +63,6 @@ static void common_default_data(struct plat_stmmacenet_data *plat) plat->has_gmac = 1; plat->force_sf_dma_mode = 1; - plat->mdio_bus_data->phy_reset = NULL; plat->mdio_bus_data->phy_mask = 0; /* Set default value for multicast hash bins */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 0f0f4b31eb7e..73fc2524372e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -323,21 +323,6 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat, {}, }; - /* If phy-handle property is passed from DT, use it as the PHY */ - plat->phy_node = of_parse_phandle(np, "phy-handle", 0); - if (plat->phy_node) - dev_dbg(dev, "Found phy-handle subnode\n"); - - /* If phy-handle is not specified, check if we have a fixed-phy */ - if (!plat->phy_node && of_phy_is_fixed_link(np)) { - if ((of_phy_register_fixed_link(np) < 0)) - return -ENODEV; - - dev_dbg(dev, "Found fixed-link subnode\n"); - plat->phy_node = of_node_get(np); - mdio = false; - } - if (of_match_node(need_mdio_ids, np)) { plat->mdio_node = of_get_child_by_name(np, "mdio"); } else { @@ -387,6 +372,13 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) *mac = of_get_mac_address(np); plat->interface = of_get_phy_mode(np); + /* Some wrapper drivers still rely on phy_node. Let's save it while + * they are not converted to phylink. 
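*
* As an editor's illustration only (not part of this patch; it assumes
* the phylink API of this kernel generation and the stmmac names
* priv->phylink_config / stmmac_phylink_mac_ops), the handoff to
* phylink looks roughly like:
*
*	struct phylink *phylink;
*
*	phylink = phylink_create(&priv->phylink_config,
*				 of_fwnode_handle(priv->plat->phylink_node),
*				 priv->plat->interface,
*				 &stmmac_phylink_mac_ops);
*	if (IS_ERR(phylink))
*		return PTR_ERR(phylink);
*	priv->phylink = phylink;
*
* phylink then parses "phy-handle" / "fixed-link" out of phylink_node
* by itself, which is why the fixed-link parsing was dropped above.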
*/ + plat->phy_node = of_parse_phandle(np, "phy-handle", 0); + + /* PHYLINK automatically parses the phy-handle property */ + plat->phylink_node = np; + /* Get max speed of operation from device tree */ if (of_property_read_u32(np, "max-speed", &plat->max_speed)) plat->max_speed = -1; @@ -581,10 +573,6 @@ error_pclk_get: void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat) { - struct device_node *np = pdev->dev.of_node; - - if (of_phy_is_fixed_link(np)) - of_phy_deregister_fixed_link(np); of_node_put(plat->phy_node); of_node_put(plat->mdio_node); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c new file mode 100644 index 000000000000..a97b1ea76438 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c @@ -0,0 +1,850 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2019 Synopsys, Inc. and/or its affiliates. + * stmmac Selftests Support + * + * Author: Jose Abreu <joabreu@synopsys.com> + */ + +#include <linux/completion.h> +#include <linux/ethtool.h> +#include <linux/ip.h> +#include <linux/phy.h> +#include <linux/udp.h> +#include <net/tcp.h> +#include <net/udp.h> +#include "stmmac.h" + +struct stmmachdr { + __be32 version; + __be64 magic; + u8 id; +} __packed; + +#define STMMAC_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct stmmachdr)) +#define STMMAC_TEST_PKT_MAGIC 0xdeadcafecafedeadULL +#define STMMAC_LB_TIMEOUT msecs_to_jiffies(200) + +struct stmmac_packet_attrs { + int vlan; + int vlan_id_in; + int vlan_id_out; + unsigned char *src; + unsigned char *dst; + u32 ip_src; + u32 ip_dst; + int tcp; + int sport; + int dport; + u32 exp_hash; + int dont_wait; + int timeout; + int size; + int remove_sa; + u8 id; +}; + +static u8 stmmac_test_next_id; + +static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv, + struct stmmac_packet_attrs *attr) +{ + struct sk_buff *skb = NULL; + struct udphdr *uhdr = NULL; + struct tcphdr *thdr = NULL; + struct stmmachdr *shdr; + struct ethhdr *ehdr; + struct iphdr *ihdr; + int iplen, size; + + size = attr->size + STMMAC_TEST_PKT_SIZE; + if (attr->vlan) { + size += 4; + if (attr->vlan > 1) + size += 4; + } + + if (attr->tcp) + size += sizeof(struct tcphdr); + else + size += sizeof(struct udphdr); + + skb = netdev_alloc_skb(priv->dev, size); + if (!skb) + return NULL; + + prefetchw(skb->data); + skb_reserve(skb, NET_IP_ALIGN); + + if (attr->vlan > 1) + ehdr = skb_push(skb, ETH_HLEN + 8); + else if (attr->vlan) + ehdr = skb_push(skb, ETH_HLEN + 4); + else if (attr->remove_sa) + ehdr = skb_push(skb, ETH_HLEN - 6); + else + ehdr = skb_push(skb, ETH_HLEN); + skb_reset_mac_header(skb); + + skb_set_network_header(skb, skb->len); + ihdr = skb_put(skb, sizeof(*ihdr)); + + skb_set_transport_header(skb, skb->len); + if (attr->tcp) + thdr = skb_put(skb, sizeof(*thdr)); + else + uhdr = skb_put(skb, sizeof(*uhdr)); + + if (!attr->remove_sa) + eth_zero_addr(ehdr->h_source); + eth_zero_addr(ehdr->h_dest); + if (attr->src && !attr->remove_sa) + ether_addr_copy(ehdr->h_source, attr->src); + if (attr->dst) + ether_addr_copy(ehdr->h_dest, attr->dst); + + if (!attr->remove_sa) { + ehdr->h_proto = htons(ETH_P_IP); + } else { + __be16 *ptr = (__be16 *)ehdr; + + /* HACK */ + ptr[3] = htons(ETH_P_IP); + } + + if (attr->vlan) { + __be16 *tag, *proto; + + if (!attr->remove_sa) { + tag = (void *)ehdr + ETH_HLEN; + proto = (void *)ehdr + (2 * ETH_ALEN); + } else { + tag = (void *)ehdr + ETH_HLEN 
- 6; + proto = (void *)ehdr + ETH_ALEN; + } + + proto[0] = htons(ETH_P_8021Q); + tag[0] = htons(attr->vlan_id_out); + tag[1] = htons(ETH_P_IP); + if (attr->vlan > 1) { + proto[0] = htons(ETH_P_8021AD); + tag[1] = htons(ETH_P_8021Q); + tag[2] = htons(attr->vlan_id_in); + tag[3] = htons(ETH_P_IP); + } + } + + if (attr->tcp) { + thdr->source = htons(attr->sport); + thdr->dest = htons(attr->dport); + thdr->doff = sizeof(struct tcphdr) / 4; + thdr->check = 0; + } else { + uhdr->source = htons(attr->sport); + uhdr->dest = htons(attr->dport); + uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size); + uhdr->check = 0; + } + + ihdr->ihl = 5; + ihdr->ttl = 32; + ihdr->version = 4; + if (attr->tcp) + ihdr->protocol = IPPROTO_TCP; + else + ihdr->protocol = IPPROTO_UDP; + iplen = sizeof(*ihdr) + sizeof(*shdr) + attr->size; + if (attr->tcp) + iplen += sizeof(*thdr); + else + iplen += sizeof(*uhdr); + ihdr->tot_len = htons(iplen); + ihdr->frag_off = 0; + ihdr->saddr = 0; + ihdr->daddr = htonl(attr->ip_dst); + ihdr->tos = 0; + ihdr->id = 0; + ip_send_check(ihdr); + + shdr = skb_put(skb, sizeof(*shdr)); + shdr->version = 0; + shdr->magic = cpu_to_be64(STMMAC_TEST_PKT_MAGIC); + attr->id = stmmac_test_next_id; + shdr->id = stmmac_test_next_id++; + + if (attr->size) + skb_put(skb, attr->size); + + skb->csum = 0; + skb->ip_summed = CHECKSUM_PARTIAL; + if (attr->tcp) { + thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, ihdr->daddr, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct tcphdr, check); + } else { + udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr); + } + + skb->protocol = htons(ETH_P_IP); + skb->pkt_type = PACKET_HOST; + skb->dev = priv->dev; + + return skb; +} + +struct stmmac_test_priv { + struct stmmac_packet_attrs *packet; + struct packet_type pt; + struct completion comp; + int double_vlan; + int vlan_id; + int ok; +}; + +static int stmmac_test_loopback_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct stmmac_test_priv *tpriv = pt->af_packet_priv; + struct stmmachdr *shdr; + struct ethhdr *ehdr; + struct udphdr *uhdr; + struct tcphdr *thdr; + struct iphdr *ihdr; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (skb_linearize(skb)) + goto out; + if (skb_headlen(skb) < (STMMAC_TEST_PKT_SIZE - ETH_HLEN)) + goto out; + + ehdr = (struct ethhdr *)skb_mac_header(skb); + if (tpriv->packet->dst) { + if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->dst)) + goto out; + } + if (tpriv->packet->src) { + if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr)) + goto out; + } + + ihdr = ip_hdr(skb); + if (tpriv->double_vlan) + ihdr = (struct iphdr *)(skb_network_header(skb) + 4); + + if (tpriv->packet->tcp) { + if (ihdr->protocol != IPPROTO_TCP) + goto out; + + thdr = (struct tcphdr *)((u8 *)ihdr + 4 * ihdr->ihl); + if (thdr->dest != htons(tpriv->packet->dport)) + goto out; + + shdr = (struct stmmachdr *)((u8 *)thdr + sizeof(*thdr)); + } else { + if (ihdr->protocol != IPPROTO_UDP) + goto out; + + uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl); + if (uhdr->dest != htons(tpriv->packet->dport)) + goto out; + + shdr = (struct stmmachdr *)((u8 *)uhdr + sizeof(*uhdr)); + } + + if (shdr->magic != cpu_to_be64(STMMAC_TEST_PKT_MAGIC)) + goto out; + if (tpriv->packet->exp_hash && !skb->hash) + goto out; + if (tpriv->packet->id != shdr->id) + goto out; + + tpriv->ok = true; + complete(&tpriv->comp); +out: + kfree_skb(skb); + return 0; +} + +static int 
__stmmac_test_loopback(struct stmmac_priv *priv, + struct stmmac_packet_attrs *attr) +{ + struct stmmac_test_priv *tpriv; + struct sk_buff *skb = NULL; + int ret = 0; + + tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); + if (!tpriv) + return -ENOMEM; + + tpriv->ok = false; + init_completion(&tpriv->comp); + + tpriv->pt.type = htons(ETH_P_IP); + tpriv->pt.func = stmmac_test_loopback_validate; + tpriv->pt.dev = priv->dev; + tpriv->pt.af_packet_priv = tpriv; + tpriv->packet = attr; + dev_add_pack(&tpriv->pt); + + skb = stmmac_test_get_udp_skb(priv, attr); + if (!skb) { + ret = -ENOMEM; + goto cleanup; + } + + skb_set_queue_mapping(skb, 0); + ret = dev_queue_xmit(skb); + if (ret) + goto cleanup; + + if (attr->dont_wait) + goto cleanup; + + if (!attr->timeout) + attr->timeout = STMMAC_LB_TIMEOUT; + + wait_for_completion_timeout(&tpriv->comp, attr->timeout); + ret = !tpriv->ok; + +cleanup: + dev_remove_pack(&tpriv->pt); + kfree(tpriv); + return ret; +} + +static int stmmac_test_mac_loopback(struct stmmac_priv *priv) +{ + struct stmmac_packet_attrs attr = { }; + + attr.dst = priv->dev->dev_addr; + return __stmmac_test_loopback(priv, &attr); +} + +static int stmmac_test_phy_loopback(struct stmmac_priv *priv) +{ + struct stmmac_packet_attrs attr = { }; + int ret; + + if (!priv->dev->phydev) + return -EBUSY; + + ret = phy_loopback(priv->dev->phydev, true); + if (ret) + return ret; + + attr.dst = priv->dev->dev_addr; + ret = __stmmac_test_loopback(priv, &attr); + + phy_loopback(priv->dev->phydev, false); + return ret; +} + +static int stmmac_test_mmc(struct stmmac_priv *priv) +{ + struct stmmac_counters initial, final; + int ret; + + memset(&initial, 0, sizeof(initial)); + memset(&final, 0, sizeof(final)); + + if (!priv->dma_cap.rmon) + return -EOPNOTSUPP; + + /* Save previous results into internal struct */ + stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc); + + ret = stmmac_test_mac_loopback(priv); + if (ret) + return ret; + + /* These will be loopback results so no need to save them */ + stmmac_mmc_read(priv, priv->mmcaddr, &final); + + /* + * The number of MMC counters available depends on HW configuration + * so we just use this one to validate the feature. I hope there is + * not a version without this counter. 
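*
* As a worked illustration (editor's, with made-up numbers): looping
* back a single frame should advance the good-TX frame counter, e.g.
*
*	initial.mmc_tx_framecount_g == 1024
*	final.mmc_tx_framecount_g   == 1025
*
* and the check below returns -EINVAL whenever the counter did not
* move.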
+ */ + if (final.mmc_tx_framecount_g <= initial.mmc_tx_framecount_g) + return -EINVAL; + + return 0; +} + +static int stmmac_test_eee(struct stmmac_priv *priv) +{ + struct stmmac_extra_stats *initial, *final; + int retries = 10; + int ret; + + if (!priv->dma_cap.eee || !priv->eee_active) + return -EOPNOTSUPP; + + initial = kzalloc(sizeof(*initial), GFP_KERNEL); + if (!initial) + return -ENOMEM; + + final = kzalloc(sizeof(*final), GFP_KERNEL); + if (!final) { + ret = -ENOMEM; + goto out_free_initial; + } + + memcpy(initial, &priv->xstats, sizeof(*initial)); + + ret = stmmac_test_mac_loopback(priv); + if (ret) + goto out_free_final; + + /* We have no traffic in the line so, sooner or later it will go LPI */ + while (--retries) { + memcpy(final, &priv->xstats, sizeof(*final)); + + if (final->irq_tx_path_in_lpi_mode_n > + initial->irq_tx_path_in_lpi_mode_n) + break; + msleep(100); + } + + if (!retries) { + ret = -ETIMEDOUT; + goto out_free_final; + } + + if (final->irq_tx_path_in_lpi_mode_n <= + initial->irq_tx_path_in_lpi_mode_n) { + ret = -EINVAL; + goto out_free_final; + } + + if (final->irq_tx_path_exit_lpi_mode_n <= + initial->irq_tx_path_exit_lpi_mode_n) { + ret = -EINVAL; + goto out_free_final; + } + +out_free_final: + kfree(final); +out_free_initial: + kfree(initial); + return ret; +} + +static int stmmac_filter_check(struct stmmac_priv *priv) +{ + if (!(priv->dev->flags & IFF_PROMISC)) + return 0; + + netdev_warn(priv->dev, "Test can't be run in promiscuous mode!\n"); + return -EOPNOTSUPP; +} + +static int stmmac_test_hfilt(struct stmmac_priv *priv) +{ + unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd}; + unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb}; + struct stmmac_packet_attrs attr = { }; + int ret; + + ret = stmmac_filter_check(priv); + if (ret) + return ret; + + ret = dev_mc_add(priv->dev, gd_addr); + if (ret) + return ret; + + attr.dst = gd_addr; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + + attr.dst = bd_addr; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = !ret; + +cleanup: + dev_mc_del(priv->dev, gd_addr); + return ret; +} + +static int stmmac_test_pfilt(struct stmmac_priv *priv) +{ + unsigned char gd_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char bd_addr[ETH_ALEN] = {0x08, 0x00, 0x22, 0x33, 0x44, 0x55}; + struct stmmac_packet_attrs attr = { }; + int ret; + + if (stmmac_filter_check(priv)) + return -EOPNOTSUPP; + + ret = dev_uc_add(priv->dev, gd_addr); + if (ret) + return ret; + + attr.dst = gd_addr; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + + attr.dst = bd_addr; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = !ret; + +cleanup: + dev_uc_del(priv->dev, gd_addr); + return ret; +} + +static int stmmac_dummy_sync(struct net_device *netdev, const u8 *addr) +{ + return 0; +} + +static void stmmac_test_set_rx_mode(struct net_device *netdev) +{ + /* As we are in test mode of ethtool we already own the rtnl lock + * so no address will change from user. 
We can just call the + * ndo_set_rx_mode() callback directly */ + if (netdev->netdev_ops->ndo_set_rx_mode) + netdev->netdev_ops->ndo_set_rx_mode(netdev); +} + +static int stmmac_test_mcfilt(struct stmmac_priv *priv) +{ + unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77}; + struct stmmac_packet_attrs attr = { }; + int ret; + + if (stmmac_filter_check(priv)) + return -EOPNOTSUPP; + + /* Remove all MC addresses */ + __dev_mc_unsync(priv->dev, NULL); + stmmac_test_set_rx_mode(priv->dev); + + ret = dev_uc_add(priv->dev, uc_addr); + if (ret) + goto cleanup; + + attr.dst = uc_addr; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + + attr.dst = mc_addr; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = !ret; + +cleanup: + dev_uc_del(priv->dev, uc_addr); + __dev_mc_sync(priv->dev, stmmac_dummy_sync, NULL); + stmmac_test_set_rx_mode(priv->dev); + return ret; +} + +static int stmmac_test_ucfilt(struct stmmac_priv *priv) +{ + unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77}; + unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77}; + struct stmmac_packet_attrs attr = { }; + int ret; + + if (stmmac_filter_check(priv)) + return -EOPNOTSUPP; + + /* Remove all UC addresses */ + __dev_uc_unsync(priv->dev, NULL); + stmmac_test_set_rx_mode(priv->dev); + + ret = dev_mc_add(priv->dev, mc_addr); + if (ret) + goto cleanup; + + attr.dst = mc_addr; + + /* Shall receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + + attr.dst = uc_addr; + + /* Shall NOT receive packet */ + ret = __stmmac_test_loopback(priv, &attr); + ret = !ret; + +cleanup: + dev_mc_del(priv->dev, mc_addr); + __dev_uc_sync(priv->dev, stmmac_dummy_sync, NULL); + stmmac_test_set_rx_mode(priv->dev); + return ret; +} + +static int stmmac_test_flowctrl_validate(struct sk_buff *skb, + struct net_device *ndev, + struct packet_type *pt, + struct net_device *orig_ndev) +{ + struct stmmac_test_priv *tpriv = pt->af_packet_priv; + struct ethhdr *ehdr; + + ehdr = (struct ethhdr *)skb_mac_header(skb); + if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr)) + goto out; + if (ehdr->h_proto != htons(ETH_P_PAUSE)) + goto out; + + tpriv->ok = true; + complete(&tpriv->comp); +out: + kfree_skb(skb); + return 0; +} + +static int stmmac_test_flowctrl(struct stmmac_priv *priv) +{ + unsigned char paddr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x01}; + struct phy_device *phydev = priv->dev->phydev; + u32 rx_cnt = priv->plat->rx_queues_to_use; + struct stmmac_test_priv *tpriv; + unsigned int pkt_count; + int i, ret = 0; + + if (!phydev || !phydev->pause) + return -EOPNOTSUPP; + + tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL); + if (!tpriv) + return -ENOMEM; + + tpriv->ok = false; + init_completion(&tpriv->comp); + tpriv->pt.type = htons(ETH_P_PAUSE); + tpriv->pt.func = stmmac_test_flowctrl_validate; + tpriv->pt.dev = priv->dev; + tpriv->pt.af_packet_priv = tpriv; + dev_add_pack(&tpriv->pt); + + /* Compute minimum number of packets to make FIFO full */ + pkt_count = priv->plat->rx_fifo_size; + if (!pkt_count) + pkt_count = priv->dma_cap.rx_fifo_size; + pkt_count /= 1400; + pkt_count *= 2; + + for (i = 0; i < rx_cnt; i++) + stmmac_stop_rx(priv, priv->ioaddr, i); + + ret = dev_set_promiscuity(priv->dev, 1); + if (ret) + goto cleanup; + + ret = dev_mc_add(priv->dev, paddr); + if (ret) + goto cleanup; + + for (i = 
0; i < pkt_count; i++) { + struct stmmac_packet_attrs attr = { }; + + attr.dst = priv->dev->dev_addr; + attr.dont_wait = true; + attr.size = 1400; + + ret = __stmmac_test_loopback(priv, &attr); + if (ret) + goto cleanup; + if (tpriv->ok) + break; + } + + /* Wait for some time in case RX Watchdog is enabled */ + msleep(200); + + for (i = 0; i < rx_cnt; i++) { + struct stmmac_channel *ch = &priv->channel[i]; + + stmmac_start_rx(priv, priv->ioaddr, i); + local_bh_disable(); + napi_reschedule(&ch->rx_napi); + local_bh_enable(); + } + + wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT); + ret = !tpriv->ok; + +cleanup: + dev_mc_del(priv->dev, paddr); + dev_set_promiscuity(priv->dev, -1); + dev_remove_pack(&tpriv->pt); + kfree(tpriv); + return ret; +} + +#define STMMAC_LOOPBACK_NONE 0 +#define STMMAC_LOOPBACK_MAC 1 +#define STMMAC_LOOPBACK_PHY 2 + +static const struct stmmac_test { + char name[ETH_GSTRING_LEN]; + int lb; + int (*fn)(struct stmmac_priv *priv); +} stmmac_selftests[] = { + { + .name = "MAC Loopback ", + .lb = STMMAC_LOOPBACK_MAC, + .fn = stmmac_test_mac_loopback, + }, { + .name = "PHY Loopback ", + .lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */ + .fn = stmmac_test_phy_loopback, + }, { + .name = "MMC Counters ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_mmc, + }, { + .name = "EEE ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_eee, + }, { + .name = "Hash Filter MC ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_hfilt, + }, { + .name = "Perfect Filter UC ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_pfilt, + }, { + .name = "MC Filter ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_mcfilt, + }, { + .name = "UC Filter ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_ucfilt, + }, { + .name = "Flow Control ", + .lb = STMMAC_LOOPBACK_PHY, + .fn = stmmac_test_flowctrl, + }, +}; + +void stmmac_selftest_run(struct net_device *dev, + struct ethtool_test *etest, u64 *buf) +{ + struct stmmac_priv *priv = netdev_priv(dev); + int count = stmmac_selftest_get_count(priv); + int carrier = netif_carrier_ok(dev); + int i, ret; + + memset(buf, 0, sizeof(*buf) * count); + stmmac_test_next_id = 0; + + if (etest->flags != ETH_TEST_FL_OFFLINE) { + netdev_err(priv->dev, "Only offline tests are supported\n"); + etest->flags |= ETH_TEST_FL_FAILED; + return; + } else if (!carrier) { + netdev_err(priv->dev, "You need a valid link to execute tests\n"); + etest->flags |= ETH_TEST_FL_FAILED; + return; + } + + /* We don't want extra traffic */ + netif_carrier_off(dev); + + /* Wait for queues to drain */ + msleep(200); + + for (i = 0; i < count; i++) { + ret = 0; + + switch (stmmac_selftests[i].lb) { + case STMMAC_LOOPBACK_PHY: + ret = -EOPNOTSUPP; + if (dev->phydev) + ret = phy_loopback(dev->phydev, true); + if (!ret) + break; + /* Fallthrough */ + case STMMAC_LOOPBACK_MAC: + ret = stmmac_set_mac_loopback(priv, priv->ioaddr, true); + break; + case STMMAC_LOOPBACK_NONE: + break; + default: + ret = -EOPNOTSUPP; + break; + } + + /* + * First tests will always be MAC / PHY loopback. If any of + * them is not supported we abort earlier. 
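*
* For context, a hedged editor's sketch (not in this hunk; the exact
* callback names are assumptions) of how ethtool reaches this runner
* through the driver's ethtool_ops:
*
*	static const struct ethtool_ops stmmac_ethtool_ops = {
*		...
*		.self_test	= stmmac_selftest_run,
*		.get_strings	= stmmac_get_strings,
*		.get_sset_count	= stmmac_get_sset_count,
*	};
*
* so an offline "ethtool -t" request ends up in stmmac_selftest_run().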
+ */ + if (ret) { + netdev_err(priv->dev, "Loopback is not supported\n"); + etest->flags |= ETH_TEST_FL_FAILED; + break; + } + + ret = stmmac_selftests[i].fn(priv); + if (ret && (ret != -EOPNOTSUPP)) + etest->flags |= ETH_TEST_FL_FAILED; + buf[i] = ret; + + switch (stmmac_selftests[i].lb) { + case STMMAC_LOOPBACK_PHY: + ret = -EOPNOTSUPP; + if (dev->phydev) + ret = phy_loopback(dev->phydev, false); + if (!ret) + break; + /* Fallthrough */ + case STMMAC_LOOPBACK_MAC: + stmmac_set_mac_loopback(priv, priv->ioaddr, false); + break; + default: + break; + } + } + + /* Restart everything */ + if (carrier) + netif_carrier_on(dev); +} + +void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data) +{ + u8 *p = data; + int i; + + for (i = 0; i < stmmac_selftest_get_count(priv); i++) { + snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1, + stmmac_selftests[i].name); + p += ETH_GSTRING_LEN; + } +} + +int stmmac_selftest_get_count(struct stmmac_priv *priv) +{ + return ARRAY_SIZE(stmmac_selftests); +} diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 6f99437a6962..0bc5863bffeb 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -1217,8 +1217,6 @@ static int link_status_1g_rgmii(struct niu *np, int *link_up_p) spin_lock_irqsave(&np->lock, flags); - err = -EINVAL; - err = mii_read(np, np->phy_addr, MII_BMSR); if (err < 0) goto out; diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index bd05a977ee7e..834afca3a019 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -50,6 +50,7 @@ config TI_CPSW depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST select TI_DAVINCI_MDIO select MFD_SYSCON + select PAGE_POOL select REGMAP ---help--- This driver supports TI's CPSW Ethernet Switch. @@ -60,6 +61,7 @@ config TI_CPSW config TI_CPTS bool "TI Common Platform Time Sync (CPTS) Support" depends on TI_CPSW || TI_KEYSTONE_NETCP || COMPILE_TEST + depends on COMMON_CLK depends on POSIX_TIMERS ---help--- This driver supports the Common Platform Time Sync unit of diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 634fc484a0b3..f320f9a0de8b 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -31,6 +31,10 @@ #include <linux/if_vlan.h> #include <linux/kmemleak.h> #include <linux/sys_soc.h> +#include <net/page_pool.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> +#include <linux/filter.h> #include <linux/pinctrl/consumer.h> #include <net/pkt_cls.h> @@ -60,6 +64,10 @@ static int descs_pool_size = CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT; module_param(descs_pool_size, int, 0444); MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool"); +/* The buf includes headroom compatible with both skb and xdpf */ +#define CPSW_HEADROOM_NA (max(XDP_PACKET_HEADROOM, NET_SKB_PAD) + NET_IP_ALIGN) +#define CPSW_HEADROOM ALIGN(CPSW_HEADROOM_NA, sizeof(long)) + #define for_each_slave(priv, func, arg...) 
\ do { \ struct cpsw_slave *slave; \ @@ -74,6 +82,11 @@ MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool"); (func)(slave++, ##arg); \ } while (0) +#define CPSW_XMETA_OFFSET ALIGN(sizeof(struct xdp_frame), sizeof(long)) + +#define CPSW_XDP_CONSUMED 1 +#define CPSW_XDP_PASS 0 + static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid); @@ -337,24 +350,58 @@ void cpsw_intr_disable(struct cpsw_common *cpsw) return; } +static int cpsw_is_xdpf_handle(void *handle) +{ + return (unsigned long)handle & BIT(0); +} + +static void *cpsw_xdpf_to_handle(struct xdp_frame *xdpf) +{ + return (void *)((unsigned long)xdpf | BIT(0)); +} + +static struct xdp_frame *cpsw_handle_to_xdpf(void *handle) +{ + return (struct xdp_frame *)((unsigned long)handle & ~BIT(0)); +} + +struct __aligned(sizeof(long)) cpsw_meta_xdp { + struct net_device *ndev; + int ch; +}; + void cpsw_tx_handler(void *token, int len, int status) { + struct cpsw_meta_xdp *xmeta; + struct xdp_frame *xdpf; + struct net_device *ndev; struct netdev_queue *txq; - struct sk_buff *skb = token; - struct net_device *ndev = skb->dev; - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct sk_buff *skb; + int ch; + + if (cpsw_is_xdpf_handle(token)) { + xdpf = cpsw_handle_to_xdpf(token); + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET; + ndev = xmeta->ndev; + ch = xmeta->ch; + xdp_return_frame(xdpf); + } else { + skb = token; + ndev = skb->dev; + ch = skb_get_queue_mapping(skb); + cpts_tx_timestamp(ndev_to_cpsw(ndev)->cpts, skb); + dev_kfree_skb_any(skb); + } /* Check whether the queue is stopped due to stalled tx dma, if the * queue is stopped then start the queue as we have free desc for tx */ - txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb)); + txq = netdev_get_tx_queue(ndev, ch); if (unlikely(netif_tx_queue_stopped(txq))) netif_tx_wake_queue(txq); - cpts_tx_timestamp(cpsw->cpts, skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += len; - dev_kfree_skb_any(skb); } static void cpsw_rx_vlan_encap(struct sk_buff *skb) @@ -400,24 +447,252 @@ static void cpsw_rx_vlan_encap(struct sk_buff *skb) } } +static int cpsw_xdp_tx_frame(struct cpsw_priv *priv, struct xdp_frame *xdpf, + struct page *page) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_meta_xdp *xmeta; + struct cpdma_chan *txch; + dma_addr_t dma; + int ret, port; + + xmeta = (void *)xdpf + CPSW_XMETA_OFFSET; + xmeta->ndev = priv->ndev; + xmeta->ch = 0; + txch = cpsw->txv[0].ch; + + port = priv->emac_port + cpsw->data.dual_emac; + if (page) { + dma = page_pool_get_dma_addr(page); + dma += xdpf->headroom + sizeof(struct xdp_frame); + ret = cpdma_chan_submit_mapped(txch, cpsw_xdpf_to_handle(xdpf), + dma, xdpf->len, port); + } else { + if (sizeof(*xmeta) > xdpf->headroom) { + xdp_return_frame_rx_napi(xdpf); + return -EINVAL; + } + + ret = cpdma_chan_submit(txch, cpsw_xdpf_to_handle(xdpf), + xdpf->data, xdpf->len, port); + } + + if (ret) { + priv->ndev->stats.tx_dropped++; + xdp_return_frame_rx_napi(xdpf); + } + + return ret; +} + +static int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp, + struct page *page) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct net_device *ndev = priv->ndev; + int ret = CPSW_XDP_CONSUMED; + struct xdp_frame *xdpf; + struct bpf_prog *prog; + u32 act; + + rcu_read_lock(); + + prog = READ_ONCE(priv->xdp_prog); + if (!prog) { + ret = CPSW_XDP_PASS; + goto out; + } + + act = bpf_prog_run_xdp(prog, xdp); + switch (act) { + case XDP_PASS: + ret = CPSW_XDP_PASS; + break; + case XDP_TX: + 
xdpf = convert_to_xdp_frame(xdp); + if (unlikely(!xdpf)) + goto drop; + + cpsw_xdp_tx_frame(priv, xdpf, page); + break; + case XDP_REDIRECT: + if (xdp_do_redirect(ndev, xdp, prog)) + goto drop; + + /* Have to flush here, per packet, instead of doing it in bulk + * at the end of the napi handler. The RX devices on this + * particular hardware are sharing a common queue, so the + * incoming device might change per packet. + */ + xdp_do_flush_map(); + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(ndev, prog, act); + /* fall through -- handle aborts by dropping packet */ + case XDP_DROP: + goto drop; + } +out: + rcu_read_unlock(); + return ret; +drop: + rcu_read_unlock(); + page_pool_recycle_direct(cpsw->page_pool[ch], page); + return ret; +} + +static unsigned int cpsw_rxbuf_total_len(unsigned int len) +{ + len += CPSW_HEADROOM; + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + return SKB_DATA_ALIGN(len); +} + +static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw, + int size) +{ + struct page_pool_params pp_params; + struct page_pool *pool; + + pp_params.order = 0; + pp_params.flags = PP_FLAG_DMA_MAP; + pp_params.pool_size = size; + pp_params.nid = NUMA_NO_NODE; + pp_params.dma_dir = DMA_BIDIRECTIONAL; + pp_params.dev = cpsw->dev; + + pool = page_pool_create(&pp_params); + if (IS_ERR(pool)) + dev_err(cpsw->dev, "cannot create rx page pool\n"); + + return pool; +} + +static int cpsw_ndev_create_xdp_rxq(struct cpsw_priv *priv, int ch) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct xdp_rxq_info *rxq; + struct page_pool *pool; + int ret; + + pool = cpsw->page_pool[ch]; + rxq = &priv->xdp_rxq[ch]; + + ret = xdp_rxq_info_reg(rxq, priv->ndev, ch); + if (ret) + return ret; + + ret = xdp_rxq_info_reg_mem_model(rxq, MEM_TYPE_PAGE_POOL, pool); + if (ret) + xdp_rxq_info_unreg(rxq); + + return ret; +} + +static void cpsw_ndev_destroy_xdp_rxq(struct cpsw_priv *priv, int ch) +{ + struct xdp_rxq_info *rxq = &priv->xdp_rxq[ch]; + + if (!xdp_rxq_info_is_reg(rxq)) + return; + + xdp_rxq_info_unreg(rxq); +} + +static int cpsw_create_rx_pool(struct cpsw_common *cpsw, int ch) +{ + struct page_pool *pool; + int ret = 0, pool_size; + + pool_size = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch); + pool = cpsw_create_page_pool(cpsw, pool_size); + if (IS_ERR(pool)) + ret = PTR_ERR(pool); + else + cpsw->page_pool[ch] = pool; + + return ret; +} + +void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i, ch; + + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!ndev) + continue; + + cpsw_ndev_destroy_xdp_rxq(netdev_priv(ndev), ch); + } + + page_pool_destroy(cpsw->page_pool[ch]); + cpsw->page_pool[ch] = NULL; + } +} + +int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i, ch, ret; + + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + ret = cpsw_create_rx_pool(cpsw, ch); + if (ret) + goto err_cleanup; + + /* using the same page pool is allowed as no rx handlers run + * simultaneously for both ndevs + */ + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!ndev) + continue; + + ret = cpsw_ndev_create_xdp_rxq(netdev_priv(ndev), ch); + if (ret) + goto err_cleanup; + } + } + + return 0; + +err_cleanup: + cpsw_destroy_xdp_rxqs(cpsw); + + return ret; +} + static void cpsw_rx_handler(void *token, int len, int status) { - struct cpdma_chan *ch; - struct sk_buff 
*skb = token; - struct sk_buff *new_skb; - struct net_device *ndev = skb->dev; - int ret = 0, port; - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct page *new_page, *page = token; + void *pa = page_address(page); + struct cpsw_meta_xdp *xmeta = pa + CPSW_XMETA_OFFSET; + struct cpsw_common *cpsw = ndev_to_cpsw(xmeta->ndev); + int pkt_size = cpsw->rx_packet_max; + int ret = 0, port, ch = xmeta->ch; + int headroom = CPSW_HEADROOM; + struct net_device *ndev = xmeta->ndev; struct cpsw_priv *priv; + struct page_pool *pool; + struct sk_buff *skb; + struct xdp_buff xdp; + dma_addr_t dma; - if (cpsw->data.dual_emac) { + if (cpsw->data.dual_emac && status >= 0) { port = CPDMA_RX_SOURCE_PORT(status); - if (port) { + if (port) ndev = cpsw->slaves[--port].ndev; - skb->dev = ndev; - } } + priv = netdev_priv(ndev); + pool = cpsw->page_pool[ch]; if (unlikely(status < 0) || unlikely(!netif_running(ndev))) { /* In dual emac mode check for all interfaces */ if (cpsw->data.dual_emac && cpsw->usage_count && @@ -426,47 +701,88 @@ static void cpsw_rx_handler(void *token, int len, int status) * is already down and the other interface is up * and running, instead of freeing which results * in reducing of the number of rx descriptor in - * DMA engine, requeue skb back to cpdma. + * DMA engine, requeue page back to cpdma. */ - new_skb = skb; + new_page = page; goto requeue; } - /* the interface is going down, skbs are purged */ - dev_kfree_skb_any(skb); + /* the interface is going down, pages are purged */ + page_pool_recycle_direct(pool, page); return; } - new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max); - if (new_skb) { - skb_copy_queue_mapping(new_skb, skb); - skb_put(skb, len); - if (status & CPDMA_RX_VLAN_ENCAP) - cpsw_rx_vlan_encap(skb); - priv = netdev_priv(ndev); - if (priv->rx_ts_enabled) - cpts_rx_timestamp(cpsw->cpts, skb); - skb->protocol = eth_type_trans(skb, ndev); - netif_receive_skb(skb); - ndev->stats.rx_bytes += len; - ndev->stats.rx_packets++; - kmemleak_not_leak(new_skb); - } else { + new_page = page_pool_dev_alloc_pages(pool); + if (unlikely(!new_page)) { + new_page = page; ndev->stats.rx_dropped++; - new_skb = skb; + goto requeue; } -requeue: - if (netif_dormant(ndev)) { - dev_kfree_skb_any(new_skb); - return; + if (priv->xdp_prog) { + if (status & CPDMA_RX_VLAN_ENCAP) { + xdp.data = pa + CPSW_HEADROOM + + CPSW_RX_VLAN_ENCAP_HDR_SIZE; + xdp.data_end = xdp.data + len - + CPSW_RX_VLAN_ENCAP_HDR_SIZE; + } else { + xdp.data = pa + CPSW_HEADROOM; + xdp.data_end = xdp.data + len; + } + + xdp_set_data_meta_invalid(&xdp); + + xdp.data_hard_start = pa; + xdp.rxq = &priv->xdp_rxq[ch]; + + ret = cpsw_run_xdp(priv, ch, &xdp, page); + if (ret != CPSW_XDP_PASS) + goto requeue; + + /* XDP prog might have changed packet data and boundaries */ + len = xdp.data_end - xdp.data; + headroom = xdp.data - xdp.data_hard_start; + + /* XDP prog can modify vlan tag, so can't use encap header */ + status &= ~CPDMA_RX_VLAN_ENCAP; } - ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch; - ret = cpdma_chan_submit(ch, new_skb, new_skb->data, - skb_tailroom(new_skb), 0); - if (WARN_ON(ret < 0)) - dev_kfree_skb_any(new_skb); + /* pass skb to netstack if no XDP prog or returned XDP_PASS */ + skb = build_skb(pa, cpsw_rxbuf_total_len(pkt_size)); + if (!skb) { + ndev->stats.rx_dropped++; + page_pool_recycle_direct(pool, page); + goto requeue; + } + + skb_reserve(skb, headroom); + skb_put(skb, len); + skb->dev = ndev; + if (status & CPDMA_RX_VLAN_ENCAP) + cpsw_rx_vlan_encap(skb); + if (priv->rx_ts_enabled) + 
cpts_rx_timestamp(cpsw->cpts, skb); + skb->protocol = eth_type_trans(skb, ndev); + + /* unmap page as no netstack skb page recycling */ + page_pool_release_page(pool, page); + netif_receive_skb(skb); + + ndev->stats.rx_bytes += len; + ndev->stats.rx_packets++; + +requeue: + xmeta = page_address(new_page) + CPSW_XMETA_OFFSET; + xmeta->ndev = ndev; + xmeta->ch = ch; + + dma = page_pool_get_dma_addr(new_page) + CPSW_HEADROOM; + ret = cpdma_chan_submit_mapped(cpsw->rxv[ch].ch, new_page, dma, + pkt_size, 0); + if (ret < 0) { + WARN_ON(ret == -ENOMEM); + page_pool_recycle_direct(pool, new_page); + } } void cpsw_split_res(struct cpsw_common *cpsw) @@ -1035,33 +1351,39 @@ static void cpsw_init_host_port(struct cpsw_priv *priv) int cpsw_fill_rx_channels(struct cpsw_priv *priv) { struct cpsw_common *cpsw = priv->cpsw; - struct sk_buff *skb; + struct cpsw_meta_xdp *xmeta; + struct page_pool *pool; + struct page *page; int ch_buf_num; int ch, i, ret; + dma_addr_t dma; for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + pool = cpsw->page_pool[ch]; ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch); for (i = 0; i < ch_buf_num; i++) { - skb = __netdev_alloc_skb_ip_align(priv->ndev, - cpsw->rx_packet_max, - GFP_KERNEL); - if (!skb) { - cpsw_err(priv, ifup, "cannot allocate skb\n"); + page = page_pool_dev_alloc_pages(pool); + if (!page) { + cpsw_err(priv, ifup, "allocate rx page err\n"); return -ENOMEM; } - skb_set_queue_mapping(skb, ch); - ret = cpdma_chan_submit(cpsw->rxv[ch].ch, skb, - skb->data, skb_tailroom(skb), - 0); + xmeta = page_address(page) + CPSW_XMETA_OFFSET; + xmeta->ndev = priv->ndev; + xmeta->ch = ch; + + dma = page_pool_get_dma_addr(page) + CPSW_HEADROOM; + ret = cpdma_chan_idle_submit_mapped(cpsw->rxv[ch].ch, + page, dma, + cpsw->rx_packet_max, + 0); if (ret < 0) { cpsw_err(priv, ifup, - "cannot submit skb to channel %d rx, error %d\n", + "cannot submit page to channel %d rx, error %d\n", ch, ret); - kfree_skb(skb); + page_pool_recycle_direct(pool, page); return ret; } - kmemleak_not_leak(skb); } cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n", @@ -1397,6 +1719,13 @@ static int cpsw_ndo_open(struct net_device *ndev) enable_irq(cpsw->irqs_table[0]); } + /* create rxqs for both infs in dual mac as they use same pool + * and must be destroyed together when no users. 
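*
* A minimal sketch of the page life cycle behind this (editor's
* illustration, built only from page_pool calls already used in this
* patch):
*
*	page = page_pool_dev_alloc_pages(pool);	/* RX refill */
*	dma = page_pool_get_dma_addr(page);	/* pool did the DMA mapping */
*	/* ...descriptor submitted, frame received... */
*	page_pool_recycle_direct(pool, page);	/* error / XDP_DROP path */
*	page_pool_release_page(pool, page);	/* page handed off to an skb */
*
* Both slave ndevs refill from the same per-channel pool, so a pool and
* its rxqs may only be torn down once neither interface uses them.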
+ */ + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret < 0) + goto err_cleanup; + ret = cpsw_fill_rx_channels(priv); if (ret < 0) goto err_cleanup; @@ -1423,7 +1752,11 @@ static int cpsw_ndo_open(struct net_device *ndev) return 0; err_cleanup: - cpdma_ctlr_stop(cpsw->dma); + if (!cpsw->usage_count) { + cpdma_ctlr_stop(cpsw->dma); + cpsw_destroy_xdp_rxqs(cpsw); + } + for_each_slave(priv, cpsw_slave_stop, cpsw); pm_runtime_put_sync(cpsw->dev); netif_carrier_off(priv->ndev); @@ -1447,6 +1780,7 @@ static int cpsw_ndo_stop(struct net_device *ndev) cpsw_intr_disable(cpsw); cpdma_ctlr_stop(cpsw->dma); cpsw_ale_stop(cpsw->ale); + cpsw_destroy_xdp_rxqs(cpsw); } for_each_slave(priv, cpsw_slave_stop, cpsw); @@ -2004,6 +2338,64 @@ static int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, } } +static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->prog; + + if (!priv->xdpi.prog && !prog) + return 0; + + if (!xdp_attachment_flags_ok(&priv->xdpi, bpf)) + return -EBUSY; + + WRITE_ONCE(priv->xdp_prog, prog); + + xdp_attachment_setup(&priv->xdpi, bpf); + + return 0; +} + +static int cpsw_ndo_bpf(struct net_device *ndev, struct netdev_bpf *bpf) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + + switch (bpf->command) { + case XDP_SETUP_PROG: + return cpsw_xdp_prog_setup(priv, bpf); + + case XDP_QUERY_PROG: + return xdp_attachment_query(&priv->xdpi, bpf); + + default: + return -EINVAL; + } +} + +static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + struct xdp_frame *xdpf; + int i, drops = 0; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + for (i = 0; i < n; i++) { + xdpf = frames[i]; + if (xdpf->len < CPSW_MIN_PACKET_SIZE) { + xdp_return_frame_rx_napi(xdpf); + drops++; + continue; + } + + if (cpsw_xdp_tx_frame(priv, xdpf, NULL)) + drops++; + } + + return n - drops; +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void cpsw_ndo_poll_controller(struct net_device *ndev) { @@ -2032,6 +2424,8 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_vlan_rx_add_vid = cpsw_ndo_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = cpsw_ndo_vlan_rx_kill_vid, .ndo_setup_tc = cpsw_ndo_setup_tc, + .ndo_bpf = cpsw_ndo_bpf, + .ndo_xdp_xmit = cpsw_ndo_xdp_xmit, }; static void cpsw_get_drvinfo(struct net_device *ndev, @@ -2179,6 +2573,7 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, return ret; } + slave_data->slave_node = slave_node; slave_data->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", &lenp); @@ -2262,8 +2657,7 @@ no_phy_slave: static void cpsw_remove_dt(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_common *cpsw = platform_get_drvdata(pdev); struct cpsw_platform_data *data = &cpsw->data; struct device_node *node = pdev->dev.of_node; struct device_node *slave_node; @@ -2330,6 +2724,7 @@ static int cpsw_probe_dual_emac(struct cpsw_priv *priv) /* register the network device */ SET_NETDEV_DEV(ndev, cpsw->dev); + ndev->dev.of_node = cpsw->slaves[1].data->slave_node; ret = register_netdev(ndev); if (ret) dev_err(cpsw->dev, "cpsw: error registering net device\n"); @@ -2474,7 +2869,7 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_cpts; } - platform_set_drvdata(pdev, ndev); + platform_set_drvdata(pdev, cpsw); priv = netdev_priv(ndev); priv->cpsw = cpsw; 
priv->ndev = ndev; @@ -2507,6 +2902,7 @@ static int cpsw_probe(struct platform_device *pdev) /* register the network device */ SET_NETDEV_DEV(ndev, dev); + ndev->dev.of_node = cpsw->slaves[0].data->slave_node; ret = register_netdev(ndev); if (ret) { dev_err(dev, "error registering net device\n"); @@ -2567,9 +2963,8 @@ clean_runtime_disable_ret: static int cpsw_remove(struct platform_device *pdev) { - struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - int ret; + struct cpsw_common *cpsw = platform_get_drvdata(pdev); + int i, ret; ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) { @@ -2577,9 +2972,9 @@ static int cpsw_remove(struct platform_device *pdev) return ret; } - if (cpsw->data.dual_emac) - unregister_netdev(cpsw->slaves[1].ndev); - unregister_netdev(ndev); + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev) + unregister_netdev(cpsw->slaves[i].ndev); cpts_release(cpsw->cpts); cpdma_ctlr_destroy(cpsw->dma); @@ -2592,20 +2987,13 @@ static int cpsw_remove(struct platform_device *pdev) #ifdef CONFIG_PM_SLEEP static int cpsw_suspend(struct device *dev) { - struct net_device *ndev = dev_get_drvdata(dev); - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - - if (cpsw->data.dual_emac) { - int i; + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; - for (i = 0; i < cpsw->data.slaves; i++) { + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev) if (netif_running(cpsw->slaves[i].ndev)) cpsw_ndo_stop(cpsw->slaves[i].ndev); - } - } else { - if (netif_running(ndev)) - cpsw_ndo_stop(ndev); - } /* Select sleep pin state */ pinctrl_pm_select_sleep_state(dev); @@ -2615,25 +3003,20 @@ static int cpsw_suspend(struct device *dev) static int cpsw_resume(struct device *dev) { - struct net_device *ndev = dev_get_drvdata(dev); - struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_common *cpsw = dev_get_drvdata(dev); + int i; /* Select default pin state */ pinctrl_pm_select_default_state(dev); /* shut up ASSERT_RTNL() warning in netif_set_real_num_tx/rx_queues */ rtnl_lock(); - if (cpsw->data.dual_emac) { - int i; - for (i = 0; i < cpsw->data.slaves; i++) { + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev) if (netif_running(cpsw->slaves[i].ndev)) cpsw_ndo_open(cpsw->slaves[i].ndev); - } - } else { - if (netif_running(ndev)) - cpsw_ndo_open(ndev); - } + rtnl_unlock(); return 0; diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index 6d1c9ebae7cc..31248a6cc642 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -458,21 +458,22 @@ int cpsw_nway_reset(struct net_device *ndev) static void cpsw_suspend_data_pass(struct net_device *ndev) { struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - struct cpsw_slave *slave; int i; /* Disable NAPI scheduling */ cpsw_intr_disable(cpsw); /* Stop all transmit queues for every network device. - * Disable re-using rx descriptors with dormant_on. 
*/ - for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { - if (!(slave->ndev && netif_running(slave->ndev))) + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (!(ndev && netif_running(ndev))) continue; - netif_tx_stop_all_queues(slave->ndev); - netif_dormant_on(slave->ndev); + netif_tx_stop_all_queues(ndev); + + /* Barrier, so that stop_queue visible to other cpus */ + smp_mb__after_atomic(); } /* Handle rest of tx packets and stop cpdma channels */ @@ -483,14 +484,8 @@ static int cpsw_resume_data_pass(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; - struct cpsw_slave *slave; int i, ret; - /* Allow rx packets handling */ - for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) - if (slave->ndev && netif_running(slave->ndev)) - netif_dormant_off(slave->ndev); - /* After this receive is started */ if (cpsw->usage_count) { ret = cpsw_fill_rx_channels(priv); @@ -502,9 +497,11 @@ static int cpsw_resume_data_pass(struct net_device *ndev) } /* Resume transmit for every affected interface */ - for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) - if (slave->ndev && netif_running(slave->ndev)) - netif_tx_start_all_queues(slave->ndev); + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (ndev && netif_running(ndev)) + netif_tx_start_all_queues(ndev); + } return 0; } @@ -581,14 +578,26 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx, return 0; } +static void cpsw_fail(struct cpsw_common *cpsw) +{ + struct net_device *ndev; + int i; + + for (i = 0; i < cpsw->data.slaves; i++) { + ndev = cpsw->slaves[i].ndev; + if (ndev) + dev_close(ndev); + } +} + int cpsw_set_channels_common(struct net_device *ndev, struct ethtool_channels *chs, cpdma_handler_fn rx_handler) { struct cpsw_priv *priv = netdev_priv(ndev); struct cpsw_common *cpsw = priv->cpsw; - struct cpsw_slave *slave; - int i, ret; + struct net_device *sl_ndev; + int i, new_pools, ret; ret = cpsw_check_ch_settings(cpsw, chs); if (ret < 0) @@ -596,6 +605,8 @@ int cpsw_set_channels_common(struct net_device *ndev, cpsw_suspend_data_pass(ndev); + new_pools = (chs->rx_count != cpsw->rx_ch_num) && cpsw->usage_count; + ret = cpsw_update_channels_res(priv, chs->rx_count, 1, rx_handler); if (ret) goto err; @@ -604,35 +615,40 @@ int cpsw_set_channels_common(struct net_device *ndev, if (ret) goto err; - for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { - if (!(slave->ndev && netif_running(slave->ndev))) + for (i = 0; i < cpsw->data.slaves; i++) { + sl_ndev = cpsw->slaves[i].ndev; + if (!(sl_ndev && netif_running(sl_ndev))) continue; /* Inform stack about new count of queues */ - ret = netif_set_real_num_tx_queues(slave->ndev, - cpsw->tx_ch_num); + ret = netif_set_real_num_tx_queues(sl_ndev, cpsw->tx_ch_num); if (ret) { dev_err(priv->dev, "cannot set real number of tx queues\n"); goto err; } - ret = netif_set_real_num_rx_queues(slave->ndev, - cpsw->rx_ch_num); + ret = netif_set_real_num_rx_queues(sl_ndev, cpsw->rx_ch_num); if (ret) { dev_err(priv->dev, "cannot set real number of rx queues\n"); goto err; } } - if (cpsw->usage_count) - cpsw_split_res(cpsw); + cpsw_split_res(cpsw); + + if (new_pools) { + cpsw_destroy_xdp_rxqs(cpsw); + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret) + goto err; + } ret = cpsw_resume_data_pass(ndev); if (!ret) return 0; err: dev_err(priv->dev, "cannot update channels number, closing device\n"); - dev_close(ndev); + 
cpsw_fail(cpsw); return ret; } @@ -652,9 +668,8 @@ void cpsw_get_ringparam(struct net_device *ndev, int cpsw_set_ringparam(struct net_device *ndev, struct ethtool_ringparam *ering) { - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpsw_common *cpsw = priv->cpsw; - int ret; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + int descs_num, ret; /* ignore ering->tx_pending - only rx_pending adjustment is supported */ @@ -663,22 +678,34 @@ int cpsw_set_ringparam(struct net_device *ndev, ering->rx_pending > (cpsw->descs_pool_size - CPSW_MAX_QUEUES)) return -EINVAL; - if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma)) + descs_num = cpdma_get_num_rx_descs(cpsw->dma); + if (ering->rx_pending == descs_num) return 0; cpsw_suspend_data_pass(ndev); - cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending); + ret = cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending); + if (ret) { + if (cpsw_resume_data_pass(ndev)) + goto err; + + return ret; + } - if (cpsw->usage_count) - cpdma_chan_split_pool(cpsw->dma); + if (cpsw->usage_count) { + cpsw_destroy_xdp_rxqs(cpsw); + ret = cpsw_create_xdp_rxqs(cpsw); + if (ret) + goto err; + } ret = cpsw_resume_data_pass(ndev); if (!ret) return 0; - +err: + cpdma_set_num_rx_descs(cpsw->dma, descs_num); dev_err(cpsw->dev, "cannot set ring params, closing device\n"); - dev_close(ndev); + cpsw_fail(cpsw); return ret; } diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 04795b97ee71..362c5a986869 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -272,6 +272,7 @@ struct cpsw_host_regs { }; struct cpsw_slave_data { + struct device_node *slave_node; struct device_node *phy_node; char phy_id[MII_BUS_ID_SIZE]; int phy_if; @@ -346,6 +347,7 @@ struct cpsw_common { int rx_ch_num, tx_ch_num; int speed; int usage_count; + struct page_pool *page_pool[CPSW_MAX_QUEUES]; }; struct cpsw_priv { @@ -360,6 +362,10 @@ struct cpsw_priv { int shp_cfg_speed; int tx_ts_enabled; int rx_ts_enabled; + struct bpf_prog *xdp_prog; + struct xdp_rxq_info xdp_rxq[CPSW_MAX_QUEUES]; + struct xdp_attachment_info xdpi; + u32 emac_port; struct cpsw_common *cpsw; }; @@ -391,6 +397,8 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv); void cpsw_intr_enable(struct cpsw_common *cpsw); void cpsw_intr_disable(struct cpsw_common *cpsw); void cpsw_tx_handler(void *token, int len, int status); +int cpsw_create_xdp_rxqs(struct cpsw_common *cpsw); +void cpsw_destroy_xdp_rxqs(struct cpsw_common *cpsw); /* ethtool */ u32 cpsw_get_msglevel(struct net_device *ndev); diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index e257018ada71..61136428e2c0 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -5,6 +5,7 @@ * Copyright (C) 2012 Richard Cochran <richardcochran@gmail.com> * */ +#include <linux/clk-provider.h> #include <linux/err.h> #include <linux/if.h> #include <linux/hrtimer.h> @@ -532,6 +533,82 @@ static void cpts_calc_mult_shift(struct cpts *cpts) freq, cpts->cc.mult, cpts->cc.shift, (ns - NSEC_PER_SEC)); } +static int cpts_of_mux_clk_setup(struct cpts *cpts, struct device_node *node) +{ + struct device_node *refclk_np; + const char **parent_names; + unsigned int num_parents; + struct clk_hw *clk_hw; + int ret = -EINVAL; + u32 *mux_table; + + refclk_np = of_get_child_by_name(node, "cpts-refclk-mux"); + if (!refclk_np) + /* refclk selection is not supported on all SoCs */ + return 0; + + num_parents = of_clk_get_parent_count(refclk_np); + if (num_parents < 1) { 
dev_err(cpts->dev, "mux-clock %s must have parents\n", + refclk_np->name); + goto mux_fail; + } + + parent_names = devm_kzalloc(cpts->dev, (sizeof(char *) * num_parents), + GFP_KERNEL); + + mux_table = devm_kzalloc(cpts->dev, sizeof(*mux_table) * num_parents, + GFP_KERNEL); + if (!mux_table || !parent_names) { + ret = -ENOMEM; + goto mux_fail; + } + + of_clk_parent_fill(refclk_np, parent_names, num_parents); + + ret = of_property_read_variable_u32_array(refclk_np, "ti,mux-tbl", + mux_table, + num_parents, num_parents); + if (ret < 0) + goto mux_fail; + + clk_hw = clk_hw_register_mux_table(cpts->dev, refclk_np->name, + parent_names, num_parents, + 0, + &cpts->reg->rftclk_sel, 0, 0x1F, + 0, mux_table, NULL); + if (IS_ERR(clk_hw)) { + ret = PTR_ERR(clk_hw); + goto mux_fail; + } + + ret = devm_add_action_or_reset(cpts->dev, + (void(*)(void *))clk_hw_unregister_mux, + clk_hw); + if (ret) { + dev_err(cpts->dev, "add clkmux unreg action %d", ret); + goto mux_fail; + } + + ret = of_clk_add_hw_provider(refclk_np, of_clk_hw_simple_get, clk_hw); + if (ret) + goto mux_fail; + + ret = devm_add_action_or_reset(cpts->dev, + (void(*)(void *))of_clk_del_provider, + refclk_np); + if (ret) { + dev_err(cpts->dev, "add clkmux provider unreg action %d", ret); + goto mux_fail; + } + + return ret; + +mux_fail: + of_node_put(refclk_np); + return ret; +} + static int cpts_of_parse(struct cpts *cpts, struct device_node *node) { int ret = -EINVAL; @@ -547,7 +624,7 @@ static int cpts_of_parse(struct cpts *cpts, struct device_node *node) (!cpts->cc.mult && cpts->cc.shift)) goto of_error; - return 0; + return cpts_of_mux_clk_setup(cpts, node); of_error: dev_err(cpts->dev, "CPTS: Missing property in the DT.\n"); @@ -572,9 +649,14 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, if (ret) return ERR_PTR(ret); - cpts->refclk = devm_clk_get(dev, "cpts"); + cpts->refclk = devm_get_clk_from_child(dev, node, "cpts"); + if (IS_ERR(cpts->refclk)) + /* try get clk from dev node for compatibility */ + cpts->refclk = devm_clk_get(dev, "cpts"); + if (IS_ERR(cpts->refclk)) { - dev_err(dev, "Failed to get cpts refclk\n"); + dev_err(dev, "Failed to get cpts refclk %ld\n", + PTR_ERR(cpts->refclk)); return ERR_CAST(cpts->refclk); } diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h index 024aab6af12f..bb997c11ee15 100644 --- a/drivers/net/ethernet/ti/cpts.h +++ b/drivers/net/ethernet/ti/cpts.h @@ -24,7 +24,7 @@ struct cpsw_cpts { u32 idver; /* Identification and version */ u32 control; /* Time sync control */ - u32 res1; + u32 rftclk_sel; /* Reference Clock Select Register */ u32 ts_push; /* Time stamp event push */ u32 ts_load_val; /* Time stamp load value */ u32 ts_load_en; /* Time stamp load enable */ diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 35bf14d8e7af..0ca2a1a254de 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -134,6 +134,15 @@ struct cpdma_control_info { #define ACCESS_RW (ACCESS_RO | ACCESS_WO) }; +struct submit_info { + struct cpdma_chan *chan; + int directed; + void *token; + void *data; + int flags; + int len; +}; + static struct cpdma_control_info controls[] = { [CPDMA_TX_RLIM] = {CPDMA_DMACONTROL, 8, 0xffff, ACCESS_RW}, [CPDMA_CMD_IDLE] = {CPDMA_DMACONTROL, 3, 1, ACCESS_WO}, @@ -176,6 +185,8 @@ static struct cpdma_control_info controls[] = { (directed << CPDMA_TO_PORT_SHIFT)); \ } while (0) +#define CPDMA_DMA_EXT_MAP BIT(16) + static void 
cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr) { struct cpdma_desc_pool *pool = ctlr->pool; @@ -1002,34 +1013,26 @@ static void __cpdma_chan_submit(struct cpdma_chan *chan, } } -int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, - int len, int directed) +static int cpdma_chan_submit_si(struct submit_info *si) { + struct cpdma_chan *chan = si->chan; struct cpdma_ctlr *ctlr = chan->ctlr; + int len = si->len; + int swlen = len; struct cpdma_desc __iomem *desc; dma_addr_t buffer; - unsigned long flags; u32 mode; - int ret = 0; - - spin_lock_irqsave(&chan->lock, flags); - - if (chan->state == CPDMA_STATE_TEARDOWN) { - ret = -EINVAL; - goto unlock_ret; - } + int ret; if (chan->count >= chan->desc_num) { chan->stats.desc_alloc_fail++; - ret = -ENOMEM; - goto unlock_ret; + return -ENOMEM; } desc = cpdma_desc_alloc(ctlr->pool); if (!desc) { chan->stats.desc_alloc_fail++; - ret = -ENOMEM; - goto unlock_ret; + return -ENOMEM; } if (len < ctlr->params.min_packet_size) { @@ -1037,16 +1040,21 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, chan->stats.runt_transmit_buff++; } - buffer = dma_map_single(ctlr->dev, data, len, chan->dir); - ret = dma_mapping_error(ctlr->dev, buffer); - if (ret) { - cpdma_desc_free(ctlr->pool, desc, 1); - ret = -EINVAL; - goto unlock_ret; - } - mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP; - cpdma_desc_to_port(chan, mode, directed); + cpdma_desc_to_port(chan, mode, si->directed); + + if (si->flags & CPDMA_DMA_EXT_MAP) { + buffer = (dma_addr_t)si->data; + dma_sync_single_for_device(ctlr->dev, buffer, len, chan->dir); + swlen |= CPDMA_DMA_EXT_MAP; + } else { + buffer = dma_map_single(ctlr->dev, si->data, len, chan->dir); + ret = dma_mapping_error(ctlr->dev, buffer); + if (ret) { + cpdma_desc_free(ctlr->pool, desc, 1); + return -EINVAL; + } + } /* Relaxed IO accessors can be used here as there is read barrier * at the end of write sequence. 
@@ -1055,9 +1063,9 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, writel_relaxed(buffer, &desc->hw_buffer); writel_relaxed(len, &desc->hw_len); writel_relaxed(mode | len, &desc->hw_mode); - writel_relaxed((uintptr_t)token, &desc->sw_token); + writel_relaxed((uintptr_t)si->token, &desc->sw_token); writel_relaxed(buffer, &desc->sw_buffer); - writel_relaxed(len, &desc->sw_len); + writel_relaxed(swlen, &desc->sw_len); desc_read(desc, sw_len); __cpdma_chan_submit(chan, desc); @@ -1066,8 +1074,105 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, chan_write(chan, rxfree, 1); chan->count++; + return 0; +} -unlock_ret: +int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data, + int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = data; + si.len = len; + si.directed = directed; + si.flags = 0; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state == CPDMA_STATE_TEARDOWN) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); + spin_unlock_irqrestore(&chan->lock, flags); + return ret; +} + +int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = (void *)data; + si.len = len; + si.directed = directed; + si.flags = CPDMA_DMA_EXT_MAP; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state == CPDMA_STATE_TEARDOWN) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); + spin_unlock_irqrestore(&chan->lock, flags); + return ret; +} + +int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, + int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = data; + si.len = len; + si.directed = directed; + si.flags = 0; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state != CPDMA_STATE_ACTIVE) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); + spin_unlock_irqrestore(&chan->lock, flags); + return ret; +} + +int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed) +{ + struct submit_info si; + unsigned long flags; + int ret; + + si.chan = chan; + si.token = token; + si.data = (void *)data; + si.len = len; + si.directed = directed; + si.flags = CPDMA_DMA_EXT_MAP; + + spin_lock_irqsave(&chan->lock, flags); + if (chan->state != CPDMA_STATE_ACTIVE) { + spin_unlock_irqrestore(&chan->lock, flags); + return -EINVAL; + } + + ret = cpdma_chan_submit_si(&si); spin_unlock_irqrestore(&chan->lock, flags); return ret; } @@ -1097,10 +1202,17 @@ static void __cpdma_chan_free(struct cpdma_chan *chan, uintptr_t token; token = desc_read(desc, sw_token); - buff_dma = desc_read(desc, sw_buffer); origlen = desc_read(desc, sw_len); - dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir); + buff_dma = desc_read(desc, sw_buffer); + if (origlen & CPDMA_DMA_EXT_MAP) { + origlen &= ~CPDMA_DMA_EXT_MAP; + dma_sync_single_for_cpu(ctlr->dev, buff_dma, origlen, + chan->dir); + } else { + dma_unmap_single(ctlr->dev, buff_dma, origlen, chan->dir); + } + cpdma_desc_free(pool, desc, 1); (*chan->handler)((void *)token, outlen, status); } @@ -1311,8 +1423,23 @@ int cpdma_get_num_tx_descs(struct 
cpdma_ctlr *ctlr) return ctlr->num_tx_desc; } -void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) +int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) { + unsigned long flags; + int temp, ret; + + spin_lock_irqsave(&ctlr->lock, flags); + + temp = ctlr->num_rx_desc; ctlr->num_rx_desc = num_rx_desc; ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc; + ret = cpdma_chan_split_pool(ctlr); + if (ret) { + ctlr->num_rx_desc = temp; + ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc; + } + + spin_unlock_irqrestore(&ctlr->lock, flags); + + return ret; } diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 10376062dafa..d3cfe234d16a 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -77,8 +77,14 @@ int cpdma_chan_stop(struct cpdma_chan *chan); int cpdma_chan_get_stats(struct cpdma_chan *chan, struct cpdma_chan_stats *stats); +int cpdma_chan_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed); int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, int len, int directed); +int cpdma_chan_idle_submit_mapped(struct cpdma_chan *chan, void *token, + dma_addr_t data, int len, int directed); +int cpdma_chan_idle_submit(struct cpdma_chan *chan, void *token, void *data, + int len, int directed); int cpdma_chan_process(struct cpdma_chan *chan, int quota); int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable); @@ -110,8 +116,7 @@ enum cpdma_control { int cpdma_control_get(struct cpdma_ctlr *ctlr, int control); int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value); int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr); -void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc); +int cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc); int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr); -int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr); #endif diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 4bf65cab79e6..5f4ece0d5a73 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1428,8 +1428,8 @@ static int emac_dev_open(struct net_device *ndev) if (!skb) break; - ret = cpdma_chan_submit(priv->rxchan, skb, skb->data, - skb_tailroom(skb), 0); + ret = cpdma_chan_idle_submit(priv->rxchan, skb, skb->data, + skb_tailroom(skb), 0); if (WARN_ON(ret < 0)) break; } diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index ec179700c184..2c1fac33136c 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3554,7 +3554,7 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev, static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, struct device_node *node, void **inst_priv) { - struct device_node *interfaces, *interface; + struct device_node *interfaces, *interface, *cpts_node; struct device_node *secondary_ports; struct cpsw_ale_params ale_params; struct gbe_priv *gbe_dev; @@ -3713,7 +3713,12 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, dev_dbg(gbe_dev->dev, "Created a gbe ale engine\n"); } - gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg, node); + cpts_node = of_get_child_by_name(node, "cpts"); + if (!cpts_node) + cpts_node = of_node_get(node); + + gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg, cpts_node); + 
of_node_put(cpts_node); if (IS_ENABLED(CONFIG_TI_CPTS) && IS_ERR(gbe_dev->cpts)) { ret = PTR_ERR(gbe_dev->cpts); goto free_sec_ports; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 3ecddb72f45a..051033580f0a 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -301,7 +301,7 @@ struct gelic_card { */ unsigned int irq; struct gelic_descr *tx_top, *rx_top; - struct gelic_descr descr[0]; /* must be the last */ + struct gelic_descr descr[]; /* must be the last */ }; struct gelic_port { diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h index c0ecc6c7b5e0..cdfe7809e3c1 100644 --- a/drivers/net/ethernet/via/via-velocity.h +++ b/drivers/net/ethernet/via/via-velocity.h @@ -1509,7 +1509,7 @@ static inline int velocity_get_ip(struct velocity_info *vptr) rcu_read_lock(); in_dev = __in_dev_get_rcu(vptr->netdev); if (in_dev != NULL) { - ifa = (struct in_ifaddr *) in_dev->ifa_list; + ifa = rcu_dereference(in_dev->ifa_list); if (ifa != NULL) { memcpy(vptr->ip_addr, &ifa->ifa_address, 4); res = 0; diff --git a/drivers/net/ethernet/wiznet/w5100-spi.c b/drivers/net/ethernet/wiznet/w5100-spi.c index 918b3e50850a..2b4126d2427d 100644 --- a/drivers/net/ethernet/wiznet/w5100-spi.c +++ b/drivers/net/ethernet/wiznet/w5100-spi.c @@ -15,6 +15,7 @@ #include <linux/delay.h> #include <linux/netdevice.h> #include <linux/of_net.h> +#include <linux/of_device.h> #include <linux/spi/spi.h> #include "w5100.h" @@ -409,14 +410,32 @@ static const struct w5100_ops w5500_ops = { .init = w5500_spi_init, }; +static const struct of_device_id w5100_of_match[] = { + { .compatible = "wiznet,w5100", .data = (const void*)W5100, }, + { .compatible = "wiznet,w5200", .data = (const void*)W5200, }, + { .compatible = "wiznet,w5500", .data = (const void*)W5500, }, + { }, +}; +MODULE_DEVICE_TABLE(of, w5100_of_match); + static int w5100_spi_probe(struct spi_device *spi) { - const struct spi_device_id *id = spi_get_device_id(spi); + const struct of_device_id *of_id; const struct w5100_ops *ops; + kernel_ulong_t driver_data; int priv_size; const void *mac = of_get_mac_address(spi->dev.of_node); - switch (id->driver_data) { + if (spi->dev.of_node) { + of_id = of_match_device(w5100_of_match, &spi->dev); + if (!of_id) + return -ENODEV; + driver_data = (kernel_ulong_t)of_id->data; + } else { + driver_data = spi_get_device_id(spi)->driver_data; + } + + switch (driver_data) { case W5100: ops = &w5100_spi_ops; priv_size = 0; @@ -453,6 +472,7 @@ static struct spi_driver w5100_spi_driver = { .driver = { .name = "w5100", .pm = &w5100_pm_ops, + .of_match_table = w5100_of_match, }, .probe = w5100_spi_probe, .remove = w5100_spi_remove, diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index af96e05c5bcd..8d994cebb6b0 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_XILINX bool "Xilinx devices" default y - depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS || X86 || COMPILE_TEST + depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS || X86 || ARM || COMPILE_TEST ---help--- If you have a network (Ethernet) card belonging to this class, say Y. 
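The w5100-spi probe rework above shows the usual pattern for a SPI driver that can be bound either through the device tree or through legacy SPI board info: the chip variant is resolved from whichever match table actually fired. A minimal, self-contained sketch of that dual lookup, using hypothetical names (foo_of_match, FOO_A/FOO_B, "vendor,foo-*") rather than the real w5100 sources:

	#include <linux/mod_devicetable.h>
	#include <linux/module.h>
	#include <linux/of_device.h>
	#include <linux/spi/spi.h>

	enum foo_chip { FOO_A, FOO_B };

	static const struct of_device_id foo_of_match[] = {
		{ .compatible = "vendor,foo-a", .data = (const void *)FOO_A },
		{ .compatible = "vendor,foo-b", .data = (const void *)FOO_B },
		{ }
	};
	MODULE_DEVICE_TABLE(of, foo_of_match);

	static int foo_spi_probe(struct spi_device *spi)
	{
		kernel_ulong_t chip;

		if (spi->dev.of_node) {
			/* Bound via DT: the compatible string selects the variant */
			const struct of_device_id *of_id =
				of_match_device(foo_of_match, &spi->dev);

			if (!of_id)
				return -ENODEV;
			chip = (kernel_ulong_t)of_id->data;
		} else {
			/* Legacy binding: the spi_device_id carries the variant */
			chip = spi_get_device_id(spi)->driver_data;
		}

		dev_info(&spi->dev, "probing variant %lu\n", (unsigned long)chip);
		return 0;
	}

On kernels that provide device_get_match_data() the OF branch collapses to a single call, but the legacy spi_device_id path still needs the explicit fallback shown here.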
@@ -26,8 +26,8 @@ config XILINX_EMACLITE config XILINX_AXI_EMAC tristate "Xilinx 10/100/1000 AXI Ethernet support" - depends on MICROBLAZE - select PHYLIB + depends on MICROBLAZE || X86 || ARM || COMPILE_TEST + select PHYLINK ---help--- This driver supports the 10/100/1000 Ethernet from Xilinx for the AXI bus interface used in Xilinx Virtex FPGAs. diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h index 1aeda084b8f1..276292bca334 100644 --- a/drivers/net/ethernet/xilinx/ll_temac.h +++ b/drivers/net/ethernet/xilinx/ll_temac.h @@ -361,7 +361,7 @@ struct temac_local { /* For synchronization of indirect register access. Must be * shared mutex between interfaces in same TEMAC block. */ - struct mutex *indirect_mutex; + spinlock_t *indirect_lock; u32 options; /* Current options word */ int last_link; unsigned int temac_features; @@ -388,8 +388,9 @@ struct temac_local { /* xilinx_temac.c */ int temac_indirect_busywait(struct temac_local *lp); u32 temac_indirect_in32(struct temac_local *lp, int reg); +u32 temac_indirect_in32_locked(struct temac_local *lp, int reg); void temac_indirect_out32(struct temac_local *lp, int reg, u32 value); - +void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value); /* xilinx_temac_mdio.c */ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 14870d659f7d..21c1b4322ea7 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -22,7 +22,6 @@ * * TODO: * - Factor out locallink DMA code into separate driver - * - Fix multicast assignment. * - Fix support for hardware checksumming. * - Testing. Lots and lots of testing. * @@ -53,6 +52,7 @@ #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/dma-mapping.h> +#include <linux/processor.h> #include <linux/platform_data/xilinx-ll-temac.h> #include "ll_temac.h" @@ -84,51 +84,118 @@ static void _temac_iow_le(struct temac_local *lp, int offset, u32 value) return iowrite32(value, lp->regs + offset); } +static bool hard_acs_rdy(struct temac_local *lp) +{ + return temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK; +} + +static bool hard_acs_rdy_or_timeout(struct temac_local *lp, ktime_t timeout) +{ + ktime_t cur = ktime_get(); + + return hard_acs_rdy(lp) || ktime_after(cur, timeout); +} + +/* Poll for maximum 20 ms. This is similar to the 2 jiffies @ 100 Hz + * that was used before, and should cover MDIO bus speed down to 3200 + * Hz. + */ +#define HARD_ACS_RDY_POLL_NS (20 * NSEC_PER_MSEC) + +/** + * temac_indirect_busywait - Wait for current indirect register access + * to complete. + */ int temac_indirect_busywait(struct temac_local *lp) { - unsigned long end = jiffies + 2; + ktime_t timeout = ktime_add_ns(ktime_get(), HARD_ACS_RDY_POLL_NS); - while (!(temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK)) { - if (time_before_eq(end, jiffies)) { - WARN_ON(1); - return -ETIMEDOUT; - } - usleep_range(500, 1000); - } - return 0; + spin_until_cond(hard_acs_rdy_or_timeout(lp, timeout)); + if (WARN_ON(!hard_acs_rdy(lp))) + return -ETIMEDOUT; + else + return 0; } /** - * temac_indirect_in32 - * - * lp->indirect_mutex must be held when calling this function + * temac_indirect_in32 - Indirect register read access. This function + * must be called without lp->indirect_lock being held. 
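+ * Takes and releases lp->indirect_lock internally, so it is safe for
+ * one-off register accesses; sequences of accesses should hold the
+ * lock and use the _locked variants below instead.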
*/ u32 temac_indirect_in32(struct temac_local *lp, int reg) { - u32 val; + unsigned long flags; + int val; + + spin_lock_irqsave(lp->indirect_lock, flags); + val = temac_indirect_in32_locked(lp, reg); + spin_unlock_irqrestore(lp->indirect_lock, flags); + return val; +} - if (temac_indirect_busywait(lp)) +/** + * temac_indirect_in32_locked - Indirect register read access. This + * function must be called with lp->indirect_lock being held. Use + * this together with spin_lock_irqsave/spin_unlock_irqrestore to avoid + * repeated lock/unlock and to ensure uninterrupted access to indirect + * registers. + */ +u32 temac_indirect_in32_locked(struct temac_local *lp, int reg) +{ + /* This initial wait should normally not spin, as we always + * try to wait for indirect access to complete before + * releasing the indirect_lock. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return -ETIMEDOUT; + /* Initiate read from indirect register */ temac_iow(lp, XTE_CTL0_OFFSET, reg); - if (temac_indirect_busywait(lp)) + /* Wait for indirect register access to complete. We really + * should not see timeouts, and could even end up causing + * problems for the following indirect access, so let's make a bit + * of WARN noise. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return -ETIMEDOUT; - val = temac_ior(lp, XTE_LSW0_OFFSET); - - return val; + /* Value is ready now */ + return temac_ior(lp, XTE_LSW0_OFFSET); } /** - * temac_indirect_out32 - * - * lp->indirect_mutex must be held when calling this function + * temac_indirect_out32 - Indirect register write access. This function + * must be called without lp->indirect_lock being held. */ void temac_indirect_out32(struct temac_local *lp, int reg, u32 value) { - if (temac_indirect_busywait(lp)) + unsigned long flags; + + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, reg, value); + spin_unlock_irqrestore(lp->indirect_lock, flags); +} + +/** + * temac_indirect_out32_locked - Indirect register write access. This + * function must be called with lp->indirect_lock being held. Use + * this together with spin_lock_irqsave/spin_unlock_irqrestore to avoid + * repeated lock/unlock and to ensure uninterrupted access to indirect + * registers. + */ +void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value) +{ + /* As in temac_indirect_in32_locked(), we should normally not + * spin here. And if it happens, we actually end up silently + * ignoring the write request. Ouch. + */ + if (WARN_ON(temac_indirect_busywait(lp))) return; + /* Initiate write to indirect register */ temac_iow(lp, XTE_LSW0_OFFSET, value); temac_iow(lp, XTE_CTL0_OFFSET, CNTLREG_WRITE_ENABLE_MASK | reg); - temac_indirect_busywait(lp); + /* As in temac_indirect_in32_locked(), we should not see timeouts * here. And if it happens, we continue before the write has * completed. Not good. 
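+ * Either way the function returns void, so callers cannot detect a
+ * failed write beyond the WARN noise.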
+ */ + WARN_ON(temac_indirect_busywait(lp)); } /** @@ -344,20 +411,21 @@ out: static void temac_do_set_mac_address(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); + unsigned long flags; /* set up unicast MAC address filter set its mac address */ - mutex_lock(lp->indirect_mutex); - temac_indirect_out32(lp, XTE_UAW0_OFFSET, - (ndev->dev_addr[0]) | - (ndev->dev_addr[1] << 8) | - (ndev->dev_addr[2] << 16) | - (ndev->dev_addr[3] << 24)); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_UAW0_OFFSET, + (ndev->dev_addr[0]) | + (ndev->dev_addr[1] << 8) | + (ndev->dev_addr[2] << 16) | + (ndev->dev_addr[3] << 24)); /* There are reserved bits in EUAW1 * so don't affect them Set MAC bits [47:32] in EUAW1 */ - temac_indirect_out32(lp, XTE_UAW1_OFFSET, - (ndev->dev_addr[4] & 0x000000ff) | - (ndev->dev_addr[5] << 8)); - mutex_unlock(lp->indirect_mutex); + temac_indirect_out32_locked(lp, XTE_UAW1_OFFSET, + (ndev->dev_addr[4] & 0x000000ff) | + (ndev->dev_addr[5] << 8)); + spin_unlock_irqrestore(lp->indirect_lock, flags); } static int temac_init_mac_address(struct net_device *ndev, const void *address) @@ -383,49 +451,58 @@ static int temac_set_mac_address(struct net_device *ndev, void *p) static void temac_set_multicast_list(struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); - u32 multi_addr_msw, multi_addr_lsw, val; - int i; + u32 multi_addr_msw, multi_addr_lsw; + int i = 0; + unsigned long flags; + bool promisc_mode_disabled = false; - mutex_lock(lp->indirect_mutex); - if (ndev->flags & (IFF_ALLMULTI | IFF_PROMISC) || - netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM) { - /* - * We must make the kernel realise we had to move - * into promisc mode or we start all out war on - * the cable. If it was a promisc request the - * flag is already set. If not we assert it. 
- */ - ndev->flags |= IFF_PROMISC; + if (ndev->flags & (IFF_PROMISC | IFF_ALLMULTI) || + (netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM)) { temac_indirect_out32(lp, XTE_AFM_OFFSET, XTE_AFM_EPPRM_MASK); dev_info(&ndev->dev, "Promiscuous mode enabled.\n"); - } else if (!netdev_mc_empty(ndev)) { + return; + } + + spin_lock_irqsave(lp->indirect_lock, flags); + + if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; - i = 0; netdev_for_each_mc_addr(ha, ndev) { - if (i >= MULTICAST_CAM_TABLE_NUM) + if (WARN_ON(i >= MULTICAST_CAM_TABLE_NUM)) break; multi_addr_msw = ((ha->addr[3] << 24) | (ha->addr[2] << 16) | (ha->addr[1] << 8) | (ha->addr[0])); - temac_indirect_out32(lp, XTE_MAW0_OFFSET, - multi_addr_msw); + temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, + multi_addr_msw); multi_addr_lsw = ((ha->addr[5] << 8) | (ha->addr[4]) | (i << 16)); - temac_indirect_out32(lp, XTE_MAW1_OFFSET, - multi_addr_lsw); + temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, + multi_addr_lsw); i++; } - } else { - val = temac_indirect_in32(lp, XTE_AFM_OFFSET); - temac_indirect_out32(lp, XTE_AFM_OFFSET, - val & ~XTE_AFM_EPPRM_MASK); - temac_indirect_out32(lp, XTE_MAW0_OFFSET, 0); - temac_indirect_out32(lp, XTE_MAW1_OFFSET, 0); - dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } - mutex_unlock(lp->indirect_mutex); + + /* Clear all or remaining/unused address table entries */ + while (i < MULTICAST_CAM_TABLE_NUM) { + temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, i << 16); + i++; + } + + /* Enable address filter block if currently disabled */ + if (temac_indirect_in32_locked(lp, XTE_AFM_OFFSET) + & XTE_AFM_EPPRM_MASK) { + temac_indirect_out32_locked(lp, XTE_AFM_OFFSET, 0); + promisc_mode_disabled = true; + } + + spin_unlock_irqrestore(lp->indirect_lock, flags); + + if (promisc_mode_disabled) + dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } static struct temac_option { @@ -516,17 +593,19 @@ static u32 temac_setoptions(struct net_device *ndev, u32 options) struct temac_local *lp = netdev_priv(ndev); struct temac_option *tp = &temac_options[0]; int reg; + unsigned long flags; - mutex_lock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); while (tp->opt) { - reg = temac_indirect_in32(lp, tp->reg) & ~tp->m_or; - if (options & tp->opt) + reg = temac_indirect_in32_locked(lp, tp->reg) & ~tp->m_or; + if (options & tp->opt) { reg |= tp->m_or; - temac_indirect_out32(lp, tp->reg, reg); + temac_indirect_out32_locked(lp, tp->reg, reg); + } tp++; } + spin_unlock_irqrestore(lp->indirect_lock, flags); lp->options |= options; - mutex_unlock(lp->indirect_mutex); return 0; } @@ -537,6 +616,7 @@ static void temac_device_reset(struct net_device *ndev) struct temac_local *lp = netdev_priv(ndev); u32 timeout; u32 val; + unsigned long flags; /* Perform a software reset */ @@ -545,7 +625,6 @@ static void temac_device_reset(struct net_device *ndev) dev_dbg(&ndev->dev, "%s()\n", __func__); - mutex_lock(lp->indirect_mutex); /* Reset the receiver and wait for it to finish reset */ temac_indirect_out32(lp, XTE_RXC1_OFFSET, XTE_RXC1_RXRST_MASK); timeout = 1000; @@ -571,8 +650,11 @@ static void temac_device_reset(struct net_device *ndev) } /* Disable the receiver */ - val = temac_indirect_in32(lp, XTE_RXC1_OFFSET); - temac_indirect_out32(lp, XTE_RXC1_OFFSET, val & ~XTE_RXC1_RXEN_MASK); + spin_lock_irqsave(lp->indirect_lock, flags); + val = temac_indirect_in32_locked(lp, XTE_RXC1_OFFSET); + temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, + val & 
~XTE_RXC1_RXEN_MASK); + spin_unlock_irqrestore(lp->indirect_lock, flags); /* Reset Local Link (DMA) */ lp->dma_out(lp, DMA_CONTROL_REG, DMA_CONTROL_RST); @@ -592,12 +674,12 @@ static void temac_device_reset(struct net_device *ndev) "temac_device_reset descriptor allocation failed\n"); } - temac_indirect_out32(lp, XTE_RXC0_OFFSET, 0); - temac_indirect_out32(lp, XTE_RXC1_OFFSET, 0); - temac_indirect_out32(lp, XTE_TXC_OFFSET, 0); - temac_indirect_out32(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK); - - mutex_unlock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_RXC0_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_TXC_OFFSET, 0); + temac_indirect_out32_locked(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK); + spin_unlock_irqrestore(lp->indirect_lock, flags); /* Sync default options with HW * but leave receiver and transmitter disabled. */ @@ -621,13 +703,14 @@ static void temac_adjust_link(struct net_device *ndev) struct phy_device *phy = ndev->phydev; u32 mii_speed; int link_state; + unsigned long flags; /* hash together the state values to decide if something has changed */ link_state = phy->speed | (phy->duplex << 1) | phy->link; - mutex_lock(lp->indirect_mutex); if (lp->last_link != link_state) { - mii_speed = temac_indirect_in32(lp, XTE_EMCFG_OFFSET); + spin_lock_irqsave(lp->indirect_lock, flags); + mii_speed = temac_indirect_in32_locked(lp, XTE_EMCFG_OFFSET); mii_speed &= ~XTE_EMCFG_LINKSPD_MASK; switch (phy->speed) { @@ -637,11 +720,12 @@ static void temac_adjust_link(struct net_device *ndev) } /* Write new speed setting out to TEMAC */ - temac_indirect_out32(lp, XTE_EMCFG_OFFSET, mii_speed); + temac_indirect_out32_locked(lp, XTE_EMCFG_OFFSET, mii_speed); + spin_unlock_irqrestore(lp->indirect_lock, flags); + lp->last_link = link_state; phy_print_status(phy); } - mutex_unlock(lp->indirect_mutex); } #ifdef CONFIG_64BIT @@ -1011,6 +1095,7 @@ static const struct net_device_ops temac_netdev_ops = { .ndo_open = temac_open, .ndo_stop = temac_stop, .ndo_start_xmit = temac_start_xmit, + .ndo_set_rx_mode = temac_set_multicast_list, .ndo_set_mac_address = temac_set_mac_address, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = temac_ioctl, @@ -1076,7 +1161,6 @@ static int temac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); SET_NETDEV_DEV(ndev, &pdev->dev); - ndev->flags &= ~IFF_MULTICAST; /* clear multicast */ ndev->features = NETIF_F_SG; ndev->netdev_ops = &temac_netdev_ops; ndev->ethtool_ops = &temac_ethtool_ops; @@ -1103,17 +1187,17 @@ static int temac_probe(struct platform_device *pdev) /* Setup mutex for synchronization of indirect register access */ if (pdata) { - if (!pdata->indirect_mutex) { + if (!pdata->indirect_lock) { dev_err(&pdev->dev, - "indirect_mutex missing in platform_data\n"); + "indirect_lock missing in platform_data\n"); return -EINVAL; } - lp->indirect_mutex = pdata->indirect_mutex; + lp->indirect_lock = pdata->indirect_lock; } else { - lp->indirect_mutex = devm_kmalloc(&pdev->dev, - sizeof(*lp->indirect_mutex), - GFP_KERNEL); - mutex_init(lp->indirect_mutex); + lp->indirect_lock = devm_kmalloc(&pdev->dev, + sizeof(*lp->indirect_lock), + GFP_KERNEL); + spin_lock_init(lp->indirect_lock); } /* map device registers */ diff --git a/drivers/net/ethernet/xilinx/ll_temac_mdio.c b/drivers/net/ethernet/xilinx/ll_temac_mdio.c index a4667326f745..6fd2dea4e60f 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_mdio.c +++ 
b/drivers/net/ethernet/xilinx/ll_temac_mdio.c @@ -25,14 +25,15 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg) { struct temac_local *lp = bus->priv; u32 rc; + unsigned long flags; /* Write the PHY address to the MIIM Access Initiator register. * When the transfer completes, the PHY register value will appear * in the LSW0 register */ - mutex_lock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); temac_iow(lp, XTE_LSW0_OFFSET, (phy_id << 5) | reg); - rc = temac_indirect_in32(lp, XTE_MIIMAI_OFFSET); - mutex_unlock(lp->indirect_mutex); + rc = temac_indirect_in32_locked(lp, XTE_MIIMAI_OFFSET); + spin_unlock_irqrestore(lp->indirect_lock, flags); dev_dbg(lp->dev, "temac_mdio_read(phy_id=%i, reg=%x) == %x\n", phy_id, reg, rc); @@ -43,6 +44,7 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg) static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) { struct temac_local *lp = bus->priv; + unsigned long flags; dev_dbg(lp->dev, "temac_mdio_write(phy_id=%i, reg=%x, val=%x)\n", phy_id, reg, val); @@ -50,10 +52,10 @@ static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val) /* First write the desired value into the write data register * and then write the address into the access initiator register */ - mutex_lock(lp->indirect_mutex); - temac_indirect_out32(lp, XTE_MGTDR_OFFSET, val); - temac_indirect_out32(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg); - mutex_unlock(lp->indirect_mutex); + spin_lock_irqsave(lp->indirect_lock, flags); + temac_indirect_out32_locked(lp, XTE_MGTDR_OFFSET, val); + temac_indirect_out32_locked(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg); + spin_unlock_irqrestore(lp->indirect_lock, flags); return 0; } @@ -87,9 +89,7 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev) /* Enable the MDIO bus by asserting the enable bit and writing * in the clock config */ - mutex_lock(lp->indirect_mutex); temac_indirect_out32(lp, XTE_MC_OFFSET, 1 << 6 | clk_div); - mutex_unlock(lp->indirect_mutex); bus = devm_mdiobus_alloc(&pdev->dev); if (!bus) @@ -116,10 +116,8 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev) if (rc) return rc; - mutex_lock(lp->indirect_mutex); dev_dbg(lp->dev, "MDIO bus registered; MC:%x\n", temac_indirect_in32(lp, XTE_MC_OFFSET)); - mutex_unlock(lp->indirect_mutex); return 0; } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index 011adae32b89..2dacfc85b3ba 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -13,6 +13,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/if_vlan.h> +#include <linux/phylink.h> /* Packet size info */ #define XAE_HDR_SIZE 14 /* Size of Ethernet header */ @@ -83,6 +84,8 @@ #define XAXIDMA_CR_RUNSTOP_MASK 0x00000001 /* Start/stop DMA channel */ #define XAXIDMA_CR_RESET_MASK 0x00000004 /* Reset DMA engine */ +#define XAXIDMA_SR_HALT_MASK 0x00000001 /* Indicates DMA channel halted */ + #define XAXIDMA_BD_NDESC_OFFSET 0x00 /* Next descriptor pointer */ #define XAXIDMA_BD_BUFA_OFFSET 0x08 /* Buffer address */ #define XAXIDMA_BD_CTRL_LEN_OFFSET 0x18 /* Control/buffer length */ @@ -356,9 +359,6 @@ * @app2: MM2S/S2MM User Application Field 2. * @app3: MM2S/S2MM User Application Field 3. * @app4: MM2S/S2MM User Application Field 4. 
- * @sw_id_offset: MM2S/S2MM Sw ID - * @reserved5: Reserved and not used - * @reserved6: Reserved and not used */ struct axidma_bd { u32 next; /* Physical address of next buffer descriptor */ @@ -373,11 +373,9 @@ struct axidma_bd { u32 app1; /* TX start << 16 | insert */ u32 app2; /* TX csum seed */ u32 app3; - u32 app4; - u32 sw_id_offset; - u32 reserved5; - u32 reserved6; -}; + u32 app4; /* Last field used by HW */ + struct sk_buff *skb; +} __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT); /** * struct axienet_local - axienet private per device data @@ -385,6 +383,7 @@ struct axidma_bd { * @dev: Pointer to device structure * @phy_node: Pointer to device node structure * @mii_bus: Pointer to MII bus structure + * @regs_start: Resource start for axienet device addresses * @regs: Base address for the axienet_local device address space * @dma_regs: Base address for the axidma device address space * @dma_err_tasklet: Tasklet structure to process Axi DMA errors @@ -422,10 +421,17 @@ struct axienet_local { /* Connection to PHY device */ struct device_node *phy_node; + struct phylink *phylink; + struct phylink_config phylink_config; + + /* Clock for AXI bus */ + struct clk *clk; + /* MDIO bus data */ struct mii_bus *mii_bus; /* MII bus reference */ /* IO registers, dma functions and IRQs */ + resource_size_t regs_start; void __iomem *regs; void __iomem *dma_regs; @@ -433,17 +439,19 @@ struct axienet_local { int tx_irq; int rx_irq; + int eth_irq; phy_interface_t phy_mode; u32 options; /* Current options word */ - u32 last_link; u32 features; /* Buffer descriptors */ struct axidma_bd *tx_bd_v; dma_addr_t tx_bd_p; + u32 tx_bd_num; struct axidma_bd *rx_bd_v; dma_addr_t rx_bd_p; + u32 rx_bd_num; u32 tx_bd_ci; u32 tx_bd_tail; u32 rx_bd_ci; @@ -481,7 +489,7 @@ struct axienet_option { */ static inline u32 axienet_ior(struct axienet_local *lp, off_t offset) { - return in_be32(lp->regs + offset); + return ioread32(lp->regs + offset); } static inline u32 axinet_ior_read_mcr(struct axienet_local *lp) @@ -501,12 +509,13 @@ static inline u32 axinet_ior_read_mcr(struct axienet_local *lp) static inline void axienet_iow(struct axienet_local *lp, off_t offset, u32 value) { - out_be32((lp->regs + offset), value); + iowrite32(value, lp->regs + offset); } /* Function prototypes visible in xilinx_axienet_mdio.c for other files */ -int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np); -int axienet_mdio_wait_until_ready(struct axienet_local *lp); +int axienet_mdio_enable(struct axienet_local *lp); +void axienet_mdio_disable(struct axienet_local *lp); +int axienet_mdio_setup(struct axienet_local *lp); void axienet_mdio_teardown(struct axienet_local *lp); #endif /* XILINX_AXI_ENET_H */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 831967f6eff8..4fc627fb4d11 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -7,6 +7,7 @@ * Copyright (c) 2008-2009 Secret Lab Technologies Ltd. * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu> * Copyright (c) 2010 - 2011 PetaLogix + * Copyright (c) 2019 SED Systems, a division of Calian Ltd. * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved. * * This is a driver for the Xilinx Axi Ethernet which is used in the Virtex6 @@ -21,6 +22,7 @@ * - Add support for extended VLAN support. 
*/ +#include <linux/clk.h> #include <linux/delay.h> #include <linux/etherdevice.h> #include <linux/module.h> @@ -38,16 +40,18 @@ #include "xilinx_axienet.h" -/* Descriptors defines for Tx and Rx DMA - 2^n for the best performance */ -#define TX_BD_NUM 64 -#define RX_BD_NUM 128 +/* Descriptors defines for Tx and Rx DMA */ +#define TX_BD_NUM_DEFAULT 64 +#define RX_BD_NUM_DEFAULT 1024 +#define TX_BD_NUM_MAX 4096 +#define RX_BD_NUM_MAX 4096 /* Must be shorter than length of ethtool_drvinfo.driver field to fit */ #define DRIVER_NAME "xaxienet" #define DRIVER_DESCRIPTION "Xilinx Axi Ethernet driver" #define DRIVER_VERSION "1.00a" -#define AXIENET_REGS_N 32 +#define AXIENET_REGS_N 40 /* Match table for of_platform binding */ static const struct of_device_id axienet_of_match[] = { @@ -125,7 +129,7 @@ static struct axienet_option axienet_options[] = { */ static inline u32 axienet_dma_in32(struct axienet_local *lp, off_t reg) { - return in_be32(lp->dma_regs + reg); + return ioread32(lp->dma_regs + reg); } /** @@ -140,7 +144,7 @@ static inline u32 axienet_dma_in32(struct axienet_local *lp, off_t reg) static inline void axienet_dma_out32(struct axienet_local *lp, off_t reg, u32 value) { - out_be32((lp->dma_regs + reg), value); + iowrite32(value, lp->dma_regs + reg); } /** @@ -156,22 +160,21 @@ static void axienet_dma_bd_release(struct net_device *ndev) int i; struct axienet_local *lp = netdev_priv(ndev); - for (i = 0; i < RX_BD_NUM; i++) { + for (i = 0; i < lp->rx_bd_num; i++) { dma_unmap_single(ndev->dev.parent, lp->rx_bd_v[i].phys, lp->max_frm_size, DMA_FROM_DEVICE); - dev_kfree_skb((struct sk_buff *) - (lp->rx_bd_v[i].sw_id_offset)); + dev_kfree_skb(lp->rx_bd_v[i].skb); } if (lp->rx_bd_v) { dma_free_coherent(ndev->dev.parent, - sizeof(*lp->rx_bd_v) * RX_BD_NUM, + sizeof(*lp->rx_bd_v) * lp->rx_bd_num, lp->rx_bd_v, lp->rx_bd_p); } if (lp->tx_bd_v) { dma_free_coherent(ndev->dev.parent, - sizeof(*lp->tx_bd_v) * TX_BD_NUM, + sizeof(*lp->tx_bd_v) * lp->tx_bd_num, lp->tx_bd_v, lp->tx_bd_p); } @@ -201,33 +204,33 @@ static int axienet_dma_bd_init(struct net_device *ndev) /* Allocate the Tx and Rx buffer descriptors. */ lp->tx_bd_v = dma_alloc_coherent(ndev->dev.parent, - sizeof(*lp->tx_bd_v) * TX_BD_NUM, + sizeof(*lp->tx_bd_v) * lp->tx_bd_num, &lp->tx_bd_p, GFP_KERNEL); if (!lp->tx_bd_v) goto out; lp->rx_bd_v = dma_alloc_coherent(ndev->dev.parent, - sizeof(*lp->rx_bd_v) * RX_BD_NUM, + sizeof(*lp->rx_bd_v) * lp->rx_bd_num, &lp->rx_bd_p, GFP_KERNEL); if (!lp->rx_bd_v) goto out; - for (i = 0; i < TX_BD_NUM; i++) { + for (i = 0; i < lp->tx_bd_num; i++) { lp->tx_bd_v[i].next = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * - ((i + 1) % TX_BD_NUM); + ((i + 1) % lp->tx_bd_num); } - for (i = 0; i < RX_BD_NUM; i++) { + for (i = 0; i < lp->rx_bd_num; i++) { lp->rx_bd_v[i].next = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * - ((i + 1) % RX_BD_NUM); + ((i + 1) % lp->rx_bd_num); skb = netdev_alloc_skb_ip_align(ndev, lp->max_frm_size); if (!skb) goto out; - lp->rx_bd_v[i].sw_id_offset = (u32) skb; + lp->rx_bd_v[i].skb = skb; lp->rx_bd_v[i].phys = dma_map_single(ndev->dev.parent, skb->data, lp->max_frm_size, @@ -269,7 +272,7 @@ static int axienet_dma_bd_init(struct net_device *ndev) axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr | XAXIDMA_CR_RUNSTOP_MASK); axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p + - (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1))); + (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1))); /* Write to the RS (Run-stop) bit in the Tx channel control register. * Tx channel is now ready to run. 
But only after we write to the @@ -434,17 +437,20 @@ static void axienet_setoptions(struct net_device *ndev, u32 options) lp->options |= options; } -static void __axienet_device_reset(struct axienet_local *lp, off_t offset) +static void __axienet_device_reset(struct axienet_local *lp) { u32 timeout; /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset * process of Axi DMA takes a while to complete as all pending * commands/transfers will be flushed or completed during this * reset process. + * Note that even though both TX and RX have their own reset register, + * they both reset the entire DMA core, so only one needs to be used. */ - axienet_dma_out32(lp, offset, XAXIDMA_CR_RESET_MASK); + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK); timeout = DELAY_OF_ONE_MILLISEC; - while (axienet_dma_in32(lp, offset) & XAXIDMA_CR_RESET_MASK) { + while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) & + XAXIDMA_CR_RESET_MASK) { udelay(1); if (--timeout == 0) { netdev_err(lp->ndev, "%s: DMA reset timeout!\n", @@ -470,8 +476,7 @@ static void axienet_device_reset(struct net_device *ndev) u32 axienet_status; struct axienet_local *lp = netdev_priv(ndev); - __axienet_device_reset(lp, XAXIDMA_TX_CR_OFFSET); - __axienet_device_reset(lp, XAXIDMA_RX_CR_OFFSET); + __axienet_device_reset(lp); lp->max_frm_size = XAE_MAX_VLAN_FRAME_SIZE; lp->options |= XAE_OPTION_VLAN; @@ -498,6 +503,8 @@ static void axienet_device_reset(struct net_device *ndev) axienet_status = axienet_ior(lp, XAE_IP_OFFSET); if (axienet_status & XAE_INT_RXRJECT_MASK) axienet_iow(lp, XAE_IS_OFFSET, XAE_INT_RXRJECT_MASK); + axienet_iow(lp, XAE_IE_OFFSET, lp->eth_irq > 0 ? + XAE_INT_RECV_ERROR_MASK : 0); axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK); @@ -514,63 +521,6 @@ static void axienet_device_reset(struct net_device *ndev) } /** - * axienet_adjust_link - Adjust the PHY link speed/duplex. - * @ndev: Pointer to the net_device structure - * - * This function is called to change the speed and duplex setting after - * auto negotiation is done by the PHY. This is the function that gets - * registered with the PHY interface through the "of_phy_connect" call. - */ -static void axienet_adjust_link(struct net_device *ndev) -{ - u32 emmc_reg; - u32 link_state; - u32 setspeed = 1; - struct axienet_local *lp = netdev_priv(ndev); - struct phy_device *phy = ndev->phydev; - - link_state = phy->speed | (phy->duplex << 1) | phy->link; - if (lp->last_link != link_state) { - if ((phy->speed == SPEED_10) || (phy->speed == SPEED_100)) { - if (lp->phy_mode == PHY_INTERFACE_MODE_1000BASEX) - setspeed = 0; - } else { - if ((phy->speed == SPEED_1000) && - (lp->phy_mode == PHY_INTERFACE_MODE_MII)) - setspeed = 0; - } - - if (setspeed == 1) { - emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET); - emmc_reg &= ~XAE_EMMC_LINKSPEED_MASK; - - switch (phy->speed) { - case SPEED_1000: - emmc_reg |= XAE_EMMC_LINKSPD_1000; - break; - case SPEED_100: - emmc_reg |= XAE_EMMC_LINKSPD_100; - break; - case SPEED_10: - emmc_reg |= XAE_EMMC_LINKSPD_10; - break; - default: - dev_err(&ndev->dev, "Speed other than 10, 100 " - "or 1Gbps is not supported\n"); - break; - } - - axienet_iow(lp, XAE_EMMC_OFFSET, emmc_reg); - lp->last_link = link_state; - phy_print_status(phy); - } else { - netdev_err(ndev, - "Error setting Axi Ethernet mac speed\n"); - } - } -} - -/** * axienet_start_xmit_done - Invoked once a transmit is completed by the * Axi DMA Tx channel. 
* @ndev: Pointer to the net_device structure @@ -595,26 +545,31 @@ static void axienet_start_xmit_done(struct net_device *ndev) dma_unmap_single(ndev->dev.parent, cur_p->phys, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), DMA_TO_DEVICE); - if (cur_p->app4) - dev_consume_skb_irq((struct sk_buff *)cur_p->app4); + if (cur_p->skb) + dev_consume_skb_irq(cur_p->skb); /*cur_p->phys = 0;*/ cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; cur_p->app4 = 0; cur_p->status = 0; + cur_p->skb = NULL; size += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; packets++; - ++lp->tx_bd_ci; - lp->tx_bd_ci %= TX_BD_NUM; + if (++lp->tx_bd_ci >= lp->tx_bd_num) + lp->tx_bd_ci = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_ci]; status = cur_p->status; } ndev->stats.tx_packets += packets; ndev->stats.tx_bytes += size; + + /* Matches barrier in axienet_start_xmit */ + smp_mb(); + netif_wake_queue(ndev); } @@ -635,7 +590,7 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, int num_frag) { struct axidma_bd *cur_p; - cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % TX_BD_NUM]; + cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK) return NETDEV_TX_BUSY; return 0; @@ -670,9 +625,19 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; if (axienet_check_tx_bd_space(lp, num_frag)) { - if (!netif_queue_stopped(ndev)) - netif_stop_queue(ndev); - return NETDEV_TX_BUSY; + if (netif_queue_stopped(ndev)) + return NETDEV_TX_BUSY; + + netif_stop_queue(ndev); + + /* Matches barrier in axienet_start_xmit_done */ + smp_mb(); + + /* Space might have just been freed - check again */ + if (axienet_check_tx_bd_space(lp, num_frag)) + return NETDEV_TX_BUSY; + + netif_wake_queue(ndev); } if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -695,8 +660,8 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_headlen(skb), DMA_TO_DEVICE); for (ii = 0; ii < num_frag; ii++) { - ++lp->tx_bd_tail; - lp->tx_bd_tail %= TX_BD_NUM; + if (++lp->tx_bd_tail >= lp->tx_bd_num) + lp->tx_bd_tail = 0; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; frag = &skb_shinfo(skb)->frags[ii]; cur_p->phys = dma_map_single(ndev->dev.parent, @@ -707,13 +672,13 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) } cur_p->cntrl |= XAXIDMA_BD_CTRL_TXEOF_MASK; - cur_p->app4 = (unsigned long)skb; + cur_p->skb = skb; tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail; /* Start the transfer */ axienet_dma_out32(lp, XAXIDMA_TX_TDESC_OFFSET, tail_p); - ++lp->tx_bd_tail; - lp->tx_bd_tail %= TX_BD_NUM; + if (++lp->tx_bd_tail >= lp->tx_bd_num) + lp->tx_bd_tail = 0; return NETDEV_TX_OK; } @@ -742,13 +707,15 @@ static void axienet_recv(struct net_device *ndev) while ((cur_p->status & XAXIDMA_BD_STS_COMPLETE_MASK)) { tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; - skb = (struct sk_buff *) (cur_p->sw_id_offset); - length = cur_p->app4 & 0x0000FFFF; dma_unmap_single(ndev->dev.parent, cur_p->phys, lp->max_frm_size, DMA_FROM_DEVICE); + skb = cur_p->skb; + cur_p->skb = NULL; + length = cur_p->app4 & 0x0000FFFF; + skb_put(skb, length); skb->protocol = eth_type_trans(skb, ndev); /*skb_checksum_none_assert(skb);*/ @@ -783,10 +750,10 @@ static void axienet_recv(struct net_device *ndev) DMA_FROM_DEVICE); cur_p->cntrl = lp->max_frm_size; cur_p->status = 0; - cur_p->sw_id_offset = (u32) new_skb; + cur_p->skb = new_skb; - ++lp->rx_bd_ci; - lp->rx_bd_ci %= RX_BD_NUM; + if (++lp->rx_bd_ci >= lp->rx_bd_num) + lp->rx_bd_ci = 0; cur_p = 
&lp->rx_bd_v[lp->rx_bd_ci]; } @@ -802,7 +769,7 @@ static void axienet_recv(struct net_device *ndev) * @irq: irq number * @_ndev: net_device pointer * - * Return: IRQ_HANDLED for all cases. + * Return: IRQ_HANDLED if device generated a TX interrupt, IRQ_NONE otherwise. * * This is the Axi DMA Tx done Isr. It invokes "axienet_start_xmit_done" * to complete the BD processing. @@ -821,7 +788,7 @@ static irqreturn_t axienet_tx_irq(int irq, void *_ndev) goto out; } if (!(status & XAXIDMA_IRQ_ALL_MASK)) - dev_err(&ndev->dev, "No interrupts asserted in Tx path\n"); + return IRQ_NONE; if (status & XAXIDMA_IRQ_ERROR_MASK) { dev_err(&ndev->dev, "DMA Tx error 0x%x\n", status); dev_err(&ndev->dev, "Current BD is at: 0x%x\n", @@ -851,7 +818,7 @@ out: * @irq: irq number * @_ndev: net_device pointer * - * Return: IRQ_HANDLED for all cases. + * Return: IRQ_HANDLED if device generated a RX interrupt, IRQ_NONE otherwise. * * This is the Axi DMA Rx Isr. It invokes "axienet_recv" to complete the BD * processing. @@ -870,7 +837,7 @@ static irqreturn_t axienet_rx_irq(int irq, void *_ndev) goto out; } if (!(status & XAXIDMA_IRQ_ALL_MASK)) - dev_err(&ndev->dev, "No interrupts asserted in Rx path\n"); + return IRQ_NONE; if (status & XAXIDMA_IRQ_ERROR_MASK) { dev_err(&ndev->dev, "DMA Rx error 0x%x\n", status); dev_err(&ndev->dev, "Current BD is at: 0x%x\n", @@ -895,6 +862,35 @@ out: return IRQ_HANDLED; } +/** + * axienet_eth_irq - Ethernet core Isr. + * @irq: irq number + * @_ndev: net_device pointer + * + * Return: IRQ_HANDLED if device generated a core interrupt, IRQ_NONE otherwise. + * + * Handle miscellaneous conditions indicated by Ethernet core IRQ. + */ +static irqreturn_t axienet_eth_irq(int irq, void *_ndev) +{ + struct net_device *ndev = _ndev; + struct axienet_local *lp = netdev_priv(ndev); + unsigned int pending; + + pending = axienet_ior(lp, XAE_IP_OFFSET); + if (!pending) + return IRQ_NONE; + + if (pending & XAE_INT_RXFIFOOVR_MASK) + ndev->stats.rx_missed_errors++; + + if (pending & XAE_INT_RXRJECT_MASK) + ndev->stats.rx_frame_errors++; + + axienet_iow(lp, XAE_IS_OFFSET, pending); + return IRQ_HANDLED; +} + static void axienet_dma_err_handler(unsigned long data); /** @@ -904,67 +900,72 @@ static void axienet_dma_err_handler(unsigned long data); * Return: 0, on success. * non-zero error value on failure * - * This is the driver open routine. It calls phy_start to start the PHY device. + * This is the driver open routine. It calls phylink_start to start the + * PHY device. * It also allocates interrupt service routines, enables the interrupt lines * and ISR handling. Axi Ethernet core is reset through Axi DMA core. Buffer * descriptors are initialized. */ static int axienet_open(struct net_device *ndev) { - int ret, mdio_mcreg; + int ret; struct axienet_local *lp = netdev_priv(ndev); - struct phy_device *phydev = NULL; dev_dbg(&ndev->dev, "axienet_open()\n"); - mdio_mcreg = axienet_ior(lp, XAE_MDIO_MC_OFFSET); - ret = axienet_mdio_wait_until_ready(lp); - if (ret < 0) - return ret; /* Disable the MDIO interface till Axi Ethernet Reset is completed. * When we do an Axi Ethernet reset, it resets the complete core - * including the MDIO. If MDIO is not disabled when the reset - * process is started, MDIO will be broken afterwards. + * including the MDIO. MDIO must be disabled before resetting + * and re-enabled afterwards. + * Hold MDIO bus lock to avoid MDIO accesses during the reset. 
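+ * axienet_mdio_disable()/axienet_mdio_enable() replace the open-coded
+ * save and restore of the MDIO MC register that was used here before.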
*/ - axienet_iow(lp, XAE_MDIO_MC_OFFSET, - (mdio_mcreg & (~XAE_MDIO_MC_MDIOEN_MASK))); + mutex_lock(&lp->mii_bus->mdio_lock); + axienet_mdio_disable(lp); axienet_device_reset(ndev); - /* Enable the MDIO */ - axienet_iow(lp, XAE_MDIO_MC_OFFSET, mdio_mcreg); - ret = axienet_mdio_wait_until_ready(lp); + ret = axienet_mdio_enable(lp); + mutex_unlock(&lp->mii_bus->mdio_lock); if (ret < 0) return ret; - if (lp->phy_node) { - phydev = of_phy_connect(lp->ndev, lp->phy_node, - axienet_adjust_link, 0, lp->phy_mode); - - if (!phydev) - dev_err(lp->dev, "of_phy_connect() failed\n"); - else - phy_start(phydev); + ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0); + if (ret) { + dev_err(lp->dev, "phylink_of_phy_connect() failed: %d\n", ret); + return ret; } + phylink_start(lp->phylink); + /* Enable tasklets for Axi DMA error handling */ tasklet_init(&lp->dma_err_tasklet, axienet_dma_err_handler, (unsigned long) lp); /* Enable interrupts for Axi DMA Tx */ - ret = request_irq(lp->tx_irq, axienet_tx_irq, 0, ndev->name, ndev); + ret = request_irq(lp->tx_irq, axienet_tx_irq, IRQF_SHARED, + ndev->name, ndev); if (ret) goto err_tx_irq; /* Enable interrupts for Axi DMA Rx */ - ret = request_irq(lp->rx_irq, axienet_rx_irq, 0, ndev->name, ndev); + ret = request_irq(lp->rx_irq, axienet_rx_irq, IRQF_SHARED, + ndev->name, ndev); if (ret) goto err_rx_irq; + /* Enable interrupts for Axi Ethernet core (if defined) */ + if (lp->eth_irq > 0) { + ret = request_irq(lp->eth_irq, axienet_eth_irq, IRQF_SHARED, + ndev->name, ndev); + if (ret) + goto err_eth_irq; + } return 0; +err_eth_irq: + free_irq(lp->rx_irq, ndev); err_rx_irq: free_irq(lp->tx_irq, ndev); err_tx_irq: - if (phydev) - phy_disconnect(phydev); + phylink_stop(lp->phylink); + phylink_disconnect_phy(lp->phylink); tasklet_kill(&lp->dma_err_tasklet); dev_err(lp->dev, "request_irq() failed\n"); return ret; @@ -976,34 +977,61 @@ err_tx_irq: * * Return: 0, on success. * - * This is the driver stop routine. It calls phy_disconnect to stop the PHY + * This is the driver stop routine. It calls phylink_disconnect to stop the PHY * device. It also removes the interrupt handlers and disables the interrupts. * The Axi DMA Tx/Rx BDs are released. 
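+ * The DMA channels are also polled briefly until they halt, and the
+ * core is then reset (with MDIO disabled meanwhile) to make sure DMA
+ * has really stopped.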
*/ static int axienet_stop(struct net_device *ndev) { - u32 cr; + u32 cr, sr; + int count; struct axienet_local *lp = netdev_priv(ndev); dev_dbg(&ndev->dev, "axienet_close()\n"); - cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, - cr & (~XAXIDMA_CR_RUNSTOP_MASK)); - cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); - axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, - cr & (~XAXIDMA_CR_RUNSTOP_MASK)); + phylink_stop(lp->phylink); + phylink_disconnect_phy(lp->phylink); + axienet_setoptions(ndev, lp->options & ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); + cr = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET, cr); + + cr = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + cr &= ~(XAXIDMA_CR_RUNSTOP_MASK | XAXIDMA_IRQ_ALL_MASK); + axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, cr); + + axienet_iow(lp, XAE_IE_OFFSET, 0); + + /* Give DMAs a chance to halt gracefully */ + sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { + msleep(20); + sr = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + } + + sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + for (count = 0; !(sr & XAXIDMA_SR_HALT_MASK) && count < 5; ++count) { + msleep(20); + sr = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + } + + /* Do a reset to ensure DMA is really stopped */ + mutex_lock(&lp->mii_bus->mdio_lock); + axienet_mdio_disable(lp); + __axienet_device_reset(lp); + axienet_mdio_enable(lp); + mutex_unlock(&lp->mii_bus->mdio_lock); + tasklet_kill(&lp->dma_err_tasklet); + if (lp->eth_irq > 0) + free_irq(lp->eth_irq, ndev); free_irq(lp->tx_irq, ndev); free_irq(lp->rx_irq, ndev); - if (ndev->phydev) - phy_disconnect(ndev->phydev); - axienet_dma_bd_release(ndev); return 0; } @@ -1151,6 +1179,48 @@ static void axienet_ethtools_get_regs(struct net_device *ndev, data[29] = axienet_ior(lp, XAE_FMI_OFFSET); data[30] = axienet_ior(lp, XAE_AF0_OFFSET); data[31] = axienet_ior(lp, XAE_AF1_OFFSET); + data[32] = axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET); + data[33] = axienet_dma_in32(lp, XAXIDMA_TX_SR_OFFSET); + data[34] = axienet_dma_in32(lp, XAXIDMA_TX_CDESC_OFFSET); + data[35] = axienet_dma_in32(lp, XAXIDMA_TX_TDESC_OFFSET); + data[36] = axienet_dma_in32(lp, XAXIDMA_RX_CR_OFFSET); + data[37] = axienet_dma_in32(lp, XAXIDMA_RX_SR_OFFSET); + data[38] = axienet_dma_in32(lp, XAXIDMA_RX_CDESC_OFFSET); + data[39] = axienet_dma_in32(lp, XAXIDMA_RX_TDESC_OFFSET); +} + +static void axienet_ethtools_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ering) +{ + struct axienet_local *lp = netdev_priv(ndev); + + ering->rx_max_pending = RX_BD_NUM_MAX; + ering->rx_mini_max_pending = 0; + ering->rx_jumbo_max_pending = 0; + ering->tx_max_pending = TX_BD_NUM_MAX; + ering->rx_pending = lp->rx_bd_num; + ering->rx_mini_pending = 0; + ering->rx_jumbo_pending = 0; + ering->tx_pending = lp->tx_bd_num; +} + +static int axienet_ethtools_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ering) +{ + struct axienet_local *lp = netdev_priv(ndev); + + if (ering->rx_pending > RX_BD_NUM_MAX || + ering->rx_mini_pending || + ering->rx_jumbo_pending || + ering->tx_pending > TX_BD_NUM_MAX) + return -EINVAL; + + if (netif_running(ndev)) + return -EBUSY; + + lp->rx_bd_num = ering->rx_pending; + lp->tx_bd_num = ering->tx_pending; + return 0; } /** @@ -1166,12 +1236,9 @@ static void axienet_ethtools_get_pauseparam(struct net_device *ndev, struct 
 
 /**
@@ -1166,12 +1236,9 @@ static void axienet_ethtools_get_pauseparam(struct net_device *ndev,
 					    struct ethtool_pauseparam *epauseparm)
 {
-	u32 regval;
 	struct axienet_local *lp = netdev_priv(ndev);
-	epauseparm->autoneg  = 0;
-	regval = axienet_ior(lp, XAE_FCC_OFFSET);
-	epauseparm->tx_pause = regval & XAE_FCC_FCTX_MASK;
-	epauseparm->rx_pause = regval & XAE_FCC_FCRX_MASK;
+
+	phylink_ethtool_get_pauseparam(lp->phylink, epauseparm);
 }
 
 /**
@@ -1190,27 +1257,9 @@ static int axienet_ethtools_set_pauseparam(struct net_device *ndev,
 					   struct ethtool_pauseparam *epauseparm)
 {
-	u32 regval = 0;
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	if (netif_running(ndev)) {
-		netdev_err(ndev,
-			   "Please stop netif before applying configuration\n");
-		return -EFAULT;
-	}
-
-	regval = axienet_ior(lp, XAE_FCC_OFFSET);
-	if (epauseparm->tx_pause)
-		regval |= XAE_FCC_FCTX_MASK;
-	else
-		regval &= ~XAE_FCC_FCTX_MASK;
-	if (epauseparm->rx_pause)
-		regval |= XAE_FCC_FCRX_MASK;
-	else
-		regval &= ~XAE_FCC_FCRX_MASK;
-	axienet_iow(lp, XAE_FCC_OFFSET, regval);
-
-	return 0;
+	return phylink_ethtool_set_pauseparam(lp->phylink, epauseparm);
 }
 
 /**
@@ -1289,17 +1338,170 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev,
 	return 0;
 }
 
+static int
+axienet_ethtools_get_link_ksettings(struct net_device *ndev,
+				    struct ethtool_link_ksettings *cmd)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_get(lp->phylink, cmd);
+}
+
+static int
+axienet_ethtools_set_link_ksettings(struct net_device *ndev,
+				    const struct ethtool_link_ksettings *cmd)
+{
+	struct axienet_local *lp = netdev_priv(ndev);
+
+	return phylink_ethtool_ksettings_set(lp->phylink, cmd);
+}
+
 static const struct ethtool_ops axienet_ethtool_ops = {
 	.get_drvinfo    = axienet_ethtools_get_drvinfo,
 	.get_regs_len   = axienet_ethtools_get_regs_len,
 	.get_regs       = axienet_ethtools_get_regs,
 	.get_link       = ethtool_op_get_link,
+	.get_ringparam	= axienet_ethtools_get_ringparam,
+	.set_ringparam	= axienet_ethtools_set_ringparam,
 	.get_pauseparam = axienet_ethtools_get_pauseparam,
 	.set_pauseparam = axienet_ethtools_set_pauseparam,
 	.get_coalesce   = axienet_ethtools_get_coalesce,
 	.set_coalesce   = axienet_ethtools_set_coalesce,
-	.get_link_ksettings = phy_ethtool_get_link_ksettings,
-	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+	.get_link_ksettings = axienet_ethtools_get_link_ksettings,
+	.set_link_ksettings = axienet_ethtools_set_link_ksettings,
+};
+
+static void axienet_validate(struct phylink_config *config,
+			     unsigned long *supported,
+			     struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+	/* Only support the mode we are configured for */
+	if (state->interface != PHY_INTERFACE_MODE_NA &&
+	    state->interface != lp->phy_mode) {
+		netdev_warn(ndev, "Cannot use PHY mode %s, supported: %s\n",
+			    phy_modes(state->interface),
+			    phy_modes(lp->phy_mode));
+		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+		return;
+	}
+
+	phylink_set(mask, Autoneg);
+	phylink_set_port_modes(mask);
+
+	phylink_set(mask, Asym_Pause);
+	phylink_set(mask, Pause);
+	phylink_set(mask, 1000baseX_Full);
+	phylink_set(mask, 10baseT_Full);
+	phylink_set(mask, 100baseT_Full);
+	phylink_set(mask, 1000baseT_Full);
+
+	bitmap_and(supported, supported, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_and(state->advertising, state->advertising, mask,
+		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
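validate() is phylink's capability-negotiation hook: the MAC clears everything it cannot do from the supported and advertising masks, and zeroes supported entirely for interface modes it cannot drive. A minimal sketch for a hypothetical MII-only, 10/100 full-duplex MAC (foo_validate() and its mode set are assumptions, not from this patch):

	static void foo_validate(struct phylink_config *config,
				 unsigned long *supported,
				 struct phylink_link_state *state)
	{
		__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };

		/* Reject interface modes the MAC cannot drive at all */
		if (state->interface != PHY_INTERFACE_MODE_NA &&
		    state->interface != PHY_INTERFACE_MODE_MII) {
			bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
			return;
		}

		phylink_set(mask, Autoneg);
		phylink_set(mask, MII);
		phylink_set(mask, 10baseT_Full);
		phylink_set(mask, 100baseT_Full);

		/* Keep only what both the MAC and the caller support */
		bitmap_and(supported, supported, mask,
			   __ETHTOOL_LINK_MODE_MASK_NBITS);
		bitmap_and(state->advertising, state->advertising, mask,
			   __ETHTOOL_LINK_MODE_MASK_NBITS);
	}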
+
+static int axienet_mac_link_state(struct phylink_config *config,
+				  struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	u32 emmc_reg, fcc_reg;
+
+	state->interface = lp->phy_mode;
+
+	emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET);
+	if (emmc_reg & XAE_EMMC_LINKSPD_1000)
+		state->speed = SPEED_1000;
+	else if (emmc_reg & XAE_EMMC_LINKSPD_100)
+		state->speed = SPEED_100;
+	else
+		state->speed = SPEED_10;
+
+	state->pause = 0;
+	fcc_reg = axienet_ior(lp, XAE_FCC_OFFSET);
+	if (fcc_reg & XAE_FCC_FCTX_MASK)
+		state->pause |= MLO_PAUSE_TX;
+	if (fcc_reg & XAE_FCC_FCRX_MASK)
+		state->pause |= MLO_PAUSE_RX;
+
+	state->an_complete = 0;
+	state->duplex = 1;
+
+	return 1;
+}
+
+static void axienet_mac_an_restart(struct phylink_config *config)
+{
+	/* Unsupported, do nothing */
+}
+
+static void axienet_mac_config(struct phylink_config *config, unsigned int mode,
+			       const struct phylink_link_state *state)
+{
+	struct net_device *ndev = to_net_dev(config->dev);
+	struct axienet_local *lp = netdev_priv(ndev);
+	u32 emmc_reg, fcc_reg;
+
+	emmc_reg = axienet_ior(lp, XAE_EMMC_OFFSET);
+	emmc_reg &= ~XAE_EMMC_LINKSPEED_MASK;
+
+	switch (state->speed) {
+	case SPEED_1000:
+		emmc_reg |= XAE_EMMC_LINKSPD_1000;
+		break;
+	case SPEED_100:
+		emmc_reg |= XAE_EMMC_LINKSPD_100;
+		break;
+	case SPEED_10:
+		emmc_reg |= XAE_EMMC_LINKSPD_10;
+		break;
+	default:
+		dev_err(&ndev->dev,
+			"Speed other than 10, 100 or 1Gbps is not supported\n");
+		break;
+	}
+
+	axienet_iow(lp, XAE_EMMC_OFFSET, emmc_reg);
+
+	fcc_reg = axienet_ior(lp, XAE_FCC_OFFSET);
+	if (state->pause & MLO_PAUSE_TX)
+		fcc_reg |= XAE_FCC_FCTX_MASK;
+	else
+		fcc_reg &= ~XAE_FCC_FCTX_MASK;
+	if (state->pause & MLO_PAUSE_RX)
+		fcc_reg |= XAE_FCC_FCRX_MASK;
+	else
+		fcc_reg &= ~XAE_FCC_FCRX_MASK;
+	axienet_iow(lp, XAE_FCC_OFFSET, fcc_reg);
+}
+
+static void axienet_mac_link_down(struct phylink_config *config,
+				  unsigned int mode,
+				  phy_interface_t interface)
+{
+	/* nothing meaningful to do */
+}
+
+static void axienet_mac_link_up(struct phylink_config *config,
+				unsigned int mode,
+				phy_interface_t interface,
+				struct phy_device *phy)
+{
+	/* nothing meaningful to do */
+}
+
+static const struct phylink_mac_ops axienet_phylink_ops = {
+	.validate = axienet_validate,
+	.mac_link_state = axienet_mac_link_state,
+	.mac_an_restart = axienet_mac_an_restart,
+	.mac_config = axienet_mac_config,
+	.mac_link_down = axienet_mac_link_down,
+	.mac_link_up = axienet_mac_link_up,
 };
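These six callbacks are the whole MAC-side contract with phylink; the PHY state machine and ethtool plumbing stay generic. A sketch of the lifecycle a driver wires up around the ops table (priv/foo_* names are placeholders; error handling trimmed; the calls themselves are the ones this patch uses):

	/* probe: bind the ops table to a phylink instance */
	priv->phylink_config.dev = &ndev->dev;
	priv->phylink_config.type = PHYLINK_NETDEV;
	priv->phylink = phylink_create(&priv->phylink_config,
				       dev->fwnode, priv->phy_mode,
				       &axienet_phylink_ops);

	/* ndo_open: attach the PHY and start the state machine */
	phylink_of_phy_connect(priv->phylink, dev->of_node, 0);
	phylink_start(priv->phylink);

	/* ndo_stop: reverse order */
	phylink_stop(priv->phylink);
	phylink_disconnect_phy(priv->phylink);

	/* remove */
	phylink_destroy(priv->phylink);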
 
 /**
@@ -1313,38 +1515,33 @@ static void axienet_dma_err_handler(unsigned long data)
 {
 	u32 axienet_status;
 	u32 cr, i;
-	int mdio_mcreg;
 	struct axienet_local *lp = (struct axienet_local *) data;
 	struct net_device *ndev = lp->ndev;
 	struct axidma_bd *cur_p;
 
 	axienet_setoptions(ndev, lp->options &
 			   ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN));
-	mdio_mcreg = axienet_ior(lp, XAE_MDIO_MC_OFFSET);
-	axienet_mdio_wait_until_ready(lp);
 	/* Disable the MDIO interface till Axi Ethernet Reset is completed.
 	 * When we do an Axi Ethernet reset, it resets the complete core
-	 * including the MDIO. So if MDIO is not disabled when the reset
-	 * process is started, MDIO will be broken afterwards.
+	 * including the MDIO. MDIO must be disabled before resetting
+	 * and re-enabled afterwards.
+	 * Hold MDIO bus lock to avoid MDIO accesses during the reset.
 	 */
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET, (mdio_mcreg &
-		    ~XAE_MDIO_MC_MDIOEN_MASK));
+	mutex_lock(&lp->mii_bus->mdio_lock);
+	axienet_mdio_disable(lp);
+	__axienet_device_reset(lp);
+	axienet_mdio_enable(lp);
+	mutex_unlock(&lp->mii_bus->mdio_lock);
 
-	__axienet_device_reset(lp, XAXIDMA_TX_CR_OFFSET);
-	__axienet_device_reset(lp, XAXIDMA_RX_CR_OFFSET);
-
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET, mdio_mcreg);
-	axienet_mdio_wait_until_ready(lp);
-
-	for (i = 0; i < TX_BD_NUM; i++) {
+	for (i = 0; i < lp->tx_bd_num; i++) {
 		cur_p = &lp->tx_bd_v[i];
 		if (cur_p->phys)
 			dma_unmap_single(ndev->dev.parent, cur_p->phys,
 					 (cur_p->cntrl &
 					  XAXIDMA_BD_CTRL_LENGTH_MASK),
 					 DMA_TO_DEVICE);
-		if (cur_p->app4)
-			dev_kfree_skb_irq((struct sk_buff *) cur_p->app4);
+		if (cur_p->skb)
+			dev_kfree_skb_irq(cur_p->skb);
 		cur_p->phys = 0;
 		cur_p->cntrl = 0;
 		cur_p->status = 0;
@@ -1353,10 +1550,10 @@ static void axienet_dma_err_handler(unsigned long data)
 		cur_p->app2 = 0;
 		cur_p->app3 = 0;
 		cur_p->app4 = 0;
-		cur_p->sw_id_offset = 0;
+		cur_p->skb = NULL;
 	}
 
-	for (i = 0; i < RX_BD_NUM; i++) {
+	for (i = 0; i < lp->rx_bd_num; i++) {
 		cur_p = &lp->rx_bd_v[i];
 		cur_p->status = 0;
 		cur_p->app0 = 0;
@@ -1404,7 +1601,7 @@ static void axienet_dma_err_handler(unsigned long data)
 	axienet_dma_out32(lp, XAXIDMA_RX_CR_OFFSET,
 			  cr | XAXIDMA_CR_RUNSTOP_MASK);
 	axienet_dma_out32(lp, XAXIDMA_RX_TDESC_OFFSET, lp->rx_bd_p +
-			  (sizeof(*lp->rx_bd_v) * (RX_BD_NUM - 1)));
+			  (sizeof(*lp->rx_bd_v) * (lp->rx_bd_num - 1)));
 
 	/* Write to the RS (Run-stop) bit in the Tx channel control register.
 	 * Tx channel is now ready to run. But only after we write to the
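The error handler can now free skbs through a dedicated per-descriptor pointer instead of a cast of the app4 word, which also keeps the pointer out of the words hardware may touch. A trimmed sketch of the descriptor as this series shapes it (reserved padding words omitted; treat the exact layout as an assumption):

	struct axidma_bd {
		u32 next;		/* physical address of the next BD */
		u32 phys;		/* DMA address of the data buffer */
		u32 cntrl;		/* buffer length and SOF/EOF flags */
		u32 status;		/* completion status from hardware */
		u32 app0, app1, app2, app3, app4;
		struct sk_buff *skb;	/* driver-private; never read by HW */
	};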
@@ -1422,6 +1619,8 @@ static void axienet_dma_err_handler(unsigned long data)
 	axienet_status = axienet_ior(lp, XAE_IP_OFFSET);
 	if (axienet_status & XAE_INT_RXRJECT_MASK)
 		axienet_iow(lp, XAE_IS_OFFSET, XAE_INT_RXRJECT_MASK);
+	axienet_iow(lp, XAE_IE_OFFSET, lp->eth_irq > 0 ?
+		    XAE_INT_RECV_ERROR_MASK : 0);
 	axienet_iow(lp, XAE_FCC_OFFSET, XAE_FCC_FCRX_MASK);
 
 	/* Sync default options with HW but leave receiver and
@@ -1453,7 +1652,7 @@ static int axienet_probe(struct platform_device *pdev)
 	struct axienet_local *lp;
 	struct net_device *ndev;
 	const void *mac_addr;
-	struct resource *ethres, dmares;
+	struct resource *ethres;
 	u32 value;
 
 	ndev = alloc_etherdev(sizeof(*lp));
@@ -1476,6 +1675,8 @@ static int axienet_probe(struct platform_device *pdev)
 	lp->ndev = ndev;
 	lp->dev = &pdev->dev;
 	lp->options = XAE_OPTION_DEFAULTS;
+	lp->rx_bd_num = RX_BD_NUM_DEFAULT;
+	lp->tx_bd_num = TX_BD_NUM_DEFAULT;
 	/* Map device registers */
 	ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
@@ -1484,6 +1685,7 @@ static int axienet_probe(struct platform_device *pdev)
 		ret = PTR_ERR(lp->regs);
 		goto free_netdev;
 	}
+	lp->regs_start = ethres->start;
 
 	/* Setup checksum offload, but default to off if not specified */
 	lp->features = 0;
@@ -1568,38 +1770,56 @@ static int axienet_probe(struct platform_device *pdev)
 
 	/* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
 	np = of_parse_phandle(pdev->dev.of_node, "axistream-connected", 0);
-	if (!np) {
-		dev_err(&pdev->dev, "could not find DMA node\n");
-		ret = -ENODEV;
-		goto free_netdev;
-	}
-	ret = of_address_to_resource(np, 0, &dmares);
-	if (ret) {
-		dev_err(&pdev->dev, "unable to get DMA resource\n");
+	if (np) {
+		struct resource dmares;
+
+		ret = of_address_to_resource(np, 0, &dmares);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"unable to get DMA resource\n");
+			of_node_put(np);
+			goto free_netdev;
+		}
+		lp->dma_regs = devm_ioremap_resource(&pdev->dev,
+						     &dmares);
+		lp->rx_irq = irq_of_parse_and_map(np, 1);
+		lp->tx_irq = irq_of_parse_and_map(np, 0);
 		of_node_put(np);
-		goto free_netdev;
+		lp->eth_irq = platform_get_irq(pdev, 0);
+	} else {
+		/* Check for these resources directly on the Ethernet node. */
+		struct resource *res = platform_get_resource(pdev,
+							     IORESOURCE_MEM, 1);
+		if (!res) {
+			dev_err(&pdev->dev,
+				"unable to get DMA memory resource\n");
+			ret = -ENODEV;
+			goto free_netdev;
+		}
+		lp->dma_regs = devm_ioremap_resource(&pdev->dev, res);
+		lp->rx_irq = platform_get_irq(pdev, 1);
+		lp->tx_irq = platform_get_irq(pdev, 0);
+		lp->eth_irq = platform_get_irq(pdev, 2);
 	}
-	lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares);
 	if (IS_ERR(lp->dma_regs)) {
 		dev_err(&pdev->dev, "could not map DMA regs\n");
 		ret = PTR_ERR(lp->dma_regs);
-		of_node_put(np);
 		goto free_netdev;
 	}
-	lp->rx_irq = irq_of_parse_and_map(np, 1);
-	lp->tx_irq = irq_of_parse_and_map(np, 0);
-	of_node_put(np);
 	if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) {
 		dev_err(&pdev->dev, "could not determine irqs\n");
 		ret = -ENOMEM;
 		goto free_netdev;
 	}
 
+	/* Check for Ethernet core IRQ (optional) */
+	if (lp->eth_irq <= 0)
+		dev_info(&pdev->dev, "Ethernet core IRQ not defined\n");
+
 	/* Retrieve the MAC address */
 	mac_addr = of_get_mac_address(pdev->dev.of_node);
 	if (IS_ERR(mac_addr)) {
-		dev_err(&pdev->dev, "could not find MAC address\n");
-		goto free_netdev;
+		dev_warn(&pdev->dev, "could not find MAC address property: %ld\n",
+			 PTR_ERR(mac_addr));
+		mac_addr = NULL;
 	}
 	axienet_set_mac_address(ndev, mac_addr);
 
@@ -1608,9 +1828,36 @@ static int axienet_probe(struct platform_device *pdev)
 
 	lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
 	if (lp->phy_node) {
-		ret = axienet_mdio_setup(lp, pdev->dev.of_node);
+		lp->clk = devm_clk_get(&pdev->dev, NULL);
+		if (IS_ERR(lp->clk)) {
+			dev_warn(&pdev->dev, "Failed to get clock: %ld\n",
+				 PTR_ERR(lp->clk));
+			lp->clk = NULL;
+		} else {
+			ret = clk_prepare_enable(lp->clk);
+			if (ret) {
+				dev_err(&pdev->dev, "Unable to enable clock: %d\n",
					ret);
+				goto free_netdev;
+			}
+		}
+
+		ret = axienet_mdio_setup(lp);
 		if (ret)
-			dev_warn(&pdev->dev, "error registering MDIO bus\n");
+			dev_warn(&pdev->dev,
+				 "error registering MDIO bus: %d\n", ret);
+	}
+
+	lp->phylink_config.dev = &ndev->dev;
+	lp->phylink_config.type = PHYLINK_NETDEV;
+
+	lp->phylink = phylink_create(&lp->phylink_config, pdev->dev.fwnode,
+				     lp->phy_mode,
+				     &axienet_phylink_ops);
+	if (IS_ERR(lp->phylink)) {
+		ret = PTR_ERR(lp->phylink);
+		dev_err(&pdev->dev, "phylink_create error (%i)\n", ret);
+		goto free_netdev;
 	}
 
 	ret = register_netdev(lp->ndev);
@@ -1632,9 +1879,16 @@ static int axienet_remove(struct platform_device *pdev)
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct axienet_local *lp = netdev_priv(ndev);
 
-	axienet_mdio_teardown(lp);
 	unregister_netdev(ndev);
 
+	if (lp->phylink)
+		phylink_destroy(lp->phylink);
+
+	axienet_mdio_teardown(lp);
+
+	if (lp->clk)
+		clk_disable_unprepare(lp->clk);
+
 	of_node_put(lp->phy_node);
 	lp->phy_node = NULL;
 
@@ -1643,9 +1897,23 @@ static int axienet_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static void axienet_shutdown(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+
+	rtnl_lock();
+	netif_device_detach(ndev);
+
+	if (netif_running(ndev))
+		dev_close(ndev);
+
+	rtnl_unlock();
+}
+
 static struct platform_driver axienet_driver = {
 	.probe = axienet_probe,
 	.remove = axienet_remove,
+	.shutdown = axienet_shutdown,
 	.driver = {
 		 .name = "xilinx_axienet",
 		 .of_match_table = axienet_of_match,
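The new shutdown hook matters for reboot and kexec: a still-running AXI DMA would keep writing into memory the next kernel owns. dev_close() funnels into the driver's ndo_stop (axienet_stop above), which halts and resets the DMA; the RTNL lock is required around dev_close(). The same quiesce pattern, annotated (foo_shutdown() is a placeholder name):

	static void foo_shutdown(struct platform_device *pdev)
	{
		struct net_device *ndev = platform_get_drvdata(pdev);

		rtnl_lock();			/* dev_close() asserts RTNL */
		netif_device_detach(ndev);	/* fend off further userspace ops */
		if (netif_running(ndev))
			dev_close(ndev);	/* ndo_stop: halt DMA, free IRQs */
		rtnl_unlock();
	}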
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
index 704babdbc8a2..435ed308d990 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_mdio.c
@@ -5,9 +5,11 @@
  * Copyright (c) 2009 Secret Lab Technologies, Ltd.
  * Copyright (c) 2010 - 2011 Michal Simek <monstr@monstr.eu>
  * Copyright (c) 2010 - 2011 PetaLogix
+ * Copyright (c) 2019 SED Systems, a division of Calian Ltd.
  * Copyright (c) 2010 - 2012 Xilinx, Inc. All rights reserved.
  */
 
+#include <linux/clk.h>
 #include <linux/of_address.h>
 #include <linux/of_mdio.h>
 #include <linux/jiffies.h>
@@ -16,10 +18,10 @@
 #include "xilinx_axienet.h"
 
 #define MAX_MDIO_FREQ		2500000 /* 2.5 MHz */
-#define DEFAULT_CLOCK_DIVISOR	XAE_MDIO_DIV_DFT
+#define DEFAULT_HOST_CLOCK	150000000 /* 150 MHz */
 
 /* Wait till MDIO interface is ready to accept a new transaction.*/
-int axienet_mdio_wait_until_ready(struct axienet_local *lp)
+static int axienet_mdio_wait_until_ready(struct axienet_local *lp)
 {
 	u32 val;
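The ready-wait that becomes static here is a plain bounded spin on the MDIO control register. On kernels with the iopoll helpers it could equally be written with readx_poll_timeout(); a sketch (axienet_ior_mcr() is an assumed one-argument wrapper, since the helper's op takes a single address argument):

	#include <linux/iopoll.h>

	static u32 axienet_ior_mcr(struct axienet_local *lp)
	{
		return axienet_ior(lp, XAE_MDIO_MCR_OFFSET);
	}

	static int axienet_mdio_wait_until_ready(struct axienet_local *lp)
	{
		u32 val;

		/* poll every 1 us, give up after 20 ms */
		return readx_poll_timeout(axienet_ior_mcr, lp, val,
					  val & XAE_MDIO_MCR_READY_MASK,
					  1, 20000);
	}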
@@ -112,23 +114,42 @@ static int axienet_mdio_write(struct mii_bus *bus, int phy_id, int reg,
 }
 
 /**
- * axienet_mdio_setup - MDIO setup function
+ * axienet_mdio_enable - MDIO hardware setup function
  * @lp:		Pointer to axienet local data structure.
- * @np:		Pointer to device node
  *
- * Return:	0 on success, -ETIMEDOUT on a timeout, -ENOMEM when
- *		mdiobus_alloc (to allocate memory for mii bus structure) fails.
+ * Return:	0 on success, -ETIMEDOUT on a timeout.
  *
  * Sets up the MDIO interface by initializing the MDIO clock and enabling the
- * MDIO interface in hardware. Register the MDIO interface.
+ * MDIO interface in hardware.
  **/
-int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np)
+int axienet_mdio_enable(struct axienet_local *lp)
 {
-	int ret;
 	u32 clk_div, host_clock;
-	struct mii_bus *bus;
-	struct resource res;
-	struct device_node *np1;
+
+	if (lp->clk) {
+		host_clock = clk_get_rate(lp->clk);
+	} else {
+		struct device_node *np1;
+
+		/* Legacy fallback: detect CPU clock frequency and use as AXI
+		 * bus clock frequency. This only works on certain platforms.
+		 */
+		np1 = of_find_node_by_name(NULL, "cpu");
+		if (!np1) {
+			netdev_warn(lp->ndev, "Could not find CPU device node.\n");
+			host_clock = DEFAULT_HOST_CLOCK;
+		} else {
+			int ret = of_property_read_u32(np1, "clock-frequency",
+						       &host_clock);
+			if (ret) {
+				netdev_warn(lp->ndev, "CPU clock-frequency property not found.\n");
+				host_clock = DEFAULT_HOST_CLOCK;
+			}
+			of_node_put(np1);
+		}
+		netdev_info(lp->ndev, "Setting assumed host clock to %u\n",
+			    host_clock);
+	}
 
 	/* clk_div can be calculated by deriving it from the equation:
 	 * fMDIO = fHOST / ((1 + clk_div) * 2)
@@ -155,25 +176,6 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np)
 	 * "clock-frequency" from the CPU
 	 */
 
-	np1 = of_find_node_by_name(NULL, "cpu");
-	if (!np1) {
-		netdev_warn(lp->ndev, "Could not find CPU device node.\n");
-		netdev_warn(lp->ndev,
-			    "Setting MDIO clock divisor to default %d\n",
-			    DEFAULT_CLOCK_DIVISOR);
-		clk_div = DEFAULT_CLOCK_DIVISOR;
-		goto issue;
-	}
-	if (of_property_read_u32(np1, "clock-frequency", &host_clock)) {
-		netdev_warn(lp->ndev, "clock-frequency property not found.\n");
-		netdev_warn(lp->ndev,
-			    "Setting MDIO clock divisor to default %d\n",
-			    DEFAULT_CLOCK_DIVISOR);
-		clk_div = DEFAULT_CLOCK_DIVISOR;
-		of_node_put(np1);
-		goto issue;
-	}
-
 	clk_div = (host_clock / (MAX_MDIO_FREQ * 2)) - 1;
 	/* If there is any remainder from the division of
 	 * fHOST / (MAX_MDIO_FREQ * 2), then we need to add
@@ -186,12 +188,39 @@ int axienet_mdio_setup(struct axienet_local *lp, struct device_node *np)
 		    "Setting MDIO clock divisor to %u/%u Hz host clock.\n",
 		    clk_div, host_clock);
 
-	of_node_put(np1);
-issue:
-	axienet_iow(lp, XAE_MDIO_MC_OFFSET,
-		    (((u32) clk_div) | XAE_MDIO_MC_MDIOEN_MASK));
+	axienet_iow(lp, XAE_MDIO_MC_OFFSET, clk_div | XAE_MDIO_MC_MDIOEN_MASK);
 
-	ret = axienet_mdio_wait_until_ready(lp);
+	return axienet_mdio_wait_until_ready(lp);
+}
+
+/**
+ * axienet_mdio_disable - MDIO hardware disable function
+ * @lp:		Pointer to axienet local data structure.
+ *
+ * Disable the MDIO interface in hardware.
+ **/
+void axienet_mdio_disable(struct axienet_local *lp)
+{
+	axienet_iow(lp, XAE_MDIO_MC_OFFSET, 0);
+}
+
+/**
+ * axienet_mdio_setup - MDIO setup function
+ * @lp:		Pointer to axienet local data structure.
+ *
+ * Return:	0 on success, -ETIMEDOUT on a timeout, -ENOMEM when
+ *		mdiobus_alloc (to allocate memory for mii bus structure) fails.
+ *
+ * Sets up the MDIO interface by initializing the MDIO clock and enabling the
+ * MDIO interface in hardware. Register the MDIO interface.
+ **/
+int axienet_mdio_setup(struct axienet_local *lp)
+{
+	struct device_node *mdio_node;
+	struct mii_bus *bus;
+	int ret;
+
+	ret = axienet_mdio_enable(lp);
 	if (ret < 0)
 		return ret;
 
@@ -199,10 +228,8 @@ issue:
 	if (!bus)
 		return -ENOMEM;
 
-	np1 = of_get_parent(lp->phy_node);
-	of_address_to_resource(np1, 0, &res);
-	snprintf(bus->id, MII_BUS_ID_SIZE, "%.8llx",
-		 (unsigned long long) res.start);
+	snprintf(bus->id, MII_BUS_ID_SIZE, "axienet-%.8llx",
+		 (unsigned long long)lp->regs_start);
 
 	bus->priv = lp;
 	bus->name = "Xilinx Axi Ethernet MDIO";
@@ -211,7 +238,9 @@ issue:
 	bus->parent = lp->dev;
 	lp->mii_bus = bus;
 
-	ret = of_mdiobus_register(bus, np1);
+	mdio_node = of_get_child_by_name(lp->dev->of_node, "mdio");
+	ret = of_mdiobus_register(bus, mdio_node);
+	of_node_put(mdio_node);
 	if (ret) {
 		mdiobus_free(bus);
 		lp->mii_bus = NULL;
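Worked example of the divisor formula quoted in the comment above, fMDIO = fHOST / ((1 + clk_div) * 2), for the 150 MHz DEFAULT_HOST_CLOCK:

	u32 host_clock = 150000000;	/* DEFAULT_HOST_CLOCK */
	u32 clk_div;

	clk_div = host_clock / (MAX_MDIO_FREQ * 2) - 1;	/* 150e6 / 5e6 - 1 = 29 */
	if (host_clock % (MAX_MDIO_FREQ * 2))
		clk_div++;	/* round fMDIO down so it never exceeds 2.5 MHz */

	/* (1 + 29) * 2 = 60, and 150 MHz / 60 = exactly 2.5 MHz.
	 * For a 133 MHz host clock: 133e6 / 5e6 - 1 = 25, remainder non-zero,
	 * so clk_div becomes 26, giving 133 MHz / 54 = about 2.46 MHz.
	 */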