diff options
Diffstat (limited to 'drivers/net/ethernet/emulex')
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be.h | 57 | ||||
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_cmds.c | 203 | ||||
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_cmds.h | 23 | ||||
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_ethtool.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/emulex/benet/be_main.c | 1200 |
5 files changed, 843 insertions, 642 deletions
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 27b9fe99a9bd..1bf1cdce74ac 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -30,11 +30,12 @@ #include <linux/firmware.h> #include <linux/slab.h> #include <linux/u64_stats_sync.h> +#include <linux/cpumask.h> #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "10.4u" +#define DRV_VER "10.6.0.1" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" @@ -87,6 +88,7 @@ #define BE3_MAX_EVT_QS 16 #define BE3_SRIOV_MAX_EVT_QS 8 +#define MAX_RSS_IFACES 15 #define MAX_RX_QS 32 #define MAX_EVT_QS 32 #define MAX_TX_QS 32 @@ -97,6 +99,7 @@ #define BE_NAPI_WEIGHT 64 #define MAX_RX_POST BE_NAPI_WEIGHT /* Frags posted at a time */ #define RX_FRAGS_REFILL_WM (RX_Q_LEN - MAX_RX_POST) +#define MAX_NUM_POST_ERX_DB 255u #define MAX_VFS 30 /* Max VFs supported by BE3 FW */ #define FW_VER_LEN 32 @@ -182,6 +185,7 @@ struct be_eq_obj { u16 spurious_intr; struct napi_struct napi; struct be_adapter *adapter; + cpumask_var_t affinity_mask; #ifdef CONFIG_NET_RX_BUSY_POLL #define BE_EQ_IDLE 0 @@ -238,10 +242,17 @@ struct be_tx_stats { struct u64_stats_sync sync_compl; }; +/* Structure to hold some data of interest obtained from a TX CQE */ +struct be_tx_compl_info { + u8 status; /* Completion status */ + u16 end_index; /* Completed TXQ Index */ +}; + struct be_tx_obj { u32 db_offset; struct be_queue_info q; struct be_queue_info cq; + struct be_tx_compl_info txcp; /* Remember the skbs that were transmitted */ struct sk_buff *sent_skb_list[TX_Q_LEN]; struct be_tx_stats stats; @@ -370,6 +381,7 @@ enum vf_state { #define BE_FLAGS_VXLAN_OFFLOADS BIT(8) #define BE_FLAGS_SETUP_DONE BIT(9) #define BE_FLAGS_EVT_INCOMPATIBLE_SFP BIT(10) +#define BE_FLAGS_ERR_DETECTION_SCHEDULED BIT(11) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 @@ -404,8 +416,11 @@ struct be_resources { u16 max_tx_qs; u16 max_rss_qs; u16 max_rx_qs; + u16 max_cq_count; u16 max_uc_mac; /* Max UC MACs programmable */ u16 max_vlans; /* Number of vlans supported */ + u16 max_iface_count; + u16 max_mcc_count; u16 max_evt_qs; u32 if_cap_flags; u32 vf_if_cap_flags; /* VF if capability flags */ @@ -418,6 +433,39 @@ struct rss_info { u8 rss_hkey[RSS_HASH_KEY_LEN]; }; +/* Macros to read/write the 'features' word of be_wrb_params structure. + */ +#define BE_WRB_F_BIT(name) BE_WRB_F_##name##_BIT +#define BE_WRB_F_MASK(name) BIT_MASK(BE_WRB_F_##name##_BIT) + +#define BE_WRB_F_GET(word, name) \ + (((word) & (BE_WRB_F_MASK(name))) >> BE_WRB_F_BIT(name)) + +#define BE_WRB_F_SET(word, name, val) \ + ((word) |= (((val) << BE_WRB_F_BIT(name)) & BE_WRB_F_MASK(name))) + +/* Feature/offload bits */ +enum { + BE_WRB_F_CRC_BIT, /* Ethernet CRC */ + BE_WRB_F_IPCS_BIT, /* IP csum */ + BE_WRB_F_TCPCS_BIT, /* TCP csum */ + BE_WRB_F_UDPCS_BIT, /* UDP csum */ + BE_WRB_F_LSO_BIT, /* LSO */ + BE_WRB_F_LSO6_BIT, /* LSO6 */ + BE_WRB_F_VLAN_BIT, /* VLAN */ + BE_WRB_F_VLAN_SKIP_HW_BIT /* Skip VLAN tag (workaround) */ +}; + +/* The structure below provides a HW-agnostic abstraction of WRB params + * retrieved from a TX skb. This is in turn passed to chip specific routines + * during transmit, to set the corresponding params in the WRB. + */ +struct be_wrb_params { + u32 features; /* Feature bits */ + u16 vlan_tag; /* VLAN tag */ + u16 lso_mss; /* MSS for LSO */ +}; + struct be_adapter { struct pci_dev *pdev; struct net_device *netdev; @@ -449,6 +497,8 @@ struct be_adapter { /* Rx rings */ u16 num_rx_qs; + u16 num_rss_qs; + u16 need_def_rxq; struct be_rx_obj rx_obj[MAX_RX_QS]; u32 big_page_size; /* Compounded page size shared by rx wrbs */ @@ -463,7 +513,7 @@ struct be_adapter { struct delayed_work work; u16 work_counter; - struct delayed_work func_recovery_work; + struct delayed_work be_err_detection_work; u32 flags; u32 cmd_privileges; /* Ethtool knobs and info */ @@ -596,9 +646,8 @@ extern const struct ethtool_ops be_ethtool_ops; for (i = 0, rxo = &adapter->rx_obj[i]; i < adapter->num_rx_qs; \ i++, rxo++) -/* Skip the default non-rss queue (last one)*/ #define for_all_rss_queues(adapter, rxo, i) \ - for (i = 0, rxo = &adapter->rx_obj[i]; i < (adapter->num_rx_qs - 1);\ + for (i = 0, rxo = &adapter->rx_obj[i]; i < adapter->num_rss_qs; \ i++, rxo++) #define for_all_tx_queues(adapter, txo, i) \ diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 7f05f309e935..fb140faeafb1 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -635,73 +635,16 @@ static int lancer_wait_ready(struct be_adapter *adapter) for (i = 0; i < SLIPORT_READY_TIMEOUT; i++) { sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); if (sliport_status & SLIPORT_STATUS_RDY_MASK) - break; - - msleep(1000); - } - - if (i == SLIPORT_READY_TIMEOUT) - return sliport_status ? : -1; - - return 0; -} - -static bool lancer_provisioning_error(struct be_adapter *adapter) -{ - u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; - - sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); - if (sliport_status & SLIPORT_STATUS_ERR_MASK) { - sliport_err1 = ioread32(adapter->db + SLIPORT_ERROR1_OFFSET); - sliport_err2 = ioread32(adapter->db + SLIPORT_ERROR2_OFFSET); - - if (sliport_err1 == SLIPORT_ERROR_NO_RESOURCE1 && - sliport_err2 == SLIPORT_ERROR_NO_RESOURCE2) - return true; - } - return false; -} - -int lancer_test_and_set_rdy_state(struct be_adapter *adapter) -{ - int status; - u32 sliport_status, err, reset_needed; - bool resource_error; + return 0; - resource_error = lancer_provisioning_error(adapter); - if (resource_error) - return -EAGAIN; + if (sliport_status & SLIPORT_STATUS_ERR_MASK && + !(sliport_status & SLIPORT_STATUS_RN_MASK)) + return -EIO; - status = lancer_wait_ready(adapter); - if (!status) { - sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET); - err = sliport_status & SLIPORT_STATUS_ERR_MASK; - reset_needed = sliport_status & SLIPORT_STATUS_RN_MASK; - if (err && reset_needed) { - iowrite32(SLI_PORT_CONTROL_IP_MASK, - adapter->db + SLIPORT_CONTROL_OFFSET); - - /* check if adapter has corrected the error */ - status = lancer_wait_ready(adapter); - sliport_status = ioread32(adapter->db + - SLIPORT_STATUS_OFFSET); - sliport_status &= (SLIPORT_STATUS_ERR_MASK | - SLIPORT_STATUS_RN_MASK); - if (status || sliport_status) - status = -1; - } else if (err || reset_needed) { - status = -1; - } + msleep(1000); } - /* Stop error recovery if error is not recoverable. - * No resource error is temporary errors and will go away - * when PF provisions resources. - */ - resource_error = lancer_provisioning_error(adapter); - if (resource_error) - status = -EAGAIN; - return status; + return sliport_status ? : -1; } int be_fw_wait_ready(struct be_adapter *adapter) @@ -720,6 +663,10 @@ int be_fw_wait_ready(struct be_adapter *adapter) } do { + /* There's no means to poll POST state on BE2/3 VFs */ + if (BEx_chip(adapter) && be_virtfn(adapter)) + return 0; + stage = be_POST_stage_get(adapter); if (stage == POST_STAGE_ARMFW_RDY) return 0; @@ -734,7 +681,7 @@ int be_fw_wait_ready(struct be_adapter *adapter) err: dev_err(dev, "POST timeout; stage=%#x\n", stage); - return -1; + return -ETIMEDOUT; } static inline struct be_sge *nonembedded_sgl(struct be_mcc_wrb *wrb) @@ -2123,16 +2070,12 @@ int be_cmd_reset_function(struct be_adapter *adapter) int status; if (lancer_chip(adapter)) { + iowrite32(SLI_PORT_CONTROL_IP_MASK, + adapter->db + SLIPORT_CONTROL_OFFSET); status = lancer_wait_ready(adapter); - if (!status) { - iowrite32(SLI_PORT_CONTROL_IP_MASK, - adapter->db + SLIPORT_CONTROL_OFFSET); - status = lancer_test_and_set_rdy_state(adapter); - } - if (status) { + if (status) dev_err(&adapter->pdev->dev, "Adapter in non recoverable error\n"); - } return status; } @@ -3075,7 +3018,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, mac_count = resp->true_mac_count + resp->pseudo_mac_count; /* Mac list returned could contain one or more active mac_ids - * or one or more true or pseudo permanant mac addresses. + * or one or more true or pseudo permanent mac addresses. * If an active mac_id is present, return first active mac_id * found. */ @@ -3130,7 +3073,7 @@ int be_cmd_get_perm_mac(struct be_adapter *adapter, u8 *mac) int status; bool pmac_valid = false; - memset(mac, 0, ETH_ALEN); + eth_zero_addr(mac); if (BEx_chip(adapter)) { if (be_physfn(adapter)) @@ -3631,12 +3574,12 @@ static void be_copy_nic_desc(struct be_resources *res, res->max_rss_qs = le16_to_cpu(desc->rssq_count); res->max_rx_qs = le16_to_cpu(desc->rq_count); res->max_evt_qs = le16_to_cpu(desc->eq_count); + res->max_cq_count = le16_to_cpu(desc->cq_count); + res->max_iface_count = le16_to_cpu(desc->iface_count); + res->max_mcc_count = le16_to_cpu(desc->mcc_count); /* Clear flags that driver is not interested in */ res->if_cap_flags = le32_to_cpu(desc->cap_flags) & BE_IF_CAP_FLAGS_WANT; - /* Need 1 RXQ as the default RXQ */ - if (res->max_rss_qs && res->max_rss_qs == res->max_rx_qs) - res->max_rss_qs -= 1; } /* Uses Mbox */ @@ -3698,7 +3641,7 @@ err: /* Will use MBOX only if MCCQ has not been created */ int be_cmd_get_profile_config(struct be_adapter *adapter, - struct be_resources *res, u8 domain) + struct be_resources *res, u8 query, u8 domain) { struct be_cmd_resp_get_profile_config *resp; struct be_cmd_req_get_profile_config *req; @@ -3708,7 +3651,7 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, struct be_nic_res_desc *nic; struct be_mcc_wrb wrb = {0}; struct be_dma_mem cmd; - u32 desc_count; + u16 desc_count; int status; memset(&cmd, 0, sizeof(struct be_dma_mem)); @@ -3727,12 +3670,19 @@ int be_cmd_get_profile_config(struct be_adapter *adapter, req->hdr.version = 1; req->type = ACTIVE_PROFILE_TYPE; + /* When QUERY_MODIFIABLE_FIELDS_TYPE bit is set, cmd returns the + * descriptors with all bits set to "1" for the fields which can be + * modified using SET_PROFILE_CONFIG cmd. + */ + if (query == RESOURCE_MODIFIABLE) + req->type |= QUERY_MODIFIABLE_FIELDS_TYPE; + status = be_cmd_notify_wait(adapter, &wrb); if (status) goto err; resp = cmd.va; - desc_count = le32_to_cpu(resp->desc_count); + desc_count = le16_to_cpu(resp->desc_count); pcie = be_get_pcie_desc(adapter->pdev->devfn, resp->func_param, desc_count); @@ -3857,23 +3807,80 @@ int be_cmd_config_qos(struct be_adapter *adapter, u32 max_rate, u16 link_speed, 1, version, domain); } +static void be_fill_vf_res_template(struct be_adapter *adapter, + struct be_resources pool_res, + u16 num_vfs, u16 num_vf_qs, + struct be_nic_res_desc *nic_vft) +{ + u32 vf_if_cap_flags = pool_res.vf_if_cap_flags; + struct be_resources res_mod = {0}; + + /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd, + * which are modifiable using SET_PROFILE_CONFIG cmd. + */ + be_cmd_get_profile_config(adapter, &res_mod, RESOURCE_MODIFIABLE, 0); + + /* If RSS IFACE capability flags are modifiable for a VF, set the + * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if + * more than 1 RSSQ is available for a VF. + * Otherwise, provision only 1 queue pair for VF. + */ + if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) { + nic_vft->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT); + if (num_vf_qs > 1) { + vf_if_cap_flags |= BE_IF_FLAGS_RSS; + if (pool_res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS) + vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS; + } else { + vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS | + BE_IF_FLAGS_DEFQ_RSS); + } + + nic_vft->cap_flags = cpu_to_le32(vf_if_cap_flags); + } else { + num_vf_qs = 1; + } + + nic_vft->rq_count = cpu_to_le16(num_vf_qs); + nic_vft->txq_count = cpu_to_le16(num_vf_qs); + nic_vft->rssq_count = cpu_to_le16(num_vf_qs); + nic_vft->cq_count = cpu_to_le16(pool_res.max_cq_count / + (num_vfs + 1)); + + /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally + * among the PF and it's VFs, if the fields are changeable + */ + if (res_mod.max_uc_mac == FIELD_MODIFIABLE) + nic_vft->unicast_mac_count = cpu_to_le16(pool_res.max_uc_mac / + (num_vfs + 1)); + + if (res_mod.max_vlans == FIELD_MODIFIABLE) + nic_vft->vlan_count = cpu_to_le16(pool_res.max_vlans / + (num_vfs + 1)); + + if (res_mod.max_iface_count == FIELD_MODIFIABLE) + nic_vft->iface_count = cpu_to_le16(pool_res.max_iface_count / + (num_vfs + 1)); + + if (res_mod.max_mcc_count == FIELD_MODIFIABLE) + nic_vft->mcc_count = cpu_to_le16(pool_res.max_mcc_count / + (num_vfs + 1)); +} + int be_cmd_set_sriov_config(struct be_adapter *adapter, - struct be_resources res, u16 num_vfs) + struct be_resources pool_res, u16 num_vfs, + u16 num_vf_qs) { struct { struct be_pcie_res_desc pcie; struct be_nic_res_desc nic_vft; } __packed desc; - u16 vf_q_count; - - if (BEx_chip(adapter) || lancer_chip(adapter)) - return 0; /* PF PCIE descriptor */ be_reset_pcie_desc(&desc.pcie); desc.pcie.hdr.desc_type = PCIE_RESOURCE_DESC_TYPE_V1; desc.pcie.hdr.desc_len = RESOURCE_DESC_SIZE_V1; - desc.pcie.flags = (1 << IMM_SHIFT) | (1 << NOSV_SHIFT); + desc.pcie.flags = BIT(IMM_SHIFT) | BIT(NOSV_SHIFT); desc.pcie.pf_num = adapter->pdev->devfn; desc.pcie.sriov_state = num_vfs ? 1 : 0; desc.pcie.num_vfs = cpu_to_le16(num_vfs); @@ -3882,32 +3889,12 @@ int be_cmd_set_sriov_config(struct be_adapter *adapter, be_reset_nic_desc(&desc.nic_vft); desc.nic_vft.hdr.desc_type = NIC_RESOURCE_DESC_TYPE_V1; desc.nic_vft.hdr.desc_len = RESOURCE_DESC_SIZE_V1; - desc.nic_vft.flags = (1 << VFT_SHIFT) | (1 << IMM_SHIFT) | - (1 << NOSV_SHIFT); + desc.nic_vft.flags = BIT(VFT_SHIFT) | BIT(IMM_SHIFT) | BIT(NOSV_SHIFT); desc.nic_vft.pf_num = adapter->pdev->devfn; desc.nic_vft.vf_num = 0; - if (num_vfs && res.vf_if_cap_flags & BE_IF_FLAGS_RSS) { - /* If number of VFs requested is 8 less than max supported, - * assign 8 queue pairs to the PF and divide the remaining - * resources evenly among the VFs - */ - if (num_vfs < (be_max_vfs(adapter) - 8)) - vf_q_count = (res.max_rss_qs - 8) / num_vfs; - else - vf_q_count = res.max_rss_qs / num_vfs; - - desc.nic_vft.rq_count = cpu_to_le16(vf_q_count); - desc.nic_vft.txq_count = cpu_to_le16(vf_q_count); - desc.nic_vft.rssq_count = cpu_to_le16(vf_q_count - 1); - desc.nic_vft.cq_count = cpu_to_le16(3 * vf_q_count); - } else { - desc.nic_vft.txq_count = cpu_to_le16(1); - desc.nic_vft.rq_count = cpu_to_le16(1); - desc.nic_vft.rssq_count = cpu_to_le16(0); - /* One CQ for each TX, RX and MCCQ */ - desc.nic_vft.cq_count = cpu_to_le16(3); - } + be_fill_vf_res_template(adapter, pool_res, num_vfs, num_vf_qs, + &desc.nic_vft); return be_cmd_set_profile_config(adapter, &desc, 2 * RESOURCE_DESC_SIZE_V1, 2, 1, 0); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index a7634a3f052a..1ec22300e254 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -588,14 +588,15 @@ enum be_if_flags { BE_IF_FLAGS_MCAST_PROMISCUOUS = 0x200, BE_IF_FLAGS_PASS_L2_ERRORS = 0x400, BE_IF_FLAGS_PASS_L3L4_ERRORS = 0x800, - BE_IF_FLAGS_MULTICAST = 0x1000 + BE_IF_FLAGS_MULTICAST = 0x1000, + BE_IF_FLAGS_DEFQ_RSS = 0x1000000 }; #define BE_IF_CAP_FLAGS_WANT (BE_IF_FLAGS_RSS | BE_IF_FLAGS_PROMISCUOUS |\ BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_VLAN_PROMISCUOUS |\ BE_IF_FLAGS_VLAN | BE_IF_FLAGS_MCAST_PROMISCUOUS |\ BE_IF_FLAGS_PASS_L3L4_ERRORS | BE_IF_FLAGS_MULTICAST |\ - BE_IF_FLAGS_UNTAGGED) + BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_DEFQ_RSS) #define BE_IF_FLAGS_ALL_PROMISCUOUS (BE_IF_FLAGS_PROMISCUOUS | \ BE_IF_FLAGS_VLAN_PROMISCUOUS |\ @@ -2021,6 +2022,7 @@ struct be_cmd_req_set_ext_fat_caps { #define PORT_RESOURCE_DESC_TYPE_V1 0x55 #define MAX_RESOURCE_DESC 264 +#define IF_CAPS_FLAGS_VALID_SHIFT 0 /* IF caps valid */ #define VFT_SHIFT 3 /* VF template */ #define IMM_SHIFT 6 /* Immediate */ #define NOSV_SHIFT 7 /* No save */ @@ -2131,20 +2133,28 @@ struct be_cmd_resp_get_func_config { u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE_V1]; }; -#define ACTIVE_PROFILE_TYPE 0x2 +enum { + RESOURCE_LIMITS, + RESOURCE_MODIFIABLE +}; + struct be_cmd_req_get_profile_config { struct be_cmd_req_hdr hdr; u8 rsvd; +#define ACTIVE_PROFILE_TYPE 0x2 +#define QUERY_MODIFIABLE_FIELDS_TYPE BIT(3) u8 type; u16 rsvd1; }; struct be_cmd_resp_get_profile_config { struct be_cmd_resp_hdr hdr; - u32 desc_count; + __le16 desc_count; + u16 rsvd; u8 func_param[MAX_RESOURCE_DESC * RESOURCE_DESC_SIZE_V1]; }; +#define FIELD_MODIFIABLE 0xFFFF struct be_cmd_req_set_profile_config { struct be_cmd_req_hdr hdr; u32 rsvd; @@ -2344,7 +2354,7 @@ int be_cmd_query_port_name(struct be_adapter *adapter); int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res); int be_cmd_get_profile_config(struct be_adapter *adapter, - struct be_resources *res, u8 domain); + struct be_resources *res, u8 query, u8 domain); int be_cmd_get_active_profile(struct be_adapter *adapter, u16 *profile); int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, int vf_num); @@ -2355,4 +2365,5 @@ int be_cmd_set_logical_link_config(struct be_adapter *adapter, int be_cmd_set_vxlan_port(struct be_adapter *adapter, __be16 port); int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op); int be_cmd_set_sriov_config(struct be_adapter *adapter, - struct be_resources res, u16 num_vfs); + struct be_resources res, u16 num_vfs, + u16 num_vf_qs); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 4d2de4700769..b765c24625bf 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -1097,7 +1097,7 @@ static int be_set_rss_hash_opts(struct be_adapter *adapter, return status; if (be_multi_rxq(adapter)) { - for (j = 0; j < 128; j += adapter->num_rx_qs - 1) { + for (j = 0; j < 128; j += adapter->num_rss_qs) { for_all_rss_queues(adapter, rxo, i) { if ((j + i) >= 128) break; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index e6b790f0d9dc..fb0bc3c3620e 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -30,6 +30,9 @@ MODULE_DESCRIPTION(DRV_DESC " " DRV_VER); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("GPL"); +/* num_vfs module param is obsolete. + * Use sysfs method to enable/disable VFs. + */ static unsigned int num_vfs; module_param(num_vfs, uint, S_IRUGO); MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize"); @@ -727,48 +730,86 @@ static u16 skb_ip_proto(struct sk_buff *skb) ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; } -static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr, - struct sk_buff *skb, u32 wrb_cnt, u32 len, - bool skip_hw_vlan) +static inline bool be_is_txq_full(struct be_tx_obj *txo) { - u16 vlan_tag, proto; + return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len; +} - memset(hdr, 0, sizeof(*hdr)); +static inline bool be_can_txq_wake(struct be_tx_obj *txo) +{ + return atomic_read(&txo->q.used) < txo->q.len / 2; +} - SET_TX_WRB_HDR_BITS(crc, hdr, 1); +static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo) +{ + return atomic_read(&txo->q.used) > txo->pend_wrb_cnt; +} + +static void be_get_wrb_params_from_skb(struct be_adapter *adapter, + struct sk_buff *skb, + struct be_wrb_params *wrb_params) +{ + u16 proto; if (skb_is_gso(skb)) { - SET_TX_WRB_HDR_BITS(lso, hdr, 1); - SET_TX_WRB_HDR_BITS(lso_mss, hdr, skb_shinfo(skb)->gso_size); + BE_WRB_F_SET(wrb_params->features, LSO, 1); + wrb_params->lso_mss = skb_shinfo(skb)->gso_size; if (skb_is_gso_v6(skb) && !lancer_chip(adapter)) - SET_TX_WRB_HDR_BITS(lso6, hdr, 1); + BE_WRB_F_SET(wrb_params->features, LSO6, 1); } else if (skb->ip_summed == CHECKSUM_PARTIAL) { if (skb->encapsulation) { - SET_TX_WRB_HDR_BITS(ipcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, IPCS, 1); proto = skb_inner_ip_proto(skb); } else { proto = skb_ip_proto(skb); } if (proto == IPPROTO_TCP) - SET_TX_WRB_HDR_BITS(tcpcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, TCPCS, 1); else if (proto == IPPROTO_UDP) - SET_TX_WRB_HDR_BITS(udpcs, hdr, 1); + BE_WRB_F_SET(wrb_params->features, UDPCS, 1); } if (skb_vlan_tag_present(skb)) { - SET_TX_WRB_HDR_BITS(vlan, hdr, 1); - vlan_tag = be_get_tx_vlan_tag(adapter, skb); - SET_TX_WRB_HDR_BITS(vlan_tag, hdr, vlan_tag); + BE_WRB_F_SET(wrb_params->features, VLAN, 1); + wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb); } - SET_TX_WRB_HDR_BITS(num_wrb, hdr, wrb_cnt); - SET_TX_WRB_HDR_BITS(len, hdr, len); + BE_WRB_F_SET(wrb_params->features, CRC, 1); +} - /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0 - * When this hack is not needed, the evt bit is set while ringing DB +static void wrb_fill_hdr(struct be_adapter *adapter, + struct be_eth_hdr_wrb *hdr, + struct be_wrb_params *wrb_params, + struct sk_buff *skb) +{ + memset(hdr, 0, sizeof(*hdr)); + + SET_TX_WRB_HDR_BITS(crc, hdr, + BE_WRB_F_GET(wrb_params->features, CRC)); + SET_TX_WRB_HDR_BITS(ipcs, hdr, + BE_WRB_F_GET(wrb_params->features, IPCS)); + SET_TX_WRB_HDR_BITS(tcpcs, hdr, + BE_WRB_F_GET(wrb_params->features, TCPCS)); + SET_TX_WRB_HDR_BITS(udpcs, hdr, + BE_WRB_F_GET(wrb_params->features, UDPCS)); + + SET_TX_WRB_HDR_BITS(lso, hdr, + BE_WRB_F_GET(wrb_params->features, LSO)); + SET_TX_WRB_HDR_BITS(lso6, hdr, + BE_WRB_F_GET(wrb_params->features, LSO6)); + SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss); + + /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this + * hack is not needed, the evt bit is set while ringing DB. */ - if (skip_hw_vlan) - SET_TX_WRB_HDR_BITS(event, hdr, 1); + SET_TX_WRB_HDR_BITS(event, hdr, + BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW)); + SET_TX_WRB_HDR_BITS(vlan, hdr, + BE_WRB_F_GET(wrb_params->features, VLAN)); + SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag); + + SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb)); + SET_TX_WRB_HDR_BITS(len, hdr, skb->len); } static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb, @@ -788,77 +829,124 @@ static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb, } } -/* Returns the number of WRBs used up by the skb */ +/* Grab a WRB header for xmit */ +static u16 be_tx_get_wrb_hdr(struct be_tx_obj *txo) +{ + u16 head = txo->q.head; + + queue_head_inc(&txo->q); + return head; +} + +/* Set up the WRB header for xmit */ +static void be_tx_setup_wrb_hdr(struct be_adapter *adapter, + struct be_tx_obj *txo, + struct be_wrb_params *wrb_params, + struct sk_buff *skb, u16 head) +{ + u32 num_frags = skb_wrb_cnt(skb); + struct be_queue_info *txq = &txo->q; + struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head); + + wrb_fill_hdr(adapter, hdr, wrb_params, skb); + be_dws_cpu_to_le(hdr, sizeof(*hdr)); + + BUG_ON(txo->sent_skb_list[head]); + txo->sent_skb_list[head] = skb; + txo->last_req_hdr = head; + atomic_add(num_frags, &txq->used); + txo->last_req_wrb_cnt = num_frags; + txo->pend_wrb_cnt += num_frags; +} + +/* Setup a WRB fragment (buffer descriptor) for xmit */ +static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr, + int len) +{ + struct be_eth_wrb *wrb; + struct be_queue_info *txq = &txo->q; + + wrb = queue_head_node(txq); + wrb_fill(wrb, busaddr, len); + queue_head_inc(txq); +} + +/* Bring the queue back to the state it was in before be_xmit_enqueue() routine + * was invoked. The producer index is restored to the previous packet and the + * WRBs of the current packet are unmapped. Invoked to handle tx setup errors. + */ +static void be_xmit_restore(struct be_adapter *adapter, + struct be_tx_obj *txo, u16 head, bool map_single, + u32 copied) +{ + struct device *dev; + struct be_eth_wrb *wrb; + struct be_queue_info *txq = &txo->q; + + dev = &adapter->pdev->dev; + txq->head = head; + + /* skip the first wrb (hdr); it's not mapped */ + queue_head_inc(txq); + while (copied) { + wrb = queue_head_node(txq); + unmap_tx_frag(dev, wrb, map_single); + map_single = false; + copied -= le32_to_cpu(wrb->frag_len); + queue_head_inc(txq); + } + + txq->head = head; +} + +/* Enqueue the given packet for transmit. This routine allocates WRBs for the + * packet, dma maps the packet buffers and sets up the WRBs. Returns the number + * of WRBs used up by the packet. + */ static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo, - struct sk_buff *skb, bool skip_hw_vlan) + struct sk_buff *skb, + struct be_wrb_params *wrb_params) { u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb); struct device *dev = &adapter->pdev->dev; struct be_queue_info *txq = &txo->q; - struct be_eth_hdr_wrb *hdr; bool map_single = false; - struct be_eth_wrb *wrb; - dma_addr_t busaddr; u16 head = txq->head; + dma_addr_t busaddr; + int len; - hdr = queue_head_node(txq); - wrb_fill_hdr(adapter, hdr, skb, wrb_cnt, skb->len, skip_hw_vlan); - be_dws_cpu_to_le(hdr, sizeof(*hdr)); - - queue_head_inc(txq); + head = be_tx_get_wrb_hdr(txo); if (skb->len > skb->data_len) { - int len = skb_headlen(skb); + len = skb_headlen(skb); busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, busaddr)) goto dma_err; map_single = true; - wrb = queue_head_node(txq); - wrb_fill(wrb, busaddr, len); - queue_head_inc(txq); + be_tx_setup_wrb_frag(txo, busaddr, len); copied += len; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + len = skb_frag_size(frag); - busaddr = skb_frag_dma_map(dev, frag, 0, - skb_frag_size(frag), DMA_TO_DEVICE); + busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, busaddr)) goto dma_err; - wrb = queue_head_node(txq); - wrb_fill(wrb, busaddr, skb_frag_size(frag)); - queue_head_inc(txq); - copied += skb_frag_size(frag); + be_tx_setup_wrb_frag(txo, busaddr, len); + copied += len; } - BUG_ON(txo->sent_skb_list[head]); - txo->sent_skb_list[head] = skb; - txo->last_req_hdr = head; - atomic_add(wrb_cnt, &txq->used); - txo->last_req_wrb_cnt = wrb_cnt; - txo->pend_wrb_cnt += wrb_cnt; + be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head); be_tx_stats_update(txo, skb); return wrb_cnt; dma_err: - /* Bring the queue back to the state it was in before this - * routine was invoked. - */ - txq->head = head; - /* skip the first wrb (hdr); it's not mapped */ - queue_head_inc(txq); - while (copied) { - wrb = queue_head_node(txq); - unmap_tx_frag(dev, wrb, map_single); - map_single = false; - copied -= le32_to_cpu(wrb->frag_len); - adapter->drv_stats.dma_map_errors++; - queue_head_inc(txq); - } - txq->head = head; + adapter->drv_stats.dma_map_errors++; + be_xmit_restore(adapter, txo, head, map_single, copied); return 0; } @@ -869,7 +957,8 @@ static inline int qnq_async_evt_rcvd(struct be_adapter *adapter) static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params + *wrb_params) { u16 vlan_tag = 0; @@ -886,8 +975,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to * skip VLAN insertion */ - if (skip_hw_vlan) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); } if (vlan_tag) { @@ -905,8 +993,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, vlan_tag); if (unlikely(!skb)) return skb; - if (skip_hw_vlan) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); } return skb; @@ -946,7 +1033,8 @@ static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb) static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params + *wrb_params) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; unsigned int eth_hdr_len; @@ -970,7 +1058,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (be_pvid_tagging_enabled(adapter) && veh->h_vlan_proto == htons(ETH_P_8021Q)) - *skip_hw_vlan = true; + BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1); /* HW has a bug wherein it will calculate CSUM for VLAN * pkts even though it is disabled. @@ -978,7 +1066,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (skb->ip_summed != CHECKSUM_PARTIAL && skb_vlan_tag_present(skb)) { - skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); + skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params); if (unlikely(!skb)) goto err; } @@ -1000,7 +1088,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, */ if (be_ipv6_tx_stall_chk(adapter, skb) && be_vlan_tag_tx_chk(adapter, skb)) { - skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); + skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params); if (unlikely(!skb)) goto err; } @@ -1014,7 +1102,7 @@ err: static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, struct sk_buff *skb, - bool *skip_hw_vlan) + struct be_wrb_params *wrb_params) { /* Lancer, SH-R ASICs have a bug wherein Packets that are 32 bytes or * less may cause a transmit stall on that port. So the work-around is @@ -1026,7 +1114,7 @@ static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter, } if (BEx_chip(adapter) || lancer_chip(adapter)) { - skb = be_lancer_xmit_workarounds(adapter, skb, skip_hw_vlan); + skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params); if (!skb) return NULL; } @@ -1060,24 +1148,26 @@ static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo) static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev) { - bool skip_hw_vlan = false, flush = !skb->xmit_more; struct be_adapter *adapter = netdev_priv(netdev); u16 q_idx = skb_get_queue_mapping(skb); struct be_tx_obj *txo = &adapter->tx_obj[q_idx]; - struct be_queue_info *txq = &txo->q; + struct be_wrb_params wrb_params = { 0 }; + bool flush = !skb->xmit_more; u16 wrb_cnt; - skb = be_xmit_workarounds(adapter, skb, &skip_hw_vlan); + skb = be_xmit_workarounds(adapter, skb, &wrb_params); if (unlikely(!skb)) goto drop; - wrb_cnt = be_xmit_enqueue(adapter, txo, skb, skip_hw_vlan); + be_get_wrb_params_from_skb(adapter, skb, &wrb_params); + + wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); if (unlikely(!wrb_cnt)) { dev_kfree_skb_any(skb); goto drop; } - if ((atomic_read(&txq->used) + BE_MAX_TX_FRAG_COUNT) >= txq->len) { + if (be_is_txq_full(txo)) { netif_stop_subqueue(netdev, q_idx); tx_stats(txo)->tx_stops++; } @@ -2032,7 +2122,7 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed) if (rxo->rx_post_starved) rxo->rx_post_starved = false; do { - notify = min(256u, posted); + notify = min(MAX_NUM_POST_ERX_DB, posted); be_rxq_notify(adapter, rxq->id, notify); posted -= notify; } while (posted); @@ -2042,18 +2132,23 @@ static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed) } } -static struct be_eth_tx_compl *be_tx_compl_get(struct be_queue_info *tx_cq) +static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo) { - struct be_eth_tx_compl *txcp = queue_tail_node(tx_cq); + struct be_queue_info *tx_cq = &txo->cq; + struct be_tx_compl_info *txcp = &txo->txcp; + struct be_eth_tx_compl *compl = queue_tail_node(tx_cq); - if (txcp->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0) + if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0) return NULL; + /* Ensure load ordering of valid bit dword and other dwords below */ rmb(); - be_dws_le_to_cpu(txcp, sizeof(*txcp)); + be_dws_le_to_cpu(compl, sizeof(*compl)); - txcp->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0; + txcp->status = GET_TX_COMPL_BITS(status, compl); + txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl); + compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0; queue_tail_inc(tx_cq); return txcp; } @@ -2174,9 +2269,9 @@ static void be_tx_compl_clean(struct be_adapter *adapter) { u16 end_idx, notified_idx, cmpl = 0, timeo = 0, num_wrbs = 0; struct device *dev = &adapter->pdev->dev; - struct be_tx_obj *txo; + struct be_tx_compl_info *txcp; struct be_queue_info *txq; - struct be_eth_tx_compl *txcp; + struct be_tx_obj *txo; int i, pending_txqs; /* Stop polling for compls when HW has been silent for 10ms */ @@ -2187,10 +2282,10 @@ static void be_tx_compl_clean(struct be_adapter *adapter) cmpl = 0; num_wrbs = 0; txq = &txo->q; - while ((txcp = be_tx_compl_get(&txo->cq))) { - end_idx = GET_TX_COMPL_BITS(wrb_index, txcp); - num_wrbs += be_tx_compl_process(adapter, txo, - end_idx); + while ((txcp = be_tx_compl_get(txo))) { + num_wrbs += + be_tx_compl_process(adapter, txo, + txcp->end_index); cmpl++; } if (cmpl) { @@ -2198,7 +2293,7 @@ static void be_tx_compl_clean(struct be_adapter *adapter) atomic_sub(num_wrbs, &txq->used); timeo = 0; } - if (atomic_read(&txq->used) == txo->pend_wrb_cnt) + if (!be_is_tx_compl_pending(txo)) pending_txqs--; } @@ -2247,6 +2342,7 @@ static void be_evt_queues_destroy(struct be_adapter *adapter) napi_hash_del(&eqo->napi); netif_napi_del(&eqo->napi); } + free_cpumask_var(eqo->affinity_mask); be_queue_free(adapter, &eqo->q); } } @@ -2262,6 +2358,11 @@ static int be_evt_queues_create(struct be_adapter *adapter) adapter->cfg_num_qs); for_all_evt_queues(adapter, eqo, i) { + if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL)) + return -ENOMEM; + cpumask_set_cpu_local_first(i, dev_to_node(&adapter->pdev->dev), + eqo->affinity_mask); + netif_napi_add(adapter->netdev, &eqo->napi, be_poll, BE_NAPI_WEIGHT); napi_hash_add(&eqo->napi); @@ -2353,8 +2454,9 @@ static void be_tx_queues_destroy(struct be_adapter *adapter) static int be_tx_qs_create(struct be_adapter *adapter) { - struct be_queue_info *cq, *eq; + struct be_queue_info *cq; struct be_tx_obj *txo; + struct be_eq_obj *eqo; int status, i; adapter->num_tx_qs = min(adapter->num_evt_qs, be_max_txqs(adapter)); @@ -2372,8 +2474,8 @@ static int be_tx_qs_create(struct be_adapter *adapter) /* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq */ - eq = &adapter->eq_obj[i % adapter->num_evt_qs].q; - status = be_cmd_cq_create(adapter, cq, eq, false, 3); + eqo = &adapter->eq_obj[i % adapter->num_evt_qs]; + status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3); if (status) return status; @@ -2385,6 +2487,9 @@ static int be_tx_qs_create(struct be_adapter *adapter) status = be_cmd_txq_create(adapter, txo); if (status) return status; + + netif_set_xps_queue(adapter->netdev, eqo->affinity_mask, + eqo->idx); } dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n", @@ -2413,13 +2518,19 @@ static int be_rx_cqs_create(struct be_adapter *adapter) int rc, i; /* We can create as many RSS rings as there are EQs. */ - adapter->num_rx_qs = adapter->num_evt_qs; + adapter->num_rss_qs = adapter->num_evt_qs; + + /* We'll use RSS only if atleast 2 RSS rings are supported. */ + if (adapter->num_rss_qs <= 1) + adapter->num_rss_qs = 0; + + adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq; - /* We'll use RSS only if atleast 2 RSS rings are supported. - * When RSS is used, we'll need a default RXQ for non-IP traffic. + /* When the interface is not capable of RSS rings (and there is no + * need to create a default RXQ) we'll still need one RXQ */ - if (adapter->num_rx_qs > 1) - adapter->num_rx_qs++; + if (adapter->num_rx_qs == 0) + adapter->num_rx_qs = 1; adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE; for_all_rx_queues(adapter, rxo, i) { @@ -2438,8 +2549,7 @@ static int be_rx_cqs_create(struct be_adapter *adapter) } dev_info(&adapter->pdev->dev, - "created %d RSS queue(s) and 1 default RX queue\n", - adapter->num_rx_qs - 1); + "created %d RX queue(s)\n", adapter->num_rx_qs); return 0; } @@ -2549,7 +2659,7 @@ loop_continue: return work_done; } -static inline void be_update_tx_err(struct be_tx_obj *txo, u32 status) +static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status) { switch (status) { case BE_TX_COMP_HDR_PARSE_ERR: @@ -2564,7 +2674,7 @@ static inline void be_update_tx_err(struct be_tx_obj *txo, u32 status) } } -static inline void lancer_update_tx_err(struct be_tx_obj *txo, u32 status) +static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status) { switch (status) { case LANCER_TX_COMP_LSO_ERR: @@ -2589,22 +2699,18 @@ static inline void lancer_update_tx_err(struct be_tx_obj *txo, u32 status) static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo, int idx) { - struct be_eth_tx_compl *txcp; int num_wrbs = 0, work_done = 0; - u32 compl_status; - u16 last_idx; + struct be_tx_compl_info *txcp; - while ((txcp = be_tx_compl_get(&txo->cq))) { - last_idx = GET_TX_COMPL_BITS(wrb_index, txcp); - num_wrbs += be_tx_compl_process(adapter, txo, last_idx); + while ((txcp = be_tx_compl_get(txo))) { + num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index); work_done++; - compl_status = GET_TX_COMPL_BITS(status, txcp); - if (compl_status) { + if (txcp->status) { if (lancer_chip(adapter)) - lancer_update_tx_err(txo, compl_status); + lancer_update_tx_err(txo, txcp->status); else - be_update_tx_err(txo, compl_status); + be_update_tx_err(txo, txcp->status); } } @@ -2615,7 +2721,7 @@ static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo, /* As Tx wrbs have been freed up, wake up netdev queue * if it was stopped due to lack of tx wrbs. */ if (__netif_subqueue_stopped(adapter->netdev, idx) && - atomic_read(&txo->q.used) < txo->q.len / 2) { + be_can_txq_wake(txo)) { netif_wake_subqueue(adapter->netdev, idx); } @@ -2807,12 +2913,12 @@ void be_detect_error(struct be_adapter *adapter) sliport_err2 = ioread32(adapter->db + SLIPORT_ERROR2_OFFSET); adapter->hw_error = true; + error_detected = true; /* Do not log error messages if its a FW reset */ if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 && sliport_err2 == SLIPORT_ERROR_FW_RESET2) { dev_info(dev, "Firmware update in progress\n"); } else { - error_detected = true; dev_err(dev, "Error detected in the card\n"); dev_err(dev, "ERR: sliport status 0x%x\n", sliport_status); @@ -2932,6 +3038,8 @@ static int be_msix_register(struct be_adapter *adapter) status = request_irq(vec, be_msix, 0, eqo->desc, eqo); if (status) goto err_msix; + + irq_set_affinity_hint(vec, eqo->affinity_mask); } return 0; @@ -2976,7 +3084,7 @@ static void be_irq_unregister(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct be_eq_obj *eqo; - int i; + int i, vec; if (!adapter->isr_registered) return; @@ -2988,8 +3096,11 @@ static void be_irq_unregister(struct be_adapter *adapter) } /* MSIx */ - for_all_evt_queues(adapter, eqo, i) - free_irq(be_msix_vec_get(adapter, eqo), eqo); + for_all_evt_queues(adapter, eqo, i) { + vec = be_msix_vec_get(adapter, eqo); + irq_set_affinity_hint(vec, NULL); + free_irq(vec, eqo); + } done: adapter->isr_registered = false; @@ -3071,12 +3182,14 @@ static int be_rx_qs_create(struct be_adapter *adapter) return rc; } - /* The FW would like the default RXQ to be created first */ - rxo = default_rxo(adapter); - rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, rx_frag_size, - adapter->if_handle, false, &rxo->rss_id); - if (rc) - return rc; + if (adapter->need_def_rxq || !adapter->num_rss_qs) { + rxo = default_rxo(adapter); + rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, + rx_frag_size, adapter->if_handle, + false, &rxo->rss_id); + if (rc) + return rc; + } for_all_rss_queues(adapter, rxo, i) { rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id, @@ -3087,8 +3200,7 @@ static int be_rx_qs_create(struct be_adapter *adapter) } if (be_multi_rxq(adapter)) { - for (j = 0; j < RSS_INDIR_TABLE_LEN; - j += adapter->num_rx_qs - 1) { + for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) { for_all_rss_queues(adapter, rxo, i) { if ((j + i) >= RSS_INDIR_TABLE_LEN) break; @@ -3179,7 +3291,7 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable) int status = 0; u8 mac[ETH_ALEN]; - memset(mac, 0, ETH_ALEN); + eth_zero_addr(mac); cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config); cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma, @@ -3324,6 +3436,14 @@ static void be_cancel_worker(struct be_adapter *adapter) } } +static void be_cancel_err_detection(struct be_adapter *adapter) +{ + if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) { + cancel_delayed_work_sync(&adapter->be_err_detection_work); + adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED; + } +} + static void be_mac_clear(struct be_adapter *adapter) { if (adapter->pmac_id) { @@ -3355,8 +3475,39 @@ static void be_disable_vxlan_offloads(struct be_adapter *adapter) } #endif +static u16 be_calculate_vf_qs(struct be_adapter *adapter, u16 num_vfs) +{ + struct be_resources res = adapter->pool_res; + u16 num_vf_qs = 1; + + /* Distribute the queue resources equally among the PF and it's VFs + * Do not distribute queue resources in multi-channel configuration. + */ + if (num_vfs && !be_is_mc(adapter)) { + /* If number of VFs requested is 8 less than max supported, + * assign 8 queue pairs to the PF and divide the remaining + * resources evenly among the VFs + */ + if (num_vfs < (be_max_vfs(adapter) - 8)) + num_vf_qs = (res.max_rss_qs - 8) / num_vfs; + else + num_vf_qs = res.max_rss_qs / num_vfs; + + /* Skyhawk-R chip supports only MAX_RSS_IFACES RSS capable + * interfaces per port. Provide RSS on VFs, only if number + * of VFs requested is less than MAX_RSS_IFACES limit. + */ + if (num_vfs >= MAX_RSS_IFACES) + num_vf_qs = 1; + } + return num_vf_qs; +} + static int be_clear(struct be_adapter *adapter) { + struct pci_dev *pdev = adapter->pdev; + u16 num_vf_qs; + be_cancel_worker(adapter); if (sriov_enabled(adapter)) @@ -3365,9 +3516,14 @@ static int be_clear(struct be_adapter *adapter) /* Re-configure FW to distribute resources evenly across max-supported * number of VFs, only when VFs are not already enabled. */ - if (be_physfn(adapter) && !pci_vfs_assigned(adapter->pdev)) + if (skyhawk_chip(adapter) && be_physfn(adapter) && + !pci_vfs_assigned(pdev)) { + num_vf_qs = be_calculate_vf_qs(adapter, + pci_sriov_get_totalvfs(pdev)); be_cmd_set_sriov_config(adapter, adapter->pool_res, - pci_sriov_get_totalvfs(adapter->pdev)); + pci_sriov_get_totalvfs(pdev), + num_vf_qs); + } #ifdef CONFIG_BE2NET_VXLAN be_disable_vxlan_offloads(adapter); @@ -3391,7 +3547,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle, en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS | - BE_IF_FLAGS_RSS; + BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS; en_flags &= cap_flags; @@ -3412,6 +3568,7 @@ static int be_vfs_if_create(struct be_adapter *adapter) for_all_vfs(adapter, vf_cfg, vf) { if (!BE3_chip(adapter)) { status = be_cmd_get_profile_config(adapter, &res, + RESOURCE_LIMITS, vf + 1); if (!status) { cap_flags = res.if_cap_flags; @@ -3585,7 +3742,8 @@ static void BEx_get_resources(struct be_adapter *adapter, /* On a SuperNIC profile, the driver needs to use the * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits */ - be_cmd_get_profile_config(adapter, &super_nic_res, 0); + be_cmd_get_profile_config(adapter, &super_nic_res, + RESOURCE_LIMITS, 0); /* Some old versions of BE3 FW don't report max_tx_qs value */ res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS; } else { @@ -3605,6 +3763,7 @@ static void BEx_get_resources(struct be_adapter *adapter, res->max_evt_qs = 1; res->if_cap_flags = BE_IF_CAP_FLAGS_WANT; + res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS; if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS)) res->if_cap_flags &= ~BE_IF_FLAGS_RSS; } @@ -3624,13 +3783,12 @@ static void be_setup_init(struct be_adapter *adapter) static int be_get_sriov_config(struct be_adapter *adapter) { - struct device *dev = &adapter->pdev->dev; struct be_resources res = {0}; int max_vfs, old_vfs; - /* Some old versions of BE3 FW don't report max_vfs value */ - be_cmd_get_profile_config(adapter, &res, 0); + be_cmd_get_profile_config(adapter, &res, RESOURCE_LIMITS, 0); + /* Some old versions of BE3 FW don't report max_vfs value */ if (BE3_chip(adapter) && !res.max_vfs) { max_vfs = pci_sriov_get_totalvfs(adapter->pdev); res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0; @@ -3638,35 +3796,49 @@ static int be_get_sriov_config(struct be_adapter *adapter) adapter->pool_res = res; - if (!be_max_vfs(adapter)) { - if (num_vfs) - dev_warn(dev, "SRIOV is disabled. Ignoring num_vfs\n"); - adapter->num_vfs = 0; - return 0; - } - - pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter)); - - /* validate num_vfs module param */ + /* If during previous unload of the driver, the VFs were not disabled, + * then we cannot rely on the PF POOL limits for the TotalVFs value. + * Instead use the TotalVFs value stored in the pci-dev struct. + */ old_vfs = pci_num_vf(adapter->pdev); if (old_vfs) { - dev_info(dev, "%d VFs are already enabled\n", old_vfs); - if (old_vfs != num_vfs) - dev_warn(dev, "Ignoring num_vfs=%d setting\n", num_vfs); + dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n", + old_vfs); + + adapter->pool_res.max_vfs = + pci_sriov_get_totalvfs(adapter->pdev); adapter->num_vfs = old_vfs; - } else { - if (num_vfs > be_max_vfs(adapter)) { - dev_info(dev, "Resources unavailable to init %d VFs\n", - num_vfs); - dev_info(dev, "Limiting to %d VFs\n", - be_max_vfs(adapter)); - } - adapter->num_vfs = min_t(u16, num_vfs, be_max_vfs(adapter)); } return 0; } +static void be_alloc_sriov_res(struct be_adapter *adapter) +{ + int old_vfs = pci_num_vf(adapter->pdev); + u16 num_vf_qs; + int status; + + be_get_sriov_config(adapter); + + if (!old_vfs) + pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter)); + + /* When the HW is in SRIOV capable configuration, the PF-pool + * resources are given to PF during driver load, if there are no + * old VFs. This facility is not available in BE3 FW. + * Also, this is done by FW in Lancer chip. + */ + if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) { + num_vf_qs = be_calculate_vf_qs(adapter, 0); + status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0, + num_vf_qs); + if (status) + dev_err(&adapter->pdev->dev, + "Failed to optimize SRIOV resources\n"); + } +} + static int be_get_resources(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; @@ -3687,12 +3859,23 @@ static int be_get_resources(struct be_adapter *adapter) if (status) return status; + /* If a deafault RXQ must be created, we'll use up one RSSQ*/ + if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs && + !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)) + res.max_rss_qs -= 1; + /* If RoCE may be enabled stash away half the EQs for RoCE */ if (be_roce_supported(adapter)) res.max_evt_qs /= 2; adapter->res = res; } + /* If FW supports RSS default queue, then skip creating non-RSS + * queue for non-IP traffic. + */ + adapter->need_def_rxq = (be_if_cap_flags(adapter) & + BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1; + dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n", be_max_txqs(adapter), be_max_rxqs(adapter), be_max_rss(adapter), be_max_eqs(adapter), @@ -3701,47 +3884,33 @@ static int be_get_resources(struct be_adapter *adapter) be_max_uc(adapter), be_max_mc(adapter), be_max_vlans(adapter)); + /* Sanitize cfg_num_qs based on HW and platform limits */ + adapter->cfg_num_qs = min_t(u16, netif_get_num_default_rss_queues(), + be_max_qs(adapter)); return 0; } -static void be_sriov_config(struct be_adapter *adapter) -{ - struct device *dev = &adapter->pdev->dev; - int status; - - status = be_get_sriov_config(adapter); - if (status) { - dev_err(dev, "Failed to query SR-IOV configuration\n"); - dev_err(dev, "SR-IOV cannot be enabled\n"); - return; - } - - /* When the HW is in SRIOV capable configuration, the PF-pool - * resources are equally distributed across the max-number of - * VFs. The user may request only a subset of the max-vfs to be - * enabled. Based on num_vfs, redistribute the resources across - * num_vfs so that each VF will have access to more number of - * resources. This facility is not available in BE3 FW. - * Also, this is done by FW in Lancer chip. - */ - if (be_max_vfs(adapter) && !pci_num_vf(adapter->pdev)) { - status = be_cmd_set_sriov_config(adapter, - adapter->pool_res, - adapter->num_vfs); - if (status) - dev_err(dev, "Failed to optimize SR-IOV resources\n"); - } -} - static int be_get_config(struct be_adapter *adapter) { + int status, level; u16 profile_id; - int status; + + status = be_cmd_get_cntl_attributes(adapter); + if (status) + return status; status = be_cmd_query_fw_cfg(adapter); if (status) return status; + if (BEx_chip(adapter)) { + level = be_cmd_get_fw_log_level(adapter); + adapter->msg_enable = + level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0; + } + + be_cmd_get_acpi_wol_cap(adapter); + be_cmd_query_port_name(adapter); if (be_physfn(adapter)) { @@ -3751,9 +3920,6 @@ static int be_get_config(struct be_adapter *adapter) "Using profile 0x%x\n", profile_id); } - if (!BE2_chip(adapter) && be_physfn(adapter)) - be_sriov_config(adapter); - status = be_get_resources(adapter); if (status) return status; @@ -3763,9 +3929,6 @@ static int be_get_config(struct be_adapter *adapter) if (!adapter->pmac_id) return -ENOMEM; - /* Sanitize cfg_num_qs based on HW and platform limits */ - adapter->cfg_num_qs = min(adapter->cfg_num_qs, be_max_qs(adapter)); - return 0; } @@ -3799,6 +3962,13 @@ static void be_schedule_worker(struct be_adapter *adapter) adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; } +static void be_schedule_err_detection(struct be_adapter *adapter) +{ + schedule_delayed_work(&adapter->be_err_detection_work, + msecs_to_jiffies(1000)); + adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED; +} + static int be_setup_queues(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; @@ -3881,16 +4051,61 @@ static inline int fw_major_num(const char *fw_ver) return fw_major; } +/* If any VFs are already enabled don't FLR the PF */ +static bool be_reset_required(struct be_adapter *adapter) +{ + return pci_num_vf(adapter->pdev) ? false : true; +} + +/* Wait for the FW to be ready and perform the required initialization */ +static int be_func_init(struct be_adapter *adapter) +{ + int status; + + status = be_fw_wait_ready(adapter); + if (status) + return status; + + if (be_reset_required(adapter)) { + status = be_cmd_reset_function(adapter); + if (status) + return status; + + /* Wait for interrupts to quiesce after an FLR */ + msleep(100); + + /* We can clear all errors when function reset succeeds */ + be_clear_all_error(adapter); + } + + /* Tell FW we're ready to fire cmds */ + status = be_cmd_fw_init(adapter); + if (status) + return status; + + /* Allow interrupts for other ULPs running on NIC function */ + be_intr_set(adapter, true); + + return 0; +} + static int be_setup(struct be_adapter *adapter) { struct device *dev = &adapter->pdev->dev; int status; + status = be_func_init(adapter); + if (status) + return status; + be_setup_init(adapter); if (!lancer_chip(adapter)) be_cmd_req_native_mode(adapter); + if (!BE2_chip(adapter) && be_physfn(adapter)) + be_alloc_sriov_res(adapter); + status = be_get_config(adapter); if (status) goto err; @@ -3931,8 +4146,6 @@ static int be_setup(struct be_adapter *adapter) be_set_rx_mode(adapter->netdev); - be_cmd_get_acpi_wol_cap(adapter); - status = be_cmd_set_flow_control(adapter, adapter->tx_fc, adapter->rx_fc); if (status) @@ -4842,6 +5055,142 @@ static void be_netdev_init(struct net_device *netdev) netdev->ethtool_ops = &be_ethtool_ops; } +static void be_cleanup(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + rtnl_lock(); + netif_device_detach(netdev); + if (netif_running(netdev)) + be_close(netdev); + rtnl_unlock(); + + be_clear(adapter); +} + +static int be_resume(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int status; + + status = be_setup(adapter); + if (status) + return status; + + if (netif_running(netdev)) { + status = be_open(netdev); + if (status) + return status; + } + + netif_device_attach(netdev); + + return 0; +} + +static int be_err_recover(struct be_adapter *adapter) +{ + struct device *dev = &adapter->pdev->dev; + int status; + + status = be_resume(adapter); + if (status) + goto err; + + dev_info(dev, "Adapter recovery successful\n"); + return 0; +err: + if (be_physfn(adapter)) + dev_err(dev, "Adapter recovery failed\n"); + else + dev_err(dev, "Re-trying adapter recovery\n"); + + return status; +} + +static void be_err_detection_task(struct work_struct *work) +{ + struct be_adapter *adapter = + container_of(work, struct be_adapter, + be_err_detection_work.work); + int status = 0; + + be_detect_error(adapter); + + if (adapter->hw_error) { + be_cleanup(adapter); + + /* As of now error recovery support is in Lancer only */ + if (lancer_chip(adapter)) + status = be_err_recover(adapter); + } + + /* Always attempt recovery on VFs */ + if (!status || be_virtfn(adapter)) + be_schedule_err_detection(adapter); +} + +static void be_log_sfp_info(struct be_adapter *adapter) +{ + int status; + + status = be_cmd_query_sfp_info(adapter); + if (!status) { + dev_err(&adapter->pdev->dev, + "Unqualified SFP+ detected on %c from %s part no: %s", + adapter->port_name, adapter->phy.vendor_name, + adapter->phy.vendor_pn); + } + adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP; +} + +static void be_worker(struct work_struct *work) +{ + struct be_adapter *adapter = + container_of(work, struct be_adapter, work.work); + struct be_rx_obj *rxo; + int i; + + /* when interrupts are not yet enabled, just reap any pending + * mcc completions + */ + if (!netif_running(adapter->netdev)) { + local_bh_disable(); + be_process_mcc(adapter); + local_bh_enable(); + goto reschedule; + } + + if (!adapter->stats_cmd_sent) { + if (lancer_chip(adapter)) + lancer_cmd_get_pport_stats(adapter, + &adapter->stats_cmd); + else + be_cmd_get_stats(adapter, &adapter->stats_cmd); + } + + if (be_physfn(adapter) && + MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0) + be_cmd_get_die_temperature(adapter); + + for_all_rx_queues(adapter, rxo, i) { + /* Replenish RX-queues starved due to memory + * allocation failures. + */ + if (rxo->rx_post_starved) + be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); + } + + be_eqd_update(adapter); + + if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) + be_log_sfp_info(adapter); + +reschedule: + adapter->work_counter++; + schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); +} + static void be_unmap_pci_bars(struct be_adapter *adapter) { if (adapter->csr) @@ -4874,6 +5223,12 @@ static int be_map_pci_bars(struct be_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; u8 __iomem *addr; + u32 sli_intf; + + pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); + adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >> + SLI_INTF_FAMILY_SHIFT; + adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; if (BEx_chip(adapter) && be_physfn(adapter)) { adapter->csr = pci_iomap(pdev, 2, 0); @@ -4907,109 +5262,93 @@ pci_map_err: return -ENOMEM; } -static void be_ctrl_cleanup(struct be_adapter *adapter) +static void be_drv_cleanup(struct be_adapter *adapter) { struct be_dma_mem *mem = &adapter->mbox_mem_alloced; - - be_unmap_pci_bars(adapter); + struct device *dev = &adapter->pdev->dev; if (mem->va) - dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, - mem->dma); + dma_free_coherent(dev, mem->size, mem->va, mem->dma); mem = &adapter->rx_filter; if (mem->va) - dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va, - mem->dma); + dma_free_coherent(dev, mem->size, mem->va, mem->dma); + + mem = &adapter->stats_cmd; + if (mem->va) + dma_free_coherent(dev, mem->size, mem->va, mem->dma); } -static int be_ctrl_init(struct be_adapter *adapter) +/* Allocate and initialize various fields in be_adapter struct */ +static int be_drv_init(struct be_adapter *adapter) { struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced; struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem; struct be_dma_mem *rx_filter = &adapter->rx_filter; - u32 sli_intf; - int status; - - pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); - adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >> - SLI_INTF_FAMILY_SHIFT; - adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; - - status = be_map_pci_bars(adapter); - if (status) - goto done; + struct be_dma_mem *stats_cmd = &adapter->stats_cmd; + struct device *dev = &adapter->pdev->dev; + int status = 0; mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16; - mbox_mem_alloc->va = dma_alloc_coherent(&adapter->pdev->dev, - mbox_mem_alloc->size, + mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size, &mbox_mem_alloc->dma, GFP_KERNEL); - if (!mbox_mem_alloc->va) { - status = -ENOMEM; - goto unmap_pci_bars; - } + if (!mbox_mem_alloc->va) + return -ENOMEM; + mbox_mem_align->size = sizeof(struct be_mcc_mailbox); mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16); mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16); memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox)); rx_filter->size = sizeof(struct be_cmd_req_rx_filter); - rx_filter->va = dma_zalloc_coherent(&adapter->pdev->dev, - rx_filter->size, &rx_filter->dma, - GFP_KERNEL); + rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size, + &rx_filter->dma, GFP_KERNEL); if (!rx_filter->va) { status = -ENOMEM; goto free_mbox; } + if (lancer_chip(adapter)) + stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats); + else if (BE2_chip(adapter)) + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0); + else if (BE3_chip(adapter)) + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1); + else + stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2); + stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size, + &stats_cmd->dma, GFP_KERNEL); + if (!stats_cmd->va) { + status = -ENOMEM; + goto free_rx_filter; + } + mutex_init(&adapter->mbox_lock); spin_lock_init(&adapter->mcc_lock); spin_lock_init(&adapter->mcc_cq_lock); - init_completion(&adapter->et_cmd_compl); - pci_save_state(adapter->pdev); - return 0; -free_mbox: - dma_free_coherent(&adapter->pdev->dev, mbox_mem_alloc->size, - mbox_mem_alloc->va, mbox_mem_alloc->dma); - -unmap_pci_bars: - be_unmap_pci_bars(adapter); - -done: - return status; -} - -static void be_stats_cleanup(struct be_adapter *adapter) -{ - struct be_dma_mem *cmd = &adapter->stats_cmd; + pci_save_state(adapter->pdev); - if (cmd->va) - dma_free_coherent(&adapter->pdev->dev, cmd->size, - cmd->va, cmd->dma); -} + INIT_DELAYED_WORK(&adapter->work, be_worker); + INIT_DELAYED_WORK(&adapter->be_err_detection_work, + be_err_detection_task); -static int be_stats_init(struct be_adapter *adapter) -{ - struct be_dma_mem *cmd = &adapter->stats_cmd; + adapter->rx_fc = true; + adapter->tx_fc = true; - if (lancer_chip(adapter)) - cmd->size = sizeof(struct lancer_cmd_req_pport_stats); - else if (BE2_chip(adapter)) - cmd->size = sizeof(struct be_cmd_req_get_stats_v0); - else if (BE3_chip(adapter)) - cmd->size = sizeof(struct be_cmd_req_get_stats_v1); - else - /* ALL non-BE ASICs */ - cmd->size = sizeof(struct be_cmd_req_get_stats_v2); + /* Must be a power of 2 or else MODULO will BUG_ON */ + adapter->be_get_temp_freq = 64; - cmd->va = dma_zalloc_coherent(&adapter->pdev->dev, cmd->size, &cmd->dma, - GFP_KERNEL); - if (!cmd->va) - return -ENOMEM; return 0; + +free_rx_filter: + dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma); +free_mbox: + dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va, + mbox_mem_alloc->dma); + return status; } static void be_remove(struct pci_dev *pdev) @@ -5022,7 +5361,7 @@ static void be_remove(struct pci_dev *pdev) be_roce_dev_remove(adapter); be_intr_set(adapter, false); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); unregister_netdev(adapter->netdev); @@ -5031,9 +5370,8 @@ static void be_remove(struct pci_dev *pdev) /* tell fw we're done with firing cmds */ be_cmd_fw_clean(adapter); - be_stats_cleanup(adapter); - - be_ctrl_cleanup(adapter); + be_unmap_pci_bars(adapter); + be_drv_cleanup(adapter); pci_disable_pcie_error_reporting(pdev); @@ -5043,156 +5381,6 @@ static void be_remove(struct pci_dev *pdev) free_netdev(adapter->netdev); } -static int be_get_initial_config(struct be_adapter *adapter) -{ - int status, level; - - status = be_cmd_get_cntl_attributes(adapter); - if (status) - return status; - - /* Must be a power of 2 or else MODULO will BUG_ON */ - adapter->be_get_temp_freq = 64; - - if (BEx_chip(adapter)) { - level = be_cmd_get_fw_log_level(adapter); - adapter->msg_enable = - level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0; - } - - adapter->cfg_num_qs = netif_get_num_default_rss_queues(); - return 0; -} - -static int lancer_recover_func(struct be_adapter *adapter) -{ - struct device *dev = &adapter->pdev->dev; - int status; - - status = lancer_test_and_set_rdy_state(adapter); - if (status) - goto err; - - if (netif_running(adapter->netdev)) - be_close(adapter->netdev); - - be_clear(adapter); - - be_clear_all_error(adapter); - - status = be_setup(adapter); - if (status) - goto err; - - if (netif_running(adapter->netdev)) { - status = be_open(adapter->netdev); - if (status) - goto err; - } - - dev_err(dev, "Adapter recovery successful\n"); - return 0; -err: - if (status == -EAGAIN) - dev_err(dev, "Waiting for resource provisioning\n"); - else - dev_err(dev, "Adapter recovery failed\n"); - - return status; -} - -static void be_func_recovery_task(struct work_struct *work) -{ - struct be_adapter *adapter = - container_of(work, struct be_adapter, func_recovery_work.work); - int status = 0; - - be_detect_error(adapter); - - if (adapter->hw_error && lancer_chip(adapter)) { - rtnl_lock(); - netif_device_detach(adapter->netdev); - rtnl_unlock(); - - status = lancer_recover_func(adapter); - if (!status) - netif_device_attach(adapter->netdev); - } - - /* In Lancer, for all errors other than provisioning error (-EAGAIN), - * no need to attempt further recovery. - */ - if (!status || status == -EAGAIN) - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); -} - -static void be_log_sfp_info(struct be_adapter *adapter) -{ - int status; - - status = be_cmd_query_sfp_info(adapter); - if (!status) { - dev_err(&adapter->pdev->dev, - "Unqualified SFP+ detected on %c from %s part no: %s", - adapter->port_name, adapter->phy.vendor_name, - adapter->phy.vendor_pn); - } - adapter->flags &= ~BE_FLAGS_EVT_INCOMPATIBLE_SFP; -} - -static void be_worker(struct work_struct *work) -{ - struct be_adapter *adapter = - container_of(work, struct be_adapter, work.work); - struct be_rx_obj *rxo; - int i; - - /* when interrupts are not yet enabled, just reap any pending - * mcc completions */ - if (!netif_running(adapter->netdev)) { - local_bh_disable(); - be_process_mcc(adapter); - local_bh_enable(); - goto reschedule; - } - - if (!adapter->stats_cmd_sent) { - if (lancer_chip(adapter)) - lancer_cmd_get_pport_stats(adapter, - &adapter->stats_cmd); - else - be_cmd_get_stats(adapter, &adapter->stats_cmd); - } - - if (be_physfn(adapter) && - MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0) - be_cmd_get_die_temperature(adapter); - - for_all_rx_queues(adapter, rxo, i) { - /* Replenish RX-queues starved due to memory - * allocation failures. - */ - if (rxo->rx_post_starved) - be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST); - } - - be_eqd_update(adapter); - - if (adapter->flags & BE_FLAGS_EVT_INCOMPATIBLE_SFP) - be_log_sfp_info(adapter); - -reschedule: - adapter->work_counter++; - schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); -} - -/* If any VFs are already enabled don't FLR the PF */ -static bool be_reset_required(struct be_adapter *adapter) -{ - return pci_num_vf(adapter->pdev) ? false : true; -} - static char *mc_name(struct be_adapter *adapter) { char *str = ""; /* default */ @@ -5291,50 +5479,17 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) if (!status) dev_info(&pdev->dev, "PCIe error reporting enabled\n"); - status = be_ctrl_init(adapter); + status = be_map_pci_bars(adapter); if (status) goto free_netdev; - /* sync up with fw's ready state */ - if (be_physfn(adapter)) { - status = be_fw_wait_ready(adapter); - if (status) - goto ctrl_clean; - } - - if (be_reset_required(adapter)) { - status = be_cmd_reset_function(adapter); - if (status) - goto ctrl_clean; - - /* Wait for interrupts to quiesce after an FLR */ - msleep(100); - } - - /* Allow interrupts for other ULPs running on NIC function */ - be_intr_set(adapter, true); - - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); - if (status) - goto ctrl_clean; - - status = be_stats_init(adapter); - if (status) - goto ctrl_clean; - - status = be_get_initial_config(adapter); + status = be_drv_init(adapter); if (status) - goto stats_clean; - - INIT_DELAYED_WORK(&adapter->work, be_worker); - INIT_DELAYED_WORK(&adapter->func_recovery_work, be_func_recovery_task); - adapter->rx_fc = true; - adapter->tx_fc = true; + goto unmap_bars; status = be_setup(adapter); if (status) - goto stats_clean; + goto drv_cleanup; be_netdev_init(netdev); status = register_netdev(netdev); @@ -5343,8 +5498,7 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) be_roce_dev_add(adapter); - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); + be_schedule_err_detection(adapter); dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev), func_name(adapter), mc_name(adapter), adapter->port_name); @@ -5353,10 +5507,10 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) unsetup: be_clear(adapter); -stats_clean: - be_stats_cleanup(adapter); -ctrl_clean: - be_ctrl_cleanup(adapter); +drv_cleanup: + be_drv_cleanup(adapter); +unmap_bars: + be_unmap_pci_bars(adapter); free_netdev: free_netdev(netdev); rel_reg: @@ -5371,21 +5525,14 @@ do_none: static int be_suspend(struct pci_dev *pdev, pm_message_t state) { struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; if (adapter->wol_en) be_setup_wol(adapter, true); be_intr_set(adapter, false); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); - netif_device_detach(netdev); - if (netif_running(netdev)) { - rtnl_lock(); - be_close(netdev); - rtnl_unlock(); - } - be_clear(adapter); + be_cleanup(adapter); pci_save_state(pdev); pci_disable_device(pdev); @@ -5393,13 +5540,10 @@ static int be_suspend(struct pci_dev *pdev, pm_message_t state) return 0; } -static int be_resume(struct pci_dev *pdev) +static int be_pci_resume(struct pci_dev *pdev) { - int status = 0; struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; - - netif_device_detach(netdev); + int status = 0; status = pci_enable_device(pdev); if (status) @@ -5408,30 +5552,11 @@ static int be_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); - status = be_fw_wait_ready(adapter); - if (status) - return status; - - status = be_cmd_reset_function(adapter); - if (status) - return status; - - be_intr_set(adapter, true); - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); + status = be_resume(adapter); if (status) return status; - be_setup(adapter); - if (netif_running(netdev)) { - rtnl_lock(); - be_open(netdev); - rtnl_unlock(); - } - - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); - netif_device_attach(netdev); + be_schedule_err_detection(adapter); if (adapter->wol_en) be_setup_wol(adapter, false); @@ -5451,7 +5576,7 @@ static void be_shutdown(struct pci_dev *pdev) be_roce_dev_shutdown(adapter); cancel_delayed_work_sync(&adapter->work); - cancel_delayed_work_sync(&adapter->func_recovery_work); + be_cancel_err_detection(adapter); netif_device_detach(adapter->netdev); @@ -5464,22 +5589,15 @@ static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; dev_err(&adapter->pdev->dev, "EEH error detected\n"); if (!adapter->eeh_error) { adapter->eeh_error = true; - cancel_delayed_work_sync(&adapter->func_recovery_work); - - rtnl_lock(); - netif_device_detach(netdev); - if (netif_running(netdev)) - be_close(netdev); - rtnl_unlock(); + be_cancel_err_detection(adapter); - be_clear(adapter); + be_cleanup(adapter); } if (state == pci_channel_io_perm_failure) @@ -5530,43 +5648,73 @@ static void be_eeh_resume(struct pci_dev *pdev) { int status = 0; struct be_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; dev_info(&adapter->pdev->dev, "EEH resume\n"); pci_save_state(pdev); - status = be_cmd_reset_function(adapter); + status = be_resume(adapter); if (status) goto err; - /* On some BE3 FW versions, after a HW reset, - * interrupts will remain disabled for each function. - * So, explicitly enable interrupts + be_schedule_err_detection(adapter); + return; +err: + dev_err(&adapter->pdev->dev, "EEH resume failed\n"); +} + +static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + struct be_adapter *adapter = pci_get_drvdata(pdev); + u16 num_vf_qs; + int status; + + if (!num_vfs) + be_vf_clear(adapter); + + adapter->num_vfs = num_vfs; + + if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) { + dev_warn(&pdev->dev, + "Cannot disable VFs while they are assigned\n"); + return -EBUSY; + } + + /* When the HW is in SRIOV capable configuration, the PF-pool resources + * are equally distributed across the max-number of VFs. The user may + * request only a subset of the max-vfs to be enabled. + * Based on num_vfs, redistribute the resources across num_vfs so that + * each VF will have access to more number of resources. + * This facility is not available in BE3 FW. + * Also, this is done by FW in Lancer chip. */ - be_intr_set(adapter, true); + if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) { + num_vf_qs = be_calculate_vf_qs(adapter, adapter->num_vfs); + status = be_cmd_set_sriov_config(adapter, adapter->pool_res, + adapter->num_vfs, num_vf_qs); + if (status) + dev_err(&pdev->dev, + "Failed to optimize SR-IOV resources\n"); + } - /* tell fw we're ready to fire cmds */ - status = be_cmd_fw_init(adapter); + status = be_get_resources(adapter); if (status) - goto err; + return be_cmd_status(status); - status = be_setup(adapter); + /* Updating real_num_tx/rx_queues() requires rtnl_lock() */ + rtnl_lock(); + status = be_update_queues(adapter); + rtnl_unlock(); if (status) - goto err; + return be_cmd_status(status); - if (netif_running(netdev)) { - status = be_open(netdev); - if (status) - goto err; - } + if (adapter->num_vfs) + status = be_vf_setup(adapter); - schedule_delayed_work(&adapter->func_recovery_work, - msecs_to_jiffies(1000)); - netif_device_attach(netdev); - return; -err: - dev_err(&adapter->pdev->dev, "EEH resume failed\n"); + if (!status) + return adapter->num_vfs; + + return 0; } static const struct pci_error_handlers be_eeh_handlers = { @@ -5581,8 +5729,9 @@ static struct pci_driver be_driver = { .probe = be_probe, .remove = be_remove, .suspend = be_suspend, - .resume = be_resume, + .resume = be_pci_resume, .shutdown = be_shutdown, + .sriov_configure = be_pci_sriov_configure, .err_handler = &be_eeh_handlers }; @@ -5596,6 +5745,11 @@ static int __init be_init_module(void) rx_frag_size = 2048; } + if (num_vfs > 0) { + pr_info(DRV_NAME " : Module param num_vfs is obsolete."); + pr_info(DRV_NAME " : Use sysfs method to enable VFs\n"); + } + return pci_register_driver(&be_driver); } module_init(be_init_module); |