diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ice')
34 files changed, 2381 insertions, 891 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 001500afc4a6..2f0b604abc5e 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -137,6 +137,21 @@ */ #define ICE_BW_KBPS_DIVISOR 125 +/* Default recipes have priority 4 and below, hence priority values between 5..7 + * can be used as filter priority for advanced switch filter (advanced switch + * filters need new recipe to be created for specified extraction sequence + * because default recipe extraction sequence does not represent custom + * extraction) + */ +#define ICE_SWITCH_FLTR_PRIO_QUEUE 7 +/* prio 6 is reserved for future use (e.g. switch filter with L3 fields + + * (Optional: IP TOS/TTL) + L4 fields + (optionally: TCP fields such as + * SYN/FIN/RST)) + */ +#define ICE_SWITCH_FLTR_PRIO_RSVD 6 +#define ICE_SWITCH_FLTR_PRIO_VSI 5 +#define ICE_SWITCH_FLTR_PRIO_QGRP ICE_SWITCH_FLTR_PRIO_VSI + /* Macro for each VSI in a PF */ #define ice_for_each_vsi(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++) @@ -305,6 +320,11 @@ enum ice_vsi_state { ICE_VSI_STATE_NBITS /* must be last */ }; +struct ice_vsi_stats { + struct ice_ring_stats **tx_ring_stats; /* Tx ring stats array */ + struct ice_ring_stats **rx_ring_stats; /* Rx ring stats array */ +}; + /* struct that defines a VSI, associated with a dev */ struct ice_vsi { struct net_device *netdev; @@ -358,6 +378,7 @@ struct ice_vsi { /* VSI stats */ struct rtnl_link_stats64 net_stats; + struct rtnl_link_stats64 net_stats_prev; struct ice_eth_stats eth_stats; struct ice_eth_stats eth_stats_prev; @@ -525,6 +546,7 @@ struct ice_pf { u16 ctrl_vsi_idx; /* control VSI index in pf->vsi array */ struct ice_vsi **vsi; /* VSIs created by the driver */ + struct ice_vsi_stats **vsi_stats; struct ice_sw *first_sw; /* first switch created by firmware */ u16 eswitch_mode; /* current mode of eswitch */ struct ice_vfs vfs; @@ -594,6 +616,8 @@ struct ice_pf { u16 num_dmac_chnl_fltrs; struct 
hlist_head tc_flower_fltr_list; + u64 supported_rxdids; + __le64 nvm_phy_type_lo; /* NVM PHY type low */ __le64 nvm_phy_type_hi; /* NVM PHY type high */ struct ice_link_default_override_tlv link_dflt_override; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 1bdc70aa979d..958c1e435232 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -848,9 +848,9 @@ struct ice_aqc_txsched_elem { u8 generic; #define ICE_AQC_ELEM_GENERIC_MODE_M 0x1 #define ICE_AQC_ELEM_GENERIC_PRIO_S 0x1 -#define ICE_AQC_ELEM_GENERIC_PRIO_M (0x7 << ICE_AQC_ELEM_GENERIC_PRIO_S) +#define ICE_AQC_ELEM_GENERIC_PRIO_M GENMASK(3, 1) #define ICE_AQC_ELEM_GENERIC_SP_S 0x4 -#define ICE_AQC_ELEM_GENERIC_SP_M (0x1 << ICE_AQC_ELEM_GENERIC_SP_S) +#define ICE_AQC_ELEM_GENERIC_SP_M GENMASK(4, 4) #define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S 0x5 #define ICE_AQC_ELEM_GENERIC_ADJUST_VAL_M \ (0x3 << ICE_AQC_ELEM_GENERIC_ADJUST_VAL_S) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index e864634d66bc..554095b25f44 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -389,7 +389,7 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) * Indicates the starting address of the descriptor queue defined in * 128 Byte units. 
*/ - rlan_ctx.base = ring->dma >> 7; + rlan_ctx.base = ring->dma >> ICE_RLAN_BASE_S; rlan_ctx.qlen = ring->count; diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 039342a0ed15..d02b55b6aa9c 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1105,6 +1105,9 @@ int ice_init_hw(struct ice_hw *hw) hw->evb_veb = true; + /* init xarray for identifying scheduling nodes uniquely */ + xa_init_flags(&hw->port_info->sched_node_ids, XA_FLAGS_ALLOC); + /* Query the allocated resources for Tx scheduler */ status = ice_sched_query_res_alloc(hw); if (status) { @@ -2945,8 +2948,8 @@ bool ice_is_100m_speed_supported(struct ice_hw *hw) * Note: In the structure of [phy_type_low, phy_type_high], there should * be one bit set, as this function will convert one PHY type to its * speed. - * If no bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned - * If more than one bit gets set, ICE_LINK_SPEED_UNKNOWN will be returned + * If no bit gets set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned + * If more than one bit gets set, ICE_AQ_LINK_SPEED_UNKNOWN will be returned */ static u16 ice_get_link_speed_based_on_phy_type(u64 phy_type_low, u64 phy_type_high) @@ -4600,7 +4603,7 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, q_ctx->q_teid = le32_to_cpu(node.node_teid); /* add a leaf node into scheduler tree queue layer */ - status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node); + status = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, &node, NULL); if (!status) status = ice_sched_replay_q_bw(pi, q_ctx); @@ -4835,7 +4838,7 @@ ice_ena_vsi_rdma_qset(struct ice_port_info *pi, u16 vsi_handle, u8 tc, for (i = 0; i < num_qsets; i++) { node.node_teid = buf->rdma_qsets[i].qset_teid; ret = ice_sched_add_node(pi, hw->num_tx_sched_layers - 1, - &node); + &node, NULL); if (ret) break; qset_teid[i] = le32_to_cpu(node.node_teid); @@ -5512,3 
+5515,40 @@ bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw) ICE_FW_API_REPORT_DFLT_CFG_MIN, ICE_FW_API_REPORT_DFLT_CFG_PATCH); } + +/* each of the indexes into the following array match the speed of a return + * value from the list of AQ returned speeds like the range: + * ICE_AQ_LINK_SPEED_10MB .. ICE_AQ_LINK_SPEED_100GB excluding + * ICE_AQ_LINK_SPEED_UNKNOWN which is BIT(15) and maps to BIT(14) in this + * array. The array is defined as 15 elements long because the link_speed + * returned by the firmware is a 16 bit * value, but is indexed + * by [fls(speed) - 1] + */ +static const u32 ice_aq_to_link_speed[15] = { + SPEED_10, /* BIT(0) */ + SPEED_100, + SPEED_1000, + SPEED_2500, + SPEED_5000, + SPEED_10000, + SPEED_20000, + SPEED_25000, + SPEED_40000, + SPEED_50000, + SPEED_100000, /* BIT(10) */ + 0, + 0, + 0, + 0 /* BIT(14) */ +}; + +/** + * ice_get_link_speed - get integer speed from table + * @index: array index from fls(aq speed) - 1 + * + * Returns: u32 value containing integer speed + */ +u32 ice_get_link_speed(u16 index) +{ + return ice_aq_to_link_speed[index]; +} diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 8b6712b92e84..4c6a0b5c9304 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -163,6 +163,7 @@ int ice_aq_sff_eeprom(struct ice_hw *hw, u16 lport, u8 bus_addr, u16 mem_addr, u8 page, u8 set_page, u8 *data, u8 length, bool write, struct ice_sq_cd *cd); +u32 ice_get_link_speed(u16 index); int ice_cfg_vsi_rdma(struct ice_port_info *pi, u16 vsi_handle, u16 tc_bitmap, diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 0b146a0d4205..6be02f9b0b8c 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -1580,7 +1580,7 @@ ice_update_port_tc_tree_cfg(struct ice_port_info *pi, /* new TC */ status = ice_sched_query_elem(pi->hw, teid2, 
&elem); if (!status) - status = ice_sched_add_node(pi, 1, &elem); + status = ice_sched_add_node(pi, 1, &elem, NULL); if (status) break; /* update the TC number */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index add90e75f05c..4f24d441c35e 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -3,6 +3,7 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" +#include "ice_devlink.h" /** * ice_dcb_get_ena_tc - return bitmap of enabled TCs @@ -364,6 +365,12 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) /* Enable DCB tagging only when more than one TC */ if (ice_dcb_get_num_tc(new_cfg) > 1) { dev_dbg(dev, "DCB tagging enabled (num TC > 1)\n"); + if (pf->hw.port_info->is_custom_tx_enabled) { + dev_err(dev, "Custom Tx scheduler feature enabled, can't configure DCB\n"); + return -EBUSY; + } + ice_tear_down_devlink_rate_tree(pf); + set_bit(ICE_FLAG_DCB_ENA, pf->flags); } else { dev_dbg(dev, "DCB tagging disabled (num TC = 1)\n"); @@ -874,6 +881,9 @@ void ice_update_dcb_stats(struct ice_pf *pf) prev_ps = &pf->stats_prev; cur_ps = &pf->stats; + if (ice_is_reset_in_progress(pf->state)) + pf->stat_prev_loaded = false; + for (i = 0; i < 8; i++) { ice_stat_update32(hw, GLPRT_PXOFFRXC(port, i), pf->stat_prev_loaded, diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index e6ec20079ced..8286e47b4bae 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -8,6 +8,7 @@ #include "ice_devlink.h" #include "ice_eswitch.h" #include "ice_fw_update.h" +#include "ice_dcb_lib.h" static int ice_active_port_option = -1; @@ -310,12 +311,6 @@ static int ice_devlink_info_get(struct devlink *devlink, } } - err = devlink_info_driver_name_put(req, KBUILD_MODNAME); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to set driver name"); - 
goto out_free_ctx; - } - ice_info_get_dsn(pf, ctx); err = devlink_info_serial_number_put(req, ctx->buf); @@ -713,6 +708,490 @@ ice_devlink_port_unsplit(struct devlink *devlink, struct devlink_port *port, return ice_devlink_port_split(devlink, port, 1, extack); } +/** + * ice_tear_down_devlink_rate_tree - removes devlink-rate exported tree + * @pf: pf struct + * + * This function tears down tree exported during VF's creation. + */ +void ice_tear_down_devlink_rate_tree(struct ice_pf *pf) +{ + struct devlink *devlink; + struct ice_vf *vf; + unsigned int bkt; + + devlink = priv_to_devlink(pf); + + devl_lock(devlink); + mutex_lock(&pf->vfs.table_lock); + ice_for_each_vf(pf, bkt, vf) { + if (vf->devlink_port.devlink_rate) + devl_rate_leaf_destroy(&vf->devlink_port); + } + mutex_unlock(&pf->vfs.table_lock); + + devl_rate_nodes_destroy(devlink); + devl_unlock(devlink); +} + +/** + * ice_enable_custom_tx - try to enable custom Tx feature + * @pf: pf struct + * + * This function tries to enable custom Tx feature, + * it's not possible to enable it, if DCB or ADQ is active. + */ +static bool ice_enable_custom_tx(struct ice_pf *pf) +{ + struct ice_port_info *pi = ice_get_main_vsi(pf)->port_info; + struct device *dev = ice_pf_to_dev(pf); + + if (pi->is_custom_tx_enabled) + /* already enabled, return true */ + return true; + + if (ice_is_adq_active(pf)) { + dev_err(dev, "ADQ active, can't modify Tx scheduler tree\n"); + return false; + } + + if (ice_is_dcb_active(pf)) { + dev_err(dev, "DCB active, can't modify Tx scheduler tree\n"); + return false; + } + + pi->is_custom_tx_enabled = true; + + return true; +} + +/** + * ice_traverse_tx_tree - traverse Tx scheduler tree + * @devlink: devlink struct + * @node: current node, used for recursion + * @tc_node: tc_node struct, that is treated as a root + * @pf: pf struct + * + * This function traverses Tx scheduler tree and exports + * entire structure to the devlink-rate. 
+ */ +static void ice_traverse_tx_tree(struct devlink *devlink, struct ice_sched_node *node, + struct ice_sched_node *tc_node, struct ice_pf *pf) +{ + struct devlink_rate *rate_node = NULL; + struct ice_vf *vf; + int i; + + if (node->parent == tc_node) { + /* create root node */ + rate_node = devl_rate_node_create(devlink, node, node->name, NULL); + } else if (node->vsi_handle && + pf->vsi[node->vsi_handle]->vf) { + vf = pf->vsi[node->vsi_handle]->vf; + if (!vf->devlink_port.devlink_rate) + /* leaf nodes doesn't have children + * so we don't set rate_node + */ + devl_rate_leaf_create(&vf->devlink_port, node, + node->parent->rate_node); + } else if (node->info.data.elem_type != ICE_AQC_ELEM_TYPE_LEAF && + node->parent->rate_node) { + rate_node = devl_rate_node_create(devlink, node, node->name, + node->parent->rate_node); + } + + if (rate_node && !IS_ERR(rate_node)) + node->rate_node = rate_node; + + for (i = 0; i < node->num_children; i++) + ice_traverse_tx_tree(devlink, node->children[i], tc_node, pf); +} + +/** + * ice_devlink_rate_init_tx_topology - export Tx scheduler tree to devlink rate + * @devlink: devlink struct + * @vsi: main vsi struct + * + * This function finds a root node, then calls ice_traverse_tx tree, which + * traverses the tree and exports it's contents to devlink rate. 
+ */ +int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi) +{ + struct ice_port_info *pi = vsi->port_info; + struct ice_sched_node *tc_node; + struct ice_pf *pf = vsi->back; + int i; + + tc_node = pi->root->children[0]; + mutex_lock(&pi->sched_lock); + devl_lock(devlink); + for (i = 0; i < tc_node->num_children; i++) + ice_traverse_tx_tree(devlink, tc_node->children[i], tc_node, pf); + devl_unlock(devlink); + mutex_unlock(&pi->sched_lock); + + return 0; +} + +/** + * ice_set_object_tx_share - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @bw: bandwidth in bytes per second + * @extack: extended netdev ack structure + * + * This function sets ICE_MIN_BW scheduling BW limit. + */ +static int ice_set_object_tx_share(struct ice_port_info *pi, struct ice_sched_node *node, + u64 bw, struct netlink_ext_ack *extack) +{ + int status; + + mutex_lock(&pi->sched_lock); + /* converts bytes per second to kilo bits per second */ + node->tx_share = div_u64(bw, 125); + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MIN_BW, node->tx_share); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_share"); + + return status; +} + +/** + * ice_set_object_tx_max - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @bw: bandwidth in bytes per second + * @extack: extended netdev ack structure + * + * This function sets ICE_MAX_BW scheduling BW limit. 
+ */ +static int ice_set_object_tx_max(struct ice_port_info *pi, struct ice_sched_node *node, + u64 bw, struct netlink_ext_ack *extack) +{ + int status; + + mutex_lock(&pi->sched_lock); + /* converts bytes per second value to kilo bits per second */ + node->tx_max = div_u64(bw, 125); + status = ice_sched_set_node_bw_lmt(pi, node, ICE_MAX_BW, node->tx_max); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_max"); + + return status; +} + +/** + * ice_set_object_tx_priority - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @priority: value representing priority for strict priority arbitration + * @extack: extended netdev ack structure + * + * This function sets priority of node among siblings. + */ +static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched_node *node, + u32 priority, struct netlink_ext_ack *extack) +{ + int status; + + if (node->tx_priority >= 8) { + NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8"); + return -EINVAL; + } + + mutex_lock(&pi->sched_lock); + node->tx_priority = priority; + status = ice_sched_set_node_priority(pi, node, node->tx_priority); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_priority"); + + return status; +} + +/** + * ice_set_object_tx_weight - sets node scheduling parameter + * @pi: devlink struct instance + * @node: node struct instance + * @weight: value represeting relative weight for WFQ arbitration + * @extack: extended netdev ack structure + * + * This function sets node weight for WFQ algorithm. 
+ */ +static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_node *node, + u32 weight, struct netlink_ext_ack *extack) +{ + int status; + + if (node->tx_weight > 200 || node->tx_weight < 1) { + NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200"); + return -EINVAL; + } + + mutex_lock(&pi->sched_lock); + node->tx_weight = weight; + status = ice_sched_set_node_weight(pi, node, node->tx_weight); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't set scheduling node tx_weight"); + + return status; +} + +/** + * ice_get_pi_from_dev_rate - get port info from devlink_rate + * @rate_node: devlink struct instance + * + * This function returns corresponding port_info struct of devlink_rate + */ +static struct ice_port_info *ice_get_pi_from_dev_rate(struct devlink_rate *rate_node) +{ + struct ice_pf *pf = devlink_priv(rate_node->devlink); + + return ice_get_main_vsi(pf)->port_info; +} + +static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node; + struct ice_port_info *pi; + + pi = ice_get_pi_from_dev_rate(rate_node); + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + /* preallocate memory for ice_sched_node */ + node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + *priv = node; + + return 0; +} + +static int ice_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node, *tc_node; + struct ice_port_info *pi; + + pi = ice_get_pi_from_dev_rate(rate_node); + tc_node = pi->root->children[0]; + node = priv; + + if (!rate_node->parent || !node || tc_node == node || !extack) + return 0; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + /* can't allow to delete a node with children */ + if (node->num_children) + return -EINVAL; + + mutex_lock(&pi->sched_lock); + 
ice_free_sched_node(pi, node); + mutex_unlock(&pi->sched_lock); + + return 0; +} + +static int ice_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_leaf), + node, tx_max, extack); +} + +static int ice_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_share, extack); +} + +static int ice_devlink_rate_leaf_tx_priority_set(struct devlink_rate *rate_leaf, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_priority, extack); +} + +static int ice_devlink_rate_leaf_tx_weight_set(struct devlink_rate *rate_leaf, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_leaf->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_leaf), node, + tx_weight, extack); +} + +static int ice_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return 
ice_set_object_tx_max(ice_get_pi_from_dev_rate(rate_node), + node, tx_max, extack); +} + +static int ice_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_share(ice_get_pi_from_dev_rate(rate_node), + node, tx_share, extack); +} + +static int ice_devlink_rate_node_tx_priority_set(struct devlink_rate *rate_node, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_priority(ice_get_pi_from_dev_rate(rate_node), + node, tx_priority, extack); +} + +static int ice_devlink_rate_node_tx_weight_set(struct devlink_rate *rate_node, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack) +{ + struct ice_sched_node *node = priv; + + if (!ice_enable_custom_tx(devlink_priv(rate_node->devlink))) + return -EBUSY; + + if (!node) + return 0; + + return ice_set_object_tx_weight(ice_get_pi_from_dev_rate(rate_node), + node, tx_weight, extack); +} + +static int ice_devlink_set_parent(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct ice_port_info *pi = ice_get_pi_from_dev_rate(devlink_rate); + struct ice_sched_node *tc_node, *node, *parent_node; + u16 num_nodes_added; + u32 first_node_teid; + u32 node_teid; + int status; + + tc_node = pi->root->children[0]; + node = priv; + + if (!extack) + return 0; + + if (!ice_enable_custom_tx(devlink_priv(devlink_rate->devlink))) + return -EBUSY; + + if (!parent) { + if (!node || tc_node == node || node->num_children) + return -EINVAL; + + mutex_lock(&pi->sched_lock); + ice_free_sched_node(pi, node); + 
mutex_unlock(&pi->sched_lock); + + return 0; + } + + parent_node = parent_priv; + + /* if the node doesn't exist, create it */ + if (!node->parent) { + mutex_lock(&pi->sched_lock); + status = ice_sched_add_elems(pi, tc_node, parent_node, + parent_node->tx_sched_layer + 1, + 1, &num_nodes_added, &first_node_teid, + &node); + mutex_unlock(&pi->sched_lock); + + if (status) { + NL_SET_ERR_MSG_MOD(extack, "Can't add a new node"); + return status; + } + + if (devlink_rate->tx_share) + ice_set_object_tx_share(pi, node, devlink_rate->tx_share, extack); + if (devlink_rate->tx_max) + ice_set_object_tx_max(pi, node, devlink_rate->tx_max, extack); + if (devlink_rate->tx_priority) + ice_set_object_tx_priority(pi, node, devlink_rate->tx_priority, extack); + if (devlink_rate->tx_weight) + ice_set_object_tx_weight(pi, node, devlink_rate->tx_weight, extack); + } else { + node_teid = le32_to_cpu(node->info.node_teid); + mutex_lock(&pi->sched_lock); + status = ice_sched_move_nodes(pi, parent_node, 1, &node_teid); + mutex_unlock(&pi->sched_lock); + + if (status) + NL_SET_ERR_MSG_MOD(extack, "Can't move existing node to a new parent"); + } + + return status; +} + static const struct devlink_ops ice_devlink_ops = { .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, .reload_actions = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), @@ -725,6 +1204,22 @@ static const struct devlink_ops ice_devlink_ops = { .eswitch_mode_set = ice_eswitch_mode_set, .info_get = ice_devlink_info_get, .flash_update = ice_devlink_flash_update, + + .rate_node_new = ice_devlink_rate_node_new, + .rate_node_del = ice_devlink_rate_node_del, + + .rate_leaf_tx_max_set = ice_devlink_rate_leaf_tx_max_set, + .rate_leaf_tx_share_set = ice_devlink_rate_leaf_tx_share_set, + .rate_leaf_tx_priority_set = ice_devlink_rate_leaf_tx_priority_set, + .rate_leaf_tx_weight_set = ice_devlink_rate_leaf_tx_weight_set, + + .rate_node_tx_max_set = ice_devlink_rate_node_tx_max_set, + .rate_node_tx_share_set = 
ice_devlink_rate_node_tx_share_set, + .rate_node_tx_priority_set = ice_devlink_rate_node_tx_priority_set, + .rate_node_tx_weight_set = ice_devlink_rate_node_tx_weight_set, + + .rate_leaf_parent_set = ice_devlink_set_parent, + .rate_node_parent_set = ice_devlink_set_parent, }; static int @@ -1033,12 +1528,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) */ void ice_devlink_destroy_pf_port(struct ice_pf *pf) { - struct devlink_port *devlink_port; - - devlink_port = &pf->devlink_port; - - devlink_port_type_clear(devlink_port); - devlink_port_unregister(devlink_port); + devlink_port_unregister(&pf->devlink_port); } /** @@ -1094,31 +1584,28 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) */ void ice_devlink_destroy_vf_port(struct ice_vf *vf) { - struct devlink_port *devlink_port; - - devlink_port = &vf->devlink_port; - - devlink_port_type_clear(devlink_port); - devlink_port_unregister(devlink_port); + devl_rate_leaf_destroy(&vf->devlink_port); + devlink_port_unregister(&vf->devlink_port); } #define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024) +static const struct devlink_region_ops ice_nvm_region_ops; +static const struct devlink_region_ops ice_sram_region_ops; + /** * ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents * @devlink: the devlink instance - * @ops: the devlink region being snapshotted + * @ops: the devlink region to snapshot * @extack: extended ACK response structure * @data: on exit points to snapshot data buffer * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the nvm-flash devlink region. It captures a snapshot of the full NVM flash - * contents, including both banks of flash. This snapshot can later be viewed - * via the devlink-region interface. + * This function is called in response to a DEVLINK_CMD_REGION_NEW for either + * the nvm-flash or shadow-ram region. 
* - * It captures the flash using the FLASH_ONLY bit set when reading via - * firmware, so it does not read the current Shadow RAM contents. For that, - * use the shadow-ram region. + * It captures a snapshot of the NVM or Shadow RAM flash contents. This + * snapshot can then later be viewed via the DEVLINK_CMD_REGION_READ netlink + * interface. * * @returns zero on success, and updates the data pointer. Returns a non-zero * error code on failure. @@ -1130,17 +1617,27 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; + bool read_shadow_ram; u8 *nvm_data, *tmp, i; u32 nvm_size, left; s8 num_blks; int status; - nvm_size = hw->flash.flash_size; + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } + nvm_data = vzalloc(nvm_size); if (!nvm_data) return -ENOMEM; - num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE); tmp = nvm_data; left = nvm_size; @@ -1164,7 +1661,7 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, } status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE, - &read_sz, tmp, false); + &read_sz, tmp, read_shadow_ram); if (status) { dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", read_sz, status, hw->adminq.sq_last_status); @@ -1185,62 +1682,69 @@ static int ice_devlink_nvm_snapshot(struct devlink *devlink, } /** - * ice_devlink_sram_snapshot - Capture a snapshot of the Shadow RAM contents + * ice_devlink_nvm_read - Read a portion of NVM flash contents * @devlink: the devlink instance - * @ops: the devlink region being snapshotted + * @ops: the devlink region to snapshot * @extack: extended ACK response structure - 
* @data: on exit points to snapshot data buffer + * @offset: the offset to start at + * @size: the amount to read + * @data: the data buffer to read into * - * This function is called in response to the DEVLINK_CMD_REGION_TRIGGER for - * the shadow-ram devlink region. It captures a snapshot of the shadow ram - * contents. This snapshot can later be viewed via the devlink-region - * interface. + * This function is called in response to DEVLINK_CMD_REGION_READ to directly + * read a section of the NVM contents. + * + * It reads from either the nvm-flash or shadow-ram region contents. * * @returns zero on success, and updates the data pointer. Returns a non-zero * error code on failure. */ -static int -ice_devlink_sram_snapshot(struct devlink *devlink, - const struct devlink_region_ops __always_unused *ops, - struct netlink_ext_ack *extack, u8 **data) +static int ice_devlink_nvm_read(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data) { struct ice_pf *pf = devlink_priv(devlink); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; - u8 *sram_data; - u32 sram_size; - int err; + bool read_shadow_ram; + u64 nvm_size; + int status; - sram_size = hw->flash.sr_words * 2u; - sram_data = vzalloc(sram_size); - if (!sram_data) - return -ENOMEM; + if (ops == &ice_nvm_region_ops) { + read_shadow_ram = false; + nvm_size = hw->flash.flash_size; + } else if (ops == &ice_sram_region_ops) { + read_shadow_ram = true; + nvm_size = hw->flash.sr_words * 2u; + } else { + NL_SET_ERR_MSG_MOD(extack, "Unexpected region in snapshot function"); + return -EOPNOTSUPP; + } - err = ice_acquire_nvm(hw, ICE_RES_READ); - if (err) { + if (offset + size >= nvm_size) { + NL_SET_ERR_MSG_MOD(extack, "Cannot read beyond the region size"); + return -ERANGE; + } + + status = ice_acquire_nvm(hw, ICE_RES_READ); + if (status) { dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n", - err, 
hw->adminq.sq_last_status); + status, hw->adminq.sq_last_status); NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore"); - vfree(sram_data); - return err; + return -EIO; } - /* Read from the Shadow RAM, rather than directly from NVM */ - err = ice_read_flat_nvm(hw, 0, &sram_size, sram_data, true); - if (err) { + status = ice_read_flat_nvm(hw, (u32)offset, &size, data, + read_shadow_ram); + if (status) { dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n", - sram_size, err, hw->adminq.sq_last_status); - NL_SET_ERR_MSG_MOD(extack, - "Failed to read Shadow RAM contents"); + size, status, hw->adminq.sq_last_status); + NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents"); ice_release_nvm(hw); - vfree(sram_data); - return err; + return -EIO; } - ice_release_nvm(hw); - *data = sram_data; - return 0; } @@ -1292,12 +1796,14 @@ static const struct devlink_region_ops ice_nvm_region_ops = { .name = "nvm-flash", .destructor = vfree, .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, }; static const struct devlink_region_ops ice_sram_region_ops = { .name = "shadow-ram", .destructor = vfree, - .snapshot = ice_devlink_sram_snapshot, + .snapshot = ice_devlink_nvm_snapshot, + .read = ice_devlink_nvm_read, }; static const struct devlink_region_ops ice_devcaps_region_ops = { diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h index fe006d9946f8..6ec96779f52e 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.h +++ b/drivers/net/ethernet/intel/ice/ice_devlink.h @@ -18,4 +18,7 @@ void ice_devlink_destroy_vf_port(struct ice_vf *vf); void ice_devlink_init_regions(struct ice_pf *pf); void ice_devlink_destroy_regions(struct ice_pf *pf); +int ice_devlink_rate_init_tx_topology(struct devlink *devlink, struct ice_vsi *vsi); +void ice_tear_down_devlink_rate_tree(struct ice_pf *pf); + #endif /* _ICE_DEVLINK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c 
b/drivers/net/ethernet/intel/ice/ice_ethtool.c index b7be84bbe72d..4191994d8f3a 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -151,6 +151,175 @@ static const u32 ice_regs_dump_list[] = { QINT_RQCTL(0), PFINT_OICR_ENA, QRX_ITR(0), +#define GLDCB_TLPM_PCI_DM 0x000A0180 + GLDCB_TLPM_PCI_DM, +#define GLDCB_TLPM_TC2PFC 0x000A0194 + GLDCB_TLPM_TC2PFC, +#define TCDCB_TLPM_WAIT_DM(_i) (0x000A0080 + ((_i) * 4)) + TCDCB_TLPM_WAIT_DM(0), + TCDCB_TLPM_WAIT_DM(1), + TCDCB_TLPM_WAIT_DM(2), + TCDCB_TLPM_WAIT_DM(3), + TCDCB_TLPM_WAIT_DM(4), + TCDCB_TLPM_WAIT_DM(5), + TCDCB_TLPM_WAIT_DM(6), + TCDCB_TLPM_WAIT_DM(7), + TCDCB_TLPM_WAIT_DM(8), + TCDCB_TLPM_WAIT_DM(9), + TCDCB_TLPM_WAIT_DM(10), + TCDCB_TLPM_WAIT_DM(11), + TCDCB_TLPM_WAIT_DM(12), + TCDCB_TLPM_WAIT_DM(13), + TCDCB_TLPM_WAIT_DM(14), + TCDCB_TLPM_WAIT_DM(15), + TCDCB_TLPM_WAIT_DM(16), + TCDCB_TLPM_WAIT_DM(17), + TCDCB_TLPM_WAIT_DM(18), + TCDCB_TLPM_WAIT_DM(19), + TCDCB_TLPM_WAIT_DM(20), + TCDCB_TLPM_WAIT_DM(21), + TCDCB_TLPM_WAIT_DM(22), + TCDCB_TLPM_WAIT_DM(23), + TCDCB_TLPM_WAIT_DM(24), + TCDCB_TLPM_WAIT_DM(25), + TCDCB_TLPM_WAIT_DM(26), + TCDCB_TLPM_WAIT_DM(27), + TCDCB_TLPM_WAIT_DM(28), + TCDCB_TLPM_WAIT_DM(29), + TCDCB_TLPM_WAIT_DM(30), + TCDCB_TLPM_WAIT_DM(31), +#define GLPCI_WATMK_CLNT_PIPEMON 0x000BFD90 + GLPCI_WATMK_CLNT_PIPEMON, +#define GLPCI_CUR_CLNT_COMMON 0x000BFD84 + GLPCI_CUR_CLNT_COMMON, +#define GLPCI_CUR_CLNT_PIPEMON 0x000BFD88 + GLPCI_CUR_CLNT_PIPEMON, +#define GLPCI_PCIERR 0x0009DEB0 + GLPCI_PCIERR, +#define GLPSM_DEBUG_CTL_STATUS 0x000B0600 + GLPSM_DEBUG_CTL_STATUS, +#define GLPSM0_DEBUG_FIFO_OVERFLOW_DETECT 0x000B0680 + GLPSM0_DEBUG_FIFO_OVERFLOW_DETECT, +#define GLPSM0_DEBUG_FIFO_UNDERFLOW_DETECT 0x000B0684 + GLPSM0_DEBUG_FIFO_UNDERFLOW_DETECT, +#define GLPSM0_DEBUG_DT_OUT_OF_WINDOW 0x000B0688 + GLPSM0_DEBUG_DT_OUT_OF_WINDOW, +#define GLPSM0_DEBUG_INTF_HW_ERROR_DETECT 0x000B069C + GLPSM0_DEBUG_INTF_HW_ERROR_DETECT, +#define 
GLPSM0_DEBUG_MISC_HW_ERROR_DETECT 0x000B06A0 + GLPSM0_DEBUG_MISC_HW_ERROR_DETECT, +#define GLPSM1_DEBUG_FIFO_OVERFLOW_DETECT 0x000B0E80 + GLPSM1_DEBUG_FIFO_OVERFLOW_DETECT, +#define GLPSM1_DEBUG_FIFO_UNDERFLOW_DETECT 0x000B0E84 + GLPSM1_DEBUG_FIFO_UNDERFLOW_DETECT, +#define GLPSM1_DEBUG_SRL_FIFO_OVERFLOW_DETECT 0x000B0E88 + GLPSM1_DEBUG_SRL_FIFO_OVERFLOW_DETECT, +#define GLPSM1_DEBUG_SRL_FIFO_UNDERFLOW_DETECT 0x000B0E8C + GLPSM1_DEBUG_SRL_FIFO_UNDERFLOW_DETECT, +#define GLPSM1_DEBUG_MISC_HW_ERROR_DETECT 0x000B0E90 + GLPSM1_DEBUG_MISC_HW_ERROR_DETECT, +#define GLPSM2_DEBUG_FIFO_OVERFLOW_DETECT 0x000B1680 + GLPSM2_DEBUG_FIFO_OVERFLOW_DETECT, +#define GLPSM2_DEBUG_FIFO_UNDERFLOW_DETECT 0x000B1684 + GLPSM2_DEBUG_FIFO_UNDERFLOW_DETECT, +#define GLPSM2_DEBUG_MISC_HW_ERROR_DETECT 0x000B1688 + GLPSM2_DEBUG_MISC_HW_ERROR_DETECT, +#define GLTDPU_TCLAN_COMP_BOB(_i) (0x00049ADC + ((_i) * 4)) + GLTDPU_TCLAN_COMP_BOB(1), + GLTDPU_TCLAN_COMP_BOB(2), + GLTDPU_TCLAN_COMP_BOB(3), + GLTDPU_TCLAN_COMP_BOB(4), + GLTDPU_TCLAN_COMP_BOB(5), + GLTDPU_TCLAN_COMP_BOB(6), + GLTDPU_TCLAN_COMP_BOB(7), + GLTDPU_TCLAN_COMP_BOB(8), +#define GLTDPU_TCB_CMD_BOB(_i) (0x0004975C + ((_i) * 4)) + GLTDPU_TCB_CMD_BOB(1), + GLTDPU_TCB_CMD_BOB(2), + GLTDPU_TCB_CMD_BOB(3), + GLTDPU_TCB_CMD_BOB(4), + GLTDPU_TCB_CMD_BOB(5), + GLTDPU_TCB_CMD_BOB(6), + GLTDPU_TCB_CMD_BOB(7), + GLTDPU_TCB_CMD_BOB(8), +#define GLTDPU_PSM_UPDATE_BOB(_i) (0x00049B5C + ((_i) * 4)) + GLTDPU_PSM_UPDATE_BOB(1), + GLTDPU_PSM_UPDATE_BOB(2), + GLTDPU_PSM_UPDATE_BOB(3), + GLTDPU_PSM_UPDATE_BOB(4), + GLTDPU_PSM_UPDATE_BOB(5), + GLTDPU_PSM_UPDATE_BOB(6), + GLTDPU_PSM_UPDATE_BOB(7), + GLTDPU_PSM_UPDATE_BOB(8), +#define GLTCB_CMD_IN_BOB(_i) (0x000AE288 + ((_i) * 4)) + GLTCB_CMD_IN_BOB(1), + GLTCB_CMD_IN_BOB(2), + GLTCB_CMD_IN_BOB(3), + GLTCB_CMD_IN_BOB(4), + GLTCB_CMD_IN_BOB(5), + GLTCB_CMD_IN_BOB(6), + GLTCB_CMD_IN_BOB(7), + GLTCB_CMD_IN_BOB(8), +#define GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(_i) (0x000FC148 + ((_i) * 4)) + 
GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(1), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(2), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(3), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(4), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(5), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(6), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(7), + GLLAN_TCLAN_FETCH_CTL_FBK_BOB_CTL(8), +#define GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(_i) (0x000FC248 + ((_i) * 4)) + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(1), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(2), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(3), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(4), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(5), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(6), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(7), + GLLAN_TCLAN_FETCH_CTL_SCHED_BOB_CTL(8), +#define GLLAN_TCLAN_CACHE_CTL_BOB_CTL(_i) (0x000FC1C8 + ((_i) * 4)) + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(1), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(2), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(3), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(4), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(5), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(6), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(7), + GLLAN_TCLAN_CACHE_CTL_BOB_CTL(8), +#define GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(_i) (0x000FC188 + ((_i) * 4)) + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(1), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(2), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(3), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(4), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(5), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(6), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(7), + GLLAN_TCLAN_FETCH_CTL_PROC_BOB_CTL(8), +#define GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(_i) (0x000FC288 + ((_i) * 4)) + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(1), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(2), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(3), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(4), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(5), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(6), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(7), + GLLAN_TCLAN_FETCH_CTL_PCIE_RD_BOB_CTL(8), +#define PRTDCB_TCUPM_REG_CM(_i) (0x000BC360 + ((_i) * 4)) + 
PRTDCB_TCUPM_REG_CM(0), + PRTDCB_TCUPM_REG_CM(1), + PRTDCB_TCUPM_REG_CM(2), + PRTDCB_TCUPM_REG_CM(3), +#define PRTDCB_TCUPM_REG_DM(_i) (0x000BC3A0 + ((_i) * 4)) + PRTDCB_TCUPM_REG_DM(0), + PRTDCB_TCUPM_REG_DM(1), + PRTDCB_TCUPM_REG_DM(2), + PRTDCB_TCUPM_REG_DM(3), +#define PRTDCB_TLPM_REG_DM(_i) (0x000A0000 + ((_i) * 4)) + PRTDCB_TLPM_REG_DM(0), + PRTDCB_TLPM_REG_DM(1), + PRTDCB_TLPM_REG_DM(2), + PRTDCB_TLPM_REG_DM(3), }; struct ice_priv_flag { @@ -1375,9 +1544,9 @@ __ice_get_ethtool_stats(struct net_device *netdev, ice_for_each_alloc_txq(vsi, j) { tx_ring = READ_ONCE(vsi->tx_rings[j]); - if (tx_ring) { - data[i++] = tx_ring->stats.pkts; - data[i++] = tx_ring->stats.bytes; + if (tx_ring && tx_ring->ring_stats) { + data[i++] = tx_ring->ring_stats->stats.pkts; + data[i++] = tx_ring->ring_stats->stats.bytes; } else { data[i++] = 0; data[i++] = 0; @@ -1386,9 +1555,9 @@ __ice_get_ethtool_stats(struct net_device *netdev, ice_for_each_alloc_rxq(vsi, j) { rx_ring = READ_ONCE(vsi->rx_rings[j]); - if (rx_ring) { - data[i++] = rx_ring->stats.pkts; - data[i++] = rx_ring->stats.bytes; + if (rx_ring && rx_ring->ring_stats) { + data[i++] = rx_ring->ring_stats->stats.pkts; + data[i++] = rx_ring->ring_stats->stats.bytes; } else { data[i++] = 0; data[i++] = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index d16738a3d3a7..a92dc9a16035 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -110,6 +110,9 @@ #define PRTDCB_TUP2TC 0x001D26C0 #define GL_PREEXT_L2_PMASK0(_i) (0x0020F0FC + ((_i) * 4)) #define GL_PREEXT_L2_PMASK1(_i) (0x0020F108 + ((_i) * 4)) +#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256)) +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0 +#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, 0) #define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4)) #define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0 #define 
GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, 0) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index b3baf7c3f910..89f986a75cc8 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -908,17 +908,5 @@ static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) return ice_ptype_lkup[ptype]; } -#define ICE_LINK_SPEED_UNKNOWN 0 -#define ICE_LINK_SPEED_10MBPS 10 -#define ICE_LINK_SPEED_100MBPS 100 -#define ICE_LINK_SPEED_1000MBPS 1000 -#define ICE_LINK_SPEED_2500MBPS 2500 -#define ICE_LINK_SPEED_5000MBPS 5000 -#define ICE_LINK_SPEED_10000MBPS 10000 -#define ICE_LINK_SPEED_20000MBPS 20000 -#define ICE_LINK_SPEED_25000MBPS 25000 -#define ICE_LINK_SPEED_40000MBPS 40000 -#define ICE_LINK_SPEED_50000MBPS 50000 -#define ICE_LINK_SPEED_100000MBPS 100000 #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7276badfa19e..94aa834cd9a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -448,6 +448,49 @@ static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *d } /** + * ice_vsi_alloc_stat_arrays - Allocate statistics arrays + * @vsi: VSI pointer + */ +static int ice_vsi_alloc_stat_arrays(struct ice_vsi *vsi) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + + if (vsi->type == ICE_VSI_CHNL) + return 0; + if (!pf->vsi_stats) + return -ENOENT; + + vsi_stat = kzalloc(sizeof(*vsi_stat), GFP_KERNEL); + if (!vsi_stat) + return -ENOMEM; + + vsi_stat->tx_ring_stats = + kcalloc(vsi->alloc_txq, sizeof(*vsi_stat->tx_ring_stats), + GFP_KERNEL); + if (!vsi_stat->tx_ring_stats) + goto err_alloc_tx; + + vsi_stat->rx_ring_stats = + kcalloc(vsi->alloc_rxq, sizeof(*vsi_stat->rx_ring_stats), + GFP_KERNEL); + if (!vsi_stat->rx_ring_stats) + goto err_alloc_rx; + + 
pf->vsi_stats[vsi->idx] = vsi_stat; + + return 0; + +err_alloc_rx: + kfree(vsi_stat->rx_ring_stats); +err_alloc_tx: + kfree(vsi_stat->tx_ring_stats); + kfree(vsi_stat); + pf->vsi_stats[vsi->idx] = NULL; + return -ENOMEM; +} + +/** * ice_vsi_alloc - Allocates the next available struct VSI in the PF * @pf: board private structure * @vsi_type: type of VSI @@ -560,6 +603,11 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, if (vsi->type == ICE_VSI_CTRL && vf) vf->ctrl_vsi_idx = vsi->idx; + + /* allocate memory for Tx/Rx ring stat pointers */ + if (ice_vsi_alloc_stat_arrays(vsi)) + goto err_rings; + goto unlock_pf; err_rings: @@ -1536,6 +1584,106 @@ err_out: } /** + * ice_vsi_free_stats - Free the ring statistics structures + * @vsi: VSI pointer + */ +static void ice_vsi_free_stats(struct ice_vsi *vsi) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + int i; + + if (vsi->type == ICE_VSI_CHNL) + return; + if (!pf->vsi_stats) + return; + + vsi_stat = pf->vsi_stats[vsi->idx]; + if (!vsi_stat) + return; + + ice_for_each_alloc_txq(vsi, i) { + if (vsi_stat->tx_ring_stats[i]) { + kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); + } + } + + ice_for_each_alloc_rxq(vsi, i) { + if (vsi_stat->rx_ring_stats[i]) { + kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); + } + } + + kfree(vsi_stat->tx_ring_stats); + kfree(vsi_stat->rx_ring_stats); + kfree(vsi_stat); + pf->vsi_stats[vsi->idx] = NULL; +} + +/** + * ice_vsi_alloc_ring_stats - Allocates Tx and Rx ring stats for the VSI + * @vsi: VSI which is having stats allocated + */ +static int ice_vsi_alloc_ring_stats(struct ice_vsi *vsi) +{ + struct ice_ring_stats **tx_ring_stats; + struct ice_ring_stats **rx_ring_stats; + struct ice_vsi_stats *vsi_stats; + struct ice_pf *pf = vsi->back; + u16 i; + + vsi_stats = pf->vsi_stats[vsi->idx]; + tx_ring_stats = vsi_stats->tx_ring_stats; + rx_ring_stats = 
vsi_stats->rx_ring_stats; + + /* Allocate Tx ring stats */ + ice_for_each_alloc_txq(vsi, i) { + struct ice_ring_stats *ring_stats; + struct ice_tx_ring *ring; + + ring = vsi->tx_rings[i]; + ring_stats = tx_ring_stats[i]; + + if (!ring_stats) { + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) + goto err_out; + + WRITE_ONCE(tx_ring_stats[i], ring_stats); + } + + ring->ring_stats = ring_stats; + } + + /* Allocate Rx ring stats */ + ice_for_each_alloc_rxq(vsi, i) { + struct ice_ring_stats *ring_stats; + struct ice_rx_ring *ring; + + ring = vsi->rx_rings[i]; + ring_stats = rx_ring_stats[i]; + + if (!ring_stats) { + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) + goto err_out; + + WRITE_ONCE(rx_ring_stats[i], ring_stats); + } + + ring->ring_stats = ring_stats; + } + + return 0; + +err_out: + ice_vsi_free_stats(vsi); + return -ENOMEM; +} + +/** * ice_vsi_manage_rss_lut - disable/enable RSS * @vsi: the VSI being changed * @ena: boolean value indicating if this is an enable or disable request @@ -1795,11 +1943,15 @@ void ice_update_eth_stats(struct ice_vsi *vsi) { struct ice_eth_stats *prev_es, *cur_es; struct ice_hw *hw = &vsi->back->hw; + struct ice_pf *pf = vsi->back; u16 vsi_num = vsi->vsi_num; /* HW absolute index of a VSI */ prev_es = &vsi->eth_stats_prev; cur_es = &vsi->eth_stats; + if (ice_is_reset_in_progress(pf->state)) + vsi->stat_offsets_loaded = false; + ice_stat_update40(hw, GLV_GORCL(vsi_num), vsi->stat_offsets_loaded, &prev_es->rx_bytes, &cur_es->rx_bytes); @@ -2576,6 +2728,10 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_vector_base; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto unroll_vector_base; + ice_vsi_map_rings_to_vectors(vsi); /* ICE_VSI_CTRL does not need RSS so skip RSS processing */ @@ -2614,6 +2770,9 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (ret) goto unroll_vector_base; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) 
+ goto unroll_vector_base; /* Do not exit if configuring RSS had an issue, at least * receive traffic on first queue. Hence no need to capture * return value @@ -2627,6 +2786,11 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ret = ice_vsi_alloc_rings(vsi); if (ret) goto unroll_vsi_init; + + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto unroll_vector_base; + break; default: /* clean up the resources and exit */ @@ -2686,6 +2850,7 @@ unroll_vector_base: unroll_alloc_q_vector: ice_vsi_free_q_vectors(vsi); unroll_vsi_init: + ice_vsi_free_stats(vsi); ice_vsi_delete(vsi); unroll_get_qs: ice_vsi_put_qs(vsi); @@ -3077,7 +3242,7 @@ int ice_vsi_release(struct ice_vsi *vsi) vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; ice_vsi_clear_rings(vsi); - + ice_vsi_free_stats(vsi); ice_vsi_put_qs(vsi); /* retain SW VSI data structure since it is needed to unregister and @@ -3205,6 +3370,47 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } /** + * ice_vsi_realloc_stat_arrays - Frees unused stat structures + * @vsi: VSI pointer + * @prev_txq: Number of Tx rings before ring reallocation + * @prev_rxq: Number of Rx rings before ring reallocation + */ +static int +ice_vsi_realloc_stat_arrays(struct ice_vsi *vsi, int prev_txq, int prev_rxq) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf = vsi->back; + int i; + + if (!prev_txq || !prev_rxq) + return 0; + if (vsi->type == ICE_VSI_CHNL) + return 0; + + vsi_stat = pf->vsi_stats[vsi->idx]; + + if (vsi->num_txq < prev_txq) { + for (i = vsi->num_txq; i < prev_txq; i++) { + if (vsi_stat->tx_ring_stats[i]) { + kfree_rcu(vsi_stat->tx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->tx_ring_stats[i], NULL); + } + } + } + + if (vsi->num_rxq < prev_rxq) { + for (i = vsi->num_rxq; i < prev_rxq; i++) { + if (vsi_stat->rx_ring_stats[i]) { + kfree_rcu(vsi_stat->rx_ring_stats[i], rcu); + WRITE_ONCE(vsi_stat->rx_ring_stats[i], NULL); + } + } + } + + return 0; +} + +/** * ice_vsi_rebuild - Rebuild VSI after 
reset * @vsi: VSI to be rebuild * @init_vsi: is this an initialization or a reconfigure of the VSI @@ -3215,10 +3421,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_coalesce_stored *coalesce; + int ret, i, prev_txq, prev_rxq; int prev_num_q_vectors = 0; enum ice_vsi_type vtype; struct ice_pf *pf; - int ret, i; if (!vsi) return -EINVAL; @@ -3237,6 +3443,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); + prev_txq = vsi->num_txq; + prev_rxq = vsi->num_rxq; + ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); ret = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); if (ret) @@ -3303,7 +3512,13 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret) goto err_vectors; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto err_vectors; + ice_vsi_map_rings_to_vectors(vsi); + + vsi->stat_offsets_loaded = false; if (ice_is_xdp_ena_vsi(vsi)) { ret = ice_vsi_determine_xdp_res(vsi); if (ret) @@ -3340,6 +3555,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) if (ret) goto err_vectors; + ret = ice_vsi_alloc_ring_stats(vsi); + if (ret) + goto err_vectors; + + vsi->stat_offsets_loaded = false; break; case ICE_VSI_CHNL: if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { @@ -3387,6 +3607,10 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) return ice_schedule_reset(pf, ICE_RESET_PFR); } } + + if (ice_vsi_realloc_stat_arrays(vsi, prev_txq, prev_rxq)) + goto err_vectors; + ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); kfree(coalesce); @@ -3728,9 +3952,9 @@ static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes */ void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) { - u64_stats_update_begin(&tx_ring->syncp); - ice_update_ring_stats(&tx_ring->stats, pkts, bytes); - u64_stats_update_end(&tx_ring->syncp); + 
u64_stats_update_begin(&tx_ring->ring_stats->syncp); + ice_update_ring_stats(&tx_ring->ring_stats->stats, pkts, bytes); + u64_stats_update_end(&tx_ring->ring_stats->syncp); } /** @@ -3741,9 +3965,9 @@ void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) */ void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) { - u64_stats_update_begin(&rx_ring->syncp); - ice_update_ring_stats(&rx_ring->stats, pkts, bytes); - u64_stats_update_end(&rx_ring->syncp); + u64_stats_update_begin(&rx_ring->ring_stats->syncp); + ice_update_ring_stats(&rx_ring->ring_stats->stats, pkts, bytes); + u64_stats_update_end(&rx_ring->ring_stats->syncp); } /** @@ -3850,33 +4074,11 @@ int ice_clear_dflt_vsi(struct ice_vsi *vsi) */ int ice_get_link_speed_mbps(struct ice_vsi *vsi) { - switch (vsi->port_info->phy.link_info.link_speed) { - case ICE_AQ_LINK_SPEED_100GB: - return SPEED_100000; - case ICE_AQ_LINK_SPEED_50GB: - return SPEED_50000; - case ICE_AQ_LINK_SPEED_40GB: - return SPEED_40000; - case ICE_AQ_LINK_SPEED_25GB: - return SPEED_25000; - case ICE_AQ_LINK_SPEED_20GB: - return SPEED_20000; - case ICE_AQ_LINK_SPEED_10GB: - return SPEED_10000; - case ICE_AQ_LINK_SPEED_5GB: - return SPEED_5000; - case ICE_AQ_LINK_SPEED_2500MB: - return SPEED_2500; - case ICE_AQ_LINK_SPEED_1000MB: - return SPEED_1000; - case ICE_AQ_LINK_SPEED_100MB: - return SPEED_100; - case ICE_AQ_LINK_SPEED_10MB: - return SPEED_10; - case ICE_AQ_LINK_SPEED_UNKNOWN: - default: - return 0; - } + unsigned int link_speed; + + link_speed = vsi->port_info->phy.link_info.link_speed; + + return (int)ice_get_link_speed(fls(link_speed) - 1); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ca2898467dcb..a9a7f8b52140 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -130,12 +130,17 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) ice_for_each_txq(vsi, i) { 
struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; + struct ice_ring_stats *ring_stats; if (!tx_ring) continue; if (ice_ring_ch_enabled(tx_ring)) continue; + ring_stats = tx_ring->ring_stats; + if (!ring_stats) + continue; + if (tx_ring->desc) { /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this @@ -144,8 +149,8 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * prev_pkt would be negative if there was no * pending work. */ - packets = tx_ring->stats.pkts & INT_MAX; - if (tx_ring->tx_stats.prev_pkt == packets) { + packets = ring_stats->stats.pkts & INT_MAX; + if (ring_stats->tx_stats.prev_pkt == packets) { /* Trigger sw interrupt to revive the queue */ ice_trigger_sw_intr(hw, tx_ring->q_vector); continue; @@ -155,7 +160,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) * to ice_get_tx_pending() */ smp_rmb(); - tx_ring->tx_stats.prev_pkt = + ring_stats->tx_stats.prev_pkt = ice_get_tx_pending(tx_ring) ? packets : -1; } } @@ -299,20 +304,6 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m) } /** - * ice_get_devlink_port - Get devlink port from netdev - * @netdev: the netdevice structure - */ -static struct devlink_port *ice_get_devlink_port(struct net_device *netdev) -{ - struct ice_pf *pf = ice_netdev_to_pf(netdev); - - if (!ice_is_switchdev_running(pf)) - return NULL; - - return &pf->devlink_port; -} - -/** * ice_vsi_sync_fltr - Update the VSI filter list to the HW * @vsi: ptr to the VSI * @@ -1120,8 +1111,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (link_up == old_link && link_speed == old_link_speed) return 0; - if (!ice_is_e810(&pf->hw)) - ice_ptp_link_change(pf, pf->hw.pf_id, link_up); + ice_ptp_link_change(pf, pf->hw.pf_id, link_up); if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -2560,13 +2550,20 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) ice_for_each_xdp_txq(vsi, i) { u16 xdp_q_idx = 
vsi->alloc_txq + i; + struct ice_ring_stats *ring_stats; struct ice_tx_ring *xdp_ring; xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); - if (!xdp_ring) goto free_xdp_rings; + ring_stats = kzalloc(sizeof(*ring_stats), GFP_KERNEL); + if (!ring_stats) { + ice_free_tx_ring(xdp_ring); + goto free_xdp_rings; + } + + xdp_ring->ring_stats = ring_stats; xdp_ring->q_index = xdp_q_idx; xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; xdp_ring->vsi = vsi; @@ -2589,9 +2586,13 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) return 0; free_xdp_rings: - for (; i >= 0; i--) - if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) + for (; i >= 0; i--) { + if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc) { + kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu); + vsi->xdp_rings[i]->ring_stats = NULL; ice_free_tx_ring(vsi->xdp_rings[i]); + } + } return -ENOMEM; } @@ -2792,6 +2793,8 @@ free_qmap: synchronize_rcu(); ice_free_tx_ring(vsi->xdp_rings[i]); } + kfree_rcu(vsi->xdp_rings[i]->ring_stats, rcu); + vsi->xdp_rings[i]->ring_stats = NULL; kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; } @@ -4603,6 +4606,7 @@ static int ice_register_netdev(struct ice_pf *pf) if (err) goto err_devlink_create; + SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); err = register_netdev(vsi->netdev); if (err) goto err_register_netdev; @@ -4611,8 +4615,6 @@ static int ice_register_netdev(struct ice_pf *pf) netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); - devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); - return 0; err_register_netdev: ice_devlink_destroy_pf_port(pf); @@ -4771,11 +4773,18 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto err_init_pf_unroll; } + pf->vsi_stats = devm_kcalloc(dev, pf->num_alloc_vsi, + sizeof(*pf->vsi_stats), GFP_KERNEL); + if (!pf->vsi_stats) { + err = -ENOMEM; + goto err_init_vsi_unroll; + } + err = ice_init_interrupt_scheme(pf); if (err) { dev_err(dev, "ice_init_interrupt_scheme 
failed: %d\n", err); err = -EIO; - goto err_init_vsi_unroll; + goto err_init_vsi_stats_unroll; } /* In case of MSIX we are going to setup the misc vector right here @@ -4956,6 +4965,9 @@ err_msix_misc_unroll: ice_free_irq_msix_misc(pf); err_init_interrupt_unroll: ice_clear_interrupt_scheme(pf); +err_init_vsi_stats_unroll: + devm_kfree(dev, pf->vsi_stats); + pf->vsi_stats = NULL; err_init_vsi_unroll: devm_kfree(dev, pf->vsi); err_init_pf_unroll: @@ -5078,6 +5090,8 @@ static void ice_remove(struct pci_dev *pdev) continue; ice_vsi_free_q_vectors(pf->vsi[i]); } + devm_kfree(&pdev->dev, pf->vsi_stats); + pf->vsi_stats = NULL; ice_deinit_pf(pf); ice_devlink_destroy_regions(pf); ice_deinit_hw(&pf->hw); @@ -6325,8 +6339,7 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - if (!ice_is_e810(&pf->hw)) - ice_ptp_link_change(pf, pf->hw.pf_id, true); + ice_ptp_link_change(pf, pf->hw.pf_id, true); } /* Perform an initial read of the statistics registers now to @@ -6370,10 +6383,10 @@ ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, unsigned int start; do { - start = u64_stats_fetch_begin_irq(syncp); + start = u64_stats_fetch_begin(syncp); *pkts = stats.pkts; *bytes = stats.bytes; - } while (u64_stats_fetch_retry_irq(syncp, start)); + } while (u64_stats_fetch_retry(syncp, start)); } /** @@ -6395,14 +6408,16 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - if (!ring) + if (!ring || !ring->ring_stats) continue; - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + ice_fetch_u64_stats_per_ring(&ring->ring_stats->syncp, + ring->ring_stats->stats, &pkts, + &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; - vsi->tx_restart += ring->tx_stats.restart_q; - vsi->tx_busy += ring->tx_stats.tx_busy; - vsi->tx_linearize += ring->tx_stats.tx_linearize; + vsi->tx_restart += 
ring->ring_stats->tx_stats.restart_q; + vsi->tx_busy += ring->ring_stats->tx_stats.tx_busy; + vsi->tx_linearize += ring->ring_stats->tx_stats.tx_linearize; } } @@ -6412,6 +6427,7 @@ ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, */ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { + struct rtnl_link_stats64 *net_stats, *stats_prev; struct rtnl_link_stats64 *vsi_stats; u64 pkts, bytes; int i; @@ -6436,12 +6452,16 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_ring_stats *ring_stats; - ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); + ring_stats = ring->ring_stats; + ice_fetch_u64_stats_per_ring(&ring_stats->syncp, + ring_stats->stats, &pkts, + &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; - vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; - vsi->rx_page_failed += ring->rx_stats.alloc_page_failed; + vsi->rx_buf_failed += ring_stats->rx_stats.alloc_buf_failed; + vsi->rx_page_failed += ring_stats->rx_stats.alloc_page_failed; } /* update XDP Tx rings counters */ @@ -6451,10 +6471,28 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) rcu_read_unlock(); - vsi->net_stats.tx_packets = vsi_stats->tx_packets; - vsi->net_stats.tx_bytes = vsi_stats->tx_bytes; - vsi->net_stats.rx_packets = vsi_stats->rx_packets; - vsi->net_stats.rx_bytes = vsi_stats->rx_bytes; + net_stats = &vsi->net_stats; + stats_prev = &vsi->net_stats_prev; + + /* clear prev counters after reset */ + if (vsi_stats->tx_packets < stats_prev->tx_packets || + vsi_stats->rx_packets < stats_prev->rx_packets) { + stats_prev->tx_packets = 0; + stats_prev->tx_bytes = 0; + stats_prev->rx_packets = 0; + stats_prev->rx_bytes = 0; + } + + /* update netdev counters */ + net_stats->tx_packets += vsi_stats->tx_packets - stats_prev->tx_packets; + net_stats->tx_bytes += vsi_stats->tx_bytes - 
stats_prev->tx_bytes; + net_stats->rx_packets += vsi_stats->rx_packets - stats_prev->rx_packets; + net_stats->rx_bytes += vsi_stats->rx_bytes - stats_prev->rx_bytes; + + stats_prev->tx_packets = vsi_stats->tx_packets; + stats_prev->tx_bytes = vsi_stats->tx_bytes; + stats_prev->rx_packets = vsi_stats->rx_packets; + stats_prev->rx_bytes = vsi_stats->rx_bytes; kfree(vsi_stats); } @@ -6516,6 +6554,9 @@ void ice_update_pf_stats(struct ice_pf *pf) prev_ps = &pf->stats_prev; cur_ps = &pf->stats; + if (ice_is_reset_in_progress(pf->state)) + pf->stat_prev_loaded = false; + ice_stat_update40(hw, GLPRT_GORCL(port), pf->stat_prev_loaded, &prev_ps->eth.rx_bytes, &cur_ps->eth.rx_bytes); @@ -6730,8 +6771,7 @@ int ice_down(struct ice_vsi *vsi) if (vsi->netdev && vsi->type == ICE_VSI_PF) { vlan_err = ice_vsi_del_vlan_zero(vsi); - if (!ice_is_e810(&vsi->back->hw)) - ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); + ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { @@ -8283,7 +8323,7 @@ static void ice_rem_all_chnl_fltrs(struct ice_pf *pf) rule.rid = fltr->rid; rule.rule_id = fltr->rule_id; - rule.vsi_handle = fltr->dest_id; + rule.vsi_handle = fltr->dest_vsi_handle; status = ice_rem_adv_rule_by_id(&pf->hw, &rule); if (status) { if (status == -ENOENT) @@ -8595,6 +8635,12 @@ static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) switch (mode) { case TC_MQPRIO_MODE_CHANNEL: + if (pf->hw.port_info->is_custom_tx_enabled) { + dev_err(dev, "Custom Tx scheduler feature enabled, can't configure ADQ\n"); + return -EBUSY; + } + ice_tear_down_devlink_rate_tree(pf); + ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt); if (ret) { netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n", @@ -9108,5 +9154,4 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_bpf = ice_xdp, .ndo_xdp_xmit = ice_xdp_xmit, .ndo_xsk_wakeup = 
ice_xsk_wakeup, - .ndo_get_devlink_port = ice_get_devlink_port, }; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 0f668468d141..d63161d73eb1 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -600,6 +600,23 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) } /** + * ice_ptp_is_tx_tracker_up - Check if Tx tracker is ready for new timestamps + * @tx: the PTP Tx timestamp tracker to check + * + * Check that a given PTP Tx timestamp tracker is up, i.e. that it is ready + * to accept new timestamp requests. + * + * Assumes the tx->lock spinlock is already held. + */ +static bool +ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) +{ + lockdep_assert_held(&tx->lock); + + return tx->init && !tx->calibrating; +} + +/** * ice_ptp_tx_tstamp - Process Tx timestamps for a port * @tx: the PTP Tx timestamp tracker * @@ -608,11 +625,13 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * * If a given index has a valid timestamp, perform the following steps: * - * 1) copy the timestamp out of the PHY register - * 4) clear the timestamp valid bit in the PHY register - * 5) unlock the index by clearing the associated in_use bit. - * 2) extend the 40b timestamp value to get a 64bit timestamp - * 3) send that timestamp to the stack + * 1) check that the timestamp request is not stale + * 2) check that a timestamp is ready and available in the PHY memory bank + * 3) read and copy the timestamp out of the PHY register + * 4) unlock the index by clearing the associated in_use bit + * 5) check if the timestamp is stale, and discard if so + * 6) extend the 40 bit timestamp value to get a 64 bit timestamp value + * 7) send this 64 bit timestamp to the stack * * Returns true if all timestamps were handled, and false if any slots remain * without a timestamp. 
@@ -623,24 +642,45 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * interrupt. In some cases hardware might not interrupt us again when the * timestamp is captured. * - * Note that we only take the tracking lock when clearing the bit and when - * checking if we need to re-queue this task. The only place where bits can be - * set is the hard xmit routine where an SKB has a request flag set. The only - * places where we clear bits are this work function, or the periodic cleanup - * thread. If the cleanup thread clears a bit we're processing we catch it - * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread - * starts a new timestamp, we might not begin processing it right away but we - * will notice it at the end when we re-queue the task. If a Tx thread starts - * a new timestamp just after this function exits without re-queuing, - * the interrupt when the timestamp finishes should trigger. Avoiding holding - * the lock for the entire function is important in order to ensure that Tx - * threads do not get blocked while waiting for the lock. + * Note that we do not hold the tracking lock while reading the Tx timestamp. + * This is because reading the timestamp requires taking a mutex that might + * sleep. + * + * The only place where we set in_use is when a new timestamp is initiated + * with a slot index. This is only called in the hard xmit routine where an + * SKB has a request flag set. The only places where we clear this bit is this + * function, or during teardown when the Tx timestamp tracker is being + * removed. A timestamp index will never be re-used until the in_use bit for + * that index is cleared. + * + * If a Tx thread starts a new timestamp, we might not begin processing it + * right away but we will notice it at the end when we re-queue the task. 
+ * + * If a Tx thread starts a new timestamp just after this function exits, the + * interrupt for that timestamp should re-trigger this function once + * a timestamp is ready. + * + * In cases where the PTP hardware clock was directly adjusted, some + * timestamps may not be able to safely use the timestamp extension math. In + * this case, software will set the stale bit for any outstanding Tx + * timestamps when the clock is adjusted. Then this function will discard + * those captured timestamps instead of sending them to the stack. + * + * If a Tx packet has been waiting for more than 2 seconds, it is not possible + * to correctly extend the timestamp using the cached PHC time. It is + * extremely unlikely that a packet will ever take this long to timestamp. If + * we detect a Tx timestamp request that has waited for this long we assume + * the packet will never be sent by hardware and discard it without reading + * the timestamp register. */ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; - bool ts_handled = true; + bool more_timestamps; struct ice_pf *pf; + struct ice_hw *hw; + u64 tstamp_ready; + int err; u8 idx; if (!tx->init) @@ -648,44 +688,86 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); + hw = &pf->hw; + + /* Read the Tx ready status first */ + err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); + if (err) + return false; for_each_set_bit(idx, tx->in_use, tx->len) { struct skb_shared_hwtstamps shhwtstamps = {}; - u8 phy_idx = idx + tx->quad_offset; - u64 raw_tstamp, tstamp; + u8 phy_idx = idx + tx->offset; + u64 raw_tstamp = 0, tstamp; + bool drop_ts = false; struct sk_buff *skb; - int err; + + /* Drop packets which have waited for more than 2 seconds */ + if (time_is_before_jiffies(tx->tstamps[idx].start + 2 * HZ)) { + drop_ts = true; + + /* Count the number of Tx timestamps that timed out */ + 
pf->ptp.tx_hwtstamp_timeouts++; + } + + /* Only read a timestamp from the PHY if its marked as ready + * by the tstamp_ready register. This avoids unnecessary + * reading of timestamps which are not yet valid. This is + * important as we must read all timestamps which are valid + * and only timestamps which are valid during each interrupt. + * If we do not, the hardware logic for generating a new + * interrupt can get stuck on some devices. + */ + if (!(tstamp_ready & BIT_ULL(phy_idx))) { + if (drop_ts) + goto skip_ts_read; + + continue; + } ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx); - err = ice_read_phy_tstamp(&pf->hw, tx->quad, phy_idx, - &raw_tstamp); + err = ice_read_phy_tstamp(hw, tx->block, phy_idx, &raw_tstamp); if (err) continue; ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx); - /* Check if the timestamp is invalid or stale */ - if (!(raw_tstamp & ICE_PTP_TS_VALID) || + /* For PHYs which don't implement a proper timestamp ready + * bitmap, verify that the timestamp value is different + * from the last cached timestamp. If it is not, skip this for + * now assuming it hasn't yet been captured by hardware. + */ + if (!drop_ts && tx->verify_cached && raw_tstamp == tx->tstamps[idx].cached_tstamp) continue; - /* The timestamp is valid, so we'll go ahead and clear this - * index and then send the timestamp up to the stack. - */ + /* Discard any timestamp value without the valid bit set */ + if (!(raw_tstamp & ICE_PTP_TS_VALID)) + drop_ts = true; + +skip_ts_read: spin_lock(&tx->lock); - tx->tstamps[idx].cached_tstamp = raw_tstamp; + if (tx->verify_cached && raw_tstamp) + tx->tstamps[idx].cached_tstamp = raw_tstamp; clear_bit(idx, tx->in_use); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; + if (test_and_clear_bit(idx, tx->stale)) + drop_ts = true; spin_unlock(&tx->lock); - /* it's (unlikely but) possible we raced with the cleanup - * thread for discarding old timestamp requests. 
+ /* It is unlikely but possible that the SKB will have been + * flushed at this point due to link change or teardown. */ if (!skb) continue; + if (drop_ts) { + dev_kfree_skb_any(skb); + continue; + } + /* Extend the timestamp using cached PHC time */ tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp); if (tstamp) { @@ -701,11 +783,10 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) * poll for remaining timestamps. */ spin_lock(&tx->lock); - if (!bitmap_empty(tx->in_use, tx->len)) - ts_handled = false; + more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len); spin_unlock(&tx->lock); - return ts_handled; + return !more_timestamps; } /** @@ -713,26 +794,33 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) * @tx: Tx tracking structure to initialize * * Assumes that the length has already been initialized. Do not call directly, - * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead. + * use the ice_ptp_init_tx_* instead. */ static int ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) { - tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL); - if (!tx->tstamps) - return -ENOMEM; + unsigned long *in_use, *stale; + struct ice_tx_tstamp *tstamps; + + tstamps = kcalloc(tx->len, sizeof(*tstamps), GFP_KERNEL); + in_use = bitmap_zalloc(tx->len, GFP_KERNEL); + stale = bitmap_zalloc(tx->len, GFP_KERNEL); + + if (!tstamps || !in_use || !stale) { + kfree(tstamps); + bitmap_free(in_use); + bitmap_free(stale); - tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL); - if (!tx->in_use) { - kfree(tx->tstamps); - tx->tstamps = NULL; return -ENOMEM; } - spin_lock_init(&tx->lock); - + tx->tstamps = tstamps; + tx->in_use = in_use; + tx->stale = stale; tx->init = 1; + spin_lock_init(&tx->lock); + return 0; } @@ -740,31 +828,71 @@ ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker * @pf: Board private structure * @tx: the tracker to flush + * + * Called during teardown when a Tx 
tracker is being removed. */ static void ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { + struct ice_hw *hw = &pf->hw; + u64 tstamp_ready; + int err; u8 idx; - for (idx = 0; idx < tx->len; idx++) { - u8 phy_idx = idx + tx->quad_offset; + err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); + if (err) { + dev_dbg(ice_pf_to_dev(pf), "Failed to get the Tx tstamp ready bitmap for block %u, err %d\n", + tx->block, err); + + /* If we fail to read the Tx timestamp ready bitmap just + * skip clearing the PHY timestamps. + */ + tstamp_ready = 0; + } + + for_each_set_bit(idx, tx->in_use, tx->len) { + u8 phy_idx = idx + tx->offset; + struct sk_buff *skb; + + /* In case this timestamp is ready, we need to clear it. */ + if (!hw->reset_ongoing && (tstamp_ready & BIT_ULL(phy_idx))) + ice_clear_phy_tstamp(hw, tx->block, phy_idx); spin_lock(&tx->lock); - if (tx->tstamps[idx].skb) { - dev_kfree_skb_any(tx->tstamps[idx].skb); - tx->tstamps[idx].skb = NULL; - pf->ptp.tx_hwtstamp_flushed++; - } + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; clear_bit(idx, tx->in_use); + clear_bit(idx, tx->stale); spin_unlock(&tx->lock); - /* Clear any potential residual timestamp in the PHY block */ - if (!pf->hw.reset_ongoing) - ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx); + /* Count the number of Tx timestamps flushed */ + pf->ptp.tx_hwtstamp_flushed++; + + /* Free the SKB after we've cleared the bit */ + dev_kfree_skb_any(skb); } } /** + * ice_ptp_mark_tx_tracker_stale - Mark unfinished timestamps as stale + * @tx: the tracker to mark + * + * Mark currently outstanding Tx timestamps as stale. This prevents sending + * their timestamp value to the stack. This is required to prevent extending + * the 40bit hardware timestamp incorrectly. + * + * This should be called when the PTP clock is modified such as after a set + * time request. 
+ */ +static void +ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx) +{ + spin_lock(&tx->lock); + bitmap_or(tx->stale, tx->stale, tx->in_use, tx->len); + spin_unlock(&tx->lock); +} + +/** * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker * @pf: Board private structure * @tx: Tx tracking structure to release @@ -774,7 +902,12 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) static void ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { + spin_lock(&tx->lock); tx->init = 0; + spin_unlock(&tx->lock); + + /* wait for potentially outstanding interrupt to complete */ + synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); ice_ptp_flush_tx_tracker(pf, tx); @@ -784,6 +917,9 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) bitmap_free(tx->in_use); tx->in_use = NULL; + bitmap_free(tx->stale); + tx->stale = NULL; + tx->len = 0; } @@ -801,9 +937,10 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) static int ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) { - tx->quad = port / ICE_PORTS_PER_QUAD; - tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT; - tx->len = INDEX_PER_PORT; + tx->block = port / ICE_PORTS_PER_QUAD; + tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E822; + tx->len = INDEX_PER_PORT_E822; + tx->verify_cached = 0; return ice_ptp_alloc_tx_tracker(tx); } @@ -819,59 +956,19 @@ ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) static int ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) { - tx->quad = pf->hw.port_info->lport; - tx->quad_offset = 0; - tx->len = INDEX_PER_QUAD; + tx->block = pf->hw.port_info->lport; + tx->offset = 0; + tx->len = INDEX_PER_PORT_E810; + /* The E810 PHY does not provide a timestamp ready bitmap. Instead, + * verify new timestamps against cached copy of the last read + * timestamp. 
+ */ + tx->verify_cached = 1; return ice_ptp_alloc_tx_tracker(tx); } /** - * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped - * @pf: pointer to the PF struct - * @tx: PTP Tx tracker to clean up - * - * Loop through the Tx timestamp requests and see if any of them have been - * waiting for a long time. Discard any SKBs that have been waiting for more - * than 2 seconds. This is long enough to be reasonably sure that the - * timestamp will never be captured. This might happen if the packet gets - * discarded before it reaches the PHY timestamping block. - */ -static void ice_ptp_tx_tstamp_cleanup(struct ice_pf *pf, struct ice_ptp_tx *tx) -{ - struct ice_hw *hw = &pf->hw; - u8 idx; - - if (!tx->init) - return; - - for_each_set_bit(idx, tx->in_use, tx->len) { - struct sk_buff *skb; - u64 raw_tstamp; - - /* Check if this SKB has been waiting for too long */ - if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) - continue; - - /* Read tstamp to be able to use this register again */ - ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset, - &raw_tstamp); - - spin_lock(&tx->lock); - skb = tx->tstamps[idx].skb; - tx->tstamps[idx].skb = NULL; - clear_bit(idx, tx->in_use); - spin_unlock(&tx->lock); - - /* Count the number of Tx timestamps which have timed out */ - pf->ptp.tx_hwtstamp_timeouts++; - - /* Free the SKB after we've cleared the bit */ - dev_kfree_skb_any(skb); - } -} - -/** * ice_ptp_update_cached_phctime - Update the cached PHC time values * @pf: Board specific private structure * @@ -941,20 +1038,13 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf) * @pf: Board specific private structure * * This function must be called when the cached PHC time is no longer valid, - * such as after a time adjustment. It discards any outstanding Tx timestamps, - * and updates the cached PHC time for both the PF and Rx rings. 
If updating - * the PHC time cannot be done immediately, a warning message is logged and - * the work item is scheduled. - * - * These steps are required in order to ensure that we do not accidentally - * report a timestamp extended by the wrong PHC cached copy. Note that we - * do not directly update the cached timestamp here because it is possible - * this might produce an error when ICE_CFG_BUSY is set. If this occurred, we - * would have to try again. During that time window, timestamps might be - * requested and returned with an invalid extension. Thus, on failure to - * immediately update the cached PHC time we would need to zero the value - * anyways. For this reason, we just zero the value immediately and queue the - * update work item. + * such as after a time adjustment. It marks any currently outstanding Tx + * timestamps as stale and updates the cached PHC time for both the PF and Rx + * rings. + * + * If updating the PHC time cannot be done immediately, a warning message is + * logged and the work item is scheduled immediately to minimize the window + * with a wrong cached timestamp. */ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf) { @@ -978,8 +1068,12 @@ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf) msecs_to_jiffies(10)); } - /* Flush any outstanding Tx timestamps */ - ice_ptp_flush_tx_tracker(pf, &pf->ptp.port.tx); + /* Mark any outstanding timestamps as stale, since they might have + * been captured in hardware before the time update. This could lead + * to us extending them with the wrong cached value resulting in + * incorrect timestamp values. 
+ */ + ice_ptp_mark_tx_tracker_stale(&pf->ptp.port.tx); } /** @@ -1060,19 +1154,6 @@ static u64 ice_base_incval(struct ice_pf *pf) } /** - * ice_ptp_reset_ts_memory_quad - Reset timestamp memory for one quad - * @pf: The PF private data structure - * @quad: The quad (0-4) - */ -static void ice_ptp_reset_ts_memory_quad(struct ice_pf *pf, int quad) -{ - struct ice_hw *hw = &pf->hw; - - ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M); - ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M); -} - -/** * ice_ptp_check_tx_fifo - Check whether Tx FIFO is in an OK state * @port: PTP port for which Tx FIFO is checked */ @@ -1124,7 +1205,7 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port) dev_dbg(ice_pf_to_dev(pf), "Port %d Tx FIFO still not empty; resetting quad %d\n", port->port_num, quad); - ice_ptp_reset_ts_memory_quad(pf, quad); + ice_ptp_reset_ts_memory_quad_e822(hw, quad); port->tx_fifo_busy_cnt = FIFO_OK; return 0; } @@ -1133,130 +1214,49 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port) } /** - * ice_ptp_check_tx_offset_valid - Check if the Tx PHY offset is valid - * @port: the PTP port to check - * - * Checks whether the Tx offset for the PHY associated with this port is - * valid. Returns 0 if the offset is valid, and a non-zero error code if it is - * not. 
- */ -static int ice_ptp_check_tx_offset_valid(struct ice_ptp_port *port) -{ - struct ice_pf *pf = ptp_port_to_pf(port); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - u32 val; - int err; - - err = ice_ptp_check_tx_fifo(port); - if (err) - return err; - - err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_TX_OV_STATUS, - &val); - if (err) { - dev_err(dev, "Failed to read TX_OV_STATUS for port %d, err %d\n", - port->port_num, err); - return -EAGAIN; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_check_rx_offset_valid - Check if the Rx PHY offset is valid - * @port: the PTP port to check - * - * Checks whether the Rx offset for the PHY associated with this port is - * valid. Returns 0 if the offset is valid, and a non-zero error code if it is - * not. - */ -static int ice_ptp_check_rx_offset_valid(struct ice_ptp_port *port) -{ - struct ice_pf *pf = ptp_port_to_pf(port); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - int err; - u32 val; - - err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_RX_OV_STATUS, - &val); - if (err) { - dev_err(dev, "Failed to read RX_OV_STATUS for port %d, err %d\n", - port->port_num, err); - return err; - } - - if (!(val & P_REG_RX_OV_STATUS_OV_M)) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_check_offset_valid - Check port offset valid bit - * @port: Port for which offset valid bit is checked - * - * Returns 0 if both Tx and Rx offset are valid, and -EAGAIN if one of the - * offset is not ready. 
- */ -static int ice_ptp_check_offset_valid(struct ice_ptp_port *port) -{ - int tx_err, rx_err; - - /* always check both Tx and Rx offset validity */ - tx_err = ice_ptp_check_tx_offset_valid(port); - rx_err = ice_ptp_check_rx_offset_valid(port); - - if (tx_err || rx_err) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_wait_for_offset_valid - Check for valid Tx and Rx offsets + * ice_ptp_wait_for_offsets - Check for valid Tx and Rx offsets * @work: Pointer to the kthread_work structure for this task * - * Check whether both the Tx and Rx offsets are valid for enabling the vernier - * calibration. + * Check whether hardware has completed measuring the Tx and Rx offset values + * used to configure and enable vernier timestamp calibration. + * + * Once the offset in either direction is measured, configure the associated + * registers with the calibrated offset values and enable timestamping. The Tx + * and Rx directions are configured independently as soon as their associated + * offsets are known. * - * Once we have valid offsets from hardware, update the total Tx and Rx - * offsets, and exit bypass mode. This enables more precise timestamps using - * the extra data measured during the vernier calibration process. + * This function reschedules itself until both Tx and Rx calibration have + * completed. 
*/ -static void ice_ptp_wait_for_offset_valid(struct kthread_work *work) +static void ice_ptp_wait_for_offsets(struct kthread_work *work) { struct ice_ptp_port *port; - int err; - struct device *dev; struct ice_pf *pf; struct ice_hw *hw; + int tx_err; + int rx_err; port = container_of(work, struct ice_ptp_port, ov_work.work); pf = ptp_port_to_pf(port); hw = &pf->hw; - dev = ice_pf_to_dev(pf); - if (ice_is_reset_in_progress(pf->state)) - return; - - if (ice_ptp_check_offset_valid(port)) { - /* Offsets not ready yet, try again later */ + if (ice_is_reset_in_progress(pf->state)) { + /* wait for device driver to complete reset */ kthread_queue_delayed_work(pf->ptp.kworker, &port->ov_work, msecs_to_jiffies(100)); return; } - /* Offsets are valid, so it is safe to exit bypass mode */ - err = ice_phy_exit_bypass_e822(hw, port->port_num); - if (err) { - dev_warn(dev, "Failed to exit bypass mode for PHY port %u, err %d\n", - port->port_num, err); + tx_err = ice_ptp_check_tx_fifo(port); + if (!tx_err) + tx_err = ice_phy_cfg_tx_offset_e822(hw, port->port_num); + rx_err = ice_phy_cfg_rx_offset_e822(hw, port->port_num); + if (tx_err || rx_err) { + /* Tx and/or Rx offset not yet configured, try again later */ + kthread_queue_delayed_work(pf->ptp.kworker, + &port->ov_work, + msecs_to_jiffies(100)); return; } } @@ -1317,16 +1317,20 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) kthread_cancel_delayed_work_sync(&ptp_port->ov_work); /* temporarily disable Tx timestamps while calibrating PHY offset */ + spin_lock(&ptp_port->tx.lock); ptp_port->tx.calibrating = true; + spin_unlock(&ptp_port->tx.lock); ptp_port->tx_fifo_busy_cnt = 0; - /* Start the PHY timer in bypass mode */ - err = ice_start_phy_timer_e822(hw, port, true); + /* Start the PHY timer in Vernier mode */ + err = ice_start_phy_timer_e822(hw, port); if (err) goto out_unlock; /* Enable Tx timestamps right away */ + spin_lock(&ptp_port->tx.lock); ptp_port->tx.calibrating = false; + spin_unlock(&ptp_port->tx.lock); 
kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0); @@ -1341,45 +1345,33 @@ out_unlock: } /** - * ice_ptp_link_change - Set or clear port registers for timestamping + * ice_ptp_link_change - Reconfigure PTP after link status change * @pf: Board private structure * @port: Port for which the PHY start is set * @linkup: Link is up or down */ -int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) { struct ice_ptp_port *ptp_port; - if (!test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) - return 0; + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return; - if (port >= ICE_NUM_EXTERNAL_PORTS) - return -EINVAL; + if (WARN_ON_ONCE(port >= ICE_NUM_EXTERNAL_PORTS)) + return; ptp_port = &pf->ptp.port; - if (ptp_port->port_num != port) - return -EINVAL; + if (WARN_ON_ONCE(ptp_port->port_num != port)) + return; - /* Update cached link err for this port immediately */ + /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; - if (!test_bit(ICE_FLAG_PTP, pf->flags)) - /* PTP is not setup */ - return -EAGAIN; - - return ice_ptp_port_phy_restart(ptp_port); -} - -/** - * ice_ptp_reset_ts_memory - Reset timestamp memory for all quads - * @pf: The PF private data structure - */ -static void ice_ptp_reset_ts_memory(struct ice_pf *pf) -{ - int quad; + /* E810 devices do not need to reconfigure the PHY */ + if (ice_is_e810(&pf->hw)) + return; - quad = pf->hw.port_info->lport / ICE_PORTS_PER_QUAD; - ice_ptp_reset_ts_memory_quad(pf, quad); + ice_ptp_port_phy_restart(ptp_port); } /** @@ -1397,7 +1389,7 @@ static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold) int quad; u32 val; - ice_ptp_reset_ts_memory(pf); + ice_ptp_reset_ts_memory(hw); for (quad = 0; quad < ICE_MAX_QUAD; quad++) { err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, @@ -1447,24 +1439,10 @@ static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) { struct ice_pf *pf 
= ptp_info_to_pf(info); struct ice_hw *hw = &pf->hw; - u64 incval, diff; - int neg_adj = 0; + u64 incval; int err; - incval = ice_base_incval(pf); - - if (scaled_ppm < 0) { - neg_adj = 1; - scaled_ppm = -scaled_ppm; - } - - diff = mul_u64_u64_div_u64(incval, (u64)scaled_ppm, - 1000000ULL << 16); - if (neg_adj) - incval -= diff; - else - incval += diff; - + incval = adjust_by_scaled_ppm(ice_base_incval(pf), scaled_ppm); err = ice_ptp_write_incval_locked(hw, incval); if (err) { dev_err(ice_pf_to_dev(pf), "PTP failed to set incval, err %d\n", @@ -2346,11 +2324,14 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) { u8 idx; - /* Check if this tracker is initialized */ - if (!tx->init || tx->calibrating) + spin_lock(&tx->lock); + + /* Check that this tracker is accepting new timestamp requests */ + if (!ice_ptp_is_tx_tracker_up(tx)) { + spin_unlock(&tx->lock); return -1; + } - spin_lock(&tx->lock); /* Find and set the first available index */ idx = find_first_zero_bit(tx->in_use, tx->len); if (idx < tx->len) { @@ -2359,6 +2340,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) * requests. */ set_bit(idx, tx->in_use); + clear_bit(idx, tx->stale); tx->tstamps[idx].start = jiffies; tx->tstamps[idx].skb = skb_get(skb); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; @@ -2373,7 +2355,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) if (idx >= tx->len) return -1; else - return idx + tx->quad_offset; + return idx + tx->offset; } /** @@ -2398,8 +2380,6 @@ static void ice_ptp_periodic_work(struct kthread_work *work) err = ice_ptp_update_cached_phctime(pf); - ice_ptp_tx_tstamp_cleanup(pf, &pf->ptp.port.tx); - /* Run twice a second or reschedule if phc update failed */ kthread_queue_delayed_work(ptp->kworker, &ptp->work, msecs_to_jiffies(err ? 
10 : 500)); @@ -2476,7 +2456,7 @@ pfr: err = ice_ptp_init_tx_e810(pf, &ptp->port.tx); } else { kthread_init_delayed_work(&ptp->port.ov_work, - ice_ptp_wait_for_offset_valid); + ice_ptp_wait_for_offsets); err = ice_ptp_init_tx_e822(pf, &ptp->port.tx, ptp->port.port_num); } @@ -2639,7 +2619,7 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) return ice_ptp_init_tx_e810(pf, &ptp_port->tx); kthread_init_delayed_work(&ptp_port->ov_work, - ice_ptp_wait_for_offset_valid); + ice_ptp_wait_for_offsets); return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num); } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 028349295b71..9cda2f43e0e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -93,9 +93,14 @@ struct ice_perout_channel { * we discard old requests that were not fulfilled within a 2 second time * window. * Timestamp values in the PHY are read only and do not get cleared except at - * hardware reset or when a new timestamp value is captured. The cached_tstamp - * field is used to detect the case where a new timestamp has not yet been - * captured, ensuring that we avoid sending stale timestamp data to the stack. + * hardware reset or when a new timestamp value is captured. + * + * Some PHY types do not provide a "ready" bitmap indicating which timestamp + * indexes are valid. In these cases, we use a cached_tstamp to keep track of + * the last timestamp we read for a given index. If the current timestamp + * value is the same as the cached value, we assume a new timestamp hasn't + * been captured. This avoids reporting stale timestamps to the stack. This is + * only done if the verify_cached flag is set in ice_ptp_tx structure. 
*/ struct ice_tx_tstamp { struct sk_buff *skb; @@ -105,30 +110,35 @@ struct ice_tx_tstamp { /** * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port - * @lock: lock to prevent concurrent write to in_use bitmap + * @lock: lock to prevent concurrent access to fields of this struct * @tstamps: array of len to store outstanding requests * @in_use: bitmap of len to indicate which slots are in use - * @quad: which quad the timestamps are captured in - * @quad_offset: offset into timestamp block of the quad to get the real index + * @stale: bitmap of len to indicate slots which have stale timestamps + * @block: which memory block (quad or port) the timestamps are captured in + * @offset: offset into timestamp block to get the real index * @len: length of the tstamps and in_use fields. * @init: if true, the tracker is initialized; * @calibrating: if true, the PHY is calibrating the Tx offset. During this * window, timestamps are temporarily disabled. + * @verify_cached: if true, verify new timestamp differs from last read value */ struct ice_ptp_tx { spinlock_t lock; /* lock protecting in_use bitmap */ struct ice_tx_tstamp *tstamps; unsigned long *in_use; - u8 quad; - u8 quad_offset; + unsigned long *stale; + u8 block; + u8 offset; u8 len; - u8 init; - u8 calibrating; + u8 init : 1; + u8 calibrating : 1; + u8 verify_cached : 1; }; /* Quad and port information for initializing timestamp blocks */ #define INDEX_PER_QUAD 64 -#define INDEX_PER_PORT (INDEX_PER_QUAD / ICE_PORTS_PER_QUAD) +#define INDEX_PER_PORT_E822 16 +#define INDEX_PER_PORT_E810 64 /** * struct ice_ptp_port - data used to initialize an external port for PTP @@ -256,7 +266,7 @@ void ice_ptp_reset(struct ice_pf *pf); void ice_ptp_prepare_for_reset(struct ice_pf *pf); void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); -int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); +void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); #else 
/* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) { @@ -291,7 +301,8 @@ static inline void ice_ptp_reset(struct ice_pf *pf) { } static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { } static inline void ice_ptp_init(struct ice_pf *pf) { } static inline void ice_ptp_release(struct ice_pf *pf) { } -static inline int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) -{ return 0; } +static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +{ +} #endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ #endif /* _ICE_PTP_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 772b1f566d6e..a38614d21ea8 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -656,6 +656,32 @@ ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx) } /** + * ice_ptp_reset_ts_memory_quad_e822 - Clear all timestamps from the quad block + * @hw: pointer to the HW struct + * @quad: the quad to read from + * + * Clear all timestamps from the PHY quad block that is shared between the + * internal PHYs on the E822 devices. 
+ */ +void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad) +{ + ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M); + ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M); +} + +/** + * ice_ptp_reset_ts_memory_e822 - Clear all timestamps from all quad blocks + * @hw: pointer to the HW struct + */ +static void ice_ptp_reset_ts_memory_e822(struct ice_hw *hw) +{ + unsigned int quad; + + for (quad = 0; quad < ICE_MAX_QUAD; quad++) + ice_ptp_reset_ts_memory_quad_e822(hw, quad); +} + +/** * ice_read_cgu_reg_e822 - Read a CGU register * @hw: pointer to the HW struct * @addr: Register address to read @@ -1715,21 +1741,48 @@ ice_calc_fixed_tx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * adjust Tx timestamps by. This is calculated by combining some known static * latency along with the Vernier offset computations done by hardware. * - * This function must be called only after the offset registers are valid, - * i.e. after the Vernier calibration wait has passed, to ensure that the PHY - * has measured the offset. + * This function will not return successfully until the Tx offset calculations + * have been completed, which requires waiting until at least one packet has + * been transmitted by the device. It is safe to call this function + * periodically until calibration succeeds, as it will only program the offset + * once. * * To avoid overflow, when calculating the offset based on the known static * latency values, we use measurements in 1/100th of a nanosecond, and divide * the TUs per second up front. This avoids overflow while allowing * calculation of the adjustment using integer arithmetic. + * + * Returns zero on success, -EBUSY if the hardware vernier offset + * calibration has not completed, or another error code on failure. 
*/ -static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) +int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) { enum ice_ptp_link_spd link_spd; enum ice_ptp_fec_mode fec_mode; u64 total_offset, val; int err; + u32 reg; + + /* Nothing to do if we've already programmed the offset */ + err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OR, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OR for port %u, err %d\n", + port, err); + return err; + } + + if (reg) + return 0; + + err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n", + port, err); + return err; + } + + if (!(reg & P_REG_TX_OV_STATUS_OV_M)) + return -EBUSY; err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); if (err) @@ -1783,46 +1836,8 @@ static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) if (err) return err; - return 0; -} - -/** - * ice_phy_cfg_fixed_tx_offset_e822 - Configure Tx offset for bypass mode - * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * Calculate and program the fixed Tx offset, and indicate that the offset is - * ready. This can be used when operating in bypass mode. - */ -static int -ice_phy_cfg_fixed_tx_offset_e822(struct ice_hw *hw, u8 port) -{ - enum ice_ptp_link_spd link_spd; - enum ice_ptp_fec_mode fec_mode; - u64 total_offset; - int err; - - err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); - if (err) - return err; - - total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd); - - /* Program the fixed Tx offset into the P_REG_TOTAL_TX_OFFSET_L - * register, then indicate that the Tx offset is ready. After this, - * timestamps will be enabled. - * - * Note that this skips including the more precise offsets generated - * by the Vernier calibration.
- */ - err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L, - total_offset); - if (err) - return err; - - err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1); - if (err) - return err; + dev_info(ice_hw_to_dev(hw), "Port=%d Tx vernier offset calibration complete\n", + port); return 0; } @@ -2026,6 +2041,11 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * measurements taken in hardware with some data about known fixed delay as * well as adjusting for multi-lane alignment delay. * + * This function will not return successfully until the Rx offset calculations + * have been completed, which requires waiting until at least one packet has + * been received by the device. It is safe to call this function periodically + * until calibration succeeds, as it will only program the offset once. + * * This function must be called only after the offset registers are valid, * i.e. after the Vernier calibration wait has passed, to ensure that the PHY * has measured the offset. @@ -2034,13 +2054,38 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * latency values, we use measurements in 1/100th of a nanosecond, and divide * the TUs per second up front. This avoids overflow while allowing * calculation of the adjustment using integer arithmetic. + * + * Returns zero on success, -EBUSY if the hardware vernier offset + * calibration has not completed, or another error code on failure. 
*/ -static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) +int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) { enum ice_ptp_link_spd link_spd; enum ice_ptp_fec_mode fec_mode; u64 total_offset, pmd, val; int err; + u32 reg; + + /* Nothing to do if we've already programmed the offset */ + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OR, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OR for port %u, err %d\n", + port, err); + return err; + } + + if (reg) + return 0; + + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n", + port, err); + return err; + } + + if (!(reg & P_REG_RX_OV_STATUS_OV_M)) + return -EBUSY; err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); if (err) @@ -2101,46 +2146,8 @@ static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) if (err) return err; - return 0; -} - -/** - * ice_phy_cfg_fixed_rx_offset_e822 - Configure fixed Rx offset for bypass mode - * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * Calculate and program the fixed Rx offset, and indicate that the offset is - * ready. This can be used when operating in bypass mode. - */ -static int -ice_phy_cfg_fixed_rx_offset_e822(struct ice_hw *hw, u8 port) -{ - enum ice_ptp_link_spd link_spd; - enum ice_ptp_fec_mode fec_mode; - u64 total_offset; - int err; - - err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); - if (err) - return err; - - total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd); - - /* Program the fixed Rx offset into the P_REG_TOTAL_RX_OFFSET_L - * register, then indicate that the Rx offset is ready. After this, - * timestamps will be enabled. - * - * Note that this skips including the more precise offsets generated - * by Vernier calibration.
- */ - err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L, - total_offset); - if (err) - return err; - - err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1); - if (err) - return err; + dev_info(ice_hw_to_dev(hw), "Port=%d Rx vernier offset calibration complete\n", + port); return 0; } @@ -2323,20 +2330,14 @@ ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset) * ice_start_phy_timer_e822 - Start the PHY clock timer * @hw: pointer to the HW struct * @port: the PHY port to start - * @bypass: if true, start the PHY in bypass mode * * Start the clock of a PHY port. This must be done as part of the flow to * re-calibrate Tx and Rx timestamping offsets whenever the clock time is * initialized or when link speed changes. * - * Bypass mode enables timestamps immediately without waiting for Vernier - * calibration to complete. Hardware will still continue taking Vernier - * measurements on Tx or Rx of packets, but they will not be applied to - * timestamps. Use ice_phy_exit_bypass_e822 to exit bypass mode once hardware - * has completed offset calculation. + * Hardware will take Vernier measurements on Tx or Rx of packets. */ -int -ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass) +int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port) { u32 lo, hi, val; u64 incval; @@ -2414,110 +2415,42 @@ ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass) if (err) return err; - if (bypass) { - val |= P_REG_PS_BYPASS_MODE_M; - /* Enter BYPASS mode, enabling timestamps immediately. 
*/ - err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); - if (err) - return err; - - /* Program the fixed Tx offset */ - err = ice_phy_cfg_fixed_tx_offset_e822(hw, port); - if (err) - return err; - - /* Program the fixed Rx offset */ - err = ice_phy_cfg_fixed_rx_offset_e822(hw, port); - if (err) - return err; - } - ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port); return 0; } /** - * ice_phy_exit_bypass_e822 - Exit bypass mode, after vernier calculations + * ice_get_phy_tx_tstamp_ready_e822 - Read Tx memory status register * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * After hardware finishes vernier calculations for the Tx and Rx offset, this - * function can be used to exit bypass mode by updating the total Tx and Rx - * offsets, and then disabling bypass. This will enable hardware to include - * the more precise offset calibrations, increasing precision of the generated - * timestamps. + * @quad: the timestamp quad to read from + * @tstamp_ready: contents of the Tx memory status register * - * This cannot be done until hardware has measured the offsets, which requires - * waiting until at least one packet has been sent and received by the device. + * Read the Q_REG_TX_MEMORY_STATUS register indicating which timestamps in + * the PHY are ready. A set bit means the corresponding timestamp is valid and + * ready to be captured from the PHY timestamp block. 
*/ -int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port) +static int +ice_get_phy_tx_tstamp_ready_e822(struct ice_hw *hw, u8 quad, u64 *tstamp_ready) { + u32 hi, lo; int err; - u32 val; - - err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) { - ice_debug(hw, ICE_DBG_PTP, "Tx offset is not yet valid for port %u\n", - port); - return -EBUSY; - } - - err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) { - ice_debug(hw, ICE_DBG_PTP, "Rx offset is not yet valid for port %u\n", - port); - return -EBUSY; - } - err = ice_phy_cfg_tx_offset_e822(hw, port); + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_U, &hi); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to program total Tx offset for port %u, err %d\n", - port, err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_U for quad %u, err %d\n", + quad, err); return err; } - err = ice_phy_cfg_rx_offset_e822(hw, port); + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_L, &lo); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to program total Rx offset for port %u, err %d\n", - port, err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_L for quad %u, err %d\n", + quad, err); return err; } - /* Exit bypass mode now that the offset has been updated */ - err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read P_REG_PS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_PS_BYPASS_MODE_M)) - ice_debug(hw, ICE_DBG_PTP, "Port %u not in bypass mode\n", - port); - - val &= ~P_REG_PS_BYPASS_MODE_M; - err = ice_write_phy_reg_e822(hw, 
port, P_REG_PS, val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to disable bypass for port %u, err %d\n", - port, err); - return err; - } - - dev_info(ice_hw_to_dev(hw), "Exiting bypass mode on PHY port %u\n", - port); + *tstamp_ready = (u64)hi << 32 | (u64)lo; return 0; } @@ -2963,16 +2896,18 @@ bool ice_ptp_lock(struct ice_hw *hw) u32 hw_lock; int i; -#define MAX_TRIES 5 +#define MAX_TRIES 15 for (i = 0; i < MAX_TRIES; i++) { hw_lock = rd32(hw, PFTSYN_SEM + (PFTSYN_SEM_BYTES * hw->pf_id)); hw_lock = hw_lock & PFTSYN_SEM_BUSY_M; - if (!hw_lock) - break; + if (hw_lock) { + /* Somebody is holding the lock */ + usleep_range(5000, 6000); + continue; + } - /* Somebody is holding the lock */ - usleep_range(10000, 20000); + break; } return !hw_lock; @@ -3194,6 +3129,22 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) return ice_clear_phy_tstamp_e822(hw, block, idx); } +/** + * ice_get_phy_tx_tstamp_ready_e810 - Read Tx memory status register + * @hw: pointer to the HW struct + * @port: the PHY port to read + * @tstamp_ready: contents of the Tx memory status register + * + * E810 devices do not use a Tx memory status register. Instead simply + * indicate that all timestamps are currently ready. 
+ */ +static int +ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready) +{ + *tstamp_ready = 0xFFFFFFFFFFFFFFFF; + return 0; +} + /* E810T SMA functions * * The following functions operate specifically on E810T hardware and are used @@ -3377,6 +3328,18 @@ bool ice_is_pca9575_present(struct ice_hw *hw) } /** + * ice_ptp_reset_ts_memory - Reset timestamp memory for all blocks + * @hw: pointer to the HW struct + */ +void ice_ptp_reset_ts_memory(struct ice_hw *hw) +{ + if (ice_is_e810(hw)) + return; + + ice_ptp_reset_ts_memory_e822(hw); +} + +/** * ice_ptp_init_phc - Initialize PTP hardware clock * @hw: pointer to the HW struct * @@ -3397,3 +3360,24 @@ int ice_ptp_init_phc(struct ice_hw *hw) else return ice_ptp_init_phc_e822(hw); } + +/** + * ice_get_phy_tx_tstamp_ready - Read PHY Tx memory status indication + * @hw: pointer to the HW struct + * @block: the timestamp block to check + * @tstamp_ready: storage for the PHY Tx memory status information + * + * Check the PHY for Tx timestamp memory status. This reports a 64 bit value + * which indicates which timestamps in the block may be captured. A set bit + * means the timestamp can be read. An unset bit means the timestamp is not + * ready and software should avoid reading the register. 
+ */ +int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready) +{ + if (ice_is_e810(hw)) + return ice_get_phy_tx_tstamp_ready_e810(hw, block, + tstamp_ready); + else + return ice_get_phy_tx_tstamp_ready_e822(hw, block, + tstamp_ready); +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 2bda64c76abc..3b68cb91bd81 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -133,7 +133,9 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval); int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj); int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp); int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx); +void ice_ptp_reset_ts_memory(struct ice_hw *hw); int ice_ptp_init_phc(struct ice_hw *hw); +int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready); /* E822 family functions */ int ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val); @@ -141,6 +143,7 @@ int ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val); int ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val); int ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val); int ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time); +void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad); /** * ice_e822_time_ref - Get the current TIME_REF from capabilities @@ -184,8 +187,9 @@ static inline u64 ice_e822_pps_delay(enum ice_time_ref_freq time_ref) /* E822 Vernier calibration functions */ int ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset); -int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass); -int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port); +int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port); +int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port); +int 
ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port); /* E810 family functions */ int ice_ptp_init_phy_e810(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index bd31748aae1b..fd1f8b0ad0ab 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -6,6 +6,7 @@ #include "ice_devlink.h" #include "ice_sriov.h" #include "ice_tc_lib.h" +#include "ice_dcb_lib.h" /** * ice_repr_get_sw_port_id - get port ID associated with representor @@ -134,14 +135,6 @@ static int ice_repr_stop(struct net_device *netdev) return 0; } -static struct devlink_port * -ice_repr_get_devlink_port(struct net_device *netdev) -{ - struct ice_repr *repr = ice_netdev_to_repr(netdev); - - return &repr->vf->devlink_port; -} - /** * ice_repr_sp_stats64 - get slow path stats for port representor * @dev: network interface device structure @@ -163,18 +156,20 @@ ice_repr_sp_stats64(const struct net_device *dev, u64 pkts, bytes; tx_ring = np->vsi->tx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&tx_ring->syncp, tx_ring->stats, + ice_fetch_u64_stats_per_ring(&tx_ring->ring_stats->syncp, + tx_ring->ring_stats->stats, &pkts, &bytes); stats->rx_packets = pkts; stats->rx_bytes = bytes; rx_ring = np->vsi->rx_rings[vf_id]; - ice_fetch_u64_stats_per_ring(&rx_ring->syncp, rx_ring->stats, + ice_fetch_u64_stats_per_ring(&rx_ring->ring_stats->syncp, + rx_ring->ring_stats->stats, &pkts, &bytes); stats->tx_packets = pkts; stats->tx_bytes = bytes; - stats->tx_dropped = rx_ring->rx_stats.alloc_page_failed + - rx_ring->rx_stats.alloc_buf_failed; + stats->tx_dropped = rx_ring->ring_stats->rx_stats.alloc_page_failed + + rx_ring->ring_stats->rx_stats.alloc_buf_failed; return 0; } @@ -250,7 +245,6 @@ static const struct net_device_ops ice_repr_netdev_ops = { .ndo_open = ice_repr_open, .ndo_stop = ice_repr_stop, .ndo_start_xmit = ice_eswitch_port_start_xmit, - .ndo_get_devlink_port = ice_repr_get_devlink_port, 
.ndo_setup_tc = ice_repr_setup_tc, .ndo_has_offload_stats = ice_repr_ndo_has_offload_stats, .ndo_get_offload_stats = ice_repr_ndo_get_offload_stats, @@ -339,12 +333,11 @@ static int ice_repr_add(struct ice_vf *vf) repr->netdev->max_mtu = ICE_MAX_MTU; SET_NETDEV_DEV(repr->netdev, ice_pf_to_dev(vf->pf)); + SET_NETDEV_DEVLINK_PORT(repr->netdev, &vf->devlink_port); err = ice_repr_reg_netdev(repr->netdev); if (err) goto err_netdev; - devlink_port_type_eth_set(&vf->devlink_port, repr->netdev); - ice_virtchnl_set_repr_ops(vf); return 0; @@ -399,6 +392,7 @@ static void ice_repr_rem(struct ice_vf *vf) */ void ice_repr_rem_from_all_vfs(struct ice_pf *pf) { + struct devlink *devlink; struct ice_vf *vf; unsigned int bkt; @@ -406,6 +400,14 @@ void ice_repr_rem_from_all_vfs(struct ice_pf *pf) ice_for_each_vf(pf, bkt, vf) ice_repr_rem(vf); + + /* since all port representors are destroyed, there is + * no point in keeping the nodes + */ + devlink = priv_to_devlink(pf); + devl_lock(devlink); + devl_rate_nodes_destroy(devlink); + devl_unlock(devlink); } /** @@ -414,6 +416,7 @@ void ice_repr_rem_from_all_vfs(struct ice_pf *pf) */ int ice_repr_add_for_all_vfs(struct ice_pf *pf) { + struct devlink *devlink; struct ice_vf *vf; unsigned int bkt; int err; @@ -426,6 +429,13 @@ int ice_repr_add_for_all_vfs(struct ice_pf *pf) goto err; } + /* only export if ADQ and DCB disabled */ + if (ice_is_adq_active(pf) || ice_is_dcb_active(pf)) + return 0; + + devlink = priv_to_devlink(pf); + ice_devlink_rate_init_tx_topology(devlink, ice_get_main_vsi(pf)); + return 0; err: diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 118595763bba..6d08b397df2a 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2018, Intel Corporation. 
*/ +#include <net/devlink.h> #include "ice_sched.h" /** @@ -142,12 +143,14 @@ ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, * @pi: port information structure * @layer: Scheduler layer of the node * @info: Scheduler element information from firmware + * @prealloc_node: preallocated ice_sched_node struct for SW DB * * This function inserts a scheduler node to the SW DB. */ int ice_sched_add_node(struct ice_port_info *pi, u8 layer, - struct ice_aqc_txsched_elem_data *info) + struct ice_aqc_txsched_elem_data *info, + struct ice_sched_node *prealloc_node) { struct ice_aqc_txsched_elem_data elem; struct ice_sched_node *parent; @@ -176,7 +179,10 @@ ice_sched_add_node(struct ice_port_info *pi, u8 layer, if (status) return status; - node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL); + if (prealloc_node) + node = prealloc_node; + else + node = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; if (hw->max_children[layer]) { @@ -355,6 +361,9 @@ void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node) /* leaf nodes have no children */ if (node->children) devm_kfree(ice_hw_to_dev(hw), node->children); + + kfree(node->name); + xa_erase(&pi->sched_node_ids, node->id); devm_kfree(ice_hw_to_dev(hw), node); } @@ -872,13 +881,15 @@ void ice_sched_cleanup_all(struct ice_hw *hw) * @num_nodes: number of nodes * @num_nodes_added: pointer to num nodes added * @first_node_teid: if new nodes are added then return the TEID of first node + * @prealloc_nodes: preallocated nodes struct for software DB * * This function add nodes to HW as well as to SW DB for a given layer */ -static int +int ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, struct ice_sched_node *parent, u8 layer, u16 num_nodes, - u16 *num_nodes_added, u32 *first_node_teid) + u16 *num_nodes_added, u32 *first_node_teid, + struct ice_sched_node **prealloc_nodes) { struct ice_sched_node *prev, *new_node; struct 
ice_aqc_add_elem *buf; @@ -924,7 +935,11 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, *num_nodes_added = num_nodes; /* add nodes to the SW DB */ for (i = 0; i < num_nodes; i++) { - status = ice_sched_add_node(pi, layer, &buf->generic[i]); + if (prealloc_nodes) + status = ice_sched_add_node(pi, layer, &buf->generic[i], prealloc_nodes[i]); + else + status = ice_sched_add_node(pi, layer, &buf->generic[i], NULL); + if (status) { ice_debug(hw, ICE_DBG_SCHED, "add nodes in SW DB failed status =%d\n", status); @@ -940,6 +955,22 @@ ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, new_node->sibling = NULL; new_node->tc_num = tc_node->tc_num; + new_node->tx_weight = ICE_SCHED_DFLT_BW_WT; + new_node->tx_share = ICE_SCHED_DFLT_BW; + new_node->tx_max = ICE_SCHED_DFLT_BW; + new_node->name = kzalloc(SCHED_NODE_NAME_MAX_LEN, GFP_KERNEL); + if (!new_node->name) + return -ENOMEM; + + status = xa_alloc(&pi->sched_node_ids, &new_node->id, NULL, XA_LIMIT(0, UINT_MAX), + GFP_KERNEL); + if (status) { + ice_debug(hw, ICE_DBG_SCHED, "xa_alloc failed for sched node status =%d\n", + status); + break; + } + + snprintf(new_node->name, SCHED_NODE_NAME_MAX_LEN, "node_%u", new_node->id); /* add it to previous node sibling pointer */ /* Note: siblings are not linked across branches */ @@ -1003,7 +1034,7 @@ ice_sched_add_nodes_to_hw_layer(struct ice_port_info *pi, } return ice_sched_add_elems(pi, tc_node, parent, layer, num_nodes, - num_nodes_added, first_node_teid); + num_nodes_added, first_node_teid, NULL); } /** @@ -1268,7 +1299,7 @@ int ice_sched_init_port(struct ice_port_info *pi) ICE_AQC_ELEM_TYPE_ENTRY_POINT) hw->sw_entry_point_layer = j; - status = ice_sched_add_node(pi, j, &buf[i].generic[j]); + status = ice_sched_add_node(pi, j, &buf[i].generic[j], NULL); if (status) goto err_init_port; } @@ -2154,7 +2185,7 @@ ice_sched_get_free_vsi_parent(struct ice_hw *hw, struct ice_sched_node *node, * This function removes the child from 
the old parent and adds it to a new * parent */ -static void +void ice_sched_update_parent(struct ice_sched_node *new_parent, struct ice_sched_node *node) { @@ -2188,7 +2219,7 @@ ice_sched_update_parent(struct ice_sched_node *new_parent, * * This function move the child nodes to a given parent. */ -static int +int ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent, u16 num_items, u32 *list) { @@ -3560,7 +3591,7 @@ ice_sched_set_eir_srl_excl(struct ice_port_info *pi, * node's RL profile ID of type CIR, EIR, or SRL, and removes old profile * ID from local database. The caller needs to hold scheduler lock. */ -static int +int ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, enum ice_rl_type rl_type, u32 bw, u8 layer_num) { @@ -3597,6 +3628,57 @@ ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, } /** + * ice_sched_set_node_priority - set node's priority + * @pi: port information structure + * @node: tree node + * @priority: number 0-7 representing priority among siblings + * + * This function sets priority of a node among it's siblings. + */ +int +ice_sched_set_node_priority(struct ice_port_info *pi, struct ice_sched_node *node, + u16 priority) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + + buf = node->info; + data = &buf.data; + + data->valid_sections |= ICE_AQC_ELEM_VALID_GENERIC; + data->generic |= FIELD_PREP(ICE_AQC_ELEM_GENERIC_PRIO_M, priority); + + return ice_sched_update_elem(pi->hw, node, &buf); +} + +/** + * ice_sched_set_node_weight - set node's weight + * @pi: port information structure + * @node: tree node + * @weight: number 1-200 representing weight for WFQ + * + * This function sets weight of the node for WFQ algorithm. 
+ */ +int +ice_sched_set_node_weight(struct ice_port_info *pi, struct ice_sched_node *node, u16 weight) +{ + struct ice_aqc_txsched_elem_data buf; + struct ice_aqc_txsched_elem *data; + + buf = node->info; + data = &buf.data; + + data->valid_sections = ICE_AQC_ELEM_VALID_CIR | ICE_AQC_ELEM_VALID_EIR | + ICE_AQC_ELEM_VALID_GENERIC; + data->cir_bw.bw_alloc = cpu_to_le16(weight); + data->eir_bw.bw_alloc = cpu_to_le16(weight); + + data->generic |= FIELD_PREP(ICE_AQC_ELEM_GENERIC_SP_M, 0x0); + + return ice_sched_update_elem(pi->hw, node, &buf); +} + +/** * ice_sched_set_node_bw_lmt - set node's BW limit * @pi: port information structure * @node: tree node @@ -3606,7 +3688,7 @@ ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, * It updates node's BW limit parameters like BW RL profile ID of type CIR, * EIR, or SRL. The caller needs to hold scheduler lock. */ -static int +int ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node, enum ice_rl_type rl_type, u32 bw) { diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index 4f91577fed56..9c100747445a 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -6,6 +6,8 @@ #include "ice_common.h" +#define SCHED_NODE_NAME_MAX_LEN 32 + #define ICE_QGRP_LAYER_OFFSET 2 #define ICE_VSI_LAYER_OFFSET 4 #define ICE_AGG_LAYER_OFFSET 6 @@ -69,6 +71,29 @@ int ice_aq_query_sched_elems(struct ice_hw *hw, u16 elems_req, struct ice_aqc_txsched_elem_data *buf, u16 buf_size, u16 *elems_ret, struct ice_sq_cd *cd); + +int +ice_sched_set_node_bw_lmt(struct ice_port_info *pi, struct ice_sched_node *node, + enum ice_rl_type rl_type, u32 bw); + +int +ice_sched_set_node_bw(struct ice_port_info *pi, struct ice_sched_node *node, + enum ice_rl_type rl_type, u32 bw, u8 layer_num); + +int +ice_sched_add_elems(struct ice_port_info *pi, struct ice_sched_node *tc_node, + struct ice_sched_node *parent, u8 
layer, u16 num_nodes, + u16 *num_nodes_added, u32 *first_node_teid, + struct ice_sched_node **prealloc_node); + +int +ice_sched_move_nodes(struct ice_port_info *pi, struct ice_sched_node *parent, + u16 num_items, u32 *list); + +int ice_sched_set_node_priority(struct ice_port_info *pi, struct ice_sched_node *node, + u16 priority); +int ice_sched_set_node_weight(struct ice_port_info *pi, struct ice_sched_node *node, u16 weight); + int ice_sched_init_port(struct ice_port_info *pi); int ice_sched_query_res_alloc(struct ice_hw *hw); void ice_sched_get_psm_clk_freq(struct ice_hw *hw); @@ -81,7 +106,11 @@ struct ice_sched_node * ice_sched_find_node_by_teid(struct ice_sched_node *start_node, u32 teid); int ice_sched_add_node(struct ice_port_info *pi, u8 layer, - struct ice_aqc_txsched_elem_data *info); + struct ice_aqc_txsched_elem_data *info, + struct ice_sched_node *prealloc_node); +void +ice_sched_update_parent(struct ice_sched_node *new_parent, + struct ice_sched_node *node); void ice_free_sched_node(struct ice_port_info *pi, struct ice_sched_node *node); struct ice_sched_node *ice_sched_get_tc_node(struct ice_port_info *pi, u8 tc); struct ice_sched_node * diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c index f68c555be4e9..faba0f857cd9 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -724,7 +724,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) */ fltr->rid = rule_added.rid; fltr->rule_id = rule_added.rule_id; - fltr->dest_id = rule_added.vsi_handle; + fltr->dest_vsi_handle = rule_added.vsi_handle; exit: kfree(list); @@ -732,6 +732,116 @@ exit: } /** + * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action) + * @vsi: Pointer to VSI + * @tc_fltr: Pointer to tc_flower_filter + * + * Locate the VSI using specified queue. 
When ADQ is not enabled, always + * return input VSI, otherwise locate corresponding VSI based on per channel + * offset and qcount + */ +static struct ice_vsi * +ice_locate_vsi_using_queue(struct ice_vsi *vsi, + struct ice_tc_flower_fltr *tc_fltr) +{ + int num_tc, tc, queue; + + /* if ADQ is not active, passed VSI is the candidate VSI */ + if (!ice_is_adq_active(vsi->back)) + return vsi; + + /* Locate the VSI (it could still be main PF VSI or CHNL_VSI depending + * upon queue number) + */ + num_tc = vsi->mqprio_qopt.qopt.num_tc; + queue = tc_fltr->action.fwd.q.queue; + + for (tc = 0; tc < num_tc; tc++) { + int qcount = vsi->mqprio_qopt.qopt.count[tc]; + int offset = vsi->mqprio_qopt.qopt.offset[tc]; + + if (queue >= offset && queue < offset + qcount) { + /* for non-ADQ TCs, passed VSI is the candidate VSI */ + if (tc < ICE_CHNL_START_TC) + return vsi; + else + return vsi->tc_map_vsi[tc]; + } + } + return NULL; +} + +static struct ice_rx_ring * +ice_locate_rx_ring_using_queue(struct ice_vsi *vsi, + struct ice_tc_flower_fltr *tc_fltr) +{ + u16 queue = tc_fltr->action.fwd.q.queue; + + return queue < vsi->num_rxq ? vsi->rx_rings[queue] : NULL; +} + +/** + * ice_tc_forward_action - Determine destination VSI and queue for the action + * @vsi: Pointer to VSI + * @tc_fltr: Pointer to TC flower filter structure + * + * Validates the tc forward action and determines the destination VSI and queue + * for the forward action. 
+ */ +static struct ice_vsi * +ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr) +{ + struct ice_rx_ring *ring = NULL; + struct ice_vsi *ch_vsi = NULL; + struct ice_pf *pf = vsi->back; + struct device *dev; + u32 tc_class; + + dev = ice_pf_to_dev(pf); + + /* Get the destination VSI and/or destination queue and validate them */ + switch (tc_fltr->action.fltr_act) { + case ICE_FWD_TO_VSI: + tc_class = tc_fltr->action.fwd.tc.tc_class; + /* Select the destination VSI */ + if (tc_class < ICE_CHNL_START_TC) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter because of unsupported destination"); + return ERR_PTR(-EOPNOTSUPP); + } + /* Locate ADQ VSI depending on hw_tc number */ + ch_vsi = vsi->tc_map_vsi[tc_class]; + break; + case ICE_FWD_TO_Q: + /* Locate the Rx queue */ + ring = ice_locate_rx_ring_using_queue(vsi, tc_fltr); + if (!ring) { + dev_err(dev, + "Unable to locate Rx queue for action fwd_to_queue: %u\n", + tc_fltr->action.fwd.q.queue); + return ERR_PTR(-EINVAL); + } + /* Determine destination VSI even though the action is + * FWD_TO_QUEUE, because QUEUE is associated with VSI + */ + ch_vsi = tc_fltr->dest_vsi; + break; + default: + dev_err(dev, + "Unable to add filter because of unsupported action %u (supported actions: fwd to tc, fwd to queue)\n", + tc_fltr->action.fltr_act); + return ERR_PTR(-EINVAL); + } + /* Must have valid ch_vsi (it could be main VSI or ADQ VSI) */ + if (!ch_vsi) { + dev_err(dev, + "Unable to add filter because specified destination VSI doesn't exist\n"); + return ERR_PTR(-EINVAL); + } + return ch_vsi; +} + +/** * ice_add_tc_flower_adv_fltr - add appropriate filter rules * @vsi: Pointer to VSI * @tc_fltr: Pointer to TC flower filter structure @@ -772,11 +882,10 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, return -EOPNOTSUPP; } - /* get the channel (aka ADQ VSI) */ - if (tc_fltr->dest_vsi) - ch_vsi = tc_fltr->dest_vsi; - else - ch_vsi = vsi->tc_map_vsi[tc_fltr->action.tc_class]; + /* validate 
forwarding action VSI and queue */ + ch_vsi = ice_tc_forward_action(vsi, tc_fltr); + if (IS_ERR(ch_vsi)) + return PTR_ERR(ch_vsi); lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr); list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); @@ -790,30 +899,40 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, } rule_info.sw_act.fltr_act = tc_fltr->action.fltr_act; - if (tc_fltr->action.tc_class >= ICE_CHNL_START_TC) { - if (!ch_vsi) { - NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because specified destination doesn't exist"); - ret = -EINVAL; - goto exit; - } + /* specify the cookie as filter_rule_id */ + rule_info.fltr_rule_id = tc_fltr->cookie; - rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI; + switch (tc_fltr->action.fltr_act) { + case ICE_FWD_TO_VSI: rule_info.sw_act.vsi_handle = ch_vsi->idx; - rule_info.priority = 7; + rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; rule_info.sw_act.src = hw->pf_id; rule_info.rx = true; dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n", - tc_fltr->action.tc_class, + tc_fltr->action.fwd.tc.tc_class, rule_info.sw_act.vsi_handle, lkups_cnt); - } else { + break; + case ICE_FWD_TO_Q: + /* HW queue number in global space */ + rule_info.sw_act.fwd_id.q_id = tc_fltr->action.fwd.q.hw_queue; + rule_info.sw_act.vsi_handle = ch_vsi->idx; + rule_info.priority = ICE_SWITCH_FLTR_PRIO_QUEUE; + rule_info.sw_act.src = hw->pf_id; + rule_info.rx = true; + dev_dbg(dev, "add switch rule action to forward to queue:%u (HW queue %u), lkups_cnt:%u\n", + tc_fltr->action.fwd.q.queue, + tc_fltr->action.fwd.q.hw_queue, lkups_cnt); + break; + default: rule_info.sw_act.flag |= ICE_FLTR_TX; + /* In case of Tx (LOOKUP_TX), src needs to be src VSI */ rule_info.sw_act.src = vsi->idx; + /* 'Rx' is false, direction of rule(LOOKUPTRX) */ rule_info.rx = false; + rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI; + break; } - /* specify the cookie as filter_rule_id */ - rule_info.fltr_rule_id = tc_fltr->cookie; - ret = ice_add_adv_rule(hw, 
list, lkups_cnt, &rule_info, &rule_added); if (ret == -EEXIST) { NL_SET_ERR_MSG_MOD(tc_fltr->extack, @@ -831,19 +950,14 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, */ tc_fltr->rid = rule_added.rid; tc_fltr->rule_id = rule_added.rule_id; - if (tc_fltr->action.tc_class > 0 && ch_vsi) { - /* For PF ADQ, VSI type is set as ICE_VSI_CHNL, and - * for PF ADQ filter, it is not yet set in tc_fltr, - * hence store the dest_vsi ptr in tc_fltr - */ - if (ch_vsi->type == ICE_VSI_CHNL) - tc_fltr->dest_vsi = ch_vsi; + tc_fltr->dest_vsi_handle = rule_added.vsi_handle; + if (tc_fltr->action.fltr_act == ICE_FWD_TO_VSI || + tc_fltr->action.fltr_act == ICE_FWD_TO_Q) { + tc_fltr->dest_vsi = ch_vsi; /* keep track of advanced switch filter for - * destination VSI (channel VSI) + * destination VSI */ ch_vsi->num_chnl_fltr++; - /* in this case, dest_id is VSI handle (sw handle) */ - tc_fltr->dest_id = rule_added.vsi_handle; /* keeps track of channel filters for PF VSI */ if (vsi->type == ICE_VSI_PF && @@ -851,10 +965,22 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, ICE_TC_FLWR_FIELD_ENC_DST_MAC))) pf->num_dmac_chnl_fltrs++; } - dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x) for TC %u, rid %u, rule_id %u, vsi_idx %u\n", - lkups_cnt, flags, - tc_fltr->action.tc_class, rule_added.rid, - rule_added.rule_id, rule_added.vsi_handle); + switch (tc_fltr->action.fltr_act) { + case ICE_FWD_TO_VSI: + dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x), action is forward to TC %u, rid %u, rule_id %u, vsi_idx %u\n", + lkups_cnt, flags, + tc_fltr->action.fwd.tc.tc_class, rule_added.rid, + rule_added.rule_id, rule_added.vsi_handle); + break; + case ICE_FWD_TO_Q: + dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x), action is forward to queue: %u (HW queue %u) , rid %u, rule_id %u\n", + lkups_cnt, flags, tc_fltr->action.fwd.q.queue, + tc_fltr->action.fwd.q.hw_queue, rule_added.rid, + rule_added.rule_id); + break; + default: + break; + } exit: kfree(list); return ret; 
@@ -1455,43 +1581,15 @@ ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) } /** - * ice_handle_tclass_action - Support directing to a traffic class + * ice_prep_adq_filter - Prepare ADQ filter with the required additional headers * @vsi: Pointer to VSI - * @cls_flower: Pointer to TC flower offload structure * @fltr: Pointer to TC flower filter structure * - * Support directing traffic to a traffic class + * Prepare ADQ filter with the required additional header fields */ static int -ice_handle_tclass_action(struct ice_vsi *vsi, - struct flow_cls_offload *cls_flower, - struct ice_tc_flower_fltr *fltr) +ice_prep_adq_filter(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) { - int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); - struct ice_vsi *main_vsi; - - if (tc < 0) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because specified destination is invalid"); - return -EINVAL; - } - if (!tc) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of invalid destination"); - return -EINVAL; - } - - if (!(vsi->all_enatc & BIT(tc))) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of non-existence destination"); - return -EINVAL; - } - - /* Redirect to a TC class or Queue Group */ - main_vsi = ice_get_main_vsi(vsi->back); - if (!main_vsi || !main_vsi->netdev) { - NL_SET_ERR_MSG_MOD(fltr->extack, - "Unable to add filter because of invalid netdevice"); - return -EINVAL; - } - if ((fltr->flags & ICE_TC_FLWR_FIELD_TENANT_ID) && (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC | ICE_TC_FLWR_FIELD_SRC_MAC))) { @@ -1503,9 +1601,8 @@ ice_handle_tclass_action(struct ice_vsi *vsi, /* For ADQ, filter must include dest MAC address, otherwise unwanted * packets with unrelated MAC address get delivered to ADQ VSIs as long * as remaining filter criteria is satisfied such as dest IP address - * and dest/src L4 port. Following code is trying to handle: - * 1. 
For non-tunnel, if user specify MAC addresses, use them (means - * this code won't do anything + * and dest/src L4 port. Below code handles the following cases: + * 1. For non-tunnel, if user specify MAC addresses, use them. * 2. For non-tunnel, if user didn't specify MAC address, add implicit * dest MAC to be lower netdev's active unicast MAC address * 3. For tunnel, as of now TC-filter through flower classifier doesn't @@ -1528,35 +1625,97 @@ ice_handle_tclass_action(struct ice_vsi *vsi, eth_broadcast_addr(fltr->outer_headers.l2_mask.dst_mac); } - /* validate specified dest MAC address, make sure either it belongs to - * lower netdev or any of MACVLAN. MACVLANs MAC address are added as - * unicast MAC filter destined to main VSI. - */ - if (!ice_mac_fltr_exist(&main_vsi->back->hw, - fltr->outer_headers.l2_key.dst_mac, - main_vsi->idx)) { - NL_SET_ERR_MSG_MOD(fltr->extack, - "Unable to add filter because legacy MAC filter for specified destination doesn't exist"); - return -EINVAL; - } - /* Make sure VLAN is already added to main VSI, before allowing ADQ to * add a VLAN based filter such as MAC + VLAN + L4 port. 
*/ if (fltr->flags & ICE_TC_FLWR_FIELD_VLAN) { u16 vlan_id = be16_to_cpu(fltr->outer_headers.vlan_hdr.vlan_id); - if (!ice_vlan_fltr_exist(&main_vsi->back->hw, vlan_id, - main_vsi->idx)) { + if (!ice_vlan_fltr_exist(&vsi->back->hw, vlan_id, vsi->idx)) { NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because legacy VLAN filter for specified destination doesn't exist"); return -EINVAL; } } + return 0; +} + +/** + * ice_handle_tclass_action - Support directing to a traffic class + * @vsi: Pointer to VSI + * @cls_flower: Pointer to TC flower offload structure + * @fltr: Pointer to TC flower filter structure + * + * Support directing traffic to a traffic class/queue-set + */ +static int +ice_handle_tclass_action(struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, + struct ice_tc_flower_fltr *fltr) +{ + int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); + + /* user specified hw_tc (must be non-zero for ADQ TC), action is forward + * to hw_tc (i.e. ADQ channel number) + */ + if (tc < ICE_CHNL_START_TC) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because of unsupported destination"); + return -EOPNOTSUPP; + } + if (!(vsi->all_enatc & BIT(tc))) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because of non-existence destination"); + return -EINVAL; + } fltr->action.fltr_act = ICE_FWD_TO_VSI; - fltr->action.tc_class = tc; + fltr->action.fwd.tc.tc_class = tc; - return 0; + return ice_prep_adq_filter(vsi, fltr); +} + +static int +ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, + struct flow_action_entry *act) +{ + struct ice_vsi *ch_vsi = NULL; + u16 queue = act->rx_queue; + + if (queue > vsi->num_rxq) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because specified queue is invalid"); + return -EINVAL; + } + fltr->action.fltr_act = ICE_FWD_TO_Q; + fltr->action.fwd.q.queue = queue; + /* determine corresponding HW queue */ + fltr->action.fwd.q.hw_queue = vsi->rxq_map[queue]; + 
+ /* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare + * ADQ switch filter + */ + ch_vsi = ice_locate_vsi_using_queue(vsi, fltr); + if (!ch_vsi) + return -EINVAL; + fltr->dest_vsi = ch_vsi; + if (!ice_is_chnl_fltr(fltr)) + return 0; + + return ice_prep_adq_filter(vsi, fltr); +} + +static int +ice_tc_parse_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr, + struct flow_action_entry *act) +{ + switch (act->id) { + case FLOW_ACTION_RX_QUEUE_MAPPING: + /* forward to queue */ + return ice_tc_forward_to_queue(vsi, fltr, act); + default: + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported TC action"); + return -EOPNOTSUPP; + } } /** @@ -1575,7 +1734,7 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi, struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower); struct flow_action *flow_action = &rule->action; struct flow_action_entry *act; - int i; + int i, err; if (cls_flower->classid) return ice_handle_tclass_action(vsi, cls_flower, fltr); @@ -1584,21 +1743,13 @@ ice_parse_tc_flower_actions(struct ice_vsi *vsi, return -EINVAL; flow_action_for_each(i, act, flow_action) { - if (ice_is_eswitch_mode_switchdev(vsi->back)) { - int err = ice_eswitch_tc_parse_action(fltr, act); - - if (err) - return err; - continue; - } - /* Allow only one rule per filter */ - - /* Drop action */ - if (act->id == FLOW_ACTION_DROP) { - NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action DROP"); - return -EINVAL; - } - fltr->action.fltr_act = ICE_FWD_TO_VSI; + if (ice_is_eswitch_mode_switchdev(vsi->back)) + err = ice_eswitch_tc_parse_action(fltr, act); + else + err = ice_tc_parse_action(vsi, fltr, act); + if (err) + return err; + continue; } return 0; } @@ -1618,7 +1769,7 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) rule_rem.rid = fltr->rid; rule_rem.rule_id = fltr->rule_id; - rule_rem.vsi_handle = fltr->dest_id; + rule_rem.vsi_handle = fltr->dest_vsi_handle; err = ice_rem_adv_rule_by_id(&pf->hw, &rule_rem); if (err) { if 
(err == -ENOENT) { diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h index 92642faad595..d916d1e92aa3 100644 --- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -45,7 +45,20 @@ struct ice_indr_block_priv { }; struct ice_tc_flower_action { - u32 tc_class; + /* forward action specific params */ + union { + struct { + u32 tc_class; /* forward to hw_tc */ + u32 rsvd; + } tc; + struct { + u16 queue; /* forward to queue */ + /* To add filter in HW, absolute queue number in global + * space of queues (between 0...N) is needed + */ + u16 hw_queue; + } q; + } fwd; enum ice_sw_fwd_act_type fltr_act; }; @@ -131,11 +144,11 @@ struct ice_tc_flower_fltr { */ u16 rid; u16 rule_id; - /* this could be queue/vsi_idx (sw handle)/queue_group, depending upon - * destination type + /* VSI handle of the destination VSI (it could be main PF VSI, CHNL_VSI, + * VF VSI) */ - u16 dest_id; - /* if dest_id is vsi_idx, then need to store destination VSI ptr */ + u16 dest_vsi_handle; + /* ptr to destination VSI */ struct ice_vsi *dest_vsi; /* direction of fltr for eswitch use case */ enum ice_eswitch_fltr_direction direction; @@ -162,12 +175,23 @@ struct ice_tc_flower_fltr { * @f: Pointer to tc-flower filter * * Criteria to determine of given filter is valid channel filter - * or not is based on its "destination". If destination is hw_tc (aka tc_class) - * and it is non-zero, then it is valid channel (aka ADQ) filter + * or not is based on its destination. + * For forward to VSI action, if destination is valid hw_tc (aka tc_class) + * and in supported range of TCs for ADQ, then return true. + * For forward to queue, as long as dest_vsi is valid and it is of type + * VSI_CHNL (PF ADQ VSI is of type VSI_CHNL), return true. + * NOTE: For forward to queue, correct dest_vsi is still set in tc_fltr based + * on destination queue specified. 
*/ static inline bool ice_is_chnl_fltr(struct ice_tc_flower_fltr *f) { - return !!f->action.tc_class; + if (f->action.fltr_act == ICE_FWD_TO_VSI) + return f->action.fwd.tc.tc_class >= ICE_CHNL_START_TC && + f->action.fwd.tc.tc_class < ICE_CHNL_MAX_TC; + else if (f->action.fltr_act == ICE_FWD_TO_Q) + return f->dest_vsi && f->dest_vsi->type == ICE_VSI_CHNL; + + return false; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index dbe80e5053a8..086f0b3ab68d 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -325,7 +325,7 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) if (netif_tx_queue_stopped(txring_txq(tx_ring)) && !test_bit(ICE_VSI_DOWN, vsi->state)) { netif_tx_wake_queue(txring_txq(tx_ring)); - ++tx_ring->tx_stats.restart_q; + ++tx_ring->ring_stats->tx_stats.restart_q; } } @@ -367,7 +367,7 @@ int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; - tx_ring->tx_stats.prev_pkt = -1; + tx_ring->ring_stats->tx_stats.prev_pkt = -1; return 0; err: @@ -667,7 +667,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) /* alloc new page for storage */ page = dev_alloc_pages(ice_rx_pg_order(rx_ring)); if (unlikely(!page)) { - rx_ring->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_page_failed++; return false; } @@ -680,7 +680,7 @@ ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) */ if (dma_mapping_error(rx_ring->dev, dma)) { __free_pages(page, ice_rx_pg_order(rx_ring)); - rx_ring->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_page_failed++; return false; } @@ -1091,7 +1091,7 @@ ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) if (likely(ice_test_staterr(rx_desc->wb.status_error0, ICE_RXD_EOF))) return false; - rx_ring->rx_stats.non_eop_descs++; + 
rx_ring->ring_stats->rx_stats.non_eop_descs++; return true; } @@ -1222,7 +1222,7 @@ construct_skb: } /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->rx_stats.alloc_buf_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; if (rx_buf) rx_buf->pagecnt_bias++; break; @@ -1275,7 +1275,9 @@ construct_skb: ice_finalize_xdp_rx(xdp_ring, xdp_xmit); rx_ring->skb = skb; - ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); + if (rx_ring->ring_stats) + ice_update_rx_ring_stats(rx_ring, total_rx_pkts, + total_rx_bytes); /* guarantee a trip back through this routine if there was a failure */ return failure ? budget : (int)total_rx_pkts; @@ -1292,15 +1294,25 @@ static void __ice_update_sample(struct ice_q_vector *q_vector, struct ice_tx_ring *tx_ring; ice_for_each_tx_ring(tx_ring, *rc) { - packets += tx_ring->stats.pkts; - bytes += tx_ring->stats.bytes; + struct ice_ring_stats *ring_stats; + + ring_stats = tx_ring->ring_stats; + if (!ring_stats) + continue; + packets += ring_stats->stats.pkts; + bytes += ring_stats->stats.bytes; } } else { struct ice_rx_ring *rx_ring; ice_for_each_rx_ring(rx_ring, *rc) { - packets += rx_ring->stats.pkts; - bytes += rx_ring->stats.bytes; + struct ice_ring_stats *ring_stats; + + ring_stats = rx_ring->ring_stats; + if (!ring_stats) + continue; + packets += ring_stats->stats.pkts; + bytes += ring_stats->stats.bytes; } } @@ -1549,7 +1561,7 @@ static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) /* A reprieve! 
- use start_queue because it doesn't call schedule */ netif_tx_start_queue(txring_txq(tx_ring)); - ++tx_ring->tx_stats.restart_q; + ++tx_ring->ring_stats->tx_stats.restart_q; return 0; } @@ -2293,7 +2305,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) if (__skb_linearize(skb)) goto out_drop; count = ice_txd_use_count(skb->len); - tx_ring->tx_stats.tx_linearize++; + tx_ring->ring_stats->tx_stats.tx_linearize++; } /* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD, @@ -2304,7 +2316,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) */ if (ice_maybe_stop_tx(tx_ring, count + ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_CTX_DESC)) { - tx_ring->tx_stats.tx_busy++; + tx_ring->ring_stats->tx_stats.tx_busy++; return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 932b5661ec4d..4fd0e5d0a313 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -191,6 +191,16 @@ struct ice_rxq_stats { u64 alloc_buf_failed; }; +struct ice_ring_stats { + struct rcu_head rcu; /* to avoid race on free */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; + union { + struct ice_txq_stats tx_stats; + struct ice_rxq_stats rx_stats; + }; +}; + enum ice_ring_state_t { ICE_TX_XPS_INIT_DONE, ICE_TX_NBITS, @@ -283,9 +293,7 @@ struct ice_rx_ring { u16 rx_buf_len; /* stats structs */ - struct ice_rxq_stats rx_stats; - struct ice_q_stats stats; - struct u64_stats_sync syncp; + struct ice_ring_stats *ring_stats; struct rcu_head rcu; /* to avoid race on free */ /* CL4 - 3rd cacheline starts here */ @@ -325,10 +333,8 @@ struct ice_tx_ring { u16 count; /* Number of descriptors */ u16 q_index; /* Queue number of ring */ /* stats structs */ - struct ice_txq_stats tx_stats; + struct ice_ring_stats *ring_stats; /* CL3 - 3rd cacheline starts here */ - struct ice_q_stats stats; - struct u64_stats_sync syncp; struct rcu_head 
rcu; /* to avoid race on free */ DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ struct ice_channel *ch; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 7ee38d02d1e5..25f04266c668 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -285,7 +285,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) ice_clean_xdp_irq(xdp_ring); if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { - xdp_ring->tx_stats.tx_busy++; + xdp_ring->ring_stats->tx_stats.tx_busy++; return ICE_XDP_CONSUMED; } diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index e1abfcee96dc..e3f622cad425 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -524,7 +524,14 @@ struct ice_sched_node { struct ice_sched_node *sibling; /* next sibling in the same layer */ struct ice_sched_node **children; struct ice_aqc_txsched_elem_data info; + char *name; + struct devlink_rate *rate_node; + u64 tx_max; + u64 tx_share; u32 agg_id; /* aggregator group ID */ + u32 id; + u32 tx_priority; + u32 tx_weight; u16 vsi_handle; u8 in_use; /* suspended or in use */ u8 tx_sched_layer; /* Logical Layer (1-9) */ @@ -706,7 +713,9 @@ struct ice_port_info { /* List contain profile ID(s) and other params per layer */ struct list_head rl_prof_list[ICE_AQC_TOPO_MAX_LEVEL_NUM]; struct ice_qos_cfg qos_cfg; + struct xarray sched_node_ids; u8 is_vf:1; + u8 is_custom_tx_enabled:1; }; struct ice_switch_info { diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 1c51778db951..375eb6493f0f 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -700,6 +700,30 @@ void ice_dis_vf_qs(struct ice_vf *vf) } /** + * ice_err_to_virt_err - translate errors for VF return code + * @err: 
error return code + */ +enum virtchnl_status_code ice_err_to_virt_err(int err) +{ + switch (err) { + case 0: + return VIRTCHNL_STATUS_SUCCESS; + case -EINVAL: + case -ENODEV: + return VIRTCHNL_STATUS_ERR_PARAM; + case -ENOMEM: + return VIRTCHNL_STATUS_ERR_NO_MEMORY; + case -EALREADY: + case -EBUSY: + case -EIO: + case -ENOSPC: + return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; + default: + return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + } +} + +/** * ice_check_vf_init - helper to check if VF init complete * @vf: the pointer to the VF to check */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h index 15887e772c76..9c8ef2b01f0f 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h @@ -25,6 +25,7 @@ void ice_dis_vf_qs(struct ice_vf *vf); int ice_check_vf_init(struct ice_vf *vf); +enum virtchnl_status_code ice_err_to_virt_err(int err); struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf); int ice_vsi_apply_spoofchk(struct ice_vsi *vsi, bool enable); bool ice_is_vf_trusted(struct ice_vf *vf); diff --git a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c index fc8c93fa4455..d4a4001b6e5d 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_mbx.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_mbx.c @@ -39,6 +39,24 @@ ice_aq_send_msg_to_vf(struct ice_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, return ice_sq_send_cmd(hw, &hw->mailboxq, &desc, msg, msglen, cd); } +static const u32 ice_legacy_aq_to_vc_speed[15] = { + VIRTCHNL_LINK_SPEED_100MB, /* BIT(0) */ + VIRTCHNL_LINK_SPEED_100MB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_1GB, + VIRTCHNL_LINK_SPEED_10GB, + VIRTCHNL_LINK_SPEED_20GB, + VIRTCHNL_LINK_SPEED_25GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_40GB, + VIRTCHNL_LINK_SPEED_UNKNOWN, + VIRTCHNL_LINK_SPEED_UNKNOWN, + 
VIRTCHNL_LINK_SPEED_UNKNOWN, + VIRTCHNL_LINK_SPEED_UNKNOWN /* BIT(14) */ +}; + /** * ice_conv_link_speed_to_virtchnl * @adv_link_support: determines the format of the returned link speed @@ -55,79 +73,17 @@ u32 ice_conv_link_speed_to_virtchnl(bool adv_link_support, u16 link_speed) { u32 speed; - if (adv_link_support) - switch (link_speed) { - case ICE_AQ_LINK_SPEED_10MB: - speed = ICE_LINK_SPEED_10MBPS; - break; - case ICE_AQ_LINK_SPEED_100MB: - speed = ICE_LINK_SPEED_100MBPS; - break; - case ICE_AQ_LINK_SPEED_1000MB: - speed = ICE_LINK_SPEED_1000MBPS; - break; - case ICE_AQ_LINK_SPEED_2500MB: - speed = ICE_LINK_SPEED_2500MBPS; - break; - case ICE_AQ_LINK_SPEED_5GB: - speed = ICE_LINK_SPEED_5000MBPS; - break; - case ICE_AQ_LINK_SPEED_10GB: - speed = ICE_LINK_SPEED_10000MBPS; - break; - case ICE_AQ_LINK_SPEED_20GB: - speed = ICE_LINK_SPEED_20000MBPS; - break; - case ICE_AQ_LINK_SPEED_25GB: - speed = ICE_LINK_SPEED_25000MBPS; - break; - case ICE_AQ_LINK_SPEED_40GB: - speed = ICE_LINK_SPEED_40000MBPS; - break; - case ICE_AQ_LINK_SPEED_50GB: - speed = ICE_LINK_SPEED_50000MBPS; - break; - case ICE_AQ_LINK_SPEED_100GB: - speed = ICE_LINK_SPEED_100000MBPS; - break; - default: - speed = ICE_LINK_SPEED_UNKNOWN; - break; - } - else + if (adv_link_support) { + /* convert a BIT() value into an array index */ + speed = ice_get_link_speed(fls(link_speed) - 1); + } else { /* Virtchnl speeds are not defined for every speed supported in * the hardware. 
To maintain compatibility with older AVF * drivers, while reporting the speed the new speed values are * resolved to the closest known virtchnl speeds */ - switch (link_speed) { - case ICE_AQ_LINK_SPEED_10MB: - case ICE_AQ_LINK_SPEED_100MB: - speed = (u32)VIRTCHNL_LINK_SPEED_100MB; - break; - case ICE_AQ_LINK_SPEED_1000MB: - case ICE_AQ_LINK_SPEED_2500MB: - case ICE_AQ_LINK_SPEED_5GB: - speed = (u32)VIRTCHNL_LINK_SPEED_1GB; - break; - case ICE_AQ_LINK_SPEED_10GB: - speed = (u32)VIRTCHNL_LINK_SPEED_10GB; - break; - case ICE_AQ_LINK_SPEED_20GB: - speed = (u32)VIRTCHNL_LINK_SPEED_20GB; - break; - case ICE_AQ_LINK_SPEED_25GB: - speed = (u32)VIRTCHNL_LINK_SPEED_25GB; - break; - case ICE_AQ_LINK_SPEED_40GB: - case ICE_AQ_LINK_SPEED_50GB: - case ICE_AQ_LINK_SPEED_100GB: - speed = (u32)VIRTCHNL_LINK_SPEED_40GB; - break; - default: - speed = (u32)VIRTCHNL_LINK_SPEED_UNKNOWN; - break; - } + speed = ice_legacy_aq_to_vc_speed[fls(link_speed) - 1]; + } return speed; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index 2b4c791b6cba..dab3cd5d300e 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -462,6 +462,9 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RSS_REG; } + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) + vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC; + if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF) vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_FDIR_PF; @@ -1658,6 +1661,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* copy Rx queue info from VF into VSI */ if (qpi->rxq.ring_len > 0) { u16 max_frame_size = ice_vc_get_max_frame_size(vf); + u32 rxdid; vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; vsi->rx_rings[i]->count = qpi->rxq.ring_len; @@ -1685,6 +1689,24 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) vf->vf_id, i); goto error_param; 
} + + /* If Rx flex desc is supported, select RXDID for Rx + * queues. Otherwise, use legacy 32byte descriptor + * format. Legacy 16byte descriptor is not supported. + * If this RXDID is selected, return error. + */ + if (vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { + rxdid = qpi->rxq.rxdid; + if (!(BIT(rxdid) & pf->supported_rxdids)) + goto error_param; + } else { + rxdid = ICE_RXDID_LEGACY_1; + } + + ice_write_qrxflxp_cntxt(&vsi->back->hw, + vsi->rxq_map[q_idx], + rxdid, 0x03, false); } } @@ -2457,6 +2479,164 @@ error_param: } /** + * ice_vc_get_rss_hena - return the RSS HENA bits allowed by the hardware + * @vf: pointer to the VF info + */ +static int ice_vc_get_rss_hena(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_rss_hena *vrh = NULL; + int len = 0, ret; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + len = sizeof(struct virtchnl_rss_hena); + vrh = kzalloc(len, GFP_KERNEL); + if (!vrh) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + len = 0; + goto err; + } + + vrh->hena = ICE_DEFAULT_RSS_HENA; +err: + /* send the response back to the VF */ + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HENA_CAPS, v_ret, + (u8 *)vrh, len); + kfree(vrh); + return ret; +} + +/** + * ice_vc_set_rss_hena - set RSS HENA bits for the VF + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + */ +static int ice_vc_set_rss_hena(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_rss_hena *vrh = (struct virtchnl_rss_hena *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + struct device *dev; + int status; + + dev = ice_pf_to_dev(pf); + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = 
VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + dev_err(dev, "RSS not supported by PF\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + /* clear all previously programmed RSS configuration to allow VF drivers + * the ability to customize the RSS configuration and/or completely + * disable RSS + */ + status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); + if (status && !vrh->hena) { + /* only report failure to clear the current RSS configuration if + * that was clearly the VF's intention (i.e. vrh->hena = 0) + */ + v_ret = ice_err_to_virt_err(status); + goto err; + } else if (status) { + /* allow the VF to update the RSS configuration even on failure + * to clear the current RSS confguration in an attempt to keep + * RSS in a working state + */ + dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n", + vf->vf_id); + } + + if (vrh->hena) { + status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, vrh->hena); + v_ret = ice_err_to_virt_err(status); + } + + /* send the response to the VF */ +err: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HENA, v_ret, + NULL, 0); +} + +/** + * ice_vc_query_rxdid - query RXDID supported by DDP package + * @vf: pointer to VF info + * + * Called from VF to query a bitmap of supported flexible + * descriptor RXDIDs of a DDP package. 
+ */ +static int ice_vc_query_rxdid(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_supported_rxdids *rxdid = NULL; + struct ice_hw *hw = &vf->pf->hw; + struct ice_pf *pf = vf->pf; + int len = 0; + int ret, i; + u32 regval; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + len = sizeof(struct virtchnl_supported_rxdids); + rxdid = kzalloc(len, GFP_KERNEL); + if (!rxdid) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + len = 0; + goto err; + } + + /* Read flexiflag registers to determine whether the + * corresponding RXDID is configured and supported or not. + * Since Legacy 16byte descriptor format is not supported, + * start from Legacy 32byte descriptor. + */ + for (i = ICE_RXDID_LEGACY_1; i < ICE_FLEX_DESC_RXDID_MAX_NUM; i++) { + regval = rd32(hw, GLFLXP_RXDID_FLAGS(i, 0)); + if ((regval >> GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) + & GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) + rxdid->supported_rxdids |= BIT(i); + } + + pf->supported_rxdids = rxdid->supported_rxdids; + +err: + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, + v_ret, (u8 *)rxdid, len); + kfree(rxdid); + return ret; +} + +/** * ice_vf_init_vlan_stripping - enable/disable VLAN stripping on initialization * @vf: VF to enable/disable VLAN stripping for on initialization * @@ -3490,6 +3670,9 @@ static const struct ice_virtchnl_ops ice_virtchnl_dflt_ops = { .cfg_promiscuous_mode_msg = ice_vc_cfg_promiscuous_mode_msg, .add_vlan_msg = ice_vc_add_vlan_msg, .remove_vlan_msg = ice_vc_remove_vlan_msg, + .query_rxdid = ice_vc_query_rxdid, + .get_rss_hena = ice_vc_get_rss_hena, + .set_rss_hena_msg = ice_vc_set_rss_hena, .ena_vlan_stripping = ice_vc_ena_vlan_stripping, .dis_vlan_stripping = ice_vc_dis_vlan_stripping, .handle_rss_cfg_msg = ice_vc_handle_rss_cfg, @@ -3624,6 +3807,9 
@@ static const struct ice_virtchnl_ops ice_virtchnl_repr_ops = { .cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode, .add_vlan_msg = ice_vc_add_vlan_msg, .remove_vlan_msg = ice_vc_remove_vlan_msg, + .query_rxdid = ice_vc_query_rxdid, + .get_rss_hena = ice_vc_get_rss_hena, + .set_rss_hena_msg = ice_vc_set_rss_hena, .ena_vlan_stripping = ice_vc_ena_vlan_stripping, .dis_vlan_stripping = ice_vc_dis_vlan_stripping, .handle_rss_cfg_msg = ice_vc_handle_rss_cfg, @@ -3764,6 +3950,15 @@ error_handler: case VIRTCHNL_OP_DEL_VLAN: err = ops->remove_vlan_msg(vf, msg); break; + case VIRTCHNL_OP_GET_SUPPORTED_RXDIDS: + err = ops->query_rxdid(vf); + break; + case VIRTCHNL_OP_GET_RSS_HENA_CAPS: + err = ops->get_rss_hena(vf); + break; + case VIRTCHNL_OP_SET_RSS_HENA: + err = ops->set_rss_hena_msg(vf, msg); + break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: err = ops->ena_vlan_stripping(vf); break; diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/ice_virtchnl.h index b5a3fd8adbb4..b454654d7b0c 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.h @@ -17,6 +17,7 @@ * broadcast, and 16 for additional unicast/multicast filters */ #define ICE_MAX_MACADDR_PER_VF 18 +#define ICE_FLEX_DESC_RXDID_MAX_NUM 64 struct ice_virtchnl_ops { int (*get_ver_msg)(struct ice_vf *vf, u8 *msg); @@ -35,6 +36,9 @@ struct ice_virtchnl_ops { int (*cfg_promiscuous_mode_msg)(struct ice_vf *vf, u8 *msg); int (*add_vlan_msg)(struct ice_vf *vf, u8 *msg); int (*remove_vlan_msg)(struct ice_vf *vf, u8 *msg); + int (*query_rxdid)(struct ice_vf *vf); + int (*get_rss_hena)(struct ice_vf *vf); + int (*set_rss_hena_msg)(struct ice_vf *vf, u8 *msg); int (*ena_vlan_stripping)(struct ice_vf *vf); int (*dis_vlan_stripping)(struct ice_vf *vf); int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add); diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c 
b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c index 5a82216e7d03..7d547fa616fa 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c @@ -70,6 +70,11 @@ static const u32 rss_pf_allowlist_opcodes[] = { VIRTCHNL_OP_GET_RSS_HENA_CAPS, VIRTCHNL_OP_SET_RSS_HENA, }; +/* VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC */ +static const u32 rx_flex_desc_allowlist_opcodes[] = { + VIRTCHNL_OP_GET_SUPPORTED_RXDIDS, +}; + /* VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF */ static const u32 adv_rss_pf_allowlist_opcodes[] = { VIRTCHNL_OP_ADD_RSS_CFG, VIRTCHNL_OP_DEL_RSS_CFG, @@ -96,6 +101,7 @@ static const struct allowlist_opcode_info allowlist_opcodes[] = { ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_REQ_QUEUES, req_queues_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN, vlan_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RSS_PF, rss_pf_allowlist_opcodes), + ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC, rx_flex_desc_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF, adv_rss_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_FDIR_PF, fdir_pf_allowlist_opcodes), ALLOW_ITEM(VIRTCHNL_VF_OFFLOAD_VLAN_V2, vlan_v2_allowlist_opcodes), diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 056c904b83cc..907055b77af0 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -24,13 +24,24 @@ static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx) */ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) { - memset(&vsi->rx_rings[q_idx]->rx_stats, 0, - sizeof(vsi->rx_rings[q_idx]->rx_stats)); - memset(&vsi->tx_rings[q_idx]->stats, 0, - sizeof(vsi->tx_rings[q_idx]->stats)); + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf; + + pf = vsi->back; + if (!pf->vsi_stats) + return; + + vsi_stat = pf->vsi_stats[vsi->idx]; + if (!vsi_stat) + return; + + memset(&vsi_stat->rx_ring_stats[q_idx]->rx_stats, 0, + 
sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); + memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, + sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); if (ice_is_xdp_ena_vsi(vsi)) - memset(&vsi->xdp_rings[q_idx]->stats, 0, - sizeof(vsi->xdp_rings[q_idx]->stats)); + memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0, + sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats)); } /** @@ -722,7 +733,7 @@ construct_skb: /* XDP_PASS path */ skb = ice_construct_skb_zc(rx_ring, xdp); if (!skb) { - rx_ring->rx_stats.alloc_buf_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; break; } |