author    Jakub Kicinski <kuba@kernel.org>  2025-02-07 04:34:36 +0300
committer Jakub Kicinski <kuba@kernel.org>  2025-02-07 04:34:36 +0300
commit    26db4dbb747813b5946aff31485873f071a10332 (patch)
tree      d1b01bcd3099f55fa77da5538f4e8d5e4c20db19 /drivers/net/ethernet/intel/ice/ice_sriov.c
parent    ec7309525a37b3f34c4efa85fc2ecaa65f41830e (diff)
parent    d67627e7b53203ca150e54723abbed81a0716286 (diff)
download  linux-26db4dbb747813b5946aff31485873f071a10332.tar.xz
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue
Tony Nguyen says:

====================
ice: managing MSI-X in driver

Michal Swiatkowski says:

This is another attempt to let the user manage the number of MSI-X vectors used for each feature in ice. The first attempt, via the devlink resources API, was not accepted upstream; static MSI-X allocation through devlink resources is also not very user friendly.

This attempt takes a more dynamic approach: "dynamic" across the whole kernel when the platform supports it, and "dynamic" across the driver when it does not. To achieve that, reuse the global devlink parameters pf_msix_max and pf_msix_min. This fits how ice hardware counts MSI-X: the amount of MSI-X reported on PCI is the whole MSI-X budget for the card (including MSI-X for VFs). pf_msix_max lets the user statically set how many MSI-X vectors the PF gets and how many are reserved for VFs. pf_msix_min sets the minimum number of MSI-X vectors with which the ice driver must still probe correctly. The meaning of this field for dynamic vs. static allocation:

- on a system with dynamic MSI-X allocation support:
  * allocate pf_msix_min statically; the rest is allocated dynamically
- on a system without dynamic MSI-X allocation support:
  * try to allocate pf_msix_max statically; the minimum acceptable result is pf_msix_min

As Jesse and Piotr suggested, pf_msix_max and pf_msix_min can (and probably should) be stored in NVM. This patchset does not implement that.

The dynamic (kernel or driver) approach means that splitting MSI-X between RDMA and Ethernet on an MSI-X shortage is no longer correct. It can work when the dynamic behavior is only on the driver side, but not when it is on the kernel side. Remove that code and move to allocating MSI-X feature by feature: if no more MSI-X is available for a feature, the feature runs with fewer MSI-X or is turned off.

There is a regression here. With MSI-X splitting, the user could run RDMA and Ethernet even on a system without enough MSI-X; now only Ethernet will work. RDMA can be turned back on by lowering the number of PF queues and reprobing the RDMA driver.

Example: a system with 72 CPUs; Ethernet, RDMA, and flow director (1 MSI-X); 1 MSI-X for the OICR on the PF; and 1 more for RDMA. The card uses 1 + 72 + 1 + 72 + 1 = 147 vectors. We set pf_msix_min = 2 and pf_msix_max = 128:

OICR: 1
eth: 72
flow director: 1
RDMA: 128 - 74 = 54

We can lower the number of PF queues to 36 and do a devlink reinit:

OICR: 1
eth: 36
RDMA: 73
flow director: 1

We can also turn RDMA off entirely (implemented in "ice: enable_rdma devlink param"):

OICR: 1
eth: 72
RDMA: 0 (turned off)
flow director: 1

After these changes there is a static base vector for SR-IOV (and probably SIOV in the future). The last patch of this series simplifies the VF MSI-X management code based on that static vector.

Changing the number of queues via ethtool now also changes MSI-X. When there is enough MSI-X the mapping is always one to one; when there is not, there will be more queues than MSI-X vectors.

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
  ice: init flow director before RDMA
  ice: simplify VF MSI-X managing
  ice: enable_rdma devlink param
  ice: treat dyn_allowed only as suggestion
  ice, irdma: move interrupts code to irdma
  ice: get rid of num_lan_msix field
  ice: remove splitting MSI-X between features
  ice: devlink PF MSI-X max and min parameter
  ice: count combined queues using Rx/Tx count
====================

Link: https://patch.msgid.link/20250205185512.895887-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
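For illustration, a minimal sketch of the probe-time policy the cover letter describes, written against the generic PCI MSI-X API (pci_msix_can_alloc_dyn() and pci_alloc_irq_vectors() are real kernel interfaces; msix_probe_policy() is a hypothetical helper, not a function in the ice driver):

#include <linux/pci.h>

/*
 * Hypothetical helper showing the cover letter's policy: with dynamic
 * MSI-X support only pf_msix_min is reserved statically and later
 * vectors come from pci_msix_alloc_irq_at(); without it, ask for
 * pf_msix_max up front and accept anything >= pf_msix_min.
 */
static int msix_probe_policy(struct pci_dev *pdev, int pf_msix_min,
			     int pf_msix_max)
{
	if (pci_msix_can_alloc_dyn(pdev))
		/* static part only; the rest is allocated on demand */
		return pci_alloc_irq_vectors(pdev, pf_msix_min, pf_msix_min,
					     PCI_IRQ_MSIX);

	/* no dynamic support: one static allocation, where pf_msix_min
	 * is the smallest result that still lets the probe succeed
	 */
	return pci_alloc_irq_vectors(pdev, pf_msix_min, pf_msix_max,
				     PCI_IRQ_MSIX);
}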
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_sriov.c')
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sriov.c  154
1 file changed, 11 insertions(+), 143 deletions(-)
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index b83f99c01d91..33eac29b6a50 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -123,27 +123,6 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
}
/**
- * ice_sriov_free_msix_res - Reset/free any used MSIX resources
- * @pf: pointer to the PF structure
- *
- * Since no MSIX entries are taken from the pf->irq_tracker, just clear
- * the pf->sriov_base_vector.
- *
- * Returns 0 on success, and -EINVAL on error.
- */
-static int ice_sriov_free_msix_res(struct ice_pf *pf)
-{
- if (!pf)
- return -EINVAL;
-
- bitmap_free(pf->sriov_irq_bm);
- pf->sriov_irq_size = 0;
- pf->sriov_base_vector = 0;
-
- return 0;
-}
-
-/**
* ice_free_vfs - Free all VFs
* @pf: pointer to the PF structure
*/
@@ -177,6 +156,7 @@ void ice_free_vfs(struct ice_pf *pf)
ice_eswitch_detach_vf(pf, vf);
ice_dis_vf_qs(vf);
+ ice_virt_free_irqs(pf, vf->first_vector_idx, vf->num_msix);
if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
/* disable VF qp mappings and set VF disable state */
@@ -200,9 +180,6 @@ void ice_free_vfs(struct ice_pf *pf)
mutex_unlock(&vf->cfg_lock);
}
- if (ice_sriov_free_msix_res(pf))
- dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
-
vfs->num_qps_per = 0;
ice_free_vf_entries(pf);
@@ -372,40 +349,6 @@ void ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
}
/**
- * ice_sriov_set_msix_res - Set any used MSIX resources
- * @pf: pointer to PF structure
- * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
- *
- * This function allows SR-IOV resources to be taken from the end of the PF's
- * allowed HW MSIX vectors so that the irq_tracker will not be affected. We
- * just set the pf->sriov_base_vector and return success.
- *
- * If there are not enough resources available, return an error. This should
- * always be caught by ice_set_per_vf_res().
- *
- * Return 0 on success, and -EINVAL when there are not enough MSIX vectors
- * in the PF's space available for SR-IOV.
- */
-static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
-{
- u16 total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
- int vectors_used = ice_get_max_used_msix_vector(pf);
- int sriov_base_vector;
-
- sriov_base_vector = total_vectors - num_msix_needed;
-
- /* make sure we only grab irq_tracker entries from the list end and
- * that we have enough available MSIX vectors
- */
- if (sriov_base_vector < vectors_used)
- return -EINVAL;
-
- pf->sriov_base_vector = sriov_base_vector;
-
- return 0;
-}
-
-/**
* ice_set_per_vf_res - check if vectors and queues are available
* @pf: pointer to the PF structure
* @num_vfs: the number of SR-IOV VFs being configured
@@ -429,11 +372,9 @@ static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
*/
static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
{
- int vectors_used = ice_get_max_used_msix_vector(pf);
u16 num_msix_per_vf, num_txq, num_rxq, avail_qs;
int msix_avail_per_vf, msix_avail_for_sriov;
struct device *dev = ice_pf_to_dev(pf);
- int err;
lockdep_assert_held(&pf->vfs.table_lock);
@@ -441,8 +382,7 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
return -EINVAL;
/* determine MSI-X resources per VF */
- msix_avail_for_sriov = pf->hw.func_caps.common_cap.num_msix_vectors -
- vectors_used;
+ msix_avail_for_sriov = pf->virt_irq_tracker.num_entries;
msix_avail_per_vf = msix_avail_for_sriov / num_vfs;
if (msix_avail_per_vf >= ICE_NUM_VF_MSIX_MED) {
num_msix_per_vf = ICE_NUM_VF_MSIX_MED;
@@ -481,13 +421,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
return -ENOSPC;
}
- err = ice_sriov_set_msix_res(pf, num_msix_per_vf * num_vfs);
- if (err) {
- dev_err(dev, "Unable to set MSI-X resources for %d VFs, err %d\n",
- num_vfs, err);
- return err;
- }
-
/* only allow equal Tx/Rx queue count (i.e. queue pairs) */
pf->vfs.num_qps_per = min_t(int, num_txq, num_rxq);
pf->vfs.num_msix_per = num_msix_per_vf;
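The hunk above changes where ice_set_per_vf_res() gets its budget (the virt IRQ tracker instead of total vectors minus used ones); the tier selection itself is unchanged. A standalone sketch of that selection, where the tier values are illustrative assumptions, not the driver's constants:

#include <stdio.h>

#define VF_MSIX_MED   17	/* assumed tier sizes, for illustration */
#define VF_MSIX_SMALL  5
#define VF_MSIX_MIN    2

/* divide the SR-IOV budget evenly and pick the largest tier that fits */
static int pick_msix_per_vf(int msix_avail_for_sriov, int num_vfs)
{
	int msix_avail_per_vf = msix_avail_for_sriov / num_vfs;

	if (msix_avail_per_vf >= VF_MSIX_MED)
		return VF_MSIX_MED;
	if (msix_avail_per_vf >= VF_MSIX_SMALL)
		return VF_MSIX_SMALL;
	if (msix_avail_per_vf >= VF_MSIX_MIN)
		return VF_MSIX_MIN;
	return -1; /* not enough vectors; the driver returns -ENOSPC */
}

int main(void)
{
	/* 128 tracker entries shared by 8 VFs -> 16 each -> SMALL tier */
	printf("%d\n", pick_msix_per_vf(128, 8)); /* prints 5 */
	return 0;
}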
@@ -498,52 +431,6 @@ static int ice_set_per_vf_res(struct ice_pf *pf, u16 num_vfs)
}
/**
- * ice_sriov_get_irqs - get irqs for the SR-IOV use case
- * @pf: pointer to PF structure
- * @needed: number of irqs to get
- *
- * This returns the first MSI-X vector index in PF space that is used by this
- * VF. This index is used when accessing PF relative registers such as
- * GLINT_VECT2FUNC and GLINT_DYN_CTL.
- * This will always be the OICR index in the AVF driver, so any functionality
- * using vf->first_vector_idx for queue configuration will use this value.
- *
- * Only SRIOV-specific vectors are tracked in sriov_irq_bm. SRIOV vectors are
- * allocated from the end of the global irq index: the first bit in
- * sriov_irq_bm maps to the last irq index, and so on. This simplifies
- * extending the SRIOV range, which always spans sriov_base_vector to the
- * last irq index, while sriov_base_vector itself moves as the range grows
- * or shrinks.
- */
-static int ice_sriov_get_irqs(struct ice_pf *pf, u16 needed)
-{
- int res = bitmap_find_next_zero_area(pf->sriov_irq_bm,
- pf->sriov_irq_size, 0, needed, 0);
- /* conversion from number in bitmap to global irq index */
- int index = pf->sriov_irq_size - res - needed;
-
- if (res >= pf->sriov_irq_size || index < pf->sriov_base_vector)
- return -ENOENT;
-
- bitmap_set(pf->sriov_irq_bm, res, needed);
- return index;
-}
-
-/**
- * ice_sriov_free_irqs - free irqs used by the VF
- * @pf: pointer to PF structure
- * @vf: pointer to VF structure
- */
-static void ice_sriov_free_irqs(struct ice_pf *pf, struct ice_vf *vf)
-{
- /* Move back from first vector index to first index in bitmap */
- int bm_i = pf->sriov_irq_size - vf->first_vector_idx - vf->num_msix;
-
- bitmap_clear(pf->sriov_irq_bm, bm_i, vf->num_msix);
- vf->first_vector_idx = 0;
-}
-
-/**
* ice_init_vf_vsi_res - initialize/setup VF VSI resources
* @vf: VF to initialize/setup the VSI for
*
@@ -556,7 +443,7 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf)
struct ice_vsi *vsi;
int err;
- vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix);
+ vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix);
if (vf->first_vector_idx < 0)
return -ENOMEM;
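The reversed bitmap removed above is worth a second look: bit 0 of sriov_irq_bm stood for the last global irq index, so the allocation and free paths were mirror-image index conversions. A self-contained userspace sketch of just that arithmetic (sizes and values are illustrative):

#include <stdio.h>

#define IRQ_SIZE 16	/* stand-in for pf->sriov_irq_size */

/* removed ice_sriov_get_irqs(): bitmap offset -> global irq index */
static int to_global_index(int res, int needed)
{
	return IRQ_SIZE - res - needed;
}

/* removed ice_sriov_free_irqs(): global irq index -> bitmap offset */
static int to_bitmap_index(int first_vector_idx, int num_msix)
{
	return IRQ_SIZE - first_vector_idx - num_msix;
}

int main(void)
{
	/* a VF needing 4 vectors, found at bitmap offset 0 */
	int idx = to_global_index(0, 4);	/* 16 - 0 - 4 = 12 */

	printf("global base %d, bitmap base %d\n",
	       idx, to_bitmap_index(idx, 4));	/* prints 12 and 0 */
	return 0;
}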
@@ -856,16 +743,10 @@ err_free_entries:
*/
static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
{
- int total_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
int ret;
- pf->sriov_irq_bm = bitmap_zalloc(total_vectors, GFP_KERNEL);
- if (!pf->sriov_irq_bm)
- return -ENOMEM;
- pf->sriov_irq_size = total_vectors;
-
/* Disable global interrupt 0 so we don't try to handle the VFLR. */
wr32(hw, GLINT_DYN_CTL(pf->oicr_irq.index),
ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
@@ -918,7 +799,6 @@ err_unroll_intr:
/* rearm interrupts here */
ice_irq_dynamic_ena(hw, NULL, NULL);
clear_bit(ICE_OICR_INTR_DIS, pf->state);
- bitmap_free(pf->sriov_irq_bm);
return ret;
}
@@ -992,16 +872,7 @@ u32 ice_sriov_get_vf_total_msix(struct pci_dev *pdev)
{
struct ice_pf *pf = pci_get_drvdata(pdev);
- return pf->sriov_irq_size - ice_get_max_used_msix_vector(pf);
-}
-
-static int ice_sriov_move_base_vector(struct ice_pf *pf, int move)
-{
- if (pf->sriov_base_vector - move < ice_get_max_used_msix_vector(pf))
- return -ENOMEM;
-
- pf->sriov_base_vector -= move;
- return 0;
+ return pf->virt_irq_tracker.num_entries;
}
static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id)
@@ -1020,7 +891,8 @@ static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id)
continue;
ice_dis_vf_mappings(tmp_vf);
- ice_sriov_free_irqs(pf, tmp_vf);
+ ice_virt_free_irqs(pf, tmp_vf->first_vector_idx,
+ tmp_vf->num_msix);
vf_ids[to_remap] = tmp_vf->vf_id;
to_remap += 1;
@@ -1032,7 +904,7 @@ static void ice_sriov_remap_vectors(struct ice_pf *pf, u16 restricted_id)
continue;
tmp_vf->first_vector_idx =
- ice_sriov_get_irqs(pf, tmp_vf->num_msix);
+ ice_virt_get_irqs(pf, tmp_vf->num_msix);
/* there is no need to rebuild VSI as we are only changing the
 * vector indexes, not the amount of MSI-X or queues
*/
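A toy model of the remap flow in the hunk above (a linear allocator stands in for the virt IRQ tracker; everything here is illustrative, not the ice_virt_* implementation): once the resized VF frees its old range, every other VF releases and re-acquires its vectors so allocations stay packed, and the resized VF then takes a fresh range at the end.

#include <stdio.h>

#define NUM_VFS 3

struct vf { int id, first_vector_idx, num_msix; };

static int next_free;	/* stand-in for the tracker's cursor */

static int virt_get_irqs(int needed)
{
	int base = next_free;

	next_free += needed;
	return base;
}

int main(void)
{
	struct vf vfs[NUM_VFS] = { {0, 0, 8}, {1, 8, 8}, {2, 16, 8} };
	int restricted_id = 1;	/* VF being resized to 16 vectors */

	next_free = 0;	/* all ranges below are handed out afresh */
	for (int i = 0; i < NUM_VFS; i++) {
		if (vfs[i].id == restricted_id)
			continue;
		vfs[i].first_vector_idx = virt_get_irqs(vfs[i].num_msix);
		printf("VF%d remapped to base %d\n",
		       vfs[i].id, vfs[i].first_vector_idx);
	}

	/* the resized VF allocates last, landing after the packed ranges */
	vfs[restricted_id].num_msix = 16;
	vfs[restricted_id].first_vector_idx = virt_get_irqs(16);
	printf("VF%d resized, new base %d\n",
	       restricted_id, vfs[restricted_id].first_vector_idx);
	return 0;
}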
@@ -1105,20 +977,15 @@ int ice_sriov_set_msix_vec_count(struct pci_dev *vf_dev, int msix_vec_count)
prev_msix = vf->num_msix;
prev_queues = vf->num_vf_qs;
- if (ice_sriov_move_base_vector(pf, msix_vec_count - prev_msix)) {
- ice_put_vf(vf);
- return -ENOSPC;
- }
-
ice_dis_vf_mappings(vf);
- ice_sriov_free_irqs(pf, vf);
+ ice_virt_free_irqs(pf, vf->first_vector_idx, vf->num_msix);
/* Remap all VFs besides the one now being configured */
ice_sriov_remap_vectors(pf, vf->vf_id);
vf->num_msix = msix_vec_count;
vf->num_vf_qs = queues;
- vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix);
+ vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix);
if (vf->first_vector_idx < 0)
goto unroll;
@@ -1147,7 +1014,8 @@ unroll:
vf->num_msix = prev_msix;
vf->num_vf_qs = prev_queues;
- vf->first_vector_idx = ice_sriov_get_irqs(pf, vf->num_msix);
+
+ vf->first_vector_idx = ice_virt_get_irqs(pf, vf->num_msix);
if (vf->first_vector_idx < 0) {
ice_put_vf(vf);
return -EINVAL;