summaryrefslogtreecommitdiff
path: root/drivers/scsi/lpfc/lpfc_sli.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_sli.c')
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c2291
1 files changed, 1479 insertions, 812 deletions
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 2242e9b3ca12..d0817facdae3 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1,7 +1,7 @@
/*******************************************************************
* This file is part of the Emulex Linux Device Driver for *
* Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2018 Broadcom. All Rights Reserved. The term *
+ * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
* “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
* Copyright (C) 2004-2016 Emulex. All rights reserved. *
* EMULEX and SLI are trademarks of Emulex. *
@@ -78,12 +78,13 @@ static void lpfc_sli4_send_seq_to_ulp(struct lpfc_vport *,
struct hbq_dmabuf *);
static void lpfc_sli4_handle_mds_loopback(struct lpfc_vport *vport,
struct hbq_dmabuf *dmabuf);
-static int lpfc_sli4_fp_handle_cqe(struct lpfc_hba *, struct lpfc_queue *,
- struct lpfc_cqe *);
+static bool lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba,
+ struct lpfc_queue *cq, struct lpfc_cqe *cqe);
static int lpfc_sli4_post_sgl_list(struct lpfc_hba *, struct list_head *,
int);
static void lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba,
- struct lpfc_eqe *eqe, uint32_t qidx);
+ struct lpfc_queue *eq,
+ struct lpfc_eqe *eqe);
static bool lpfc_sli4_mbox_completions_pending(struct lpfc_hba *phba);
static bool lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba);
static int lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba,
@@ -160,7 +161,7 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe128 *wqe)
}
q->WQ_posted++;
/* set consumption flag every once in a while */
- if (!((q->host_index + 1) % q->entry_repost))
+ if (!((q->host_index + 1) % q->notify_interval))
bf_set(wqe_wqec, &wqe->generic.wqe_com, 1);
else
bf_set(wqe_wqec, &wqe->generic.wqe_com, 0);
@@ -325,29 +326,16 @@ lpfc_sli4_mq_release(struct lpfc_queue *q)
static struct lpfc_eqe *
lpfc_sli4_eq_get(struct lpfc_queue *q)
{
- struct lpfc_hba *phba;
struct lpfc_eqe *eqe;
- uint32_t idx;
/* sanity check on queue memory */
if (unlikely(!q))
return NULL;
- phba = q->phba;
- eqe = q->qe[q->hba_index].eqe;
+ eqe = q->qe[q->host_index].eqe;
/* If the next EQE is not valid then we are done */
if (bf_get_le32(lpfc_eqe_valid, eqe) != q->qe_valid)
return NULL;
- /* If the host has not yet processed the next entry then we are done */
- idx = ((q->hba_index + 1) % q->entry_count);
- if (idx == q->host_index)
- return NULL;
-
- q->hba_index = idx;
- /* if the index wrapped around, toggle the valid bit */
- if (phba->sli4_hba.pc_sli4_params.eqav && !q->hba_index)
- q->qe_valid = (q->qe_valid) ? 0 : 1;
-
/*
* insert barrier for instruction interlock : data from the hardware
@@ -397,44 +385,25 @@ lpfc_sli4_if6_eq_clr_intr(struct lpfc_queue *q)
}
/**
- * lpfc_sli4_eq_release - Indicates the host has finished processing an EQ
+ * lpfc_sli4_write_eq_db - write EQ DB for eqe's consumed or arm state
+ * @phba: adapter with EQ
* @q: The Event Queue that the host has completed processing for.
+ * @count: Number of elements that have been consumed
* @arm: Indicates whether the host wants to arms this CQ.
*
- * This routine will mark all Event Queue Entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of EQEs that were popped.
+ * This routine will notify the HBA, by ringing the doorbell, that count
+ * number of EQEs have been processed. The @arm parameter indicates whether
+ * the queue should be rearmed when ringing the doorbell.
**/
-uint32_t
-lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+ uint32_t count, bool arm)
{
- uint32_t released = 0;
- struct lpfc_hba *phba;
- struct lpfc_eqe *temp_eqe;
struct lpfc_register doorbell;
/* sanity check on queue memory */
- if (unlikely(!q))
- return 0;
- phba = q->phba;
-
- /* while there are valid entries */
- while (q->hba_index != q->host_index) {
- if (!phba->sli4_hba.pc_sli4_params.eqav) {
- temp_eqe = q->qe[q->host_index].eqe;
- bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
- }
- released++;
- q->host_index = ((q->host_index + 1) % q->entry_count);
- }
- if (unlikely(released == 0 && !arm))
- return 0;
+ if (unlikely(!q || (count == 0 && !arm)))
+ return;
/* ring doorbell for number popped */
doorbell.word0 = 0;
@@ -442,7 +411,7 @@ lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
bf_set(lpfc_eqcq_doorbell_eqci, &doorbell, 1);
}
- bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+ bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, count);
bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_EVENT);
bf_set(lpfc_eqcq_doorbell_eqid_hi, &doorbell,
(q->queue_id >> LPFC_EQID_HI_FIELD_SHIFT));
@@ -451,60 +420,112 @@ lpfc_sli4_eq_release(struct lpfc_queue *q, bool arm)
/* PCI read to flush PCI pipeline on re-arming for INTx mode */
if ((q->phba->intr_type == INTx) && (arm == LPFC_QUEUE_REARM))
readl(q->phba->sli4_hba.EQDBregaddr);
- return released;
}
/**
- * lpfc_sli4_if6_eq_release - Indicates the host has finished processing an EQ
+ * lpfc_sli4_if6_write_eq_db - write EQ DB for eqe's consumed or arm state
+ * @phba: adapter with EQ
* @q: The Event Queue that the host has completed processing for.
+ * @count: Number of elements that have been consumed
* @arm: Indicates whether the host wants to arms this CQ.
*
- * This routine will mark all Event Queue Entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the EQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of EQEs that were popped.
+ * This routine will notify the HBA, by ringing the doorbell, that count
+ * number of EQEs have been processed. The @arm parameter indicates whether
+ * the queue should be rearmed when ringing the doorbell.
**/
-uint32_t
-lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_if6_write_eq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+ uint32_t count, bool arm)
{
- uint32_t released = 0;
- struct lpfc_hba *phba;
- struct lpfc_eqe *temp_eqe;
struct lpfc_register doorbell;
/* sanity check on queue memory */
- if (unlikely(!q))
- return 0;
- phba = q->phba;
-
- /* while there are valid entries */
- while (q->hba_index != q->host_index) {
- if (!phba->sli4_hba.pc_sli4_params.eqav) {
- temp_eqe = q->qe[q->host_index].eqe;
- bf_set_le32(lpfc_eqe_valid, temp_eqe, 0);
- }
- released++;
- q->host_index = ((q->host_index + 1) % q->entry_count);
- }
- if (unlikely(released == 0 && !arm))
- return 0;
+ if (unlikely(!q || (count == 0 && !arm)))
+ return;
/* ring doorbell for number popped */
doorbell.word0 = 0;
if (arm)
bf_set(lpfc_if6_eq_doorbell_arm, &doorbell, 1);
- bf_set(lpfc_if6_eq_doorbell_num_released, &doorbell, released);
+ bf_set(lpfc_if6_eq_doorbell_num_released, &doorbell, count);
bf_set(lpfc_if6_eq_doorbell_eqid, &doorbell, q->queue_id);
writel(doorbell.word0, q->phba->sli4_hba.EQDBregaddr);
/* PCI read to flush PCI pipeline on re-arming for INTx mode */
if ((q->phba->intr_type == INTx) && (arm == LPFC_QUEUE_REARM))
readl(q->phba->sli4_hba.EQDBregaddr);
- return released;
+}
+
+static void
+__lpfc_sli4_consume_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq,
+ struct lpfc_eqe *eqe)
+{
+ if (!phba->sli4_hba.pc_sli4_params.eqav)
+ bf_set_le32(lpfc_eqe_valid, eqe, 0);
+
+ eq->host_index = ((eq->host_index + 1) % eq->entry_count);
+
+ /* if the index wrapped around, toggle the valid bit */
+ if (phba->sli4_hba.pc_sli4_params.eqav && !eq->host_index)
+ eq->qe_valid = (eq->qe_valid) ? 0 : 1;
+}
+
+static void
+lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+ struct lpfc_eqe *eqe;
+ uint32_t count = 0;
+
+ /* walk all the EQ entries and drop on the floor */
+ eqe = lpfc_sli4_eq_get(eq);
+ while (eqe) {
+ __lpfc_sli4_consume_eqe(phba, eq, eqe);
+ count++;
+ eqe = lpfc_sli4_eq_get(eq);
+ }
+
+ /* Clear and re-arm the EQ */
+ phba->sli4_hba.sli4_write_eq_db(phba, eq, count, LPFC_QUEUE_REARM);
+}
+
+static int
+lpfc_sli4_process_eq(struct lpfc_hba *phba, struct lpfc_queue *eq)
+{
+ struct lpfc_eqe *eqe;
+ int count = 0, consumed = 0;
+
+ if (cmpxchg(&eq->queue_claimed, 0, 1) != 0)
+ goto rearm_and_exit;
+
+ eqe = lpfc_sli4_eq_get(eq);
+ while (eqe) {
+ lpfc_sli4_hba_handle_eqe(phba, eq, eqe);
+ __lpfc_sli4_consume_eqe(phba, eq, eqe);
+
+ consumed++;
+ if (!(++count % eq->max_proc_limit))
+ break;
+
+ if (!(count % eq->notify_interval)) {
+ phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed,
+ LPFC_QUEUE_NOARM);
+ consumed = 0;
+ }
+
+ eqe = lpfc_sli4_eq_get(eq);
+ }
+ eq->EQ_processed += count;
+
+ /* Track the max number of EQEs processed in 1 intr */
+ if (count > eq->EQ_max_eqe)
+ eq->EQ_max_eqe = count;
+
+ eq->queue_claimed = 0;
+
+rearm_and_exit:
+ /* Always clear and re-arm the EQ */
+ phba->sli4_hba.sli4_write_eq_db(phba, eq, consumed, LPFC_QUEUE_REARM);
+
+ return count;
}
/**
@@ -519,28 +540,16 @@ lpfc_sli4_if6_eq_release(struct lpfc_queue *q, bool arm)
static struct lpfc_cqe *
lpfc_sli4_cq_get(struct lpfc_queue *q)
{
- struct lpfc_hba *phba;
struct lpfc_cqe *cqe;
- uint32_t idx;
/* sanity check on queue memory */
if (unlikely(!q))
return NULL;
- phba = q->phba;
- cqe = q->qe[q->hba_index].cqe;
+ cqe = q->qe[q->host_index].cqe;
/* If the next CQE is not valid then we are done */
if (bf_get_le32(lpfc_cqe_valid, cqe) != q->qe_valid)
return NULL;
- /* If the host has not yet processed the next entry then we are done */
- idx = ((q->hba_index + 1) % q->entry_count);
- if (idx == q->host_index)
- return NULL;
-
- q->hba_index = idx;
- /* if the index wrapped around, toggle the valid bit */
- if (phba->sli4_hba.pc_sli4_params.cqav && !q->hba_index)
- q->qe_valid = (q->qe_valid) ? 0 : 1;
/*
* insert barrier for instruction interlock : data from the hardware
@@ -554,107 +563,81 @@ lpfc_sli4_cq_get(struct lpfc_queue *q)
return cqe;
}
+static void
+__lpfc_sli4_consume_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+ struct lpfc_cqe *cqe)
+{
+ if (!phba->sli4_hba.pc_sli4_params.cqav)
+ bf_set_le32(lpfc_cqe_valid, cqe, 0);
+
+ cq->host_index = ((cq->host_index + 1) % cq->entry_count);
+
+ /* if the index wrapped around, toggle the valid bit */
+ if (phba->sli4_hba.pc_sli4_params.cqav && !cq->host_index)
+ cq->qe_valid = (cq->qe_valid) ? 0 : 1;
+}
+
/**
- * lpfc_sli4_cq_release - Indicates the host has finished processing a CQ
+ * lpfc_sli4_write_cq_db - write cq DB for entries consumed or arm state.
+ * @phba: the adapter with the CQ
* @q: The Completion Queue that the host has completed processing for.
+ * @count: the number of elements that were consumed
* @arm: Indicates whether the host wants to arms this CQ.
*
- * This routine will mark all Completion queue entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of CQEs that were released.
+ * This routine will notify the HBA, by ringing the doorbell, that the
+ * CQEs have been processed. The @arm parameter specifies whether the
+ * queue should be rearmed when ringing the doorbell.
**/
-uint32_t
-lpfc_sli4_cq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+ uint32_t count, bool arm)
{
- uint32_t released = 0;
- struct lpfc_hba *phba;
- struct lpfc_cqe *temp_qe;
struct lpfc_register doorbell;
/* sanity check on queue memory */
- if (unlikely(!q))
- return 0;
- phba = q->phba;
-
- /* while there are valid entries */
- while (q->hba_index != q->host_index) {
- if (!phba->sli4_hba.pc_sli4_params.cqav) {
- temp_qe = q->qe[q->host_index].cqe;
- bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
- }
- released++;
- q->host_index = ((q->host_index + 1) % q->entry_count);
- }
- if (unlikely(released == 0 && !arm))
- return 0;
+ if (unlikely(!q || (count == 0 && !arm)))
+ return;
/* ring doorbell for number popped */
doorbell.word0 = 0;
if (arm)
bf_set(lpfc_eqcq_doorbell_arm, &doorbell, 1);
- bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, released);
+ bf_set(lpfc_eqcq_doorbell_num_released, &doorbell, count);
bf_set(lpfc_eqcq_doorbell_qt, &doorbell, LPFC_QUEUE_TYPE_COMPLETION);
bf_set(lpfc_eqcq_doorbell_cqid_hi, &doorbell,
(q->queue_id >> LPFC_CQID_HI_FIELD_SHIFT));
bf_set(lpfc_eqcq_doorbell_cqid_lo, &doorbell, q->queue_id);
writel(doorbell.word0, q->phba->sli4_hba.CQDBregaddr);
- return released;
}
/**
- * lpfc_sli4_if6_cq_release - Indicates the host has finished processing a CQ
+ * lpfc_sli4_if6_write_cq_db - write cq DB for entries consumed or arm state.
+ * @phba: the adapter with the CQ
* @q: The Completion Queue that the host has completed processing for.
+ * @count: the number of elements that were consumed
* @arm: Indicates whether the host wants to arms this CQ.
*
- * This routine will mark all Completion queue entries on @q, from the last
- * known completed entry to the last entry that was processed, as completed
- * by clearing the valid bit for each completion queue entry. Then it will
- * notify the HBA, by ringing the doorbell, that the CQEs have been processed.
- * The internal host index in the @q will be updated by this routine to indicate
- * that the host has finished processing the entries. The @arm parameter
- * indicates that the queue should be rearmed when ringing the doorbell.
- *
- * This function will return the number of CQEs that were released.
+ * This routine will notify the HBA, by ringing the doorbell, that the
+ * CQEs have been processed. The @arm parameter specifies whether the
+ * queue should be rearmed when ringing the doorbell.
**/
-uint32_t
-lpfc_sli4_if6_cq_release(struct lpfc_queue *q, bool arm)
+void
+lpfc_sli4_if6_write_cq_db(struct lpfc_hba *phba, struct lpfc_queue *q,
+ uint32_t count, bool arm)
{
- uint32_t released = 0;
- struct lpfc_hba *phba;
- struct lpfc_cqe *temp_qe;
struct lpfc_register doorbell;
/* sanity check on queue memory */
- if (unlikely(!q))
- return 0;
- phba = q->phba;
-
- /* while there are valid entries */
- while (q->hba_index != q->host_index) {
- if (!phba->sli4_hba.pc_sli4_params.cqav) {
- temp_qe = q->qe[q->host_index].cqe;
- bf_set_le32(lpfc_cqe_valid, temp_qe, 0);
- }
- released++;
- q->host_index = ((q->host_index + 1) % q->entry_count);
- }
- if (unlikely(released == 0 && !arm))
- return 0;
+ if (unlikely(!q || (count == 0 && !arm)))
+ return;
/* ring doorbell for number popped */
doorbell.word0 = 0;
if (arm)
bf_set(lpfc_if6_cq_doorbell_arm, &doorbell, 1);
- bf_set(lpfc_if6_cq_doorbell_num_released, &doorbell, released);
+ bf_set(lpfc_if6_cq_doorbell_num_released, &doorbell, count);
bf_set(lpfc_if6_cq_doorbell_cqid, &doorbell, q->queue_id);
writel(doorbell.word0, q->phba->sli4_hba.CQDBregaddr);
- return released;
}
/**
@@ -703,15 +686,15 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq,
hq->RQ_buf_posted++;
/* Ring The Header Receive Queue Doorbell */
- if (!(hq->host_index % hq->entry_repost)) {
+ if (!(hq->host_index % hq->notify_interval)) {
doorbell.word0 = 0;
if (hq->db_format == LPFC_DB_RING_FORMAT) {
bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell,
- hq->entry_repost);
+ hq->notify_interval);
bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id);
} else if (hq->db_format == LPFC_DB_LIST_FORMAT) {
bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell,
- hq->entry_repost);
+ hq->notify_interval);
bf_set(lpfc_rq_db_list_fm_index, &doorbell,
hq->host_index);
bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id);
@@ -1025,7 +1008,7 @@ lpfc_test_rrq_active(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
if (!ndlp->active_rrqs_xri_bitmap)
return 0;
if (test_bit(xritag, ndlp->active_rrqs_xri_bitmap))
- return 1;
+ return 1;
else
return 0;
}
@@ -1133,14 +1116,14 @@ __lpfc_sli_get_els_sglq(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq)
struct list_head *lpfc_els_sgl_list = &phba->sli4_hba.lpfc_els_sgl_list;
struct lpfc_sglq *sglq = NULL;
struct lpfc_sglq *start_sglq = NULL;
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_io_buf *lpfc_cmd;
struct lpfc_nodelist *ndlp;
int found = 0;
lockdep_assert_held(&phba->hbalock);
if (piocbq->iocb_flag & LPFC_IO_FCP) {
- lpfc_cmd = (struct lpfc_scsi_buf *) piocbq->context1;
+ lpfc_cmd = (struct lpfc_io_buf *) piocbq->context1;
ndlp = lpfc_cmd->rdata->pnode;
} else if ((piocbq->iocb.ulpCommand == CMD_GEN_REQUEST64_CR) &&
!(piocbq->iocb_flag & LPFC_IO_LIBDFC)) {
@@ -1596,6 +1579,7 @@ lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
list_add_tail(&piocb->list, &pring->txcmplq);
piocb->iocb_flag |= LPFC_IO_ON_TXCMPLQ;
+ pring->txcmplq_cnt++;
if ((unlikely(pring->ringno == LPFC_ELS_RING)) &&
(piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
@@ -3008,6 +2992,7 @@ lpfc_sli_iocbq_lookup(struct lpfc_hba *phba,
/* remove from txcmpl queue list */
list_del_init(&cmd_iocb->list);
cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
+ pring->txcmplq_cnt--;
return cmd_iocb;
}
}
@@ -3045,6 +3030,7 @@ lpfc_sli_iocbq_lookup_by_tag(struct lpfc_hba *phba,
/* remove from txcmpl queue list */
list_del_init(&cmd_iocb->list);
cmd_iocb->iocb_flag &= ~LPFC_IO_ON_TXCMPLQ;
+ pring->txcmplq_cnt--;
return cmd_iocb;
}
}
@@ -3981,8 +3967,8 @@ lpfc_sli_abort_fcp_rings(struct lpfc_hba *phba)
/* Look on all the FCP Rings for the iotag */
if (phba->sli_rev >= LPFC_SLI_REV4) {
- for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
- pring = phba->sli4_hba.fcp_wq[i]->pring;
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
lpfc_sli_abort_iocb_ring(phba, pring);
}
} else {
@@ -4006,12 +3992,13 @@ lpfc_sli_abort_nvme_rings(struct lpfc_hba *phba)
struct lpfc_sli_ring *pring;
uint32_t i;
- if (phba->sli_rev < LPFC_SLI_REV4)
+ if ((phba->sli_rev < LPFC_SLI_REV4) ||
+ !(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
return;
/* Abort all IO on each NVME ring. */
- for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
- pring = phba->sli4_hba.nvme_wq[i]->pring;
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
lpfc_sli_abort_wqe_ring(phba, pring);
}
}
@@ -4044,8 +4031,8 @@ lpfc_sli_flush_fcp_rings(struct lpfc_hba *phba)
/* Look on all the FCP Rings for the iotag */
if (phba->sli_rev >= LPFC_SLI_REV4) {
- for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
- pring = phba->sli4_hba.fcp_wq[i]->pring;
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
spin_lock_irq(&pring->ring_lock);
/* Retrieve everything on txq */
@@ -4110,7 +4097,8 @@ lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba)
uint32_t i;
struct lpfc_iocbq *piocb, *next_iocb;
- if (phba->sli_rev < LPFC_SLI_REV4)
+ if ((phba->sli_rev < LPFC_SLI_REV4) ||
+ !(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME))
return;
/* Hint to other driver operations that a flush is in progress. */
@@ -4122,8 +4110,8 @@ lpfc_sli_flush_nvme_rings(struct lpfc_hba *phba)
* a local driver reason code. This is a flush so no
* abort exchange to FW.
*/
- for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
- pring = phba->sli4_hba.nvme_wq[i]->pring;
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
spin_lock_irq(&pring->ring_lock);
list_for_each_entry_safe(piocb, next_iocb,
@@ -5564,41 +5552,35 @@ lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba)
{
int qidx;
struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
+ struct lpfc_sli4_hdw_queue *qp;
- sli4_hba->sli4_cq_release(sli4_hba->mbx_cq, LPFC_QUEUE_REARM);
- sli4_hba->sli4_cq_release(sli4_hba->els_cq, LPFC_QUEUE_REARM);
+ sli4_hba->sli4_write_cq_db(phba, sli4_hba->mbx_cq, 0, LPFC_QUEUE_REARM);
+ sli4_hba->sli4_write_cq_db(phba, sli4_hba->els_cq, 0, LPFC_QUEUE_REARM);
if (sli4_hba->nvmels_cq)
- sli4_hba->sli4_cq_release(sli4_hba->nvmels_cq,
- LPFC_QUEUE_REARM);
+ sli4_hba->sli4_write_cq_db(phba, sli4_hba->nvmels_cq, 0,
+ LPFC_QUEUE_REARM);
- if (sli4_hba->fcp_cq)
- for (qidx = 0; qidx < phba->cfg_fcp_io_channel; qidx++)
- sli4_hba->sli4_cq_release(sli4_hba->fcp_cq[qidx],
- LPFC_QUEUE_REARM);
-
- if (sli4_hba->nvme_cq)
- for (qidx = 0; qidx < phba->cfg_nvme_io_channel; qidx++)
- sli4_hba->sli4_cq_release(sli4_hba->nvme_cq[qidx],
- LPFC_QUEUE_REARM);
-
- if (phba->cfg_fof)
- sli4_hba->sli4_cq_release(sli4_hba->oas_cq, LPFC_QUEUE_REARM);
+ qp = sli4_hba->hdwq;
+ if (sli4_hba->hdwq) {
+ for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) {
+ sli4_hba->sli4_write_cq_db(phba, qp[qidx].fcp_cq, 0,
+ LPFC_QUEUE_REARM);
+ sli4_hba->sli4_write_cq_db(phba, qp[qidx].nvme_cq, 0,
+ LPFC_QUEUE_REARM);
+ }
- if (sli4_hba->hba_eq)
- for (qidx = 0; qidx < phba->io_channel_irqs; qidx++)
- sli4_hba->sli4_eq_release(sli4_hba->hba_eq[qidx],
- LPFC_QUEUE_REARM);
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++)
+ sli4_hba->sli4_write_eq_db(phba, qp[qidx].hba_eq,
+ 0, LPFC_QUEUE_REARM);
+ }
if (phba->nvmet_support) {
for (qidx = 0; qidx < phba->cfg_nvmet_mrq; qidx++) {
- sli4_hba->sli4_cq_release(
- sli4_hba->nvmet_cqset[qidx],
+ sli4_hba->sli4_write_cq_db(phba,
+ sli4_hba->nvmet_cqset[qidx], 0,
LPFC_QUEUE_REARM);
}
}
-
- if (phba->cfg_fof)
- sli4_hba->sli4_eq_release(sli4_hba->fof_eq, LPFC_QUEUE_REARM);
}
/**
@@ -6027,11 +6009,8 @@ lpfc_sli4_alloc_extent(struct lpfc_hba *phba, uint16_t type)
list_add_tail(&rsrc_blks->list, ext_blk_list);
rsrc_start = rsrc_id;
if ((type == LPFC_RSC_TYPE_FCOE_XRI) && (j == 0)) {
- phba->sli4_hba.scsi_xri_start = rsrc_start +
+ phba->sli4_hba.io_xri_start = rsrc_start +
lpfc_sli4_get_iocb_cnt(phba);
- phba->sli4_hba.nvme_xri_start =
- phba->sli4_hba.scsi_xri_start +
- phba->sli4_hba.scsi_xri_max;
}
while (rsrc_id < (rsrc_start + rsrc_size)) {
@@ -7056,6 +7035,38 @@ lpfc_sli4_repost_sgl_list(struct lpfc_hba *phba,
return total_cnt;
}
+/**
+ * lpfc_sli4_repost_io_sgl_list - Repost all the allocated nvme buffer sgls
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine walks the list of nvme buffers that have been allocated and
+ * repost them to the port by using SGL block post. This is needed after a
+ * pci_function_reset/warm_start or start. The lpfc_hba_down_post_s4 routine
+ * is responsible for moving all nvme buffers on the lpfc_abts_nvme_sgl_list
+ * to the lpfc_io_buf_list. If the repost fails, reject all nvme buffers.
+ *
+ * Returns: 0 = success, non-zero failure.
+ **/
+int
+lpfc_sli4_repost_io_sgl_list(struct lpfc_hba *phba)
+{
+ LIST_HEAD(post_nblist);
+ int num_posted, rc = 0;
+
+ /* get all NVME buffers need to repost to a local list */
+ lpfc_io_buf_flush(phba, &post_nblist);
+
+ /* post the list of nvme buffer sgls to port if available */
+ if (!list_empty(&post_nblist)) {
+ num_posted = lpfc_sli4_post_io_sgl_list(
+ phba, &post_nblist, phba->sli4_hba.io_xri_cnt);
+ /* failed to post any nvme buffer, return error */
+ if (num_posted == 0)
+ rc = -EIO;
+ }
+ return rc;
+}
+
void
lpfc_set_host_data(struct lpfc_hba *phba, LPFC_MBOXQ_t *mbox)
{
@@ -7144,7 +7155,7 @@ lpfc_post_rq_buffer(struct lpfc_hba *phba, struct lpfc_queue *hrq,
int
lpfc_sli4_hba_setup(struct lpfc_hba *phba)
{
- int rc, i, cnt;
+ int rc, i, cnt, len;
LPFC_MBOXQ_t *mboxq;
struct lpfc_mqe *mqe;
uint8_t *vpd;
@@ -7517,24 +7528,26 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
/* We need 1 iocbq for every SGL, for IO processing */
cnt += phba->sli4_hba.nvmet_xri_cnt;
} else {
- /* update host scsi xri-sgl sizes and mappings */
- rc = lpfc_sli4_scsi_sgl_update(phba);
+ /* update host common xri-sgl sizes and mappings */
+ rc = lpfc_sli4_io_sgl_update(phba);
if (unlikely(rc)) {
lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
- "6309 Failed to update scsi-sgl size "
+ "6082 Failed to update nvme-sgl size "
"and mapping: %d\n", rc);
goto out_destroy_queue;
}
- /* update host nvme xri-sgl sizes and mappings */
- rc = lpfc_sli4_nvme_sgl_update(phba);
+ /* register the allocated common sgl pool to the port */
+ rc = lpfc_sli4_repost_io_sgl_list(phba);
if (unlikely(rc)) {
lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
- "6082 Failed to update nvme-sgl size "
- "and mapping: %d\n", rc);
+ "6116 Error %d during nvme sgl post "
+ "operation\n", rc);
+ /* Some NVME buffers were moved to abort nvme list */
+ /* A pci function reset will repost them */
+ rc = -ENODEV;
goto out_destroy_queue;
}
-
cnt = phba->cfg_iocb_cnt * 1024;
}
@@ -7571,36 +7584,6 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
}
}
- if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) {
- /* register the allocated scsi sgl pool to the port */
- rc = lpfc_sli4_repost_scsi_sgl_list(phba);
- if (unlikely(rc)) {
- lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
- "0383 Error %d during scsi sgl post "
- "operation\n", rc);
- /* Some Scsi buffers were moved to abort scsi list */
- /* A pci function reset will repost them */
- rc = -ENODEV;
- goto out_destroy_queue;
- }
- }
-
- if ((phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) &&
- (phba->nvmet_support == 0)) {
-
- /* register the allocated nvme sgl pool to the port */
- rc = lpfc_repost_nvme_sgl_list(phba);
- if (unlikely(rc)) {
- lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
- "6116 Error %d during nvme sgl post "
- "operation\n", rc);
- /* Some NVME buffers were moved to abort nvme list */
- /* A pci function reset will repost them */
- rc = -ENODEV;
- goto out_destroy_queue;
- }
- }
-
/* Post the rpi header region to the device. */
rc = lpfc_sli4_post_all_rpi_hdrs(phba);
if (unlikely(rc)) {
@@ -7650,6 +7633,25 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
lpfc_sli_read_link_ste(phba);
}
+ /* Don't post more new bufs if repost already recovered
+ * the nvme sgls.
+ */
+ if (phba->nvmet_support == 0) {
+ if (phba->sli4_hba.io_xri_cnt == 0) {
+ len = lpfc_new_io_buf(
+ phba, phba->sli4_hba.io_xri_max);
+ if (len == 0) {
+ rc = -ENOMEM;
+ goto out_unset_queue;
+ }
+
+ if (phba->cfg_xri_rebalancing)
+ lpfc_create_multixri_pools(phba);
+ }
+ } else {
+ phba->cfg_xri_rebalancing = 0;
+ }
+
/* Arm the CQs and then EQs on device */
lpfc_sli4_arm_cqeq_intr(phba);
@@ -7678,6 +7680,11 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
phba->hb_outstanding = 0;
phba->last_completion_time = jiffies;
+ /* start eq_delay heartbeat */
+ if (phba->cfg_auto_imax)
+ queue_delayed_work(phba->wq, &phba->eq_delay_work,
+ msecs_to_jiffies(LPFC_EQ_DELAY_MSECS));
+
/* Start error attention (ERATT) polling timer */
mod_timer(&phba->eratt_poll,
jiffies + msecs_to_jiffies(1000 * phba->eratt_poll_interval));
@@ -7729,18 +7736,21 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba)
lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_SLI,
"3104 Adapter failed to issue "
"DOWN_LINK mbox cmd, rc:x%x\n", rc);
- goto out_unset_queue;
+ goto out_io_buff_free;
}
} else if (phba->cfg_suppress_link_up == LPFC_INITIALIZE_LINK) {
/* don't perform init_link on SLI4 FC port loopback test */
if (!(phba->link_flag & LS_LOOPBACK_MODE)) {
rc = phba->lpfc_hba_init_link(phba, MBX_NOWAIT);
if (rc)
- goto out_unset_queue;
+ goto out_io_buff_free;
}
}
mempool_free(mboxq, phba->mbox_mem_pool);
return rc;
+out_io_buff_free:
+ /* Free allocated IO Buffers */
+ lpfc_io_free(phba);
out_unset_queue:
/* Unset all the queues set up in this routine when error out */
lpfc_sli4_queue_unset(phba);
@@ -7846,7 +7856,6 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba;
uint32_t eqidx;
struct lpfc_queue *fpeq = NULL;
- struct lpfc_eqe *eqe;
bool mbox_pending;
if (unlikely(!phba) || (phba->sli_rev != LPFC_SLI_REV4))
@@ -7854,11 +7863,11 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
/* Find the eq associated with the mcq */
- if (sli4_hba->hba_eq)
- for (eqidx = 0; eqidx < phba->io_channel_irqs; eqidx++)
- if (sli4_hba->hba_eq[eqidx]->queue_id ==
+ if (sli4_hba->hdwq)
+ for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++)
+ if (sli4_hba->hdwq[eqidx].hba_eq->queue_id ==
sli4_hba->mbx_cq->assoc_qid) {
- fpeq = sli4_hba->hba_eq[eqidx];
+ fpeq = sli4_hba->hdwq[eqidx].hba_eq;
break;
}
if (!fpeq)
@@ -7880,14 +7889,11 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba)
*/
if (mbox_pending)
- while ((eqe = lpfc_sli4_eq_get(fpeq))) {
- lpfc_sli4_hba_handle_eqe(phba, eqe, eqidx);
- fpeq->EQ_processed++;
- }
-
- /* Always clear and re-arm the EQ */
-
- sli4_hba->sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+ /* process and rearm the EQ */
+ lpfc_sli4_process_eq(phba, fpeq);
+ else
+ /* Always clear and re-arm the EQ */
+ sli4_hba->sli4_write_eq_db(phba, fpeq, 0, LPFC_QUEUE_REARM);
return mbox_pending;
@@ -8557,7 +8563,6 @@ lpfc_sli4_post_sync_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
rc = lpfc_sli4_wait_bmbx_ready(phba, mboxq);
if (rc)
goto exit;
-
/*
* Initialize the bootstrap memory region to avoid stale data areas
* in the mailbox post. Then copy the caller's mailbox contents to
@@ -9476,7 +9481,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
bf_set(wqe_pbde, &wqe->fcp_iwrite.wqe_com, 0);
if (phba->fcp_embed_io) {
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_io_buf *lpfc_cmd;
struct sli4_sge *sgl;
struct fcp_cmnd *fcp_cmnd;
uint32_t *ptr;
@@ -9484,7 +9489,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
/* 128 byte wqe support here */
lpfc_cmd = iocbq->context1;
- sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+ sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
fcp_cmnd = lpfc_cmd->fcp_cmnd;
/* Word 0-2 - FCP_CMND */
@@ -9540,7 +9545,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
bf_set(wqe_pbde, &wqe->fcp_iread.wqe_com, 0);
if (phba->fcp_embed_io) {
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_io_buf *lpfc_cmd;
struct sli4_sge *sgl;
struct fcp_cmnd *fcp_cmnd;
uint32_t *ptr;
@@ -9548,7 +9553,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
/* 128 byte wqe support here */
lpfc_cmd = iocbq->context1;
- sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+ sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
fcp_cmnd = lpfc_cmd->fcp_cmnd;
/* Word 0-2 - FCP_CMND */
@@ -9597,7 +9602,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
/* Note, word 10 is already initialized to 0 */
if (phba->fcp_embed_io) {
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_io_buf *lpfc_cmd;
struct sli4_sge *sgl;
struct fcp_cmnd *fcp_cmnd;
uint32_t *ptr;
@@ -9605,7 +9610,7 @@ lpfc_sli4_iocb2wqe(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq,
/* 128 byte wqe support here */
lpfc_cmd = iocbq->context1;
- sgl = (struct sli4_sge *)lpfc_cmd->fcp_bpl;
+ sgl = (struct sli4_sge *)lpfc_cmd->dma_sgl;
fcp_cmnd = lpfc_cmd->fcp_cmnd;
/* Word 0-2 - FCP_CMND */
@@ -9864,10 +9869,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number,
/* Get the WQ */
if ((piocb->iocb_flag & LPFC_IO_FCP) ||
(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
- if (!phba->cfg_fof || (!(piocb->iocb_flag & LPFC_IO_OAS)))
- wq = phba->sli4_hba.fcp_wq[piocb->hba_wqidx];
- else
- wq = phba->sli4_hba.oas_wq;
+ wq = phba->sli4_hba.hdwq[piocb->hba_wqidx].fcp_wq;
} else {
wq = phba->sli4_hba.els_wq;
}
@@ -10001,29 +10003,20 @@ lpfc_sli_api_table_setup(struct lpfc_hba *phba, uint8_t dev_grp)
struct lpfc_sli_ring *
lpfc_sli4_calc_ring(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
{
+ struct lpfc_io_buf *lpfc_cmd;
+
if (piocb->iocb_flag & (LPFC_IO_FCP | LPFC_USE_FCPWQIDX)) {
- if (!(phba->cfg_fof) ||
- (!(piocb->iocb_flag & LPFC_IO_FOF))) {
- if (unlikely(!phba->sli4_hba.fcp_wq))
- return NULL;
- /*
- * for abort iocb hba_wqidx should already
- * be setup based on what work queue we used.
- */
- if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
- piocb->hba_wqidx =
- lpfc_sli4_scmd_to_wqidx_distr(phba,
- piocb->context1);
- piocb->hba_wqidx = piocb->hba_wqidx %
- phba->cfg_fcp_io_channel;
- }
- return phba->sli4_hba.fcp_wq[piocb->hba_wqidx]->pring;
- } else {
- if (unlikely(!phba->sli4_hba.oas_wq))
- return NULL;
- piocb->hba_wqidx = 0;
- return phba->sli4_hba.oas_wq->pring;
+ if (unlikely(!phba->sli4_hba.hdwq))
+ return NULL;
+ /*
+ * for abort iocb hba_wqidx should already
+ * be setup based on what work queue we used.
+ */
+ if (!(piocb->iocb_flag & LPFC_USE_FCPWQIDX)) {
+ lpfc_cmd = (struct lpfc_io_buf *)piocb->context1;
+ piocb->hba_wqidx = lpfc_cmd->hdwq_no;
}
+ return phba->sli4_hba.hdwq[piocb->hba_wqidx].fcp_wq->pring;
} else {
if (unlikely(!phba->sli4_hba.els_wq))
return NULL;
@@ -10049,12 +10042,9 @@ int
lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
struct lpfc_iocbq *piocb, uint32_t flag)
{
- struct lpfc_hba_eq_hdl *hba_eq_hdl;
struct lpfc_sli_ring *pring;
- struct lpfc_queue *fpeq;
- struct lpfc_eqe *eqe;
unsigned long iflags;
- int rc, idx;
+ int rc;
if (phba->sli_rev == LPFC_SLI_REV4) {
pring = lpfc_sli4_calc_ring(phba, piocb);
@@ -10064,34 +10054,6 @@ lpfc_sli_issue_iocb(struct lpfc_hba *phba, uint32_t ring_number,
spin_lock_irqsave(&pring->ring_lock, iflags);
rc = __lpfc_sli_issue_iocb(phba, ring_number, piocb, flag);
spin_unlock_irqrestore(&pring->ring_lock, iflags);
-
- if (lpfc_fcp_look_ahead && (piocb->iocb_flag & LPFC_IO_FCP)) {
- idx = piocb->hba_wqidx;
- hba_eq_hdl = &phba->sli4_hba.hba_eq_hdl[idx];
-
- if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use)) {
-
- /* Get associated EQ with this index */
- fpeq = phba->sli4_hba.hba_eq[idx];
-
- /* Turn off interrupts from this EQ */
- phba->sli4_hba.sli4_eq_clr_intr(fpeq);
-
- /*
- * Process all the events on FCP EQ
- */
- while ((eqe = lpfc_sli4_eq_get(fpeq))) {
- lpfc_sli4_hba_handle_eqe(phba,
- eqe, idx);
- fpeq->EQ_processed++;
- }
-
- /* Always clear and re-arm the EQ */
- phba->sli4_hba.sli4_eq_release(fpeq,
- LPFC_QUEUE_REARM);
- }
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- }
} else {
/* For now, SLI2/3 will still use hbalock */
spin_lock_irqsave(&phba->hbalock, iflags);
@@ -10506,19 +10468,11 @@ lpfc_sli4_queue_init(struct lpfc_hba *phba)
INIT_LIST_HEAD(&psli->mboxq);
INIT_LIST_HEAD(&psli->mboxq_cmpl);
/* Initialize list headers for txq and txcmplq as double linked lists */
- for (i = 0; i < phba->cfg_fcp_io_channel; i++) {
- pring = phba->sli4_hba.fcp_wq[i]->pring;
- pring->flag = 0;
- pring->ringno = LPFC_FCP_RING;
- INIT_LIST_HEAD(&pring->txq);
- INIT_LIST_HEAD(&pring->txcmplq);
- INIT_LIST_HEAD(&pring->iocb_continueq);
- spin_lock_init(&pring->ring_lock);
- }
- for (i = 0; i < phba->cfg_nvme_io_channel; i++) {
- pring = phba->sli4_hba.nvme_wq[i]->pring;
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ pring = phba->sli4_hba.hdwq[i].fcp_wq->pring;
pring->flag = 0;
pring->ringno = LPFC_FCP_RING;
+ pring->txcmplq_cnt = 0;
INIT_LIST_HEAD(&pring->txq);
INIT_LIST_HEAD(&pring->txcmplq);
INIT_LIST_HEAD(&pring->iocb_continueq);
@@ -10527,25 +10481,27 @@ lpfc_sli4_queue_init(struct lpfc_hba *phba)
pring = phba->sli4_hba.els_wq->pring;
pring->flag = 0;
pring->ringno = LPFC_ELS_RING;
+ pring->txcmplq_cnt = 0;
INIT_LIST_HEAD(&pring->txq);
INIT_LIST_HEAD(&pring->txcmplq);
INIT_LIST_HEAD(&pring->iocb_continueq);
spin_lock_init(&pring->ring_lock);
- if (phba->cfg_nvme_io_channel) {
+ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) {
+ for (i = 0; i < phba->cfg_hdw_queue; i++) {
+ pring = phba->sli4_hba.hdwq[i].nvme_wq->pring;
+ pring->flag = 0;
+ pring->ringno = LPFC_FCP_RING;
+ pring->txcmplq_cnt = 0;
+ INIT_LIST_HEAD(&pring->txq);
+ INIT_LIST_HEAD(&pring->txcmplq);
+ INIT_LIST_HEAD(&pring->iocb_continueq);
+ spin_lock_init(&pring->ring_lock);
+ }
pring = phba->sli4_hba.nvmels_wq->pring;
pring->flag = 0;
pring->ringno = LPFC_ELS_RING;
- INIT_LIST_HEAD(&pring->txq);
- INIT_LIST_HEAD(&pring->txcmplq);
- INIT_LIST_HEAD(&pring->iocb_continueq);
- spin_lock_init(&pring->ring_lock);
- }
-
- if (phba->cfg_fof) {
- pring = phba->sli4_hba.oas_wq->pring;
- pring->flag = 0;
- pring->ringno = LPFC_FCP_RING;
+ pring->txcmplq_cnt = 0;
INIT_LIST_HEAD(&pring->txq);
INIT_LIST_HEAD(&pring->txcmplq);
INIT_LIST_HEAD(&pring->iocb_continueq);
@@ -11327,6 +11283,7 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
struct lpfc_iocbq *abtsiocbp;
union lpfc_wqe128 *abts_wqe;
int retval;
+ int idx = cmdiocb->hba_wqidx;
/*
* There are certain command types we don't want to abort. And we
@@ -11382,7 +11339,8 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
abtsiocbp->iocb_flag |= LPFC_IO_NVME;
abtsiocbp->vport = vport;
abtsiocbp->wqe_cmpl = lpfc_nvme_abort_fcreq_cmpl;
- retval = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abtsiocbp);
+ retval = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[idx],
+ abtsiocbp);
if (retval) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME,
"6147 Failed abts issue_wqe with status x%x "
@@ -11457,7 +11415,7 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
uint16_t tgt_id, uint64_t lun_id,
lpfc_ctx_cmd ctx_cmd)
{
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_io_buf *lpfc_cmd;
int rc = 1;
if (iocbq->vport != vport)
@@ -11467,7 +11425,7 @@ lpfc_sli_validate_fcp_iocb(struct lpfc_iocbq *iocbq, struct lpfc_vport *vport,
!(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ))
return rc;
- lpfc_cmd = container_of(iocbq, struct lpfc_scsi_buf, cur_iocbq);
+ lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
if (lpfc_cmd->pCmd == NULL)
return rc;
@@ -11694,14 +11652,14 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
uint16_t tgt_id, uint64_t lun_id, lpfc_ctx_cmd cmd)
{
struct lpfc_hba *phba = vport->phba;
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_io_buf *lpfc_cmd;
struct lpfc_iocbq *abtsiocbq;
struct lpfc_nodelist *ndlp;
struct lpfc_iocbq *iocbq;
IOCB_t *icmd;
int sum, i, ret_val;
unsigned long iflags;
- struct lpfc_sli_ring *pring_s4;
+ struct lpfc_sli_ring *pring_s4 = NULL;
spin_lock_irqsave(&phba->hbalock, iflags);
@@ -11719,17 +11677,46 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
cmd) != 0)
continue;
+ /* Guard against IO completion being called at same time */
+ lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
+ spin_lock(&lpfc_cmd->buf_lock);
+
+ if (!lpfc_cmd->pCmd) {
+ spin_unlock(&lpfc_cmd->buf_lock);
+ continue;
+ }
+
+ if (phba->sli_rev == LPFC_SLI_REV4) {
+ pring_s4 =
+ phba->sli4_hba.hdwq[iocbq->hba_wqidx].fcp_wq->pring;
+ if (!pring_s4) {
+ spin_unlock(&lpfc_cmd->buf_lock);
+ continue;
+ }
+ /* Note: both hbalock and ring_lock must be set here */
+ spin_lock(&pring_s4->ring_lock);
+ }
+
/*
* If the iocbq is already being aborted, don't take a second
* action, but do count it.
*/
- if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+ if ((iocbq->iocb_flag & LPFC_DRIVER_ABORTED) ||
+ !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ)) {
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ spin_unlock(&pring_s4->ring_lock);
+ spin_unlock(&lpfc_cmd->buf_lock);
continue;
+ }
/* issue ABTS for this IOCB based on iotag */
abtsiocbq = __lpfc_sli_get_iocbq(phba);
- if (abtsiocbq == NULL)
+ if (!abtsiocbq) {
+ if (phba->sli_rev == LPFC_SLI_REV4)
+ spin_unlock(&pring_s4->ring_lock);
+ spin_unlock(&lpfc_cmd->buf_lock);
continue;
+ }
icmd = &iocbq->iocb;
abtsiocbq->iocb.un.acxri.abortType = ABORT_TYPE_ABTS;
@@ -11750,7 +11737,6 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
if (iocbq->iocb_flag & LPFC_IO_FOF)
abtsiocbq->iocb_flag |= LPFC_IO_FOF;
- lpfc_cmd = container_of(iocbq, struct lpfc_scsi_buf, cur_iocbq);
ndlp = lpfc_cmd->rdata->pnode;
if (lpfc_is_link_up(phba) &&
@@ -11769,11 +11755,6 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
iocbq->iocb_flag |= LPFC_DRIVER_ABORTED;
if (phba->sli_rev == LPFC_SLI_REV4) {
- pring_s4 = lpfc_sli4_calc_ring(phba, abtsiocbq);
- if (!pring_s4)
- continue;
- /* Note: both hbalock and ring_lock must be set here */
- spin_lock(&pring_s4->ring_lock);
ret_val = __lpfc_sli_issue_iocb(phba, pring_s4->ringno,
abtsiocbq, 0);
spin_unlock(&pring_s4->ring_lock);
@@ -11782,6 +11763,7 @@ lpfc_sli_abort_taskmgmt(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
abtsiocbq, 0);
}
+ spin_unlock(&lpfc_cmd->buf_lock);
if (ret_val == IOCB_ERROR)
__lpfc_sli_release_iocbq(phba, abtsiocbq);
@@ -11816,7 +11798,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
{
wait_queue_head_t *pdone_q;
unsigned long iflags;
- struct lpfc_scsi_buf *lpfc_cmd;
+ struct lpfc_io_buf *lpfc_cmd;
spin_lock_irqsave(&phba->hbalock, iflags);
if (cmdiocbq->iocb_flag & LPFC_IO_WAKE_TMO) {
@@ -11845,7 +11827,7 @@ lpfc_sli_wake_iocb_wait(struct lpfc_hba *phba,
/* Set the exchange busy flag for task management commands */
if ((cmdiocbq->iocb_flag & LPFC_IO_FCP) &&
!(cmdiocbq->iocb_flag & LPFC_IO_LIBDFC)) {
- lpfc_cmd = container_of(cmdiocbq, struct lpfc_scsi_buf,
+ lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf,
cur_iocbq);
lpfc_cmd->exch_busy = rspiocbq->iocb_flag & LPFC_EXCHANGE_BUSY;
}
@@ -12919,35 +12901,6 @@ lpfc_sli_intr_handler(int irq, void *dev_id)
} /* lpfc_sli_intr_handler */
/**
- * lpfc_sli4_fcp_xri_abort_event_proc - Process fcp xri abort event
- * @phba: pointer to lpfc hba data structure.
- *
- * This routine is invoked by the worker thread to process all the pending
- * SLI4 FCP abort XRI events.
- **/
-void lpfc_sli4_fcp_xri_abort_event_proc(struct lpfc_hba *phba)
-{
- struct lpfc_cq_event *cq_event;
-
- /* First, declare the fcp xri abort event has been handled */
- spin_lock_irq(&phba->hbalock);
- phba->hba_flag &= ~FCP_XRI_ABORT_EVENT;
- spin_unlock_irq(&phba->hbalock);
- /* Now, handle all the fcp xri abort events */
- while (!list_empty(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue)) {
- /* Get the first event from the head of the event queue */
- spin_lock_irq(&phba->hbalock);
- list_remove_head(&phba->sli4_hba.sp_fcp_xri_aborted_work_queue,
- cq_event, struct lpfc_cq_event, list);
- spin_unlock_irq(&phba->hbalock);
- /* Notify aborted XRI for FCP work queue */
- lpfc_sli4_fcp_xri_aborted(phba, &cq_event->cqe.wcqe_axri);
- /* Free the event processed back to the free pool */
- lpfc_sli4_cq_event_release(phba, cq_event);
- }
-}
-
-/**
* lpfc_sli4_els_xri_abort_event_proc - Process els xri abort event
* @phba: pointer to lpfc hba data structure.
*
@@ -13320,11 +13273,14 @@ out_no_mqe_complete:
* Return: true if work posted to worker thread, otherwise false.
**/
static bool
-lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_cqe *cqe)
+lpfc_sli4_sp_handle_mcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
+ struct lpfc_cqe *cqe)
{
struct lpfc_mcqe mcqe;
bool workposted;
+ cq->CQ_mbox++;
+
/* Copy the mailbox MCQE and convert endian order as needed */
lpfc_sli4_pcimem_bcopy(cqe, &mcqe, sizeof(struct lpfc_mcqe));
@@ -13443,17 +13399,8 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
switch (cq->subtype) {
case LPFC_FCP:
- cq_event = lpfc_cq_event_setup(
- phba, wcqe, sizeof(struct sli4_wcqe_xri_aborted));
- if (!cq_event)
- return false;
- spin_lock_irqsave(&phba->hbalock, iflags);
- list_add_tail(&cq_event->list,
- &phba->sli4_hba.sp_fcp_xri_aborted_work_queue);
- /* Set the fcp xri abort event flag */
- phba->hba_flag |= FCP_XRI_ABORT_EVENT;
- spin_unlock_irqrestore(&phba->hbalock, iflags);
- workposted = true;
+ lpfc_sli4_fcp_xri_aborted(phba, wcqe, cq->hdwq);
+ workposted = false;
break;
case LPFC_NVME_LS: /* NVME LS uses ELS resources */
case LPFC_ELS:
@@ -13461,6 +13408,7 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
phba, wcqe, sizeof(struct sli4_wcqe_xri_aborted));
if (!cq_event)
return false;
+ cq_event->hdwq = cq->hdwq;
spin_lock_irqsave(&phba->hbalock, iflags);
list_add_tail(&cq_event->list,
&phba->sli4_hba.sp_els_xri_aborted_work_queue);
@@ -13474,7 +13422,7 @@ lpfc_sli4_sp_handle_abort_xri_wcqe(struct lpfc_hba *phba,
if (phba->nvmet_support)
lpfc_sli4_nvmet_xri_aborted(phba, wcqe);
else
- lpfc_sli4_nvme_xri_aborted(phba, wcqe);
+ lpfc_sli4_nvme_xri_aborted(phba, wcqe, cq->hdwq);
workposted = false;
break;
@@ -13592,7 +13540,7 @@ out:
* lpfc_sli4_sp_handle_cqe - Process a slow path completion queue entry
* @phba: Pointer to HBA context object.
* @cq: Pointer to the completion queue.
- * @wcqe: Pointer to a completion queue entry.
+ * @cqe: Pointer to a completion queue entry.
*
* This routine process a slow-path work-queue or receive queue completion queue
* entry.
@@ -13684,7 +13632,7 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
/* Save EQ associated with this CQ */
cq->assoc_qp = speq;
- if (!queue_work(phba->wq, &cq->spwork))
+ if (!queue_work_on(cq->chann, phba->wq, &cq->spwork))
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"0390 Cannot schedule soft IRQ "
"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
@@ -13692,60 +13640,129 @@ lpfc_sli4_sp_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
}
/**
- * lpfc_sli4_sp_process_cq - Process a slow-path event queue entry
+ * __lpfc_sli4_process_cq - Process elements of a CQ
* @phba: Pointer to HBA context object.
+ * @cq: Pointer to CQ to be processed
+ * @handler: Routine to process each cqe
+ * @delay: Pointer to usdelay to set in case of rescheduling of the handler
*
- * This routine process a event queue entry from the slow-path event queue.
- * It will check the MajorCode and MinorCode to determine this is for a
- * completion event on a completion queue, if not, an error shall be logged
- * and just return. Otherwise, it will get to the corresponding completion
- * queue and process all the entries on that completion queue, rearm the
- * completion queue, and then return.
+ * This routine processes completion queue entries in a CQ. While a valid
+ * queue element is found, the handler is called. During processing checks
+ * are made for periodic doorbell writes to let the hardware know of
+ * element consumption.
+ *
+ * If the max limit on cqes to process is hit, or there are no more valid
+ * entries, the loop stops. If we processed a sufficient number of elements,
+ * meaning there is sufficient load, rather than rearming and generating
+ * another interrupt, a cq rescheduling delay will be set. A delay of 0
+ * indicates no rescheduling.
*
+ * Returns True if work scheduled, False otherwise.
**/
-static void
-lpfc_sli4_sp_process_cq(struct work_struct *work)
+static bool
+__lpfc_sli4_process_cq(struct lpfc_hba *phba, struct lpfc_queue *cq,
+ bool (*handler)(struct lpfc_hba *, struct lpfc_queue *,
+ struct lpfc_cqe *), unsigned long *delay)
{
- struct lpfc_queue *cq =
- container_of(work, struct lpfc_queue, spwork);
- struct lpfc_hba *phba = cq->phba;
struct lpfc_cqe *cqe;
bool workposted = false;
- int ccount = 0;
+ int count = 0, consumed = 0;
+ bool arm = true;
+
+ /* default - no reschedule */
+ *delay = 0;
+
+ if (cmpxchg(&cq->queue_claimed, 0, 1) != 0)
+ goto rearm_and_exit;
/* Process all the entries to the CQ */
+ cqe = lpfc_sli4_cq_get(cq);
+ while (cqe) {
+#if defined(CONFIG_SCSI_LPFC_DEBUG_FS) && defined(BUILD_NVME)
+ if (phba->ktime_on)
+ cq->isr_timestamp = ktime_get_ns();
+ else
+ cq->isr_timestamp = 0;
+#endif
+ workposted |= handler(phba, cq, cqe);
+ __lpfc_sli4_consume_cqe(phba, cq, cqe);
+
+ consumed++;
+ if (!(++count % cq->max_proc_limit))
+ break;
+
+ if (!(count % cq->notify_interval)) {
+ phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed,
+ LPFC_QUEUE_NOARM);
+ consumed = 0;
+ }
+
+ cqe = lpfc_sli4_cq_get(cq);
+ }
+ if (count >= phba->cfg_cq_poll_threshold) {
+ *delay = 1;
+ arm = false;
+ }
+
+ /* Track the max number of CQEs processed in 1 EQ */
+ if (count > cq->CQ_max_cqe)
+ cq->CQ_max_cqe = count;
+
+ cq->assoc_qp->EQ_cqe_cnt += count;
+
+ /* Catch the no cq entry condition */
+ if (unlikely(count == 0))
+ lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ "0369 No entry from completion queue "
+ "qid=%d\n", cq->queue_id);
+
+ cq->queue_claimed = 0;
+
+rearm_and_exit:
+ phba->sli4_hba.sli4_write_cq_db(phba, cq, consumed,
+ arm ? LPFC_QUEUE_REARM : LPFC_QUEUE_NOARM);
+
+ return workposted;
+}
+
+/**
+ * lpfc_sli4_sp_process_cq - Process a slow-path event queue entry
+ * @cq: pointer to CQ to process
+ *
+ * This routine calls the cq processing routine with a handler specific
+ * to the type of queue bound to it.
+ *
+ * The CQ routine returns two values: the first is the calling status,
+ * which indicates whether work was queued to the background discovery
+ * thread. If true, the routine should wakeup the discovery thread;
+ * the second is the delay parameter. If non-zero, rather than rearming
+ * the CQ and yet another interrupt, the CQ handler should be queued so
+ * that it is processed in a subsequent polling action. The value of
+ * the delay indicates when to reschedule it.
+ **/
+static void
+__lpfc_sli4_sp_process_cq(struct lpfc_queue *cq)
+{
+ struct lpfc_hba *phba = cq->phba;
+ unsigned long delay;
+ bool workposted = false;
+
+ /* Process and rearm the CQ */
switch (cq->type) {
case LPFC_MCQ:
- while ((cqe = lpfc_sli4_cq_get(cq))) {
- workposted |= lpfc_sli4_sp_handle_mcqe(phba, cqe);
- if (!(++ccount % cq->entry_repost))
- break;
- cq->CQ_mbox++;
- }
+ workposted |= __lpfc_sli4_process_cq(phba, cq,
+ lpfc_sli4_sp_handle_mcqe,
+ &delay);
break;
case LPFC_WCQ:
- while ((cqe = lpfc_sli4_cq_get(cq))) {
- if (cq->subtype == LPFC_FCP ||
- cq->subtype == LPFC_NVME) {
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
- if (phba->ktime_on)
- cq->isr_timestamp = ktime_get_ns();
- else
- cq->isr_timestamp = 0;
-#endif
- workposted |= lpfc_sli4_fp_handle_cqe(phba, cq,
- cqe);
- } else {
- workposted |= lpfc_sli4_sp_handle_cqe(phba, cq,
- cqe);
- }
- if (!(++ccount % cq->entry_repost))
- break;
- }
-
- /* Track the max number of CQEs processed in 1 EQ */
- if (ccount > cq->CQ_max_cqe)
- cq->CQ_max_cqe = ccount;
+ if (cq->subtype == LPFC_FCP || cq->subtype == LPFC_NVME)
+ workposted |= __lpfc_sli4_process_cq(phba, cq,
+ lpfc_sli4_fp_handle_cqe,
+ &delay);
+ else
+ workposted |= __lpfc_sli4_process_cq(phba, cq,
+ lpfc_sli4_sp_handle_cqe,
+ &delay);
break;
default:
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
@@ -13754,14 +13771,14 @@ lpfc_sli4_sp_process_cq(struct work_struct *work)
return;
}
- /* Catch the no cq entry condition, log an error */
- if (unlikely(ccount == 0))
- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0371 No entry from the CQ: identifier "
- "(x%x), type (%d)\n", cq->queue_id, cq->type);
-
- /* In any case, flash and re-arm the RCQ */
- phba->sli4_hba.sli4_cq_release(cq, LPFC_QUEUE_REARM);
+ if (delay) {
+ if (!queue_delayed_work_on(cq->chann, phba->wq,
+ &cq->sched_spwork, delay))
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "0394 Cannot schedule soft IRQ "
+ "for cqid=%d on CPU %d\n",
+ cq->queue_id, cq->chann);
+ }
/* wake up worker thread if there are works to be done */
if (workposted)
@@ -13769,6 +13786,36 @@ lpfc_sli4_sp_process_cq(struct work_struct *work)
}
/**
+ * lpfc_sli4_sp_process_cq - slow-path work handler when started by
+ * interrupt
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the slow-path handler.
+ **/
+static void
+lpfc_sli4_sp_process_cq(struct work_struct *work)
+{
+ struct lpfc_queue *cq = container_of(work, struct lpfc_queue, spwork);
+
+ __lpfc_sli4_sp_process_cq(cq);
+}
+
+/**
+ * lpfc_sli4_dly_sp_process_cq - slow-path work handler when started by timer
+ * @work: pointer to work element
+ *
+ * translates from the work handler and calls the slow-path handler.
+ **/
+static void
+lpfc_sli4_dly_sp_process_cq(struct work_struct *work)
+{
+ struct lpfc_queue *cq = container_of(to_delayed_work(work),
+ struct lpfc_queue, sched_spwork);
+
+ __lpfc_sli4_sp_process_cq(cq);
+}
+
+/**
* lpfc_sli4_fp_handle_fcp_wcqe - Process fast-path work queue completion entry
* @phba: Pointer to HBA context object.
* @cq: Pointer to associated CQ
@@ -13999,13 +14046,16 @@ out:
/**
* lpfc_sli4_fp_handle_cqe - Process fast-path work queue completion entry
+ * @phba: adapter with cq
* @cq: Pointer to the completion queue.
* @eqe: Pointer to fast-path completion queue entry.
*
* This routine process a fast-path work queue completion entry from fast-path
* event queue for FCP command response completion.
+ *
+ * Return: true if work posted to worker thread, otherwise false.
**/
-static int
+static bool
lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
struct lpfc_cqe *cqe)
{
@@ -14072,10 +14122,11 @@ lpfc_sli4_fp_handle_cqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
* completion queue, and then return.
**/
static void
-lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
- uint32_t qidx)
+lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_queue *eq,
+ struct lpfc_eqe *eqe)
{
struct lpfc_queue *cq = NULL;
+ uint32_t qidx = eq->hdwq;
uint16_t cqid, id;
if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) {
@@ -14090,6 +14141,14 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
/* Get the reference to the corresponding CQ */
cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
+ /* Use the fast lookup method first */
+ if (cqid <= phba->sli4_hba.cq_max) {
+ cq = phba->sli4_hba.cq_lookup[cqid];
+ if (cq)
+ goto work_cq;
+ }
+
+ /* Next check for NVMET completion */
if (phba->cfg_nvmet_mrq && phba->sli4_hba.nvmet_cqset) {
id = phba->sli4_hba.nvmet_cqset[0]->queue_id;
if ((cqid >= id) && (cqid < (id + phba->cfg_nvmet_mrq))) {
@@ -14099,20 +14158,6 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
}
}
- if (phba->sli4_hba.nvme_cq_map &&
- (cqid == phba->sli4_hba.nvme_cq_map[qidx])) {
- /* Process NVME / NVMET command completion */
- cq = phba->sli4_hba.nvme_cq[qidx];
- goto process_cq;
- }
-
- if (phba->sli4_hba.fcp_cq_map &&
- (cqid == phba->sli4_hba.fcp_cq_map[qidx])) {
- /* Process FCP command completion */
- cq = phba->sli4_hba.fcp_cq[qidx];
- goto process_cq;
- }
-
if (phba->sli4_hba.nvmels_cq &&
(cqid == phba->sli4_hba.nvmels_cq->queue_id)) {
/* Process NVME unsol rcv */
@@ -14121,7 +14166,8 @@ lpfc_sli4_hba_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe,
/* Otherwise this is a Slow path event */
if (cq == NULL) {
- lpfc_sli4_sp_handle_eqe(phba, eqe, phba->sli4_hba.hba_eq[qidx]);
+ lpfc_sli4_sp_handle_eqe(phba, eqe,
+ phba->sli4_hba.hdwq[qidx].hba_eq);
return;
}
@@ -14134,10 +14180,8 @@ process_cq:
return;
}
- /* Save EQ associated with this CQ */
- cq->assoc_qp = phba->sli4_hba.hba_eq[qidx];
-
- if (!queue_work(phba->wq, &cq->irqwork))
+work_cq:
+ if (!queue_work_on(cq->chann, phba->wq, &cq->irqwork))
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
"0363 Cannot schedule soft IRQ "
"for CQ eqcqid=%d, cqid=%d on CPU %d\n",
@@ -14145,219 +14189,73 @@ process_cq:
}
/**
- * lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
- * @phba: Pointer to HBA context object.
- * @eqe: Pointer to fast-path event queue entry.
+ * __lpfc_sli4_hba_process_cq - Process a fast-path event queue entry
+ * @cq: Pointer to CQ to be processed
*
- * This routine process a event queue entry from the fast-path event queue.
- * It will check the MajorCode and MinorCode to determine this is for a
- * completion event on a completion queue, if not, an error shall be logged
- * and just return. Otherwise, it will get to the corresponding completion
- * queue and process all the entries on the completion queue, rearm the
- * completion queue, and then return.
+ * This routine calls the cq processing routine with the handler for
+ * fast path CQEs.
+ *
+ * The CQ routine returns two values: the first is the calling status,
+ * which indicates whether work was queued to the background discovery
+ * thread. If true, the routine should wakeup the discovery thread;
+ * the second is the delay parameter. If non-zero, rather than rearming
+ * the CQ and yet another interrupt, the CQ handler should be queued so
+ * that it is processed in a subsequent polling action. The value of
+ * the delay indicates when to reschedule it.
**/
static void
-lpfc_sli4_hba_process_cq(struct work_struct *work)
+__lpfc_sli4_hba_process_cq(struct lpfc_queue *cq)
{
- struct lpfc_queue *cq =
- container_of(work, struct lpfc_queue, irqwork);
struct lpfc_hba *phba = cq->phba;
- struct lpfc_cqe *cqe;
+ unsigned long delay;
bool workposted = false;
- int ccount = 0;
-
- /* Process all the entries to the CQ */
- while ((cqe = lpfc_sli4_cq_get(cq))) {
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
- if (phba->ktime_on)
- cq->isr_timestamp = ktime_get_ns();
- else
- cq->isr_timestamp = 0;
-#endif
- workposted |= lpfc_sli4_fp_handle_cqe(phba, cq, cqe);
- if (!(++ccount % cq->entry_repost))
- break;
- }
- /* Track the max number of CQEs processed in 1 EQ */
- if (ccount > cq->CQ_max_cqe)
- cq->CQ_max_cqe = ccount;
- cq->assoc_qp->EQ_cqe_cnt += ccount;
+ /* process and rearm the CQ */
+ workposted |= __lpfc_sli4_process_cq(phba, cq, lpfc_sli4_fp_handle_cqe,
+ &delay);
- /* Catch the no cq entry condition */
- if (unlikely(ccount == 0))
- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0369 No entry from fast-path completion "
- "queue fcpcqid=%d\n", cq->queue_id);
-
- /* In any case, flash and re-arm the CQ */
- phba->sli4_hba.sli4_cq_release(cq, LPFC_QUEUE_REARM);
+ if (delay) {
+ if (!queue_delayed_work_on(cq->chann, phba->wq,
+ &cq->sched_irqwork, delay))
+ lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ "0367 Cannot schedule soft IRQ "
+ "for cqid=%d on CPU %d\n",
+ cq->queue_id, cq->chann);
+ }
/* wake up worker thread if there are works to be done */
if (workposted)
lpfc_worker_wake_up(phba);
}
-static void
-lpfc_sli4_eq_flush(struct lpfc_hba *phba, struct lpfc_queue *eq)
-{
- struct lpfc_eqe *eqe;
-
- /* walk all the EQ entries and drop on the floor */
- while ((eqe = lpfc_sli4_eq_get(eq)))
- ;
-
- /* Clear and re-arm the EQ */
- phba->sli4_hba.sli4_eq_release(eq, LPFC_QUEUE_REARM);
-}
-
-
/**
- * lpfc_sli4_fof_handle_eqe - Process a Flash Optimized Fabric event queue
- * entry
- * @phba: Pointer to HBA context object.
- * @eqe: Pointer to fast-path event queue entry.
+ * lpfc_sli4_hba_process_cq - fast-path work handler when started by
+ * interrupt
+ * @work: pointer to work element
*
- * This routine process a event queue entry from the Flash Optimized Fabric
- * event queue. It will check the MajorCode and MinorCode to determine this
- * is for a completion event on a completion queue, if not, an error shall be
- * logged and just return. Otherwise, it will get to the corresponding
- * completion queue and process all the entries on the completion queue, rearm
- * the completion queue, and then return.
+ * translates from the work handler and calls the fast-path handler.
**/
static void
-lpfc_sli4_fof_handle_eqe(struct lpfc_hba *phba, struct lpfc_eqe *eqe)
+lpfc_sli4_hba_process_cq(struct work_struct *work)
{
- struct lpfc_queue *cq;
- uint16_t cqid;
+ struct lpfc_queue *cq = container_of(work, struct lpfc_queue, irqwork);
- if (unlikely(bf_get_le32(lpfc_eqe_major_code, eqe) != 0)) {
- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "9147 Not a valid completion "
- "event: majorcode=x%x, minorcode=x%x\n",
- bf_get_le32(lpfc_eqe_major_code, eqe),
- bf_get_le32(lpfc_eqe_minor_code, eqe));
- return;
- }
-
- /* Get the reference to the corresponding CQ */
- cqid = bf_get_le32(lpfc_eqe_resource_id, eqe);
-
- /* Next check for OAS */
- cq = phba->sli4_hba.oas_cq;
- if (unlikely(!cq)) {
- if (phba->sli.sli_flag & LPFC_SLI_ACTIVE)
- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "9148 OAS completion queue "
- "does not exist\n");
- return;
- }
-
- if (unlikely(cqid != cq->queue_id)) {
- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "9149 Miss-matched fast-path compl "
- "queue id: eqcqid=%d, fcpcqid=%d\n",
- cqid, cq->queue_id);
- return;
- }
-
- /* Save EQ associated with this CQ */
- cq->assoc_qp = phba->sli4_hba.fof_eq;
-
- /* CQ work will be processed on CPU affinitized to this IRQ */
- if (!queue_work(phba->wq, &cq->irqwork))
- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "0367 Cannot schedule soft IRQ "
- "for CQ eqcqid=%d, cqid=%d on CPU %d\n",
- cqid, cq->queue_id, smp_processor_id());
+ __lpfc_sli4_hba_process_cq(cq);
}
/**
- * lpfc_sli4_fof_intr_handler - HBA interrupt handler to SLI-4 device
- * @irq: Interrupt number.
- * @dev_id: The device context pointer.
+ * lpfc_sli4_hba_process_cq - fast-path work handler when started by timer
+ * @work: pointer to work element
*
- * This function is directly called from the PCI layer as an interrupt
- * service routine when device with SLI-4 interface spec is enabled with
- * MSI-X multi-message interrupt mode and there is a Flash Optimized Fabric
- * IOCB ring event in the HBA. However, when the device is enabled with either
- * MSI or Pin-IRQ interrupt mode, this function is called as part of the
- * device-level interrupt handler. When the PCI slot is in error recovery
- * or the HBA is undergoing initialization, the interrupt handler will not
- * process the interrupt. The Flash Optimized Fabric ring event are handled in
- * the intrrupt context. This function is called without any lock held.
- * It gets the hbalock to access and update SLI data structures. Note that,
- * the EQ to CQ are one-to-one map such that the EQ index is
- * equal to that of CQ index.
- *
- * This function returns IRQ_HANDLED when interrupt is handled else it
- * returns IRQ_NONE.
+ * translates from the work handler and calls the fast-path handler.
**/
-irqreturn_t
-lpfc_sli4_fof_intr_handler(int irq, void *dev_id)
+static void
+lpfc_sli4_dly_hba_process_cq(struct work_struct *work)
{
- struct lpfc_hba *phba;
- struct lpfc_hba_eq_hdl *hba_eq_hdl;
- struct lpfc_queue *eq;
- struct lpfc_eqe *eqe;
- unsigned long iflag;
- int ecount = 0;
-
- /* Get the driver's phba structure from the dev_id */
- hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id;
- phba = hba_eq_hdl->phba;
-
- if (unlikely(!phba))
- return IRQ_NONE;
-
- /* Get to the EQ struct associated with this vector */
- eq = phba->sli4_hba.fof_eq;
- if (unlikely(!eq))
- return IRQ_NONE;
-
- /* Check device state for handling interrupt */
- if (unlikely(lpfc_intr_state_check(phba))) {
- /* Check again for link_state with lock held */
- spin_lock_irqsave(&phba->hbalock, iflag);
- if (phba->link_state < LPFC_LINK_DOWN)
- /* Flush, clear interrupt, and rearm the EQ */
- lpfc_sli4_eq_flush(phba, eq);
- spin_unlock_irqrestore(&phba->hbalock, iflag);
- return IRQ_NONE;
- }
-
- /*
- * Process all the event on FCP fast-path EQ
- */
- while ((eqe = lpfc_sli4_eq_get(eq))) {
- lpfc_sli4_fof_handle_eqe(phba, eqe);
- if (!(++ecount % eq->entry_repost))
- break;
- eq->EQ_processed++;
- }
+ struct lpfc_queue *cq = container_of(to_delayed_work(work),
+ struct lpfc_queue, sched_irqwork);
- /* Track the max number of EQEs processed in 1 intr */
- if (ecount > eq->EQ_max_eqe)
- eq->EQ_max_eqe = ecount;
-
-
- if (unlikely(ecount == 0)) {
- eq->EQ_no_entry++;
-
- if (phba->intr_type == MSIX)
- /* MSI-X treated interrupt served as no EQ share INT */
- lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
- "9145 MSI-X interrupt with no EQE\n");
- else {
- lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "9146 ISR interrupt with no EQE\n");
- /* Non MSI-X treated on interrupt as EQ share INT */
- return IRQ_NONE;
- }
- }
- /* Always clear and re-arm the fast-path EQ */
- phba->sli4_hba.sli4_eq_release(eq, LPFC_QUEUE_REARM);
- return IRQ_HANDLED;
+ __lpfc_sli4_hba_process_cq(cq);
}
/**
@@ -14392,10 +14290,11 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
struct lpfc_hba *phba;
struct lpfc_hba_eq_hdl *hba_eq_hdl;
struct lpfc_queue *fpeq;
- struct lpfc_eqe *eqe;
unsigned long iflag;
int ecount = 0;
int hba_eqidx;
+ struct lpfc_eq_intr_info *eqi;
+ uint32_t icnt;
/* Get the driver's phba structure from the dev_id */
hba_eq_hdl = (struct lpfc_hba_eq_hdl *)dev_id;
@@ -14404,23 +14303,14 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
if (unlikely(!phba))
return IRQ_NONE;
- if (unlikely(!phba->sli4_hba.hba_eq))
+ if (unlikely(!phba->sli4_hba.hdwq))
return IRQ_NONE;
/* Get to the EQ struct associated with this vector */
- fpeq = phba->sli4_hba.hba_eq[hba_eqidx];
+ fpeq = phba->sli4_hba.hdwq[hba_eqidx].hba_eq;
if (unlikely(!fpeq))
return IRQ_NONE;
- if (lpfc_fcp_look_ahead) {
- if (atomic_dec_and_test(&hba_eq_hdl->hba_eq_in_use))
- phba->sli4_hba.sli4_eq_clr_intr(fpeq);
- else {
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- return IRQ_NONE;
- }
- }
-
/* Check device state for handling interrupt */
if (unlikely(lpfc_intr_state_check(phba))) {
/* Check again for link_state with lock held */
@@ -14429,36 +14319,25 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
/* Flush, clear interrupt, and rearm the EQ */
lpfc_sli4_eq_flush(phba, fpeq);
spin_unlock_irqrestore(&phba->hbalock, iflag);
- if (lpfc_fcp_look_ahead)
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
return IRQ_NONE;
}
- /*
- * Process all the event on FCP fast-path EQ
- */
- while ((eqe = lpfc_sli4_eq_get(fpeq))) {
- lpfc_sli4_hba_handle_eqe(phba, eqe, hba_eqidx);
- if (!(++ecount % fpeq->entry_repost))
- break;
- fpeq->EQ_processed++;
- }
+ eqi = phba->sli4_hba.eq_info;
+ icnt = this_cpu_inc_return(eqi->icnt);
+ fpeq->last_cpu = smp_processor_id();
- /* Track the max number of EQEs processed in 1 intr */
- if (ecount > fpeq->EQ_max_eqe)
- fpeq->EQ_max_eqe = ecount;
+ if (icnt > LPFC_EQD_ISR_TRIGGER &&
+ phba->cfg_irq_chann == 1 &&
+ phba->cfg_auto_imax &&
+ fpeq->q_mode != LPFC_MAX_AUTO_EQ_DELAY &&
+ phba->sli.sli_flag & LPFC_SLI_USE_EQDR)
+ lpfc_sli4_mod_hba_eq_delay(phba, fpeq, LPFC_MAX_AUTO_EQ_DELAY);
- /* Always clear and re-arm the fast-path EQ */
- phba->sli4_hba.sli4_eq_release(fpeq, LPFC_QUEUE_REARM);
+ /* process and rearm the EQ */
+ ecount = lpfc_sli4_process_eq(phba, fpeq);
if (unlikely(ecount == 0)) {
fpeq->EQ_no_entry++;
-
- if (lpfc_fcp_look_ahead) {
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
- return IRQ_NONE;
- }
-
if (phba->intr_type == MSIX)
/* MSI-X treated interrupt served as no EQ share INT */
lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
@@ -14468,9 +14347,6 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id)
return IRQ_NONE;
}
- if (lpfc_fcp_look_ahead)
- atomic_inc(&hba_eq_hdl->hba_eq_in_use);
-
return IRQ_HANDLED;
} /* lpfc_sli4_fp_intr_handler */
@@ -14508,20 +14384,13 @@ lpfc_sli4_intr_handler(int irq, void *dev_id)
/*
* Invoke fast-path host attention interrupt handling as appropriate.
*/
- for (qidx = 0; qidx < phba->io_channel_irqs; qidx++) {
+ for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) {
hba_irq_rc = lpfc_sli4_hba_intr_handler(irq,
&phba->sli4_hba.hba_eq_hdl[qidx]);
if (hba_irq_rc == IRQ_HANDLED)
hba_handled |= true;
}
- if (phba->cfg_fof) {
- hba_irq_rc = lpfc_sli4_fof_intr_handler(irq,
- &phba->sli4_hba.hba_eq_hdl[qidx]);
- if (hba_irq_rc == IRQ_HANDLED)
- hba_handled |= true;
- }
-
return (hba_handled == true) ? IRQ_HANDLED : IRQ_NONE;
} /* lpfc_sli4_intr_handler */
@@ -14553,6 +14422,9 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue)
kfree(queue->rqbp);
}
+ if (!list_empty(&queue->cpu_list))
+ list_del(&queue->cpu_list);
+
if (!list_empty(&queue->wq_list))
list_del(&queue->wq_list);
@@ -14601,6 +14473,7 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
INIT_LIST_HEAD(&queue->wqfull_list);
INIT_LIST_HEAD(&queue->page_list);
INIT_LIST_HEAD(&queue->child_list);
+ INIT_LIST_HEAD(&queue->cpu_list);
/* Set queue parameters now. If the system cannot provide memory
* resources, the free routine needs to know what was allocated.
@@ -14633,8 +14506,10 @@ lpfc_sli4_queue_alloc(struct lpfc_hba *phba, uint32_t page_size,
}
INIT_WORK(&queue->irqwork, lpfc_sli4_hba_process_cq);
INIT_WORK(&queue->spwork, lpfc_sli4_sp_process_cq);
+ INIT_DELAYED_WORK(&queue->sched_irqwork, lpfc_sli4_dly_hba_process_cq);
+ INIT_DELAYED_WORK(&queue->sched_spwork, lpfc_sli4_dly_sp_process_cq);
- /* entry_repost will be set during q creation */
+ /* notify_interval will be set during q creation */
return queue;
out_fail:
@@ -14671,43 +14546,76 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
}
/**
- * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on FCP EQs
- * @phba: HBA structure that indicates port to create a queue on.
- * @startq: The starting FCP EQ to modify
+ * lpfc_modify_hba_eq_delay - Modify Delay Multiplier on EQs
+ * @phba: HBA structure that EQs are on.
+ * @startq: The starting EQ index to modify
+ * @numq: The number of EQs (consecutive indexes) to modify
+ * @usdelay: amount of delay
*
- * This function sends an MODIFY_EQ_DELAY mailbox command to the HBA.
- * The command allows up to LPFC_MAX_EQ_DELAY_EQID_CNT EQ ID's to be
- * updated in one mailbox command.
+ * This function revises the EQ delay on 1 or more EQs. The EQ delay
+ * is set either by writing to a register (if supported by the SLI Port)
+ * or by mailbox command. The mailbox command allows several EQs to be
+ * updated at once.
*
- * The @phba struct is used to send mailbox command to HBA. The @startq
- * is used to get the starting FCP EQ to change.
- * This function is asynchronous and will wait for the mailbox
- * command to finish before continuing.
+ * The @phba struct is used to send a mailbox command to HBA. The @startq
+ * is used to get the starting EQ index to change. The @numq value is
+ * used to specify how many consecutive EQ indexes, starting at EQ index,
+ * are to be changed. This function is asynchronous and will wait for any
+ * mailbox commands to finish before returning.
*
- * On success this function will return a zero. If unable to allocate enough
- * memory this function will return -ENOMEM. If the queue create mailbox command
- * fails this function will return -ENXIO.
+ * On success this function will return a zero. If unable to allocate
+ * enough memory this function will return -ENOMEM. If a mailbox command
+ * fails this function will return -ENXIO. Note: on ENXIO, some EQs may
+ * have had their delay multipler changed.
**/
-int
+void
lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
- uint32_t numq, uint32_t imax)
+ uint32_t numq, uint32_t usdelay)
{
struct lpfc_mbx_modify_eq_delay *eq_delay;
LPFC_MBOXQ_t *mbox;
struct lpfc_queue *eq;
- int cnt, rc, length, status = 0;
+ int cnt = 0, rc, length;
uint32_t shdr_status, shdr_add_status;
- uint32_t result, val;
+ uint32_t dmult;
int qidx;
union lpfc_sli4_cfg_shdr *shdr;
- uint16_t dmult;
- if (startq >= phba->io_channel_irqs)
- return 0;
+ if (startq >= phba->cfg_irq_chann)
+ return;
+
+ if (usdelay > 0xFFFF) {
+ lpfc_printf_log(phba, KERN_INFO, LOG_INIT | LOG_FCP | LOG_NVME,
+ "6429 usdelay %d too large. Scaled down to "
+ "0xFFFF.\n", usdelay);
+ usdelay = 0xFFFF;
+ }
+
+ /* set values by EQ_DELAY register if supported */
+ if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
+ for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
+ eq = phba->sli4_hba.hdwq[qidx].hba_eq;
+ if (!eq)
+ continue;
+
+ lpfc_sli4_mod_hba_eq_delay(phba, eq, usdelay);
+
+ if (++cnt >= numq)
+ break;
+ }
+
+ return;
+ }
+
+ /* Otherwise, set values by mailbox cmd */
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
- if (!mbox)
- return -ENOMEM;
+ if (!mbox) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_FCP | LOG_NVME,
+ "6428 Failed allocating mailbox cmd buffer."
+ " EQ delay was not set.\n");
+ return;
+ }
length = (sizeof(struct lpfc_mbx_modify_eq_delay) -
sizeof(struct lpfc_sli4_cfg_mhdr));
lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_COMMON,
@@ -14716,45 +14624,22 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
eq_delay = &mbox->u.mqe.un.eq_delay;
/* Calculate delay multiper from maximum interrupt per second */
- result = imax / phba->io_channel_irqs;
- if (result > LPFC_DMULT_CONST || result == 0)
- dmult = 0;
- else
- dmult = LPFC_DMULT_CONST/result - 1;
+ dmult = (usdelay * LPFC_DMULT_CONST) / LPFC_SEC_TO_USEC;
+ if (dmult)
+ dmult--;
if (dmult > LPFC_DMULT_MAX)
dmult = LPFC_DMULT_MAX;
- cnt = 0;
- for (qidx = startq; qidx < phba->io_channel_irqs; qidx++) {
- eq = phba->sli4_hba.hba_eq[qidx];
+ for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) {
+ eq = phba->sli4_hba.hdwq[qidx].hba_eq;
if (!eq)
continue;
- eq->q_mode = imax;
+ eq->q_mode = usdelay;
eq_delay->u.request.eq[cnt].eq_id = eq->queue_id;
eq_delay->u.request.eq[cnt].phase = 0;
eq_delay->u.request.eq[cnt].delay_multi = dmult;
- cnt++;
-
- /* q_mode is only used for auto_imax */
- if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) {
- /* Use EQ Delay Register method for q_mode */
-
- /* Convert for EQ Delay register */
- val = phba->cfg_fcp_imax;
- if (val) {
- /* First, interrupts per sec per EQ */
- val = phba->cfg_fcp_imax /
- phba->io_channel_irqs;
-
- /* us delay between each interrupt */
- val = LPFC_SEC_TO_USEC / val;
- }
- eq->q_mode = val;
- } else {
- eq->q_mode = imax;
- }
- if (cnt >= numq)
+ if (++cnt >= numq)
break;
}
eq_delay->u.request.num_eq = cnt;
@@ -14772,10 +14657,9 @@ lpfc_modify_hba_eq_delay(struct lpfc_hba *phba, uint32_t startq,
"2512 MODIFY_EQ_DELAY mailbox failed with "
"status x%x add_status x%x, mbx status x%x\n",
shdr_status, shdr_add_status, rc);
- status = -ENXIO;
}
mempool_free(mbox, phba->mbox_mem_pool);
- return status;
+ return;
}
/**
@@ -14900,8 +14784,8 @@ lpfc_eq_create(struct lpfc_hba *phba, struct lpfc_queue *eq, uint32_t imax)
if (eq->queue_id == 0xFFFF)
status = -ENXIO;
eq->host_index = 0;
- eq->hba_index = 0;
- eq->entry_repost = LPFC_EQ_REPOST;
+ eq->notify_interval = LPFC_EQ_NOTIFY_INTRVL;
+ eq->max_proc_limit = LPFC_EQ_MAX_PROC_LIMIT;
mempool_free(mbox, phba->mbox_mem_pool);
return status;
@@ -15039,10 +14923,13 @@ lpfc_cq_create(struct lpfc_hba *phba, struct lpfc_queue *cq,
cq->subtype = subtype;
cq->queue_id = bf_get(lpfc_mbx_cq_create_q_id, &cq_create->u.response);
cq->assoc_qid = eq->queue_id;
+ cq->assoc_qp = eq;
cq->host_index = 0;
- cq->hba_index = 0;
- cq->entry_repost = LPFC_CQ_REPOST;
+ cq->notify_interval = LPFC_CQ_NOTIFY_INTRVL;
+ cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit, cq->entry_count);
+ if (cq->queue_id > phba->sli4_hba.cq_max)
+ phba->sli4_hba.cq_max = cq->queue_id;
out:
mempool_free(mbox, phba->mbox_mem_pool);
return status;
@@ -15052,7 +14939,7 @@ out:
* lpfc_cq_create_set - Create a set of Completion Queues on the HBA for MRQ
* @phba: HBA structure that indicates port to create a queue on.
* @cqp: The queue structure array to use to create the completion queues.
- * @eqp: The event queue array to bind these completion queues to.
+ * @hdwq: The hardware queue array with the EQ to bind completion queues to.
*
* This function creates a set of completion queue, s to support MRQ
* as detailed in @cqp, on a port,
@@ -15072,7 +14959,8 @@ out:
**/
int
lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
- struct lpfc_queue **eqp, uint32_t type, uint32_t subtype)
+ struct lpfc_sli4_hdw_queue *hdwq, uint32_t type,
+ uint32_t subtype)
{
struct lpfc_queue *cq;
struct lpfc_queue *eq;
@@ -15087,7 +14975,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
/* sanity check on queue memory */
numcq = phba->cfg_nvmet_mrq;
- if (!cqp || !eqp || !numcq)
+ if (!cqp || !hdwq || !numcq)
return -ENODEV;
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
@@ -15114,7 +15002,7 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
for (idx = 0; idx < numcq; idx++) {
cq = cqp[idx];
- eq = eqp[idx];
+ eq = hdwq[idx].hba_eq;
if (!cq || !eq) {
status = -ENOMEM;
goto out;
@@ -15247,9 +15135,11 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
cq->type = type;
cq->subtype = subtype;
cq->assoc_qid = eq->queue_id;
+ cq->assoc_qp = eq;
cq->host_index = 0;
- cq->hba_index = 0;
- cq->entry_repost = LPFC_CQ_REPOST;
+ cq->notify_interval = LPFC_CQ_NOTIFY_INTRVL;
+ cq->max_proc_limit = min(phba->cfg_cq_max_proc_limit,
+ cq->entry_count);
cq->chann = idx;
rc = 0;
@@ -15287,6 +15177,8 @@ lpfc_cq_create_set(struct lpfc_hba *phba, struct lpfc_queue **cqp,
for (idx = 0; idx < numcq; idx++) {
cq = cqp[idx];
cq->queue_id = rc + idx;
+ if (cq->queue_id > phba->sli4_hba.cq_max)
+ phba->sli4_hba.cq_max = cq->queue_id;
}
out:
@@ -15499,7 +15391,6 @@ lpfc_mq_create(struct lpfc_hba *phba, struct lpfc_queue *mq,
mq->subtype = subtype;
mq->host_index = 0;
mq->hba_index = 0;
- mq->entry_repost = LPFC_MQ_REPOST;
/* link the mq onto the parent cq child list */
list_add_tail(&mq->list, &cq->child_list);
@@ -15765,7 +15656,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq,
wq->subtype = subtype;
wq->host_index = 0;
wq->hba_index = 0;
- wq->entry_repost = LPFC_RELEASE_NOTIFICATION_INTERVAL;
+ wq->notify_interval = LPFC_WQ_NOTIFY_INTRVL;
/* link the wq onto the parent cq child list */
list_add_tail(&wq->list, &cq->child_list);
@@ -15959,7 +15850,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
hrq->subtype = subtype;
hrq->host_index = 0;
hrq->hba_index = 0;
- hrq->entry_repost = LPFC_RQ_REPOST;
+ hrq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
/* now create the data queue */
lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
@@ -16052,7 +15943,7 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq,
drq->subtype = subtype;
drq->host_index = 0;
drq->hba_index = 0;
- drq->entry_repost = LPFC_RQ_REPOST;
+ drq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
/* link the header and data RQs onto the parent cq child list */
list_add_tail(&hrq->list, &cq->child_list);
@@ -16210,7 +16101,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
hrq->subtype = subtype;
hrq->host_index = 0;
hrq->hba_index = 0;
- hrq->entry_repost = LPFC_RQ_REPOST;
+ hrq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
drq->db_format = LPFC_DB_RING_FORMAT;
drq->db_regaddr = phba->sli4_hba.RQDBregaddr;
@@ -16219,7 +16110,7 @@ lpfc_mrq_create(struct lpfc_hba *phba, struct lpfc_queue **hrqp,
drq->subtype = subtype;
drq->host_index = 0;
drq->hba_index = 0;
- drq->entry_repost = LPFC_RQ_REPOST;
+ drq->notify_interval = LPFC_RQ_NOTIFY_INTRVL;
list_add_tail(&hrq->list, &cq->child_list);
list_add_tail(&drq->list, &cq->child_list);
@@ -16279,6 +16170,7 @@ lpfc_eq_destroy(struct lpfc_hba *phba, struct lpfc_queue *eq)
/* sanity check on queue memory */
if (!eq)
return -ENODEV;
+
mbox = mempool_alloc(eq->phba->mbox_mem_pool, GFP_KERNEL);
if (!mbox)
return -ENOMEM;
@@ -16828,22 +16720,21 @@ lpfc_sli4_post_sgl_list(struct lpfc_hba *phba,
}
/**
- * lpfc_sli4_post_scsi_sgl_block - post a block of scsi sgl list to firmware
+ * lpfc_sli4_post_io_sgl_block - post a block of nvme sgl list to firmware
* @phba: pointer to lpfc hba data structure.
- * @sblist: pointer to scsi buffer list.
+ * @nblist: pointer to nvme buffer list.
* @count: number of scsi buffers on the list.
*
* This routine is invoked to post a block of @count scsi sgl pages from a
- * SCSI buffer list @sblist to the HBA using non-embedded mailbox command.
+ * SCSI buffer list @nblist to the HBA using non-embedded mailbox command.
* No Lock is held.
*
**/
-int
-lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
- struct list_head *sblist,
- int count)
+static int
+lpfc_sli4_post_io_sgl_block(struct lpfc_hba *phba, struct list_head *nblist,
+ int count)
{
- struct lpfc_scsi_buf *psb;
+ struct lpfc_io_buf *lpfc_ncmd;
struct lpfc_mbx_post_uembed_sgl_page1 *sgl;
struct sgl_page_pairs *sgl_pg_pairs;
void *viraddr;
@@ -16861,25 +16752,25 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
sizeof(union lpfc_sli4_cfg_shdr) + sizeof(uint32_t);
if (reqlen > SLI4_PAGE_SIZE) {
lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
- "0217 Block sgl registration required DMA "
+ "6118 Block sgl registration required DMA "
"size (%d) great than a page\n", reqlen);
return -ENOMEM;
}
mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
if (!mbox) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "0283 Failed to allocate mbox cmd memory\n");
+ "6119 Failed to allocate mbox cmd memory\n");
return -ENOMEM;
}
/* Allocate DMA memory and set up the non-embedded mailbox command */
alloclen = lpfc_sli4_config(phba, mbox, LPFC_MBOX_SUBSYSTEM_FCOE,
- LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES, reqlen,
- LPFC_SLI4_MBX_NEMBED);
+ LPFC_MBOX_OPCODE_FCOE_POST_SGL_PAGES,
+ reqlen, LPFC_SLI4_MBX_NEMBED);
if (alloclen < reqlen) {
lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
- "2561 Allocated DMA memory size (%d) is "
+ "6120 Allocated DMA memory size (%d) is "
"less than the requested DMA memory "
"size (%d)\n", alloclen, reqlen);
lpfc_sli4_mbox_cmd_free(phba, mbox);
@@ -16894,14 +16785,15 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
sgl_pg_pairs = &sgl->sgl_pg_pairs;
pg_pairs = 0;
- list_for_each_entry(psb, sblist, list) {
+ list_for_each_entry(lpfc_ncmd, nblist, list) {
/* Set up the sge entry */
sgl_pg_pairs->sgl_pg0_addr_lo =
- cpu_to_le32(putPaddrLow(psb->dma_phys_bpl));
+ cpu_to_le32(putPaddrLow(lpfc_ncmd->dma_phys_sgl));
sgl_pg_pairs->sgl_pg0_addr_hi =
- cpu_to_le32(putPaddrHigh(psb->dma_phys_bpl));
+ cpu_to_le32(putPaddrHigh(lpfc_ncmd->dma_phys_sgl));
if (phba->cfg_sg_dma_buf_size > SGL_PAGE_SIZE)
- pdma_phys_bpl1 = psb->dma_phys_bpl + SGL_PAGE_SIZE;
+ pdma_phys_bpl1 = lpfc_ncmd->dma_phys_sgl +
+ SGL_PAGE_SIZE;
else
pdma_phys_bpl1 = 0;
sgl_pg_pairs->sgl_pg1_addr_lo =
@@ -16910,7 +16802,7 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
cpu_to_le32(putPaddrHigh(pdma_phys_bpl1));
/* Keep the first xritag on the list */
if (pg_pairs == 0)
- xritag_start = psb->cur_iocbq.sli4_xritag;
+ xritag_start = lpfc_ncmd->cur_iocbq.sli4_xritag;
sgl_pg_pairs++;
pg_pairs++;
}
@@ -16919,20 +16811,20 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
/* Perform endian conversion if necessary */
sgl->word0 = cpu_to_le32(sgl->word0);
- if (!phba->sli4_hba.intr_enable)
+ if (!phba->sli4_hba.intr_enable) {
rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL);
- else {
+ } else {
mbox_tmo = lpfc_mbox_tmo_val(phba, mbox);
rc = lpfc_sli_issue_mbox_wait(phba, mbox, mbox_tmo);
}
- shdr = (union lpfc_sli4_cfg_shdr *) &sgl->cfg_shdr;
+ shdr = (union lpfc_sli4_cfg_shdr *)&sgl->cfg_shdr;
shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response);
shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response);
if (rc != MBX_TIMEOUT)
lpfc_sli4_mbox_cmd_free(phba, mbox);
if (shdr_status || shdr_add_status || rc) {
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "2564 POST_SGL_BLOCK mailbox command failed "
+ "6125 POST_SGL_BLOCK mailbox command failed "
"status x%x add_status x%x mbx status x%x\n",
shdr_status, shdr_add_status, rc);
rc = -ENXIO;
@@ -16941,6 +16833,134 @@ lpfc_sli4_post_scsi_sgl_block(struct lpfc_hba *phba,
}
/**
+ * lpfc_sli4_post_io_sgl_list - Post blocks of nvme buffer sgls from a list
+ * @phba: pointer to lpfc hba data structure.
+ * @post_nblist: pointer to the nvme buffer list.
+ *
+ * This routine walks a list of nvme buffers that was passed in. It attempts
+ * to construct blocks of nvme buffer sgls which contains contiguous xris and
+ * uses the non-embedded SGL block post mailbox commands to post to the port.
+ * For single NVME buffer sgl with non-contiguous xri, if any, it shall use
+ * embedded SGL post mailbox command for posting. The @post_nblist passed in
+ * must be local list, thus no lock is needed when manipulate the list.
+ *
+ * Returns: 0 = failure, non-zero number of successfully posted buffers.
+ **/
+int
+lpfc_sli4_post_io_sgl_list(struct lpfc_hba *phba,
+ struct list_head *post_nblist, int sb_count)
+{
+ struct lpfc_io_buf *lpfc_ncmd, *lpfc_ncmd_next;
+ int status, sgl_size;
+ int post_cnt = 0, block_cnt = 0, num_posting = 0, num_posted = 0;
+ dma_addr_t pdma_phys_sgl1;
+ int last_xritag = NO_XRI;
+ int cur_xritag;
+ LIST_HEAD(prep_nblist);
+ LIST_HEAD(blck_nblist);
+ LIST_HEAD(nvme_nblist);
+
+ /* sanity check */
+ if (sb_count <= 0)
+ return -EINVAL;
+
+ sgl_size = phba->cfg_sg_dma_buf_size;
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next, post_nblist, list) {
+ list_del_init(&lpfc_ncmd->list);
+ block_cnt++;
+ if ((last_xritag != NO_XRI) &&
+ (lpfc_ncmd->cur_iocbq.sli4_xritag != last_xritag + 1)) {
+ /* a hole in xri block, form a sgl posting block */
+ list_splice_init(&prep_nblist, &blck_nblist);
+ post_cnt = block_cnt - 1;
+ /* prepare list for next posting block */
+ list_add_tail(&lpfc_ncmd->list, &prep_nblist);
+ block_cnt = 1;
+ } else {
+ /* prepare list for next posting block */
+ list_add_tail(&lpfc_ncmd->list, &prep_nblist);
+ /* enough sgls for non-embed sgl mbox command */
+ if (block_cnt == LPFC_NEMBED_MBOX_SGL_CNT) {
+ list_splice_init(&prep_nblist, &blck_nblist);
+ post_cnt = block_cnt;
+ block_cnt = 0;
+ }
+ }
+ num_posting++;
+ last_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag;
+
+ /* end of repost sgl list condition for NVME buffers */
+ if (num_posting == sb_count) {
+ if (post_cnt == 0) {
+ /* last sgl posting block */
+ list_splice_init(&prep_nblist, &blck_nblist);
+ post_cnt = block_cnt;
+ } else if (block_cnt == 1) {
+ /* last single sgl with non-contiguous xri */
+ if (sgl_size > SGL_PAGE_SIZE)
+ pdma_phys_sgl1 =
+ lpfc_ncmd->dma_phys_sgl +
+ SGL_PAGE_SIZE;
+ else
+ pdma_phys_sgl1 = 0;
+ cur_xritag = lpfc_ncmd->cur_iocbq.sli4_xritag;
+ status = lpfc_sli4_post_sgl(
+ phba, lpfc_ncmd->dma_phys_sgl,
+ pdma_phys_sgl1, cur_xritag);
+ if (status) {
+ /* Post error. Buffer unavailable. */
+ lpfc_ncmd->flags |=
+ LPFC_SBUF_NOT_POSTED;
+ } else {
+ /* Post success. Bffer available. */
+ lpfc_ncmd->flags &=
+ ~LPFC_SBUF_NOT_POSTED;
+ lpfc_ncmd->status = IOSTAT_SUCCESS;
+ num_posted++;
+ }
+ /* success, put on NVME buffer sgl list */
+ list_add_tail(&lpfc_ncmd->list, &nvme_nblist);
+ }
+ }
+
+ /* continue until a nembed page worth of sgls */
+ if (post_cnt == 0)
+ continue;
+
+ /* post block of NVME buffer list sgls */
+ status = lpfc_sli4_post_io_sgl_block(phba, &blck_nblist,
+ post_cnt);
+
+ /* don't reset xirtag due to hole in xri block */
+ if (block_cnt == 0)
+ last_xritag = NO_XRI;
+
+ /* reset NVME buffer post count for next round of posting */
+ post_cnt = 0;
+
+ /* put posted NVME buffer-sgl posted on NVME buffer sgl list */
+ while (!list_empty(&blck_nblist)) {
+ list_remove_head(&blck_nblist, lpfc_ncmd,
+ struct lpfc_io_buf, list);
+ if (status) {
+ /* Post error. Mark buffer unavailable. */
+ lpfc_ncmd->flags |= LPFC_SBUF_NOT_POSTED;
+ } else {
+ /* Post success, Mark buffer available. */
+ lpfc_ncmd->flags &= ~LPFC_SBUF_NOT_POSTED;
+ lpfc_ncmd->status = IOSTAT_SUCCESS;
+ num_posted++;
+ }
+ list_add_tail(&lpfc_ncmd->list, &nvme_nblist);
+ }
+ }
+ /* Push NVME buffers with sgl posted to the available list */
+ lpfc_io_buf_replenish(phba, &nvme_nblist);
+
+ return num_posted;
+}
+
+/**
* lpfc_fc_frame_check - Check that this frame is a valid frame to handle
* @phba: pointer to lpfc_hba struct that the frame was received on
* @fc_hdr: A pointer to the FC Header data (In Big Endian Format)
@@ -19500,7 +19520,7 @@ lpfc_drain_txq(struct lpfc_hba *phba)
if (phba->link_flag & LS_MDS_LOOPBACK) {
/* MDS WQE are posted only to first WQ*/
- wq = phba->sli4_hba.fcp_wq[0];
+ wq = phba->sli4_hba.hdwq[0].fcp_wq;
if (unlikely(!wq))
return 0;
pring = wq->pring;
@@ -19708,7 +19728,7 @@ lpfc_wqe_bpl2sgl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeq,
* @pwqe: Pointer to command WQE.
**/
int
-lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
+lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
struct lpfc_iocbq *pwqe)
{
union lpfc_wqe128 *wqe = &pwqe->wqe;
@@ -19722,7 +19742,8 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
/* NVME_LS and NVME_LS ABTS requests. */
if (pwqe->iocb_flag & LPFC_IO_NVME_LS) {
pring = phba->sli4_hba.nvmels_wq->pring;
- spin_lock_irqsave(&pring->ring_lock, iflags);
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
sglq = __lpfc_sli_get_els_sglq(phba, pwqe);
if (!sglq) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19750,12 +19771,13 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
/* NVME_FCREQ and NVME_ABTS requests */
if (pwqe->iocb_flag & LPFC_IO_NVME) {
/* Get the IO distribution (hba_wqidx) for WQ assignment. */
- pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring;
+ wq = qp->nvme_wq;
+ pring = wq->pring;
- spin_lock_irqsave(&pring->ring_lock, iflags);
- wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx];
- bf_set(wqe_cqid, &wqe->generic.wqe_com,
- phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id);
+ bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
+
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
ret = lpfc_sli4_wq_put(wq, wqe);
if (ret) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19769,9 +19791,9 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
/* NVMET requests */
if (pwqe->iocb_flag & LPFC_IO_NVMET) {
/* Get the IO distribution (hba_wqidx) for WQ assignment. */
- pring = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx]->pring;
+ wq = qp->nvme_wq;
+ pring = wq->pring;
- spin_lock_irqsave(&pring->ring_lock, iflags);
ctxp = pwqe->context2;
sglq = ctxp->ctxbuf->sglq;
if (pwqe->sli4_xritag == NO_XRI) {
@@ -19780,9 +19802,10 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
}
bf_set(wqe_xri_tag, &pwqe->wqe.xmit_bls_rsp.wqe_com,
pwqe->sli4_xritag);
- wq = phba->sli4_hba.nvme_wq[pwqe->hba_wqidx];
- bf_set(wqe_cqid, &wqe->generic.wqe_com,
- phba->sli4_hba.nvme_cq[pwqe->hba_wqidx]->queue_id);
+ bf_set(wqe_cqid, &wqe->generic.wqe_com, qp->nvme_cq_map);
+
+ lpfc_qp_spin_lock_irqsave(&pring->ring_lock, iflags,
+ qp, wq_access);
ret = lpfc_sli4_wq_put(wq, wqe);
if (ret) {
spin_unlock_irqrestore(&pring->ring_lock, iflags);
@@ -19794,3 +19817,647 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, uint32_t ring_number,
}
return WQE_ERROR;
}
+
+#ifdef LPFC_MXP_STAT
+/**
+ * lpfc_snapshot_mxp - Snapshot pbl, pvt and busy count
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ *
+ * The purpose of this routine is to take a snapshot of pbl, pvt and busy count
+ * 15 seconds after a test case is running.
+ *
+ * The user should call lpfc_debugfs_multixripools_write before running a test
+ * case to clear stat_snapshot_taken. Then the user starts a test case. During
+ * test case is running, stat_snapshot_taken is incremented by 1 every time when
+ * this routine is called from heartbeat timer. When stat_snapshot_taken is
+ * equal to LPFC_MXP_SNAPSHOT_TAKEN, a snapshot is taken.
+ **/
+void lpfc_snapshot_mxp(struct lpfc_hba *phba, u32 hwqid)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_multixri_pool *multixri_pool;
+ struct lpfc_pvt_pool *pvt_pool;
+ struct lpfc_pbl_pool *pbl_pool;
+ u32 txcmplq_cnt;
+
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ multixri_pool = qp->p_multixri_pool;
+ if (!multixri_pool)
+ return;
+
+ if (multixri_pool->stat_snapshot_taken == LPFC_MXP_SNAPSHOT_TAKEN) {
+ pvt_pool = &qp->p_multixri_pool->pvt_pool;
+ pbl_pool = &qp->p_multixri_pool->pbl_pool;
+ txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+ if (qp->nvme_wq)
+ txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+
+ multixri_pool->stat_pbl_count = pbl_pool->count;
+ multixri_pool->stat_pvt_count = pvt_pool->count;
+ multixri_pool->stat_busy_count = txcmplq_cnt;
+ }
+
+ multixri_pool->stat_snapshot_taken++;
+}
+#endif
+
+/**
+ * lpfc_adjust_pvt_pool_count - Adjust private pool count
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ *
+ * This routine moves some XRIs from private to public pool when private pool
+ * is not busy.
+ **/
+void lpfc_adjust_pvt_pool_count(struct lpfc_hba *phba, u32 hwqid)
+{
+ struct lpfc_multixri_pool *multixri_pool;
+ u32 io_req_count;
+ u32 prev_io_req_count;
+
+ multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+ if (!multixri_pool)
+ return;
+ io_req_count = multixri_pool->io_req_count;
+ prev_io_req_count = multixri_pool->prev_io_req_count;
+
+ if (prev_io_req_count != io_req_count) {
+ /* Private pool is busy */
+ multixri_pool->prev_io_req_count = io_req_count;
+ } else {
+ /* Private pool is not busy.
+ * Move XRIs from private to public pool.
+ */
+ lpfc_move_xri_pvt_to_pbl(phba, hwqid);
+ }
+}
+
+/**
+ * lpfc_adjust_high_watermark - Adjust high watermark
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ *
+ * This routine sets high watermark as number of outstanding XRIs,
+ * but make sure the new value is between xri_limit/2 and xri_limit.
+ **/
+void lpfc_adjust_high_watermark(struct lpfc_hba *phba, u32 hwqid)
+{
+ u32 new_watermark;
+ u32 watermark_max;
+ u32 watermark_min;
+ u32 xri_limit;
+ u32 txcmplq_cnt;
+ u32 abts_io_bufs;
+ struct lpfc_multixri_pool *multixri_pool;
+ struct lpfc_sli4_hdw_queue *qp;
+
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ multixri_pool = qp->p_multixri_pool;
+ if (!multixri_pool)
+ return;
+ xri_limit = multixri_pool->xri_limit;
+
+ watermark_max = xri_limit;
+ watermark_min = xri_limit / 2;
+
+ txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+ abts_io_bufs = qp->abts_scsi_io_bufs;
+ if (qp->nvme_wq) {
+ txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+ abts_io_bufs += qp->abts_nvme_io_bufs;
+ }
+
+ new_watermark = txcmplq_cnt + abts_io_bufs;
+ new_watermark = min(watermark_max, new_watermark);
+ new_watermark = max(watermark_min, new_watermark);
+ multixri_pool->pvt_pool.high_watermark = new_watermark;
+
+#ifdef LPFC_MXP_STAT
+ multixri_pool->stat_max_hwm = max(multixri_pool->stat_max_hwm,
+ new_watermark);
+#endif
+}
+
+/**
+ * lpfc_move_xri_pvt_to_pbl - Move some XRIs from private to public pool
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ *
+ * This routine is called from hearbeat timer when pvt_pool is idle.
+ * All free XRIs are moved from private to public pool on hwqid with 2 steps.
+ * The first step moves (all - low_watermark) amount of XRIs.
+ * The second step moves the rest of XRIs.
+ **/
+void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid)
+{
+ struct lpfc_pbl_pool *pbl_pool;
+ struct lpfc_pvt_pool *pvt_pool;
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ unsigned long iflag;
+ struct list_head tmp_list;
+ u32 tmp_count;
+
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ pbl_pool = &qp->p_multixri_pool->pbl_pool;
+ pvt_pool = &qp->p_multixri_pool->pvt_pool;
+ tmp_count = 0;
+
+ lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag, qp, mv_to_pub_pool);
+ lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_from_pvt_pool);
+
+ if (pvt_pool->count > pvt_pool->low_watermark) {
+ /* Step 1: move (all - low_watermark) from pvt_pool
+ * to pbl_pool
+ */
+
+ /* Move low watermark of bufs from pvt_pool to tmp_list */
+ INIT_LIST_HEAD(&tmp_list);
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &pvt_pool->list, list) {
+ list_move_tail(&lpfc_ncmd->list, &tmp_list);
+ tmp_count++;
+ if (tmp_count >= pvt_pool->low_watermark)
+ break;
+ }
+
+ /* Move all bufs from pvt_pool to pbl_pool */
+ list_splice_init(&pvt_pool->list, &pbl_pool->list);
+
+ /* Move all bufs from tmp_list to pvt_pool */
+ list_splice(&tmp_list, &pvt_pool->list);
+
+ pbl_pool->count += (pvt_pool->count - tmp_count);
+ pvt_pool->count = tmp_count;
+ } else {
+ /* Step 2: move the rest from pvt_pool to pbl_pool */
+ list_splice_init(&pvt_pool->list, &pbl_pool->list);
+ pbl_pool->count += pvt_pool->count;
+ pvt_pool->count = 0;
+ }
+
+ spin_unlock(&pvt_pool->lock);
+ spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+}
+
+/**
+ * _lpfc_move_xri_pbl_to_pvt - Move some XRIs from public to private pool
+ * @phba: pointer to lpfc hba data structure
+ * @pbl_pool: specified public free XRI pool
+ * @pvt_pool: specified private free XRI pool
+ * @count: number of XRIs to move
+ *
+ * This routine tries to move some free common bufs from the specified pbl_pool
+ * to the specified pvt_pool. It might move less than count XRIs if there's not
+ * enough in public pool.
+ *
+ * Return:
+ * true - if XRIs are successfully moved from the specified pbl_pool to the
+ * specified pvt_pool
+ * false - if the specified pbl_pool is empty or locked by someone else
+ **/
+static bool
+_lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
+ struct lpfc_pbl_pool *pbl_pool,
+ struct lpfc_pvt_pool *pvt_pool, u32 count)
+{
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ unsigned long iflag;
+ int ret;
+
+ ret = spin_trylock_irqsave(&pbl_pool->lock, iflag);
+ if (ret) {
+ if (pbl_pool->count) {
+ /* Move a batch of XRIs from public to private pool */
+ lpfc_qp_spin_lock(&pvt_pool->lock, qp, mv_to_pvt_pool);
+ list_for_each_entry_safe(lpfc_ncmd,
+ lpfc_ncmd_next,
+ &pbl_pool->list,
+ list) {
+ list_move_tail(&lpfc_ncmd->list,
+ &pvt_pool->list);
+ pvt_pool->count++;
+ pbl_pool->count--;
+ count--;
+ if (count == 0)
+ break;
+ }
+
+ spin_unlock(&pvt_pool->lock);
+ spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+ return true;
+ }
+ spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+ }
+
+ return false;
+}
+
+/**
+ * lpfc_move_xri_pbl_to_pvt - Move some XRIs from public to private pool
+ * @phba: pointer to lpfc hba data structure.
+ * @hwqid: belong to which HWQ.
+ * @count: number of XRIs to move
+ *
+ * This routine tries to find some free common bufs in one of public pools with
+ * Round Robin method. The search always starts from local hwqid, then the next
+ * HWQ which was found last time (rrb_next_hwqid). Once a public pool is found,
+ * a batch of free common bufs are moved to private pool on hwqid.
+ * It might move less than count XRIs if there's not enough in public pool.
+ **/
+void lpfc_move_xri_pbl_to_pvt(struct lpfc_hba *phba, u32 hwqid, u32 count)
+{
+ struct lpfc_multixri_pool *multixri_pool;
+ struct lpfc_multixri_pool *next_multixri_pool;
+ struct lpfc_pvt_pool *pvt_pool;
+ struct lpfc_pbl_pool *pbl_pool;
+ struct lpfc_sli4_hdw_queue *qp;
+ u32 next_hwqid;
+ u32 hwq_count;
+ int ret;
+
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ multixri_pool = qp->p_multixri_pool;
+ pvt_pool = &multixri_pool->pvt_pool;
+ pbl_pool = &multixri_pool->pbl_pool;
+
+ /* Check if local pbl_pool is available */
+ ret = _lpfc_move_xri_pbl_to_pvt(phba, qp, pbl_pool, pvt_pool, count);
+ if (ret) {
+#ifdef LPFC_MXP_STAT
+ multixri_pool->local_pbl_hit_count++;
+#endif
+ return;
+ }
+
+ hwq_count = phba->cfg_hdw_queue;
+
+ /* Get the next hwqid which was found last time */
+ next_hwqid = multixri_pool->rrb_next_hwqid;
+
+ do {
+ /* Go to next hwq */
+ next_hwqid = (next_hwqid + 1) % hwq_count;
+
+ next_multixri_pool =
+ phba->sli4_hba.hdwq[next_hwqid].p_multixri_pool;
+ pbl_pool = &next_multixri_pool->pbl_pool;
+
+ /* Check if the public free xri pool is available */
+ ret = _lpfc_move_xri_pbl_to_pvt(
+ phba, qp, pbl_pool, pvt_pool, count);
+
+ /* Exit while-loop if success or all hwqid are checked */
+ } while (!ret && next_hwqid != multixri_pool->rrb_next_hwqid);
+
+ /* Starting point for the next time */
+ multixri_pool->rrb_next_hwqid = next_hwqid;
+
+ if (!ret) {
+ /* stats: all public pools are empty*/
+ multixri_pool->pbl_empty_count++;
+ }
+
+#ifdef LPFC_MXP_STAT
+ if (ret) {
+ if (next_hwqid == hwqid)
+ multixri_pool->local_pbl_hit_count++;
+ else
+ multixri_pool->other_pbl_hit_count++;
+ }
+#endif
+}
+
+/**
+ * lpfc_keep_pvt_pool_above_lowwm - Keep pvt_pool above low watermark
+ * @phba: pointer to lpfc hba data structure.
+ * @qp: belong to which HWQ.
+ *
+ * This routine get a batch of XRIs from pbl_pool if pvt_pool is less than
+ * low watermark.
+ **/
+void lpfc_keep_pvt_pool_above_lowwm(struct lpfc_hba *phba, u32 hwqid)
+{
+ struct lpfc_multixri_pool *multixri_pool;
+ struct lpfc_pvt_pool *pvt_pool;
+
+ multixri_pool = phba->sli4_hba.hdwq[hwqid].p_multixri_pool;
+ pvt_pool = &multixri_pool->pvt_pool;
+
+ if (pvt_pool->count < pvt_pool->low_watermark)
+ lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
+}
+
+/**
+ * lpfc_release_io_buf - Return one IO buf back to free pool
+ * @phba: pointer to lpfc hba data structure.
+ * @lpfc_ncmd: IO buf to be returned.
+ * @qp: belong to which HWQ.
+ *
+ * This routine returns one IO buf back to free pool. If this is an urgent IO,
+ * the IO buf is returned to expedite pool. If cfg_xri_rebalancing==1,
+ * the IO buf is returned to pbl_pool or pvt_pool based on watermark and
+ * xri_limit. If cfg_xri_rebalancing==0, the IO buf is returned to
+ * lpfc_io_buf_list_put.
+ **/
+void lpfc_release_io_buf(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_ncmd,
+ struct lpfc_sli4_hdw_queue *qp)
+{
+ unsigned long iflag;
+ struct lpfc_pbl_pool *pbl_pool;
+ struct lpfc_pvt_pool *pvt_pool;
+ struct lpfc_epd_pool *epd_pool;
+ u32 txcmplq_cnt;
+ u32 xri_owned;
+ u32 xri_limit;
+ u32 abts_io_bufs;
+
+ /* MUST zero fields if buffer is reused by another protocol */
+ lpfc_ncmd->nvmeCmd = NULL;
+ lpfc_ncmd->cur_iocbq.wqe_cmpl = NULL;
+ lpfc_ncmd->cur_iocbq.iocb_cmpl = NULL;
+
+ if (phba->cfg_xri_rebalancing) {
+ if (lpfc_ncmd->expedite) {
+ /* Return to expedite pool */
+ epd_pool = &phba->epd_pool;
+ spin_lock_irqsave(&epd_pool->lock, iflag);
+ list_add_tail(&lpfc_ncmd->list, &epd_pool->list);
+ epd_pool->count++;
+ spin_unlock_irqrestore(&epd_pool->lock, iflag);
+ return;
+ }
+
+ /* Avoid invalid access if an IO sneaks in and is being rejected
+ * just _after_ xri pools are destroyed in lpfc_offline.
+ * Nothing much can be done at this point.
+ */
+ if (!qp->p_multixri_pool)
+ return;
+
+ pbl_pool = &qp->p_multixri_pool->pbl_pool;
+ pvt_pool = &qp->p_multixri_pool->pvt_pool;
+
+ txcmplq_cnt = qp->fcp_wq->pring->txcmplq_cnt;
+ abts_io_bufs = qp->abts_scsi_io_bufs;
+ if (qp->nvme_wq) {
+ txcmplq_cnt += qp->nvme_wq->pring->txcmplq_cnt;
+ abts_io_bufs += qp->abts_nvme_io_bufs;
+ }
+
+ xri_owned = pvt_pool->count + txcmplq_cnt + abts_io_bufs;
+ xri_limit = qp->p_multixri_pool->xri_limit;
+
+#ifdef LPFC_MXP_STAT
+ if (xri_owned <= xri_limit)
+ qp->p_multixri_pool->below_limit_count++;
+ else
+ qp->p_multixri_pool->above_limit_count++;
+#endif
+
+ /* XRI goes to either public or private free xri pool
+ * based on watermark and xri_limit
+ */
+ if ((pvt_pool->count < pvt_pool->low_watermark) ||
+ (xri_owned < xri_limit &&
+ pvt_pool->count < pvt_pool->high_watermark)) {
+ lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag,
+ qp, free_pvt_pool);
+ list_add_tail(&lpfc_ncmd->list,
+ &pvt_pool->list);
+ pvt_pool->count++;
+ spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+ } else {
+ lpfc_qp_spin_lock_irqsave(&pbl_pool->lock, iflag,
+ qp, free_pub_pool);
+ list_add_tail(&lpfc_ncmd->list,
+ &pbl_pool->list);
+ pbl_pool->count++;
+ spin_unlock_irqrestore(&pbl_pool->lock, iflag);
+ }
+ } else {
+ lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_put_lock, iflag,
+ qp, free_xri);
+ list_add_tail(&lpfc_ncmd->list,
+ &qp->lpfc_io_buf_list_put);
+ qp->put_io_bufs++;
+ spin_unlock_irqrestore(&qp->io_buf_list_put_lock,
+ iflag);
+ }
+}
+
+/**
+ * lpfc_get_io_buf_from_private_pool - Get one free IO buf from private pool
+ * @phba: pointer to lpfc hba data structure.
+ * @pvt_pool: pointer to private pool data structure.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ *
+ * This routine tries to get one free IO buf from private pool.
+ *
+ * Return:
+ * pointer to one free IO buf - if private pool is not empty
+ * NULL - if private pool is empty
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_private_pool(struct lpfc_hba *phba,
+ struct lpfc_sli4_hdw_queue *qp,
+ struct lpfc_pvt_pool *pvt_pool,
+ struct lpfc_nodelist *ndlp)
+{
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ unsigned long iflag;
+
+ lpfc_qp_spin_lock_irqsave(&pvt_pool->lock, iflag, qp, alloc_pvt_pool);
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &pvt_pool->list, list) {
+ if (lpfc_test_rrq_active(
+ phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag))
+ continue;
+ list_del(&lpfc_ncmd->list);
+ pvt_pool->count--;
+ spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+ return lpfc_ncmd;
+ }
+ spin_unlock_irqrestore(&pvt_pool->lock, iflag);
+
+ return NULL;
+}
+
+/**
+ * lpfc_get_io_buf_from_expedite_pool - Get one free IO buf from expedite pool
+ * @phba: pointer to lpfc hba data structure.
+ *
+ * This routine tries to get one free IO buf from expedite pool.
+ *
+ * Return:
+ * pointer to one free IO buf - if expedite pool is not empty
+ * NULL - if expedite pool is empty
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_expedite_pool(struct lpfc_hba *phba)
+{
+ struct lpfc_io_buf *lpfc_ncmd;
+ struct lpfc_io_buf *lpfc_ncmd_next;
+ unsigned long iflag;
+ struct lpfc_epd_pool *epd_pool;
+
+ epd_pool = &phba->epd_pool;
+ lpfc_ncmd = NULL;
+
+ spin_lock_irqsave(&epd_pool->lock, iflag);
+ if (epd_pool->count > 0) {
+ list_for_each_entry_safe(lpfc_ncmd, lpfc_ncmd_next,
+ &epd_pool->list, list) {
+ list_del(&lpfc_ncmd->list);
+ epd_pool->count--;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&epd_pool->lock, iflag);
+
+ return lpfc_ncmd;
+}
+
+/**
+ * lpfc_get_io_buf_from_multixri_pools - Get one free IO bufs
+ * @phba: pointer to lpfc hba data structure.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ * @hwqid: belong to which HWQ
+ * @expedite: 1 means this request is urgent.
+ *
+ * This routine will do the following actions and then return a pointer to
+ * one free IO buf.
+ *
+ * 1. If private free xri count is empty, move some XRIs from public to
+ * private pool.
+ * 2. Get one XRI from private free xri pool.
+ * 3. If we fail to get one from pvt_pool and this is an expedite request,
+ * get one free xri from expedite pool.
+ *
+ * Note: ndlp is only used on SCSI side for RRQ testing.
+ * The caller should pass NULL for ndlp on NVME side.
+ *
+ * Return:
+ * pointer to one free IO buf - if private pool is not empty
+ * NULL - if private pool is empty
+ **/
+static struct lpfc_io_buf *
+lpfc_get_io_buf_from_multixri_pools(struct lpfc_hba *phba,
+ struct lpfc_nodelist *ndlp,
+ int hwqid, int expedite)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_multixri_pool *multixri_pool;
+ struct lpfc_pvt_pool *pvt_pool;
+ struct lpfc_io_buf *lpfc_ncmd;
+
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ lpfc_ncmd = NULL;
+ multixri_pool = qp->p_multixri_pool;
+ pvt_pool = &multixri_pool->pvt_pool;
+ multixri_pool->io_req_count++;
+
+ /* If pvt_pool is empty, move some XRIs from public to private pool */
+ if (pvt_pool->count == 0)
+ lpfc_move_xri_pbl_to_pvt(phba, hwqid, XRI_BATCH);
+
+ /* Get one XRI from private free xri pool */
+ lpfc_ncmd = lpfc_get_io_buf_from_private_pool(phba, qp, pvt_pool, ndlp);
+
+ if (lpfc_ncmd) {
+ lpfc_ncmd->hdwq = qp;
+ lpfc_ncmd->hdwq_no = hwqid;
+ } else if (expedite) {
+ /* If we fail to get one from pvt_pool and this is an expedite
+ * request, get one free xri from expedite pool.
+ */
+ lpfc_ncmd = lpfc_get_io_buf_from_expedite_pool(phba);
+ }
+
+ return lpfc_ncmd;
+}
+
+static inline struct lpfc_io_buf *
+lpfc_io_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, int idx)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ struct lpfc_io_buf *lpfc_cmd, *lpfc_cmd_next;
+
+ qp = &phba->sli4_hba.hdwq[idx];
+ list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next,
+ &qp->lpfc_io_buf_list_get, list) {
+ if (lpfc_test_rrq_active(phba, ndlp,
+ lpfc_cmd->cur_iocbq.sli4_lxritag))
+ continue;
+
+ if (lpfc_cmd->flags & LPFC_SBUF_NOT_POSTED)
+ continue;
+
+ list_del_init(&lpfc_cmd->list);
+ qp->get_io_bufs--;
+ lpfc_cmd->hdwq = qp;
+ lpfc_cmd->hdwq_no = idx;
+ return lpfc_cmd;
+ }
+ return NULL;
+}
+
+/**
+ * lpfc_get_io_buf - Get one IO buffer from free pool
+ * @phba: The HBA for which this call is being executed.
+ * @ndlp: pointer to lpfc nodelist data structure.
+ * @hwqid: belong to which HWQ
+ * @expedite: 1 means this request is urgent.
+ *
+ * This routine gets one IO buffer from free pool. If cfg_xri_rebalancing==1,
+ * removes a IO buffer from multiXRI pools. If cfg_xri_rebalancing==0, removes
+ * a IO buffer from head of @hdwq io_buf_list and returns to caller.
+ *
+ * Note: ndlp is only used on SCSI side for RRQ testing.
+ * The caller should pass NULL for ndlp on NVME side.
+ *
+ * Return codes:
+ * NULL - Error
+ * Pointer to lpfc_io_buf - Success
+ **/
+struct lpfc_io_buf *lpfc_get_io_buf(struct lpfc_hba *phba,
+ struct lpfc_nodelist *ndlp,
+ u32 hwqid, int expedite)
+{
+ struct lpfc_sli4_hdw_queue *qp;
+ unsigned long iflag;
+ struct lpfc_io_buf *lpfc_cmd;
+
+ qp = &phba->sli4_hba.hdwq[hwqid];
+ lpfc_cmd = NULL;
+
+ if (phba->cfg_xri_rebalancing)
+ lpfc_cmd = lpfc_get_io_buf_from_multixri_pools(
+ phba, ndlp, hwqid, expedite);
+ else {
+ lpfc_qp_spin_lock_irqsave(&qp->io_buf_list_get_lock, iflag,
+ qp, alloc_xri_get);
+ if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT || expedite)
+ lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
+ if (!lpfc_cmd) {
+ lpfc_qp_spin_lock(&qp->io_buf_list_put_lock,
+ qp, alloc_xri_put);
+ list_splice(&qp->lpfc_io_buf_list_put,
+ &qp->lpfc_io_buf_list_get);
+ qp->get_io_bufs += qp->put_io_bufs;
+ INIT_LIST_HEAD(&qp->lpfc_io_buf_list_put);
+ qp->put_io_bufs = 0;
+ spin_unlock(&qp->io_buf_list_put_lock);
+ if (qp->get_io_bufs > LPFC_NVME_EXPEDITE_XRICNT ||
+ expedite)
+ lpfc_cmd = lpfc_io_buf(phba, ndlp, hwqid);
+ }
+ spin_unlock_irqrestore(&qp->io_buf_list_get_lock, iflag);
+ }
+
+ return lpfc_cmd;
+}