summaryrefslogtreecommitdiff
path: root/drivers/scsi/lpfc/lpfc_nvme.c
diff options
context:
space:
mode:
authorJames Smart <jsmart2021@gmail.com>2019-08-15 02:57:09 +0300
committerMartin K. Petersen <martin.petersen@oracle.com>2019-08-20 05:41:12 +0300
commitd79c9e9d4b3d9330ee38f392a7c98e0fc494f7f8 (patch)
tree088e0dc43e6f8415f9bd529e49a3e072c89631d9 /drivers/scsi/lpfc/lpfc_nvme.c
parente62245d923caebc02582b12ce861c3d780b4106f (diff)
downloadlinux-d79c9e9d4b3d9330ee38f392a7c98e0fc494f7f8.tar.xz
scsi: lpfc: Support dynamic unbounded SGL lists on G7 hardware.
Typical SLI-4 hardware supports up to 2 4KB pages to be registered per XRI to contain the exchanges Scatter/Gather List. This caps the number of SGL elements that can be in the SGL. There are not extensions to extend the list out of the 2 pages. The G7 hardware adds a SGE type that allows the SGL to be vectored to a different scatter/gather list segment. And that segment can contain a SGE to go to another segment and so on. The initial segment must still be pre-registered for the XRI, but it can be a much smaller amount (256Bytes) as it can now be dynamically grown. This much smaller allocation can handle the SG list for most normal I/O, and the dynamic aspect allows it to support many MB's if needed. The implementation creates a pool which contains "segments" and which is initially sized to hold the initial small segment per xri. If an I/O requires additional segments, they are allocated from the pool. If the pool has no more segments, the pool is grown based on what is now needed. After the I/O completes, the additional segments are returned to the pool for use by other I/Os. Once allocated, the additional segments are not released under the assumption of "if needed once, it will be needed again". Pools are kept on a per-hardware queue basis, which is typically 1:1 per cpu, but may be shared by multiple cpus. The switch to the smaller initial allocation significantly reduces the memory footprint of the driver (which only grows if large ios are issued). Based on the several K of XRIs for the adapter, the 8KB->256B reduction can conserve 32MBs or more. It has been observed with per-cpu resource pools that allocating a resource on CPU A, may be put back on CPU B. While the get routines are distributed evenly, only a limited subset of CPUs may be handling the put routines. This can put a strain on the lpfc_put_cmd_rsp_buf_per_cpu routine because all the resources are being put on a limited subset of CPUs. Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com> Signed-off-by: James Smart <jsmart2021@gmail.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/lpfc/lpfc_nvme.c')
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c94
1 files changed, 76 insertions, 18 deletions
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index b599ddc40c6b..5e48318eb7a9 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1306,14 +1306,16 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
union lpfc_wqe128 *wqe = &lpfc_ncmd->cur_iocbq.wqe;
struct sli4_sge *sgl = lpfc_ncmd->dma_sgl;
+ struct sli4_hybrid_sgl *sgl_xtra = NULL;
struct scatterlist *data_sg;
struct sli4_sge *first_data_sgl;
struct ulp_bde64 *bde;
- dma_addr_t physaddr;
+ dma_addr_t physaddr = 0;
uint32_t num_bde = 0;
- uint32_t dma_len;
+ uint32_t dma_len = 0;
uint32_t dma_offset = 0;
- int nseg, i;
+ int nseg, i, j;
+ bool lsp_just_set = false;
/* Fix up the command and response DMA stuff. */
lpfc_nvme_adj_fcp_sgls(vport, lpfc_ncmd, nCmd);
@@ -1350,6 +1352,9 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
*/
nseg = nCmd->sg_cnt;
data_sg = nCmd->first_sgl;
+
+ /* for tracking the segment boundaries */
+ j = 2;
for (i = 0; i < nseg; i++) {
if (data_sg == NULL) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -1358,23 +1363,76 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport,
lpfc_ncmd->seg_cnt = 0;
return 1;
}
- physaddr = data_sg->dma_address;
- dma_len = data_sg->length;
- sgl->addr_lo = cpu_to_le32(putPaddrLow(physaddr));
- sgl->addr_hi = cpu_to_le32(putPaddrHigh(physaddr));
- sgl->word2 = le32_to_cpu(sgl->word2);
- if ((num_bde + 1) == nseg)
+
+ sgl->word2 = 0;
+ if ((num_bde + 1) == nseg) {
bf_set(lpfc_sli4_sge_last, sgl, 1);
- else
+ bf_set(lpfc_sli4_sge_type, sgl,
+ LPFC_SGE_TYPE_DATA);
+ } else {
bf_set(lpfc_sli4_sge_last, sgl, 0);
- bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
- bf_set(lpfc_sli4_sge_type, sgl, LPFC_SGE_TYPE_DATA);
- sgl->word2 = cpu_to_le32(sgl->word2);
- sgl->sge_len = cpu_to_le32(dma_len);
-
- dma_offset += dma_len;
- data_sg = sg_next(data_sg);
- sgl++;
+
+ /* expand the segment */
+ if (!lsp_just_set &&
+ !((j + 1) % phba->border_sge_num) &&
+ ((nseg - 1) != i)) {
+ /* set LSP type */
+ bf_set(lpfc_sli4_sge_type, sgl,
+ LPFC_SGE_TYPE_LSP);
+
+ sgl_xtra = lpfc_get_sgl_per_hdwq(
+ phba, lpfc_ncmd);
+
+ if (unlikely(!sgl_xtra)) {
+ lpfc_ncmd->seg_cnt = 0;
+ return 1;
+ }
+ sgl->addr_lo = cpu_to_le32(putPaddrLow(
+ sgl_xtra->dma_phys_sgl));
+ sgl->addr_hi = cpu_to_le32(putPaddrHigh(
+ sgl_xtra->dma_phys_sgl));
+
+ } else {
+ bf_set(lpfc_sli4_sge_type, sgl,
+ LPFC_SGE_TYPE_DATA);
+ }
+ }
+
+ if (!(bf_get(lpfc_sli4_sge_type, sgl) &
+ LPFC_SGE_TYPE_LSP)) {
+ if ((nseg - 1) == i)
+ bf_set(lpfc_sli4_sge_last, sgl, 1);
+
+ physaddr = data_sg->dma_address;
+ dma_len = data_sg->length;
+ sgl->addr_lo = cpu_to_le32(
+ putPaddrLow(physaddr));
+ sgl->addr_hi = cpu_to_le32(
+ putPaddrHigh(physaddr));
+
+ bf_set(lpfc_sli4_sge_offset, sgl, dma_offset);
+ sgl->word2 = cpu_to_le32(sgl->word2);
+ sgl->sge_len = cpu_to_le32(dma_len);
+
+ dma_offset += dma_len;
+ data_sg = sg_next(data_sg);
+
+ sgl++;
+
+ lsp_just_set = false;
+ } else {
+ sgl->word2 = cpu_to_le32(sgl->word2);
+
+ sgl->sge_len = cpu_to_le32(
+ phba->cfg_sg_dma_buf_size);
+
+ sgl = (struct sli4_sge *)sgl_xtra->dma_sgl;
+ i = i - 1;
+
+ lsp_just_set = true;
+ }
+
+ j++;
}
if (phba->cfg_enable_pbde) {
/* Use PBDE support for first SGL only, offset == 0 */