From c6d7b26791a2aefdf97f2af1e93161ed05acd631 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 13 Sep 2010 11:23:57 -0500 Subject: RDMA/cxgb4: Support on-chip SQs T4 support on-chip SQs to reduce latency. This patch adds support for this in iw_cxgb4: - Manage ocqp memory like other adapter mem resources. - Allocate user mode SQs from ocqp mem if available. - Map ocqp mem to user process using write combining. - Map PCIE_MA_SYNC reg to user process. Bump uverbs ABI. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/provider.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/hw/cxgb4/provider.c') diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 8f645c83a125..a49a9c1275a3 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -149,19 +149,28 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) addr = mm->addr; kfree(mm); - if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && - (addr < (pci_resource_start(rdev->lldi.pdev, 2) + - pci_resource_len(rdev->lldi.pdev, 2)))) { + if ((addr >= pci_resource_start(rdev->lldi.pdev, 0)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 0) + + pci_resource_len(rdev->lldi.pdev, 0)))) { /* - * Map T4 DB register. + * MA_SYNC register... */ - if (vma->vm_flags & VM_READ) - return -EPERM; - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; - vma->vm_flags &= ~VM_MAYREAD; + ret = io_remap_pfn_range(vma, vma->vm_start, + addr >> PAGE_SHIFT, + len, vma->vm_page_prot); + } else if ((addr >= pci_resource_start(rdev->lldi.pdev, 2)) && + (addr < (pci_resource_start(rdev->lldi.pdev, 2) + + pci_resource_len(rdev->lldi.pdev, 2)))) { + + /* + * Map user DB or OCQP memory... + */ + if (addr >= rdev->oc_mw_pa) + vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); ret = io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, len, vma->vm_page_prot); @@ -472,6 +481,7 @@ int c4iw_register_device(struct c4iw_dev *dev) dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; dev->ibdev.get_protocol_stats = c4iw_get_mib; + dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) -- cgit v1.2.3 From 40dbf6ee381008e471d3c4a332971247b7799744 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Fri, 17 Sep 2010 15:40:15 -0500 Subject: RDMA/cxgb4: Fastreg NSMR fixes - Remove dsgl support - doesn't work in T4. - Wrap the immediate PBL as needed when building it in the wr. - Adjust max pbl depth allowed based on ulptx alignment requirements. - Bump the slots per SQ to 5 to allow up to 128MB fast registers. - Advertise fastreg support by default. Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/provider.c | 4 +-- drivers/infiniband/hw/cxgb4/qp.c | 52 ++++++++++++++++------------------ drivers/infiniband/hw/cxgb4/t4.h | 4 +-- 3 files changed, 29 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband/hw/cxgb4/provider.c') diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index a49a9c1275a3..81e127713675 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -54,9 +54,9 @@ #include "iw_cxgb4.h" -static int fastreg_support; +static int fastreg_support = 1; module_param(fastreg_support, int, 0644); -MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=0)"); +MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); static int c4iw_modify_port(struct ib_device *ibdev, u8 port, int port_modify_mask, diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index ff04e5cc28ce..057cb2505ea1 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -505,13 +505,15 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } -static int build_fastreg(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) { struct fw_ri_immd *imdp; __be64 *p; int i; int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32); + int rem; if (wr->wr.fast_reg.page_list_len > T4_MAX_FR_DEPTH) return -EINVAL; @@ -526,32 +528,28 @@ static int build_fastreg(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32); wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff); - if (pbllen > T4_MAX_FR_IMMD) { - struct c4iw_fr_page_list *c4pl = - to_c4iw_fr_page_list(wr->wr.fast_reg.page_list); - struct fw_ri_dsgl *sglp; - - sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1); - sglp->op = FW_RI_DATA_DSGL; - sglp->r1 = 0; - sglp->nsge = cpu_to_be16(1); - sglp->addr0 = cpu_to_be64(c4pl->dma_addr); - sglp->len0 = cpu_to_be32(pbllen); - - *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *sglp, 16); - } else { - imdp = (struct fw_ri_immd *)(&wqe->fr + 1); - imdp->op = FW_RI_DATA_IMMD; - imdp->r1 = 0; - imdp->r2 = 0; - imdp->immdlen = cpu_to_be32(pbllen); - p = (__be64 *)(imdp + 1); - for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) - *p = cpu_to_be64( - (u64)wr->wr.fast_reg.page_list->page_list[i]); - *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, - 16); + WARN_ON(pbllen > T4_MAX_FR_IMMD); + imdp = (struct fw_ri_immd *)(&wqe->fr + 1); + imdp->op = FW_RI_DATA_IMMD; + imdp->r1 = 0; + imdp->r2 = 0; + imdp->immdlen = cpu_to_be32(pbllen); + p = (__be64 *)(imdp + 1); + rem = pbllen; + for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) { + *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]); + rem -= sizeof *p; + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; + } + BUG_ON(rem < 0); + while (rem) { + *p = 0; + rem -= sizeof *p; + if (++p == (__be64 *)&sq->queue[sq->size]) + p = (__be64 *)sq->queue; } + *len16 = DIV_ROUND_UP(sizeof wqe->fr + sizeof *imdp + pbllen, 16); return 0; } @@ -652,7 +650,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, case IB_WR_FAST_REG_MR: fw_opcode = FW_RI_FR_NSMR_WR; swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_fastreg(wqe, wr, &len16); + err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16); break; case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 17ea5fcb37e4..70004425d695 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -66,7 +66,7 @@ struct t4_status_page { #define T4_EQ_ENTRY_SIZE 64 -#define T4_SQ_NUM_SLOTS 4 +#define T4_SQ_NUM_SLOTS 5 #define T4_SQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_SQ_NUM_SLOTS) #define T4_MAX_SEND_SGE ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_send_wr) - \ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) @@ -79,7 +79,7 @@ struct t4_status_page { sizeof(struct fw_ri_rdma_write_wr) - \ sizeof(struct fw_ri_isgl)) / sizeof(struct fw_ri_sge)) #define T4_MAX_FR_IMMD ((T4_SQ_NUM_BYTES - sizeof(struct fw_ri_fr_nsmr_wr) - \ - sizeof(struct fw_ri_immd))) + sizeof(struct fw_ri_immd)) & ~31UL) #define T4_MAX_FR_DEPTH (T4_MAX_FR_IMMD / sizeof(u64)) #define T4_RQ_NUM_SLOTS 2 -- cgit v1.2.3 From de5dd81b49c27c7818492be0746bfed6ac3b1c8d Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Mon, 18 Oct 2010 15:16:40 +0000 Subject: RDMA/cxgb4: Export T4 TCP MIB Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/provider.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/hw/cxgb4/provider.c') diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 81e127713675..f66dd8bf5128 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -391,7 +391,17 @@ static ssize_t show_board(struct device *dev, struct device_attribute *attr, static int c4iw_get_mib(struct ib_device *ibdev, union rdma_protocol_stats *stats) { - return -ENOSYS; + struct tp_tcp_stats v4, v6; + struct c4iw_dev *c4iw_dev = to_c4iw_dev(ibdev); + + cxgb4_get_tcp_stats(c4iw_dev->rdev.lldi.pdev, &v4, &v6); + memset(stats, 0, sizeof *stats); + stats->iw.tcpInSegs = v4.tcpInSegs + v6.tcpInSegs; + stats->iw.tcpOutSegs = v4.tcpOutSegs + v6.tcpOutSegs; + stats->iw.tcpRetransSegs = v4.tcpRetransSegs + v6.tcpRetransSegs; + stats->iw.tcpOutRsts = v4.tcpOutRsts + v6.tcpOutSegs; + + return 0; } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); -- cgit v1.2.3