diff options
Diffstat (limited to 'drivers/infiniband/hw/cxgb4')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cm.c | 90 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/cq.c | 269 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/device.c | 20 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/ev.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 57 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/mem.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/provider.c | 56 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/qp.c | 1051 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/resource.c | 51 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4.h | 164 | ||||
-rw-r--r-- | drivers/infiniband/hw/cxgb4/t4fw_ri_api.h | 68 |
11 files changed, 1512 insertions, 321 deletions
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0912fa026327..0f83cbec33f3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -587,24 +587,29 @@ static int send_flowc(struct c4iw_ep *ep) { struct fw_flowc_wr *flowc; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int i; u16 vlan = ep->l2t->vlan; int nparams; + int flowclen, flowclen16; if (WARN_ON(!skb)) return -ENOMEM; if (vlan == CPL_L2T_VLAN_NONE) - nparams = 8; - else nparams = 9; + else + nparams = 10; - flowc = __skb_put(skb, FLOWC_LEN); + flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); + flowclen16 = DIV_ROUND_UP(flowclen, 16); + flowclen = flowclen16 * 16; + + flowc = __skb_put(skb, flowclen); + memset(flowc, 0, flowclen); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | FW_FLOWC_WR_NPARAMS_V(nparams)); - flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN, - 16)) | FW_WR_FLOWID_V(ep->hwtid)); + flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | + FW_WR_FLOWID_V(ep->hwtid)); flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V @@ -623,21 +628,13 @@ static int send_flowc(struct c4iw_ep *ep) flowc->mnemval[6].val = cpu_to_be32(ep->snd_win); flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; flowc->mnemval[7].val = cpu_to_be32(ep->emss); - if (nparams == 9) { + flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE; + flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale); + if (nparams == 10) { u16 pri; - pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; - flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; - flowc->mnemval[8].val = cpu_to_be32(pri); - } else { - /* Pad WR to 16 byte boundary */ - flowc->mnemval[8].mnemonic = 0; - flowc->mnemval[8].val = 0; - } - for (i = 0; i < 9; i++) { - flowc->mnemval[i].r4[0] = 0; - flowc->mnemval[i].r4[1] = 0; - flowc->mnemval[i].r4[2] = 0; + flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[9].val = cpu_to_be32(pri); } set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); @@ -1176,6 +1173,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; struct cpl_act_establish *req = cplhdr(skb); + unsigned short tcp_opt = ntohs(req->tcp_opt); unsigned int tid = GET_TID(req); unsigned int atid = TID_TID_G(ntohl(req->tos_atid)); struct tid_info *t = dev->rdev.lldi.tids; @@ -1196,8 +1194,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); - set_emss(ep, ntohs(req->tcp_opt)); + set_emss(ep, tcp_opt); /* dealloc the atid */ remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid); @@ -1853,10 +1852,33 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) return 0; } +static void complete_cached_srq_buffers(struct c4iw_ep *ep, + __be32 srqidx_status) +{ + enum chip_type adapter_type; + u32 srqidx; + + adapter_type = ep->com.dev->rdev.lldi.adapter_type; + srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status)); + + /* + * If this TCB had a srq buffer cached, then we must complete + * it. For user mode, that means saving the srqidx in the + * user/kernel status page for this qp. For kernel mode, just + * synthesize the CQE now. + */ + if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { + if (ep->com.qp->ibqp.uobject) + t4_set_wq_in_error(&ep->com.qp->wq, srqidx); + else + c4iw_flush_srqidx(ep->com.qp, srqidx); + } +} + static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *ep; - struct cpl_abort_rpl_rss *rpl = cplhdr(skb); + struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb); int release = 0; unsigned int tid = GET_TID(rpl); @@ -1865,6 +1887,9 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) pr_warn("Abort rpl to freed endpoint\n"); return 0; } + + complete_cached_srq_buffers(ep, rpl->srqidx_status); + pr_debug("ep %p tid %u\n", ep, ep->hwtid); mutex_lock(&ep->com.mutex); switch (ep->com.state) { @@ -2603,16 +2628,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) struct cpl_pass_establish *req = cplhdr(skb); unsigned int tid = GET_TID(req); int ret; + u16 tcp_opt = ntohs(req->tcp_opt); ep = get_ep_from_tid(dev, tid); pr_debug("ep %p tid %u\n", ep, ep->hwtid); ep->snd_seq = be32_to_cpu(req->snd_isn); ep->rcv_seq = be32_to_cpu(req->rcv_isn); + ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt); - pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, - ntohs(req->tcp_opt)); + pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt); - set_emss(ep, ntohs(req->tcp_opt)); + set_emss(ep, tcp_opt); dst_confirm(ep->dst); mutex_lock(&ep->com.mutex); @@ -2719,28 +2745,35 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { - struct cpl_abort_req_rss *req = cplhdr(skb); + struct cpl_abort_req_rss6 *req = cplhdr(skb); struct c4iw_ep *ep; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u8 status; + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - if (cxgb_is_neg_adv(req->status)) { + status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status)); + + if (cxgb_is_neg_adv(status)) { pr_debug("Negative advice on abort- tid %u status %d (%s)\n", - ep->hwtid, req->status, neg_adv_str(req->status)); + ep->hwtid, status, neg_adv_str(status)); ep->stats.abort_neg_adv++; mutex_lock(&dev->rdev.stats.lock); dev->rdev.stats.neg_adv++; mutex_unlock(&dev->rdev.stats.lock); goto deref_ep; } + + complete_cached_srq_buffers(ep, req->srqidx_status); + pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state); set_bit(PEER_ABORT, &ep->com.history); @@ -3444,9 +3477,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) } insert_handle(dev, &dev->stid_idr, ep, ep->stid); - memcpy(&ep->com.local_addr, &cm_id->m_local_addr, - sizeof(ep->com.local_addr)); - state_set(&ep->com, LISTEN); if (ep->com.local_addr.ss_family == AF_INET) err = create_server4(dev, ep); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 2be2e1ac1b5f..6d3042794094 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, int user = (uctx != &rdev->uctx); int ret; struct sk_buff *skb; + struct c4iw_ucontext *ucontext = NULL; + + if (user) + ucontext = container_of(uctx, struct c4iw_ucontext, uctx); cq->cqid = c4iw_get_cqid(rdev, uctx); if (!cq->cqid) { @@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, dma_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, cq->memsize); + if (user && ucontext->is_32b_cqe) { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + (sizeof(*cq->queue) / 2)))->qp_err; + } else { + cq->qp_errp = &((struct t4_status_page *) + ((u8 *)cq->queue + (cq->size - 1) * + sizeof(*cq->queue)))->qp_err; + } + /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + sizeof *res; @@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, FW_RI_RES_WR_IQPCIECH_V(2) | FW_RI_RES_WR_IQINTCNTTHRESH_V(0) | FW_RI_RES_WR_IQO_F | - FW_RI_RES_WR_IQESIZE_V(1)); + ((user && ucontext->is_32b_cqe) ? + FW_RI_RES_WR_IQESIZE_V(1) : + FW_RI_RES_WR_IQESIZE_V(2))); res->u.cq.iqsize = cpu_to_be16(cq->size); res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr); @@ -166,7 +182,7 @@ err1: return ret; } -static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) +static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx) { struct t4_cqe cqe; @@ -179,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq) CQE_SWCQE_V(1) | CQE_QPID_V(wq->sq.qid)); cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen)); + if (srqidx) + cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx); cq->sw_queue[cq->sw_pidx] = cqe; t4_swcq_produce(cq); } @@ -191,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count) pr_debug("wq %p cq %p rq.in_use %u skip count %u\n", wq, cq, wq->rq.in_use, count); while (in_use--) { - insert_recv_cqe(wq, cq); + insert_recv_cqe(wq, cq, 0); flushed++; } return flushed; @@ -442,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) pr_debug("cq %p count %d\n", cq, *count); } +static void post_pending_srq_wrs(struct t4_srq *srq) +{ + struct t4_srq_pending_wr *pwr; + u16 idx = 0; + + while (srq->pending_in_use) { + pwr = &srq->pending_wrs[srq->pending_cidx]; + srq->sw_rq[srq->pidx].wr_id = pwr->wr_id; + srq->sw_rq[srq->pidx].valid = 1; + + pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, + srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + (unsigned long long)pwr->wr_id); + + c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16); + t4_srq_consume_pending_wr(srq); + t4_srq_produce(srq, pwr->len16); + idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE); + } + + if (idx) { + t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe); + srq->queue[srq->size].status.host_wq_pidx = + srq->wq_pidx; + } +} + +static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq) +{ + int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx; + u64 wr_id; + + srq->sw_rq[rel_idx].valid = 0; + wr_id = srq->sw_rq[rel_idx].wr_id; + + if (rel_idx == srq->cidx) { + pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, srq->size, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_consume(srq); + while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) { + pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, srq->cidx, srq->pidx, + srq->wq_pidx, srq->in_use, + srq->size, srq->ooo_count, + (unsigned long long) + srq->sw_rq[srq->cidx].wr_id); + t4_srq_consume_ooo(srq); + } + if (srq->ooo_count == 0 && srq->pending_in_use) + post_pending_srq_wrs(srq); + } else { + pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n", + __func__, rel_idx, srq->cidx, + srq->pidx, srq->wq_pidx, + srq->in_use, srq->size, + srq->ooo_count, + (unsigned long long)srq->sw_rq[rel_idx].wr_id); + t4_srq_produce_ooo(srq); + } + return wr_id; +} + /* * poll_cq * @@ -459,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) * -EOVERFLOW CQ overflow detected. */ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, - u8 *cqe_flushed, u64 *cookie, u32 *credit) + u8 *cqe_flushed, u64 *cookie, u32 *credit, + struct t4_srq *srq) { int ret = 0; struct t4_cqe *hw_cqe, read_cqe; @@ -524,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (CQE_TYPE(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); ret = -EAGAIN; goto skip_cqe; } @@ -535,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (CQE_WRID_STAG(hw_cqe) == 1) { if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); ret = -EAGAIN; goto skip_cqe; } @@ -560,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); } /* @@ -574,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, * then we complete this with T4_ERR_MSN and mark the wq in * error. */ - - if (t4_rq_empty(wq)) { - t4_set_wq_in_error(wq); - ret = -EAGAIN; - goto skip_cqe; - } if (unlikely(!CQE_STATUS(hw_cqe) && CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) { - t4_set_wq_in_error(wq); + t4_set_wq_in_error(wq, 0); hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN)); } goto proc_cqe; @@ -641,11 +720,16 @@ proc_cqe: c4iw_log_wr_stats(wq, hw_cqe); t4_sq_consume(wq); } else { - pr_debug("completing rq idx %u\n", wq->rq.cidx); - *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; - if (c4iw_wr_log) - c4iw_log_wr_stats(wq, hw_cqe); - t4_rq_consume(wq); + if (!srq) { + pr_debug("completing rq idx %u\n", wq->rq.cidx); + *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; + if (c4iw_wr_log) + c4iw_log_wr_stats(wq, hw_cqe); + t4_rq_consume(wq); + } else { + *cookie = reap_srq_cqe(hw_cqe, srq); + } + wq->rq.msn++; goto skip_cqe; } @@ -668,46 +752,33 @@ skip_cqe: return ret; } -/* - * Get one cq entry from c4iw and map it to openib. - * - * Returns: - * 0 cqe returned - * -ENODATA EMPTY; - * -EAGAIN caller must try again - * any other -errno fatal error - */ -static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp, + struct ib_wc *wc, struct c4iw_srq *srq) { - struct c4iw_qp *qhp = NULL; - struct t4_cqe uninitialized_var(cqe), *rd_cqe; - struct t4_wq *wq; + struct t4_cqe uninitialized_var(cqe); + struct t4_wq *wq = qhp ? &qhp->wq : NULL; u32 credit = 0; u8 cqe_flushed; u64 cookie = 0; int ret; - ret = t4_next_cqe(&chp->cq, &rd_cqe); - - if (ret) - return ret; - - qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); - if (!qhp) - wq = NULL; - else { - spin_lock(&qhp->lock); - wq = &(qhp->wq); - } - ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit); + ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit, + srq ? &srq->wq : NULL); if (ret) goto out; wc->wr_id = cookie; - wc->qp = &qhp->ibqp; + wc->qp = qhp ? &qhp->ibqp : NULL; wc->vendor_err = CQE_STATUS(&cqe); wc->wc_flags = 0; + /* + * Simulate a SRQ_LIMIT_REACHED HW notification if required. + */ + if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed && + srq->wq.in_use < srq->srq_limit) + c4iw_dispatch_srq_limit_reached_event(srq); + pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n", CQE_QPID(&cqe), CQE_TYPE(&cqe), CQE_OPCODE(&cqe), @@ -720,15 +791,32 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) wc->byte_len = CQE_LEN(&cqe); else wc->byte_len = 0; - wc->opcode = IB_WC_RECV; - if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV || - CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { + + switch (CQE_OPCODE(&cqe)) { + case FW_RI_SEND: + wc->opcode = IB_WC_RECV; + break; + case FW_RI_SEND_WITH_INV: + case FW_RI_SEND_WITH_SE_INV: + wc->opcode = IB_WC_RECV; wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); + break; + case FW_RI_WRITE_IMMEDIATE: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->ex.imm_data = CQE_IMM_DATA(&cqe); + wc->wc_flags |= IB_WC_WITH_IMM; + break; + default: + pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n", + CQE_OPCODE(&cqe), CQE_QPID(&cqe)); + ret = -EINVAL; + goto out; } } else { switch (CQE_OPCODE(&cqe)) { + case FW_RI_WRITE_IMMEDIATE: case FW_RI_RDMA_WRITE: wc->opcode = IB_WC_RDMA_WRITE; break; @@ -819,8 +907,43 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) } } out: - if (wq) + return ret; +} + +/* + * Get one cq entry from c4iw and map it to openib. + * + * Returns: + * 0 cqe returned + * -ENODATA EMPTY; + * -EAGAIN caller must try again + * any other -errno fatal error + */ +static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) +{ + struct c4iw_srq *srq = NULL; + struct c4iw_qp *qhp = NULL; + struct t4_cqe *rd_cqe; + int ret; + + ret = t4_next_cqe(&chp->cq, &rd_cqe); + + if (ret) + return ret; + + qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe)); + if (qhp) { + spin_lock(&qhp->lock); + srq = qhp->srq; + if (srq) + spin_lock(&srq->lock); + ret = __c4iw_poll_cq_one(chp, qhp, wc, srq); spin_unlock(&qhp->lock); + if (srq) + spin_unlock(&srq->lock); + } else { + ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL); + } return ret; } @@ -876,6 +999,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int vector = attr->comp_vector; struct c4iw_dev *rhp; struct c4iw_cq *chp; + struct c4iw_create_cq ucmd; struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; int ret, wr_len; @@ -891,9 +1015,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (vector >= rhp->rdev.lldi.nciq) return ERR_PTR(-EINVAL); + if (ib_context) { + ucontext = to_c4iw_ucontext(ib_context); + if (udata->inlen < sizeof(ucmd)) + ucontext->is_32b_cqe = 1; + } + chp = kzalloc(sizeof(*chp), GFP_KERNEL); if (!chp) return ERR_PTR(-ENOMEM); + chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); if (!chp->wr_waitp) { ret = -ENOMEM; @@ -908,9 +1039,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, goto err_free_wr_wait; } - if (ib_context) - ucontext = to_c4iw_ucontext(ib_context); - /* account for the status page. */ entries++; @@ -934,13 +1062,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (hwentries < 64) hwentries = 64; - memsize = hwentries * sizeof *chp->cq.queue; + memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ? + (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue)); /* * memsize must be a multiple of the page size if its a user cq. */ if (ucontext) memsize = roundup(memsize, PAGE_SIZE); + chp->cq.size = hwentries; chp->cq.memsize = memsize; chp->cq.vector = vector; @@ -971,6 +1101,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, if (!mm2) goto err_free_mm; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; @@ -980,9 +1111,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, ucontext->key += PAGE_SIZE; uresp.gts_key = ucontext->key; ucontext->key += PAGE_SIZE; + /* communicate to the userspace that + * kernel driver supports 64B CQE + */ + uresp.flags |= C4IW_64B_CQE; + spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, - sizeof(uresp) - sizeof(uresp.reserved)); + ucontext->is_32b_cqe ? + sizeof(uresp) - sizeof(uresp.flags) : + sizeof(uresp)); if (ret) goto err_free_mm2; @@ -1019,11 +1157,6 @@ err_free_chp: return ERR_PTR(ret); } -int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) -{ - return -ENOSYS; -} - int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; @@ -1039,3 +1172,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) spin_unlock_irqrestore(&chp->lock, flag); return ret; } + +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx) +{ + struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq); + unsigned long flag; + + /* locking heirarchy: cq lock first, then qp lock. */ + spin_lock_irqsave(&rchp->lock, flag); + spin_lock(&qhp->lock); + + /* create a SRQ RECV CQE for srqidx */ + insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx); + + spin_unlock(&qhp->lock); + spin_unlock_irqrestore(&rchp->lock, flag); +} diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a3c3418afd73..c13c0ba30f63 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -275,10 +275,11 @@ static int dump_qp(int id, void *p, void *data) set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin); cc = snprintf(qpd->buf + qpd->pos, space, - "rc qp sq id %u rq id %u state %u " + "rc qp sq id %u %s id %u state %u " "onchip %u ep tid %u state %u " "%pI4:%u/%u->%pI4:%u/%u\n", - qp->wq.sq.qid, qp->wq.rq.qid, + qp->wq.sq.qid, qp->srq ? "srq" : "rq", + qp->srq ? qp->srq->idx : qp->wq.rq.qid, (int)qp->attr.state, qp->wq.sq.flags & T4_SQ_ONCHIP, ep->hwtid, (int)ep->com.state, @@ -480,6 +481,9 @@ static int stats_show(struct seq_file *seq, void *v) seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n", dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur, dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail); + seq_printf(seq, " SRQS: %10llu %10llu %10llu %10llu\n", + dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur, + dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail); seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n", dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur, dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail); @@ -530,6 +534,8 @@ static ssize_t stats_clear(struct file *file, const char __user *buf, dev->rdev.stats.pbl.fail = 0; dev->rdev.stats.rqt.max = 0; dev->rdev.stats.rqt.fail = 0; + dev->rdev.stats.rqt.max = 0; + dev->rdev.stats.rqt.fail = 0; dev->rdev.stats.ocqp.max = 0; dev->rdev.stats.ocqp.fail = 0; dev->rdev.stats.db_full = 0; @@ -802,7 +808,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->qpmask = rdev->lldi.udb_density - 1; rdev->cqmask = rdev->lldi.ucq_density - 1; - pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n", + pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n", pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start, rdev->lldi.vr->stag.size, c4iw_num_stags(rdev), rdev->lldi.vr->pbl.start, @@ -811,7 +817,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->lldi.vr->qp.start, rdev->lldi.vr->qp.size, rdev->lldi.vr->cq.start, - rdev->lldi.vr->cq.size); + rdev->lldi.vr->cq.size, + rdev->lldi.vr->srq.size); pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n", &rdev->lldi.pdev->resource[2], rdev->lldi.db_reg, rdev->lldi.gts_reg, @@ -824,10 +831,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->stats.stag.total = rdev->lldi.vr->stag.size; rdev->stats.pbl.total = rdev->lldi.vr->pbl.size; rdev->stats.rqt.total = rdev->lldi.vr->rq.size; + rdev->stats.srqt.total = rdev->lldi.vr->srq.size; rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size; rdev->stats.qid.total = rdev->lldi.vr->qp.size; - err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); + err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), + T4_MAX_NUM_PD, rdev->lldi.vr->srq.size); if (err) { pr_err("error %d initializing resources\n", err); return err; @@ -857,6 +866,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) rdev->status_page->qp_size = rdev->lldi.vr->qp.size; rdev->status_page->cq_start = rdev->lldi.vr->cq.start; rdev->status_page->cq_size = rdev->lldi.vr->cq.size; + rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support; if (c4iw_wr_log) { rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order, diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 3e9d8b277ab9..8741d23168f3 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe) CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len), CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); - pr_debug("%016llx %016llx %016llx %016llx\n", + pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n", be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]), - be64_to_cpu(p[3])); + be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]), + be64_to_cpu(p[6]), be64_to_cpu(p[7])); /* * Ingress WRITE and READ_RESP errors provide diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 870649ff049c..f0fceadd0d12 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -97,6 +97,7 @@ struct c4iw_resource { struct c4iw_id_table tpt_table; struct c4iw_id_table qid_table; struct c4iw_id_table pdid_table; + struct c4iw_id_table srq_table; }; struct c4iw_qid_list { @@ -130,6 +131,8 @@ struct c4iw_stats { struct c4iw_stat stag; struct c4iw_stat pbl; struct c4iw_stat rqt; + struct c4iw_stat srqt; + struct c4iw_stat srq; struct c4iw_stat ocqp; u64 db_full; u64 db_empty; @@ -549,6 +552,7 @@ struct c4iw_qp { struct kref kref; wait_queue_head_t wait; int sq_sig_all; + struct c4iw_srq *srq; struct work_struct free_work; struct c4iw_ucontext *ucontext; struct c4iw_wr_wait *wr_waitp; @@ -559,6 +563,26 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) return container_of(ibqp, struct c4iw_qp, ibqp); } +struct c4iw_srq { + struct ib_srq ibsrq; + struct list_head db_fc_entry; + struct c4iw_dev *rhp; + struct t4_srq wq; + struct sk_buff *destroy_skb; + u32 srq_limit; + u32 pdid; + int idx; + u32 flags; + spinlock_t lock; /* protects srq */ + struct c4iw_wr_wait *wr_waitp; + bool armed; +}; + +static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct c4iw_srq, ibsrq); +} + struct c4iw_ucontext { struct ib_ucontext ibucontext; struct c4iw_dev_ucontext uctx; @@ -566,6 +590,7 @@ struct c4iw_ucontext { spinlock_t mmap_lock; struct list_head mmaps; struct kref kref; + bool is_32b_cqe; }; static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c) @@ -885,7 +910,10 @@ enum conn_pre_alloc_buffers { CN_MAX_CON_BUF }; -#define FLOWC_LEN 80 +enum { + FLOWC_LEN = offsetof(struct fw_flowc_wr, mnemval[FW_FLOWC_MNEM_MAX]) +}; + union cpl_wr_size { struct cpl_abort_req abrt_req; struct cpl_abort_rpl abrt_rpl; @@ -952,6 +980,7 @@ struct c4iw_ep { unsigned int retry_count; int snd_win; int rcv_win; + u32 snd_wscale; struct c4iw_ep_stats stats; }; @@ -988,7 +1017,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, struct c4iw_dev_ucontext *uctx); u32 c4iw_get_resource(struct c4iw_id_table *id_table); void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); -int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt); int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); int c4iw_pblpool_create(struct c4iw_rdev *rdev); int c4iw_rqtpool_create(struct c4iw_rdev *rdev); @@ -1007,10 +1037,10 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev, void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx); int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); -int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr); -int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr); +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr); +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog); int c4iw_destroy_listen(struct iw_cm_id *cm_id); @@ -1037,8 +1067,14 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, struct ib_ucontext *ib_context, struct ib_udata *udata); -int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq); +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *attrs, + struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp); struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, @@ -1075,12 +1111,19 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, enum cxgb4_bar2_qtype qtype, unsigned int *pbar2_qid, u64 *pbar2_pa); +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev); +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx); extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe); extern int c4iw_wr_log; extern int db_fc_threshold; extern int db_coalescing_threshold; extern int use_dsgl; void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq); +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16); +void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx); +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); typedef int c4iw_restrack_func(struct sk_buff *msg, diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 1445918e3239..7b76e6f81aeb 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -774,7 +774,7 @@ static int c4iw_set_page(struct ib_mr *ibmr, u64 addr) { struct c4iw_mr *mhp = to_c4iw_mr(ibmr); - if (unlikely(mhp->mpl_len == mhp->max_mpl_len)) + if (unlikely(mhp->mpl_len == mhp->attr.pbl_size)) return -ENOMEM; mhp->mpl[mhp->mpl_len++] = addr; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 1feade8bb4b3..4eda6872e617 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -58,41 +58,6 @@ static int fastreg_support = 1; module_param(fastreg_support, int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); -static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) - -{ - return ERR_PTR(-ENOSYS); -} - -static int c4iw_ah_destroy(struct ib_ah *ah) -{ - return -ENOSYS; -} - -static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) -{ - return -ENOSYS; -} - -static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags, - u8 port_num, const struct ib_wc *in_wc, - const struct ib_grh *in_grh, - const struct ib_mad_hdr *in_mad, - size_t in_mad_size, - struct ib_mad_hdr *out_mad, - size_t *out_mad_size, - u16 *out_mad_pkey_index) -{ - return -ENOSYS; -} - void _c4iw_free_ucontext(struct kref *kref) { struct c4iw_ucontext *ucontext; @@ -342,8 +307,12 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device; props->max_mr_size = T4_MAX_MR_SIZE; props->max_qp = dev->rdev.lldi.vr->qp.size / 2; + props->max_srq = dev->rdev.lldi.vr->srq.size; props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth; - props->max_sge = T4_MAX_RECV_SGE; + props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth; + props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE); + props->max_recv_sge = T4_MAX_RECV_SGE; + props->max_srq_sge = T4_MAX_RECV_SGE; props->max_sge_rd = 1; props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter; props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp, @@ -592,7 +561,10 @@ void c4iw_register_device(struct work_struct *work) (1ull << IB_USER_VERBS_CMD_POLL_CQ) | (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | (1ull << IB_USER_VERBS_CMD_POST_SEND) | - (1ull << IB_USER_VERBS_CMD_POST_RECV); + (1ull << IB_USER_VERBS_CMD_POST_RECV) | + (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); dev->ibdev.node_type = RDMA_NODE_RNIC; BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); @@ -608,15 +580,15 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.mmap = c4iw_mmap; dev->ibdev.alloc_pd = c4iw_allocate_pd; dev->ibdev.dealloc_pd = c4iw_deallocate_pd; - dev->ibdev.create_ah = c4iw_ah_create; - dev->ibdev.destroy_ah = c4iw_ah_destroy; dev->ibdev.create_qp = c4iw_create_qp; dev->ibdev.modify_qp = c4iw_ib_modify_qp; dev->ibdev.query_qp = c4iw_ib_query_qp; dev->ibdev.destroy_qp = c4iw_destroy_qp; + dev->ibdev.create_srq = c4iw_create_srq; + dev->ibdev.modify_srq = c4iw_modify_srq; + dev->ibdev.destroy_srq = c4iw_destroy_srq; dev->ibdev.create_cq = c4iw_create_cq; dev->ibdev.destroy_cq = c4iw_destroy_cq; - dev->ibdev.resize_cq = c4iw_resize_cq; dev->ibdev.poll_cq = c4iw_poll_cq; dev->ibdev.get_dma_mr = c4iw_get_dma_mr; dev->ibdev.reg_user_mr = c4iw_reg_user_mr; @@ -625,12 +597,10 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.dealloc_mw = c4iw_dealloc_mw; dev->ibdev.alloc_mr = c4iw_alloc_mr; dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.attach_mcast = c4iw_multicast_attach; - dev->ibdev.detach_mcast = c4iw_multicast_detach; - dev->ibdev.process_mad = c4iw_process_mad; dev->ibdev.req_notify_cq = c4iw_arm_cq; dev->ibdev.post_send = c4iw_post_send; dev->ibdev.post_recv = c4iw_post_receive; + dev->ibdev.post_srq_recv = c4iw_post_srq_recv; dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index aef53305f1c3..b3203afa3b1d 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -147,21 +147,24 @@ static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user) } static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, - struct c4iw_dev_ucontext *uctx) + struct c4iw_dev_ucontext *uctx, int has_rq) { /* * uP clears EQ contexts when the connection exits rdma mode, * so no need to post a RESET WR for these EQs. */ - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, wq->rq.queue, - dma_unmap_addr(&wq->rq, mapping)); dealloc_sq(rdev, &wq->sq); - c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); - kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); - c4iw_put_qpid(rdev, wq->rq.qid, uctx); c4iw_put_qpid(rdev, wq->sq.qid, uctx); + + if (has_rq) { + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + kfree(wq->rq.sw_rq); + c4iw_put_qpid(rdev, wq->rq.qid, uctx); + } return 0; } @@ -195,7 +198,8 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid, static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, struct t4_cq *rcq, struct t4_cq *scq, struct c4iw_dev_ucontext *uctx, - struct c4iw_wr_wait *wr_waitp) + struct c4iw_wr_wait *wr_waitp, + int need_rq) { int user = (uctx != &rdev->uctx); struct fw_ri_res_wr *res_wr; @@ -209,10 +213,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (!wq->sq.qid) return -ENOMEM; - wq->rq.qid = c4iw_get_qpid(rdev, uctx); - if (!wq->rq.qid) { - ret = -ENOMEM; - goto free_sq_qid; + if (need_rq) { + wq->rq.qid = c4iw_get_qpid(rdev, uctx); + if (!wq->rq.qid) { + ret = -ENOMEM; + goto free_sq_qid; + } } if (!user) { @@ -220,25 +226,31 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, GFP_KERNEL); if (!wq->sq.sw_sq) { ret = -ENOMEM; - goto free_rq_qid; + goto free_rq_qid;//FIXME } - wq->rq.sw_rq = kcalloc(wq->rq.size, sizeof(*wq->rq.sw_rq), - GFP_KERNEL); - if (!wq->rq.sw_rq) { - ret = -ENOMEM; - goto free_sw_sq; + if (need_rq) { + wq->rq.sw_rq = kcalloc(wq->rq.size, + sizeof(*wq->rq.sw_rq), + GFP_KERNEL); + if (!wq->rq.sw_rq) { + ret = -ENOMEM; + goto free_sw_sq; + } } } - /* - * RQT must be a power of 2 and at least 16 deep. - */ - wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); - wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); - if (!wq->rq.rqt_hwaddr) { - ret = -ENOMEM; - goto free_sw_rq; + if (need_rq) { + /* + * RQT must be a power of 2 and at least 16 deep. + */ + wq->rq.rqt_size = + roundup_pow_of_two(max_t(u16, wq->rq.size, 16)); + wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); + if (!wq->rq.rqt_hwaddr) { + ret = -ENOMEM; + goto free_sw_rq; + } } ret = alloc_sq(rdev, &wq->sq, user); @@ -247,34 +259,39 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, memset(wq->sq.queue, 0, wq->sq.memsize); dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); - wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, &(wq->rq.dma_addr), - GFP_KERNEL); - if (!wq->rq.queue) { - ret = -ENOMEM; - goto free_sq; + if (need_rq) { + wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, + &wq->rq.dma_addr, + GFP_KERNEL); + if (!wq->rq.queue) { + ret = -ENOMEM; + goto free_sq; + } + pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", + wq->sq.queue, + (unsigned long long)virt_to_phys(wq->sq.queue), + wq->rq.queue, + (unsigned long long)virt_to_phys(wq->rq.queue)); + memset(wq->rq.queue, 0, wq->rq.memsize); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); } - pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", - wq->sq.queue, - (unsigned long long)virt_to_phys(wq->sq.queue), - wq->rq.queue, - (unsigned long long)virt_to_phys(wq->rq.queue)); - memset(wq->rq.queue, 0, wq->rq.memsize); - dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS, &wq->sq.bar2_qid, user ? &wq->sq.bar2_pa : NULL); - wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS, - &wq->rq.bar2_qid, - user ? &wq->rq.bar2_pa : NULL); + if (need_rq) + wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, + T4_BAR2_QTYPE_EGRESS, + &wq->rq.bar2_qid, + user ? &wq->rq.bar2_pa : NULL); /* * User mode must have bar2 access. */ - if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) { + if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) { pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n", pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid); goto free_dma; @@ -285,7 +302,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, /* build fw_ri_res_wr */ wr_len = sizeof *res_wr + 2 * sizeof *res; - + if (need_rq) + wr_len += sizeof(*res); skb = alloc_skb(wr_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; @@ -296,7 +314,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, res_wr = __skb_put_zero(skb, wr_len); res_wr->op_nres = cpu_to_be32( FW_WR_OP_V(FW_RI_RES_WR) | - FW_RI_RES_WR_NRES_V(2) | + FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) | FW_WR_COMPL_F); res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); res_wr->cookie = (uintptr_t)wr_waitp; @@ -327,30 +345,36 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, FW_RI_RES_WR_EQSIZE_V(eqsize)); res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid); res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr); - res++; - res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; - res->u.sqrq.op = FW_RI_RES_OP_WRITE; - /* - * eqsize is the number of 64B entries plus the status page size. - */ - eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + - rdev->hw_queue.t4_eq_status_entries; - res->u.sqrq.fetchszm_to_iqid = cpu_to_be32( - FW_RI_RES_WR_HOSTFCMODE_V(0) | /* no host cidx updates */ - FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ - FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ - FW_RI_RES_WR_IQID_V(rcq->cqid)); - res->u.sqrq.dcaen_to_eqsize = cpu_to_be32( - FW_RI_RES_WR_DCAEN_V(0) | - FW_RI_RES_WR_DCACPU_V(0) | - FW_RI_RES_WR_FBMIN_V(2) | - FW_RI_RES_WR_FBMAX_V(3) | - FW_RI_RES_WR_CIDXFTHRESHO_V(0) | - FW_RI_RES_WR_CIDXFTHRESH_V(0) | - FW_RI_RES_WR_EQSIZE_V(eqsize)); - res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); - res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + if (need_rq) { + res++; + res->u.sqrq.restype = FW_RI_RES_TYPE_RQ; + res->u.sqrq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size + */ + eqsize = wq->rq.size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.sqrq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + /* don't keep in chip cache */ + FW_RI_RES_WR_CPRIO_V(0) | + /* set by uP at ri_init time */ + FW_RI_RES_WR_PCIECHN_V(0) | + FW_RI_RES_WR_IQID_V(rcq->cqid)); + res->u.sqrq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid); + res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr); + } c4iw_init_wr_wait(wr_waitp); ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__); @@ -363,26 +387,30 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, return 0; free_dma: - dma_free_coherent(&(rdev->lldi.pdev->dev), - wq->rq.memsize, wq->rq.queue, - dma_unmap_addr(&wq->rq, mapping)); + if (need_rq) + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->rq.memsize, wq->rq.queue, + dma_unmap_addr(&wq->rq, mapping)); free_sq: dealloc_sq(rdev, &wq->sq); free_hwaddr: - c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); + if (need_rq) + c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); free_sw_rq: - kfree(wq->rq.sw_rq); + if (need_rq) + kfree(wq->rq.sw_rq); free_sw_sq: kfree(wq->sq.sw_sq); free_rq_qid: - c4iw_put_qpid(rdev, wq->rq.qid, uctx); + if (need_rq) + c4iw_put_qpid(rdev, wq->rq.qid, uctx); free_sq_qid: c4iw_put_qpid(rdev, wq->sq.qid, uctx); return ret; } static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, - struct ib_send_wr *wr, int max, u32 *plenp) + const struct ib_send_wr *wr, int max, u32 *plenp) { u8 *dstp, *srcp; u32 plen = 0; @@ -427,7 +455,12 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, { int i; u32 plen = 0; - __be64 *flitp = (__be64 *)isglp->sge; + __be64 *flitp; + + if ((__be64 *)isglp == queue_end) + isglp = (struct fw_ri_isgl *)queue_start; + + flitp = (__be64 *)isglp->sge; for (i = 0; i < num_sge; i++) { if ((plen + sg_list[i].length) < plen) @@ -452,7 +485,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end, } static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + const struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; @@ -519,7 +552,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe, } static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, - struct ib_send_wr *wr, u8 *len16) + const struct ib_send_wr *wr, u8 *len16) { u32 plen; int size; @@ -527,7 +560,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, if (wr->num_sge > T4_MAX_SEND_SGE) return -EINVAL; - wqe->write.r2 = 0; + + /* + * iWARP protocol supports 64 bit immediate data but rdma api + * limits it to 32bit. + */ + if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data; + else + wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0; wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); if (wr->num_sge) { @@ -561,7 +602,58 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp, + struct ib_send_wr *wr) +{ + memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16); + memset(immdp->r1, 0, 6); + immdp->op = FW_RI_DATA_IMMD; + immdp->immdlen = 16; +} + +static void build_rdma_write_cmpl(struct t4_sq *sq, + struct fw_ri_rdma_write_cmpl_wr *wcwr, + const struct ib_send_wr *wr, u8 *len16) +{ + u32 plen; + int size; + + /* + * This code assumes the struct fields preceding the write isgl + * fit in one 64B WR slot. This is because the WQE is built + * directly in the dma queue, and wrapping is only handled + * by the code buildling sgls. IE the "fixed part" of the wr + * structs must all fit in 64B. The WQE build code should probably be + * redesigned to avoid this restriction, but for now just add + * the BUILD_BUG_ON() to catch if this WQE struct gets too big. + */ + BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64); + + wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey); + wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr); + wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey); + wcwr->r2 = 0; + wcwr->r3 = 0; + + /* SEND_INV SGL */ + if (wr->next->send_flags & IB_SEND_INLINE) + build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next); + else + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL); + + /* WRITE SGL */ + build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size], + wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen); + + size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) + + wr->num_sge * sizeof(struct fw_ri_sge); + wcwr->plen = cpu_to_be32(plen); + *len16 = DIV_ROUND_UP(size, 16); +} + +static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) { if (wr->num_sge > 1) return -EINVAL; @@ -590,8 +682,74 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) return 0; } +static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr) +{ + bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; + struct t4_swsqe *swsqe; + union t4_wr *wqe; + u16 write_wrid; + u8 len16; + u16 idx; + + /* + * The sw_sq entries still look like a WRITE and a SEND and consume + * 2 slots. The FW WR, however, will be a single uber-WR. + */ + wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue + + qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE); + build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16); + + /* WRITE swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_RDMA_WRITE; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = write_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + write_wrid = qhp->wq.sq.pidx; + + /* just bump the sw_sq */ + qhp->wq.sq.in_use++; + if (++qhp->wq.sq.pidx == qhp->wq.sq.size) + qhp->wq.sq.pidx = 0; + + /* SEND_WITH_INV swsqe */ + swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; + swsqe->opcode = FW_RI_SEND_WITH_INV; + swsqe->idx = qhp->wq.sq.pidx; + swsqe->complete = 0; + swsqe->signaled = send_signaled; + swsqe->flushed = 0; + swsqe->wr_id = wr->next->wr_id; + if (c4iw_wr_log) { + swsqe->sge_ts = + cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]); + swsqe->host_time = ktime_get(); + } + + wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0; + wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx; + + init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR, + write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16); + t4_sq_produce(&qhp->wq, len16); + idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + + t4_ring_sq_db(&qhp->wq, idx, wqe); +} + static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, - struct ib_recv_wr *wr, u8 *len16) + const struct ib_recv_wr *wr, u8 *len16) { int ret; @@ -605,8 +763,22 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr, + u8 *len16) +{ + int ret; + + ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1), + &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL); + if (ret) + return ret; + *len16 = DIV_ROUND_UP(sizeof(wqe->recv) + + wr->num_sge * sizeof(struct fw_ri_sge), 16); + return 0; +} + static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, - struct ib_reg_wr *wr, struct c4iw_mr *mhp, + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16) { __be64 *p = (__be64 *)fr->pbl; @@ -638,8 +810,8 @@ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, } static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, - bool dsgl_supported) + const struct ib_reg_wr *wr, struct c4iw_mr *mhp, + u8 *len16, bool dsgl_supported) { struct fw_ri_immd *imdp; __be64 *p; @@ -701,7 +873,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16) +static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr, + u8 *len16) { wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; @@ -721,7 +894,7 @@ static void free_qp_work(struct work_struct *work) pr_debug("qhp %p ucontext %p\n", qhp, ucontext); destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq); if (ucontext) c4iw_put_ucontext(ucontext); @@ -804,6 +977,9 @@ static int ib_to_fw_opcode(int ib_opcode) case IB_WR_RDMA_WRITE: opcode = FW_RI_RDMA_WRITE; break; + case IB_WR_RDMA_WRITE_WITH_IMM: + opcode = FW_RI_WRITE_IMMEDIATE; + break; case IB_WR_RDMA_READ: case IB_WR_RDMA_READ_WITH_INV: opcode = FW_RI_READ_REQ; @@ -820,7 +996,8 @@ static int ib_to_fw_opcode(int ib_opcode) return opcode; } -static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) +static int complete_sq_drain_wr(struct c4iw_qp *qhp, + const struct ib_send_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *schp; @@ -858,8 +1035,9 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr) return 0; } -static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int complete_sq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int ret = 0; @@ -874,7 +1052,8 @@ static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr, return ret; } -static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +static void complete_rq_drain_wr(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) { struct t4_cqe cqe = {}; struct c4iw_cq *rchp; @@ -906,7 +1085,8 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } -static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) +static void complete_rq_drain_wrs(struct c4iw_qp *qhp, + const struct ib_recv_wr *wr) { while (wr) { complete_rq_drain_wr(qhp, wr); @@ -914,14 +1094,15 @@ static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr) } } -int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, + const struct ib_send_wr **bad_wr) { int err = 0; u8 len16 = 0; enum fw_wr_opcodes fw_opcode = 0; enum fw_ri_wr_flags fw_flags; struct c4iw_qp *qhp; + struct c4iw_dev *rhp; union t4_wr *wqe = NULL; u32 num_wrs; struct t4_swsqe *swsqe; @@ -929,6 +1110,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, u16 idx = 0; qhp = to_c4iw_qp(ibqp); + rhp = qhp->rhp; spin_lock_irqsave(&qhp->lock, flag); /* @@ -946,6 +1128,30 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, *bad_wr = wr; return -ENOMEM; } + + /* + * Fastpath for NVMe-oF target WRITE + SEND_WITH_INV wr chain which is + * the response for small NVMEe-oF READ requests. If the chain is + * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths + * meet the requirements of the fw_ri_write_cmpl_wr work request, + * then build and post the write_cmpl WR. If any of the tests + * below are not true, then we continue on with the tradtional WRITE + * and SEND WRs. + */ + if (qhp->rhp->rdev.lldi.write_cmpl_support && + CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >= + CHELSIO_T5 && + wr && wr->next && !wr->next->next && + wr->opcode == IB_WR_RDMA_WRITE && + wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL && + wr->next->opcode == IB_WR_SEND_WITH_INV && + wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE && + wr->next->num_sge == 1 && num_wrs >= 2) { + post_write_cmpl(qhp, wr); + spin_unlock_irqrestore(&qhp->lock, flag); + return 0; + } + while (wr) { if (num_wrs == 0) { err = -ENOMEM; @@ -973,6 +1179,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->opcode = FW_RI_SEND_WITH_INV; err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16); break; + case IB_WR_RDMA_WRITE_WITH_IMM: + if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) { + err = -EINVAL; + break; + } + fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE; + /*FALLTHROUGH*/ case IB_WR_RDMA_WRITE: fw_opcode = FW_RI_RDMA_WRITE_WR; swsqe->opcode = FW_RI_RDMA_WRITE; @@ -983,8 +1196,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_RDMA_READ_WR; swsqe->opcode = FW_RI_READ_REQ; if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) { - c4iw_invalidate_mr(qhp->rhp, - wr->sg_list[0].lkey); + c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey); fw_flags = FW_RI_RDMA_READ_INVALIDATE; } else { fw_flags = 0; @@ -1000,7 +1212,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); swsqe->opcode = FW_RI_FAST_REGISTER; - if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && + if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support && !mhp->attr.state && mhp->mpl_len <= 2) { fw_opcode = FW_RI_FR_NSMR_TPTE_WR; build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), @@ -1009,7 +1221,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_FR_NSMR_WR; err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), mhp, &len16, - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + rhp->rdev.lldi.ulptx_memwrite_dsgl); if (err) break; } @@ -1022,7 +1234,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; err = build_inv_stag(wqe, wr, &len16); - c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey); + c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey); break; default: pr_warn("%s post of type=%d TBD!\n", __func__, @@ -1041,7 +1253,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, swsqe->wr_id = wr->wr_id; if (c4iw_wr_log) { swsqe->sge_ts = cxgb4_read_sge_timestamp( - qhp->rhp->rdev.lldi.ports[0]); + rhp->rdev.lldi.ports[0]); swsqe->host_time = ktime_get(); } @@ -1055,7 +1267,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, t4_sq_produce(&qhp->wq, len16); idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE); } - if (!qhp->rhp->rdev.status_page->db_off) { + if (!rhp->rdev.status_page->db_off) { t4_ring_sq_db(&qhp->wq, idx, wqe); spin_unlock_irqrestore(&qhp->lock, flag); } else { @@ -1065,8 +1277,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return err; } -int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, - struct ib_recv_wr **bad_wr) +int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) { int err = 0; struct c4iw_qp *qhp; @@ -1145,6 +1357,89 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, return err; } +static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe, + u64 wr_id, u8 len16) +{ + struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx]; + + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n", + __func__, srq->cidx, srq->pidx, srq->wq_pidx, + srq->in_use, srq->ooo_count, + (unsigned long long)wr_id, srq->pending_cidx, + srq->pending_pidx, srq->pending_in_use); + pwr->wr_id = wr_id; + pwr->len16 = len16; + memcpy(&pwr->wqe, wqe, len16 * 16); + t4_srq_produce_pending_wr(srq); +} + +int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + union t4_recv_wr *wqe, lwqe; + struct c4iw_srq *srq; + unsigned long flag; + u8 len16 = 0; + u16 idx = 0; + int err = 0; + u32 num_wrs; + + srq = to_c4iw_srq(ibsrq); + spin_lock_irqsave(&srq->lock, flag); + num_wrs = t4_srq_avail(&srq->wq); + if (num_wrs == 0) { + spin_unlock_irqrestore(&srq->lock, flag); + return -ENOMEM; + } + while (wr) { + if (wr->num_sge > T4_MAX_RECV_SGE) { + err = -EINVAL; + *bad_wr = wr; + break; + } + wqe = &lwqe; + if (num_wrs) + err = build_srq_recv(wqe, wr, &len16); + else + err = -ENOMEM; + if (err) { + *bad_wr = wr; + break; + } + + wqe->recv.opcode = FW_RI_RECV_WR; + wqe->recv.r1 = 0; + wqe->recv.wrid = srq->wq.pidx; + wqe->recv.r2[0] = 0; + wqe->recv.r2[1] = 0; + wqe->recv.r2[2] = 0; + wqe->recv.len16 = len16; + + if (srq->wq.ooo_count || + srq->wq.pending_in_use || + srq->wq.sw_rq[srq->wq.pidx].valid) { + defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16); + } else { + srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id; + srq->wq.sw_rq[srq->wq.pidx].valid = 1; + c4iw_copy_wr_to_srq(&srq->wq, wqe, len16); + pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n", + __func__, srq->wq.cidx, + srq->wq.pidx, srq->wq.wq_pidx, + srq->wq.in_use, + (unsigned long long)wr->wr_id); + t4_srq_produce(&srq->wq, len16); + idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + } + wr = wr->next; + num_wrs--; + } + if (idx) + t4_ring_srq_db(&srq->wq, idx, len16, wqe); + spin_unlock_irqrestore(&srq->lock, flag); + return err; +} + static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type, u8 *ecode) { @@ -1321,7 +1616,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; - int rq_flushed, sq_flushed; + int rq_flushed = 0, sq_flushed; unsigned long flag; pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); @@ -1340,11 +1635,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, return; } qhp->wq.flushed = 1; - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); c4iw_flush_hw_cq(rchp, qhp); - c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + if (!qhp->srq) { + c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + } if (schp != rchp) c4iw_flush_hw_cq(schp, qhp); @@ -1388,7 +1685,7 @@ static void flush_qp(struct c4iw_qp *qhp) schp = to_c4iw_cq(qhp->ibqp.send_cq); if (qhp->ibqp.uobject) { - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); @@ -1517,16 +1814,21 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd); wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid); wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid); - wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + if (qhp->srq) { + wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ | + qhp->srq->idx); + } else { + wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid); + wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); + wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - + rhp->rdev.lldi.vr->rq.start); + } wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq); wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq); wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord); wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird); wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq); wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq); - wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size); - wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr - - rhp->rdev.lldi.vr->rq.start); if (qhp->attr.mpa_attr.initiator) build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init); @@ -1643,7 +1945,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, case C4IW_QP_STATE_RTS: switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1656,7 +1958,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, goto err; break; case C4IW_QP_STATE_TERMINATE: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; @@ -1673,7 +1975,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, } break; case C4IW_QP_STATE_ERROR: - t4_set_wq_in_error(&qhp->wq); + t4_set_wq_in_error(&qhp->wq, 0); set_state(qhp, C4IW_QP_STATE_ERROR); if (!internal) { abort = 1; @@ -1819,7 +2121,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - unsigned int sqsize, rqsize; + unsigned int sqsize, rqsize = 0; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm; @@ -1840,11 +2142,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE) return ERR_PTR(-EINVAL); - if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) - return ERR_PTR(-E2BIG); - rqsize = attrs->cap.max_recv_wr + 1; - if (rqsize < 8) - rqsize = 8; + if (!attrs->srq) { + if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return ERR_PTR(-E2BIG); + rqsize = attrs->cap.max_recv_wr + 1; + if (rqsize < 8) + rqsize = 8; + } if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size) return ERR_PTR(-E2BIG); @@ -1869,19 +2173,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, (sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64); qhp->wq.sq.flush_cidx = -1; - qhp->wq.rq.size = rqsize; - qhp->wq.rq.memsize = - (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * - sizeof(*qhp->wq.rq.queue); + if (!attrs->srq) { + qhp->wq.rq.size = rqsize; + qhp->wq.rq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*qhp->wq.rq.queue); + } if (ucontext) { qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE); - qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE); + if (!attrs->srq) + qhp->wq.rq.memsize = + roundup(qhp->wq.rq.memsize, PAGE_SIZE); } ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, - qhp->wr_waitp); + qhp->wr_waitp, !attrs->srq); if (ret) goto err_free_wr_wait; @@ -1894,10 +2202,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid; qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid; qhp->attr.sq_num_entries = attrs->cap.max_send_wr; - qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; qhp->attr.sq_max_sges = attrs->cap.max_send_sge; qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge; - qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + if (!attrs->srq) { + qhp->attr.rq_num_entries = attrs->cap.max_recv_wr; + qhp->attr.rq_max_sges = attrs->cap.max_recv_sge; + } qhp->attr.state = C4IW_QP_STATE_IDLE; qhp->attr.next_state = C4IW_QP_STATE_IDLE; qhp->attr.enable_rdma_read = 1; @@ -1922,21 +2232,27 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, ret = -ENOMEM; goto err_remove_handle; } - rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); - if (!rq_key_mm) { - ret = -ENOMEM; - goto err_free_sq_key; + if (!attrs->srq) { + rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL); + if (!rq_key_mm) { + ret = -ENOMEM; + goto err_free_sq_key; + } } sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL); if (!sq_db_key_mm) { ret = -ENOMEM; goto err_free_rq_key; } - rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); - if (!rq_db_key_mm) { - ret = -ENOMEM; - goto err_free_sq_db_key; + if (!attrs->srq) { + rq_db_key_mm = + kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL); + if (!rq_db_key_mm) { + ret = -ENOMEM; + goto err_free_sq_db_key; + } } + memset(&uresp, 0, sizeof(uresp)); if (t4_sq_onchip(&qhp->wq.sq)) { ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm), GFP_KERNEL); @@ -1945,30 +2261,35 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, goto err_free_rq_db_key; } uresp.flags = C4IW_QPF_ONCHIP; - } else - uresp.flags = 0; + } + if (rhp->rdev.lldi.write_w_imm_support) + uresp.flags |= C4IW_QPF_WRITE_W_IMM; uresp.qid_mask = rhp->rdev.qpmask; uresp.sqid = qhp->wq.sq.qid; uresp.sq_size = qhp->wq.sq.size; uresp.sq_memsize = qhp->wq.sq.memsize; - uresp.rqid = qhp->wq.rq.qid; - uresp.rq_size = qhp->wq.rq.size; - uresp.rq_memsize = qhp->wq.rq.memsize; + if (!attrs->srq) { + uresp.rqid = qhp->wq.rq.qid; + uresp.rq_size = qhp->wq.rq.size; + uresp.rq_memsize = qhp->wq.rq.memsize; + } spin_lock(&ucontext->mmap_lock); if (ma_sync_key_mm) { uresp.ma_sync_key = ucontext->key; ucontext->key += PAGE_SIZE; - } else { - uresp.ma_sync_key = 0; } uresp.sq_key = ucontext->key; ucontext->key += PAGE_SIZE; - uresp.rq_key = ucontext->key; - ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } uresp.sq_db_gts_key = ucontext->key; ucontext->key += PAGE_SIZE; - uresp.rq_db_gts_key = ucontext->key; - ucontext->key += PAGE_SIZE; + if (!attrs->srq) { + uresp.rq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + } spin_unlock(&ucontext->mmap_lock); ret = ib_copy_to_udata(udata, &uresp, sizeof uresp); if (ret) @@ -1977,18 +2298,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, sq_key_mm->addr = qhp->wq.sq.phys_addr; sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize); insert_mmap(ucontext, sq_key_mm); - rq_key_mm->key = uresp.rq_key; - rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); - rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); - insert_mmap(ucontext, rq_key_mm); + if (!attrs->srq) { + rq_key_mm->key = uresp.rq_key; + rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue); + rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize); + insert_mmap(ucontext, rq_key_mm); + } sq_db_key_mm->key = uresp.sq_db_gts_key; sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa; sq_db_key_mm->len = PAGE_SIZE; insert_mmap(ucontext, sq_db_key_mm); - rq_db_key_mm->key = uresp.rq_db_gts_key; - rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa; - rq_db_key_mm->len = PAGE_SIZE; - insert_mmap(ucontext, rq_db_key_mm); + if (!attrs->srq) { + rq_db_key_mm->key = uresp.rq_db_gts_key; + rq_db_key_mm->addr = + (u64)(unsigned long)qhp->wq.rq.bar2_pa; + rq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, rq_db_key_mm); + } if (ma_sync_key_mm) { ma_sync_key_mm->key = uresp.ma_sync_key; ma_sync_key_mm->addr = @@ -2001,7 +2327,19 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, c4iw_get_ucontext(ucontext); qhp->ucontext = ucontext; } + if (!attrs->srq) { + qhp->wq.qp_errp = + &qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err; + } else { + qhp->wq.qp_errp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err; + qhp->wq.srqidxp = + &qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx; + } + qhp->ibqp.qp_num = qhp->wq.sq.qid; + if (attrs->srq) + qhp->srq = to_c4iw_srq(attrs->srq); INIT_LIST_HEAD(&qhp->db_fc_entry); pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n", qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize, @@ -2011,18 +2349,20 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, err_free_ma_sync_key: kfree(ma_sync_key_mm); err_free_rq_db_key: - kfree(rq_db_key_mm); + if (!attrs->srq) + kfree(rq_db_key_mm); err_free_sq_db_key: kfree(sq_db_key_mm); err_free_rq_key: - kfree(rq_key_mm); + if (!attrs->srq) + kfree(rq_key_mm); err_free_sq_key: kfree(sq_key_mm); err_remove_handle: remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid); err_destroy_qp: destroy_qp(&rhp->rdev, &qhp->wq, - ucontext ? &ucontext->uctx : &rhp->rdev.uctx); + ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq); err_free_wr_wait: c4iw_put_wr_wait(qhp->wr_waitp); err_free_qhp: @@ -2088,6 +2428,45 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn) return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); } +void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq) +{ + struct ib_event event = {}; + + event.device = &srq->rhp->ibdev; + event.element.srq = &srq->ibsrq; + event.event = IB_EVENT_SRQ_LIMIT_REACHED; + ib_dispatch_event(&event); +} + +int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct c4iw_srq *srq = to_c4iw_srq(ib_srq); + int ret = 0; + + /* + * XXX 0 mask == a SW interrupt for srq_limit reached... + */ + if (udata && !srq_attr_mask) { + c4iw_dispatch_srq_limit_reached_event(srq); + goto out; + } + + /* no support for this yet */ + if (srq_attr_mask & IB_SRQ_MAX_WR) { + ret = -EINVAL; + goto out; + } + + if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) { + srq->armed = true; + srq->srq_limit = attr->srq_limit; + } +out: + return ret; +} + int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { @@ -2104,3 +2483,359 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; return 0; } + +static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + struct sk_buff *skb = srq->destroy_skb; + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + int wr_len; + + wr_len = sizeof(*res_wr) + sizeof(*res); + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_RESET; + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.eqid = cpu_to_be32(wq->qid); + + c4iw_init_wr_wait(wr_waitp); + c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__); + + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + pci_unmap_addr(wq, mapping)); + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); + kfree(wq->sw_rq); + c4iw_put_qpid(rdev, wq->qid, uctx); +} + +static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, + struct c4iw_wr_wait *wr_waitp) +{ + struct c4iw_rdev *rdev = &srq->rhp->rdev; + int user = (uctx != &rdev->uctx); + struct t4_srq *wq = &srq->wq; + struct fw_ri_res_wr *res_wr; + struct fw_ri_res *res; + struct sk_buff *skb; + int wr_len; + int eqsize; + int ret = -ENOMEM; + + wq->qid = c4iw_get_qpid(rdev, uctx); + if (!wq->qid) + goto err; + + if (!user) { + wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq), + GFP_KERNEL); + if (!wq->sw_rq) + goto err_put_qpid; + wq->pending_wrs = kcalloc(srq->wq.size, + sizeof(*srq->wq.pending_wrs), + GFP_KERNEL); + if (!wq->pending_wrs) + goto err_free_sw_rq; + } + + wq->rqt_size = wq->size; + wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size); + if (!wq->rqt_hwaddr) + goto err_free_pending_wrs; + wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> + T4_RQT_ENTRY_SHIFT; + + wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->memsize, &wq->dma_addr, + GFP_KERNEL); + if (!wq->queue) + goto err_free_rqtpool; + + memset(wq->queue, 0, wq->memsize); + pci_unmap_addr_set(wq, mapping, wq->dma_addr); + + wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS, + &wq->bar2_qid, + user ? &wq->bar2_pa : NULL); + + /* + * User mode must have bar2 access. + */ + + if (user && !wq->bar2_va) { + pr_warn(MOD "%s: srqid %u not in BAR2 range.\n", + pci_name(rdev->lldi.pdev), wq->qid); + ret = -EINVAL; + goto err_free_queue; + } + + /* build fw_ri_res_wr */ + wr_len = sizeof(*res_wr) + sizeof(*res); + + skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL); + if (!skb) + goto err_free_queue; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); + + res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len); + memset(res_wr, 0, wr_len); + res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) | + FW_RI_RES_WR_NRES_V(1) | + FW_WR_COMPL_F); + res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16)); + res_wr->cookie = (uintptr_t)wr_waitp; + res = res_wr->res; + res->u.srq.restype = FW_RI_RES_TYPE_SRQ; + res->u.srq.op = FW_RI_RES_OP_WRITE; + + /* + * eqsize is the number of 64B entries plus the status page size. + */ + eqsize = wq->size * T4_RQ_NUM_SLOTS + + rdev->hw_queue.t4_eq_status_entries; + res->u.srq.eqid = cpu_to_be32(wq->qid); + res->u.srq.fetchszm_to_iqid = + /* no host cidx updates */ + cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) | + FW_RI_RES_WR_CPRIO_V(0) | /* don't keep in chip cache */ + FW_RI_RES_WR_PCIECHN_V(0) | /* set by uP at ri_init time */ + FW_RI_RES_WR_FETCHRO_V(0)); /* relaxed_ordering */ + res->u.srq.dcaen_to_eqsize = + cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) | + FW_RI_RES_WR_DCACPU_V(0) | + FW_RI_RES_WR_FBMIN_V(2) | + FW_RI_RES_WR_FBMAX_V(3) | + FW_RI_RES_WR_CIDXFTHRESHO_V(0) | + FW_RI_RES_WR_CIDXFTHRESH_V(0) | + FW_RI_RES_WR_EQSIZE_V(eqsize)); + res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr); + res->u.srq.srqid = cpu_to_be32(srq->idx); + res->u.srq.pdid = cpu_to_be32(srq->pdid); + res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size); + res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr - + rdev->lldi.vr->rq.start); + + c4iw_init_wr_wait(wr_waitp); + + ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__); + if (ret) + goto err_free_queue; + + pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n" + " bar2_addr %p rqt addr 0x%x size %d\n", + __func__, srq->idx, wq->qid, srq->pdid, wq->queue, + (u64)virt_to_phys(wq->queue), wq->bar2_va, + wq->rqt_hwaddr, wq->rqt_size); + + return 0; +err_free_queue: + dma_free_coherent(&rdev->lldi.pdev->dev, + wq->memsize, wq->queue, + pci_unmap_addr(wq, mapping)); +err_free_rqtpool: + c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size); +err_free_pending_wrs: + if (!user) + kfree(wq->pending_wrs); +err_free_sw_rq: + if (!user) + kfree(wq->sw_rq); +err_put_qpid: + c4iw_put_qpid(rdev, wq->qid, uctx); +err: + return ret; +} + +void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16) +{ + u64 *src, *dst; + + src = (u64 *)wqe; + dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE); + while (len16) { + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + *dst++ = *src++; + if (dst >= (u64 *)&srq->queue[srq->size]) + dst = (u64 *)srq->queue; + len16--; + } +} + +struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, + struct ib_udata *udata) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_pd *php; + struct c4iw_create_srq_resp uresp; + struct c4iw_ucontext *ucontext; + struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm; + int rqsize; + int ret; + int wr_len; + + pr_debug("%s ib_pd %p\n", __func__, pd); + + php = to_c4iw_pd(pd); + rhp = php->rhp; + + if (!rhp->rdev.lldi.vr->srq.size) + return ERR_PTR(-EINVAL); + if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size) + return ERR_PTR(-E2BIG); + if (attrs->attr.max_sge > T4_MAX_RECV_SGE) + return ERR_PTR(-E2BIG); + + /* + * SRQ RQT and RQ must be a power of 2 and at least 16 deep. + */ + rqsize = attrs->attr.max_wr + 1; + rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); + + ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); + if (!srq->wr_waitp) { + ret = -ENOMEM; + goto err_free_srq; + } + + srq->idx = c4iw_alloc_srq_idx(&rhp->rdev); + if (srq->idx < 0) { + ret = -ENOMEM; + goto err_free_wr_wait; + } + + wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res); + srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL); + if (!srq->destroy_skb) { + ret = -ENOMEM; + goto err_free_srq_idx; + } + + srq->rhp = rhp; + srq->pdid = php->pdid; + + srq->wq.size = rqsize; + srq->wq.memsize = + (rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) * + sizeof(*srq->wq.queue); + if (ucontext) + srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE); + + ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx : + &rhp->rdev.uctx, srq->wr_waitp); + if (ret) + goto err_free_skb; + attrs->attr.max_wr = rqsize - 1; + + if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) + srq->flags = T4_SRQ_LIMIT_SUPPORT; + + ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid); + if (ret) + goto err_free_queue; + + if (udata) { + srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); + if (!srq_key_mm) { + ret = -ENOMEM; + goto err_remove_handle; + } + srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); + if (!srq_db_key_mm) { + ret = -ENOMEM; + goto err_free_srq_key_mm; + } + memset(&uresp, 0, sizeof(uresp)); + uresp.flags = srq->flags; + uresp.qid_mask = rhp->rdev.qpmask; + uresp.srqid = srq->wq.qid; + uresp.srq_size = srq->wq.size; + uresp.srq_memsize = srq->wq.memsize; + uresp.rqt_abs_idx = srq->wq.rqt_abs_idx; + spin_lock(&ucontext->mmap_lock); + uresp.srq_key = ucontext->key; + ucontext->key += PAGE_SIZE; + uresp.srq_db_gts_key = ucontext->key; + ucontext->key += PAGE_SIZE; + spin_unlock(&ucontext->mmap_lock); + ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (ret) + goto err_free_srq_db_key_mm; + srq_key_mm->key = uresp.srq_key; + srq_key_mm->addr = virt_to_phys(srq->wq.queue); + srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize); + insert_mmap(ucontext, srq_key_mm); + srq_db_key_mm->key = uresp.srq_db_gts_key; + srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa; + srq_db_key_mm->len = PAGE_SIZE; + insert_mmap(ucontext, srq_db_key_mm); + } + + pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n", + __func__, srq->wq.qid, srq->idx, srq->wq.size, + (unsigned long)srq->wq.memsize, attrs->attr.max_wr); + + spin_lock_init(&srq->lock); + return &srq->ibsrq; +err_free_srq_db_key_mm: + kfree(srq_db_key_mm); +err_free_srq_key_mm: + kfree(srq_key_mm); +err_remove_handle: + remove_handle(rhp, &rhp->qpidr, srq->wq.qid); +err_free_queue: + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); +err_free_skb: + if (srq->destroy_skb) + kfree_skb(srq->destroy_skb); +err_free_srq_idx: + c4iw_free_srq_idx(&rhp->rdev, srq->idx); +err_free_wr_wait: + c4iw_put_wr_wait(srq->wr_waitp); +err_free_srq: + kfree(srq); + return ERR_PTR(ret); +} + +int c4iw_destroy_srq(struct ib_srq *ibsrq) +{ + struct c4iw_dev *rhp; + struct c4iw_srq *srq; + struct c4iw_ucontext *ucontext; + + srq = to_c4iw_srq(ibsrq); + rhp = srq->rhp; + + pr_debug("%s id %d\n", __func__, srq->wq.qid); + + remove_handle(rhp, &rhp->qpidr, srq->wq.qid); + ucontext = ibsrq->uobject ? + to_c4iw_ucontext(ibsrq->uobject->context) : NULL; + free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, + srq->wr_waitp); + c4iw_free_srq_idx(&rhp->rdev, srq->idx); + c4iw_put_wr_wait(srq->wr_waitp); + kfree(srq); + return 0; +} diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c index 0ef25ae05e6f..57ed26b3cc21 100644 --- a/drivers/infiniband/hw/cxgb4/resource.c +++ b/drivers/infiniband/hw/cxgb4/resource.c @@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev) } /* nr_* must be power of 2 */ -int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) +int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, + u32 nr_pdid, u32 nr_srqt) { int err = 0; err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, @@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) nr_pdid, 1, 0); if (err) goto pdid_err; + if (!nr_srqt) + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + 1, 1, 0); + else + err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0, + nr_srqt, 0, 0); + if (err) + goto srq_err; return 0; + srq_err: + c4iw_id_table_free(&rdev->resource.pdid_table); pdid_err: c4iw_id_table_free(&rdev->resource.qid_table); qid_err: @@ -371,13 +382,21 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) int c4iw_rqtpool_create(struct c4iw_rdev *rdev) { unsigned rqt_start, rqt_chunk, rqt_top; + int skip = 0; rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1); if (!rdev->rqt_pool) return -ENOMEM; - rqt_start = rdev->lldi.vr->rq.start; - rqt_chunk = rdev->lldi.vr->rq.size; + /* + * If SRQs are supported, then never use the first RQE from + * the RQT region. This is because HW uses RQT index 0 as NULL. + */ + if (rdev->lldi.vr->srq.size) + skip = T4_RQT_ENTRY_SIZE; + + rqt_start = rdev->lldi.vr->rq.start + skip; + rqt_chunk = rdev->lldi.vr->rq.size - skip; rqt_top = rqt_start + rqt_chunk; while (rqt_start < rqt_top) { @@ -405,6 +424,32 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev) kref_put(&rdev->rqt_kref, destroy_rqtpool); } +int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev) +{ + int idx; + + idx = c4iw_id_alloc(&rdev->resource.srq_table); + mutex_lock(&rdev->stats.lock); + if (idx == -1) { + rdev->stats.srqt.fail++; + mutex_unlock(&rdev->stats.lock); + return -ENOMEM; + } + rdev->stats.srqt.cur++; + if (rdev->stats.srqt.cur > rdev->stats.srqt.max) + rdev->stats.srqt.max = rdev->stats.srqt.cur; + mutex_unlock(&rdev->stats.lock); + return idx; +} + +void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx) +{ + c4iw_id_free(&rdev->resource.srq_table, idx); + mutex_lock(&rdev->stats.lock); + rdev->stats.srqt.cur--; + mutex_unlock(&rdev->stats.lock); +} + /* * On-Chip QP Memory. */ diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 8369c7c8de83..e42021fd6fd6 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -52,12 +52,16 @@ struct t4_status_page { __be16 pidx; u8 qp_err; /* flit 1 - sw owns */ u8 db_off; - u8 pad; + u8 pad[2]; u16 host_wq_pidx; u16 host_cidx; u16 host_pidx; + u16 pad2; + u32 srqidx; }; +#define T4_RQT_ENTRY_SHIFT 6 +#define T4_RQT_ENTRY_SIZE BIT(T4_RQT_ENTRY_SHIFT) #define T4_EQ_ENTRY_SIZE 64 #define T4_SQ_NUM_SLOTS 5 @@ -87,6 +91,9 @@ static inline int t4_max_fr_depth(int use_dsgl) #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS) #define T4_MAX_RECV_SGE 4 +#define T4_WRITE_CMPL_MAX_SGL 4 +#define T4_WRITE_CMPL_MAX_CQE 16 + union t4_wr { struct fw_ri_res_wr res; struct fw_ri_wr ri; @@ -97,6 +104,7 @@ union t4_wr { struct fw_ri_fr_nsmr_wr fr; struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; + struct fw_ri_rdma_write_cmpl_wr write_cmpl; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; }; @@ -179,9 +187,32 @@ struct t4_cqe { __be32 wrid_hi; __be32 wrid_low; } gen; + struct { + __be32 stag; + __be32 msn; + __be32 reserved; + __be32 abs_rqe_idx; + } srcqe; + struct { + __be32 mo; + __be32 msn; + /* + * Use union for immediate data to be consistent with + * stack's 32 bit data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; + } imm_data_rcqe; + u64 drain_cookie; + __be64 flits[3]; } u; - __be64 reserved; + __be64 reserved[3]; __be64 bits_type_ts; }; @@ -237,6 +268,9 @@ struct t4_cqe { /* used for RQ completion processing */ #define CQE_WRID_STAG(x) (be32_to_cpu((x)->u.rcqe.stag)) #define CQE_WRID_MSN(x) (be32_to_cpu((x)->u.rcqe.msn)) +#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx)) +#define CQE_IMM_DATA(x)( \ + (x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32) /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) @@ -320,6 +354,7 @@ struct t4_swrqe { u64 wr_id; ktime_t host_time; u64 sge_ts; + int valid; }; struct t4_rq { @@ -349,8 +384,98 @@ struct t4_wq { void __iomem *db; struct c4iw_rdev *rdev; int flushed; + u8 *qp_errp; + u32 *srqidxp; +}; + +struct t4_srq_pending_wr { + u64 wr_id; + union t4_recv_wr wqe; + u8 len16; +}; + +struct t4_srq { + union t4_recv_wr *queue; + dma_addr_t dma_addr; + DECLARE_PCI_UNMAP_ADDR(mapping); + struct t4_swrqe *sw_rq; + void __iomem *bar2_va; + u64 bar2_pa; + size_t memsize; + u32 bar2_qid; + u32 qid; + u32 msn; + u32 rqt_hwaddr; + u32 rqt_abs_idx; + u16 rqt_size; + u16 size; + u16 cidx; + u16 pidx; + u16 wq_pidx; + u16 wq_pidx_inc; + u16 in_use; + struct t4_srq_pending_wr *pending_wrs; + u16 pending_cidx; + u16 pending_pidx; + u16 pending_in_use; + u16 ooo_count; }; +static inline u32 t4_srq_avail(struct t4_srq *srq) +{ + return srq->size - 1 - srq->in_use; +} + +static inline void t4_srq_produce(struct t4_srq *srq, u8 len16) +{ + srq->in_use++; + if (++srq->pidx == srq->size) + srq->pidx = 0; + srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE); + if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS) + srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS; + srq->queue[srq->size].status.host_pidx = srq->pidx; +} + +static inline void t4_srq_produce_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use++; + srq->in_use++; + if (++srq->pending_pidx == srq->size) + srq->pending_pidx = 0; +} + +static inline void t4_srq_consume_pending_wr(struct t4_srq *srq) +{ + srq->pending_in_use--; + srq->in_use--; + if (++srq->pending_cidx == srq->size) + srq->pending_cidx = 0; +} + +static inline void t4_srq_produce_ooo(struct t4_srq *srq) +{ + srq->in_use--; + srq->ooo_count++; +} + +static inline void t4_srq_consume_ooo(struct t4_srq *srq) +{ + srq->cidx++; + if (srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; + srq->ooo_count--; +} + +static inline void t4_srq_consume(struct t4_srq *srq) +{ + srq->in_use--; + if (++srq->cidx == srq->size) + srq->cidx = 0; + srq->queue[srq->size].status.host_cidx = srq->cidx; +} + static inline int t4_rqes_posted(struct t4_wq *wq) { return wq->rq.in_use; @@ -384,7 +509,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16) static inline void t4_rq_consume(struct t4_wq *wq) { wq->rq.in_use--; - wq->rq.msn++; if (++wq->rq.cidx == wq->rq.size) wq->rq.cidx = 0; } @@ -464,6 +588,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src) } } +static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16, + union t4_recv_wr *wqe) +{ + /* Flush host queue memory writes. */ + wmb(); + if (inc == 1 && srq->bar2_qid == 0 && wqe) { + pr_debug("%s : WC srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe); + } else { + pr_debug("%s: DB srq->pidx = %d; len16=%d\n", + __func__, srq->pidx, len16); + writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid), + srq->bar2_va + SGE_UDB_KDOORBELL); + } + /* Flush user doorbell area writes. */ + wmb(); +} + static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe) { @@ -515,12 +658,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, static inline int t4_wq_in_error(struct t4_wq *wq) { - return wq->rq.queue[wq->rq.size].status.qp_err; + return *wq->qp_errp; } -static inline void t4_set_wq_in_error(struct t4_wq *wq) +static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx) { - wq->rq.queue[wq->rq.size].status.qp_err = 1; + if (srqidx) + *wq->srqidxp = srqidx; + *wq->qp_errp = 1; } static inline void t4_disable_wq_db(struct t4_wq *wq) @@ -565,6 +710,7 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + u8 *qp_errp; unsigned long flags; }; @@ -698,18 +844,18 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe) static inline int t4_cq_in_error(struct t4_cq *cq) { - return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err; + return *cq->qp_errp; } static inline void t4_set_cq_in_error(struct t4_cq *cq) { - ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1; + *cq->qp_errp = 1; } #endif struct t4_dev_status_page { u8 db_off; - u8 pad1; + u8 write_cmpl_supported; u16 pad2; u32 pad3; u64 qp_start; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 58c531db4f4a..cbdb300a4794 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -50,7 +50,8 @@ enum fw_ri_wr_opcode { FW_RI_BYPASS = 0xd, FW_RI_RECEIVE = 0xe, - FW_RI_SGE_EC_CR_RETURN = 0xf + FW_RI_SGE_EC_CR_RETURN = 0xf, + FW_RI_WRITE_IMMEDIATE = FW_RI_RDMA_INIT }; enum fw_ri_wr_flags { @@ -59,7 +60,8 @@ enum fw_ri_wr_flags { FW_RI_SOLICITED_EVENT_FLAG = 0x04, FW_RI_READ_FENCE_FLAG = 0x08, FW_RI_LOCAL_FENCE_FLAG = 0x10, - FW_RI_RDMA_READ_INVALIDATE = 0x20 + FW_RI_RDMA_READ_INVALIDATE = 0x20, + FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40 }; enum fw_ri_mpa_attrs { @@ -263,6 +265,7 @@ enum fw_ri_res_type { FW_RI_RES_TYPE_SQ, FW_RI_RES_TYPE_RQ, FW_RI_RES_TYPE_CQ, + FW_RI_RES_TYPE_SRQ, }; enum fw_ri_res_op { @@ -296,6 +299,20 @@ struct fw_ri_res { __be32 r6_lo; __be64 r7; } cq; + struct fw_ri_res_srq { + __u8 restype; + __u8 op; + __be16 r3; + __be32 eqid; + __be32 r4[2]; + __be32 fetchszm_to_iqid; + __be32 dcaen_to_eqsize; + __be64 eqaddr; + __be32 srqid; + __be32 pdid; + __be32 hwsrqsize; + __be32 hwsrqaddr; + } srq; } u; }; @@ -531,7 +548,17 @@ struct fw_ri_rdma_write_wr { __u16 wrid; __u8 r1[3]; __u8 len16; - __be64 r2; + /* + * Use union for immediate data to be consistent with stack's 32 bit + * data and iWARP spec's 64 bit data. + */ + union { + struct { + __be32 imm_data32; + u32 reserved; + } ib_imm_data; + __be64 imm_data64; + } iw_imm_data; __be32 plen; __be32 stag_sink; __be64 to_sink; @@ -568,6 +595,37 @@ struct fw_ri_send_wr { #define FW_RI_SEND_WR_SENDOP_G(x) \ (((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M) +struct fw_ri_rdma_write_cmpl_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u8 r2; + __u8 flags_send; + __u16 wrid_send; + __be32 stag_inv; + __be32 plen; + __be32 stag_sink; + __be64 to_sink; + union fw_ri_cmpl { + struct fw_ri_immd_cmpl { + __u8 op; + __u8 r1[6]; + __u8 immdlen; + __u8 data[16]; + } immd_src; + struct fw_ri_isgl isgl_src; + } u_cmpl; + __be64 r3; +#ifndef C99_NOT_SUPPORTED + union fw_ri_write { + struct fw_ri_immd immd_src[0]; + struct fw_ri_isgl isgl_src[0]; + } u; +#endif +}; + struct fw_ri_rdma_read_wr { __u8 opcode; __u8 flags; @@ -707,6 +765,10 @@ enum fw_ri_init_p2ptype { FW_RI_INIT_P2PTYPE_DISABLED = 0xf, }; +enum fw_ri_init_rqeqid_srq { + FW_RI_INIT_RQEQID_SRQ = 1 << 31, +}; + struct fw_ri_wr { __be32 op_compl; __be32 flowid_len16; |