diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-04 03:57:19 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-04 03:57:19 +0400 |
commit | 877f075aac900288ce2e6a64075cceff09210a7e (patch) | |
tree | 2bcf23e903403a2bfa3c6fac14d1334614e4d0f1 /drivers/infiniband/hw | |
parent | 3cf59142bcd391680beb6932838fe09a73947705 (diff) | |
parent | f7eaa7ed8fd46542275cf249cd934a366f6556bb (diff) | |
download | linux-877f075aac900288ce2e6a64075cceff09210a7e.tar.xz |
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband
Pull infiniband updates from Roland Dreier:
"Main batch of InfiniBand/RDMA changes for 3.15:
- The biggest change is core API extensions and mlx5 low-level driver
support for handling DIF/DIX-style protection information, and the
addition of PI support to the iSER initiator. Target support will
be arriving shortly through the SCSI target tree.
- A nice simplification to the "umem" memory pinning library now that
we have chained sg lists. Kudos to Yishai Hadas for realizing our
code didn't have to be so crazy.
- Another nice simplification to the sg wrappers used by qib, ipath
and ehca to handle their mapping of memory to adapter.
- The usual batch of fixes to bugs found by static checkers etc.
from intrepid people like Dan Carpenter and Yann Droneaud.
- A large batch of cxgb4, ocrdma, qib driver updates"
* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (102 commits)
RDMA/ocrdma: Unregister inet notifier when unloading ocrdma
RDMA/ocrdma: Fix warnings about pointer <-> integer casts
RDMA/ocrdma: Code clean-up
RDMA/ocrdma: Display FW version
RDMA/ocrdma: Query controller information
RDMA/ocrdma: Support non-embedded mailbox commands
RDMA/ocrdma: Handle CQ overrun error
RDMA/ocrdma: Display proper value for max_mw
RDMA/ocrdma: Use non-zero tag in SRQ posting
RDMA/ocrdma: Memory leak fix in ocrdma_dereg_mr()
RDMA/ocrdma: Increment abi version count
RDMA/ocrdma: Update version string
be2net: Add abi version between be2net and ocrdma
RDMA/ocrdma: ABI versioning between ocrdma and be2net
RDMA/ocrdma: Allow DPP QP creation
RDMA/ocrdma: Read ASIC_ID register to select asic_gen
RDMA/ocrdma: SQ and RQ doorbell offset clean up
RDMA/ocrdma: EQ full catastrophe avoidance
RDMA/cxgb4: Disable DSGL use by default
RDMA/cxgb4: rx_data() needs to hold the ep mutex
...
Diffstat (limited to 'drivers/infiniband/hw')
60 files changed, 3166 insertions, 1009 deletions
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 07eb3a8067d8..8af33cf1fc4e 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -431,9 +431,9 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 *pages; u64 kva = 0; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c2_pd *c2pd = to_c2pd(pd); struct c2_mr *c2mr; @@ -452,10 +452,7 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = ffs(c2mr->umem->page_size) - 1; - - n = 0; - list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) - n += chunk->nents; + n = c2mr->umem->nmap; pages = kmalloc(n * sizeof(u64), GFP_KERNEL); if (!pages) { @@ -464,14 +461,12 @@ static struct ib_mr *c2_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } i = 0; - list_for_each_entry(chunk, &c2mr->umem->chunk_list, list) { - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = - sg_dma_address(&chunk->page_list[j]) + - (c2mr->umem->page_size * k); - } + for_each_sg(c2mr->umem->sg_head.sgl, sg, c2mr->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = + sg_dma_address(sg) + + (c2mr->umem->page_size * k); } } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index d2283837d451..811b24a539c0 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -618,14 +618,13 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; struct iwch_dev *rhp; struct iwch_pd *php; struct iwch_mr *mhp; struct iwch_reg_user_mr_resp uresp; - + struct scatterlist *sg; PDBG("%s ib_pd %p\n", __func__, pd); php = to_iwch_pd(pd); @@ -645,9 +644,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; + n = mhp->umem->nmap; err = iwch_alloc_pbl(mhp, n); if (err) @@ -661,12 +658,10 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + + pages[i++] = cpu_to_be64(sg_dma_address(sg) + mhp->umem->page_size * k); if (i == PAGE_SIZE / sizeof *pages) { err = iwch_write_pbl(mhp, pages, i, n); @@ -676,7 +671,7 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = 0; } } - } + } if (i) err = iwch_write_pbl(mhp, pages, i, n); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 7e98a58aacfd..02436d5d0dab 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -98,9 +98,9 @@ int c4iw_debug; module_param(c4iw_debug, int, 0644); MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)"); -static int peer2peer; +static int peer2peer = 1; module_param(peer2peer, int, 0644); -MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)"); +MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)"); static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; module_param(p2p_type, int, 0644); @@ -400,7 +400,8 @@ static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, n = dst_neigh_lookup(&rt->dst, &peer_ip); if (!n) return NULL; - if (!our_interface(dev, n->dev)) { + if (!our_interface(dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { dst_release(&rt->dst); return NULL; } @@ -759,8 +760,9 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, ep->mpa_skb = skb; c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); start_ep_timer(ep); - state_set(&ep->com, MPA_REQ_SENT); + __state_set(&ep->com, MPA_REQ_SENT); ep->mpa_attr.initiator = 1; + ep->snd_seq += mpalen; return; } @@ -840,6 +842,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) t4_set_arp_err_handler(skb, NULL, arp_failure_discard); BUG_ON(ep->mpa_skb); ep->mpa_skb = skb; + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -923,7 +926,8 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) skb_get(skb); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); ep->mpa_skb = skb; - state_set(&ep->com, MPA_REP_SENT); + __state_set(&ep->com, MPA_REP_SENT); + ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } @@ -940,6 +944,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid, be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn)); + mutex_lock(&ep->com.mutex); dst_confirm(ep->dst); /* setup the hwtid for this connection */ @@ -963,17 +968,18 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb) send_mpa_req(ep, skb, 1); else send_mpa_req(ep, skb, mpa_rev); - + mutex_unlock(&ep->com.mutex); return 0; } -static void close_complete_upcall(struct c4iw_ep *ep) +static void close_complete_upcall(struct c4iw_ep *ep, int status) { struct iw_cm_event event; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); event.event = IW_CM_EVENT_CLOSE; + event.status = status; if (ep->com.cm_id) { PDBG("close complete delivered ep %p cm_id %p tid %u\n", ep, ep->com.cm_id, ep->hwtid); @@ -987,7 +993,6 @@ static void close_complete_upcall(struct c4iw_ep *ep) static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp) { PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - close_complete_upcall(ep); state_set(&ep->com, ABORTING); set_bit(ABORT_CONN, &ep->com.history); return send_abort(ep, skb, gfp); @@ -1066,9 +1071,10 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) } } -static void connect_request_upcall(struct c4iw_ep *ep) +static int connect_request_upcall(struct c4iw_ep *ep) { struct iw_cm_event event; + int ret; PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); memset(&event, 0, sizeof(event)); @@ -1093,15 +1099,14 @@ static void connect_request_upcall(struct c4iw_ep *ep) event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); } - if (state_read(&ep->parent_ep->com) != DEAD) { - c4iw_get_ep(&ep->com); - ep->parent_ep->com.cm_id->event_handler( - ep->parent_ep->com.cm_id, - &event); - } + c4iw_get_ep(&ep->com); + ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, + &event); + if (ret) + c4iw_put_ep(&ep->com); set_bit(CONNREQ_UPCALL, &ep->com.history); c4iw_put_ep(&ep->parent_ep->com); - ep->parent_ep = NULL; + return ret; } static void established_upcall(struct c4iw_ep *ep) @@ -1165,7 +1170,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * the connection. */ stop_ep_timer(ep); - if (state_read(&ep->com) != MPA_REQ_SENT) + if (ep->com.state != MPA_REQ_SENT) return; /* @@ -1240,7 +1245,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * start reply message including private data. And * the MPA header is valid. */ - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; ep->mpa_attr.recv_marker_enabled = markers_enabled; ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; @@ -1355,7 +1360,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) } goto out; err: - state_set(&ep->com, ABORTING); + __state_set(&ep->com, ABORTING); send_abort(ep, skb, GFP_KERNEL); out: connect_reply_upcall(ep, err); @@ -1370,7 +1375,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) != MPA_REQ_WAIT) + if (ep->com.state != MPA_REQ_WAIT) return; /* @@ -1400,7 +1405,6 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) return; PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__); - stop_ep_timer(ep); mpa = (struct mpa_message *) ep->mpa_pkt; /* @@ -1492,10 +1496,18 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, ep->mpa_attr.p2p_type); - state_set(&ep->com, MPA_REQ_RCVD); + __state_set(&ep->com, MPA_REQ_RCVD); + stop_ep_timer(ep); /* drive upcall */ - connect_request_upcall(ep); + mutex_lock(&ep->parent_ep->com.mutex); + if (ep->parent_ep->com.state != DEAD) { + if (connect_request_upcall(ep)) + abort_connection(ep, skb, GFP_KERNEL); + } else { + abort_connection(ep, skb, GFP_KERNEL); + } + mutex_unlock(&ep->parent_ep->com.mutex); return; } @@ -1509,14 +1521,17 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) __u8 status = hdr->status; ep = lookup_tid(t, tid); + if (!ep) + return 0; PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); + mutex_lock(&ep->com.mutex); /* update RX credits */ update_rx_credits(ep, dlen); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: ep->rcv_seq += dlen; process_mpa_reply(ep, skb); @@ -1532,7 +1547,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) pr_err("%s Unexpected streaming data." \ " qpid %u ep %p state %d tid %u status %d\n", __func__, ep->com.qp->wq.sq.qid, ep, - state_read(&ep->com), ep->hwtid, status); + ep->com.state, ep->hwtid, status); attrs.next_state = C4IW_QP_STATE_TERMINATE; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); @@ -1541,6 +1556,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) default: break; } + mutex_unlock(&ep->com.mutex); return 0; } @@ -2255,7 +2271,7 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; disconnect = 0; @@ -2425,7 +2441,7 @@ static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb) C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); } - close_complete_upcall(ep); + close_complete_upcall(ep, 0); __state_set(&ep->com, DEAD); release = 1; break; @@ -2500,22 +2516,28 @@ static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb) int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) { - int err; + int err = 0; + int disconnect = 0; struct c4iw_ep *ep = to_ep(cm_id); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return -ECONNRESET; } set_bit(ULP_REJECT, &ep->com.history); - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); if (mpa_rev == 0) abort_connection(ep, NULL, GFP_KERNEL); else { err = send_mpa_reject(ep, pdata, pdata_len); - err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); + disconnect = 1; } + mutex_unlock(&ep->com.mutex); + if (disconnect) + err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL); c4iw_put_ep(&ep->com); return 0; } @@ -2530,12 +2552,14 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); - if (state_read(&ep->com) == DEAD) { + + mutex_lock(&ep->com.mutex); + if (ep->com.state == DEAD) { err = -ECONNRESET; goto err; } - BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); + BUG_ON(ep->com.state != MPA_REQ_RCVD); BUG_ON(!qp); set_bit(ULP_ACCEPT, &ep->com.history); @@ -2604,14 +2628,16 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (err) goto err1; - state_set(&ep->com, FPDU_MODE); + __state_set(&ep->com, FPDU_MODE); established_upcall(ep); + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return 0; err1: ep->com.cm_id = NULL; cm_id->rem_ref(cm_id); err: + mutex_unlock(&ep->com.mutex); c4iw_put_ep(&ep->com); return err; } @@ -2980,7 +3006,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) { fatal = 1; - close_complete_upcall(ep); + close_complete_upcall(ep, -EIO); ep->com.state = DEAD; } switch (ep->com.state) { @@ -3022,7 +3048,7 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) if (close) { if (abrupt) { set_bit(EP_DISC_ABORT, &ep->com.history); - close_complete_upcall(ep); + close_complete_upcall(ep, -ECONNRESET); ret = send_abort(ep, NULL, gfp); } else { set_bit(EP_DISC_CLOSE, &ep->com.history); @@ -3203,6 +3229,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, struct sk_buff *req_skb; struct fw_ofld_connection_wr *req; struct cpl_pass_accept_req *cpl = cplhdr(skb); + int ret; req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL); req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req)); @@ -3239,7 +3266,13 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req->cookie = (unsigned long)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); - cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); + if (ret < 0) { + pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__, + ret); + kfree_skb(skb); + kfree_skb(req_skb); + } } /* @@ -3346,13 +3379,13 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) pi = (struct port_info *)netdev_priv(pdev); tx_chan = cxgb4_port_chan(pdev); } + neigh_release(neigh); if (!e) { pr_err("%s - failed to allocate l2t entry!\n", __func__); goto free_dst; } - neigh_release(neigh); step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; window = (__force u16) htons((__force u16)tcph->window); @@ -3427,6 +3460,7 @@ static void process_timeout(struct c4iw_ep *ep) &attrs, 1); } __state_set(&ep->com, ABORTING); + close_complete_upcall(ep, -ETIMEDOUT); break; default: WARN(1, "%s unexpected state ep %p tid %u state %u\n", diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 88de3aa9c5b0..ce468e542428 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -365,8 +365,14 @@ void c4iw_flush_hw_cq(struct c4iw_cq *chp) if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { - /* - * drop peer2peer RTR reads. + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) + goto next_cqe; + + /* drop peer2peer RTR reads. */ if (CQE_WRID_STAG(hw_cqe) == 1) goto next_cqe; @@ -511,8 +517,18 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe, */ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { - /* - * If this is an unsolicited read response, then the read + /* If we have reached here because of async + * event or other error, and have egress error + * then drop + */ + if (CQE_TYPE(hw_cqe) == 1) { + if (CQE_STATUS(hw_cqe)) + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } + + /* If this is an unsolicited read response, then the read * was generated by the kernel driver as part of peer-2-peer * connection setup. So ignore the completion. */ @@ -603,7 +619,7 @@ proc_cqe: */ if (SQ_TYPE(hw_cqe)) { int idx = CQE_WRID_SQ_IDX(hw_cqe); - BUG_ON(idx > wq->sq.size); + BUG_ON(idx >= wq->sq.size); /* * Account for any unsignaled completions completed by @@ -617,7 +633,7 @@ proc_cqe: wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; else wq->sq.in_use -= idx - wq->sq.cidx; - BUG_ON(wq->sq.in_use < 0 && wq->sq.in_use < wq->sq.size); + BUG_ON(wq->sq.in_use <= 0 && wq->sq.in_use >= wq->sq.size); wq->sq.cidx = (uint16_t)idx; PDBG("%s completing sq idx %u\n", __func__, wq->sq.cidx); @@ -881,7 +897,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, /* * Make actual HW queue 2x to avoid cdix_inc overflows. */ - hwentries = entries * 2; + hwentries = min(entries * 2, T4_MAX_IQ_SIZE); /* * Make HW queue at least 64 entries so GTS updates aren't too @@ -930,6 +946,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, if (!mm2) goto err4; + memset(&uresp, 0, sizeof(uresp)); uresp.qid_mask = rhp->rdev.cqmask; uresp.cqid = chp->cq.cqid; uresp.size = chp->cq.size; diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ba7335fd4ebf..9489a388376c 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -898,11 +898,13 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, } opcode = *(u8 *)rsp; - if (c4iw_handlers[opcode]) + if (c4iw_handlers[opcode]) { c4iw_handlers[opcode](dev, skb); - else + } else { pr_info("%s no handler opcode 0x%x...\n", __func__, opcode); + kfree_skb(skb); + } return 0; nomem: diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index eb18f9be35e4..e872203c5424 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -373,6 +373,7 @@ struct c4iw_fr_page_list { DEFINE_DMA_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; + int pll_len; }; static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list( @@ -446,6 +447,7 @@ struct c4iw_qp { atomic_t refcnt; wait_queue_head_t wait; struct timer_list timer; + int sq_sig_all; }; static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 41b11951a30a..f9ca072a99ed 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -37,9 +37,9 @@ #include "iw_cxgb4.h" -int use_dsgl = 1; +int use_dsgl = 0; module_param(use_dsgl, int, 0644); -MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=1)"); +MODULE_PARM_DESC(use_dsgl, "Use DSGL for PBL/FastReg (default=0)"); #define T4_ULPTX_MIN_IO 32 #define C4IW_MAX_INLINE_SIZE 96 @@ -678,9 +678,9 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { __be64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -710,10 +710,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = ffs(mhp->umem->page_size) - 1; - n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - n += chunk->nents; - + n = mhp->umem->nmap; err = alloc_pbl(mhp, n); if (err) goto err; @@ -726,24 +723,22 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - list_for_each_entry(chunk, &mhp->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = cpu_to_be64(sg_dma_address( - &chunk->page_list[j]) + - mhp->umem->page_size * k); - if (i == PAGE_SIZE / sizeof *pages) { - err = write_pbl(&mhp->rhp->rdev, - pages, - mhp->attr.pbl_addr + (n << 3), i); - if (err) - goto pbl_done; - n += i; - i = 0; - } + for_each_sg(mhp->umem->sg_head.sgl, sg, mhp->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = cpu_to_be64(sg_dma_address(sg) + + mhp->umem->page_size * k); + if (i == PAGE_SIZE / sizeof *pages) { + err = write_pbl(&mhp->rhp->rdev, + pages, + mhp->attr.pbl_addr + (n << 3), i); + if (err) + goto pbl_done; + n += i; + i = 0; } } + } if (i) err = write_pbl(&mhp->rhp->rdev, pages, @@ -903,7 +898,11 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, dma_unmap_addr_set(c4pl, mapping, dma_addr); c4pl->dma_addr = dma_addr; c4pl->dev = dev; - c4pl->ibpl.max_page_list_len = pll_len; + c4pl->pll_len = pll_len; + + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); return &c4pl->ibpl; } @@ -912,8 +911,12 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) { struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); + PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n", + __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list, + &c4pl->dma_addr); + dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, - c4pl->ibpl.max_page_list_len, + c4pl->pll_len, c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping)); kfree(c4pl); } diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 3b62eb556a47..cb76eb5eee1f 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -715,7 +715,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, fw_flags = 0; if (wr->send_flags & IB_SEND_SOLICITED) fw_flags |= FW_RI_SOLICITED_EVENT_FLAG; - if (wr->send_flags & IB_SEND_SIGNALED) + if (wr->send_flags & IB_SEND_SIGNALED || qhp->sq_sig_all) fw_flags |= FW_RI_COMPLETION_FLAG; swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx]; switch (wr->opcode) { @@ -776,7 +776,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } swsqe->idx = qhp->wq.sq.pidx; swsqe->complete = 0; - swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED); + swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || + qhp->sq_sig_all; swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; @@ -1529,7 +1530,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, struct c4iw_cq *schp; struct c4iw_cq *rchp; struct c4iw_create_qp_resp uresp; - int sqsize, rqsize; + unsigned int sqsize, rqsize; struct c4iw_ucontext *ucontext; int ret; struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL; @@ -1601,6 +1602,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, qhp->attr.enable_bind = 1; qhp->attr.max_ord = 1; qhp->attr.max_ird = 1; + qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; spin_lock_init(&qhp->lock); mutex_init(&qhp->mutex); init_waitqueue_head(&qhp->wait); diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index f08f6eaf3fa8..bd45e0f3923f 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -322,7 +322,7 @@ struct ehca_mr_pginfo { } phy; struct { /* type EHCA_MR_PGI_USER section */ struct ib_umem *region; - struct ib_umem_chunk *next_chunk; + struct scatterlist *next_sg; u64 next_nmap; } usr; struct { /* type EHCA_MR_PGI_FMR section */ diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c index 212150c25ea0..8cc837537768 100644 --- a/drivers/infiniband/hw/ehca/ehca_cq.c +++ b/drivers/infiniband/hw/ehca/ehca_cq.c @@ -283,6 +283,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector, (my_cq->galpas.user.fw_handle & (PAGE_SIZE - 1)); if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { ehca_err(device, "Copy to udata failed."); + cq = ERR_PTR(-EFAULT); goto create_cq_exit4; } } diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index bcfb0c183620..3488e8c9fcb4 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -400,10 +400,7 @@ reg_user_mr_fallback: pginfo.num_hwpages = num_hwpages; pginfo.u.usr.region = e_mr->umem; pginfo.next_hwpage = e_mr->umem->offset / hwpage_size; - pginfo.u.usr.next_chunk = list_prepare_entry(pginfo.u.usr.next_chunk, - (&e_mr->umem->chunk_list), - list); - + pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl; ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags, e_pd, &pginfo, &e_mr->ib.ib_mr.lkey, &e_mr->ib.ib_mr.rkey, EHCA_REG_MR); @@ -1858,61 +1855,39 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr; - u32 i = 0; u32 j = 0; int hwpages_per_kpage = PAGE_SIZE / pginfo->hwpage_size; - - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - pgaddr = page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ; - *kpage = pgaddr + (pginfo->next_hwpage * - pginfo->hwpage_size); - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%llx " - "chunk->page_list[i]=%llx " - "i=%x next_hwpage=%llx", - pgaddr, (u64)sg_dma_address( - &chunk->page_list[i]), - i, pginfo->next_hwpage); - return -EFAULT; - } - (pginfo->hwpage_cnt)++; - (pginfo->next_hwpage)++; - kpage++; - if (pginfo->next_hwpage % hwpages_per_kpage == 0) { - (pginfo->kpage_cnt)++; - (pginfo->u.usr.next_nmap)++; - pginfo->next_hwpage = 0; - i++; - } - j++; - if (j >= number) break; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { + pgaddr = page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT; + *kpage = pgaddr + (pginfo->next_hwpage * + pginfo->hwpage_size); + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx " + "sg_dma_address=%llx " + "entry=%llx next_hwpage=%llx", + pgaddr, (u64)sg_dma_address(*sg), + pginfo->u.usr.next_nmap, + pginfo->next_hwpage); + return -EFAULT; } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + (pginfo->hwpage_cnt)++; + (pginfo->next_hwpage)++; + kpage++; + if (pginfo->next_hwpage % hwpages_per_kpage == 0) { + (pginfo->kpage_cnt)++; + (pginfo->u.usr.next_nmap)++; + pginfo->next_hwpage = 0; + *sg = sg_next(*sg); + } + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -1920,20 +1895,19 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo, * check given pages for contiguous layout * last page addr is returned in prev_pgaddr for further check */ -static int ehca_check_kpages_per_ate(struct scatterlist *page_list, - int start_idx, int end_idx, +static int ehca_check_kpages_per_ate(struct scatterlist **sg, + int num_pages, u64 *prev_pgaddr) { - int t; - for (t = start_idx; t <= end_idx; t++) { - u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; + for (; *sg && num_pages > 0; *sg = sg_next(*sg), num_pages--) { + u64 pgaddr = page_to_pfn(sg_page(*sg)) << PAGE_SHIFT; if (ehca_debug_level >= 3) ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, *(u64 *)__va(pgaddr)); if (pgaddr - PAGE_SIZE != *prev_pgaddr) { ehca_gen_err("uncontiguous page found pgaddr=%llx " - "prev_pgaddr=%llx page_list_i=%x", - pgaddr, *prev_pgaddr, t); + "prev_pgaddr=%llx entries_left_in_hwpage=%x", + pgaddr, *prev_pgaddr, num_pages); return -EINVAL; } *prev_pgaddr = pgaddr; @@ -1947,111 +1921,80 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo, u64 *kpage) { int ret = 0; - struct ib_umem_chunk *prev_chunk; - struct ib_umem_chunk *chunk; u64 pgaddr, prev_pgaddr; - u32 i = 0; u32 j = 0; int kpages_per_hwpage = pginfo->hwpage_size / PAGE_SIZE; int nr_kpages = kpages_per_hwpage; + struct scatterlist **sg = &pginfo->u.usr.next_sg; + + while (*sg != NULL) { - /* loop over desired chunk entries */ - chunk = pginfo->u.usr.next_chunk; - prev_chunk = pginfo->u.usr.next_chunk; - list_for_each_entry_continue( - chunk, (&(pginfo->u.usr.region->chunk_list)), list) { - for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { - if (nr_kpages == kpages_per_hwpage) { - pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i])) - << PAGE_SHIFT ); - *kpage = pgaddr; - if ( !(*kpage) ) { - ehca_gen_err("pgaddr=%llx i=%x", - pgaddr, i); + if (nr_kpages == kpages_per_hwpage) { + pgaddr = (page_to_pfn(sg_page(*sg)) + << PAGE_SHIFT); + *kpage = pgaddr; + if (!(*kpage)) { + ehca_gen_err("pgaddr=%llx entry=%llx", + pgaddr, pginfo->u.usr.next_nmap); + ret = -EFAULT; + return ret; + } + /* + * The first page in a hwpage must be aligned; + * the first MR page is exempt from this rule. + */ + if (pgaddr & (pginfo->hwpage_size - 1)) { + if (pginfo->hwpage_cnt) { + ehca_gen_err( + "invalid alignment " + "pgaddr=%llx entry=%llx " + "mr_pgsize=%llx", + pgaddr, pginfo->u.usr.next_nmap, + pginfo->hwpage_size); ret = -EFAULT; return ret; } - /* - * The first page in a hwpage must be aligned; - * the first MR page is exempt from this rule. - */ - if (pgaddr & (pginfo->hwpage_size - 1)) { - if (pginfo->hwpage_cnt) { - ehca_gen_err( - "invalid alignment " - "pgaddr=%llx i=%x " - "mr_pgsize=%llx", - pgaddr, i, - pginfo->hwpage_size); - ret = -EFAULT; - return ret; - } - /* first MR page */ - pginfo->kpage_cnt = - (pgaddr & - (pginfo->hwpage_size - 1)) >> - PAGE_SHIFT; - nr_kpages -= pginfo->kpage_cnt; - *kpage = pgaddr & - ~(pginfo->hwpage_size - 1); - } - if (ehca_debug_level >= 3) { - u64 val = *(u64 *)__va(pgaddr); - ehca_gen_dbg("kpage=%llx chunk_page=%llx " - "value=%016llx", - *kpage, pgaddr, val); - } - prev_pgaddr = pgaddr; - i++; - pginfo->kpage_cnt++; - pginfo->u.usr.next_nmap++; - nr_kpages--; - if (!nr_kpages) - goto next_kpage; - continue; + /* first MR page */ + pginfo->kpage_cnt = + (pgaddr & + (pginfo->hwpage_size - 1)) >> + PAGE_SHIFT; + nr_kpages -= pginfo->kpage_cnt; + *kpage = pgaddr & + ~(pginfo->hwpage_size - 1); } - if (i + nr_kpages > chunk->nmap) { - ret = ehca_check_kpages_per_ate( - chunk->page_list, i, - chunk->nmap - 1, &prev_pgaddr); - if (ret) return ret; - pginfo->kpage_cnt += chunk->nmap - i; - pginfo->u.usr.next_nmap += chunk->nmap - i; - nr_kpages -= chunk->nmap - i; - break; + if (ehca_debug_level >= 3) { + u64 val = *(u64 *)__va(pgaddr); + ehca_gen_dbg("kpage=%llx page=%llx " + "value=%016llx", + *kpage, pgaddr, val); } + prev_pgaddr = pgaddr; + *sg = sg_next(*sg); + pginfo->kpage_cnt++; + pginfo->u.usr.next_nmap++; + nr_kpages--; + if (!nr_kpages) + goto next_kpage; + continue; + } + + ret = ehca_check_kpages_per_ate(sg, nr_kpages, + &prev_pgaddr); + if (ret) + return ret; + pginfo->kpage_cnt += nr_kpages; + pginfo->u.usr.next_nmap += nr_kpages; - ret = ehca_check_kpages_per_ate(chunk->page_list, i, - i + nr_kpages - 1, - &prev_pgaddr); - if (ret) return ret; - i += nr_kpages; - pginfo->kpage_cnt += nr_kpages; - pginfo->u.usr.next_nmap += nr_kpages; next_kpage: - nr_kpages = kpages_per_hwpage; - (pginfo->hwpage_cnt)++; - kpage++; - j++; - if (j >= number) break; - } - if ((pginfo->u.usr.next_nmap >= chunk->nmap) && - (j >= number)) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - break; - } else if (pginfo->u.usr.next_nmap >= chunk->nmap) { - pginfo->u.usr.next_nmap = 0; - prev_chunk = chunk; - } else if (j >= number) + nr_kpages = kpages_per_hwpage; + (pginfo->hwpage_cnt)++; + kpage++; + j++; + if (j >= number) break; - else - prev_chunk = chunk; } - pginfo->u.usr.next_chunk = - list_prepare_entry(prev_chunk, - (&(pginfo->u.usr.region->chunk_list)), - list); + return ret; } @@ -2591,16 +2534,6 @@ static void ehca_dma_unmap_sg(struct ib_device *dev, struct scatterlist *sg, /* This is only a stub; nothing to be done here */ } -static u64 ehca_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - return sg->dma_address; -} - -static unsigned int ehca_dma_len(struct ib_device *dev, struct scatterlist *sg) -{ - return sg->length; -} - static void ehca_dma_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) @@ -2653,8 +2586,6 @@ struct ib_dma_mapping_ops ehca_dma_mapping_ops = { .unmap_page = ehca_dma_unmap_page, .map_sg = ehca_dma_map_sg, .unmap_sg = ehca_dma_unmap_sg, - .dma_address = ehca_dma_address, - .dma_len = ehca_dma_len, .sync_single_for_cpu = ehca_dma_sync_single_for_cpu, .sync_single_for_device = ehca_dma_sync_single_for_device, .alloc_coherent = ehca_dma_alloc_coherent, diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c index 714293b78518..e2f9a51f4a38 100644 --- a/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/drivers/infiniband/hw/ipath/ipath_diag.c @@ -326,7 +326,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct ipath_diag_pkt odp; struct ipath_diag_xpkt dp; u32 *tmpbuf = NULL; @@ -335,51 +335,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp, u64 val; u32 l_state, lt_state; /* LinkState, LinkTrainingState */ - if (count < sizeof(odp)) { - ret = -EINVAL; - goto bail; - } if (count == sizeof(dp)) { if (copy_from_user(&dp, data, sizeof(dp))) { ret = -EFAULT; goto bail; } - } else if (copy_from_user(&odp, data, sizeof(odp))) { - ret = -EFAULT; + } else if (count == sizeof(odp)) { + if (copy_from_user(&odp, data, sizeof(odp))) { + ret = -EFAULT; + goto bail; + } + } else { + ret = -EINVAL; goto bail; } - /* - * Due to padding/alignment issues (lessened with new struct) - * the old and new structs are the same length. We need to - * disambiguate them, which we can do because odp.len has never - * been less than the total of LRH+BTH+DETH so far, while - * dp.unit (same offset) unit is unlikely to get that high. - * Similarly, dp.data, the pointer to user at the same offset - * as odp.unit, is almost certainly at least one (512byte)page - * "above" NULL. The if-block below can be omitted if compatibility - * between a new driver and older diagnostic code is unimportant. - * compatibility the other direction (new diags, old driver) is - * handled in the diagnostic code, with a warning. - */ - if (dp.unit >= 20 && dp.data < 512) { - /* very probable version mismatch. Fix it up */ - memcpy(&odp, &dp, sizeof(odp)); - /* We got a legacy dp, copy elements to dp */ - dp.unit = odp.unit; - dp.data = odp.data; - dp.len = odp.len; - dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */ - } - /* send count must be an exact number of dwords */ if (dp.len & 3) { ret = -EINVAL; goto bail; } - clen = dp.len >> 2; + plen = dp.len >> 2; dd = ipath_lookup(dp.unit); if (!dd || !(dd->ipath_flags & IPATH_PRESENT) || @@ -422,16 +400,22 @@ static ssize_t ipath_diagpkt_write(struct file *fp, goto bail; } - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - - if ((plen + 4) > dd->ipath_ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > dd->ipath_ibmaxlen - maxlen_reserve) { ipath_dbg("Pkt len 0x%x > ibmaxlen %x\n", - plen - 4, dd->ipath_ibmaxlen); + dp.len, dd->ipath_ibmaxlen); ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { dev_info(&dd->pcidev->dev, "Unable to allocate tmp buffer, " @@ -473,11 +457,11 @@ static ssize_t ipath_diagpkt_write(struct file *fp, */ if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) { ipath_flush_wc(); - __iowrite32_copy(piobuf + 2, tmpbuf, clen - 1); + __iowrite32_copy(piobuf + 2, tmpbuf, plen - 1); ipath_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - __iowrite32_copy(piobuf + 2, tmpbuf, clen); + __iowrite32_copy(piobuf + 2, tmpbuf, plen); ipath_flush_wc(); diff --git a/drivers/infiniband/hw/ipath/ipath_dma.c b/drivers/infiniband/hw/ipath/ipath_dma.c index 644c2c74e054..123a8c053539 100644 --- a/drivers/infiniband/hw/ipath/ipath_dma.c +++ b/drivers/infiniband/hw/ipath/ipath_dma.c @@ -115,6 +115,10 @@ static int ipath_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -126,21 +130,6 @@ static void ipath_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 ipath_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int ipath_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void ipath_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, @@ -176,17 +165,15 @@ static void ipath_dma_free_coherent(struct ib_device *dev, size_t size, } struct ib_dma_mapping_ops ipath_dma_mapping_ops = { - ipath_mapping_error, - ipath_dma_map_single, - ipath_dma_unmap_single, - ipath_dma_map_page, - ipath_dma_unmap_page, - ipath_map_sg, - ipath_unmap_sg, - ipath_sg_dma_address, - ipath_sg_dma_len, - ipath_sync_single_for_cpu, - ipath_sync_single_for_device, - ipath_dma_alloc_coherent, - ipath_dma_free_coherent + .mapping_error = ipath_mapping_error, + .map_single = ipath_dma_map_single, + .unmap_single = ipath_dma_unmap_single, + .map_page = ipath_dma_map_page, + .unmap_page = ipath_dma_unmap_page, + .map_sg = ipath_map_sg, + .unmap_sg = ipath_unmap_sg, + .sync_single_for_cpu = ipath_sync_single_for_cpu, + .sync_single_for_device = ipath_sync_single_for_device, + .alloc_coherent = ipath_dma_alloc_coherent, + .free_coherent = ipath_dma_free_coherent }; diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c index e346d3890a0e..5e61e9bff697 100644 --- a/drivers/infiniband/hw/ipath/ipath_mr.c +++ b/drivers/infiniband/hw/ipath/ipath_mr.c @@ -188,8 +188,8 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct ipath_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + int n, m, entry; + struct scatterlist *sg; struct ib_mr *ret; if (length == 0) { @@ -202,10 +202,7 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; - + n = umem->nmap; mr = alloc_mr(n, &to_idev(pd->device)->lk_table); if (!mr) { ret = ERR_PTR(-ENOMEM); @@ -224,22 +221,20 @@ struct ib_mr *ipath_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { - void *vaddr; - - vaddr = page_address(sg_page(&chunk->page_list[i])); - if (!vaddr) { - ret = ERR_PTR(-EINVAL); - goto bail; - } - mr->mr.map[m]->segs[n].vaddr = vaddr; - mr->mr.map[m]->segs[n].length = umem->page_size; - n++; - if (n == IPATH_SEGSZ) { - m++; - n = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + void *vaddr; + + vaddr = page_address(sg_page(sg)); + if (!vaddr) { + ret = ERR_PTR(-EINVAL); + goto bail; + } + mr->mr.map[m]->segs[n].vaddr = vaddr; + mr->mr.map[m]->segs[n].length = umem->page_size; + n++; + if (n == IPATH_SEGSZ) { + m++; + n = 0; } } ret = &mr->ibmr; diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index 8aee4233b388..c51740986367 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -45,7 +45,6 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db) { struct mlx4_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -73,8 +72,7 @@ int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 6cb85467dde7..1b6dbe156a37 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1803,7 +1803,7 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { - char name[32]; + char name[80]; int eq_per_port = 0; int added_eqs = 0; int total_eqs = 0; @@ -1833,8 +1833,8 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) eq = 0; mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { - sprintf(name, "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", + i, j, dev->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { @@ -2048,8 +2048,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) err = mlx4_counter_alloc(ibdev->dev, &ibdev->counters[i]); if (err) ibdev->counters[i] = -1; - } else - ibdev->counters[i] = -1; + } else { + ibdev->counters[i] = -1; + } } mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index e471f089ff00..cb2a8727f3fb 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -90,11 +90,11 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { u64 *pages; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k, entry; int n; int len; int err = 0; + struct scatterlist *sg; pages = (u64 *) __get_free_page(GFP_KERNEL); if (!pages) @@ -102,26 +102,25 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, i = n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> mtt->page_shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - umem->page_size * k; - /* - * Be friendly to mlx4_write_mtt() and - * pass it chunks of appropriate size. - */ - if (i == PAGE_SIZE / sizeof (u64)) { - err = mlx4_write_mtt(dev->dev, mtt, n, - i, pages); - if (err) - goto out; - n += i; - i = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> mtt->page_shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + umem->page_size * k; + /* + * Be friendly to mlx4_write_mtt() and + * pass it chunks of appropriate size. + */ + if (i == PAGE_SIZE / sizeof (u64)) { + err = mlx4_write_mtt(dev->dev, mtt, n, + i, pages); + if (err) + goto out; + n += i; + i = 0; } } + } if (i) err = mlx4_write_mtt(dev->dev, mtt, n, i, pages); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index aadf7f82e1f3..41308af4163c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2082,7 +2082,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, return err; } - if (ah->av.eth.vlan != 0xffff) { + if (ah->av.eth.vlan != cpu_to_be16(0xffff)) { vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff; is_vlan = 1; } diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index b1705ce6eb88..62bb6b49dc1d 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -366,6 +366,38 @@ static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) mlx5_buf_free(&dev->mdev, &buf->buf); } +static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, + struct ib_sig_err *item) +{ + u16 syndrome = be16_to_cpu(cqe->syndrome); + +#define GUARD_ERR (1 << 13) +#define APPTAG_ERR (1 << 12) +#define REFTAG_ERR (1 << 11) + + if (syndrome & GUARD_ERR) { + item->err_type = IB_SIG_BAD_GUARD; + item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16; + item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16; + } else + if (syndrome & REFTAG_ERR) { + item->err_type = IB_SIG_BAD_REFTAG; + item->expected = be32_to_cpu(cqe->expected_reftag); + item->actual = be32_to_cpu(cqe->actual_reftag); + } else + if (syndrome & APPTAG_ERR) { + item->err_type = IB_SIG_BAD_APPTAG; + item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff; + item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff; + } else { + pr_err("Got signature completion error with bad syndrome %04x\n", + syndrome); + } + + item->sig_err_offset = be64_to_cpu(cqe->err_offset); + item->key = be32_to_cpu(cqe->mkey); +} + static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_ib_qp **cur_qp, struct ib_wc *wc) @@ -375,6 +407,9 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq, struct mlx5_cqe64 *cqe64; struct mlx5_core_qp *mqp; struct mlx5_ib_wq *wq; + struct mlx5_sig_err_cqe *sig_err_cqe; + struct mlx5_core_mr *mmr; + struct mlx5_ib_mr *mr; uint8_t opcode; uint32_t qpn; u16 wqe_ctr; @@ -475,6 +510,33 @@ repoll: } } break; + case MLX5_CQE_SIG_ERR: + sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64; + + read_lock(&dev->mdev.priv.mr_table.lock); + mmr = __mlx5_mr_lookup(&dev->mdev, + mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); + if (unlikely(!mmr)) { + read_unlock(&dev->mdev.priv.mr_table.lock); + mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", + cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); + return -EINVAL; + } + + mr = to_mibmr(mmr); + get_sig_err_item(sig_err_cqe, &mr->sig->err_item); + mr->sig->sig_err_exists = true; + mr->sig->sigerr_count++; + + mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n", + cq->mcq.cqn, mr->sig->err_item.key, + mr->sig->err_item.err_type, + mr->sig->err_item.sig_err_offset, + mr->sig->err_item.expected, + mr->sig->err_item.actual); + + read_unlock(&dev->mdev.priv.mr_table.lock); + goto repoll; } return 0; diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 256a23344f28..ece028fc47d6 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -47,7 +47,6 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, struct mlx5_db *db) { struct mlx5_ib_user_db_page *page; - struct ib_umem_chunk *chunk; int err = 0; mutex_lock(&context->db_page_mutex); @@ -75,8 +74,7 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - chunk = list_entry(page->umem->chunk_list.next, struct ib_umem_chunk, list); - db->dma = sg_dma_address(chunk->page_list) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index bf900579ac08..fa6dc870adae 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -273,6 +273,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (flags & MLX5_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + if (flags & MLX5_DEV_CAP_FLAG_SIG_HAND_OVER) { + props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; + /* At this stage no support for signature handover */ + props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | + IB_PROT_T10DIF_TYPE_2 | + IB_PROT_T10DIF_TYPE_3; + props->sig_guard_cap = IB_GUARD_T10DIF_CRC | + IB_GUARD_T10DIF_CSUM; + } props->vendor_id = be32_to_cpup((__be32 *)(out_mad->data + 36)) & 0xffffff; @@ -1423,12 +1432,15 @@ static int init_one(struct pci_dev *pdev, dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; + dev->ib_dev.destroy_mr = mlx5_ib_destroy_mr; dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; dev->ib_dev.process_mad = mlx5_ib_process_mad; + dev->ib_dev.create_mr = mlx5_ib_create_mr; dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr; dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list; dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list; + dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; if (mdev->caps.flags & MLX5_DEV_CAP_FLAG_XRC) { dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 3a5322870b96..8499aec94db6 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -44,16 +44,17 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, int *ncont, int *order) { - struct ib_umem_chunk *chunk; unsigned long tmp; unsigned long m; - int i, j, k; + int i, k; u64 base = 0; int p = 0; int skip; int mask; u64 len; u64 pfn; + struct scatterlist *sg; + int entry; addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; @@ -61,32 +62,31 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, skip = 1 << m; mask = skip - 1; i = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; j++) { - len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; - pfn = sg_dma_address(&chunk->page_list[j]) >> PAGE_SHIFT; - for (k = 0; k < len; k++) { - if (!(i & mask)) { - tmp = (unsigned long)pfn; - m = min(m, find_first_bit(&tmp, sizeof(tmp))); + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + pfn = sg_dma_address(sg) >> PAGE_SHIFT; + for (k = 0; k < len; k++) { + if (!(i & mask)) { + tmp = (unsigned long)pfn; + m = min(m, find_first_bit(&tmp, sizeof(tmp))); + skip = 1 << m; + mask = skip - 1; + base = pfn; + p = 0; + } else { + if (base + p != pfn) { + tmp = (unsigned long)p; + m = find_first_bit(&tmp, sizeof(tmp)); skip = 1 << m; mask = skip - 1; base = pfn; p = 0; - } else { - if (base + p != pfn) { - tmp = (unsigned long)p; - m = find_first_bit(&tmp, sizeof(tmp)); - skip = 1 << m; - mask = skip - 1; - base = pfn; - p = 0; - } } - p++; - i++; } + p++; + i++; } + } if (i) { m = min_t(unsigned long, ilog2(roundup_pow_of_two(i)), m); @@ -112,32 +112,32 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, { int shift = page_shift - PAGE_SHIFT; int mask = (1 << shift) - 1; - struct ib_umem_chunk *chunk; - int i, j, k; + int i, k; u64 cur = 0; u64 base; int len; + struct scatterlist *sg; + int entry; i = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - for (j = 0; j < chunk->nmap; j++) { - len = sg_dma_len(&chunk->page_list[j]) >> PAGE_SHIFT; - base = sg_dma_address(&chunk->page_list[j]); - for (k = 0; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - if (umr) - cur |= 3; + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + base = sg_dma_address(sg); + for (k = 0; k < len; k++) { + if (!(i & mask)) { + cur = base + (k << PAGE_SHIFT); + if (umr) + cur |= 3; - pas[i >> shift] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[i >> shift])); - } else - mlx5_ib_dbg(dev, "=====> 0x%llx\n", - base + (k << PAGE_SHIFT)); - i++; - } + pas[i >> shift] = cpu_to_be64(cur); + mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", + i >> shift, be64_to_cpu(pas[i >> shift])); + } else + mlx5_ib_dbg(dev, "=====> 0x%llx\n", + base + (k << PAGE_SHIFT)); + i++; } + } } int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 389e31965773..50541586e0a6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -189,6 +189,9 @@ struct mlx5_ib_qp { int create_type; u32 pa_lkey; + + /* Store signature errors */ + bool signature_en; }; struct mlx5_ib_cq_buf { @@ -265,6 +268,7 @@ struct mlx5_ib_mr { enum ib_wc_status status; struct mlx5_ib_dev *dev; struct mlx5_create_mkey_mbox_out out; + struct mlx5_core_sig_ctx *sig; }; struct mlx5_ib_fast_reg_page_list { @@ -396,6 +400,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp, mqp); } +static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr) +{ + return container_of(mmr, struct mlx5_ib_mr, mmr); +} + static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd) { return container_of(ibpd, struct mlx5_ib_pd, ibpd); @@ -495,6 +504,9 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx5_ib_dereg_mr(struct ib_mr *ibmr); +int mlx5_ib_destroy_mr(struct ib_mr *ibmr); +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr); struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, @@ -530,6 +542,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift); void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context); +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status); static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 7c95ca1f0c25..81392b26d078 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -992,6 +992,122 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, + struct ib_mr_init_attr *mr_init_attr) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_ib_mr *mr; + int access_mode, err; + int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + in = kzalloc(sizeof(*in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto err_free; + } + + in->seg.status = 1 << 6; /* free */ + in->seg.xlt_oct_size = cpu_to_be32(ndescs); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + access_mode = MLX5_ACCESS_MODE_MTT; + + if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) { + u32 psv_index[2]; + + in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | + MLX5_MKEY_BSF_EN); + in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); + if (!mr->sig) { + err = -ENOMEM; + goto err_free_in; + } + + /* create mem & wire PSVs */ + err = mlx5_core_create_psv(&dev->mdev, to_mpd(pd)->pdn, + 2, psv_index); + if (err) + goto err_free_sig; + + access_mode = MLX5_ACCESS_MODE_KLM; + mr->sig->psv_memory.psv_idx = psv_index[0]; + mr->sig->psv_wire.psv_idx = psv_index[1]; + + mr->sig->sig_status_checked = true; + mr->sig->sig_err_exists = false; + /* Next UMR, Arm SIGERR */ + ++mr->sig->sigerr_count; + } + + in->seg.flags = MLX5_PERM_UMR_EN | access_mode; + err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), + NULL, NULL, NULL); + if (err) + goto err_destroy_psv; + + mr->ibmr.lkey = mr->mmr.key; + mr->ibmr.rkey = mr->mmr.key; + mr->umem = NULL; + kfree(in); + + return &mr->ibmr; + +err_destroy_psv: + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + } +err_free_sig: + kfree(mr->sig); +err_free_in: + kfree(in); +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int mlx5_ib_destroy_mr(struct ib_mr *ibmr) +{ + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); + struct mlx5_ib_mr *mr = to_mmr(ibmr); + int err; + + if (mr->sig) { + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_memory.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", + mr->sig->psv_memory.psv_idx); + if (mlx5_core_destroy_psv(&dev->mdev, + mr->sig->psv_wire.psv_idx)) + mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", + mr->sig->psv_wire.psv_idx); + kfree(mr->sig); + } + + err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); + if (err) { + mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", + mr->mmr.key, err); + return err; + } + + kfree(mr); + + return err; +} + struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { @@ -1077,3 +1193,44 @@ void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); } + +int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, + struct ib_mr_status *mr_status) +{ + struct mlx5_ib_mr *mmr = to_mmr(ibmr); + int ret = 0; + + if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { + pr_err("Invalid status check mask\n"); + ret = -EINVAL; + goto done; + } + + mr_status->fail_status = 0; + if (check_mask & IB_MR_CHECK_SIG_STATUS) { + if (!mmr->sig) { + ret = -EINVAL; + pr_err("signature status check requested on a non-signature enabled MR\n"); + goto done; + } + + mmr->sig->sig_status_checked = true; + if (!mmr->sig->sig_err_exists) + goto done; + + if (ibmr->lkey == mmr->sig->err_item.key) + memcpy(&mr_status->sig_err, &mmr->sig->err_item, + sizeof(mr_status->sig_err)); + else { + mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; + mr_status->sig_err.sig_err_offset = 0; + mr_status->sig_err.key = mmr->sig->err_item.key; + } + + mmr->sig->sig_err_exists = false; + mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; + } + +done: + return ret; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7dfe8a1c84cf..ae788d27b93f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -256,8 +256,11 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr) } size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); - - return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN && + ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB) < MLX5_SIG_WQE_SIZE) + return MLX5_SIG_WQE_SIZE; + else + return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, @@ -284,6 +287,9 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, sizeof(struct mlx5_wqe_inline_seg); attr->cap.max_inline_data = qp->max_inline_data; + if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN) + qp->signature_en = true; + wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > dev->mdev.caps.max_wqes) { @@ -665,7 +671,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, int err; uuari = &dev->mdev.priv.uuari; - if (init_attr->create_flags) + if (init_attr->create_flags & ~IB_QP_CREATE_SIGNATURE_EN) return -EINVAL; if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) @@ -1771,6 +1777,27 @@ static __be64 frwr_mkey_mask(void) return cpu_to_be64(result); } +static __be64 sig_mkey_mask(void) +{ + u64 result; + + result = MLX5_MKEY_MASK_LEN | + MLX5_MKEY_MASK_PAGE_SIZE | + MLX5_MKEY_MASK_START_ADDR | + MLX5_MKEY_MASK_EN_SIGERR | + MLX5_MKEY_MASK_EN_RINVAL | + MLX5_MKEY_MASK_KEY | + MLX5_MKEY_MASK_LR | + MLX5_MKEY_MASK_LW | + MLX5_MKEY_MASK_RR | + MLX5_MKEY_MASK_RW | + MLX5_MKEY_MASK_SMALL_FENCE | + MLX5_MKEY_MASK_FREE | + MLX5_MKEY_MASK_BSF_EN; + + return cpu_to_be64(result); +} + static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, struct ib_send_wr *wr, int li) { @@ -1826,7 +1853,7 @@ static u8 get_umr_flags(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? MLX5_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX5_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX5_PERM_LOCAL_WRITE : 0) | - MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; + MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN; } static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, @@ -1838,7 +1865,8 @@ static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr, return; } - seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags); + seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) | + MLX5_ACCESS_MODE_MTT; *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE); seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); @@ -1954,6 +1982,342 @@ static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr, return 0; } +static u16 prot_field_size(enum ib_signature_type type) +{ + switch (type) { + case IB_SIG_TYPE_T10_DIF: + return MLX5_DIF_SIZE; + default: + return 0; + } +} + +static u8 bs_selector(int block_size) +{ + switch (block_size) { + case 512: return 0x1; + case 520: return 0x2; + case 4096: return 0x3; + case 4160: return 0x4; + case 1073741824: return 0x5; + default: return 0; + } +} + +static int format_selector(struct ib_sig_attrs *attr, + struct ib_sig_domain *domain, + int *selector) +{ + +#define FORMAT_DIF_NONE 0 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC 13 +#define FORMAT_DIF_CSUM_NO_INC 14 + + switch (domain->sig.dif.type) { + case IB_T10DIF_NONE: + /* No DIF */ + *selector = FORMAT_DIF_NONE; + break; + case IB_T10DIF_TYPE1: /* Fall through */ + case IB_T10DIF_TYPE2: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = FORMAT_DIF_CRC_INC; + break; + case IB_T10DIF_CSUM: + *selector = FORMAT_DIF_CSUM_INC; + break; + default: + return 1; + } + break; + case IB_T10DIF_TYPE3: + switch (domain->sig.dif.bg_type) { + case IB_T10DIF_CRC: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CRC_INC : + FORMAT_DIF_CRC_NO_INC; + break; + case IB_T10DIF_CSUM: + *selector = domain->sig.dif.type3_inc_reftag ? + FORMAT_DIF_CSUM_INC : + FORMAT_DIF_CSUM_NO_INC; + break; + default: + return 1; + } + break; + default: + return 1; + } + + return 0; +} + +static int mlx5_set_bsf(struct ib_mr *sig_mr, + struct ib_sig_attrs *sig_attrs, + struct mlx5_bsf *bsf, u32 data_size) +{ + struct mlx5_core_sig_ctx *msig = to_mmr(sig_mr)->sig; + struct mlx5_bsf_basic *basic = &bsf->basic; + struct ib_sig_domain *mem = &sig_attrs->mem; + struct ib_sig_domain *wire = &sig_attrs->wire; + int ret, selector; + + switch (sig_attrs->mem.sig_type) { + case IB_SIG_TYPE_T10_DIF: + if (sig_attrs->wire.sig_type != IB_SIG_TYPE_T10_DIF) + return -EINVAL; + + /* Input domain check byte mask */ + basic->check_byte_mask = sig_attrs->check_mask; + if (mem->sig.dif.pi_interval == wire->sig.dif.pi_interval && + mem->sig.dif.type == wire->sig.dif.type) { + /* Same block structure */ + basic->bsf_size_sbs = 1 << 4; + if (mem->sig.dif.bg_type == wire->sig.dif.bg_type) + basic->wire.copy_byte_mask = 0xff; + else + basic->wire.copy_byte_mask = 0x3f; + } else + basic->wire.bs_selector = bs_selector(wire->sig.dif.pi_interval); + + basic->mem.bs_selector = bs_selector(mem->sig.dif.pi_interval); + basic->raw_data_size = cpu_to_be32(data_size); + + ret = format_selector(sig_attrs, mem, &selector); + if (ret) + return -EINVAL; + basic->m_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_memory.psv_idx); + + ret = format_selector(sig_attrs, wire, &selector); + if (ret) + return -EINVAL; + basic->w_bfs_psv = cpu_to_be32(selector << 24 | + msig->psv_wire.psv_idx); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs; + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + struct mlx5_bsf *bsf; + u32 data_len = wr->sg_list->length; + u32 data_key = wr->sg_list->lkey; + u64 data_va = wr->sg_list->addr; + int ret; + int wqe_size; + + if (!wr->wr.sig_handover.prot) { + /** + * Source domain doesn't contain signature information + * So need construct: + * ------------------ + * | data_klm | + * ------------------ + * | BSF | + * ------------------ + **/ + struct mlx5_klm *data_klm = *seg; + + data_klm->bcount = cpu_to_be32(data_len); + data_klm->key = cpu_to_be32(data_key); + data_klm->va = cpu_to_be64(data_va); + wqe_size = ALIGN(sizeof(*data_klm), 64); + } else { + /** + * Source domain contains signature information + * So need construct a strided block format: + * --------------------------- + * | stride_block_ctrl | + * --------------------------- + * | data_klm | + * --------------------------- + * | prot_klm | + * --------------------------- + * | BSF | + * --------------------------- + **/ + struct mlx5_stride_block_ctrl_seg *sblock_ctrl; + struct mlx5_stride_block_entry *data_sentry; + struct mlx5_stride_block_entry *prot_sentry; + u32 prot_key = wr->wr.sig_handover.prot->lkey; + u64 prot_va = wr->wr.sig_handover.prot->addr; + u16 block_size = sig_attrs->mem.sig.dif.pi_interval; + int prot_size; + + sblock_ctrl = *seg; + data_sentry = (void *)sblock_ctrl + sizeof(*sblock_ctrl); + prot_sentry = (void *)data_sentry + sizeof(*data_sentry); + + prot_size = prot_field_size(sig_attrs->mem.sig_type); + if (!prot_size) { + pr_err("Bad block size given: %u\n", block_size); + return -EINVAL; + } + sblock_ctrl->bcount_per_cycle = cpu_to_be32(block_size + + prot_size); + sblock_ctrl->op = cpu_to_be32(MLX5_STRIDE_BLOCK_OP); + sblock_ctrl->repeat_count = cpu_to_be32(data_len / block_size); + sblock_ctrl->num_entries = cpu_to_be16(2); + + data_sentry->bcount = cpu_to_be16(block_size); + data_sentry->key = cpu_to_be32(data_key); + data_sentry->va = cpu_to_be64(data_va); + prot_sentry->bcount = cpu_to_be16(prot_size); + prot_sentry->key = cpu_to_be32(prot_key); + + if (prot_key == data_key && prot_va == data_va) { + /** + * The data and protection are interleaved + * in a single memory region + **/ + prot_sentry->va = cpu_to_be64(data_va + block_size); + prot_sentry->stride = cpu_to_be16(block_size + prot_size); + data_sentry->stride = prot_sentry->stride; + } else { + /* The data and protection are two different buffers */ + prot_sentry->va = cpu_to_be64(prot_va); + data_sentry->stride = cpu_to_be16(block_size); + prot_sentry->stride = cpu_to_be16(prot_size); + } + wqe_size = ALIGN(sizeof(*sblock_ctrl) + sizeof(*data_sentry) + + sizeof(*prot_sentry), 64); + } + + *seg += wqe_size; + *size += wqe_size / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + bsf = *seg; + ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); + if (ret) + return -EINVAL; + + *seg += sizeof(*bsf); + *size += sizeof(*bsf) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + return 0; +} + +static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, + struct ib_send_wr *wr, u32 nelements, + u32 length, u32 pdn) +{ + struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr; + u32 sig_key = sig_mr->rkey; + u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1; + + memset(seg, 0, sizeof(*seg)); + + seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) | + MLX5_ACCESS_MODE_KLM; + seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); + seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | + MLX5_MKEY_BSF_EN | pdn); + seg->len = cpu_to_be64(length); + seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements))); + seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); +} + +static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, + struct ib_send_wr *wr, u32 nelements) +{ + memset(umr, 0, sizeof(*umr)); + + umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE; + umr->klm_octowords = get_klm_octo(nelements); + umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE); + umr->mkey_mask = sig_mkey_mask(); +} + + +static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size) +{ + struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr); + u32 pdn = get_pd(qp)->pdn; + u32 klm_oct_size; + int region_len, ret; + + if (unlikely(wr->num_sge != 1) || + unlikely(wr->wr.sig_handover.access_flags & + IB_ACCESS_REMOTE_ATOMIC) || + unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) || + unlikely(!sig_mr->sig->sig_status_checked)) + return -EINVAL; + + /* length of the protected region, data + protection */ + region_len = wr->sg_list->length; + if (wr->wr.sig_handover.prot) + region_len += wr->wr.sig_handover.prot->length; + + /** + * KLM octoword size - if protection was provided + * then we use strided block format (3 octowords), + * else we use single KLM (1 octoword) + **/ + klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1; + + set_sig_umr_segment(*seg, wr, klm_oct_size); + *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); + *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn); + *seg += sizeof(struct mlx5_mkey_seg); + *size += sizeof(struct mlx5_mkey_seg) / 16; + if (unlikely((*seg == qp->sq.qend))) + *seg = mlx5_get_send_wqe(qp, 0); + + ret = set_sig_data_segment(wr, qp, seg, size); + if (ret) + return ret; + + sig_mr->sig->sig_status_checked = false; + return 0; +} + +static int set_psv_wr(struct ib_sig_domain *domain, + u32 psv_idx, void **seg, int *size) +{ + struct mlx5_seg_set_psv *psv_seg = *seg; + + memset(psv_seg, 0, sizeof(*psv_seg)); + psv_seg->psv_num = cpu_to_be32(psv_idx); + switch (domain->sig_type) { + case IB_SIG_TYPE_T10_DIF: + psv_seg->transient_sig = cpu_to_be32(domain->sig.dif.bg << 16 | + domain->sig.dif.app_tag); + psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag); + + *seg += sizeof(*psv_seg); + *size += sizeof(*psv_seg) / 16; + break; + + default: + pr_err("Bad signature type given.\n"); + return 1; + } + + return 0; +} + static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size, struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp) { @@ -2041,6 +2405,59 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) } } +static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, + struct mlx5_wqe_ctrl_seg **ctrl, + struct ib_send_wr *wr, int *idx, + int *size, int nreq) +{ + int err = 0; + + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + err = -ENOMEM; + return err; + } + + *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); + *seg = mlx5_get_send_wqe(qp, *idx); + *ctrl = *seg; + *(uint32_t *)(*seg + 8) = 0; + (*ctrl)->imm = send_ieth(wr); + (*ctrl)->fm_ce_se = qp->sq_signal_bits | + (wr->send_flags & IB_SEND_SIGNALED ? + MLX5_WQE_CTRL_CQ_UPDATE : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + MLX5_WQE_CTRL_SOLICITED : 0); + + *seg += sizeof(**ctrl); + *size = sizeof(**ctrl) / 16; + + return err; +} + +static void finish_wqe(struct mlx5_ib_qp *qp, + struct mlx5_wqe_ctrl_seg *ctrl, + u8 size, unsigned idx, u64 wr_id, + int nreq, u8 fence, u8 next_fence, + u32 mlx5_opcode) +{ + u8 opmod = 0; + + ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | + mlx5_opcode | ((u32)opmod << 24)); + ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); + ctrl->fm_ce_se |= fence; + qp->fm_cache = next_fence; + if (unlikely(qp->wq_sig)) + ctrl->signature = wq_sig(ctrl); + + qp->sq.wrid[idx] = wr_id; + qp->sq.w_list[idx].opcode = mlx5_opcode; + qp->sq.wqe_head[idx] = qp->sq.head + nreq; + qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); + qp->sq.w_list[idx].next = qp->sq.cur_post; +} + + int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -2048,13 +2465,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct mlx5_ib_dev *dev = to_mdev(ibqp->device); struct mlx5_core_dev *mdev = &dev->mdev; struct mlx5_ib_qp *qp = to_mqp(ibqp); + struct mlx5_ib_mr *mr; struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf = qp->bf; int uninitialized_var(size); void *qend = qp->sq.qend; unsigned long flags; - u32 mlx5_opcode; unsigned idx; int err = 0; int inl = 0; @@ -2063,7 +2480,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, int nreq; int i; u8 next_fence = 0; - u8 opmod = 0; u8 fence; spin_lock_irqsave(&qp->sq.lock, flags); @@ -2076,36 +2492,23 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { + fence = qp->fm_cache; + num_sge = wr->num_sge; + if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } - fence = qp->fm_cache; - num_sge = wr->num_sge; - if (unlikely(num_sge > qp->sq.max_gs)) { + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; *bad_wr = wr; goto out; } - idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - seg = mlx5_get_send_wqe(qp, idx); - ctrl = seg; - *(uint32_t *)(seg + 8) = 0; - ctrl->imm = send_ieth(wr); - ctrl->fm_ce_se = qp->sq_signal_bits | - (wr->send_flags & IB_SEND_SIGNALED ? - MLX5_WQE_CTRL_CQ_UPDATE : 0) | - (wr->send_flags & IB_SEND_SOLICITED ? - MLX5_WQE_CTRL_SOLICITED : 0); - - seg += sizeof(*ctrl); - size = sizeof(*ctrl) / 16; - switch (ibqp->qp_type) { case IB_QPT_XRC_INI: xrc = seg; @@ -2158,6 +2561,73 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = 0; break; + case IB_WR_REG_SIG_MR: + qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR; + mr = to_mmr(wr->wr.sig_handover.sig_mr); + + ctrl->imm = cpu_to_be32(mr->ibmr.rkey); + err = set_sig_umr_wr(wr, qp, &seg, &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_UMR); + /* + * SET_PSV WQEs are not signaled and solicited + * on error + */ + wr->send_flags &= ~IB_SEND_SIGNALED; + wr->send_flags |= IB_SEND_SOLICITED; + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem, + mr->sig->psv_memory.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + err = begin_wqe(qp, &seg, &ctrl, wr, + &idx, &size, nreq); + if (err) { + mlx5_ib_warn(dev, "\n"); + err = -ENOMEM; + *bad_wr = wr; + goto out; + } + + next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; + err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire, + mr->sig->psv_wire.psv_idx, &seg, + &size); + if (err) { + mlx5_ib_warn(dev, "\n"); + *bad_wr = wr; + goto out; + } + + finish_wqe(qp, ctrl, size, idx, wr->wr_id, + nreq, get_fence(fence, wr), + next_fence, MLX5_OPCODE_SET_PSV); + num_sge = 0; + goto skip_psv; + default: break; } @@ -2238,22 +2708,10 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } } - mlx5_opcode = mlx5_ib_opcode[wr->opcode]; - ctrl->opmod_idx_opcode = cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | - mlx5_opcode | - ((u32)opmod << 24)); - ctrl->qpn_ds = cpu_to_be32(size | (qp->mqp.qpn << 8)); - ctrl->fm_ce_se |= get_fence(fence, wr); - qp->fm_cache = next_fence; - if (unlikely(qp->wq_sig)) - ctrl->signature = wq_sig(ctrl); - - qp->sq.wrid[idx] = wr->wr_id; - qp->sq.w_list[idx].opcode = mlx5_opcode; - qp->sq.wqe_head[idx] = qp->sq.head + nreq; - qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); - qp->sq.w_list[idx].next = qp->sq.cur_post; - + finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, + get_fence(fence, wr), next_fence, + mlx5_ib_opcode[wr->opcode]); +skip_psv: if (0) dump_wqe(qp, idx, size); } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 5b71d43bd89c..415f8e1a54db 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -695,6 +695,7 @@ static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { mthca_free_cq(to_mdev(ibdev), cq); + err = -EFAULT; goto err_free; } @@ -976,12 +977,12 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(pd->device); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct mthca_mr *mr; struct mthca_reg_mr ucmd; u64 *pages; int shift, n, len; - int i, j, k; + int i, k, entry; int err = 0; int write_mtt_size; @@ -1009,10 +1010,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = ffs(mr->umem->page_size) - 1; - - n = 0; - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - n += chunk->nents; + n = mr->umem->nmap; mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { @@ -1030,25 +1028,24 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - list_for_each_entry(chunk, &mr->umem->chunk_list, list) - for (j = 0; j < chunk->nmap; ++j) { - len = sg_dma_len(&chunk->page_list[j]) >> shift; - for (k = 0; k < len; ++k) { - pages[i++] = sg_dma_address(&chunk->page_list[j]) + - mr->umem->page_size * k; - /* - * Be friendly to write_mtt and pass it chunks - * of appropriate size. - */ - if (i == write_mtt_size) { - err = mthca_write_mtt(dev, mr->mtt, n, pages, i); - if (err) - goto mtt_done; - n += i; - i = 0; - } + for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { + len = sg_dma_len(sg) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + + mr->umem->page_size * k; + /* + * Be friendly to write_mtt and pass it chunks + * of appropriate size. + */ + if (i == write_mtt_size) { + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; } } + } if (i) err = mthca_write_mtt(dev, mr->mtt, n, pages, i); diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 9c9f2f57e960..dfa9df484505 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -128,6 +128,7 @@ static void build_mpa_v1(struct nes_cm_node *, void *, u8); static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **); static void print_core(struct nes_cm_core *core); +static void record_ird_ord(struct nes_cm_node *, u16, u16); /* External CM API Interface */ /* instance of function pointers for client API */ @@ -317,7 +318,6 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, } } - if (priv_data_len + mpa_hdr_len != len) { nes_debug(NES_DBG_CM, "The received ietf buffer was not right" " complete (%x + %x != %x)\n", @@ -356,25 +356,57 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, /* send reset */ return -EINVAL; } + if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) + cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD; - if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + if (cm_node->mpav2_ird_ord != IETF_NO_IRD_ORD) { /* responder */ - if (cm_node->ord_size > ird_size) - cm_node->ord_size = ird_size; - } else { - /* initiator */ - if (cm_node->ord_size > ird_size) - cm_node->ord_size = ird_size; - - if (cm_node->ird_size < ord_size) { - /* no resources available */ - /* send terminate message */ - return -EINVAL; + if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { + /* we are still negotiating */ + if (ord_size > NES_MAX_IRD) { + cm_node->ird_size = NES_MAX_IRD; + } else { + cm_node->ird_size = ord_size; + if (ord_size == 0 && + (rtr_ctrl_ord & IETF_RDMA0_READ)) { + cm_node->ird_size = 1; + nes_debug(NES_DBG_CM, + "%s: Remote peer doesn't support RDMA0_READ (ord=%u)\n", + __func__, ord_size); + } + } + if (ird_size > NES_MAX_ORD) + cm_node->ord_size = NES_MAX_ORD; + else + cm_node->ord_size = ird_size; + } else { /* initiator */ + if (ord_size > NES_MAX_IRD) { + nes_debug(NES_DBG_CM, + "%s: Unable to support the requested (ord =%u)\n", + __func__, ord_size); + return -EINVAL; + } + cm_node->ird_size = ord_size; + + if (ird_size > NES_MAX_ORD) { + cm_node->ord_size = NES_MAX_ORD; + } else { + if (ird_size == 0 && + (rtr_ctrl_ord & IETF_RDMA0_READ)) { + nes_debug(NES_DBG_CM, + "%s: Remote peer doesn't support RDMA0_READ (ird=%u)\n", + __func__, ird_size); + return -EINVAL; + } else { + cm_node->ord_size = ird_size; + } + } } } if (rtr_ctrl_ord & IETF_RDMA0_READ) { cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; + } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) { cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; } else { /* Not supported RDMA0 operation */ @@ -514,6 +546,19 @@ static void print_core(struct nes_cm_core *core) nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); } +static void record_ird_ord(struct nes_cm_node *cm_node, + u16 conn_ird, u16 conn_ord) +{ + if (conn_ird > NES_MAX_IRD) + conn_ird = NES_MAX_IRD; + + if (conn_ord > NES_MAX_ORD) + conn_ord = NES_MAX_ORD; + + cm_node->ird_size = conn_ird; + cm_node->ord_size = conn_ord; +} + /** * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame */ @@ -557,11 +602,13 @@ static void build_mpa_v2(struct nes_cm_node *cm_node, mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE); /* initialize RTR msg */ - ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? - IETF_NO_IRD_ORD : cm_node->ird_size; - ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? - IETF_NO_IRD_ORD : cm_node->ord_size; - + if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { + ctrl_ird = IETF_NO_IRD_ORD; + ctrl_ord = IETF_NO_IRD_ORD; + } else { + ctrl_ird = cm_node->ird_size & IETF_NO_IRD_ORD; + ctrl_ord = cm_node->ord_size & IETF_NO_IRD_ORD; + } ctrl_ird |= IETF_PEER_TO_PEER; ctrl_ird |= IETF_FLPDU_ZERO_LEN; @@ -610,7 +657,7 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a struct nes_qp *nesqp = *nesqp_addr; struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0]; - u64temp = (unsigned long)nesqp; + u64temp = (unsigned long)nesqp->nesuqp_addr; u64temp |= NES_SW_CONTEXT_ALIGN >> 1; set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp); @@ -1409,8 +1456,9 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, cm_node->mpa_frame_rev = mpa_version; cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; - cm_node->ird_size = IETF_NO_IRD_ORD; - cm_node->ord_size = IETF_NO_IRD_ORD; + cm_node->mpav2_ird_ord = 0; + cm_node->ird_size = 0; + cm_node->ord_size = 0; nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n", &cm_node->loc_addr, cm_node->loc_port, @@ -3027,11 +3075,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) rem_ref_cm_node(cm_node->cm_core, cm_node); return -ECONNRESET; } - /* associate the node with the QP */ nesqp->cm_node = (void *)cm_node; cm_node->nesqp = nesqp; + nes_debug(NES_DBG_CM, "QP%u, cm_node=%p, jiffies = %lu listener = %p\n", nesqp->hwqp.qp_id, cm_node, jiffies, cm_node->listener); atomic_inc(&cm_accepts); @@ -3054,6 +3102,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (cm_node->mpa_frame_rev == IETF_MPA_V1) mpa_frame_offset = 4; + if (cm_node->mpa_frame_rev == IETF_MPA_V1 || + cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) { + record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); + } + memcpy(mpa_v2_frame->priv_data, conn_param->private_data, conn_param->private_data_len); @@ -3117,7 +3170,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } nesqp->skip_lsmm = 1; - /* Cache the cm_id in the qp */ nesqp->cm_id = cm_id; cm_node->cm_id = cm_id; @@ -3154,7 +3206,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32( ((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT)); nesqp->nesqp_context->ird_ord_sizes |= - cpu_to_le32((u32)conn_param->ord); + cpu_to_le32((u32)cm_node->ord_size); memset(&nes_quad, 0, sizeof(nes_quad)); nes_quad.DstIpAdrIndex = @@ -3194,6 +3246,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_event.remote_addr = cm_id->remote_addr; cm_event.private_data = NULL; cm_event.private_data_len = 0; + cm_event.ird = cm_node->ird_size; + cm_event.ord = cm_node->ord_size; + ret = cm_id->event_handler(cm_id, &cm_event); attr.qp_state = IB_QPS_RTS; nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); @@ -3290,14 +3345,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) /* cache the cm_id in the qp */ nesqp->cm_id = cm_id; - cm_id->provider_data = nesqp; - nesqp->private_data_len = conn_param->private_data_len; - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); - /* space for rdma0 read msg */ - if (conn_param->ord == 0) - nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1); nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); nes_debug(NES_DBG_CM, "mpa private data len =%u\n", @@ -3334,6 +3383,11 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return -ENOMEM; } + record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord); + if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO && + cm_node->ord_size == 0) + cm_node->ord_size = 1; + cm_node->apbvt_set = apbvt_set; nesqp->cm_node = cm_node; cm_node->nesqp = nesqp; @@ -3530,6 +3584,8 @@ static void cm_event_connected(struct nes_cm_event *event) nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)1 << NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); + nesqp->nesqp_context->ird_ord_sizes |= + cpu_to_le32((u32)cm_node->ord_size); /* Adjust tail for not having a LSMM */ /*nesqp->hwqp.sq_tail = 1;*/ @@ -3742,8 +3798,13 @@ static void cm_event_mpa_req(struct nes_cm_event *event) cm_event_raddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr); cm_event.private_data = cm_node->mpa_frame_buf; cm_event.private_data_len = (u8)cm_node->mpa_frame_size; + if (cm_node->mpa_frame_rev == IETF_MPA_V1) { + cm_event.ird = NES_MAX_IRD; + cm_event.ord = NES_MAX_ORD; + } else { cm_event.ird = cm_node->ird_size; cm_event.ord = cm_node->ord_size; + } ret = cm_id->event_handler(cm_id, &cm_event); if (ret) diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index 4646e6666087..522c99cd07c4 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -58,6 +58,8 @@ #define IETF_RDMA0_WRITE 0x8000 #define IETF_RDMA0_READ 0x4000 #define IETF_NO_IRD_ORD 0x3FFF +#define NES_MAX_IRD 0x40 +#define NES_MAX_ORD 0x7F enum ietf_mpa_flags { IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ @@ -333,6 +335,7 @@ struct nes_cm_node { enum mpa_frame_version mpa_frame_rev; u16 ird_size; u16 ord_size; + u16 mpav2_ird_ord; u16 mpa_frame_size; struct iw_cm_id *cm_id; diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index 4926de744488..529c421bb15c 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -39,8 +39,8 @@ #include <linux/types.h> -#define NES_ABI_USERSPACE_VER 1 -#define NES_ABI_KERNEL_VER 1 +#define NES_ABI_USERSPACE_VER 2 +#define NES_ABI_KERNEL_VER 2 /* * Make sure that all structs defined in this file remain laid out so @@ -78,6 +78,7 @@ struct nes_create_cq_req { struct nes_create_qp_req { __u64 user_wqe_buffers; + __u64 user_qp_buffer; }; enum iwnes_memreg_type { diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 8308e3634767..218dd3574285 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1186,11 +1186,13 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num); kfree(nesqp->allocated_buffer); nes_debug(NES_DBG_QP, "ib_copy_from_udata() Failed \n"); - return NULL; + return ERR_PTR(-EFAULT); } if (req.user_wqe_buffers) { virt_wqs = 1; } + if (req.user_qp_buffer) + nesqp->nesuqp_addr = req.user_qp_buffer; if ((ibpd->uobject) && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); @@ -2307,7 +2309,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_device *nesdev = nesvnic->nesdev; struct nes_adapter *nesadapter = nesdev->nesadapter; struct ib_mr *ibmr = ERR_PTR(-EINVAL); - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct nes_ucontext *nes_ucontext; struct nes_pbl *nespbl; struct nes_mr *nesmr; @@ -2315,7 +2317,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct nes_mem_reg_req req; struct nes_vpbl vpbl; struct nes_root_vpbl root_vpbl; - int nmap_index, page_index; + int entry, page_index; int page_count = 0; int err, pbl_depth = 0; int chunk_pages; @@ -2330,6 +2332,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u16 pbl_count; u8 single_page = 1; u8 stag_key; + int first_page = 1; region = ib_umem_get(pd->uobject->context, start, length, acc, 0); if (IS_ERR(region)) { @@ -2380,128 +2383,125 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } nesmr->region = region; - list_for_each_entry(chunk, ®ion->chunk_list, list) { - nes_debug(NES_DBG_MR, "Chunk: nents = %u, nmap = %u .\n", - chunk->nents, chunk->nmap); - for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { - if (sg_dma_address(&chunk->page_list[nmap_index]) & ~PAGE_MASK) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", - (unsigned int) sg_dma_address(&chunk->page_list[nmap_index])); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_user_mr_err; - } + for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { + if (sg_dma_address(sg) & ~PAGE_MASK) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + nes_debug(NES_DBG_MR, "Unaligned Memory Buffer: 0x%x\n", + (unsigned int) sg_dma_address(sg)); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } - if (!sg_dma_len(&chunk->page_list[nmap_index])) { - ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); - ibmr = ERR_PTR(-EINVAL); - kfree(nesmr); - goto reg_user_mr_err; - } + if (!sg_dma_len(sg)) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); + nes_debug(NES_DBG_MR, "Invalid Buffer Size\n"); + ibmr = ERR_PTR(-EINVAL); + kfree(nesmr); + goto reg_user_mr_err; + } - region_length += sg_dma_len(&chunk->page_list[nmap_index]); - chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; - region_length -= skip_pages << 12; - for (page_index=skip_pages; page_index < chunk_pages; page_index++) { - skip_pages = 0; - if ((page_count!=0)&&(page_count<<12)-(region->offset&(4096-1))>=region->length) - goto enough_pages; - if ((page_count&0x01FF) == 0) { - if (page_count >= 1024 * 512) { + region_length += sg_dma_len(sg); + chunk_pages = sg_dma_len(sg) >> 12; + region_length -= skip_pages << 12; + for (page_index = skip_pages; page_index < chunk_pages; page_index++) { + skip_pages = 0; + if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length) + goto enough_pages; + if ((page_count&0x01FF) == 0) { + if (page_count >= 1024 * 512) { + ib_umem_release(region); + nes_free_resource(nesadapter, + nesadapter->allocated_mrs, stag_index); + kfree(nesmr); + ibmr = ERR_PTR(-E2BIG); + goto reg_user_mr_err; + } + if (root_pbl_index == 1) { + root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, + 8192, &root_vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", + root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); + if (!root_vpbl.pbl_vbase) { ib_umem_release(region); - nes_free_resource(nesadapter, - nesadapter->allocated_mrs, stag_index); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); kfree(nesmr); - ibmr = ERR_PTR(-E2BIG); + ibmr = ERR_PTR(-ENOMEM); goto reg_user_mr_err; } - if (root_pbl_index == 1) { - root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, - 8192, &root_vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating root PBL, va = %p, pa = 0x%08X\n", - root_vpbl.pbl_vbase, (unsigned int)root_vpbl.pbl_pbase); - if (!root_vpbl.pbl_vbase) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, - GFP_KERNEL); - if (!root_vpbl.leaf_vpbl) { - ib_umem_release(region); - pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, - root_vpbl.pbl_pbase); - pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, - vpbl.pbl_pbase); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, - stag_index); - kfree(nesmr); - ibmr = ERR_PTR(-ENOMEM); - goto reg_user_mr_err; - } - root_vpbl.pbl_vbase[0].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[0].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); - root_vpbl.leaf_vpbl[0] = vpbl; - } - vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, - &vpbl.pbl_pbase); - nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", - vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); - if (!vpbl.pbl_vbase) { + root_vpbl.leaf_vpbl = kzalloc(sizeof(*root_vpbl.leaf_vpbl)*1024, + GFP_KERNEL); + if (!root_vpbl.leaf_vpbl) { ib_umem_release(region); - nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); - ibmr = ERR_PTR(-ENOMEM); + pci_free_consistent(nesdev->pcidev, 8192, root_vpbl.pbl_vbase, + root_vpbl.pbl_pbase); + pci_free_consistent(nesdev->pcidev, 4096, vpbl.pbl_vbase, + vpbl.pbl_pbase); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, + stag_index); kfree(nesmr); + ibmr = ERR_PTR(-ENOMEM); goto reg_user_mr_err; } - if (1 <= root_pbl_index) { - root_vpbl.pbl_vbase[root_pbl_index].pa_low = - cpu_to_le32((u32)vpbl.pbl_pbase); - root_vpbl.pbl_vbase[root_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); - root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; - } - root_pbl_index++; - cur_pbl_index = 0; + root_vpbl.pbl_vbase[0].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[0].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase) >> 32))); + root_vpbl.leaf_vpbl[0] = vpbl; } - if (single_page) { - if (page_count != 0) { - if ((last_dma_addr+4096) != - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))) - single_page = 0; - last_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096); - } else { - first_dma_addr = sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096); - last_dma_addr = first_dma_addr; - } + vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096, + &vpbl.pbl_pbase); + nes_debug(NES_DBG_MR, "Allocating leaf PBL, va = %p, pa = 0x%08X\n", + vpbl.pbl_vbase, (unsigned int)vpbl.pbl_pbase); + if (!vpbl.pbl_vbase) { + ib_umem_release(region); + nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index); + ibmr = ERR_PTR(-ENOMEM); + kfree(nesmr); + goto reg_user_mr_err; + } + if (1 <= root_pbl_index) { + root_vpbl.pbl_vbase[root_pbl_index].pa_low = + cpu_to_le32((u32)vpbl.pbl_pbase); + root_vpbl.pbl_vbase[root_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32))); + root_vpbl.leaf_vpbl[root_pbl_index] = vpbl; + } + root_pbl_index++; + cur_pbl_index = 0; + } + if (single_page) { + if (page_count != 0) { + if ((last_dma_addr+4096) != + (sg_dma_address(sg)+ + (page_index*4096))) + single_page = 0; + last_dma_addr = sg_dma_address(sg)+ + (page_index*4096); + } else { + first_dma_addr = sg_dma_address(sg)+ + (page_index*4096); + last_dma_addr = first_dma_addr; } - - vpbl.pbl_vbase[cur_pbl_index].pa_low = - cpu_to_le32((u32)(sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))); - vpbl.pbl_vbase[cur_pbl_index].pa_high = - cpu_to_le32((u32)((((u64)(sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))) >> 32))); - cur_pbl_index++; - page_count++; } + + vpbl.pbl_vbase[cur_pbl_index].pa_low = + cpu_to_le32((u32)(sg_dma_address(sg)+ + (page_index*4096))); + vpbl.pbl_vbase[cur_pbl_index].pa_high = + cpu_to_le32((u32)((((u64)(sg_dma_address(sg)+ + (page_index*4096))) >> 32))); + cur_pbl_index++; + page_count++; } } + enough_pages: nes_debug(NES_DBG_MR, "calculating stag, stag_index=0x%08x, driver_key=0x%08x," " stag_key=0x%08x\n", @@ -2613,25 +2613,28 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, nespbl->pbl_size, (unsigned long) nespbl->pbl_pbase, (void *) nespbl->pbl_vbase, nespbl->user_base); - list_for_each_entry(chunk, ®ion->chunk_list, list) { - for (nmap_index = 0; nmap_index < chunk->nmap; ++nmap_index) { - chunk_pages = sg_dma_len(&chunk->page_list[nmap_index]) >> 12; - chunk_pages += (sg_dma_len(&chunk->page_list[nmap_index]) & (4096-1)) ? 1 : 0; - nespbl->page = sg_page(&chunk->page_list[0]); - for (page_index=0; page_index<chunk_pages; page_index++) { - ((__le32 *)pbl)[0] = cpu_to_le32((u32) - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096))); - ((__le32 *)pbl)[1] = cpu_to_le32(((u64) - (sg_dma_address(&chunk->page_list[nmap_index])+ - (page_index*4096)))>>32); - nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, - (unsigned long long)*pbl, - le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); - pbl++; - } + for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) { + chunk_pages = sg_dma_len(sg) >> 12; + chunk_pages += (sg_dma_len(sg) & (4096-1)) ? 1 : 0; + if (first_page) { + nespbl->page = sg_page(sg); + first_page = 0; + } + + for (page_index = 0; page_index < chunk_pages; page_index++) { + ((__le32 *)pbl)[0] = cpu_to_le32((u32) + (sg_dma_address(sg)+ + (page_index*4096))); + ((__le32 *)pbl)[1] = cpu_to_le32(((u64) + (sg_dma_address(sg)+ + (page_index*4096)))>>32); + nes_debug(NES_DBG_MR, "pbl=%p, *pbl=0x%016llx, 0x%08x%08x\n", pbl, + (unsigned long long)*pbl, + le32_to_cpu(((__le32 *)pbl)[1]), le32_to_cpu(((__le32 *)pbl)[0])); + pbl++; } } + if (req.reg_type == IWNES_MEMREG_TYPE_QP) { list_add_tail(&nespbl->list, &nes_ucontext->qp_reg_mem_list); } else { @@ -3134,9 +3137,7 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, " original_last_aeq = 0x%04X. last_aeq = 0x%04X.\n", nesqp->hwqp.qp_id, atomic_read(&nesqp->refcount), original_last_aeq, nesqp->last_aeq); - if ((!ret) || - ((original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) && - (ret))) { + if (!ret || original_last_aeq != NES_AEQE_AEID_RDMAP_ROE_BAD_LLP_CLOSE) { if (dont_wait) { if (nesqp->cm_id && nesqp->hw_tcp_state != 0) { nes_debug(NES_DBG_MOD_QP, "QP%u Queuing fake disconnect for QP refcount (%d)," diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index 0eff7c44d76b..309b31c31ae1 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -184,5 +184,6 @@ struct nes_qp { u8 pau_busy; u8 pau_pending; u8 pau_state; + __u64 nesuqp_addr; }; #endif /* NES_VERBS_H */ diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile index 06a5bed12e43..d1bfd4f4cdde 100644 --- a/drivers/infiniband/hw/ocrdma/Makefile +++ b/drivers/infiniband/hw/ocrdma/Makefile @@ -2,4 +2,4 @@ ccflags-y := -Idrivers/net/ethernet/emulex/benet obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o -ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o +ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o ocrdma_stats.o diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 7c001b97b23f..19011dbb930f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -35,17 +35,27 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_addr.h> #include <be_roce.h> #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DEV_VERSION "1.0.0" +#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u" + +#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" +#define OC_NAME_SH OCRDMA_NODE_DESC "(Skyhawk)" +#define OC_NAME_UNKNOWN OCRDMA_NODE_DESC "(Unknown)" + +#define OC_SKH_DEVICE_PF 0x720 +#define OC_SKH_DEVICE_VF 0x728 #define OCRDMA_MAX_AH 512 #define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) +#define convert_to_64bit(lo, hi) ((u64)hi << 32 | (u64)lo) + struct ocrdma_dev_attr { u8 fw_ver[32]; u32 vendor_id; @@ -65,6 +75,7 @@ struct ocrdma_dev_attr { int max_mr; u64 max_mr_size; u32 max_num_mr_pbl; + int max_mw; int max_fmr; int max_map_per_fmr; int max_pages_per_frmr; @@ -83,6 +94,12 @@ struct ocrdma_dev_attr { u8 num_ird_pages; }; +struct ocrdma_dma_mem { + void *va; + dma_addr_t pa; + u32 size; +}; + struct ocrdma_pbl { void *va; dma_addr_t pa; @@ -148,6 +165,26 @@ struct ocrdma_mr { struct ocrdma_hw_mr hwmr; }; +struct ocrdma_stats { + u8 type; + struct ocrdma_dev *dev; +}; + +struct stats_mem { + struct ocrdma_mqe mqe; + void *va; + dma_addr_t pa; + u32 size; + char *debugfs_mem; +}; + +struct phy_info { + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u16 phy_type; + u16 interface_type; +}; + struct ocrdma_dev { struct ib_device ibdev; struct ocrdma_dev_attr attr; @@ -191,12 +228,30 @@ struct ocrdma_dev { struct mqe_ctx mqe_ctx; struct be_dev_info nic_info; + struct phy_info phy; + char model_number[32]; + u32 hba_port_num; struct list_head entry; struct rcu_head rcu; int id; - struct ocrdma_mr *stag_arr[OCRDMA_MAX_STAG]; + u64 stag_arr[OCRDMA_MAX_STAG]; u16 pvid; + u32 asic_id; + + ulong last_stats_time; + struct mutex stats_lock; /* provide synch for debugfs operations */ + struct stats_mem stats_mem; + struct ocrdma_stats rsrc_stats; + struct ocrdma_stats rx_stats; + struct ocrdma_stats wqe_stats; + struct ocrdma_stats tx_stats; + struct ocrdma_stats db_err_stats; + struct ocrdma_stats tx_qp_err_stats; + struct ocrdma_stats rx_qp_err_stats; + struct ocrdma_stats tx_dbg_stats; + struct ocrdma_stats rx_dbg_stats; + struct dentry *dir; }; struct ocrdma_cq { @@ -209,8 +264,8 @@ struct ocrdma_cq { */ u32 max_hw_cqe; bool phase_change; - bool armed, solicited; - bool arm_needed; + bool deferred_arm, deferred_sol; + bool first_arm; spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization * to cq polling @@ -223,6 +278,7 @@ struct ocrdma_cq { struct ocrdma_ucontext *ucontext; dma_addr_t pa; u32 len; + u32 cqe_cnt; /* head of all qp's sq and rq for which cqes need to be flushed * by the software. @@ -232,7 +288,6 @@ struct ocrdma_cq { struct ocrdma_pd { struct ib_pd ibpd; - struct ocrdma_dev *dev; struct ocrdma_ucontext *uctx; u32 id; int num_dpp_qp; @@ -317,10 +372,8 @@ struct ocrdma_qp { bool dpp_enabled; u8 *ird_q_va; bool signaled; - u16 db_cache; }; - struct ocrdma_ucontext { struct ib_ucontext ibucontext; @@ -385,13 +438,6 @@ static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct ocrdma_srq, ibsrq); } - -static inline int ocrdma_get_num_posted_shift(struct ocrdma_qp *qp) -{ - return ((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY && - qp->id < 128) ? 24 : 16); -} - static inline int is_cqe_valid(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe) { int cqe_valid; @@ -436,4 +482,40 @@ static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev, return 0; } +static inline char *hca_name(struct ocrdma_dev *dev) +{ + switch (dev->nic_info.pdev->device) { + case OC_SKH_DEVICE_PF: + case OC_SKH_DEVICE_VF: + return OC_NAME_SH; + default: + return OC_NAME_UNKNOWN; + } +} + +static inline int ocrdma_get_eq_table_index(struct ocrdma_dev *dev, + int eqid) +{ + int indx; + + for (indx = 0; indx < dev->eq_cnt; indx++) { + if (dev->eq_tbl[indx].q.id == eqid) + return indx; + } + + return -EINVAL; +} + +static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev) +{ + if (dev->nic_info.dev_family == 0xF && !dev->asic_id) { + pci_read_config_dword( + dev->nic_info.pdev, + OCRDMA_SLI_ASIC_ID_OFFSET, &dev->asic_id); + } + + return (dev->asic_id & OCRDMA_SLI_ASIC_GEN_NUM_MASK) >> + OCRDMA_SLI_ASIC_GEN_NUM_SHIFT; +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index fbac8eb44036..1554cca5712a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -28,7 +28,8 @@ #ifndef __OCRDMA_ABI_H__ #define __OCRDMA_ABI_H__ -#define OCRDMA_ABI_VERSION 1 +#define OCRDMA_ABI_VERSION 2 +#define OCRDMA_BE_ROCE_ABI_VERSION 1 /* user kernel communication data structures. */ struct ocrdma_alloc_ucontext_resp { @@ -107,9 +108,7 @@ struct ocrdma_create_qp_uresp { u32 db_sq_offset; u32 db_rq_offset; u32 db_shift; - u64 rsvd1; - u64 rsvd2; - u64 rsvd3; + u64 rsvd[11]; } __packed; struct ocrdma_create_srq_uresp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 34071143006e..d4cc01f10c01 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -100,7 +100,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (!(attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); - ah = kzalloc(sizeof *ah, GFP_ATOMIC); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 1664d648cbfc..3bbf2010a821 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -32,7 +32,6 @@ #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> -#include <rdma/ib_addr.h> #include "ocrdma.h" #include "ocrdma_hw.h" @@ -243,6 +242,23 @@ static int ocrdma_get_mbx_errno(u32 status) return err_num; } +char *port_speed_string(struct ocrdma_dev *dev) +{ + char *str = ""; + u16 speeds_supported; + + speeds_supported = dev->phy.fixed_speeds_supported | + dev->phy.auto_speeds_supported; + if (speeds_supported & OCRDMA_PHY_SPEED_40GBPS) + str = "40Gbps "; + else if (speeds_supported & OCRDMA_PHY_SPEED_10GBPS) + str = "10Gbps "; + else if (speeds_supported & OCRDMA_PHY_SPEED_1GBPS) + str = "1Gbps "; + + return str; +} + static int ocrdma_get_mbx_cqe_errno(u16 cqe_status) { int err_num = -EINVAL; @@ -332,6 +348,11 @@ static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len) return mqe; } +static void *ocrdma_alloc_mqe(void) +{ + return kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL); +} + static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q) { dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma); @@ -364,8 +385,8 @@ static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt, } } -static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q, - int queue_type) +static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, + struct ocrdma_queue_info *q, int queue_type) { u8 opcode = 0; int status; @@ -444,7 +465,7 @@ mbx_err: return status; } -static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) +int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq) { int irq; @@ -574,6 +595,7 @@ static int ocrdma_create_mq(struct ocrdma_dev *dev) if (status) goto alloc_err; + dev->eq_tbl[0].cq_cnt++; status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->eq_tbl[0].q); if (status) goto mbx_cq_free; @@ -639,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, { struct ocrdma_qp *qp = NULL; struct ocrdma_cq *cq = NULL; - struct ib_event ib_evt; + struct ib_event ib_evt = { 0 }; int cq_event = 0; int qp_event = 1; int srq_event = 0; @@ -664,6 +686,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, case OCRDMA_CQ_OVERRUN_ERROR: ib_evt.element.cq = &cq->ibcq; ib_evt.event = IB_EVENT_CQ_ERR; + cq_event = 1; + qp_event = 0; break; case OCRDMA_CQ_QPCAT_ERROR: ib_evt.element.qp = &qp->ibqp; @@ -725,6 +749,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, qp->srq->ibsrq. srq_context); } else if (dev_event) { + pr_err("%s: Fatal event received\n", dev->ibdev.name); ib_dispatch_event(&ib_evt); } @@ -752,7 +777,6 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, } } - static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe) { /* async CQE processing */ @@ -799,8 +823,6 @@ static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id) ocrdma_process_acqe(dev, cqe); else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK) ocrdma_process_mcqe(dev, cqe); - else - pr_err("%s() cqe->compl is not set.\n", __func__); memset(cqe, 0, sizeof(struct ocrdma_mcqe)); ocrdma_mcq_inc_tail(dev); } @@ -858,16 +880,8 @@ static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx) BUG(); cq = dev->cq_tbl[cq_idx]; - if (cq == NULL) { - pr_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx); + if (cq == NULL) return; - } - spin_lock_irqsave(&cq->cq_lock, flags); - cq->armed = false; - cq->solicited = false; - spin_unlock_irqrestore(&cq->cq_lock, flags); - - ocrdma_ring_cq_db(dev, cq->id, false, false, 0); if (cq->ibcq.comp_handler) { spin_lock_irqsave(&cq->comp_handler_lock, flags); @@ -892,27 +906,35 @@ static irqreturn_t ocrdma_irq_handler(int irq, void *handle) struct ocrdma_dev *dev = eq->dev; struct ocrdma_eqe eqe; struct ocrdma_eqe *ptr; - u16 eqe_popped = 0; u16 cq_id; - while (1) { + int budget = eq->cq_cnt; + + do { ptr = ocrdma_get_eqe(eq); eqe = *ptr; ocrdma_le32_to_cpu(&eqe, sizeof(eqe)); if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0) break; - eqe_popped += 1; + ptr->id_valid = 0; + /* ring eq doorbell as soon as its consumed. */ + ocrdma_ring_eq_db(dev, eq->q.id, false, true, 1); /* check whether its CQE or not. */ if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) { cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT; ocrdma_cq_handler(dev, cq_id); } ocrdma_eq_inc_tail(eq); - } - ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped); - /* Ring EQ doorbell with num_popped to 0 to enable interrupts again. */ - if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) - ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0); + + /* There can be a stale EQE after the last bound CQ is + * destroyed. EQE valid and budget == 0 implies this. + */ + if (budget) + budget--; + + } while (budget); + + ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0); return IRQ_HANDLED; } @@ -949,7 +971,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) { int status = 0; u16 cqe_status, ext_status; - struct ocrdma_mqe *rsp; + struct ocrdma_mqe *rsp_mqe; + struct ocrdma_mbx_rsp *rsp = NULL; mutex_lock(&dev->mqe_ctx.lock); ocrdma_post_mqe(dev, mqe); @@ -958,23 +981,61 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) goto mbx_err; cqe_status = dev->mqe_ctx.cqe_status; ext_status = dev->mqe_ctx.ext_status; - rsp = ocrdma_get_mqe_rsp(dev); - ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe))); + rsp_mqe = ocrdma_get_mqe_rsp(dev); + ocrdma_copy_le32_to_cpu(mqe, rsp_mqe, (sizeof(*mqe))); + if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> + OCRDMA_MQE_HDR_EMB_SHIFT) + rsp = &mqe->u.rsp; + if (cqe_status || ext_status) { - pr_err("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n", - __func__, - (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> - OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status); + pr_err("%s() cqe_status=0x%x, ext_status=0x%x,", + __func__, cqe_status, ext_status); + if (rsp) { + /* This is for embedded cmds. */ + pr_err("opcode=0x%x, subsystem=0x%x\n", + (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> + OCRDMA_MBX_RSP_OPCODE_SHIFT, + (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >> + OCRDMA_MBX_RSP_SUBSYS_SHIFT); + } status = ocrdma_get_mbx_cqe_errno(cqe_status); goto mbx_err; } - if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK) + /* For non embedded, rsp errors are handled in ocrdma_nonemb_mbx_cmd */ + if (rsp && (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)) status = ocrdma_get_mbx_errno(mqe->u.rsp.status); mbx_err: mutex_unlock(&dev->mqe_ctx.lock); return status; } +static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe, + void *payload_va) +{ + int status = 0; + struct ocrdma_mbx_rsp *rsp = payload_va; + + if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >> + OCRDMA_MQE_HDR_EMB_SHIFT) + BUG(); + + status = ocrdma_mbx_cmd(dev, mqe); + if (!status) + /* For non embedded, only CQE failures are handled in + * ocrdma_mbx_cmd. We need to check for RSP errors. + */ + if (rsp->status & OCRDMA_MBX_RSP_STATUS_MASK) + status = ocrdma_get_mbx_errno(rsp->status); + + if (status) + pr_err("opcode=0x%x, subsystem=0x%x\n", + (rsp->subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >> + OCRDMA_MBX_RSP_OPCODE_SHIFT, + (rsp->subsys_op & OCRDMA_MBX_RSP_SUBSYS_MASK) >> + OCRDMA_MBX_RSP_SUBSYS_SHIFT); + return status; +} + static void ocrdma_get_attr(struct ocrdma_dev *dev, struct ocrdma_dev_attr *attr, struct ocrdma_mbx_query_config *rsp) @@ -985,6 +1046,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_qp = (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT; + attr->max_srq = + (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; attr->max_send_sge = ((rsp->max_write_send_sge & OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); @@ -1000,9 +1064,6 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; - attr->max_srq = - (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT; @@ -1015,6 +1076,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >> OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT; + attr->max_mw = rsp->max_mw; attr->max_mr = rsp->max_mr; attr->max_mr_size = ~0ull; attr->max_fmr = 0; @@ -1036,7 +1098,7 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_inline_data = attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) + sizeof(struct ocrdma_sge)); - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { attr->ird = 1; attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE; attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES; @@ -1110,6 +1172,96 @@ mbx_err: return status; } +int ocrdma_mbx_rdma_stats(struct ocrdma_dev *dev, bool reset) +{ + struct ocrdma_rdma_stats_req *req = dev->stats_mem.va; + struct ocrdma_mqe *mqe = &dev->stats_mem.mqe; + struct ocrdma_rdma_stats_resp *old_stats = NULL; + int status; + + old_stats = kzalloc(sizeof(*old_stats), GFP_KERNEL); + if (old_stats == NULL) + return -ENOMEM; + + memset(mqe, 0, sizeof(*mqe)); + mqe->hdr.pyld_len = dev->stats_mem.size; + mqe->hdr.spcl_sge_cnt_emb |= + (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dev->stats_mem.pa & 0xffffffff); + mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dev->stats_mem.pa); + mqe->u.nonemb_req.sge[0].len = dev->stats_mem.size; + + /* Cache the old stats */ + memcpy(old_stats, req, sizeof(struct ocrdma_rdma_stats_resp)); + memset(req, 0, dev->stats_mem.size); + + ocrdma_init_mch((struct ocrdma_mbx_hdr *)req, + OCRDMA_CMD_GET_RDMA_STATS, + OCRDMA_SUBSYS_ROCE, + dev->stats_mem.size); + if (reset) + req->reset_stats = reset; + + status = ocrdma_nonemb_mbx_cmd(dev, mqe, dev->stats_mem.va); + if (status) + /* Copy from cache, if mbox fails */ + memcpy(req, old_stats, sizeof(struct ocrdma_rdma_stats_resp)); + else + ocrdma_le32_to_cpu(req, dev->stats_mem.size); + + kfree(old_stats); + return status; +} + +static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_dma_mem dma; + struct ocrdma_mqe *mqe; + struct ocrdma_get_ctrl_attribs_rsp *ctrl_attr_rsp; + struct mgmt_hba_attribs *hba_attribs; + + mqe = ocrdma_alloc_mqe(); + if (!mqe) + return status; + memset(mqe, 0, sizeof(*mqe)); + + dma.size = sizeof(struct ocrdma_get_ctrl_attribs_rsp); + dma.va = dma_alloc_coherent(&dev->nic_info.pdev->dev, + dma.size, &dma.pa, GFP_KERNEL); + if (!dma.va) + goto free_mqe; + + mqe->hdr.pyld_len = dma.size; + mqe->hdr.spcl_sge_cnt_emb |= + (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe->u.nonemb_req.sge[0].pa_lo = (u32) (dma.pa & 0xffffffff); + mqe->u.nonemb_req.sge[0].pa_hi = (u32) upper_32_bits(dma.pa); + mqe->u.nonemb_req.sge[0].len = dma.size; + + memset(dma.va, 0, dma.size); + ocrdma_init_mch((struct ocrdma_mbx_hdr *)dma.va, + OCRDMA_CMD_GET_CTRL_ATTRIBUTES, + OCRDMA_SUBSYS_COMMON, + dma.size); + + status = ocrdma_nonemb_mbx_cmd(dev, mqe, dma.va); + if (!status) { + ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va; + hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs; + + dev->hba_port_num = hba_attribs->phy_port; + strncpy(dev->model_number, + hba_attribs->controller_model_number, 31); + } + dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa); +free_mqe: + kfree(mqe); + return status; +} + static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev) { int status = -ENOMEM; @@ -1157,6 +1309,35 @@ mbx_err: return status; } +static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev) +{ + int status = -ENOMEM; + struct ocrdma_mqe *cmd; + struct ocrdma_get_phy_info_rsp *rsp; + + cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_PHY_DETAILS, sizeof(*cmd)); + if (!cmd) + return status; + + ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0], + OCRDMA_CMD_PHY_DETAILS, OCRDMA_SUBSYS_COMMON, + sizeof(*cmd)); + + status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_phy_info_rsp *)cmd; + dev->phy.phy_type = le16_to_cpu(rsp->phy_type); + dev->phy.auto_speeds_supported = + le16_to_cpu(rsp->auto_speeds_supported); + dev->phy.fixed_speeds_supported = + le16_to_cpu(rsp->fixed_speeds_supported); +mbx_err: + kfree(cmd); + return status; +} + int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd) { int status = -ENOMEM; @@ -1226,7 +1407,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size, static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) { - int i ; + int i; int status = 0; int max_ah; struct ocrdma_create_ah_tbl *cmd; @@ -1357,12 +1538,10 @@ static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id) int i; mutex_lock(&dev->dev_lock); - for (i = 0; i < dev->eq_cnt; i++) { - if (dev->eq_tbl[i].q.id != eq_id) - continue; - dev->eq_tbl[i].cq_cnt -= 1; - break; - } + i = ocrdma_get_eq_table_index(dev, eq_id); + if (i == -EINVAL) + BUG(); + dev->eq_tbl[i].cq_cnt -= 1; mutex_unlock(&dev->dev_lock); } @@ -1380,7 +1559,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, __func__, dev->id, dev->attr.max_cqe, entries); return -EINVAL; } - if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)) + if (dpp_cq && (ocrdma_get_asic_type(dev) != OCRDMA_ASIC_GEN_SKH_R)) return -EINVAL; if (dpp_cq) { @@ -1417,6 +1596,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cq->eqn = ocrdma_bind_eq(dev); cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER3; cqe_count = cq->len / cqe_size; + cq->cqe_cnt = cqe_count; if (cqe_count > 1024) { /* Set cnt to 3 to indicate more than 1024 cq entries */ cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT); @@ -1439,7 +1619,7 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, } /* shared eq between all the consumer cqs. */ cmd->cmd.eqn = cq->eqn; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { if (dpp_cq) cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP << OCRDMA_CREATE_CQ_TYPE_SHIFT; @@ -1484,12 +1664,9 @@ int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq) (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) & OCRDMA_DESTROY_CQ_QID_MASK; - ocrdma_unbind_eq(dev, cq->eqn); status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); - if (status) - goto mbx_err; + ocrdma_unbind_eq(dev, cq->eqn); dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa); -mbx_err: kfree(cmd); return status; } @@ -2029,8 +2206,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK; qp->rq_cq = cq; - if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp && - (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) { + if (pd->dpp_enabled && pd->num_dpp_qp) { ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, dpp_cq_id); } @@ -2099,7 +2275,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0], sizeof(cmd->params.dgid)); status = ocrdma_query_gid(&qp->dev->ibdev, 1, - ah_attr->grh.sgid_index, &sgid); + ah_attr->grh.sgid_index, &sgid); if (status) return status; @@ -2127,8 +2303,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, static int ocrdma_set_qp_params(struct ocrdma_qp *qp, struct ocrdma_modify_qp *cmd, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps) + struct ib_qp_attr *attrs, int attr_mask) { int status = 0; @@ -2233,8 +2408,7 @@ pmtu_err: } int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps) + struct ib_qp_attr *attrs, int attr_mask) { int status = -ENOMEM; struct ocrdma_modify_qp *cmd; @@ -2257,7 +2431,7 @@ int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp, OCRDMA_QP_PARAMS_STATE_MASK; } - status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps); + status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask); if (status) goto mbx_err; status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); @@ -2488,7 +2662,7 @@ static int ocrdma_create_eqs(struct ocrdma_dev *dev) for (i = 0; i < num_eq; i++) { status = ocrdma_create_eq(dev, &dev->eq_tbl[i], - OCRDMA_EQ_LEN); + OCRDMA_EQ_LEN); if (status) { status = -EINVAL; break; @@ -2533,6 +2707,13 @@ int ocrdma_init_hw(struct ocrdma_dev *dev) status = ocrdma_mbx_create_ah_tbl(dev); if (status) goto conf_err; + status = ocrdma_mbx_get_phy_info(dev); + if (status) + goto conf_err; + status = ocrdma_mbx_get_ctrl_attribs(dev); + if (status) + goto conf_err; + return 0; conf_err: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index 82fe332ae6c6..e513f7293142 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -112,8 +112,7 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs, u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset, u16 *dpp_credit_lmt); int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *, - struct ib_qp_attr *attrs, int attr_mask, - enum ib_qp_state old_qps); + struct ib_qp_attr *attrs, int attr_mask); int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *, struct ocrdma_qp_params *param); int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *); @@ -132,5 +131,8 @@ int ocrdma_qp_state_change(struct ocrdma_qp *, enum ib_qp_state new_state, bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *); void ocrdma_flush_qp(struct ocrdma_qp *); +int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); +int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset); +char *port_speed_string(struct ocrdma_dev *dev); #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 1a8a945efa60..7c504e079744 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -39,10 +39,11 @@ #include "ocrdma_ah.h" #include "be_roce.h" #include "ocrdma_hw.h" +#include "ocrdma_stats.h" #include "ocrdma_abi.h" -MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION); -MODULE_DESCRIPTION("Emulex RoCE HCA Driver"); +MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); +MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("GPL"); @@ -286,7 +287,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.process_mad = ocrdma_process_mad; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= OCRDMA_UVERBS(CREATE_SRQ) | OCRDMA_UVERBS(MODIFY_SRQ) | @@ -338,9 +339,42 @@ static void ocrdma_free_resources(struct ocrdma_dev *dev) kfree(dev->sgid_tbl); } +/* OCRDMA sysfs interface */ +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->nic_info.pdev->vendor); +} + +static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); + +static struct device_attribute *ocrdma_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_fw_ver +}; + +static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) + device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); +} + static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { - int status = 0; + int status = 0, i; struct ocrdma_dev *dev; dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev)); @@ -369,11 +403,25 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) if (status) goto alloc_err; + for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++) + if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i])) + goto sysfs_err; spin_lock(&ocrdma_devlist_lock); list_add_tail_rcu(&dev->entry, &ocrdma_dev_list); spin_unlock(&ocrdma_devlist_lock); + /* Init stats */ + ocrdma_add_port_stats(dev); + + pr_info("%s %s: %s \"%s\" port %d\n", + dev_name(&dev->nic_info.pdev->dev), hca_name(dev), + port_speed_string(dev), dev->model_number, + dev->hba_port_num); + pr_info("%s ocrdma%d driver loaded successfully\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); return dev; +sysfs_err: + ocrdma_remove_sysfiles(dev); alloc_err: ocrdma_free_resources(dev); ocrdma_cleanup_hw(dev); @@ -400,6 +448,9 @@ static void ocrdma_remove(struct ocrdma_dev *dev) /* first unregister with stack to stop all the active traffic * of the registered clients. */ + ocrdma_rem_port_stats(dev); + ocrdma_remove_sysfiles(dev); + ib_unregister_device(&dev->ibdev); spin_lock(&ocrdma_devlist_lock); @@ -437,7 +488,7 @@ static int ocrdma_close(struct ocrdma_dev *dev) cur_qp = dev->qp_tbl; for (i = 0; i < OCRDMA_MAX_QP; i++) { qp = cur_qp[i]; - if (qp) { + if (qp && qp->ibqp.qp_type != IB_QPT_GSI) { /* change the QP state to ERROR */ _ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask); @@ -478,6 +529,7 @@ static struct ocrdma_driver ocrdma_drv = { .add = ocrdma_add, .remove = ocrdma_remove, .state_change_handler = ocrdma_event_handler, + .be_abi_version = OCRDMA_BE_ROCE_ABI_VERSION, }; static void ocrdma_unregister_inet6addr_notifier(void) @@ -487,10 +539,17 @@ static void ocrdma_unregister_inet6addr_notifier(void) #endif } +static void ocrdma_unregister_inetaddr_notifier(void) +{ + unregister_inetaddr_notifier(&ocrdma_inetaddr_notifier); +} + static int __init ocrdma_init_module(void) { int status; + ocrdma_init_debugfs(); + status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier); if (status) return status; @@ -498,13 +557,19 @@ static int __init ocrdma_init_module(void) #if IS_ENABLED(CONFIG_IPV6) status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier); if (status) - return status; + goto err_notifier6; #endif status = be_roce_register_driver(&ocrdma_drv); if (status) - ocrdma_unregister_inet6addr_notifier(); + goto err_be_reg; + return 0; + +err_be_reg: + ocrdma_unregister_inet6addr_notifier(); +err_notifier6: + ocrdma_unregister_inetaddr_notifier(); return status; } @@ -512,6 +577,8 @@ static void __exit ocrdma_exit_module(void) { be_roce_unregister_driver(&ocrdma_drv); ocrdma_unregister_inet6addr_notifier(); + ocrdma_unregister_inetaddr_notifier(); + ocrdma_rem_debugfs(); } module_init(ocrdma_init_module); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 60d5ac23ea80..96c9ee602ba4 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -30,8 +30,16 @@ #define Bit(_b) (1 << (_b)) -#define OCRDMA_GEN1_FAMILY 0xB -#define OCRDMA_GEN2_FAMILY 0x0F +enum { + OCRDMA_ASIC_GEN_SKH_R = 0x04, + OCRDMA_ASIC_GEN_LANCER = 0x0B +}; + +enum { + OCRDMA_ASIC_REV_A0 = 0x00, + OCRDMA_ASIC_REV_B0 = 0x10, + OCRDMA_ASIC_REV_C0 = 0x20 +}; #define OCRDMA_SUBSYS_ROCE 10 enum { @@ -64,6 +72,7 @@ enum { OCRDMA_CMD_ATTACH_MCAST, OCRDMA_CMD_DETACH_MCAST, + OCRDMA_CMD_GET_RDMA_STATS, OCRDMA_CMD_MAX }; @@ -74,12 +83,14 @@ enum { OCRDMA_CMD_CREATE_CQ = 12, OCRDMA_CMD_CREATE_EQ = 13, OCRDMA_CMD_CREATE_MQ = 21, + OCRDMA_CMD_GET_CTRL_ATTRIBUTES = 32, OCRDMA_CMD_GET_FW_VER = 35, OCRDMA_CMD_DELETE_MQ = 53, OCRDMA_CMD_DELETE_CQ = 54, OCRDMA_CMD_DELETE_EQ = 55, OCRDMA_CMD_GET_FW_CONFIG = 58, - OCRDMA_CMD_CREATE_MQ_EXT = 90 + OCRDMA_CMD_CREATE_MQ_EXT = 90, + OCRDMA_CMD_PHY_DETAILS = 102 }; enum { @@ -103,7 +114,10 @@ enum { OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ_OFFSET, OCRDMA_DB_CQ_OFFSET = 0x120, OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET, - OCRDMA_DB_MQ_OFFSET = 0x140 + OCRDMA_DB_MQ_OFFSET = 0x140, + + OCRDMA_DB_SQ_SHIFT = 16, + OCRDMA_DB_RQ_SHIFT = 24 }; #define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */ @@ -138,6 +152,10 @@ enum { #define OCRDMA_MIN_Q_PAGE_SIZE (4096) #define OCRDMA_MAX_Q_PAGES (8) +#define OCRDMA_SLI_ASIC_ID_OFFSET 0x9C +#define OCRDMA_SLI_ASIC_REV_MASK 0x000000FF +#define OCRDMA_SLI_ASIC_GEN_NUM_MASK 0x0000FF00 +#define OCRDMA_SLI_ASIC_GEN_NUM_SHIFT 0x08 /* # 0: 4K Bytes # 1: 8K Bytes @@ -562,6 +580,30 @@ enum { OCRDMA_FN_MODE_RDMA = 0x4 }; +struct ocrdma_get_phy_info_rsp { + struct ocrdma_mqe_hdr hdr; + struct ocrdma_mbx_rsp rsp; + + u16 phy_type; + u16 interface_type; + u32 misc_params; + u16 ext_phy_details; + u16 rsvd; + u16 auto_speeds_supported; + u16 fixed_speeds_supported; + u32 future_use[2]; +}; + +enum { + OCRDMA_PHY_SPEED_ZERO = 0x0, + OCRDMA_PHY_SPEED_10MBPS = 0x1, + OCRDMA_PHY_SPEED_100MBPS = 0x2, + OCRDMA_PHY_SPEED_1GBPS = 0x4, + OCRDMA_PHY_SPEED_10GBPS = 0x8, + OCRDMA_PHY_SPEED_40GBPS = 0x20 +}; + + struct ocrdma_get_link_speed_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; @@ -590,7 +632,7 @@ enum { enum { OCRDMA_CREATE_CQ_VER2 = 2, - OCRDMA_CREATE_CQ_VER3 = 3, + OCRDMA_CREATE_CQ_VER3 = 3, OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF, OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16, @@ -1050,6 +1092,7 @@ enum { OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK = 0xFFFF << OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT }; + struct ocrdma_modify_qp_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; @@ -1062,8 +1105,8 @@ struct ocrdma_query_qp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_hdr req; -#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0 -#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF +#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0 +#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF u32 qp_id; }; @@ -1703,4 +1746,208 @@ struct ocrdma_av { u32 valid; } __packed; +struct ocrdma_rsrc_stats { + u32 dpp_pds; + u32 non_dpp_pds; + u32 rc_dpp_qps; + u32 uc_dpp_qps; + u32 ud_dpp_qps; + u32 rc_non_dpp_qps; + u32 rsvd; + u32 uc_non_dpp_qps; + u32 ud_non_dpp_qps; + u32 rsvd1; + u32 srqs; + u32 rbqs; + u32 r64K_nsmr; + u32 r64K_to_2M_nsmr; + u32 r2M_to_44M_nsmr; + u32 r44M_to_1G_nsmr; + u32 r1G_to_4G_nsmr; + u32 nsmr_count_4G_to_32G; + u32 r32G_to_64G_nsmr; + u32 r64G_to_128G_nsmr; + u32 r128G_to_higher_nsmr; + u32 embedded_nsmr; + u32 frmr; + u32 prefetch_qps; + u32 ondemand_qps; + u32 phy_mr; + u32 mw; + u32 rsvd2[7]; +}; + +struct ocrdma_db_err_stats { + u32 sq_doorbell_errors; + u32 cq_doorbell_errors; + u32 rq_srq_doorbell_errors; + u32 cq_overflow_errors; + u32 rsvd[4]; +}; + +struct ocrdma_wqe_stats { + u32 large_send_rc_wqes_lo; + u32 large_send_rc_wqes_hi; + u32 large_write_rc_wqes_lo; + u32 large_write_rc_wqes_hi; + u32 rsvd[4]; + u32 read_wqes_lo; + u32 read_wqes_hi; + u32 frmr_wqes_lo; + u32 frmr_wqes_hi; + u32 mw_bind_wqes_lo; + u32 mw_bind_wqes_hi; + u32 invalidate_wqes_lo; + u32 invalidate_wqes_hi; + u32 rsvd1[2]; + u32 dpp_wqe_drops; + u32 rsvd2[5]; +}; + +struct ocrdma_tx_stats { + u32 send_pkts_lo; + u32 send_pkts_hi; + u32 write_pkts_lo; + u32 write_pkts_hi; + u32 read_pkts_lo; + u32 read_pkts_hi; + u32 read_rsp_pkts_lo; + u32 read_rsp_pkts_hi; + u32 ack_pkts_lo; + u32 ack_pkts_hi; + u32 send_bytes_lo; + u32 send_bytes_hi; + u32 write_bytes_lo; + u32 write_bytes_hi; + u32 read_req_bytes_lo; + u32 read_req_bytes_hi; + u32 read_rsp_bytes_lo; + u32 read_rsp_bytes_hi; + u32 ack_timeouts; + u32 rsvd[5]; +}; + + +struct ocrdma_tx_qp_err_stats { + u32 local_length_errors; + u32 local_protection_errors; + u32 local_qp_operation_errors; + u32 retry_count_exceeded_errors; + u32 rnr_retry_count_exceeded_errors; + u32 rsvd[3]; +}; + +struct ocrdma_rx_stats { + u32 roce_frame_bytes_lo; + u32 roce_frame_bytes_hi; + u32 roce_frame_icrc_drops; + u32 roce_frame_payload_len_drops; + u32 ud_drops; + u32 qp1_drops; + u32 psn_error_request_packets; + u32 psn_error_resp_packets; + u32 rnr_nak_timeouts; + u32 rnr_nak_receives; + u32 roce_frame_rxmt_drops; + u32 nak_count_psn_sequence_errors; + u32 rc_drop_count_lookup_errors; + u32 rq_rnr_naks; + u32 srq_rnr_naks; + u32 roce_frames_lo; + u32 roce_frames_hi; + u32 rsvd; +}; + +struct ocrdma_rx_qp_err_stats { + u32 nak_invalid_requst_errors; + u32 nak_remote_operation_errors; + u32 nak_count_remote_access_errors; + u32 local_length_errors; + u32 local_protection_errors; + u32 local_qp_operation_errors; + u32 rsvd[2]; +}; + +struct ocrdma_tx_dbg_stats { + u32 data[100]; +}; + +struct ocrdma_rx_dbg_stats { + u32 data[200]; +}; + +struct ocrdma_rdma_stats_req { + struct ocrdma_mbx_hdr hdr; + u8 reset_stats; + u8 rsvd[3]; +} __packed; + +struct ocrdma_rdma_stats_resp { + struct ocrdma_mbx_hdr hdr; + struct ocrdma_rsrc_stats act_rsrc_stats; + struct ocrdma_rsrc_stats th_rsrc_stats; + struct ocrdma_db_err_stats db_err_stats; + struct ocrdma_wqe_stats wqe_stats; + struct ocrdma_tx_stats tx_stats; + struct ocrdma_tx_qp_err_stats tx_qp_err_stats; + struct ocrdma_rx_stats rx_stats; + struct ocrdma_rx_qp_err_stats rx_qp_err_stats; + struct ocrdma_tx_dbg_stats tx_dbg_stats; + struct ocrdma_rx_dbg_stats rx_dbg_stats; +} __packed; + + +struct mgmt_hba_attribs { + u8 flashrom_version_string[32]; + u8 manufacturer_name[32]; + u32 supported_modes; + u32 rsvd0[3]; + u8 ncsi_ver_string[12]; + u32 default_extended_timeout; + u8 controller_model_number[32]; + u8 controller_description[64]; + u8 controller_serial_number[32]; + u8 ip_version_string[32]; + u8 firmware_version_string[32]; + u8 bios_version_string[32]; + u8 redboot_version_string[32]; + u8 driver_version_string[32]; + u8 fw_on_flash_version_string[32]; + u32 functionalities_supported; + u16 max_cdblength; + u8 asic_revision; + u8 generational_guid[16]; + u8 hba_port_count; + u16 default_link_down_timeout; + u8 iscsi_ver_min_max; + u8 multifunction_device; + u8 cache_valid; + u8 hba_status; + u8 max_domains_supported; + u8 phy_port; + u32 firmware_post_status; + u32 hba_mtu[8]; + u32 rsvd1[4]; +}; + +struct mgmt_controller_attrib { + struct mgmt_hba_attribs hba_attribs; + u16 pci_vendor_id; + u16 pci_device_id; + u16 pci_sub_vendor_id; + u16 pci_sub_system_id; + u8 pci_bus_number; + u8 pci_device_number; + u8 pci_function_number; + u8 interface_type; + u64 unique_identifier; + u32 rsvd0[5]; +}; + +struct ocrdma_get_ctrl_attribs_rsp { + struct ocrdma_mbx_hdr hdr; + struct mgmt_controller_attrib ctrl_attribs; +}; + + #endif /* __OCRDMA_SLI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c new file mode 100644 index 000000000000..6c54106f5e64 --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -0,0 +1,623 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2014 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#include <rdma/ib_addr.h> +#include "ocrdma_stats.h" + +static struct dentry *ocrdma_dbgfs_dir; + +static int ocrdma_add_stat(char *start, char *pcur, + char *name, u64 count) +{ + char buff[128] = {0}; + int cpy_len = 0; + + snprintf(buff, 128, "%s: %llu\n", name, count); + cpy_len = strlen(buff); + + if (pcur + cpy_len > start + OCRDMA_MAX_DBGFS_MEM) { + pr_err("%s: No space in stats buff\n", __func__); + return 0; + } + + memcpy(pcur, buff, cpy_len); + return cpy_len; +} + +static bool ocrdma_alloc_stats_mem(struct ocrdma_dev *dev) +{ + struct stats_mem *mem = &dev->stats_mem; + + /* Alloc mbox command mem*/ + mem->size = max_t(u32, sizeof(struct ocrdma_rdma_stats_req), + sizeof(struct ocrdma_rdma_stats_resp)); + + mem->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, mem->size, + &mem->pa, GFP_KERNEL); + if (!mem->va) { + pr_err("%s: stats mbox allocation failed\n", __func__); + return false; + } + + memset(mem->va, 0, mem->size); + + /* Alloc debugfs mem */ + mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL); + if (!mem->debugfs_mem) { + pr_err("%s: stats debugfs mem allocation failed\n", __func__); + return false; + } + + return true; +} + +static void ocrdma_release_stats_mem(struct ocrdma_dev *dev) +{ + struct stats_mem *mem = &dev->stats_mem; + + if (mem->va) + dma_free_coherent(&dev->nic_info.pdev->dev, mem->size, + mem->va, mem->pa); + kfree(mem->debugfs_mem); +} + +static char *ocrdma_resource_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "active_dpp_pds", + (u64)rsrc_stats->dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "active_non_dpp_pds", + (u64)rsrc_stats->non_dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "active_rc_dpp_qps", + (u64)rsrc_stats->rc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_uc_dpp_qps", + (u64)rsrc_stats->uc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ud_dpp_qps", + (u64)rsrc_stats->ud_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_rc_non_dpp_qps", + (u64)rsrc_stats->rc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_uc_non_dpp_qps", + (u64)rsrc_stats->uc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ud_non_dpp_qps", + (u64)rsrc_stats->ud_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_srqs", + (u64)rsrc_stats->srqs); + pcur += ocrdma_add_stat(stats, pcur, "active_rbqs", + (u64)rsrc_stats->rbqs); + pcur += ocrdma_add_stat(stats, pcur, "active_64K_nsmr", + (u64)rsrc_stats->r64K_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_64K_to_2M_nsmr", + (u64)rsrc_stats->r64K_to_2M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_2M_to_44M_nsmr", + (u64)rsrc_stats->r2M_to_44M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_44M_to_1G_nsmr", + (u64)rsrc_stats->r44M_to_1G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_1G_to_4G_nsmr", + (u64)rsrc_stats->r1G_to_4G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_nsmr_count_4G_to_32G", + (u64)rsrc_stats->nsmr_count_4G_to_32G); + pcur += ocrdma_add_stat(stats, pcur, "active_32G_to_64G_nsmr", + (u64)rsrc_stats->r32G_to_64G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_64G_to_128G_nsmr", + (u64)rsrc_stats->r64G_to_128G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_128G_to_higher_nsmr", + (u64)rsrc_stats->r128G_to_higher_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_embedded_nsmr", + (u64)rsrc_stats->embedded_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "active_frmr", + (u64)rsrc_stats->frmr); + pcur += ocrdma_add_stat(stats, pcur, "active_prefetch_qps", + (u64)rsrc_stats->prefetch_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_ondemand_qps", + (u64)rsrc_stats->ondemand_qps); + pcur += ocrdma_add_stat(stats, pcur, "active_phy_mr", + (u64)rsrc_stats->phy_mr); + pcur += ocrdma_add_stat(stats, pcur, "active_mw", + (u64)rsrc_stats->mw); + + /* Print the threshold stats */ + rsrc_stats = &rdma_stats->th_rsrc_stats; + + pcur += ocrdma_add_stat(stats, pcur, "threshold_dpp_pds", + (u64)rsrc_stats->dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "threshold_non_dpp_pds", + (u64)rsrc_stats->non_dpp_pds); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_dpp_qps", + (u64)rsrc_stats->rc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_dpp_qps", + (u64)rsrc_stats->uc_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_dpp_qps", + (u64)rsrc_stats->ud_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rc_non_dpp_qps", + (u64)rsrc_stats->rc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_uc_non_dpp_qps", + (u64)rsrc_stats->uc_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ud_non_dpp_qps", + (u64)rsrc_stats->ud_non_dpp_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_srqs", + (u64)rsrc_stats->srqs); + pcur += ocrdma_add_stat(stats, pcur, "threshold_rbqs", + (u64)rsrc_stats->rbqs); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_nsmr", + (u64)rsrc_stats->r64K_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64K_to_2M_nsmr", + (u64)rsrc_stats->r64K_to_2M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_2M_to_44M_nsmr", + (u64)rsrc_stats->r2M_to_44M_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_44M_to_1G_nsmr", + (u64)rsrc_stats->r44M_to_1G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_1G_to_4G_nsmr", + (u64)rsrc_stats->r1G_to_4G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_nsmr_count_4G_to_32G", + (u64)rsrc_stats->nsmr_count_4G_to_32G); + pcur += ocrdma_add_stat(stats, pcur, "threshold_32G_to_64G_nsmr", + (u64)rsrc_stats->r32G_to_64G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_64G_to_128G_nsmr", + (u64)rsrc_stats->r64G_to_128G_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_128G_to_higher_nsmr", + (u64)rsrc_stats->r128G_to_higher_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_embedded_nsmr", + (u64)rsrc_stats->embedded_nsmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_frmr", + (u64)rsrc_stats->frmr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_prefetch_qps", + (u64)rsrc_stats->prefetch_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_ondemand_qps", + (u64)rsrc_stats->ondemand_qps); + pcur += ocrdma_add_stat(stats, pcur, "threshold_phy_mr", + (u64)rsrc_stats->phy_mr); + pcur += ocrdma_add_stat(stats, pcur, "threshold_mw", + (u64)rsrc_stats->mw); + return stats; +} + +static char *ocrdma_rx_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_stats *rx_stats = &rdma_stats->rx_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat + (stats, pcur, "roce_frame_bytes", + convert_to_64bit(rx_stats->roce_frame_bytes_lo, + rx_stats->roce_frame_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_icrc_drops", + (u64)rx_stats->roce_frame_icrc_drops); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_payload_len_drops", + (u64)rx_stats->roce_frame_payload_len_drops); + pcur += ocrdma_add_stat(stats, pcur, "ud_drops", + (u64)rx_stats->ud_drops); + pcur += ocrdma_add_stat(stats, pcur, "qp1_drops", + (u64)rx_stats->qp1_drops); + pcur += ocrdma_add_stat(stats, pcur, "psn_error_request_packets", + (u64)rx_stats->psn_error_request_packets); + pcur += ocrdma_add_stat(stats, pcur, "psn_error_resp_packets", + (u64)rx_stats->psn_error_resp_packets); + pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_timeouts", + (u64)rx_stats->rnr_nak_timeouts); + pcur += ocrdma_add_stat(stats, pcur, "rnr_nak_receives", + (u64)rx_stats->rnr_nak_receives); + pcur += ocrdma_add_stat(stats, pcur, "roce_frame_rxmt_drops", + (u64)rx_stats->roce_frame_rxmt_drops); + pcur += ocrdma_add_stat(stats, pcur, "nak_count_psn_sequence_errors", + (u64)rx_stats->nak_count_psn_sequence_errors); + pcur += ocrdma_add_stat(stats, pcur, "rc_drop_count_lookup_errors", + (u64)rx_stats->rc_drop_count_lookup_errors); + pcur += ocrdma_add_stat(stats, pcur, "rq_rnr_naks", + (u64)rx_stats->rq_rnr_naks); + pcur += ocrdma_add_stat(stats, pcur, "srq_rnr_naks", + (u64)rx_stats->srq_rnr_naks); + pcur += ocrdma_add_stat(stats, pcur, "roce_frames", + convert_to_64bit(rx_stats->roce_frames_lo, + rx_stats->roce_frames_hi)); + + return stats; +} + +static char *ocrdma_tx_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_stats *tx_stats = &rdma_stats->tx_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "send_pkts", + convert_to_64bit(tx_stats->send_pkts_lo, + tx_stats->send_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "write_pkts", + convert_to_64bit(tx_stats->write_pkts_lo, + tx_stats->write_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_pkts", + convert_to_64bit(tx_stats->read_pkts_lo, + tx_stats->read_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_rsp_pkts", + convert_to_64bit(tx_stats->read_rsp_pkts_lo, + tx_stats->read_rsp_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "ack_pkts", + convert_to_64bit(tx_stats->ack_pkts_lo, + tx_stats->ack_pkts_hi)); + pcur += ocrdma_add_stat(stats, pcur, "send_bytes", + convert_to_64bit(tx_stats->send_bytes_lo, + tx_stats->send_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "write_bytes", + convert_to_64bit(tx_stats->write_bytes_lo, + tx_stats->write_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_req_bytes", + convert_to_64bit(tx_stats->read_req_bytes_lo, + tx_stats->read_req_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_rsp_bytes", + convert_to_64bit(tx_stats->read_rsp_bytes_lo, + tx_stats->read_rsp_bytes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "ack_timeouts", + (u64)tx_stats->ack_timeouts); + + return stats; +} + +static char *ocrdma_wqe_stats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_wqe_stats *wqe_stats = &rdma_stats->wqe_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "large_send_rc_wqes", + convert_to_64bit(wqe_stats->large_send_rc_wqes_lo, + wqe_stats->large_send_rc_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "large_write_rc_wqes", + convert_to_64bit(wqe_stats->large_write_rc_wqes_lo, + wqe_stats->large_write_rc_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "read_wqes", + convert_to_64bit(wqe_stats->read_wqes_lo, + wqe_stats->read_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "frmr_wqes", + convert_to_64bit(wqe_stats->frmr_wqes_lo, + wqe_stats->frmr_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "mw_bind_wqes", + convert_to_64bit(wqe_stats->mw_bind_wqes_lo, + wqe_stats->mw_bind_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "invalidate_wqes", + convert_to_64bit(wqe_stats->invalidate_wqes_lo, + wqe_stats->invalidate_wqes_hi)); + pcur += ocrdma_add_stat(stats, pcur, "dpp_wqe_drops", + (u64)wqe_stats->dpp_wqe_drops); + return stats; +} + +static char *ocrdma_db_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_db_err_stats *db_err_stats = &rdma_stats->db_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "sq_doorbell_errors", + (u64)db_err_stats->sq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "cq_doorbell_errors", + (u64)db_err_stats->cq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "rq_srq_doorbell_errors", + (u64)db_err_stats->rq_srq_doorbell_errors); + pcur += ocrdma_add_stat(stats, pcur, "cq_overflow_errors", + (u64)db_err_stats->cq_overflow_errors); + return stats; +} + +static char *ocrdma_rxqp_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_qp_err_stats *rx_qp_err_stats = + &rdma_stats->rx_qp_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "nak_invalid_requst_errors", + (u64)rx_qp_err_stats->nak_invalid_requst_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_remote_operation_errors", + (u64)rx_qp_err_stats->nak_remote_operation_errors); + pcur += ocrdma_add_stat(stats, pcur, "nak_count_remote_access_errors", + (u64)rx_qp_err_stats->nak_count_remote_access_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_length_errors", + (u64)rx_qp_err_stats->local_length_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors", + (u64)rx_qp_err_stats->local_protection_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors", + (u64)rx_qp_err_stats->local_qp_operation_errors); + return stats; +} + +static char *ocrdma_txqp_errstats(struct ocrdma_dev *dev) +{ + char *stats = dev->stats_mem.debugfs_mem, *pcur; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_qp_err_stats *tx_qp_err_stats = + &rdma_stats->tx_qp_err_stats; + + memset(stats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + pcur = stats; + pcur += ocrdma_add_stat(stats, pcur, "local_length_errors", + (u64)tx_qp_err_stats->local_length_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_protection_errors", + (u64)tx_qp_err_stats->local_protection_errors); + pcur += ocrdma_add_stat(stats, pcur, "local_qp_operation_errors", + (u64)tx_qp_err_stats->local_qp_operation_errors); + pcur += ocrdma_add_stat(stats, pcur, "retry_count_exceeded_errors", + (u64)tx_qp_err_stats->retry_count_exceeded_errors); + pcur += ocrdma_add_stat(stats, pcur, "rnr_retry_count_exceeded_errors", + (u64)tx_qp_err_stats->rnr_retry_count_exceeded_errors); + return stats; +} + +static char *ocrdma_tx_dbg_stats(struct ocrdma_dev *dev) +{ + int i; + char *pstats = dev->stats_mem.debugfs_mem; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_tx_dbg_stats *tx_dbg_stats = + &rdma_stats->tx_dbg_stats; + + memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + for (i = 0; i < 100; i++) + pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i, + tx_dbg_stats->data[i]); + + return dev->stats_mem.debugfs_mem; +} + +static char *ocrdma_rx_dbg_stats(struct ocrdma_dev *dev) +{ + int i; + char *pstats = dev->stats_mem.debugfs_mem; + struct ocrdma_rdma_stats_resp *rdma_stats = + (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va; + struct ocrdma_rx_dbg_stats *rx_dbg_stats = + &rdma_stats->rx_dbg_stats; + + memset(pstats, 0, (OCRDMA_MAX_DBGFS_MEM)); + + for (i = 0; i < 200; i++) + pstats += snprintf(pstats, 80, "DW[%d] = 0x%x\n", i, + rx_dbg_stats->data[i]); + + return dev->stats_mem.debugfs_mem; +} + +static void ocrdma_update_stats(struct ocrdma_dev *dev) +{ + ulong now = jiffies, secs; + int status = 0; + + secs = jiffies_to_msecs(now - dev->last_stats_time) / 1000U; + if (secs) { + /* update */ + status = ocrdma_mbx_rdma_stats(dev, false); + if (status) + pr_err("%s: stats mbox failed with status = %d\n", + __func__, status); + dev->last_stats_time = jiffies; + } +} + +static ssize_t ocrdma_dbgfs_ops_read(struct file *filp, char __user *buffer, + size_t usr_buf_len, loff_t *ppos) +{ + struct ocrdma_stats *pstats = filp->private_data; + struct ocrdma_dev *dev = pstats->dev; + ssize_t status = 0; + char *data = NULL; + + /* No partial reads */ + if (*ppos != 0) + return 0; + + mutex_lock(&dev->stats_lock); + + ocrdma_update_stats(dev); + + switch (pstats->type) { + case OCRDMA_RSRC_STATS: + data = ocrdma_resource_stats(dev); + break; + case OCRDMA_RXSTATS: + data = ocrdma_rx_stats(dev); + break; + case OCRDMA_WQESTATS: + data = ocrdma_wqe_stats(dev); + break; + case OCRDMA_TXSTATS: + data = ocrdma_tx_stats(dev); + break; + case OCRDMA_DB_ERRSTATS: + data = ocrdma_db_errstats(dev); + break; + case OCRDMA_RXQP_ERRSTATS: + data = ocrdma_rxqp_errstats(dev); + break; + case OCRDMA_TXQP_ERRSTATS: + data = ocrdma_txqp_errstats(dev); + break; + case OCRDMA_TX_DBG_STATS: + data = ocrdma_tx_dbg_stats(dev); + break; + case OCRDMA_RX_DBG_STATS: + data = ocrdma_rx_dbg_stats(dev); + break; + + default: + status = -EFAULT; + goto exit; + } + + if (usr_buf_len < strlen(data)) { + status = -ENOSPC; + goto exit; + } + + status = simple_read_from_buffer(buffer, usr_buf_len, ppos, data, + strlen(data)); +exit: + mutex_unlock(&dev->stats_lock); + return status; +} + +static int ocrdma_debugfs_open(struct inode *inode, struct file *file) +{ + if (inode->i_private) + file->private_data = inode->i_private; + return 0; +} + +static const struct file_operations ocrdma_dbg_ops = { + .owner = THIS_MODULE, + .open = ocrdma_debugfs_open, + .read = ocrdma_dbgfs_ops_read, +}; + +void ocrdma_add_port_stats(struct ocrdma_dev *dev) +{ + if (!ocrdma_dbgfs_dir) + return; + + /* Create post stats base dir */ + dev->dir = debugfs_create_dir(dev->ibdev.name, ocrdma_dbgfs_dir); + if (!dev->dir) + goto err; + + dev->rsrc_stats.type = OCRDMA_RSRC_STATS; + dev->rsrc_stats.dev = dev; + if (!debugfs_create_file("resource_stats", S_IRUSR, dev->dir, + &dev->rsrc_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_stats.type = OCRDMA_RXSTATS; + dev->rx_stats.dev = dev; + if (!debugfs_create_file("rx_stats", S_IRUSR, dev->dir, + &dev->rx_stats, &ocrdma_dbg_ops)) + goto err; + + dev->wqe_stats.type = OCRDMA_WQESTATS; + dev->wqe_stats.dev = dev; + if (!debugfs_create_file("wqe_stats", S_IRUSR, dev->dir, + &dev->wqe_stats, &ocrdma_dbg_ops)) + goto err; + + dev->tx_stats.type = OCRDMA_TXSTATS; + dev->tx_stats.dev = dev; + if (!debugfs_create_file("tx_stats", S_IRUSR, dev->dir, + &dev->tx_stats, &ocrdma_dbg_ops)) + goto err; + + dev->db_err_stats.type = OCRDMA_DB_ERRSTATS; + dev->db_err_stats.dev = dev; + if (!debugfs_create_file("db_err_stats", S_IRUSR, dev->dir, + &dev->db_err_stats, &ocrdma_dbg_ops)) + goto err; + + + dev->tx_qp_err_stats.type = OCRDMA_TXQP_ERRSTATS; + dev->tx_qp_err_stats.dev = dev; + if (!debugfs_create_file("tx_qp_err_stats", S_IRUSR, dev->dir, + &dev->tx_qp_err_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_qp_err_stats.type = OCRDMA_RXQP_ERRSTATS; + dev->rx_qp_err_stats.dev = dev; + if (!debugfs_create_file("rx_qp_err_stats", S_IRUSR, dev->dir, + &dev->rx_qp_err_stats, &ocrdma_dbg_ops)) + goto err; + + + dev->tx_dbg_stats.type = OCRDMA_TX_DBG_STATS; + dev->tx_dbg_stats.dev = dev; + if (!debugfs_create_file("tx_dbg_stats", S_IRUSR, dev->dir, + &dev->tx_dbg_stats, &ocrdma_dbg_ops)) + goto err; + + dev->rx_dbg_stats.type = OCRDMA_RX_DBG_STATS; + dev->rx_dbg_stats.dev = dev; + if (!debugfs_create_file("rx_dbg_stats", S_IRUSR, dev->dir, + &dev->rx_dbg_stats, &ocrdma_dbg_ops)) + goto err; + + /* Now create dma_mem for stats mbx command */ + if (!ocrdma_alloc_stats_mem(dev)) + goto err; + + mutex_init(&dev->stats_lock); + + return; +err: + ocrdma_release_stats_mem(dev); + debugfs_remove_recursive(dev->dir); + dev->dir = NULL; +} + +void ocrdma_rem_port_stats(struct ocrdma_dev *dev) +{ + if (!dev->dir) + return; + mutex_destroy(&dev->stats_lock); + ocrdma_release_stats_mem(dev); + debugfs_remove(dev->dir); +} + +void ocrdma_init_debugfs(void) +{ + /* Create base dir in debugfs root dir */ + ocrdma_dbgfs_dir = debugfs_create_dir("ocrdma", NULL); +} + +void ocrdma_rem_debugfs(void) +{ + debugfs_remove_recursive(ocrdma_dbgfs_dir); +} diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.h b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h new file mode 100644 index 000000000000..5f5e20c46d7c --- /dev/null +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.h @@ -0,0 +1,54 @@ +/******************************************************************* + * This file is part of the Emulex RoCE Device Driver for * + * RoCE (RDMA over Converged Ethernet) adapters. * + * Copyright (C) 2008-2014 Emulex. All rights reserved. * + * EMULEX and SLI are trademarks of Emulex. * + * www.emulex.com * + * * + * This program is free software; you can redistribute it and/or * + * modify it under the terms of version 2 of the GNU General * + * Public License as published by the Free Software Foundation. * + * This program is distributed in the hope that it will be useful. * + * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * + * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * + * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * + * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * + * TO BE LEGALLY INVALID. See the GNU General Public License for * + * more details, a copy of which can be found in the file COPYING * + * included with this package. * + * + * Contact Information: + * linux-drivers@emulex.com + * + * Emulex + * 3333 Susan Street + * Costa Mesa, CA 92626 + *******************************************************************/ + +#ifndef __OCRDMA_STATS_H__ +#define __OCRDMA_STATS_H__ + +#include <linux/debugfs.h> +#include "ocrdma.h" +#include "ocrdma_hw.h" + +#define OCRDMA_MAX_DBGFS_MEM 4096 + +enum OCRDMA_STATS_TYPE { + OCRDMA_RSRC_STATS, + OCRDMA_RXSTATS, + OCRDMA_WQESTATS, + OCRDMA_TXSTATS, + OCRDMA_DB_ERRSTATS, + OCRDMA_RXQP_ERRSTATS, + OCRDMA_TXQP_ERRSTATS, + OCRDMA_TX_DBG_STATS, + OCRDMA_RX_DBG_STATS +}; + +void ocrdma_rem_debugfs(void); +void ocrdma_init_debugfs(void); +void ocrdma_rem_port_stats(struct ocrdma_dev *dev); +void ocrdma_add_port_stats(struct ocrdma_dev *dev); + +#endif /* __OCRDMA_STATS_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e0cc201be41a..edf6211d84b8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -53,7 +53,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port, dev = get_ocrdma_dev(ibdev); memset(sgid, 0, sizeof(*sgid)); - if (index >= OCRDMA_MAX_SGID) + if (index > OCRDMA_MAX_SGID) return -EINVAL; memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid)); @@ -89,7 +89,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; - attr->max_mw = 0; + attr->max_mw = dev->attr.max_mw; attr->max_pd = dev->attr.max_pd; attr->atomic_cap = 0; attr->max_fmr = 0; @@ -144,7 +144,6 @@ static inline void get_link_speed_and_width(struct ocrdma_dev *dev, } } - int ocrdma_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { @@ -267,7 +266,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, if (udata && uctx) { pd->dpp_enabled = - dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY; + ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; pd->num_dpp_qp = pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; } @@ -726,10 +725,10 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, u32 num_pbes) { struct ocrdma_pbe *pbe; - struct ib_umem_chunk *chunk; + struct scatterlist *sg; struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; struct ib_umem *umem = mr->umem; - int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + int shift, pg_cnt, pages, pbe_cnt, entry, total_num_pbes = 0; if (!mr->hwmr.num_pbes) return; @@ -739,39 +738,37 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, shift = ilog2(umem->page_size); - list_for_each_entry(chunk, &umem->chunk_list, list) { - /* get all the dma regions from the chunk. */ - for (i = 0; i < chunk->nmap; i++) { - pages = sg_dma_len(&chunk->page_list[i]) >> shift; - for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { - /* store the page address in pbe */ - pbe->pa_lo = - cpu_to_le32(sg_dma_address - (&chunk->page_list[i]) + - (umem->page_size * pg_cnt)); - pbe->pa_hi = - cpu_to_le32(upper_32_bits - ((sg_dma_address - (&chunk->page_list[i]) + - umem->page_size * pg_cnt))); - pbe_cnt += 1; - total_num_pbes += 1; - pbe++; - - /* if done building pbes, issue the mbx cmd. */ - if (total_num_pbes == num_pbes) - return; - - /* if the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == - (mr->hwmr.pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct ocrdma_pbe *)pbl_tbl->va; - pbe_cnt = 0; - } + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + pages = sg_dma_len(sg) >> shift; + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + pbe->pa_lo = + cpu_to_le32(sg_dma_address + (sg) + + (umem->page_size * pg_cnt)); + pbe->pa_hi = + cpu_to_le32(upper_32_bits + ((sg_dma_address + (sg) + + umem->page_size * pg_cnt))); + pbe_cnt += 1; + total_num_pbes += 1; + pbe++; + + /* if done building pbes, issue the mbx cmd. */ + if (total_num_pbes == num_pbes) + return; + + /* if the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == + (mr->hwmr.pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct ocrdma_pbe *)pbl_tbl->va; + pbe_cnt = 0; } + } } } @@ -840,8 +837,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey); - if (mr->hwmr.fr_mr == 0) - ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); + ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); /* it could be user registered memory. */ if (mr->umem) @@ -910,6 +906,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, spin_lock_init(&cq->comp_handler_lock); INIT_LIST_HEAD(&cq->sq_head); INIT_LIST_HEAD(&cq->rq_head); + cq->first_arm = true; if (ib_ctx) { uctx = get_ocrdma_ucontext(ib_ctx); @@ -927,9 +924,7 @@ struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector, goto ctx_err; } cq->phase = OCRDMA_CQE_VALID; - cq->arm_needed = true; dev->cq_tbl[cq->id] = cq; - return &cq->ibcq; ctx_err: @@ -952,15 +947,52 @@ int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt, return status; } +static void ocrdma_flush_cq(struct ocrdma_cq *cq) +{ + int cqe_cnt; + int valid_count = 0; + unsigned long flags; + + struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device); + struct ocrdma_cqe *cqe = NULL; + + cqe = cq->va; + cqe_cnt = cq->cqe_cnt; + + /* Last irq might have scheduled a polling thread + * sync-up with it before hard flushing. + */ + spin_lock_irqsave(&cq->cq_lock, flags); + while (cqe_cnt) { + if (is_cqe_valid(cq, cqe)) + valid_count++; + cqe++; + cqe_cnt--; + } + ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count); + spin_unlock_irqrestore(&cq->cq_lock, flags); +} + int ocrdma_destroy_cq(struct ib_cq *ibcq) { int status; struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); + struct ocrdma_eq *eq = NULL; struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); int pdid = 0; + u32 irq, indx; - status = ocrdma_mbx_destroy_cq(dev, cq); + dev->cq_tbl[cq->id] = NULL; + indx = ocrdma_get_eq_table_index(dev, cq->eqn); + if (indx == -EINVAL) + BUG(); + eq = &dev->eq_tbl[indx]; + irq = ocrdma_get_irq(dev, eq); + synchronize_irq(irq); + ocrdma_flush_cq(cq); + + status = ocrdma_mbx_destroy_cq(dev, cq); if (cq->ucontext) { pdid = cq->ucontext->cntxt_pd->id; ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, @@ -969,7 +1001,6 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq) ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } - dev->cq_tbl[cq->id] = NULL; kfree(cq); return status; @@ -1092,15 +1123,9 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, } uresp.db_page_addr = usr_db; uresp.db_page_size = dev->nic_info.db_page_size; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; - uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; - uresp.db_shift = 24; - } else { - uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET; - uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; - uresp.db_shift = 16; - } + uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET; + uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; + uresp.db_shift = OCRDMA_DB_RQ_SHIFT; if (qp->dpp_enabled) { uresp.dpp_credit = dpp_credit_lmt; @@ -1132,7 +1157,7 @@ err: static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp, struct ocrdma_pd *pd) { - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { qp->sq_db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size) + OCRDMA_DB_GEN2_SQ_OFFSET; @@ -1182,7 +1207,6 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; } - static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, struct ib_qp_init_attr *attrs) { @@ -1268,17 +1292,6 @@ gen_err: return ERR_PTR(status); } - -static void ocrdma_flush_rq_db(struct ocrdma_qp *qp) -{ - if (qp->db_cache) { - u32 val = qp->rq.dbid | (qp->db_cache << - ocrdma_get_num_posted_shift(qp)); - iowrite32(val, qp->rq_db); - qp->db_cache = 0; - } -} - int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) { @@ -1296,9 +1309,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, */ if (status < 0) return status; - status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps); - if (!status && attr_mask & IB_QP_STATE && attr->qp_state == IB_QPS_RTR) - ocrdma_flush_rq_db(qp); + status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask); return status; } @@ -1510,7 +1521,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) int discard_cnt = 0; u32 cur_getp, stop_getp; struct ocrdma_cqe *cqe; - u32 qpn = 0; + u32 qpn = 0, wqe_idx = 0; spin_lock_irqsave(&cq->cq_lock, cq_flags); @@ -1539,24 +1550,29 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq) if (qpn == 0 || qpn != qp->id) goto skip_cqe; - /* mark cqe discarded so that it is not picked up later - * in the poll_cq(). - */ - discard_cnt += 1; - cqe->cmn.qpn = 0; if (is_cqe_for_sq(cqe)) { ocrdma_hwq_inc_tail(&qp->sq); } else { if (qp->srq) { + wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> + OCRDMA_CQE_BUFTAG_SHIFT) & + qp->srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); spin_lock_irqsave(&qp->srq->q_lock, flags); ocrdma_hwq_inc_tail(&qp->srq->rq); - ocrdma_srq_toggle_bit(qp->srq, cur_getp); + ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1); spin_unlock_irqrestore(&qp->srq->q_lock, flags); } else { ocrdma_hwq_inc_tail(&qp->rq); } } + /* mark cqe discarded so that it is not picked up later + * in the poll_cq(). + */ + discard_cnt += 1; + cqe->cmn.qpn = 0; skip_cqe: cur_getp = (cur_getp + 1) % cq->max_hw_cqe; } while (cur_getp != stop_getp); @@ -1659,7 +1675,7 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq, (srq->pd->id * dev->nic_info.db_page_size); uresp.db_page_size = dev->nic_info.db_page_size; uresp.num_rqe_allocated = srq->rq.max_cnt; - if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { + if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET; uresp.db_shift = 24; } else { @@ -2009,15 +2025,15 @@ static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr, fast_reg->num_sges = wr->wr.fast_reg.page_list_len; fast_reg->size_sge = get_encoded_page_size(1 << wr->wr.fast_reg.page_shift); - mr = (struct ocrdma_mr *) (unsigned long) qp->dev->stag_arr[(hdr->lkey >> 8) & - (OCRDMA_MAX_STAG - 1)]; + mr = (struct ocrdma_mr *) (unsigned long) + qp->dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)]; build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr); return 0; } static void ocrdma_ring_sq_db(struct ocrdma_qp *qp) { - u32 val = qp->sq.dbid | (1 << 16); + u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT); iowrite32(val, qp->sq_db); } @@ -2122,12 +2138,9 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, static void ocrdma_ring_rq_db(struct ocrdma_qp *qp) { - u32 val = qp->rq.dbid | (1 << ocrdma_get_num_posted_shift(qp)); + u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT); - if (qp->state != OCRDMA_QPS_INIT) - iowrite32(val, qp->rq_db); - else - qp->db_cache++; + iowrite32(val, qp->rq_db); } static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr, @@ -2213,7 +2226,7 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq) if (row == srq->bit_fields_len) BUG(); - return indx; + return indx + 1; /* Use from index 1 */ } static void ocrdma_ring_srq_db(struct ocrdma_srq *srq) @@ -2550,10 +2563,13 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc, srq = get_ocrdma_srq(qp->ibqp.srq); wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >> - OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; + OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx; + if (wqe_idx < 1) + BUG(); + ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx]; spin_lock_irqsave(&srq->q_lock, flags); - ocrdma_srq_toggle_bit(srq, wqe_idx); + ocrdma_srq_toggle_bit(srq, wqe_idx - 1); spin_unlock_irqrestore(&srq->q_lock, flags); ocrdma_hwq_inc_tail(&srq->rq); } @@ -2705,10 +2721,18 @@ expand_cqe: } stop_cqe: cq->getp = cur_getp; - if (polled_hw_cqes || expand || stop) { - ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited, + if (cq->deferred_arm) { + ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol, + polled_hw_cqes); + cq->deferred_arm = false; + cq->deferred_sol = false; + } else { + /* We need to pop the CQE. No need to arm */ + ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol, polled_hw_cqes); + cq->deferred_sol = false; } + return i; } @@ -2780,30 +2804,28 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags) struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device); u16 cq_id; - u16 cur_getp; - struct ocrdma_cqe *cqe; unsigned long flags; + bool arm_needed = false, sol_needed = false; cq_id = cq->id; spin_lock_irqsave(&cq->cq_lock, flags); if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED) - cq->armed = true; + arm_needed = true; if (cq_flags & IB_CQ_SOLICITED) - cq->solicited = true; - - cur_getp = cq->getp; - cqe = cq->va + cur_getp; + sol_needed = true; - /* check whether any valid cqe exist or not, if not then safe to - * arm. If cqe is not yet consumed, then let it get consumed and then - * we arm it to avoid false interrupts. - */ - if (!is_cqe_valid(cq, cqe) || cq->arm_needed) { - cq->arm_needed = false; - ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0); + if (cq->first_arm) { + ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0); + cq->first_arm = false; + goto skip_defer; } + cq->deferred_arm = true; + +skip_defer: + cq->deferred_sol = sol_needed; spin_unlock_irqrestore(&cq->cq_lock, flags); + return 0; } @@ -2838,7 +2860,8 @@ struct ib_mr *ocrdma_alloc_frmr(struct ib_pd *ibpd, int max_page_list_len) goto mbx_err; mr->ibmr.rkey = mr->hwmr.lkey; mr->ibmr.lkey = mr->hwmr.lkey; - dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = mr; + dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] = + (unsigned long) mr; return &mr->ibmr; mbx_err: ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 1946101419a3..c00ae093b6f8 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -868,8 +868,10 @@ struct qib_devdata { /* last buffer for user use */ u32 lastctxt_piobuf; - /* saturating counter of (non-port-specific) device interrupts */ - u32 int_counter; + /* reset value */ + u64 z_int_counter; + /* percpu intcounter */ + u64 __percpu *int_counter; /* pio bufs allocated per ctxt */ u32 pbufsctxt; @@ -1184,7 +1186,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *); void qib_set_ctxtcnt(struct qib_devdata *); int qib_create_ctxts(struct qib_devdata *dd); struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int); -void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); +int qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); u32 qib_kreceive(struct qib_ctxtdata *, u32 *, u32 *); @@ -1449,6 +1451,10 @@ void qib_nomsi(struct qib_devdata *); void qib_nomsix(struct qib_devdata *); void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *); void qib_pcie_reenable(struct qib_devdata *, u16, u8, u8); +/* interrupts for device */ +u64 qib_int_counter(struct qib_devdata *); +/* interrupt for all devices */ +u64 qib_sps_ints(void); /* * dma_addr wrappers - all 0's invalid for hw diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index 1686fd4bda87..5dfda4c5cc9c 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -546,7 +546,7 @@ static ssize_t qib_diagpkt_write(struct file *fp, size_t count, loff_t *off) { u32 __iomem *piobuf; - u32 plen, clen, pbufn; + u32 plen, pbufn, maxlen_reserve; struct qib_diag_xpkt dp; u32 *tmpbuf = NULL; struct qib_devdata *dd; @@ -590,15 +590,20 @@ static ssize_t qib_diagpkt_write(struct file *fp, } ppd = &dd->pport[dp.port - 1]; - /* need total length before first word written */ - /* +1 word is for the qword padding */ - plen = sizeof(u32) + dp.len; - clen = dp.len >> 2; - - if ((plen + 4) > ppd->ibmaxlen) { + /* + * need total length before first word written, plus 2 Dwords. One Dword + * is for padding so we get the full user data when not aligned on + * a word boundary. The other Dword is to make sure we have room for the + * ICRC which gets tacked on later. + */ + maxlen_reserve = 2 * sizeof(u32); + if (dp.len > ppd->ibmaxlen - maxlen_reserve) { ret = -EINVAL; - goto bail; /* before writing pbc */ + goto bail; } + + plen = sizeof(u32) + dp.len; + tmpbuf = vmalloc(plen); if (!tmpbuf) { qib_devinfo(dd->pcidev, @@ -638,11 +643,11 @@ static ssize_t qib_diagpkt_write(struct file *fp, */ if (dd->flags & QIB_PIO_FLUSH_WC) { qib_flush_wc(); - qib_pio_copy(piobuf + 2, tmpbuf, clen - 1); + qib_pio_copy(piobuf + 2, tmpbuf, plen - 1); qib_flush_wc(); - __raw_writel(tmpbuf[clen - 1], piobuf + clen + 1); + __raw_writel(tmpbuf[plen - 1], piobuf + plen + 1); } else - qib_pio_copy(piobuf + 2, tmpbuf, clen); + qib_pio_copy(piobuf + 2, tmpbuf, plen); if (dd->flags & QIB_USE_SPCL_TRIG) { u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023; @@ -689,28 +694,23 @@ int qib_register_observer(struct qib_devdata *dd, const struct diag_observer *op) { struct diag_observer_list_elt *olp; - int ret = -EINVAL; + unsigned long flags; if (!dd || !op) - goto bail; - ret = -ENOMEM; + return -EINVAL; olp = vmalloc(sizeof *olp); if (!olp) { pr_err("vmalloc for observer failed\n"); - goto bail; + return -ENOMEM; } - if (olp) { - unsigned long flags; - spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); - olp->op = op; - olp->next = dd->diag_observer_list; - dd->diag_observer_list = olp; - spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); - ret = 0; - } -bail: - return ret; + spin_lock_irqsave(&dd->qib_diag_trans_lock, flags); + olp->op = op; + olp->next = dd->diag_observer_list; + dd->diag_observer_list = olp; + spin_unlock_irqrestore(&dd->qib_diag_trans_lock, flags); + + return 0; } /* Remove all registered observers when device is closed */ diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c index 2920bb39a65b..59fe092b4b0f 100644 --- a/drivers/infiniband/hw/qib/qib_dma.c +++ b/drivers/infiniband/hw/qib/qib_dma.c @@ -108,6 +108,10 @@ static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl, ret = 0; break; } + sg->dma_address = addr + sg->offset; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->dma_length = sg->length; +#endif } return ret; } @@ -119,21 +123,6 @@ static void qib_unmap_sg(struct ib_device *dev, BUG_ON(!valid_dma_direction(direction)); } -static u64 qib_sg_dma_address(struct ib_device *dev, struct scatterlist *sg) -{ - u64 addr = (u64) page_address(sg_page(sg)); - - if (addr) - addr += sg->offset; - return addr; -} - -static unsigned int qib_sg_dma_len(struct ib_device *dev, - struct scatterlist *sg) -{ - return sg->length; -} - static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -173,8 +162,6 @@ struct ib_dma_mapping_ops qib_dma_mapping_ops = { .unmap_page = qib_dma_unmap_page, .map_sg = qib_map_sg, .unmap_sg = qib_unmap_sg, - .dma_address = qib_sg_dma_address, - .dma_len = qib_sg_dma_len, .sync_single_for_cpu = qib_sync_single_for_cpu, .sync_single_for_device = qib_sync_single_for_device, .alloc_coherent = qib_dma_alloc_coherent, diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 275f247f9fca..b15e34eeef68 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1459,7 +1459,7 @@ static int get_a_ctxt(struct file *fp, const struct qib_user_info *uinfo, cused++; else cfree++; - if (pusable && cfree && cused < inuse) { + if (cfree && cused < inuse) { udd = dd; inuse = cused; } @@ -1578,7 +1578,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) struct qib_ctxtdata *rcd = fd->rcd; struct qib_devdata *dd = rcd->dd; - if (dd->flags & QIB_HAS_SEND_DMA) + if (dd->flags & QIB_HAS_SEND_DMA) { fd->pq = qib_user_sdma_queue_create(&dd->pcidev->dev, dd->unit, @@ -1586,6 +1586,7 @@ static int do_qib_user_sdma_queue_create(struct file *fp) fd->subctxt); if (!fd->pq) return -ENOMEM; + } return 0; } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index c61e2a92b3c1..cab610ccd50e 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -105,6 +105,7 @@ static int create_file(const char *name, umode_t mode, static ssize_t driver_stats_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + qib_stats.sps_ints = qib_sps_ints(); return simple_read_from_buffer(buf, count, ppos, &qib_stats, sizeof qib_stats); } diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 84e593d6007b..d68266ac7619 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1634,9 +1634,7 @@ static irqreturn_t qib_6120intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) @@ -1808,7 +1806,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler re-ordering around actual reset */ @@ -3266,7 +3265,9 @@ static int init_6120_variables(struct qib_devdata *dd) dd->eep_st_masks[2].errs_to_log = ERR_MASK(ResetNegated); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR; ppd->link_width_enabled = IB_WIDTH_4X; diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 454c2e7668fe..7dec89fdc124 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1962,10 +1962,7 @@ static irqreturn_t qib_7220intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; - + this_cpu_inc(*dd->int_counter); if (unlikely(istat & (~QLOGIC_IB_I_BITSEXTANT | QLOGIC_IB_I_GPIO | QLOGIC_IB_I_ERROR))) unlikely_7220_intr(dd, istat); @@ -2120,7 +2117,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd) * isn't set. */ dd->flags &= ~(QIB_INITTED | QIB_PRESENT); - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); val = dd->control | QLOGIC_IB_C_RESET; writeq(val, &dd->kregbase[kr_control]); mb(); /* prevent compiler reordering around actual reset */ @@ -4061,7 +4059,9 @@ static int qib_init_7220_variables(struct qib_devdata *dd) init_waitqueue_head(&cpspec->autoneg_wait); INIT_DELAYED_WORK(&cpspec->autoneg_work, autoneg_7220_work); - qib_init_pportdata(ppd, dd, 0, 1); + ret = qib_init_pportdata(ppd, dd, 0, 1); + if (ret) + goto bail; ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_speed_supported = QIB_IB_SDR | QIB_IB_DDR; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index d1bd21319d7d..a7eb32517a04 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -3115,9 +3115,7 @@ static irqreturn_t qib_7322intr(int irq, void *data) goto bail; } - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* handle "errors" of various kinds first, device ahead of port */ if (unlikely(istat & (~QIB_I_BITSEXTANT | QIB_I_GPIO | @@ -3186,9 +3184,7 @@ static irqreturn_t qib_7322pintr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ((1ULL << QIB_I_RCVAVAIL_LSB) | @@ -3215,9 +3211,7 @@ static irqreturn_t qib_7322bufavail(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, QIB_I_SPIOBUFAVAIL); @@ -3248,9 +3242,7 @@ static irqreturn_t sdma_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3277,9 +3269,7 @@ static irqreturn_t sdma_idle_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3306,9 +3296,7 @@ static irqreturn_t sdma_progress_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3336,9 +3324,7 @@ static irqreturn_t sdma_cleanup_intr(int irq, void *data) */ return IRQ_HANDLED; - qib_stats.sps_ints++; - if (dd->int_counter != (u32) -1) - dd->int_counter++; + this_cpu_inc(*dd->int_counter); /* Clear the interrupt bit we expect to be set. */ qib_write_kreg(dd, kr_intclear, ppd->hw_pidx ? @@ -3723,7 +3709,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd) dd->pport->cpspec->ibsymdelta = 0; dd->pport->cpspec->iblnkerrdelta = 0; dd->pport->cpspec->ibmalfdelta = 0; - dd->int_counter = 0; /* so we check interrupts work again */ + /* so we check interrupts work again */ + dd->z_int_counter = qib_int_counter(dd); /* * Keep chip from being accessed until we are ready. Use @@ -6557,7 +6544,11 @@ static int qib_init_7322_variables(struct qib_devdata *dd) } dd->num_pports++; - qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + ret = qib_init_pportdata(ppd, dd, pidx, dd->num_pports); + if (ret) { + dd->num_pports--; + goto bail; + } ppd->link_width_supported = IB_WIDTH_1X | IB_WIDTH_4X; ppd->link_width_enabled = IB_WIDTH_4X; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 24e802f4ea2f..5b7aeb224a30 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -130,7 +130,6 @@ void qib_set_ctxtcnt(struct qib_devdata *dd) int qib_create_ctxts(struct qib_devdata *dd) { unsigned i; - int ret; int local_node_id = pcibus_to_node(dd->pcidev->bus); if (local_node_id < 0) @@ -145,8 +144,7 @@ int qib_create_ctxts(struct qib_devdata *dd) if (!dd->rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata array, failing\n"); - ret = -ENOMEM; - goto done; + return -ENOMEM; } /* create (one or more) kctxt */ @@ -163,15 +161,14 @@ int qib_create_ctxts(struct qib_devdata *dd) if (!rcd) { qib_dev_err(dd, "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); - ret = -ENOMEM; - goto done; + kfree(dd->rcd); + dd->rcd = NULL; + return -ENOMEM; } rcd->pkeys[0] = QIB_DEFAULT_P_KEY; rcd->seq_cnt = 1; } - ret = 0; -done: - return ret; + return 0; } /* @@ -233,7 +230,7 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, /* * Common code for initializing the physical port structure. */ -void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, +int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, u8 hw_pidx, u8 port) { int size; @@ -243,6 +240,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, spin_lock_init(&ppd->sdma_lock); spin_lock_init(&ppd->lflags_lock); + spin_lock_init(&ppd->cc_shadow_lock); init_waitqueue_head(&ppd->state_wait); init_timer(&ppd->symerr_clear_timer); @@ -250,8 +248,10 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, ppd->symerr_clear_timer.data = (unsigned long)ppd; ppd->qib_wq = NULL; - - spin_lock_init(&ppd->cc_shadow_lock); + ppd->ibport_data.pmastats = + alloc_percpu(struct qib_pma_counters); + if (!ppd->ibport_data.pmastats) + return -ENOMEM; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) goto bail; @@ -299,7 +299,7 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, goto bail_3; } - return; + return 0; bail_3: kfree(ppd->ccti_entries_shadow); @@ -313,7 +313,7 @@ bail_1: bail: /* User is intentionally disabling the congestion control agent */ if (!qib_cc_table_size) - return; + return 0; if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) { qib_cc_table_size = 0; @@ -324,7 +324,7 @@ bail: qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port); - return; + return 0; } static int init_pioavailregs(struct qib_devdata *dd) @@ -525,6 +525,7 @@ static void enable_chip(struct qib_devdata *dd) static void verify_interrupt(unsigned long opaque) { struct qib_devdata *dd = (struct qib_devdata *) opaque; + u64 int_counter; if (!dd) return; /* being torn down */ @@ -533,7 +534,8 @@ static void verify_interrupt(unsigned long opaque) * If we don't have a lid or any interrupts, let the user know and * don't bother checking again. */ - if (dd->int_counter == 0) { + int_counter = qib_int_counter(dd) - dd->z_int_counter; + if (int_counter == 0) { if (!dd->f_intr_fallback(dd)) dev_err(&dd->pcidev->dev, "No interrupts detected, not usable.\n"); @@ -633,6 +635,12 @@ wq_error: return -ENOMEM; } +static void qib_free_pportdata(struct qib_pportdata *ppd) +{ + free_percpu(ppd->ibport_data.pmastats); + ppd->ibport_data.pmastats = NULL; +} + /** * qib_init - do the actual initialization sequence on the chip * @dd: the qlogic_ib device @@ -920,6 +928,7 @@ static void qib_shutdown_device(struct qib_devdata *dd) destroy_workqueue(ppd->qib_wq); ppd->qib_wq = NULL; } + qib_free_pportdata(ppd); } qib_update_eeprom_log(dd); @@ -1079,9 +1088,34 @@ void qib_free_devdata(struct qib_devdata *dd) #ifdef CONFIG_DEBUG_FS qib_dbg_ibdev_exit(&dd->verbs_dev); #endif + free_percpu(dd->int_counter); ib_dealloc_device(&dd->verbs_dev.ibdev); } +u64 qib_int_counter(struct qib_devdata *dd) +{ + int cpu; + u64 int_counter = 0; + + for_each_possible_cpu(cpu) + int_counter += *per_cpu_ptr(dd->int_counter, cpu); + return int_counter; +} + +u64 qib_sps_ints(void) +{ + unsigned long flags; + struct qib_devdata *dd; + u64 sps_ints = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + sps_ints += qib_int_counter(dd); + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return sps_ints; +} + /* * Allocate our primary per-unit data structure. Must be done via verbs * allocator, because the verbs cleanup process both does cleanup and @@ -1097,14 +1131,10 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) int ret; dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra); - if (!dd) { - dd = ERR_PTR(-ENOMEM); - goto bail; - } + if (!dd) + return ERR_PTR(-ENOMEM); -#ifdef CONFIG_DEBUG_FS - qib_dbg_ibdev_init(&dd->verbs_dev); -#endif + INIT_LIST_HEAD(&dd->list); idr_preload(GFP_KERNEL); spin_lock_irqsave(&qib_devs_lock, flags); @@ -1121,11 +1151,13 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) if (ret < 0) { qib_early_err(&pdev->dev, "Could not allocate unit ID: error %d\n", -ret); -#ifdef CONFIG_DEBUG_FS - qib_dbg_ibdev_exit(&dd->verbs_dev); -#endif - ib_dealloc_device(&dd->verbs_dev.ibdev); - dd = ERR_PTR(ret); + goto bail; + } + dd->int_counter = alloc_percpu(u64); + if (!dd->int_counter) { + ret = -ENOMEM; + qib_early_err(&pdev->dev, + "Could not allocate per-cpu int_counter\n"); goto bail; } @@ -1139,9 +1171,15 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra) qib_early_err(&pdev->dev, "Could not alloc cpulist info, cpu affinity might be wrong\n"); } - -bail: +#ifdef CONFIG_DEBUG_FS + qib_dbg_ibdev_init(&dd->verbs_dev); +#endif return dd; +bail: + if (!list_empty(&dd->list)) + list_del_init(&dd->list); + ib_dealloc_device(&dd->verbs_dev.ibdev); + return ERR_PTR(ret);; } /* diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index ccb119143d20..edad991d60ed 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -1634,6 +1634,23 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp, return reply((struct ib_smp *)pmp); } +static void qib_snapshot_pmacounters( + struct qib_ibport *ibp, + struct qib_pma_counters *pmacounters) +{ + struct qib_pma_counters *p; + int cpu; + + memset(pmacounters, 0, sizeof(*pmacounters)); + for_each_possible_cpu(cpu) { + p = per_cpu_ptr(ibp->pmastats, cpu); + pmacounters->n_unicast_xmit += p->n_unicast_xmit; + pmacounters->n_unicast_rcv += p->n_unicast_rcv; + pmacounters->n_multicast_xmit += p->n_multicast_xmit; + pmacounters->n_multicast_rcv += p->n_multicast_rcv; + } +} + static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct ib_device *ibdev, u8 port) { @@ -1642,6 +1659,7 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; u8 port_select = p->port_select; memset(pmp->data, 0, sizeof(pmp->data)); @@ -1664,10 +1682,17 @@ static int pma_get_portcounters_ext(struct ib_pma_mad *pmp, p->port_rcv_data = cpu_to_be64(rwords); p->port_xmit_packets = cpu_to_be64(spkts); p->port_rcv_packets = cpu_to_be64(rpkts); - p->port_unicast_xmit_packets = cpu_to_be64(ibp->n_unicast_xmit); - p->port_unicast_rcv_packets = cpu_to_be64(ibp->n_unicast_rcv); - p->port_multicast_xmit_packets = cpu_to_be64(ibp->n_multicast_xmit); - p->port_multicast_rcv_packets = cpu_to_be64(ibp->n_multicast_rcv); + + qib_snapshot_pmacounters(ibp, &pma); + + p->port_unicast_xmit_packets = cpu_to_be64(pma.n_unicast_xmit + - ibp->z_unicast_xmit); + p->port_unicast_rcv_packets = cpu_to_be64(pma.n_unicast_rcv + - ibp->z_unicast_rcv); + p->port_multicast_xmit_packets = cpu_to_be64(pma.n_multicast_xmit + - ibp->z_multicast_xmit); + p->port_multicast_rcv_packets = cpu_to_be64(pma.n_multicast_rcv + - ibp->z_multicast_rcv); bail: return reply((struct ib_smp *) pmp); @@ -1795,6 +1820,7 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, struct qib_ibport *ibp = to_iport(ibdev, port); struct qib_pportdata *ppd = ppd_from_ibp(ibp); u64 swords, rwords, spkts, rpkts, xwait; + struct qib_pma_counters pma; qib_snapshot_counters(ppd, &swords, &rwords, &spkts, &rpkts, &xwait); @@ -1810,17 +1836,19 @@ static int pma_set_portcounters_ext(struct ib_pma_mad *pmp, if (p->counter_select & IB_PMA_SELX_PORT_RCV_PACKETS) ibp->z_port_rcv_packets = rpkts; + qib_snapshot_pmacounters(ibp, &pma); + if (p->counter_select & IB_PMA_SELX_PORT_UNI_XMIT_PACKETS) - ibp->n_unicast_xmit = 0; + ibp->z_unicast_xmit = pma.n_unicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_UNI_RCV_PACKETS) - ibp->n_unicast_rcv = 0; + ibp->z_unicast_rcv = pma.n_unicast_rcv; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_XMIT_PACKETS) - ibp->n_multicast_xmit = 0; + ibp->z_multicast_xmit = pma.n_multicast_xmit; if (p->counter_select & IB_PMA_SELX_PORT_MULTI_RCV_PACKETS) - ibp->n_multicast_rcv = 0; + ibp->z_multicast_rcv = pma.n_multicast_rcv; return pma_get_portcounters_ext(pmp, ibdev, port); } diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c index e6687ded8210..9bbb55347cc1 100644 --- a/drivers/infiniband/hw/qib/qib_mr.c +++ b/drivers/infiniband/hw/qib/qib_mr.c @@ -232,8 +232,8 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct qib_mr *mr; struct ib_umem *umem; - struct ib_umem_chunk *chunk; - int n, m, i; + struct scatterlist *sg; + int n, m, entry; struct ib_mr *ret; if (length == 0) { @@ -246,9 +246,7 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (IS_ERR(umem)) return (void *) umem; - n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) - n += chunk->nents; + n = umem->nmap; mr = alloc_mr(n, pd); if (IS_ERR(mr)) { @@ -268,11 +266,10 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.page_shift = ilog2(umem->page_size); m = 0; n = 0; - list_for_each_entry(chunk, &umem->chunk_list, list) { - for (i = 0; i < chunk->nents; i++) { + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { void *vaddr; - vaddr = page_address(sg_page(&chunk->page_list[i])); + vaddr = page_address(sg_page(sg)); if (!vaddr) { ret = ERR_PTR(-EINVAL); goto bail; @@ -284,7 +281,6 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, m++; n = 0; } - } } ret = &mr->ibmr; diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 3ab341320ead..2f2501890c4e 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -752,7 +752,7 @@ void qib_send_rc_ack(struct qib_qp *qp) qib_flush_wc(); qib_sendbuf_done(dd, pbufn); - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); goto done; queue_ack: diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 357b6cfcd46c..4c07a8b34ffe 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -703,6 +703,7 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr, ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(qp->remote_qpn); ohdr->bth[2] = cpu_to_be32(bth2); + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } /** diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 3ad651c3356c..aaf7039f8ed2 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -280,11 +280,11 @@ int qib_make_ud_req(struct qib_qp *qp) ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) { if (ah_attr->dlid != QIB_PERMISSIVE_LID) - ibp->n_multicast_xmit++; + this_cpu_inc(ibp->pmastats->n_multicast_xmit); else - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); } else { - ibp->n_unicast_xmit++; + this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { /* diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 165aee2ca8a0..d2806cae234c 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -52,6 +52,17 @@ /* attempt to drain the queue for 5secs */ #define QIB_USER_SDMA_DRAIN_TIMEOUT 500 +/* + * track how many times a process open this driver. + */ +static struct rb_root qib_user_sdma_rb_root = RB_ROOT; + +struct qib_user_sdma_rb_node { + struct rb_node node; + int refcount; + pid_t pid; +}; + struct qib_user_sdma_pkt { struct list_head list; /* list element */ @@ -120,15 +131,60 @@ struct qib_user_sdma_queue { /* dma page table */ struct rb_root dma_pages_root; + struct qib_user_sdma_rb_node *sdma_rb_node; + /* protect everything above... */ struct mutex lock; }; +static struct qib_user_sdma_rb_node * +qib_user_sdma_rb_search(struct rb_root *root, pid_t pid) +{ + struct qib_user_sdma_rb_node *sdma_rb_node; + struct rb_node *node = root->rb_node; + + while (node) { + sdma_rb_node = container_of(node, + struct qib_user_sdma_rb_node, node); + if (pid < sdma_rb_node->pid) + node = node->rb_left; + else if (pid > sdma_rb_node->pid) + node = node->rb_right; + else + return sdma_rb_node; + } + return NULL; +} + +static int +qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new) +{ + struct rb_node **node = &(root->rb_node); + struct rb_node *parent = NULL; + struct qib_user_sdma_rb_node *got; + + while (*node) { + got = container_of(*node, struct qib_user_sdma_rb_node, node); + parent = *node; + if (new->pid < got->pid) + node = &((*node)->rb_left); + else if (new->pid > got->pid) + node = &((*node)->rb_right); + else + return 0; + } + + rb_link_node(&new->node, parent, node); + rb_insert_color(&new->node, root); + return 1; +} + struct qib_user_sdma_queue * qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) { struct qib_user_sdma_queue *pq = kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL); + struct qib_user_sdma_rb_node *sdma_rb_node; if (!pq) goto done; @@ -138,6 +194,7 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->num_pending = 0; pq->num_sending = 0; pq->added = 0; + pq->sdma_rb_node = NULL; INIT_LIST_HEAD(&pq->sent); spin_lock_init(&pq->sent_lock); @@ -163,8 +220,30 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) pq->dma_pages_root = RB_ROOT; + sdma_rb_node = qib_user_sdma_rb_search(&qib_user_sdma_rb_root, + current->pid); + if (sdma_rb_node) { + sdma_rb_node->refcount++; + } else { + int ret; + sdma_rb_node = kmalloc(sizeof( + struct qib_user_sdma_rb_node), GFP_KERNEL); + if (!sdma_rb_node) + goto err_rb; + + sdma_rb_node->refcount = 1; + sdma_rb_node->pid = current->pid; + + ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, + sdma_rb_node); + BUG_ON(ret == 0); + } + pq->sdma_rb_node = sdma_rb_node; + goto done; +err_rb: + dma_pool_destroy(pq->header_cache); err_slab: kmem_cache_destroy(pq->pkt_slab); err_kfree: @@ -1020,8 +1099,13 @@ void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq) if (!pq) return; - kmem_cache_destroy(pq->pkt_slab); + pq->sdma_rb_node->refcount--; + if (pq->sdma_rb_node->refcount == 0) { + rb_erase(&pq->sdma_rb_node->node, &qib_user_sdma_rb_root); + kfree(pq->sdma_rb_node); + } dma_pool_destroy(pq->header_cache); + kmem_cache_destroy(pq->pkt_slab); kfree(pq); } @@ -1241,26 +1325,52 @@ static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd, struct qib_user_sdma_queue *pq, struct list_head *pktlist, int count) { - int ret = 0; unsigned long flags; if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE))) return -ECOMM; - spin_lock_irqsave(&ppd->sdma_lock, flags); - - if (unlikely(!__qib_sdma_running(ppd))) { - ret = -ECOMM; - goto unlock; + /* non-blocking mode */ + if (pq->sdma_rb_node->refcount > 1) { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + pq->num_pending += count; + list_splice_tail_init(pktlist, &ppd->sdma_userpending); + qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return 0; } + /* In this case, descriptors from this process are not + * linked to ppd pending queue, interrupt handler + * won't update this process, it is OK to directly + * modify without sdma lock. + */ + + pq->num_pending += count; - list_splice_tail_init(pktlist, &ppd->sdma_userpending); - qib_user_sdma_send_desc(ppd, &ppd->sdma_userpending); + /* + * Blocking mode for single rail process, we must + * release/regain sdma_lock to give other process + * chance to make progress. This is important for + * performance. + */ + do { + spin_lock_irqsave(&ppd->sdma_lock, flags); + if (unlikely(!__qib_sdma_running(ppd))) { + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + return -ECOMM; + } + qib_user_sdma_send_desc(ppd, pktlist); + if (!list_empty(pktlist)) + qib_sdma_make_progress(ppd); + spin_unlock_irqrestore(&ppd->sdma_lock, flags); + } while (!list_empty(pktlist)); -unlock: - spin_unlock_irqrestore(&ppd->sdma_lock, flags); - return ret; + return 0; } int qib_user_sdma_writev(struct qib_ctxtdata *rcd, @@ -1290,7 +1400,7 @@ int qib_user_sdma_writev(struct qib_ctxtdata *rcd, qib_user_sdma_queue_clean(ppd, pq); while (dim) { - int mxp = 8; + int mxp = 1; int ndesc = 0; ret = qib_user_sdma_queue_pkts(dd, ppd, pq, diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 092b0bb1bb78..9bcfbd842980 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -662,7 +662,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid); if (mcast == NULL) goto drop; - ibp->n_multicast_rcv++; + this_cpu_inc(ibp->pmastats->n_multicast_rcv); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); /* @@ -678,8 +678,8 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) &rcd->lookaside_qp->refcount)) wake_up( &rcd->lookaside_qp->wait); - rcd->lookaside_qp = NULL; - } + rcd->lookaside_qp = NULL; + } } if (!rcd->lookaside_qp) { qp = qib_lookup_qpn(ibp, qp_num); @@ -689,7 +689,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) rcd->lookaside_qpn = qp_num; } else qp = rcd->lookaside_qp; - ibp->n_unicast_rcv++; + this_cpu_inc(ibp->pmastats->n_unicast_rcv); qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); } return; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index a01c7d2cf541..bfc8948fdd35 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -664,6 +664,13 @@ struct qib_opcode_stats_perctx { struct qib_opcode_stats stats[128]; }; +struct qib_pma_counters { + u64 n_unicast_xmit; /* total unicast packets sent */ + u64 n_unicast_rcv; /* total unicast packets received */ + u64 n_multicast_xmit; /* total multicast packets sent */ + u64 n_multicast_rcv; /* total multicast packets received */ +}; + struct qib_ibport { struct qib_qp __rcu *qp0; struct qib_qp __rcu *qp1; @@ -680,10 +687,11 @@ struct qib_ibport { __be64 mkey; __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */ u64 tid; /* TID for traps */ - u64 n_unicast_xmit; /* total unicast packets sent */ - u64 n_unicast_rcv; /* total unicast packets received */ - u64 n_multicast_xmit; /* total multicast packets sent */ - u64 n_multicast_rcv; /* total multicast packets received */ + struct qib_pma_counters __percpu *pmastats; + u64 z_unicast_xmit; /* starting count for PMA */ + u64 z_unicast_rcv; /* starting count for PMA */ + u64 z_multicast_xmit; /* starting count for PMA */ + u64 z_multicast_rcv; /* starting count for PMA */ u64 z_symbol_error_counter; /* starting count for PMA */ u64 z_link_error_recovery_counter; /* starting count for PMA */ u64 z_link_downed_counter; /* starting count for PMA */ diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 16755cdab2c0..801a1d6937e4 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -286,7 +286,7 @@ iter_chunk: err = iommu_map(pd->domain, va_start, pa_start, size, flags); if (err) { - usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n", + usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n", va_start, &pa_start, size, err); goto err_out; } |