From 55abf8df0aa080eb474f7f46337503351890b361 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:50 +0000 Subject: RDMA/cxgb4: Abort connections that receive unexpected streaming mode data This error means the RDMA connection was knocked out of RDMA mode, probably due to an error on the connection. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index c13745cde7fa..9cab6a6eb96a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1391,30 +1391,31 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) skb_pull(skb, sizeof(*hdr)); skb_trim(skb, dlen); - ep->rcv_seq += dlen; - BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen)); - /* update RX credits */ update_rx_credits(ep, dlen); switch (state_read(&ep->com)) { case MPA_REQ_SENT: + ep->rcv_seq += dlen; process_mpa_reply(ep, skb); break; case MPA_REQ_WAIT: + ep->rcv_seq += dlen; process_mpa_request(ep, skb); break; - case MPA_REP_SENT: - break; default: pr_err("%s Unexpected streaming data." \ " ep %p state %d tid %u status %d\n", __func__, ep, state_read(&ep->com), ep->hwtid, status); - /* - * The ep will timeout and inform the ULP of the failure. - * See ep_timeout(). - */ + if (ep->com.qp) { + struct c4iw_qp_attributes attrs; + + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + c4iw_ep_disconnect(ep, 1, GFP_KERNEL); break; } return 0; -- cgit v1.2.3 From 91e9c07195032bbde47489b8b423053cff5f413d Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:51 +0000 Subject: RDMA/cxgb4: Abort connections when moving to ERROR state If a FINI operation fails, then we need to ABORT instead of CLOSE. Also, if we ABORT due to unexpected STREAMING data, then wake up anybody blocked in FINI... Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 1 + drivers/infiniband/hw/cxgb4/qp.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 9cab6a6eb96a..3dce47a63709 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1438,6 +1438,7 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb) mutex_lock(&ep->com.mutex); switch (ep->com.state) { case ABORTING: + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); __state_set(&ep->com, DEAD); release = 1; break; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 05bfe53bff64..17ba4f8bc12d 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1383,6 +1383,7 @@ err: qhp->ep = NULL; set_state(qhp, C4IW_QP_STATE_ERROR); free = 1; + abort = 1; wake_up(&qhp->wait); BUG_ON(!ep); flush_qp(qhp); -- cgit v1.2.3 From 1557967bf921e787f0c9236c2899603d85f44d31 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:52 +0000 Subject: RDMA/cxgb4: Display streaming mode error only if detected in RTS With later firmware, the chances of getting streaming mode data after we exit RTS is likely, so we don't need to warn for it. The only real case where we don't expect it is when the QP is in RTS. Move QP to ERROR when streaming mode data received. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3dce47a63709..31d1fac605d3 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1403,21 +1403,23 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) ep->rcv_seq += dlen; process_mpa_request(ep, skb); break; - default: - pr_err("%s Unexpected streaming data." \ - " ep %p state %d tid %u status %d\n", - __func__, ep, state_read(&ep->com), ep->hwtid, status); - - if (ep->com.qp) { - struct c4iw_qp_attributes attrs; - - attrs.next_state = C4IW_QP_STATE_ERROR; - c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); - } + case FPDU_MODE: { + struct c4iw_qp_attributes attrs; + BUG_ON(!ep->com.qp); + if (ep->com.qp->attr.state == C4IW_QP_STATE_RTS) + pr_err("%s Unexpected streaming data." \ + " ep %p state %d tid %u status %d\n", + __func__, ep, state_read(&ep->com), + ep->hwtid, status); + attrs.next_state = C4IW_QP_STATE_ERROR; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); c4iw_ep_disconnect(ep, 1, GFP_KERNEL); break; } + default: + break; + } return 0; } -- cgit v1.2.3 From 325abead6cc7ef50572c53b1adc4d2442234b50f Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:53 +0000 Subject: RDMA/cxgb4: Keep QP referenced until TID released The driver is currently releasing the last ref on the QP too early. This can cause bus errors due to HW still fetching WRs from the HW queue. The fix is to keep a qp ref until we release the HW TID. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 20 ++++++++++++++++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 31d1fac605d3..ebcdb3ff0cf4 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -143,6 +143,18 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status); static LIST_HEAD(timeout_list); static spinlock_t timeout_lock; +static void deref_qp(struct c4iw_ep *ep) +{ + c4iw_qp_rem_ref(&ep->com.qp->ibqp); + clear_bit(QP_REFERENCED, &ep->com.flags); +} + +static void ref_qp(struct c4iw_ep *ep) +{ + set_bit(QP_REFERENCED, &ep->com.flags); + c4iw_qp_add_ref(&ep->com.qp->ibqp); +} + static void start_ep_timer(struct c4iw_ep *ep) { PDBG("%s ep %p\n", __func__, ep); @@ -271,6 +283,8 @@ void _c4iw_free_ep(struct kref *kref) ep = container_of(kref, struct c4iw_ep, com.kref); PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]); + if (test_bit(QP_REFERENCED, &ep->com.flags)) + deref_qp(ep); if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); @@ -863,7 +877,6 @@ static void close_complete_upcall(struct c4iw_ep *ep) ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; set_bit(CLOSE_UPCALL, &ep->com.history); } } @@ -906,7 +919,6 @@ static void peer_abort_upcall(struct c4iw_ep *ep) ep->com.cm_id->event_handler(ep->com.cm_id, &event); ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; set_bit(ABORT_UPCALL, &ep->com.history); } } @@ -946,7 +958,6 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status) if (status < 0) { ep->com.cm_id->rem_ref(ep->com.cm_id); ep->com.cm_id = NULL; - ep->com.qp = NULL; } } @@ -2434,6 +2445,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_id->add_ref(cm_id); ep->com.cm_id = cm_id; ep->com.qp = qp; + ref_qp(ep); /* bind QP to EP and move to RTS */ attrs.mpa_attr = ep->mpa_attr; @@ -2464,7 +2476,6 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) return 0; err1: ep->com.cm_id = NULL; - ep->com.qp = NULL; cm_id->rem_ref(cm_id); err: c4iw_put_ep(&ep->com); @@ -2505,6 +2516,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.cm_id = cm_id; ep->com.qp = get_qhp(dev, conn_param->qpn); BUG_ON(!ep->com.qp); + ref_qp(ep); PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn, ep->com.qp, cm_id); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 9c1644fb0259..0aaaa0e81f29 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -716,6 +716,7 @@ enum c4iw_ep_flags { ABORT_REQ_IN_PROGRESS = 1, RELEASE_RESOURCES = 2, CLOSE_SENT = 3, + QP_REFERENCED = 5, }; enum c4iw_ep_history { -- cgit v1.2.3 From 04236df2a5b1e9881c90f2d1f63fbee3659297a7 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:54 +0000 Subject: RDMA/cxgb4: Always log async errors Log AEs even if the QP isn't in RTS. It is useful information. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 6 +++--- drivers/infiniband/hw/cxgb4/ev.c | 8 +++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index ebcdb3ff0cf4..5989991e31a1 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1419,9 +1419,9 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) BUG_ON(!ep->com.qp); if (ep->com.qp->attr.state == C4IW_QP_STATE_RTS) pr_err("%s Unexpected streaming data." \ - " ep %p state %d tid %u status %d\n", - __func__, ep, state_read(&ep->com), - ep->hwtid, status); + " qpid %u ep %p state %d tid %u status %d\n", + __func__, ep->com.qp->wq.sq.qid, ep, + state_read(&ep->com), ep->hwtid, status); attrs.next_state = C4IW_QP_STATE_ERROR; c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index cf2f6b47617a..1a840b2211dd 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -46,9 +46,11 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp, if ((qhp->attr.state == C4IW_QP_STATE_ERROR) || (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) { - PDBG("%s AE received after RTS - " - "qp state %d qpid 0x%x status 0x%x\n", __func__, - qhp->attr.state, qhp->wq.sq.qid, CQE_STATUS(err_cqe)); + pr_err("%s AE after RTS - qpid 0x%x opcode %d status 0x%x "\ + "type %d wrid.hi 0x%x wrid.lo 0x%x\n", + __func__, CQE_QPID(err_cqe), CQE_OPCODE(err_cqe), + CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), + CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe)); return; } -- cgit v1.2.3 From e8e5b9278ba0502ada73b8b94b8498cc19def743 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:55 +0000 Subject: RDMA/cxgb4: Only log rx_data warnings if cpl status is non-zero With newer firmware, we can get streaming data due to connection errors before the driver moves the QP out of RTS. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 5989991e31a1..51ceb618beb2 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1417,7 +1417,7 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb) case FPDU_MODE: { struct c4iw_qp_attributes attrs; BUG_ON(!ep->com.qp); - if (ep->com.qp->attr.state == C4IW_QP_STATE_RTS) + if (status) pr_err("%s Unexpected streaming data." \ " qpid %u ep %p state %d tid %u status %d\n", __func__, ep->com.qp->wq.sq.qid, ep, -- cgit v1.2.3 From 1ec779cc29238e6f4d315bff53cd36165819bfd5 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:56 +0000 Subject: RDMA/cxgb4: Fix endpoint timeout race condition The endpoint timeout logic had a race that could cause an endpoint object to be freed while it was still on the timedout list. This can happen if the timer is stopped after it had fired, but before the timedout thread processed the endpoint timeout. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 29 ++++++++++++++++------------- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 1 + 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 51ceb618beb2..ab5b4dd39dec 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -159,10 +159,12 @@ static void start_ep_timer(struct c4iw_ep *ep) { PDBG("%s ep %p\n", __func__, ep); if (timer_pending(&ep->timer)) { - PDBG("%s stopped / restarted timer ep %p\n", __func__, ep); - del_timer_sync(&ep->timer); - } else - c4iw_get_ep(&ep->com); + pr_err("%s timer already started! ep %p\n", + __func__, ep); + return; + } + clear_bit(TIMEOUT, &ep->com.flags); + c4iw_get_ep(&ep->com); ep->timer.expires = jiffies + ep_timeout_secs * HZ; ep->timer.data = (unsigned long)ep; ep->timer.function = ep_timeout; @@ -171,14 +173,10 @@ static void start_ep_timer(struct c4iw_ep *ep) static void stop_ep_timer(struct c4iw_ep *ep) { - PDBG("%s ep %p\n", __func__, ep); - if (!timer_pending(&ep->timer)) { - WARN(1, "%s timer stopped when its not running! " - "ep %p state %u\n", __func__, ep, ep->com.state); - return; - } + PDBG("%s ep %p stopping\n", __func__, ep); del_timer_sync(&ep->timer); - c4iw_put_ep(&ep->com); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) + c4iw_put_ep(&ep->com); } static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb, @@ -3191,11 +3189,16 @@ static DECLARE_WORK(skb_work, process_work); static void ep_timeout(unsigned long arg) { struct c4iw_ep *ep = (struct c4iw_ep *)arg; + int kickit = 0; spin_lock(&timeout_lock); - list_add_tail(&ep->entry, &timeout_list); + if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { + list_add_tail(&ep->entry, &timeout_list); + kickit = 1; + } spin_unlock(&timeout_lock); - queue_work(workq, &skb_work); + if (kickit) + queue_work(workq, &skb_work); } /* diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 0aaaa0e81f29..94a3b3c47a8a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -716,6 +716,7 @@ enum c4iw_ep_flags { ABORT_REQ_IN_PROGRESS = 1, RELEASE_RESOURCES = 2, CLOSE_SENT = 3, + TIMEOUT = 4, QP_REFERENCED = 5, }; -- cgit v1.2.3 From fe7e0a4dd0304745b57e08827fde13e1c2376e66 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:57 +0000 Subject: RDMA/cxgb4: Don't reconnect on abort for mpa_rev 1 Only reconnect if the endpoint wasn't freed. peer_abort() should only attempt to reconnect if the endpoint wasn't freed. Also remove hwtid from the debugfs idr. Add missing check for peer2peer in MPAv2 code Use correct mpa version on reject. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index ab5b4dd39dec..88933af05c5c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -284,10 +284,10 @@ void _c4iw_free_ep(struct kref *kref) if (test_bit(QP_REFERENCED, &ep->com.flags)) deref_qp(ep); if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); - remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); } kfree(ep); } @@ -699,7 +699,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) memset(mpa, 0, sizeof(*mpa)); memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); mpa->flags = MPA_REJECT; - mpa->revision = mpa_rev; + mpa->revision = ep->mpa_attr.version; mpa->private_data_size = htons(plen); if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { @@ -2176,7 +2176,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_SENT: stop_ep_timer(ep); - if (mpa_rev == 2 && ep->tried_with_mpa_v1) + if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { /* @@ -2248,9 +2248,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) out: if (release) release_ep_resources(ep); - - /* retry with mpa-v1 */ - if (ep && ep->retry_with_mpa_v1) { + else if (ep->retry_with_mpa_v1) { + remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid); cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); dst_release(ep->dst); cxgb4_l2t_release(ep->l2t); -- cgit v1.2.3 From 7c0a33d61187a413f29f63d106b503b9c91680e8 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:58 +0000 Subject: RDMA/cxgb4: Don't wakeup threads for MPAv2 Don't wakeup threads blocked in rdma_init/rdma_fini if we are on MPAv2, and want to retry connection with MPAv1. Stop ep-timer on getting MPA version mismatch, before doing the abort_connection - in process_mpa_request. Take care to stop ep-timer in error paths for process_mpa_request. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 88933af05c5c..06c3a527d6f8 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1300,11 +1300,13 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) if (mpa->revision > mpa_rev) { printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d," " Received = %d\n", __func__, mpa_rev, mpa->revision); + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1315,6 +1317,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * Fail if there's too much private data. */ if (plen > MPA_MAX_PRIVATE_DATA) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -1323,6 +1326,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) * If plen does not account for pkt size */ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { + stop_ep_timer(ep); abort_connection(ep, skb, GFP_KERNEL); return; } @@ -3286,8 +3290,14 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) /* * Wake up any threads in rdma_init() or rdma_fini(). + * However, if we are on MPAv2 and want to retry with MPAv1 + * then, don't wake up yet. */ - c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + if (mpa_rev == 2 && !ep->tried_with_mpa_v1) { + if (ep->com.state != MPA_REQ_SENT) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); + } else + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); sched(dev, skb); return 0; } -- cgit v1.2.3 From b3de6cfebc6167761c40947f05f4c817531f37d5 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:11:59 +0000 Subject: RDMA/cxgb4: Insert hwtid in pass_accept_req instead in pass_establish CPL_ABORT_REQ_RSS can come before TCP connection is established. In such case peer_abort was trying to remove the hwtid, which was not inserted. To avoid this we insert the hwtid when we are sure that we are surely going to send passive accept request. Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 06c3a527d6f8..37ea2fcf3b10 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2010,6 +2010,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); + insert_handle(dev, &dev->hwtid_idr, child_ep, child_ep->hwtid); accept_cr(child_ep, peer_ip, skb, req); set_bit(PASS_ACCEPT_REQ, &child_ep->com.history); goto out; @@ -2035,7 +2036,6 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb) ntohs(req->tcp_opt)); set_emss(ep, ntohs(req->tcp_opt)); - insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid); dst_confirm(ep->dst); state_set(&ep->com, MPA_REQ_WAIT); -- cgit v1.2.3 From ef5d6355ed4bcf574e8473c3ce667cbf6c66a0ee Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 7 Jan 2013 13:12:00 +0000 Subject: RDMA/cxgb4: Address sparse warnings Fixe the following types of sparse warnings - cast to pointer from integer of different size - cast from pointer to integer of different size - incorrect type in assignment (different base types) - incorrect type in argument 1 (different base types) - cast from restricted __be64 - cast from restricted __be32 Signed-off-by: Vipul Pandya Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 68 ++++++++++++++++++++---------------- drivers/infiniband/hw/cxgb4/device.c | 3 +- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 37ea2fcf3b10..dfca515cc933 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1492,11 +1492,11 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) V_FW_OFLD_CONNECTION_WR_ASTID(atid)); req->tcb.cplrxdataack_cplpassacceptrpl = htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK); - req->tcb.tx_max = jiffies; + req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); wscale = compute_wscale(rcv_win); - req->tcb.opt0 = TCAM_BYPASS(1) | + req->tcb.opt0 = (__force __be64) (TCAM_BYPASS(1) | (nocong ? NO_CONG(1) : 0) | KEEP_ALIVE(1) | DELACK(1) | @@ -1507,20 +1507,20 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) SMAC_SEL(ep->smac_idx) | DSCP(ep->tos) | ULP_MODE(ULP_MODE_TCPDDP) | - RCV_BUFSIZ(rcv_win >> 10); - req->tcb.opt2 = PACE(1) | + RCV_BUFSIZ(rcv_win >> 10)); + req->tcb.opt2 = (__force __be32) (PACE(1) | TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) | RX_CHANNEL(0) | CCTRL_ECN(enable_ecn) | - RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); + RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid)); if (enable_tcp_timestamps) - req->tcb.opt2 |= TSTAMPS_EN(1); + req->tcb.opt2 |= (__force __be32) TSTAMPS_EN(1); if (enable_tcp_sack) - req->tcb.opt2 |= SACK_EN(1); + req->tcb.opt2 |= (__force __be32) SACK_EN(1); if (wscale && enable_tcp_window_scaling) - req->tcb.opt2 |= WND_SCALE_EN(1); - req->tcb.opt0 = cpu_to_be64(req->tcb.opt0); - req->tcb.opt2 = cpu_to_be32(req->tcb.opt2); + req->tcb.opt2 |= (__force __be32) WND_SCALE_EN(1); + req->tcb.opt0 = cpu_to_be64((__force u64) req->tcb.opt0); + req->tcb.opt2 = cpu_to_be32((__force u32) req->tcb.opt2); set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx); set_bit(ACT_OFLD_CONN, &ep->com.history); c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); @@ -2773,7 +2773,8 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, struct c4iw_ep *ep; int atid = be32_to_cpu(req->tid); - ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, req->tid); + ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, + (__force u32) req->tid); if (!ep) return; @@ -2817,7 +2818,7 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, struct cpl_pass_accept_req *cpl; int ret; - rpl_skb = (struct sk_buff *)cpu_to_be64(req->cookie); + rpl_skb = (__force struct sk_buff *)cpu_to_be64(req->cookie); BUG_ON(!rpl_skb); if (req->retval) { PDBG("%s passive open failure %d\n", __func__, req->retval); @@ -2828,7 +2829,8 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, } else { cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb); OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, - htonl(req->tid))); + (__force u32) htonl( + (__force u32) req->tid))); ret = pass_accept_req(dev, rpl_skb); if (!ret) kfree_skb(rpl_skb); @@ -2874,10 +2876,10 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) struct tcp_options_received tmp_opt; /* Store values from cpl_rx_pkt in temporary location. */ - vlantag = cpl->vlan; - len = cpl->len; - l2info = cpl->l2info; - hdr_len = cpl->hdr_len; + vlantag = (__force u16) cpl->vlan; + len = (__force u16) cpl->len; + l2info = (__force u32) cpl->l2info; + hdr_len = (__force u16) cpl->hdr_len; intf = cpl->iff; __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header)); @@ -2888,19 +2890,24 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos) */ memset(&tmp_opt, 0, sizeof(tmp_opt)); tcp_clear_options(&tmp_opt); - tcp_parse_options(skb, &tmp_opt, 0, 0, NULL); + tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL); req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req)); memset(req, 0, sizeof(*req)); req->l2info = cpu_to_be16(V_SYN_INTF(intf) | - V_SYN_MAC_IDX(G_RX_MACIDX(htonl(l2info))) | + V_SYN_MAC_IDX(G_RX_MACIDX( + (__force int) htonl(l2info))) | F_SYN_XACT_MATCH); - req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(htonl(l2info))) | - V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(htons(hdr_len))) | - V_IP_HDR_LEN(G_RX_IPHDR_LEN(htons(hdr_len))) | - V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(htonl(l2info)))); - req->vlan = vlantag; - req->len = len; + req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN( + (__force int) htonl(l2info))) | + V_TCP_HDR_LEN(G_RX_TCPHDR_LEN( + (__force int) htons(hdr_len))) | + V_IP_HDR_LEN(G_RX_IPHDR_LEN( + (__force int) htons(hdr_len))) | + V_ETH_HDR_LEN(G_RX_ETHHDR_LEN( + (__force int) htonl(l2info)))); + req->vlan = (__force __be16) vlantag; + req->len = (__force __be16) len; req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) | PASS_OPEN_TOS(tos)); req->tcpopt.mss = htons(tmp_opt.mss_clamp); @@ -2929,7 +2936,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1)); req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16))); req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL); - req->le.filter = filter; + req->le.filter = (__force __be32) filter; req->le.lport = lport; req->le.pport = rport; req->le.u.ipv4.lip = laddr; @@ -2955,7 +2962,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, * TP will ignore any value > 0 for MSS index. */ req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); - req->cookie = cpu_to_be64((u64)skb); + req->cookie = (__force __u64) cpu_to_be64((u64)skb); set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); @@ -3005,7 +3012,8 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) /* * Calculate the server tid from filter hit index from cpl_rx_pkt. */ - stid = cpu_to_be32(rss->hash_val) - dev->rdev.lldi.tids->sftid_base + stid = (__force int) cpu_to_be32((__force u32) rss->hash_val) + - dev->rdev.lldi.tids->sftid_base + dev->rdev.lldi.tids->nstids; lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid); @@ -3066,10 +3074,10 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step]; - window = htons(tcph->window); + window = (__force u16) htons((__force u16)tcph->window); /* Calcuate filter portion for LE region. */ - filter = cpu_to_be32(select_ntuple(dev, dst, e)); + filter = (__force unsigned int) cpu_to_be32(select_ntuple(dev, dst, e)); /* * Synthesize the cpl_pass_accept_req. We have everything except the diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index ba11c76c0b5a..dc6adb213cd6 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -797,7 +797,8 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, "RSS %#llx, FL %#llx, len %u\n", pci_name(ctx->lldi.pdev), gl->va, (unsigned long long)be64_to_cpu(*rsp), - (unsigned long long)be64_to_cpu(*(u64 *)gl->va), + (unsigned long long)be64_to_cpu( + *(__force __be64 *)gl->va), gl->tot_len); return 0; -- cgit v1.2.3 From 710a31102be46ffc2f087119dca19c894dc237eb Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 5 Feb 2013 20:51:30 +0000 Subject: RDMA/cxgb4: "cookie" can stay in host endianness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Work requests are passed between the host and the firmware with a "cookie". This cookie is swapped to big-endian when passed to the firmware and back to host endianness on return. This swapping seems to be implemented incorrectly. Moreover, the byte swapping triggers GCC warnings on 32 bit: drivers/infiniband/hw/cxgb4/cm.c: In function ‘passive_ofld_conn_reply’: drivers/infiniband/hw/cxgb4/cm.c:2803:12: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] drivers/infiniband/hw/cxgb4/cm.c: In function ‘send_fw_pass_open_req’: drivers/infiniband/hw/cxgb4/cm.c:2941:16: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] [...] But byte swapping isn't needed as the firmware doesn't actually touch the cookie. Dropping byte swapping makes the warnings go away too. Signed-off-by: Paul Bolle Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index dfca515cc933..565bfb161c1a 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2818,7 +2818,7 @@ static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, struct cpl_pass_accept_req *cpl; int ret; - rpl_skb = (__force struct sk_buff *)cpu_to_be64(req->cookie); + rpl_skb = (struct sk_buff *)(unsigned long)req->cookie; BUG_ON(!rpl_skb); if (req->retval) { PDBG("%s passive open failure %d\n", __func__, req->retval); @@ -2962,7 +2962,7 @@ static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb, * TP will ignore any value > 0 for MSS index. */ req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF)); - req->cookie = (__force __u64) cpu_to_be64((u64)skb); + req->cookie = (unsigned long)skb; set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id); cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb); -- cgit v1.2.3 From bcc9b67a5b65ec2e1ec5371226a729ec1b380860 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 7 Feb 2013 20:47:51 +0000 Subject: IB/qib: Fix QP locate/remove race remove_qp() can execute concurrently with a qib_lookup_qpn() on another CPU, which in of itself, is ok, given the RCU locking. The issue is that remove_qp() NULLs out the qp->next field so that a qib_lookup_qpn() might fail to find a qp if it occurs after the one that is being deleted. This is a momentary issue and subsequent qib_lookup_qpn() calls would find the qp's since the search restarts from the bucket head. At scale, the issue might causes dropped packets and unnecessary retransmissions. The fix just deletes the qp->next NULL assignment to prevent the remove_qp() from hiding qp's from qib_lookup_qpn(). Reviewed-by: Dean Luick Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_qp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 35275099cafd..a6a2cc2ba260 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -268,8 +268,9 @@ static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) qpp = &q->next) if (q == qp) { atomic_dec(&qp->refcount); - *qpp = qp->next; - rcu_assign_pointer(qp->next, NULL); + rcu_assign_pointer(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&dev->qpt_lock))); break; } } -- cgit v1.2.3 From b23523d8585906c0fb7c78616e3deca18c7fcce2 Mon Sep 17 00:00:00 2001 From: Stefan Hasko Date: Sat, 22 Dec 2012 02:29:21 +0000 Subject: RDMA/cxgb4: Fix cast warning Fix compile warning about cast to pointer from integer of different size. Signed-off-by: Stefan Hasko Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index dc6adb213cd6..80069ad595c1 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -533,7 +533,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev) PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p qpshift %lu " "qpmask 0x%x cqshift %lu cqmask 0x%x\n", (unsigned)pci_resource_len(rdev->lldi.pdev, 2), - (void *)pci_resource_start(rdev->lldi.pdev, 2), + (void *)(unsigned long)pci_resource_start(rdev->lldi.pdev, 2), rdev->lldi.db_reg, rdev->lldi.gts_reg, rdev->qpshift, rdev->qpmask, -- cgit v1.2.3 From 8ab10f75372fc59b95d96af40b9a3b4c2a0da059 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 10 Oct 2012 13:10:43 +0000 Subject: RDMA/amso1100: Use module_pci_driver() to simplify the code Use the module_pci_driver() macro to make the code simpler by eliminating module_init and module_exit calls. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Reviewed-by: Steve WIse Signed-off-by: Roland Dreier --- drivers/infiniband/hw/amso1100/c2.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 7275e727e0f5..d53cf519f42a 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -1238,15 +1238,4 @@ static struct pci_driver c2_pci_driver = { .remove = c2_remove, }; -static int __init c2_init_module(void) -{ - return pci_register_driver(&c2_pci_driver); -} - -static void __exit c2_exit_module(void) -{ - pci_unregister_driver(&c2_pci_driver); -} - -module_init(c2_init_module); -module_exit(c2_exit_module); +module_pci_driver(c2_pci_driver); -- cgit v1.2.3 From cab66d1273e6490603cf5256155584d38cecca68 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 4 Feb 2013 11:22:36 +0000 Subject: IB/mlx4: Fix bug unwinding on error in mlx4_ib_init_sriov() We have to decrement "i" before calling mlx4_ib_free_demux_ctx() or we free something that wasn't allocated. That's fine for free_pv_object() but it would lead to a NULL dereference calling mlx4_ib_free_demux_ctx(). The null dereference is because ->tun is NULL when we check: if (!ctx->tun[i]) Also we didn't free ->sriov.demux[0] so it was a small leak. Signed-off-by: Dan Carpenter Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 0a903c129f0a..934792c477bc 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1999,16 +1999,17 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) goto demux_err; err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1); if (err) - goto demux_err; + goto free_pv; } mlx4_ib_master_tunnels(dev, 1); return 0; +free_pv: + free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); demux_err: - while (i > 0) { + while (--i >= 0) { free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); - --i; } mlx4_ib_device_unregister_sysfs(dev); -- cgit v1.2.3 From 6950a235b86cf4e73d2b8e5476e7d0eb8f61af63 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 21 Jan 2013 13:02:58 +0000 Subject: IB/mlx4: Adjust duplicate test Delete successive tests to the same location. The code tested the result of a previous allocation, that itself was already tested. It is changed to test the result of the most recent allocation. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @s exists@ local idexpression y; expression x,e; @@ *if ( \(x == NULL\|IS_ERR(x)\|y != 0\) ) { ... when forall return ...; } ... when != \(y = e\|y += e\|y -= e\|y |= e\|y &= e\|y++\|y--\|&y\) when != \(XT_GETPAGE(...,y)\|WMI_CMD_BUF(...)\) *if ( \(x == NULL\|IS_ERR(x)\|y != 0\) ) { ... when forall return ...; } // Signed-off-by: Julia Lawall Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 5b2a01dfb907..97516eb363b7 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -732,7 +732,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) dev->ports_parent = kobject_create_and_add("ports", kobject_get(dev->iov_parent)); - if (!dev->iov_parent) { + if (!dev->ports_parent) { ret = -ENOMEM; goto err_ports; } -- cgit v1.2.3 From 9d1ad66e3eae0faf3f19a618da74b4c377474845 Mon Sep 17 00:00:00 2001 From: Shlomo Pongratz Date: Tue, 19 Feb 2013 15:40:22 +0000 Subject: IPoIB: Fix ipoib_neigh hashing to use the correct daddr octets The hash function introduced in commit b63b70d877 ("IPoIB: Use a private hash table for path lookup in xmit path") was designd to use the 3 octets of the IPoIB HW address that holds the remote QPN. However, this currently isn't the case on little-endian machines, because the the code there uses the flags part (octet[0]) and not the last octet of the QPN (octet[3]). Fix this. The fix caused a checkpatch warning on line over 80 characters, to solve that changed the name of the temp variable that holds the daddr. Signed-off-by: Shlomo Pongratz Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 ++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 07ca6fd5546b..a7ac0977cb5e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -117,6 +117,8 @@ enum { #define IPOIB_OP_CM (0) #endif +#define IPOIB_QPN_MASK ((__force u32) cpu_to_be32(0xFFFFFF)) + /* structs */ struct ipoib_header { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 6fdc9e78da0d..a68628e4197a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -844,10 +844,10 @@ static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) * different subnets. */ /* qpn octets[1:4) & port GUID octets[12:20) */ - u32 *daddr_32 = (u32 *) daddr; + u32 *d32 = (u32 *) daddr; u32 hv; - hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0); + hv = jhash_3words(d32[3], d32[4], IPOIB_QPN_MASK & d32[0], 0); return hv & htbl->mask; } -- cgit v1.2.3 From 4b48680b5572ee78834c276548e16ac5316908cb Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Tue, 19 Feb 2013 15:40:23 +0000 Subject: IPoIB: Add version and firmware info to ethtool reporting Implement version info as well as report firmware version and bus info of the underlying IB HW device. Signed-off-by: Yan Burman Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib.h | 2 ++ drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 19 ++++++++++++++++++- drivers/infiniband/ulp/ipoib/ipoib_main.c | 5 +++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a7ac0977cb5e..eb71aaa26a9a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -762,4 +762,6 @@ extern int ipoib_debug_level; #define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff) +extern const char ipoib_driver_version[]; + #endif /* _IPOIB_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 29bc7b5724ac..c4b3940845e6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -39,7 +39,24 @@ static void ipoib_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - strncpy(drvinfo->driver, "ipoib", sizeof(drvinfo->driver) - 1); + struct ipoib_dev_priv *priv = netdev_priv(netdev); + struct ib_device_attr *attr; + + attr = kmalloc(sizeof(*attr), GFP_KERNEL); + if (attr && !ib_query_device(priv->ca, attr)) + snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), + "%d.%d.%d", (int)(attr->fw_ver >> 32), + (int)(attr->fw_ver >> 16) & 0xffff, + (int)attr->fw_ver & 0xffff); + kfree(attr); + + strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device), + sizeof(drvinfo->bus_info)); + + strlcpy(drvinfo->version, ipoib_driver_version, + sizeof(drvinfo->version)); + + strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); } static int ipoib_get_coalesce(struct net_device *dev, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a68628e4197a..4fe44eba2f9f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -49,9 +49,14 @@ #include #include +#define DRV_VERSION "1.0.0" + +const char ipoib_driver_version[] = DRV_VERSION; + MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(DRV_VERSION); int ipoib_sendq_size __read_mostly = IPOIB_TX_RING_SIZE; int ipoib_recvq_size __read_mostly = IPOIB_RX_RING_SIZE; -- cgit v1.2.3 From 5a2815f03c0fd5a091c95af93b7f1a17a971ac20 Mon Sep 17 00:00:00 2001 From: Itai Garbi Date: Tue, 19 Feb 2013 15:40:24 +0000 Subject: IPoIB: Don't attempt to release resources on error flow If the ipoib client info isn't found on the _remove_one callback, we must not attempt to scan the returned null list. Found by Coverity. Signed-off-by: Itai Garbi Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 4fe44eba2f9f..66d6da90982f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1693,6 +1693,8 @@ static void ipoib_remove_one(struct ib_device *device) return; dev_list = ib_get_client_data(device, &ipoib_client); + if (!dev_list) + return; list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); -- cgit v1.2.3 From aee38fadd25989c3e6d99fc08752e2d87601ffc1 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:07 +0000 Subject: IB/mlx4_ib: Remove local invalidate segment unused fields Remove unused fields from the local invalidate WQE segment structure. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 6 ++---- include/linux/mlx4/qp.h | 8 +++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 19e0637220b9..c6dde71b4642 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1983,10 +1983,8 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { - iseg->flags = 0; - iseg->mem_key = cpu_to_be32(rkey); - iseg->guest_id = 0; - iseg->pa = 0; + memset(iseg, 0, sizeof(*iseg)); + iseg->mem_key = cpu_to_be32(rkey); } static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 4b4ad6ffef92..6c8a68c602be 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -304,12 +304,10 @@ struct mlx4_wqe_fmr_ext_seg { }; struct mlx4_wqe_local_inval_seg { - __be32 flags; - u32 reserved1; + u64 reserved1; __be32 mem_key; - u32 reserved2[2]; - __be32 guest_id; - __be64 pa; + u32 reserved2; + u64 reserved3[2]; }; struct mlx4_wqe_raddr_seg { -- cgit v1.2.3 From b20e519a81d648aebfbb90811743cc86cd469a48 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:08 +0000 Subject: mlx4_core: Rename MPT-related functions to have mpt_ prefix The MPT - Memory Protection Table - is used by both memory windows and memory regions. Hence, all MPT references are relevant for both types of memory objects. Rename the relevant functions to start with mpt_ instead of the current mr_ prefix. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 16 ++++---- drivers/net/ethernet/mellanox/mlx4/mr.c | 48 +++++++++++----------- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 14 +++---- 3 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 116c5c29d2d1..5075236a140d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -118,10 +118,10 @@ enum { MLX4_NUM_CMPTS = MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT }; -enum mlx4_mr_state { - MLX4_MR_DISABLED = 0, - MLX4_MR_EN_HW, - MLX4_MR_EN_SW +enum mlx4_mpt_state { + MLX4_MPT_DISABLED = 0, + MLX4_MPT_EN_HW, + MLX4_MPT_EN_SW }; #define MLX4_COMM_TIME 10000 @@ -871,10 +871,10 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn); void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn); int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn); void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn); -int __mlx4_mr_reserve(struct mlx4_dev *dev); -void __mlx4_mr_release(struct mlx4_dev *dev, u32 index); -int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index); -void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_reserve(struct mlx4_dev *dev); +void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index); +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index); +void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index); u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order); void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index c202d3ad2a0e..49705cf860c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -321,7 +321,7 @@ static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, mr->size = size; mr->pd = pd; mr->access = access; - mr->enabled = MLX4_MR_DISABLED; + mr->enabled = MLX4_MPT_DISABLED; mr->key = hw_index_to_key(mridx); return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); @@ -335,14 +335,14 @@ static int mlx4_WRITE_MTT(struct mlx4_dev *dev, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } -int __mlx4_mr_reserve(struct mlx4_dev *dev) +int __mlx4_mpt_reserve(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap); } -static int mlx4_mr_reserve(struct mlx4_dev *dev) +static int mlx4_mpt_reserve(struct mlx4_dev *dev) { u64 out_param; @@ -353,17 +353,17 @@ static int mlx4_mr_reserve(struct mlx4_dev *dev) return -1; return get_param_l(&out_param); } - return __mlx4_mr_reserve(dev); + return __mlx4_mpt_reserve(dev); } -void __mlx4_mr_release(struct mlx4_dev *dev, u32 index) +void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index) { struct mlx4_priv *priv = mlx4_priv(dev); mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index); } -static void mlx4_mr_release(struct mlx4_dev *dev, u32 index) +static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index) { u64 in_param; @@ -376,17 +376,17 @@ static void mlx4_mr_release(struct mlx4_dev *dev, u32 index) index); return; } - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); } -int __mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) +int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; return mlx4_table_get(dev, &mr_table->dmpt_table, index); } -static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) +static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index) { u64 param; @@ -397,17 +397,17 @@ static int mlx4_mr_alloc_icm(struct mlx4_dev *dev, u32 index) MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } - return __mlx4_mr_alloc_icm(dev, index); + return __mlx4_mpt_alloc_icm(dev, index); } -void __mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) +void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) { struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table; mlx4_table_put(dev, &mr_table->dmpt_table, index); } -static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) +static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index) { u64 in_param; @@ -420,7 +420,7 @@ static void mlx4_mr_free_icm(struct mlx4_dev *dev, u32 index) index); return; } - return __mlx4_mr_free_icm(dev, index); + return __mlx4_mpt_free_icm(dev, index); } int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, @@ -429,14 +429,14 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, u32 index; int err; - index = mlx4_mr_reserve(dev); + index = mlx4_mpt_reserve(dev); if (index == -1) return -ENOMEM; err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size, access, npages, page_shift, mr); if (err) - mlx4_mr_release(dev, index); + mlx4_mpt_release(dev, index); return err; } @@ -446,14 +446,14 @@ static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) { int err; - if (mr->enabled == MLX4_MR_EN_HW) { + if (mr->enabled == MLX4_MPT_EN_HW) { err = mlx4_HW2SW_MPT(dev, NULL, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); if (err) mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); - mr->enabled = MLX4_MR_EN_SW; + mr->enabled = MLX4_MPT_EN_SW; } mlx4_mtt_cleanup(dev, &mr->mtt); } @@ -462,8 +462,8 @@ void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) { mlx4_mr_free_reserved(dev, mr); if (mr->enabled) - mlx4_mr_free_icm(dev, key_to_hw_index(mr->key)); - mlx4_mr_release(dev, key_to_hw_index(mr->key)); + mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); + mlx4_mpt_release(dev, key_to_hw_index(mr->key)); } EXPORT_SYMBOL_GPL(mlx4_mr_free); @@ -473,7 +473,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) struct mlx4_mpt_entry *mpt_entry; int err; - err = mlx4_mr_alloc_icm(dev, key_to_hw_index(mr->key)); + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key)); if (err) return err; @@ -520,7 +520,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); goto err_cmd; } - mr->enabled = MLX4_MR_EN_HW; + mr->enabled = MLX4_MPT_EN_HW; mlx4_free_cmd_mailbox(dev, mailbox); @@ -530,7 +530,7 @@ err_cmd: mlx4_free_cmd_mailbox(dev, mailbox); err_table: - mlx4_mr_free_icm(dev, key_to_hw_index(mr->key)); + mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); return err; } EXPORT_SYMBOL_GPL(mlx4_mr_enable); @@ -882,7 +882,7 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, err); return; } - fmr->mr.enabled = MLX4_MR_EN_SW; + fmr->mr.enabled = MLX4_MPT_EN_SW; } EXPORT_SYMBOL_GPL(mlx4_fmr_unmap); @@ -892,7 +892,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) return -EBUSY; mlx4_mr_free(dev, &fmr->mr); - fmr->mr.enabled = MLX4_MR_DISABLED; + fmr->mr.enabled = MLX4_MPT_DISABLED; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 561ed2a22a17..2287dfda6212 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1231,14 +1231,14 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, switch (op) { case RES_OP_RESERVE: - index = __mlx4_mr_reserve(dev); + index = __mlx4_mpt_reserve(dev); if (index == -1) break; id = index & mpt_mask(dev); err = add_res_range(dev, slave, id, 1, RES_MPT, index); if (err) { - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); break; } set_param_l(out_param, index); @@ -1251,7 +1251,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - err = __mlx4_mr_alloc_icm(dev, mpt->key); + err = __mlx4_mpt_alloc_icm(dev, mpt->key); if (err) { res_abort_move(dev, slave, RES_MPT, id); return err; @@ -1586,7 +1586,7 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, err = rem_res_range(dev, slave, id, 1, RES_MPT, 0); if (err) break; - __mlx4_mr_release(dev, index); + __mlx4_mpt_release(dev, index); break; case RES_OP_MAP_ICM: index = get_param_l(&in_param); @@ -1596,7 +1596,7 @@ static int mpt_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - __mlx4_mr_free_icm(dev, mpt->key); + __mlx4_mpt_free_icm(dev, mpt->key); res_end_move(dev, slave, RES_MPT, id); return err; break; @@ -3480,7 +3480,7 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) while (state != 0) { switch (state) { case RES_MPT_RESERVED: - __mlx4_mr_release(dev, mpt->key); + __mlx4_mpt_release(dev, mpt->key); spin_lock_irq(mlx4_tlock(dev)); rb_erase(&mpt->com.node, &tracker->res_tree[RES_MPT]); @@ -3491,7 +3491,7 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave) break; case RES_MPT_MAPPED: - __mlx4_mr_free_icm(dev, mpt->key); + __mlx4_mpt_free_icm(dev, mpt->key); state = RES_MPT_RESERVED; break; -- cgit v1.2.3 From 61083720702a329ed5952e32bda384e3bbc9093c Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:09 +0000 Subject: mlx4_core: Propagate MR deregistration failures to caller MR deregistration fails when memory windows are bound to the MR. Handle such failures by propagating them to the caller ULP. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mr.c | 13 ++++++++----- drivers/net/ethernet/mellanox/mlx4/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/mr.c | 29 +++++++++++++++++++++------- include/linux/mlx4/device.h | 2 +- 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index bbaf6176f207..254e1cf26439 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -68,7 +68,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_free: kfree(mr); @@ -163,7 +163,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return &mr->ibmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); err_umem: ib_umem_release(mr->umem); @@ -177,8 +177,11 @@ err_free: int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); + int ret; - mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); + if (ret) + return ret; if (mr->umem) ib_umem_release(mr->umem); kfree(mr); @@ -212,7 +215,7 @@ struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, return &mr->ibmr; err_mr: - mlx4_mr_free(dev->dev, &mr->mmr); + (void) mlx4_mr_free(dev->dev, &mr->mmr); err_free: kfree(mr); @@ -291,7 +294,7 @@ struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc, return &fmr->ibfmr; err_mr: - mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); + (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr); err_free: kfree(fmr); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 3a2b8c65642d..a2987142734d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -176,7 +176,7 @@ static void mlx4_en_remove(struct mlx4_dev *dev, void *endev_ptr) flush_workqueue(mdev->workqueue); destroy_workqueue(mdev->workqueue); - mlx4_mr_free(dev, &mdev->mr); + (void) mlx4_mr_free(dev, &mdev->mr); iounmap(mdev->uar_map); mlx4_uar_free(dev, &mdev->priv_uar); mlx4_pd_free(dev, mdev->priv_pdn); @@ -283,7 +283,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) return mdev; err_mr: - mlx4_mr_free(dev, &mdev->mr); + (void) mlx4_mr_free(dev, &mdev->mr); err_map: if (!mdev->uar_map) iounmap(mdev->uar_map); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 49705cf860c6..06b16e4ace80 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -442,7 +442,7 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, } EXPORT_SYMBOL_GPL(mlx4_mr_alloc); -static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) +static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) { int err; @@ -450,20 +450,31 @@ static void mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr) err = mlx4_HW2SW_MPT(dev, NULL, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); - if (err) - mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d),", err); + mlx4_warn(dev, "MR has MWs bound to it.\n"); + return err; + } mr->enabled = MLX4_MPT_EN_SW; } mlx4_mtt_cleanup(dev, &mr->mtt); + + return 0; } -void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) +int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) { - mlx4_mr_free_reserved(dev, mr); + int ret; + + ret = mlx4_mr_free_reserved(dev, mr); + if (ret) + return ret; if (mr->enabled) mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key)); mlx4_mpt_release(dev, key_to_hw_index(mr->key)); + + return 0; } EXPORT_SYMBOL_GPL(mlx4_mr_free); @@ -831,7 +842,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, return 0; err_free: - mlx4_mr_free(dev, &fmr->mr); + (void) mlx4_mr_free(dev, &fmr->mr); return err; } EXPORT_SYMBOL_GPL(mlx4_fmr_alloc); @@ -888,10 +899,14 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_unmap); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr) { + int ret; + if (fmr->maps) return -EBUSY; - mlx4_mr_free(dev, &fmr->mr); + ret = mlx4_mr_free(dev, &fmr->mr); + if (ret) + return ret; fmr->mr.enabled = MLX4_MPT_DISABLED; return 0; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 20ea939c22a6..e9fe8caaf8bb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -801,7 +801,7 @@ u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt); int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr); -void mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); -- cgit v1.2.3 From b96e4abaaeda62b5292026a9deaacb4066431ac7 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 21 Feb 2013 14:50:08 +0000 Subject: IB/iser: Use proper define for the commands per LUN value advertised to SCSI ML ISER_DEF_CMD_PER_LUN was meant to be ISCSI_DEF_XMIT_CMDS_MAX, not plain 128 Signed-off-by: Roi Dayan Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index ef7d3be46c31..5babdb35bda7 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -94,7 +94,7 @@ /* support up to 512KB in one RDMA */ #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K) -#define ISER_DEF_CMD_PER_LUN 128 +#define ISER_DEF_CMD_PER_LUN ISCSI_DEF_XMIT_CMDS_MAX /* QP settings */ /* Maximal bounds on received asynchronous PDUs */ -- cgit v1.2.3 From 819a087316a63ef7d60e7b816d18c4e29a05861a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 21 Feb 2013 14:50:09 +0000 Subject: IB/iser: Avoid error prints on EAGAIN registration failures Under IO/CPU stress its possible that the FMR pool might not have a free FMR mapping element for iSER to use because of incomplete background unmapping processing. In that case we get -EAGAIN and the IO is pushed back to the SCSI layer which soon retries it. No need to be so verbose about that. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 2033a928d34d..79c4f35ba0c9 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -404,7 +404,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, } else { /* use FMR for multiple dma entries */ iser_page_vec_build(mem, ib_conn->page_vec, ibdev); err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg); - if (err) { + if (err && err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); iser_err("mem->dma_nents = %d (dlength = 0x%x)\n", mem->dma_nents, -- cgit v1.2.3 From 5525d210fd55952262f30ba45b2acceb4a6a50e9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 21 Feb 2013 14:50:10 +0000 Subject: IB/iser: Enable iser when FMRs are not supported Reuse the "SG unaligned for FMR" driver flow to make the initiator functional when running over driver instance which doesn't support FMRs, such as a mlx4 virtual function. Signed-off-by: Or Gerlitz Signed-off-by: Alex Tabachnik Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iser_memory.c | 7 ++++--- drivers/infiniband/ulp/iser/iser_verbs.c | 8 ++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 79c4f35ba0c9..be1edb04b085 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -369,10 +369,11 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task, regd_buf = &iser_task->rdma_regd[cmd_dir]; aligned_len = iser_data_buf_aligned_len(mem, ibdev); - if (aligned_len != mem->dma_nents) { + if (aligned_len != mem->dma_nents || + (!ib_conn->fmr_pool && mem->dma_nents > 1)) { iscsi_conn->fmr_unalign_cnt++; - iser_warn("rdma alignment violation %d/%d aligned\n", - aligned_len, mem->size); + iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n", + aligned_len, mem->size); iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 95a49affee44..4debadc53106 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -242,10 +242,14 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) IB_ACCESS_REMOTE_READ); ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms); - if (IS_ERR(ib_conn->fmr_pool)) { - ret = PTR_ERR(ib_conn->fmr_pool); + ret = PTR_ERR(ib_conn->fmr_pool); + if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) { ib_conn->fmr_pool = NULL; goto out_err; + } else if (ret == -ENOSYS) { + ib_conn->fmr_pool = NULL; + iser_warn("FMRs are not supported, using unaligned mode\n"); + ret = 0; } memset(&init_attr, 0, sizeof init_attr); -- cgit v1.2.3 From a29bec12412d0e6f18ef1ce5f535e39829038648 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 25 Feb 2013 09:02:03 -0800 Subject: IB/mlx4: Convert is_xxx variables in build_mlx_header() to bool Matches the way they're used, and actually lets at least x86-64 generate better code: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-38 (-38) function old new delta mlx4_ib_post_send 4416 4378 -38 Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 4 ++-- drivers/infiniband/hw/mlx4/qp.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index dcd845bc30f0..cce5dde94bc1 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -652,12 +652,12 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, u8 *mac, int *is_mcast, u8 port); -static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) +static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah) { u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3; if (rdma_port_get_link_layer(ah->ibah.device, port) == IB_LINK_LAYER_ETHERNET) - return 1; + return true; return !!(ah->av.ib.g_slid & 0x80); } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c6dde71b4642..f8ac5333504d 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1746,11 +1746,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int header_size; int spc; int i; - int is_eth; - int is_vlan = 0; - int is_grh; - u16 vlan; int err = 0; + u16 vlan; + bool is_eth; + bool is_vlan = false; + bool is_grh; send_size = 0; for (i = 0; i < wr->num_sge; ++i) -- cgit v1.2.3 From 57d88cffc83a2120266bbe9e72cbba5bd5f33675 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Mon, 25 Feb 2013 09:17:13 -0800 Subject: IB/mlx4: Fix compiler warning about uninitialized 'vlan' variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building qp.o triggers this gcc warning: drivers/infiniband/hw/mlx4/qp.c: In function ‘mlx4_ib_post_send’: drivers/infiniband/hw/mlx4/qp.c:1862:62: warning: ‘vlan’ may be used uninitialized in this function [-Wmaybe-uninitialized] drivers/infiniband/hw/mlx4/qp.c:1752:6: note: ‘vlan’ was declared here Looking at the code it is clear 'vlan' is only set and used if 'is_eth' is non-zero. But by initializing 'vlan' to 0xffff, on gcc (Ubuntu 4.7.2-22ubuntu1) 4.7.2 on x86-64 at least, we fix the warning, and the compiler was already setting 'vlan' to 0 in the generated code, so there's no real downside. Signed-off-by: Paul Bolle [ Get rid of unnecessary move of 'is_vlan' initialization. - Roland ] Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index f8ac5333504d..ebadebe83b11 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1747,7 +1747,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int spc; int i; int err = 0; - u16 vlan; + u16 vlan = 0xffff; bool is_eth; bool is_vlan = false; bool is_grh; -- cgit v1.2.3 From c89d127128dba62458e956388fef7e5d728e1ded Mon Sep 17 00:00:00 2001 From: Syam Sidhardhan Date: Sun, 24 Feb 2013 23:20:05 +0000 Subject: IB/mlx4: Remove redundant NULL check before kfree kfree on NULL pointer is a no-op. Signed-off-by: Syam Sidhardhan Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e7d81c0d1ac5..3fdd1c17cef7 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1601,8 +1601,7 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); } out: - if (dm) - kfree(dm); + kfree(dm); return; } -- cgit v1.2.3 From e1b2f13aba9ff714d23ecd4a950e744ee7ad72e1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 21 Feb 2013 17:16:54 +0000 Subject: IB/srp: Track connection state properly Remove an assignment that incorrectly overwrites the connection state update by srp_connect_target(). Signed-off-by: Bart Van Assche Acked-by: David Dillow Cc: # 3.8 Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index d5088ce78290..94f76b9319c1 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1972,7 +1972,6 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) spin_unlock(&host->target_lock); target->state = SRP_TARGET_LIVE; - target->connected = false; scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); -- cgit v1.2.3 From 3780d1f08856f692116bcf026e4acf1c521df1c7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 21 Feb 2013 17:18:00 +0000 Subject: IB/srp: Avoid sending a task management function needlessly Do not send a task management function if sending will fail anyway because either there is no RDMA/RC connection or the QP is in the error state. Signed-off-by: Bart Van Assche Acked-by: David Dillow Cc: # 3.8 Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 94f76b9319c1..263325848462 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1695,6 +1695,9 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; + if (!target->connected || target->qp_in_error) + return -1; + init_completion(&target->tsk_mgmt_done); spin_lock_irq(&target->lock); @@ -1754,8 +1757,6 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n"); - if (target->qp_in_error) - return FAILED; if (srp_send_tsk_mgmt(target, SRP_TAG_NO_REQ, scmnd->device->lun, SRP_TSK_LUN_RESET)) return FAILED; -- cgit v1.2.3 From c7c4e7ff8047e43c45628b85ac200582e9404c39 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 21 Feb 2013 17:19:04 +0000 Subject: IB/srp: Avoid endless SCSI error handling loop If a SCSI command times out it is passed to the SCSI error handler. The SCSI error handler will try to abort the commands that timed out. If aborting fails, a device reset will be attempted. If the device reset also fails a host reset will be attempted. If the host reset also fails the whole procedure will be repeated. srp_abort() and srp_reset_device() fail for a QP in the error state. srp_reset_host() fails after host removal has started. Hence if the SCSI error handler gets invoked after host removal has started and with the QP in the error state an endless loop will be triggered. Modify the SCSI error handling functions in ib_srp as follows: - Abort SCSI commands properly even if the QP is in the error state. - Make srp_reset_host() reset SCSI requests even after host removal has already started or if reconnecting fails. Signed-off-by: Bart Van Assche Acked-by: David Dillow Cc: # 3.8 Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 263325848462..8a7eb9f98a0c 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -700,23 +700,24 @@ static int srp_reconnect_target(struct srp_target_port *target) struct Scsi_Host *shost = target->scsi_host; int i, ret; - if (target->state != SRP_TARGET_LIVE) - return -EAGAIN; - scsi_target_block(&shost->shost_gendev); srp_disconnect_target(target); /* - * Now get a new local CM ID so that we avoid confusing the - * target in case things are really fouled up. + * Now get a new local CM ID so that we avoid confusing the target in + * case things are really fouled up. Doing so also ensures that all CM + * callbacks will have finished before a new QP is allocated. */ ret = srp_new_cm_id(target); - if (ret) - goto unblock; - - ret = srp_create_target_ib(target); - if (ret) - goto unblock; + /* + * Whether or not creating a new CM ID succeeded, create a new + * QP. This guarantees that all completion callback function + * invocations have finished before request resetting starts. + */ + if (ret == 0) + ret = srp_create_target_ib(target); + else + srp_create_target_ib(target); for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { struct srp_request *req = &target->req_ring[i]; @@ -728,9 +729,9 @@ static int srp_reconnect_target(struct srp_target_port *target) for (i = 0; i < SRP_SQ_SIZE; ++i) list_add(&target->tx_ring[i]->list, &target->free_tx); - ret = srp_connect_target(target); + if (ret == 0) + ret = srp_connect_target(target); -unblock: scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : SDEV_TRANSPORT_OFFLINE); @@ -1739,7 +1740,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); - if (!req || target->qp_in_error || !srp_claim_req(target, req, scmnd)) + if (!req || !srp_claim_req(target, req, scmnd)) return FAILED; srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, SRP_TSK_ABORT_TASK); -- cgit v1.2.3 From 2ce19e72f4d570c87e025ee6fca4eae699a8b712 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 21 Feb 2013 17:20:00 +0000 Subject: IB/srp: Fail I/O requests if the transport is offline If an SRP target is no longer reachable and srp_reset_host() fails to reconnect then ib_srp will invoke scsi_remove_host(). That function will invoke __scsi_remove_device() for each LUN. And that last function will change the device state from SDEV_TRANSPORT_OFFLINE into SDEV_CANCEL. Certain user space software, e.g. older versions of multipathd, continue queueing I/O to SCSI devices that are in the SDEV_CANCEL state. If these I/O requests are submitted as SG_IO that means that the REQ_PREEMPT flag will be set and hence that these requests will be passed to srp_queuecommand(). These requests will time out. If new requests are queued fast enough from user space these active requests will prevent __scsi_remove_device() to finish. Avoid this by failing I/O requests in the SDEV_CANCEL state if the transport is offline. Introduce a new variable to keep track of the transport state instead of failing requests if (!target->connected || target->qp_in_error), so that the SCSI error handler has a chance to retry commands after a transport layer failure occurred. Signed-off-by: Bart Van Assche Cc: # 3.8 Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 7 +++++++ drivers/infiniband/ulp/srp/ib_srp.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 8a7eb9f98a0c..7ccf3284dda3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -734,6 +734,7 @@ static int srp_reconnect_target(struct srp_target_port *target) scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : SDEV_TRANSPORT_OFFLINE); + target->transport_offline = !!ret; if (ret) goto err; @@ -1353,6 +1354,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) unsigned long flags; int len; + if (unlikely(target->transport_offline)) { + scmnd->result = DID_NO_CONNECT << 16; + scmnd->scsi_done(scmnd); + return 0; + } + spin_lock_irqsave(&target->lock, flags); iu = __srp_get_tx_iu(target, SRP_IU_CMD); if (!iu) diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index de2d0b3c0bfe..66fbedda4571 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -140,6 +140,7 @@ struct srp_target_port { unsigned int cmd_sg_cnt; unsigned int indirect_size; bool allow_ext_sg; + bool transport_offline; /* Everything above this point is used in the hot path of * command processing. Try to keep them packed into cachelines. -- cgit v1.2.3 From f72dd56690aba26fc87fc64e98dd4cc66f27122c Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 25 Feb 2013 09:42:15 -0800 Subject: IPoIB: Free ipoib neigh on path record failure so path rec queries are retried If IPoIB fails to look up a path record (eg if it tries during an SM failover when one SM is dead but the new one hasn't taken over yet), the driver ends up with a neighbour structure but no address handle (AH). There's no mechanism to recover from this: any further packets sent to this destination will be silently dumped in ipoib_start_xmit(). Fix this by freeing the neighbour structures when a path rec query fails, so that the next packet queued to be sent will trigger a new path record query. Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 66d6da90982f..8534afd04e7c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -510,6 +510,9 @@ static void path_rec_completion(int status, spin_unlock_irqrestore(&priv->lock, flags); + if (IS_ERR_OR_NULL(ah)) + ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw); + if (old_ah) ipoib_put_ah(old_ah); -- cgit v1.2.3 From cc1ade94eeaa235b28fb139d4ba20b697be36768 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:10 +0000 Subject: mlx4_core: Disable memory windows for virtual functions Do not enable memory windows allocation for virtual functions. In addition, add a few safety checks, such as: * Verifying the PD of a new MPT matches the VF. * Making sure binding memory window isn't enabled for FMRs, and that new memory windows are not FMR themselves. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 11 ++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 16 +++++++ drivers/net/ethernet/mellanox/mlx4/mr.c | 14 ------- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 49 ++++++++++++++++++++++ 4 files changed, 75 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 8b3d0512a46b..a389612f8a4a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -757,15 +757,19 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, u64 flags; int err = 0; u8 field; + u32 bmme_flags; err = mlx4_cmd_box(dev, 0, outbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; - /* add port mng change event capability unconditionally to slaves */ + /* add port mng change event capability and disable mw type 1 + * unconditionally to slaves + */ MLX4_GET(flags, outbox->buf, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); flags |= MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV; + flags &= ~MLX4_DEV_CAP_FLAG_MEM_WINDOW; MLX4_PUT(outbox->buf, flags, QUERY_DEV_CAP_EXT_FLAGS_OFFSET); /* For guests, report Blueflame disabled */ @@ -773,6 +777,11 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); + /* For guests, disable mw type 2 */ + MLX4_GET(bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; + MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 5075236a140d..539212b1c8dc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -268,6 +268,22 @@ struct mlx4_icm_table { struct mlx4_icm **icm; }; +#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) +#define MLX4_MPT_FLAG_FREE (0x3UL << 28) +#define MLX4_MPT_FLAG_MIO (1 << 17) +#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) +#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) +#define MLX4_MPT_FLAG_REGION (1 << 8) + +#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) +#define MLX4_MPT_PD_FLAG_RAE (1 << 28) +#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) + +#define MLX4_MPT_QP_FLAG_BOUND_QP (1 << 7) + +#define MLX4_MPT_STATUS_SW 0xF0 +#define MLX4_MPT_STATUS_HW 0x00 + /* * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits. */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 06b16e4ace80..5e785bdcc694 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -44,20 +44,6 @@ #include "mlx4.h" #include "icm.h" -#define MLX4_MPT_FLAG_SW_OWNS (0xfUL << 28) -#define MLX4_MPT_FLAG_FREE (0x3UL << 28) -#define MLX4_MPT_FLAG_MIO (1 << 17) -#define MLX4_MPT_FLAG_BIND_ENABLE (1 << 15) -#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) -#define MLX4_MPT_FLAG_REGION (1 << 8) - -#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) -#define MLX4_MPT_PD_FLAG_RAE (1 << 28) -#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) - -#define MLX4_MPT_STATUS_SW 0xF0 -#define MLX4_MPT_STATUS_HW 0x00 - static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order) { int o; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 2287dfda6212..9185e2eef8fa 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1796,6 +1796,26 @@ static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt) return be32_to_cpu(mpt->mtt_sz); } +static u32 mr_get_pd(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->pd_flags) & 0x00ffffff; +} + +static int mr_is_fmr(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->pd_flags) & MLX4_MPT_PD_FLAG_FAST_REG; +} + +static int mr_is_bind_enabled(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_BIND_ENABLE; +} + +static int mr_is_region(struct mlx4_mpt_entry *mpt) +{ + return be32_to_cpu(mpt->flags) & MLX4_MPT_FLAG_REGION; +} + static int qp_get_mtt_addr(struct mlx4_qp_context *qpc) { return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8; @@ -1856,12 +1876,41 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz; int phys; int id; + u32 pd; + int pd_slave; id = index & mpt_mask(dev); err = mr_res_start_move_to(dev, slave, id, RES_MPT_HW, &mpt); if (err) return err; + /* Disable memory windows for VFs. */ + if (!mr_is_region(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + + /* Make sure that the PD bits related to the slave id are zeros. */ + pd = mr_get_pd(inbox->buf); + pd_slave = (pd >> 17) & 0x7f; + if (pd_slave != 0 && pd_slave != slave) { + err = -EPERM; + goto ex_abort; + } + + if (mr_is_fmr(inbox->buf)) { + /* FMR and Bind Enable are forbidden in slave devices. */ + if (mr_is_bind_enabled(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + /* FMR and Memory Windows are also forbidden. */ + if (!mr_is_region(inbox->buf)) { + err = -EPERM; + goto ex_abort; + } + } + phys = mr_phys_mpt(inbox->buf); if (!phys) { err = get_res(dev, slave, mtt_base, RES_MTT, &mtt); -- cgit v1.2.3 From e448834e3545e02789897ab68905220aea39cd40 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:11 +0000 Subject: mlx4_core: Enable memory windows in {INIT, QUERY}_HCA Add memory windows-related code to INIT_HCA and QUERY_HCA. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 3 +++ drivers/net/ethernet/mellanox/mlx4/fw.h | 1 + drivers/net/ethernet/mellanox/mlx4/main.c | 4 ++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 ++ 4 files changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index a389612f8a4a..d136b3695258 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1207,6 +1207,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26) #define INIT_HCA_TPT_OFFSET 0x0f0 #define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00) +#define INIT_HCA_TPT_MW_OFFSET (INIT_HCA_TPT_OFFSET + 0x08) #define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b) #define INIT_HCA_MTT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x10) #define INIT_HCA_CMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x18) @@ -1323,6 +1324,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) /* TPT attributes */ MLX4_PUT(inbox, param->dmpt_base, INIT_HCA_DMPT_BASE_OFFSET); + MLX4_PUT(inbox, param->mw_enabled, INIT_HCA_TPT_MW_OFFSET); MLX4_PUT(inbox, param->log_mpt_sz, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_PUT(inbox, param->mtt_base, INIT_HCA_MTT_BASE_OFFSET); MLX4_PUT(inbox, param->cmpt_base, INIT_HCA_CMPT_BASE_OFFSET); @@ -1419,6 +1421,7 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev, /* TPT attributes */ MLX4_GET(param->dmpt_base, outbox, INIT_HCA_DMPT_BASE_OFFSET); + MLX4_GET(param->mw_enabled, outbox, INIT_HCA_TPT_MW_OFFSET); MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET); MLX4_GET(param->mtt_base, outbox, INIT_HCA_MTT_BASE_OFFSET); MLX4_GET(param->cmpt_base, outbox, INIT_HCA_CMPT_BASE_OFFSET); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index dbf2f69cc59f..9f1a25ca002c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -170,6 +170,7 @@ struct mlx4_init_hca_param { u8 log_mc_table_sz; u8 log_mpt_sz; u8 log_uar_sz; + u8 mw_enabled; /* Enable memory windows */ u8 uar_page_sz; /* log pg sz in 4k chunks */ u8 fs_hash_enable_bits; u8 steering_mode; /* for QUERY_HCA */ diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 5163af314990..7fdd04af379d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1447,6 +1447,10 @@ static int mlx4_init_hca(struct mlx4_dev *dev) init_hca.log_uar_sz = ilog2(dev->caps.num_uars); init_hca.uar_page_sz = PAGE_SHIFT - 12; + init_hca.mw_enabled = 0; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) + init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE; err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 539212b1c8dc..8b75d5ef0940 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -60,6 +60,8 @@ #define MLX4_FS_MGM_LOG_ENTRY_SIZE 7 #define MLX4_FS_NUM_MCG (1 << 17) +#define INIT_HCA_TPT_MW_ENABLE (1 << 7) + enum { MLX4_FS_L2_HASH = 0, MLX4_FS_L2_L3_L4_HASH, -- cgit v1.2.3 From 804d6a89a5c0b076317966bcbcd7a63d42241831 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:14 +0000 Subject: mlx4: Implement memory windows allocation and deallocation Implement MW allocation and deallocation in mlx4_core and mlx4_ib. Pass down the enable bind flag when registering memory regions. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 12 +++++ drivers/infiniband/hw/mlx4/mr.c | 52 ++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mr.c | 95 +++++++++++++++++++++++++++++++++ include/linux/mlx4/device.h | 20 ++++++- 4 files changed, 178 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index cce5dde94bc1..9ba0aaf3a58e 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -116,6 +116,11 @@ struct mlx4_ib_mr { struct ib_umem *umem; }; +struct mlx4_ib_mw { + struct ib_mw ibmw; + struct mlx4_mw mmw; +}; + struct mlx4_ib_fast_reg_page_list { struct ib_fast_reg_page_list ibfrpl; __be64 *mapped_page_list; @@ -533,6 +538,11 @@ static inline struct mlx4_ib_mr *to_mmr(struct ib_mr *ibmr) return container_of(ibmr, struct mlx4_ib_mr, ibmr); } +static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw) +{ + return container_of(ibmw, struct mlx4_ib_mw, ibmw); +} + static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl) { return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl); @@ -581,6 +591,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 254e1cf26439..5adf4c47ee18 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -41,9 +41,19 @@ static u32 convert_access(int acc) (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | + (acc & IB_ACCESS_MW_BIND ? MLX4_PERM_BIND_MW : 0) | MLX4_PERM_LOCAL_READ; } +static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type) +{ + switch (type) { + case IB_MW_TYPE_1: return MLX4_MW_TYPE_1; + case IB_MW_TYPE_2: return MLX4_MW_TYPE_2; + default: return -1; + } +} + struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx4_ib_mr *mr; @@ -189,6 +199,48 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr) return 0; } +struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + struct mlx4_ib_mw *mw; + int err; + + mw = kmalloc(sizeof(*mw), GFP_KERNEL); + if (!mw) + return ERR_PTR(-ENOMEM); + + err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, + to_mlx4_type(type), &mw->mmw); + if (err) + goto err_free; + + err = mlx4_mw_enable(dev->dev, &mw->mmw); + if (err) + goto err_mw; + + mw->ibmw.rkey = mw->mmw.key; + + return &mw->ibmw; + +err_mw: + mlx4_mw_free(dev->dev, &mw->mmw); + +err_free: + kfree(mw); + + return ERR_PTR(err); +} + +int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) +{ + struct mlx4_ib_mw *mw = to_mmw(ibmw); + + mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); + kfree(mw); + + return 0; +} + struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 5e785bdcc694..602ca9bf78e4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -654,6 +654,101 @@ int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, } EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt); +int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, + struct mlx4_mw *mw) +{ + u32 index; + + if ((type == MLX4_MW_TYPE_1 && + !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) || + (type == MLX4_MW_TYPE_2 && + !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN))) + return -ENOTSUPP; + + index = mlx4_mpt_reserve(dev); + if (index == -1) + return -ENOMEM; + + mw->key = hw_index_to_key(index); + mw->pd = pd; + mw->type = type; + mw->enabled = MLX4_MPT_DISABLED; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mw_alloc); + +int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_mpt_entry *mpt_entry; + int err; + + err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key)); + if (err) + return err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + err = PTR_ERR(mailbox); + goto err_table; + } + mpt_entry = mailbox->buf; + + memset(mpt_entry, 0, sizeof(*mpt_entry)); + + /* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned + * off, thus creating a memory window and not a memory region. + */ + mpt_entry->key = cpu_to_be32(key_to_hw_index(mw->key)); + mpt_entry->pd_flags = cpu_to_be32(mw->pd); + if (mw->type == MLX4_MW_TYPE_2) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE); + mpt_entry->qpn = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP); + mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV); + } + + err = mlx4_SW2HW_MPT(dev, mailbox, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) { + mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err); + goto err_cmd; + } + mw->enabled = MLX4_MPT_EN_HW; + + mlx4_free_cmd_mailbox(dev, mailbox); + + return 0; + +err_cmd: + mlx4_free_cmd_mailbox(dev, mailbox); + +err_table: + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mw_enable); + +void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw) +{ + int err; + + if (mw->enabled == MLX4_MPT_EN_HW) { + err = mlx4_HW2SW_MPT(dev, NULL, + key_to_hw_index(mw->key) & + (dev->caps.num_mpts - 1)); + if (err) + mlx4_warn(dev, "xxx HW2SW_MPT failed (%d)\n", err); + + mw->enabled = MLX4_MPT_EN_SW; + } + if (mw->enabled) + mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key)); + mlx4_mpt_release(dev, key_to_hw_index(mw->key)); +} +EXPORT_SYMBOL_GPL(mlx4_mw_free); + int mlx4_init_mr_table(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index e9fe8caaf8bb..67b4695e5940 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -170,6 +170,7 @@ enum { #define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { + MLX4_BMME_FLAG_WIN_TYPE_2B = 1 << 1, MLX4_BMME_FLAG_LOCAL_INV = 1 << 6, MLX4_BMME_FLAG_REMOTE_INV = 1 << 7, MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, @@ -237,7 +238,8 @@ enum { MLX4_PERM_LOCAL_WRITE = 1 << 11, MLX4_PERM_REMOTE_READ = 1 << 12, MLX4_PERM_REMOTE_WRITE = 1 << 13, - MLX4_PERM_ATOMIC = 1 << 14 + MLX4_PERM_ATOMIC = 1 << 14, + MLX4_PERM_BIND_MW = 1 << 15, }; enum { @@ -503,6 +505,18 @@ struct mlx4_mr { int enabled; }; +enum mlx4_mw_type { + MLX4_MW_TYPE_1 = 1, + MLX4_MW_TYPE_2 = 2, +}; + +struct mlx4_mw { + u32 key; + u32 pd; + enum mlx4_mw_type type; + int enabled; +}; + struct mlx4_fmr { struct mlx4_mr mr; struct mlx4_mpt_entry *mpt; @@ -803,6 +817,10 @@ int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr); int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr); int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type, + struct mlx4_mw *mw); +void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw); +int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw); int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, int start_index, int npages, u64 *page_list); int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt, -- cgit v1.2.3 From 6ff63e194066a1f14aee805366a1d79c541fddae Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:15 +0000 Subject: IB/mlx4: Support memory window binding * Implement memory windows binding in mlx4_ib_post_send. * Implement mlx4_ib_bind_mw by deferring to mlx4_ib_post_send. * Rename MLX4_WQE_FMR_PERM_* flags to MLX4_WQE_FMR_AND_BIND_PERM_*, indicating that they are used both for fast registration work requests, and for memory window bind work requests. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 ++ drivers/infiniband/hw/mlx4/mr.c | 22 ++++++++++++++++++++++ drivers/infiniband/hw/mlx4/qp.c | 35 ++++++++++++++++++++++++++++++++--- include/linux/mlx4/qp.h | 11 ++++++++--- 4 files changed, 64 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 9ba0aaf3a58e..f61ec26500c4 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -592,6 +592,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr); struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type); +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 5adf4c47ee18..e471f089ff00 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -231,6 +231,28 @@ err_free: return ERR_PTR(err); } +int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + struct ib_send_wr wr; + struct ib_send_wr *bad_wr; + int ret; + + memset(&wr, 0, sizeof(wr)); + wr.opcode = IB_WR_BIND_MW; + wr.wr_id = mw_bind->wr_id; + wr.send_flags = mw_bind->send_flags; + wr.wr.bind_mw.mw = mw; + wr.wr.bind_mw.bind_info = mw_bind->bind_info; + wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey); + + ret = mlx4_ib_post_send(qp, &wr, &bad_wr); + if (!ret) + mw->rkey = wr.wr.bind_mw.rkey; + + return ret; +} + int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) { struct mlx4_ib_mw *mw = to_mmw(ibmw); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index ebadebe83b11..35cced2a4da8 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -104,6 +104,7 @@ static const __be32 mlx4_ib_opcode[] = { [IB_WR_FAST_REG_MR] = cpu_to_be32(MLX4_OPCODE_FMR), [IB_WR_MASKED_ATOMIC_CMP_AND_SWP] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS), [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA), + [IB_WR_BIND_MW] = cpu_to_be32(MLX4_OPCODE_BIND_MW), }; static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) @@ -1953,9 +1954,12 @@ static int mlx4_wq_overflow(struct mlx4_ib_wq *wq, int nreq, struct ib_cq *ib_cq static __be32 convert_access(int acc) { - return (acc & IB_ACCESS_REMOTE_ATOMIC ? cpu_to_be32(MLX4_WQE_FMR_PERM_ATOMIC) : 0) | - (acc & IB_ACCESS_REMOTE_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_WRITE) : 0) | - (acc & IB_ACCESS_REMOTE_READ ? cpu_to_be32(MLX4_WQE_FMR_PERM_REMOTE_READ) : 0) | + return (acc & IB_ACCESS_REMOTE_ATOMIC ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC) : 0) | + (acc & IB_ACCESS_REMOTE_WRITE ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE) : 0) | + (acc & IB_ACCESS_REMOTE_READ ? + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ) : 0) | (acc & IB_ACCESS_LOCAL_WRITE ? cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_WRITE) : 0) | cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ); } @@ -1981,6 +1985,24 @@ static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr) fseg->reserved[1] = 0; } +static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr) +{ + bseg->flags1 = + convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) & + cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ | + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE | + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC); + bseg->flags2 = 0; + if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2); + if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED) + bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED); + bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey); + bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey); + bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr); + bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length); +} + static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey) { memset(iseg, 0, sizeof(*iseg)); @@ -2289,6 +2311,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += sizeof (struct mlx4_wqe_fmr_seg) / 16; break; + case IB_WR_BIND_MW: + ctrl->srcrb_flags |= + cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER); + set_bind_seg(wqe, wr); + wqe += sizeof(struct mlx4_wqe_bind_seg); + size += sizeof(struct mlx4_wqe_bind_seg) / 16; + break; default: /* No extra segments required for sends */ break; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 6c8a68c602be..67f46ad6920a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -265,6 +265,11 @@ struct mlx4_wqe_lso_seg { __be32 header[0]; }; +enum mlx4_wqe_bind_seg_flags2 { + MLX4_WQE_BIND_ZERO_BASED = (1 << 30), + MLX4_WQE_BIND_TYPE_2 = (1 << 31), +}; + struct mlx4_wqe_bind_seg { __be32 flags1; __be32 flags2; @@ -277,9 +282,9 @@ struct mlx4_wqe_bind_seg { enum { MLX4_WQE_FMR_PERM_LOCAL_READ = 1 << 27, MLX4_WQE_FMR_PERM_LOCAL_WRITE = 1 << 28, - MLX4_WQE_FMR_PERM_REMOTE_READ = 1 << 29, - MLX4_WQE_FMR_PERM_REMOTE_WRITE = 1 << 30, - MLX4_WQE_FMR_PERM_ATOMIC = 1 << 31 + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ = 1 << 29, + MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE = 1 << 30, + MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC = 1 << 31 }; struct mlx4_wqe_fmr_seg { -- cgit v1.2.3 From b425388dc1f5672006517f8927d66bf7f22649d6 Mon Sep 17 00:00:00 2001 From: Shani Michaeli Date: Wed, 6 Feb 2013 16:19:16 +0000 Subject: IB/mlx4: Advertise MW support Indicate memory windows support through device capabilities, kernel verb entries and the relevant uverbs command mask entries. Signed-off-by: Haggai Eran Signed-off-by: Shani Michaeli Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3fdd1c17cef7..23d734349d8e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -137,6 +137,14 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) props->device_cap_flags |= IB_DEVICE_XRC; + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW; + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { + if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B) + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; + else + props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; + } props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; @@ -1434,6 +1442,17 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; } + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || + dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { + ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; + ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw; + ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; + + ibdev->ib_dev.uverbs_cmd_mask |= + (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | + (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + } + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; -- cgit v1.2.3