author     Linus Torvalds <torvalds@linux-foundation.org>   2023-06-30 07:01:17 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>   2023-06-30 07:01:17 +0300
commit     7ede5f78a0d74b574791c7eb0e2ca6e54b80c93c (patch)
tree       25801e614cc8fdf3569d6deb8af8d9ec3a380703 /drivers/infiniband/sw
parent     31929ae00890d921618b0b449722dcdf4a4416cc (diff)
parent     5f004bcaee4cb552cf1b46a505f18f08777db7e5 (diff)
download   linux-7ede5f78a0d74b574791c7eb0e2ca6e54b80c93c.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This cycle saw a focus on rxe and bnxt_re drivers:
- Code cleanups for irdma, rxe, rtrs, hns, vmw_pvrdma
- rxe uses workqueues instead of tasklets (see the conversion sketch after this list)
- rxe has better compliance around access checks for MRs and rereg_mr
- mana supports the 'v2' FW interface for RX coalescing
- hfi1 bug fix for stale cache entries in its MR cache
- mlx5 bug fix to handle FW failures when destroying QPs
- erdma HW has a new, secure doorbell allocation mechanism for uverbs
- Lots of small cleanups and rework in bnxt_re:
- Use the common mmap functions
- Support disassociation
- Improve FW command flow
- Support for 'low latency push'"
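
The tasklet-to-workqueue bullet above corresponds to the rxe_task.c changes in the diff at the bottom of this page. For orientation, here is a minimal sketch of the generic conversion pattern -- not the rxe code itself; the names my_wq, my_task and my_task_work are illustrative stand-ins for rxe_wq, struct rxe_task and do_work:

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;	/* illustrative; rxe calls this rxe_wq */

struct my_task {
	struct work_struct work;	/* replaces struct tasklet_struct */
	int (*func)(void *arg);
	void *arg;
};

/* work handler: runs in process context and so may sleep,
 * unlike a tasklet handler which runs in softirq context
 */
static void my_task_work(struct work_struct *work)
{
	struct my_task *task = container_of(work, struct my_task, work);

	task->func(task->arg);
}

static int my_wq_init(void)
{
	/* WQ_UNBOUND lets work items run on any CPU */
	my_wq = alloc_workqueue("my_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
	if (!my_wq)
		return -ENOMEM;
	return 0;
}

static void my_task_init(struct my_task *task, int (*func)(void *), void *arg)
{
	task->func = func;
	task->arg = arg;
	INIT_WORK(&task->work, my_task_work);	/* was tasklet_setup() */
}

static void my_task_sched(struct my_task *task)
{
	queue_work(my_wq, &task->work);		/* was tasklet_schedule() */
}

The practical payoff is that a work item may sleep where a tasklet may not; rxe pairs this with rxe_alloc_wq()/rxe_destroy_wq() calls at module init/exit, as the rxe.c hunk below shows.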
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (71 commits)
RDMA/bnxt_re: Fix an IS_ERR() vs NULL check
RDMA/bnxt_re: Fix spelling mistake "priviledged" -> "privileged"
RDMA/bnxt_re: Remove duplicated include in bnxt_re/main.c
RDMA/bnxt_re: Refactor code around bnxt_qplib_map_rc()
RDMA/bnxt_re: Remove incorrect return check from slow path
RDMA/bnxt_re: Enable low latency push
RDMA/bnxt_re: Reorg the bar mapping
RDMA/bnxt_re: Move the interface version to chip context structure
RDMA/bnxt_re: Query function capabilities from firmware
RDMA/bnxt_re: Optimize the bnxt_re_init_hwrm_hdr usage
RDMA/bnxt_re: Add disassociate ucontext support
RDMA/bnxt_re: Use the common mmap helper functions
RDMA/bnxt_re: Initialize opcode while sending message
RDMA/cma: Remove NULL check before dev_{put, hold}
RDMA/rxe: Simplify cq->notify code
RDMA/rxe: Fixes mr access supported list
RDMA/bnxt_re: optimize the parameters passed to helper functions
RDMA/bnxt_re: remove redundant cmdq_bitmap
RDMA/bnxt_re: use firmware provided max request timeout
RDMA/bnxt_re: cancel all control path command waiters upon error
...
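
Among the rxe fixes listed above, the cq->notify change (visible in the rxe_cq.c and rxe_verbs.c hunks below) turns the field into a bitmask: requests accumulate with |= and are tested with &, instead of an assignment and == test that let a later IB_CQ_SOLICITED request overwrite a pending IB_CQ_NEXT_COMP one. A self-contained toy illustration of the bitmask semantics, using stand-in constants rather than the kernel's ib_cq_notify_flags:

#include <stdio.h>

/* stand-ins for the kernel's ib_cq_notify_flags bits */
#define CQ_SOLICITED	(1 << 0)
#define CQ_NEXT_COMP	(1 << 1)

int main(void)
{
	int notify = 0;

	/* consumer asks for next-completion, then for solicited-only;
	 * with plain assignment the second request would clobber the first
	 */
	notify |= CQ_NEXT_COMP;
	notify |= CQ_SOLICITED;

	/* bit tests: both requests survive */
	printf("next_comp armed: %d\n", !!(notify & CQ_NEXT_COMP));
	printf("solicited armed: %d\n", !!(notify & CQ_SOLICITED));
	return 0;
}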
Diffstat (limited to 'drivers/infiniband/sw')
-rw-r--r--   drivers/infiniband/sw/rxe/rxe.c           9
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_comp.c      2
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_cq.c        5
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_loc.h       2
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_mr.c       21
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_mw.c       22
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_opcode.h    3
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_param.h     2
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_qp.c        7
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_req.c       2
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_resp.c     25
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_task.c    110
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_task.h      6
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_verbs.c    45
-rw-r--r--   drivers/infiniband/sw/rxe/rxe_verbs.h    21
15 files changed, 198 insertions, 84 deletions
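
A recurring idiom in the rxe hunks below is validating caller-supplied access flags against an explicit supported set (RXE_ACCESS_SUPPORTED_MR/QP/MW) and failing with -EOPNOTSUPP on anything extra. A minimal sketch of that idiom, with hypothetical MY_ACCESS_* bits standing in for the kernel's IB_ACCESS_* flags:

#include <linux/errno.h>

/* hypothetical flag bits standing in for IB_ACCESS_* */
#define MY_ACCESS_LOCAL_WRITE	(1 << 0)
#define MY_ACCESS_REMOTE_READ	(1 << 1)
#define MY_ACCESS_REMOTE_WRITE	(1 << 2)

#define MY_ACCESS_SUPPORTED	(MY_ACCESS_LOCAL_WRITE | \
				 MY_ACCESS_REMOTE_READ | \
				 MY_ACCESS_REMOTE_WRITE)

static int my_check_access(int access)
{
	/* any bit outside the supported mask fails the whole request */
	if (access & ~MY_ACCESS_SUPPORTED)
		return -EOPNOTSUPP;
	return 0;
}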
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 7a7e713de52d..54c723a6edda 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -212,10 +212,16 @@ static int __init rxe_module_init(void)
 {
 	int err;
 
-	err = rxe_net_init();
+	err = rxe_alloc_wq();
 	if (err)
 		return err;
 
+	err = rxe_net_init();
+	if (err) {
+		rxe_destroy_wq();
+		return err;
+	}
+
 	rdma_link_register(&rxe_link_ops);
 	pr_info("loaded\n");
 	return 0;
@@ -226,6 +232,7 @@ static void __exit rxe_module_exit(void)
 	rdma_link_unregister(&rxe_link_ops);
 	ib_unregister_driver(RDMA_DRIVER_RXE);
 	rxe_net_exit();
+	rxe_destroy_wq();
 
 	pr_info("unloaded\n");
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index f46c5a5fd0ae..5111735aafae 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -832,7 +832,7 @@ int rxe_completer(struct rxe_qp *qp)
 	}
 
 	/* A non-zero return value will cause rxe_do_task to
-	 * exit its loop and end the tasklet. A zero return
+	 * exit its loop and end the work item. A zero return
 	 * will continue looping and return to rxe_completer
 	 */
 done:
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index 6ca2a05b6a2a..d5486cbb3f10 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -113,10 +113,9 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
 
 	queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT);
 
-	if ((cq->notify == IB_CQ_NEXT_COMP) ||
-	    (cq->notify == IB_CQ_SOLICITED && solicited)) {
+	if ((cq->notify & IB_CQ_NEXT_COMP) ||
+	    (cq->notify & IB_CQ_SOLICITED && solicited)) {
 		cq->notify = 0;
-
 		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 804b15e929dd..666e06a82bc9 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -31,8 +31,6 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe,
 
 int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
 
-void rxe_cq_disable(struct rxe_cq *cq);
-
 void rxe_cq_cleanup(struct rxe_pool_elem *elem);
 
 /* rxe_mcast.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 0e538fafcc20..f54042e9aeb2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -45,22 +45,17 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
 	}
 }
 
-#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ	\
-				 | IB_ACCESS_REMOTE_WRITE	\
-				 | IB_ACCESS_REMOTE_ATOMIC)
-
 static void rxe_mr_init(int access, struct rxe_mr *mr)
 {
-	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
-	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
+	u32 key = mr->elem.index << 8 | rxe_get_next_key(-1);
 
 	/* set ibmr->l/rkey and also copy into private l/rkey
 	 * for user MRs these will always be the same
 	 * for cases where caller 'owns' the key portion
 	 * they may be different until REG_MR WQE is executed.
 	 */
-	mr->lkey = mr->ibmr.lkey = lkey;
-	mr->rkey = mr->ibmr.rkey = rkey;
+	mr->lkey = mr->ibmr.lkey = key;
+	mr->rkey = mr->ibmr.rkey = key;
 
 	mr->access = access;
 	mr->ibmr.page_size = PAGE_SIZE;
@@ -195,7 +190,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
 	int err;
 
 	/* always allow remote access for FMRs */
-	rxe_mr_init(IB_ACCESS_REMOTE, mr);
+	rxe_mr_init(RXE_ACCESS_REMOTE, mr);
 
 	err = rxe_mr_alloc(mr, max_pages);
 	if (err)
@@ -644,6 +639,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
 {
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	struct rxe_mr *mr;
+	int remote;
 	int ret;
 
 	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
@@ -653,9 +649,10 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
 		goto err;
 	}
 
-	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
+	remote = mr->access & RXE_ACCESS_REMOTE;
+	if (remote ? (key != mr->rkey) : (key != mr->lkey)) {
 		rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
-			key, (mr->rkey ? mr->rkey : mr->lkey));
+			key, (remote ? mr->rkey : mr->lkey));
 		ret = -EINVAL;
 		goto err_drop_ref;
 	}
@@ -715,7 +712,7 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 
 	mr->access = access;
 	mr->lkey = key;
-	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
+	mr->rkey = key;
 	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
 	mr->state = RXE_MR_STATE_VALID;
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index afa5ce1a7116..d8a43d87de93 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -48,7 +48,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw)
 }
 
 static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-			     struct rxe_mw *mw, struct rxe_mr *mr)
+			     struct rxe_mw *mw, struct rxe_mr *mr, int access)
 {
 	if (mw->ibmw.type == IB_MW_TYPE_1) {
 		if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
@@ -58,7 +58,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		}
 
 		/* o10-36.2.2 */
-		if (unlikely((mw->access & IB_ZERO_BASED))) {
+		if (unlikely((access & IB_ZERO_BASED))) {
 			rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n");
 			return -EINVAL;
 		}
@@ -104,7 +104,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	}
 
 	/* C10-74 */
-	if (unlikely((mw->access &
+	if (unlikely((access &
 		      (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
 		     !(mr->access & IB_ACCESS_LOCAL_WRITE))) {
 		rxe_dbg_mw(mw,
@@ -113,7 +113,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	}
 
 	/* C10-75 */
-	if (mw->access & IB_ZERO_BASED) {
+	if (access & IB_ZERO_BASED) {
 		if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) {
 			rxe_dbg_mw(mw,
 				   "attempt to bind a ZB MW outside of the MR\n");
@@ -133,12 +133,12 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 }
 
 static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
-			   struct rxe_mw *mw, struct rxe_mr *mr)
+			   struct rxe_mw *mw, struct rxe_mr *mr, int access)
 {
 	u32 key = wqe->wr.wr.mw.rkey & 0xff;
 
 	mw->rkey = (mw->rkey & ~0xff) | key;
-	mw->access = wqe->wr.wr.mw.access;
+	mw->access = access;
 	mw->state = RXE_MW_STATE_VALID;
 	mw->addr = wqe->wr.wr.mw.addr;
 	mw->length = wqe->wr.wr.mw.length;
@@ -169,6 +169,7 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
 	u32 mw_rkey = wqe->wr.wr.mw.mw_rkey;
 	u32 mr_lkey = wqe->wr.wr.mw.mr_lkey;
+	int access = wqe->wr.wr.mw.access;
 
 	mw = rxe_pool_get_index(&rxe->mw_pool, mw_rkey >> 8);
 	if (unlikely(!mw)) {
@@ -196,13 +197,18 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 		mr = NULL;
 	}
 
+	if (access & ~RXE_ACCESS_SUPPORTED_MW) {
+		rxe_err_mw(mw, "access %#x not supported", access);
+		return -EOPNOTSUPP;
+	}
+
 	spin_lock_bh(&mw->lock);
 
-	ret = rxe_check_bind_mw(qp, wqe, mw, mr);
+	ret = rxe_check_bind_mw(qp, wqe, mw, mr, access);
 	if (ret)
 		goto err_unlock;
 
-	rxe_do_bind_mw(qp, wqe, mw, mr);
+	rxe_do_bind_mw(qp, wqe, mw, mr, access);
 
 err_unlock:
 	spin_unlock_bh(&mw->lock);
 err_drop_mr:
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index cea4e0a63919..5686b691d6b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -91,6 +91,9 @@ enum rxe_hdr_mask {
 	RXE_READ_OR_ATOMIC_MASK	= (RXE_READ_MASK | RXE_ATOMIC_MASK),
 	RXE_WRITE_OR_SEND_MASK	= (RXE_WRITE_MASK | RXE_SEND_MASK),
 	RXE_READ_OR_WRITE_MASK	= (RXE_READ_MASK | RXE_WRITE_MASK),
+	RXE_RDMA_OP_MASK	= (RXE_READ_MASK | RXE_WRITE_MASK |
+				   RXE_ATOMIC_WRITE_MASK | RXE_FLUSH_MASK |
+				   RXE_ATOMIC_MASK),
 };
 
 #define OPCODE_NONE		(-1)
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 7b41d79e72b2..d2f57ead78ad 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -112,7 +112,7 @@ enum rxe_device_param {
 	RXE_INFLIGHT_SKBS_PER_QP_HIGH	= 64,
 	RXE_INFLIGHT_SKBS_PER_QP_LOW	= 16,
 
-	/* Max number of interations of each tasklet
+	/* Max number of interations of each work item
 	 * before yielding the cpu to let other
 	 * work make progress
 	 */
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a0f206431cf8..a569b111a9d2 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -392,6 +392,13 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 	if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
 		goto err1;
 
+	if (mask & IB_QP_ACCESS_FLAGS) {
+		if (!(qp_type(qp) == IB_QPT_RC || qp_type(qp) == IB_QPT_UC))
+			goto err1;
+		if (attr->qp_access_flags & ~RXE_ACCESS_SUPPORTED_QP)
+			goto err1;
+	}
+
 	if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr))
 		goto err1;
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 5fe7cbae3031..2171f19494bc 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -857,7 +857,7 @@ int rxe_requester(struct rxe_qp *qp)
 	update_state(qp, &pkt);
 
 	/* A non-zero return value will cause rxe_do_task to
-	 * exit its loop and end the tasklet. A zero return
+	 * exit its loop and end the work item. A zero return
 	 * will continue looping and return to rxe_requester
 	 */
 done:
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index ee68306555b9..64c64f5f36a8 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -387,7 +387,10 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
 		}
 	}
 
-	return RESPST_CHK_RKEY;
+	if (pkt->mask & RXE_RDMA_OP_MASK)
+		return RESPST_CHK_RKEY;
+	else
+		return RESPST_EXECUTE;
 }
 
 /* if the reth length field is zero we can assume nothing
@@ -434,6 +437,10 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	enum resp_states state;
 	int access = 0;
 
+	/* parse RETH or ATMETH header for first/only packets
+	 * for va, length, rkey, etc. or use current value for
+	 * middle/last packets.
+	 */
 	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 		if (pkt->mask & RXE_RETH_MASK)
 			qp_resp_from_reth(qp, pkt);
@@ -454,7 +461,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 			qp_resp_from_atmeth(qp, pkt);
 		access = IB_ACCESS_REMOTE_ATOMIC;
 	} else {
-		return RESPST_EXECUTE;
+		/* shouldn't happen */
+		WARN_ON(1);
 	}
 
 	/* A zero-byte read or write op is not required to
@@ -1449,8 +1457,17 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify)
 	struct rxe_recv_wqe *wqe;
 	int err;
 
-	if (qp->srq)
+	if (qp->srq) {
+		if (notify && qp->ibqp.event_handler) {
+			struct ib_event ev;
+
+			ev.device = qp->ibqp.device;
+			ev.element.qp = &qp->ibqp;
+			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+		}
 		return;
+	}
 
 	while ((wqe = queue_head(q, q->type))) {
 		if (notify) {
@@ -1657,7 +1674,7 @@ int rxe_responder(struct rxe_qp *qp)
 	}
 
 	/* A non-zero return value will cause rxe_do_task to
-	 * exit its loop and end the tasklet. A zero return
+	 * exit its loop and end the work item. A zero return
 	 * will continue looping and return to rxe_responder
 	 */
 done:
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index fb9a6bc8e620..1501120d4f52 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -6,8 +6,24 @@
 
 #include "rxe.h"
 
+static struct workqueue_struct *rxe_wq;
+
+int rxe_alloc_wq(void)
+{
+	rxe_wq = alloc_workqueue("rxe_wq", WQ_UNBOUND, WQ_MAX_ACTIVE);
+	if (!rxe_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void rxe_destroy_wq(void)
+{
+	destroy_workqueue(rxe_wq);
+}
+
 /* Check if task is idle i.e. not running, not scheduled in
- * tasklet queue and not draining. If so move to busy to
+ * work queue and not draining. If so move to busy to
  * reserve a slot in do_task() by setting to busy and taking
  * a qp reference to cover the gap from now until the task finishes.
  * state will move out of busy if task returns a non zero value
@@ -21,9 +37,6 @@ static bool __reserve_if_idle(struct rxe_task *task)
 {
 	WARN_ON(rxe_read(task->qp) <= 0);
 
-	if (task->tasklet.state & BIT(TASKLET_STATE_SCHED))
-		return false;
-
 	if (task->state == TASK_STATE_IDLE) {
 		rxe_get(task->qp);
 		task->state = TASK_STATE_BUSY;
@@ -38,7 +51,7 @@ static bool __reserve_if_idle(struct rxe_task *task)
 }
 
 /* check if task is idle or drained and not currently
- * scheduled in the tasklet queue. This routine is
+ * scheduled in the work queue. This routine is
  * called by rxe_cleanup_task or rxe_disable_task to
  * see if the queue is empty.
  * Context: caller should hold task->lock.
 */
@@ -46,7 +59,7 @@ static bool __reserve_if_idle(struct rxe_task *task)
 static bool __is_done(struct rxe_task *task)
 {
-	if (task->tasklet.state & BIT(TASKLET_STATE_SCHED))
+	if (work_pending(&task->work))
 		return false;
 
 	if (task->state == TASK_STATE_IDLE ||
@@ -77,23 +90,23 @@ static bool is_done(struct rxe_task *task)
  * schedules the task. They must call __reserve_if_idle to
  * move the task to busy before calling or scheduling.
  * The task can also be moved to drained or invalid
- * by calls to rxe-cleanup_task or rxe_disable_task.
+ * by calls to rxe_cleanup_task or rxe_disable_task.
  * In that case tasks which get here are not executed but
  * just flushed. The tasks are designed to look to see if
- * there is work to do and do part of it before returning
+ * there is work to do and then do part of it before returning
  * here with a return value of zero until all the work
- * has been consumed then it retuens a non-zero value.
+ * has been consumed then it returns a non-zero value.
  * The number of times the task can be run is limited by
  * max iterations so one task cannot hold the cpu forever.
+ * If the limit is hit and work remains the task is rescheduled.
 */
-static void do_task(struct tasklet_struct *t)
+static void do_task(struct rxe_task *task)
 {
-	int cont;
-	int ret;
-	struct rxe_task *task = from_tasklet(task, t, tasklet);
 	unsigned int iterations;
 	unsigned long flags;
 	int resched = 0;
+	int cont;
+	int ret;
 
 	WARN_ON(rxe_read(task->qp) <= 0);
 
@@ -115,25 +128,22 @@ static void do_task(struct tasklet_struct *t)
 		} while (ret == 0 && iterations-- > 0);
 
 		spin_lock_irqsave(&task->lock, flags);
+		/* we're not done yet but we ran out of iterations.
+		 * yield the cpu and reschedule the task
+		 */
+		if (!ret) {
+			task->state = TASK_STATE_IDLE;
+			resched = 1;
+			goto exit;
+		}
+
 		switch (task->state) {
 		case TASK_STATE_BUSY:
-			if (ret) {
-				task->state = TASK_STATE_IDLE;
-			} else {
-				/* This can happen if the client
-				 * can add work faster than the
-				 * tasklet can finish it.
-				 * Reschedule the tasklet and exit
-				 * the loop to give up the cpu
-				 */
-				task->state = TASK_STATE_IDLE;
-				resched = 1;
-			}
+			task->state = TASK_STATE_IDLE;
 			break;
 
-		/* someone tried to run the task since the last time we called
-		 * func, so we will call one more time regardless of the
-		 * return value
+		/* someone tried to schedule the task while we
+		 * were running, keep going
 		 */
 		case TASK_STATE_ARMED:
 			task->state = TASK_STATE_BUSY;
 			cont = 1;
 			break;
 
 		case TASK_STATE_DRAINING:
-			if (ret)
-				task->state = TASK_STATE_DRAINED;
-			else
-				cont = 1;
+			task->state = TASK_STATE_DRAINED;
 			break;
 
 		default:
 			WARN_ON(1);
-			rxe_info_qp(task->qp, "unexpected task state = %d", task->state);
+			rxe_dbg_qp(task->qp, "unexpected task state = %d",
+				   task->state);
+			task->state = TASK_STATE_IDLE;
 		}
 
+exit:
 		if (!cont) {
 			task->num_done++;
 			if (WARN_ON(task->num_done != task->num_sched))
-				rxe_err_qp(task->qp, "%ld tasks scheduled, %ld tasks done",
-					   task->num_sched, task->num_done);
+				rxe_dbg_qp(
+					task->qp,
+					"%ld tasks scheduled, %ld tasks done",
+					task->num_sched, task->num_done);
 		}
 		spin_unlock_irqrestore(&task->lock, flags);
 	} while (cont);
@@ -169,6 +181,12 @@ static void do_task(struct tasklet_struct *t)
 	rxe_put(task->qp);
 }
 
+/* wrapper around do_task to fix argument for work queue */
+static void do_work(struct work_struct *work)
+{
+	do_task(container_of(work, struct rxe_task, work));
+}
+
 int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
 		  int (*func)(struct rxe_qp *))
 {
@@ -176,11 +194,9 @@ int rxe_init_task(struct rxe_task *task, struct rxe_qp *qp,
 	task->qp = qp;
 	task->func = func;
-
-	tasklet_setup(&task->tasklet, do_task);
-
 	task->state = TASK_STATE_IDLE;
 	spin_lock_init(&task->lock);
+	INIT_WORK(&task->work, do_work);
 
 	return 0;
 }
@@ -213,8 +229,6 @@ void rxe_cleanup_task(struct rxe_task *task)
 	while (!is_done(task))
 		cond_resched();
 
-	tasklet_kill(&task->tasklet);
-
 	spin_lock_irqsave(&task->lock, flags);
 	task->state = TASK_STATE_INVALID;
 	spin_unlock_irqrestore(&task->lock, flags);
@@ -226,7 +240,7 @@ void rxe_cleanup_task(struct rxe_task *task)
 void rxe_run_task(struct rxe_task *task)
 {
 	unsigned long flags;
-	int run;
+	bool run;
 
 	WARN_ON(rxe_read(task->qp) <= 0);
 
@@ -235,11 +249,11 @@ void rxe_run_task(struct rxe_task *task)
 	spin_unlock_irqrestore(&task->lock, flags);
 
 	if (run)
-		do_task(&task->tasklet);
+		do_task(task);
 }
 
-/* schedule the task to run later as a tasklet.
- * the tasklet_schedule call can be called holding
+/* schedule the task to run later as a work queue entry.
+ * the queue_work call can be called holding
  * the lock.
  */
 void rxe_sched_task(struct rxe_task *task)
@@ -250,7 +264,7 @@ void rxe_sched_task(struct rxe_task *task)
 	spin_lock_irqsave(&task->lock, flags);
 
 	if (__reserve_if_idle(task))
-		tasklet_schedule(&task->tasklet);
+		queue_work(rxe_wq, &task->work);
 
 	spin_unlock_irqrestore(&task->lock, flags);
 }
@@ -277,7 +291,9 @@ void rxe_disable_task(struct rxe_task *task)
 	while (!is_done(task))
 		cond_resched();
 
-	tasklet_disable(&task->tasklet);
+	spin_lock_irqsave(&task->lock, flags);
+	task->state = TASK_STATE_DRAINED;
+	spin_unlock_irqrestore(&task->lock, flags);
 }
 
 void rxe_enable_task(struct rxe_task *task)
@@ -291,7 +307,7 @@ void rxe_enable_task(struct rxe_task *task)
 		spin_unlock_irqrestore(&task->lock, flags);
 		return;
 	}
+	task->state = TASK_STATE_IDLE;
-	tasklet_enable(&task->tasklet);
 	spin_unlock_irqrestore(&task->lock, flags);
 }
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index facb7c8e3729..a63e258b3d66 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -22,7 +22,7 @@ enum {
 * called again.
 */
 struct rxe_task {
-	struct tasklet_struct tasklet;
+	struct work_struct work;
 	int state;
 	spinlock_t lock;
 	struct rxe_qp *qp;
@@ -32,6 +32,10 @@ struct rxe_task {
 	long num_done;
 };
 
+int rxe_alloc_wq(void);
+
+void rxe_destroy_wq(void);
+
 /*
  * init rxe_task structure
  *	qp => parameter to pass to func
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 83093e16b6c6..903f0b71447e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1182,9 +1182,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	unsigned long irq_flags;
 
 	spin_lock_irqsave(&cq->cq_lock, irq_flags);
-	if (cq->notify != IB_CQ_NEXT_COMP)
-		cq->notify = flags & IB_CQ_SOLICITED_MASK;
-
+	cq->notify |= flags & IB_CQ_SOLICITED_MASK;
 	empty = queue_empty(cq->queue, QUEUE_TYPE_TO_ULP);
 
 	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty)
@@ -1261,6 +1259,12 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
 	struct rxe_mr *mr;
 	int err, cleanup_err;
 
+	if (access & ~RXE_ACCESS_SUPPORTED_MR) {
+		rxe_err_pd(pd, "access = %#x not supported (%#x)", access,
+			   RXE_ACCESS_SUPPORTED_MR);
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
@@ -1294,6 +1298,40 @@ err_free:
 	return ERR_PTR(err);
 }
 
+static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags,
+				       u64 start, u64 length, u64 iova,
+				       int access, struct ib_pd *ibpd,
+				       struct ib_udata *udata)
+{
+	struct rxe_mr *mr = to_rmr(ibmr);
+	struct rxe_pd *old_pd = to_rpd(ibmr->pd);
+	struct rxe_pd *pd = to_rpd(ibpd);
+
+	/* for now only support the two easy cases:
+	 * rereg_pd and rereg_access
+	 */
+	if (flags & ~RXE_MR_REREG_SUPPORTED) {
+		rxe_err_mr(mr, "flags = %#x not supported", flags);
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	if (flags & IB_MR_REREG_PD) {
+		rxe_put(old_pd);
+		rxe_get(pd);
+		mr->ibmr.pd = ibpd;
+	}
+
+	if (flags & IB_MR_REREG_ACCESS) {
+		if (access & ~RXE_ACCESS_SUPPORTED_MR) {
+			rxe_err_mr(mr, "access = %#x not supported", access);
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+		mr->access = access;
+	}
+
+	return NULL;
+}
+
 static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 				  u32 max_num_sg)
 {
@@ -1446,6 +1484,7 @@ static const struct ib_device_ops rxe_dev_ops = {
 	.query_srq = rxe_query_srq,
 	.reg_user_mr = rxe_reg_user_mr,
 	.req_notify_cq = rxe_req_notify_cq,
+	.rereg_user_mr = rxe_rereg_user_mr,
 	.resize_cq = rxe_resize_cq,
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah),
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 26a20f088692..ccb9d19ffe8a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -253,6 +253,22 @@ struct rxe_qp {
 	struct execute_work cleanup_work;
 };
 
+enum {
+	RXE_ACCESS_REMOTE	= IB_ACCESS_REMOTE_READ
+				| IB_ACCESS_REMOTE_WRITE
+				| IB_ACCESS_REMOTE_ATOMIC,
+	RXE_ACCESS_SUPPORTED_MR	= RXE_ACCESS_REMOTE
+				| IB_ACCESS_LOCAL_WRITE
+				| IB_ACCESS_MW_BIND
+				| IB_ACCESS_ON_DEMAND
+				| IB_ACCESS_FLUSH_GLOBAL
+				| IB_ACCESS_FLUSH_PERSISTENT
+				| IB_ACCESS_OPTIONAL,
+	RXE_ACCESS_SUPPORTED_QP	= RXE_ACCESS_SUPPORTED_MR,
+	RXE_ACCESS_SUPPORTED_MW	= RXE_ACCESS_SUPPORTED_MR
+				| IB_ZERO_BASED,
+};
+
 enum rxe_mr_state {
 	RXE_MR_STATE_INVALID,
 	RXE_MR_STATE_FREE,
@@ -269,6 +285,11 @@ enum rxe_mr_lookup_type {
 	RXE_LOOKUP_REMOTE,
 };
 
+enum rxe_rereg {
+	RXE_MR_REREG_SUPPORTED	= IB_MR_REREG_PD
+				| IB_MR_REREG_ACCESS,
+};
+
 static inline int rkey_is_mw(u32 rkey)
 {
 	u32 index = rkey >> 8;
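
Since this merge adds a rereg_user_mr hook to rxe, a userspace application can now exercise the two supported rereg cases through libibverbs. A hedged usage sketch -- it assumes mr and new_pd were already created with ibv_reg_mr()/ibv_alloc_pd(), and trims error handling:

#include <infiniband/verbs.h>

/* sketch of exercising the two rereg cases rxe now supports:
 * changing the access flags and changing the protection domain
 */
static int demo_rereg(struct ibv_mr *mr, struct ibv_pd *new_pd)
{
	int err;

	/* change only the access flags; the addr/length arguments are
	 * ignored unless IBV_REREG_MR_CHANGE_TRANSLATION is set
	 */
	err = ibv_rereg_mr(mr, IBV_REREG_MR_CHANGE_ACCESS, NULL, NULL, 0,
			   IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ);
	if (err)
		return err;

	/* move the MR to a different protection domain */
	return ibv_rereg_mr(mr, IBV_REREG_MR_CHANGE_PD, new_pd, NULL, 0, 0);
}

Requests outside these two cases (e.g. IBV_REREG_MR_CHANGE_TRANSLATION) should fail against rxe, matching the RXE_MR_REREG_SUPPORTED mask in the diff above.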