Diffstat (limited to 'net/sunrpc/xprtrdma')
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c              |   1
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c              |  31
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c  |   2
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c     | 115
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_rw.c           |  81
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c       | 124
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c    |  74
7 files changed, 204 insertions(+), 224 deletions(-)
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index b647562a26dd..7f94c9a19fd3 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -40,7 +40,6 @@
* New MRs are created on demand.
*/
-#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 453bacc99907..0f5120c7668f 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -275,14 +275,6 @@ out:
return n;
}
-static void
-xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
-{
- *iptr++ = cpu_to_be32(mr->mr_handle);
- *iptr++ = cpu_to_be32(mr->mr_length);
- xdr_encode_hyper(iptr, mr->mr_offset);
-}
-
static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
@@ -292,7 +284,7 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
if (unlikely(!p))
return -EMSGSIZE;
- xdr_encode_rdma_segment(p, mr);
+ xdr_encode_rdma_segment(p, mr->mr_handle, mr->mr_length, mr->mr_offset);
return 0;
}
@@ -307,8 +299,8 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
return -EMSGSIZE;
*p++ = xdr_one; /* Item present */
- *p++ = cpu_to_be32(position);
- xdr_encode_rdma_segment(p, mr);
+ xdr_encode_read_segment(p, position, mr->mr_handle, mr->mr_length,
+ mr->mr_offset);
return 0;
}
@@ -1133,11 +1125,11 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
p = xdr_inline_decode(xdr, 0);
/* Chunk lists */
- if (*p++ != xdr_zero)
+ if (xdr_item_is_present(p++))
return false;
- if (*p++ != xdr_zero)
+ if (xdr_item_is_present(p++))
return false;
- if (*p++ != xdr_zero)
+ if (xdr_item_is_present(p++))
return false;
/* RPC header */
@@ -1176,10 +1168,7 @@ static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
if (unlikely(!p))
return -EIO;
- handle = be32_to_cpup(p++);
- *length = be32_to_cpup(p++);
- xdr_decode_hyper(p, &offset);
-
+ xdr_decode_rdma_segment(p, &handle, length, &offset);
trace_xprtrdma_decode_seg(handle, *length, offset);
return 0;
}
@@ -1215,7 +1204,7 @@ static int decode_read_list(struct xdr_stream *xdr)
p = xdr_inline_decode(xdr, sizeof(*p));
if (unlikely(!p))
return -EIO;
- if (unlikely(*p != xdr_zero))
+ if (unlikely(xdr_item_is_present(p)))
return -EIO;
return 0;
}
@@ -1234,7 +1223,7 @@ static int decode_write_list(struct xdr_stream *xdr, u32 *length)
p = xdr_inline_decode(xdr, sizeof(*p));
if (unlikely(!p))
return -EIO;
- if (*p == xdr_zero)
+ if (xdr_item_is_absent(p))
break;
if (!first)
return -EIO;
@@ -1256,7 +1245,7 @@ static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length)
return -EIO;
*length = 0;
- if (*p != xdr_zero)
+ if (xdr_item_is_present(p))
if (decode_write_chunk(xdr, length))
return -EIO;
return 0;
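
The open-coded XDR marshaling removed from rpc_rdma.c above (and from svc_rdma_recvfrom.c, svc_rdma_rw.c, and svc_rdma_sendto.c below) is replaced by shared segment helpers such as xdr_encode_rdma_segment(), xdr_decode_rdma_segment(), and the xdr_item_is_present()/xdr_item_is_absent() tests, which stand in for direct comparisons against xdr_zero. Their definitions live in sunrpc headers outside this diff. The stand-alone user-space sketch below only illustrates the wire format they handle -- an RDMA segment is a 32-bit handle, a 32-bit length, and a 64-bit offset ("hyper"), all big-endian -- and is not the kernel implementation; the names and return conventions are assumptions based on how the patch uses them.

/* Illustrative sketch of the RPC-over-RDMA segment layout; compiles
 * and runs in user space, does not reflect the kernel's actual helpers.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

static uint32_t *encode_rdma_segment(uint32_t *p, uint32_t handle,
				     uint32_t length, uint64_t offset)
{
	*p++ = htonl(handle);
	*p++ = htonl(length);
	*p++ = htonl((uint32_t)(offset >> 32));	/* high word of the hyper */
	*p++ = htonl((uint32_t)offset);		/* low word of the hyper */
	return p;
}

static uint32_t *decode_rdma_segment(uint32_t *p, uint32_t *handle,
				     uint32_t *length, uint64_t *offset)
{
	*handle = ntohl(*p++);
	*length = ntohl(*p++);
	*offset = (uint64_t)ntohl(*p++) << 32;
	*offset |= ntohl(*p++);
	return p;
}

int main(void)
{
	uint32_t xdr[4], handle, length;
	uint64_t offset;

	encode_rdma_segment(xdr, 0xdeadbeef, 4096, 0x10000000ULL);
	decode_rdma_segment(xdr, &handle, &length, &offset);
	printf("handle=0x%08x length=%u offset=0x%llx\n",
	       handle, length, (unsigned long long)offset);
	return 0;
}
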
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 1ee73f7cf931..5e7c4ba9e147 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -87,7 +87,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
ctxt->sc_send_wr.opcode = IB_WR_SEND;
- return svc_rdma_send(rdma, &ctxt->sc_send_wr);
+ return svc_rdma_send(rdma, ctxt);
}
/* Server-side transport endpoint wants a whole page for its send
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index e426fedb9524..c6ea2903c21a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -117,6 +117,13 @@ svc_rdma_next_recv_ctxt(struct list_head *list)
rc_list);
}
+static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
+ struct rpc_rdma_cid *cid)
+{
+ cid->ci_queue_id = rdma->sc_rq_cq->res.id;
+ cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
+}
+
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
@@ -135,6 +142,8 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
goto fail2;
+ svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid);
+
ctxt->rc_recv_wr.next = NULL;
ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
ctxt->rc_recv_wr.sg_list = &ctxt->rc_recv_sge;
@@ -248,16 +257,15 @@ static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
{
int ret;
- svc_xprt_get(&rdma->sc_xprt);
+ trace_svcrdma_post_recv(ctxt);
ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
- trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
if (ret)
goto err_post;
return 0;
err_post:
+ trace_svcrdma_rq_post_err(rdma, ret);
svc_rdma_recv_ctxt_put(rdma, ctxt);
- svc_xprt_put(&rdma->sc_xprt);
return ret;
}
@@ -265,6 +273,8 @@ static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
+ if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+ return 0;
ctxt = svc_rdma_recv_ctxt_get(rdma);
if (!ctxt)
return -ENOMEM;
@@ -309,11 +319,10 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_recv_ctxt *ctxt;
- trace_svcrdma_wc_receive(wc);
-
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
+ trace_svcrdma_wc_receive(wc, &ctxt->rc_cid);
if (wc->status != IB_WC_SUCCESS)
goto flushed;
@@ -333,15 +342,13 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
spin_unlock(&rdma->sc_rq_dto_lock);
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
svc_xprt_enqueue(&rdma->sc_xprt);
- goto out;
+ return;
flushed:
post_err:
svc_rdma_recv_ctxt_put(rdma, ctxt);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
-out:
- svc_xprt_put(&rdma->sc_xprt);
}
/**
@@ -419,7 +426,7 @@ static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
len = 0;
first = true;
- while (*p != xdr_zero) {
+ while (xdr_item_is_present(p)) {
p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_readseg_maxsz * sizeof(*p));
if (!p)
@@ -466,9 +473,7 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
if (!p)
return false;
- handle = be32_to_cpup(p++);
- length = be32_to_cpup(p++);
- xdr_decode_hyper(p, &offset);
+ xdr_decode_rdma_segment(p, &handle, &length, &offset);
trace_svcrdma_decode_wseg(handle, length, offset);
total += length;
@@ -500,7 +505,7 @@ static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
if (!p)
return false;
rctxt->rc_write_list = p;
- while (*p != xdr_zero) {
+ while (xdr_item_is_present(p)) {
if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
return false;
++chcount;
@@ -532,12 +537,11 @@ static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
- rctxt->rc_reply_chunk = p;
- if (*p != xdr_zero) {
+ rctxt->rc_reply_chunk = NULL;
+ if (xdr_item_is_present(p)) {
if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
return false;
- } else {
- rctxt->rc_reply_chunk = NULL;
+ rctxt->rc_reply_chunk = p;
}
return true;
}
@@ -568,7 +572,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
p += rpcrdma_fixed_maxsz;
/* Read list */
- while (*p++ != xdr_zero) {
+ while (xdr_item_is_present(p++)) {
p++; /* position */
if (inv_rkey == xdr_zero)
inv_rkey = *p;
@@ -578,7 +582,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
}
/* Write list */
- while (*p++ != xdr_zero) {
+ while (xdr_item_is_present(p++)) {
segcount = be32_to_cpup(p++);
for (i = 0; i < segcount; i++) {
if (inv_rkey == xdr_zero)
@@ -590,7 +594,7 @@ static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
}
/* Reply chunk */
- if (*p++ != xdr_zero) {
+ if (xdr_item_is_present(p++)) {
segcount = be32_to_cpup(p++);
for (i = 0; i < segcount; i++) {
if (inv_rkey == xdr_zero)
@@ -661,27 +665,27 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
hdr_len = xdr_stream_pos(&rctxt->rc_stream);
rq_arg->head[0].iov_len -= hdr_len;
rq_arg->len -= hdr_len;
- trace_svcrdma_decode_rqst(rdma_argp, hdr_len);
+ trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len);
return hdr_len;
out_short:
- trace_svcrdma_decode_short_err(rq_arg->len);
+ trace_svcrdma_decode_short_err(rctxt, rq_arg->len);
return -EINVAL;
out_version:
- trace_svcrdma_decode_badvers_err(rdma_argp);
+ trace_svcrdma_decode_badvers_err(rctxt, rdma_argp);
return -EPROTONOSUPPORT;
out_drop:
- trace_svcrdma_decode_drop_err(rdma_argp);
+ trace_svcrdma_decode_drop_err(rctxt, rdma_argp);
return 0;
out_proc:
- trace_svcrdma_decode_badproc_err(rdma_argp);
+ trace_svcrdma_decode_badproc_err(rctxt, rdma_argp);
return -EINVAL;
out_inval:
- trace_svcrdma_decode_parse_err(rdma_argp);
+ trace_svcrdma_decode_parse_err(rctxt, rdma_argp);
return -EINVAL;
}
@@ -714,57 +718,16 @@ static void rdma_read_complete(struct svc_rqst *rqstp,
rqstp->rq_arg.buflen = head->rc_arg.buflen;
}
-static void svc_rdma_send_error(struct svcxprt_rdma *xprt,
- __be32 *rdma_argp, int status)
+static void svc_rdma_send_error(struct svcxprt_rdma *rdma,
+ struct svc_rdma_recv_ctxt *rctxt,
+ int status)
{
- struct svc_rdma_send_ctxt *ctxt;
- __be32 *p;
- int ret;
+ struct svc_rdma_send_ctxt *sctxt;
- ctxt = svc_rdma_send_ctxt_get(xprt);
- if (!ctxt)
+ sctxt = svc_rdma_send_ctxt_get(rdma);
+ if (!sctxt)
return;
-
- p = xdr_reserve_space(&ctxt->sc_stream,
- rpcrdma_fixed_maxsz * sizeof(*p));
- if (!p)
- goto put_ctxt;
-
- *p++ = *rdma_argp;
- *p++ = *(rdma_argp + 1);
- *p++ = xprt->sc_fc_credits;
- *p = rdma_error;
-
- switch (status) {
- case -EPROTONOSUPPORT:
- p = xdr_reserve_space(&ctxt->sc_stream, 3 * sizeof(*p));
- if (!p)
- goto put_ctxt;
-
- *p++ = err_vers;
- *p++ = rpcrdma_version;
- *p = rpcrdma_version;
- trace_svcrdma_err_vers(*rdma_argp);
- break;
- default:
- p = xdr_reserve_space(&ctxt->sc_stream, sizeof(*p));
- if (!p)
- goto put_ctxt;
-
- *p = err_chunk;
- trace_svcrdma_err_chunk(*rdma_argp);
- }
-
- ctxt->sc_send_wr.num_sge = 1;
- ctxt->sc_send_wr.opcode = IB_WR_SEND;
- ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len;
- ret = svc_rdma_send(xprt, &ctxt->sc_send_wr);
- if (ret)
- goto put_ctxt;
- return;
-
-put_ctxt:
- svc_rdma_send_ctxt_put(xprt, ctxt);
+ svc_rdma_send_error_msg(rdma, sctxt, rctxt, status);
}
/* By convention, backchannel calls arrive via rdma_msg type
@@ -900,13 +863,13 @@ out_readchunk:
return 0;
out_err:
- svc_rdma_send_error(rdma_xprt, p, ret);
+ svc_rdma_send_error(rdma_xprt, ctxt, ret);
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
out_postfail:
if (ret == -EINVAL)
- svc_rdma_send_error(rdma_xprt, p, ret);
+ svc_rdma_send_error(rdma_xprt, ctxt, ret);
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
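
A recurring theme in the receive, read/write, and send paths of this series is the rpc_rdma_cid carried by each context, so that completion trace points (trace_svcrdma_wc_receive, trace_svcrdma_wc_write, trace_svcrdma_wc_send, and so on) can name the completion queue and the individual work request that completed. The structure itself is defined in a sunrpc header that is not part of this diff; the sketch below is inferred from the *_cid_init() helpers added here and in the following files, and may not match the real definition field-for-field.

/* Hypothetical reconstruction, inferred from svc_rdma_recv_cid_init()
 * and the matching helpers in svc_rdma_rw.c and svc_rdma_sendto.c.
 */
struct rpc_rdma_cid {
	u32	ci_queue_id;		/* rdma->sc_rq_cq->res.id or sc_sq_cq->res.id */
	int	ci_completion_id;	/* atomic_inc_return(&rdma->sc_completion_ids) */
};
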
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 5eb35309ecef..fe54cbe97a46 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -7,6 +7,7 @@
#include <rdma/rw.h>
+#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
@@ -144,17 +145,25 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
* demand, and not cached.
*/
struct svc_rdma_chunk_ctxt {
+ struct rpc_rdma_cid cc_cid;
struct ib_cqe cc_cqe;
struct svcxprt_rdma *cc_rdma;
struct list_head cc_rwctxts;
int cc_sqecount;
};
+static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma,
+ struct rpc_rdma_cid *cid)
+{
+ cid->ci_queue_id = rdma->sc_sq_cq->res.id;
+ cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
+}
+
static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc)
{
+ svc_rdma_cc_cid_init(rdma, &cc->cc_cid);
cc->cc_rdma = rdma;
- svc_xprt_get(&rdma->sc_xprt);
INIT_LIST_HEAD(&cc->cc_rwctxts);
cc->cc_sqecount = 0;
@@ -174,7 +183,6 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
ctxt->rw_nents, dir);
svc_rdma_put_rw_ctxt(rdma, ctxt);
}
- svc_xprt_put(&rdma->sc_xprt);
}
/* State for sending a Write or Reply chunk.
@@ -236,7 +244,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
- trace_svcrdma_wc_write(wc);
+ trace_svcrdma_wc_write(wc, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
@@ -294,7 +302,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_read_info *info =
container_of(cc, struct svc_rdma_read_info, ri_cc);
- trace_svcrdma_wc_read(wc);
+ trace_svcrdma_wc_read(wc, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
@@ -350,6 +358,7 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
do {
if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) {
+ trace_svcrdma_post_chunk(&cc->cc_cid, cc->cc_sqecount);
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
if (ret)
break;
@@ -441,34 +450,32 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
do {
unsigned int write_len;
- u32 seg_length, seg_handle;
- u64 seg_offset;
+ u32 handle, length;
+ u64 offset;
if (info->wi_seg_no >= info->wi_nsegs)
goto out_overflow;
- seg_handle = be32_to_cpup(seg);
- seg_length = be32_to_cpup(seg + 1);
- xdr_decode_hyper(seg + 2, &seg_offset);
- seg_offset += info->wi_seg_off;
+ xdr_decode_rdma_segment(seg, &handle, &length, &offset);
+ offset += info->wi_seg_off;
- write_len = min(remaining, seg_length - info->wi_seg_off);
+ write_len = min(remaining, length - info->wi_seg_off);
ctxt = svc_rdma_get_rw_ctxt(rdma,
(write_len >> PAGE_SHIFT) + 2);
if (!ctxt)
return -ENOMEM;
constructor(info, write_len, ctxt);
- ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle,
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, handle,
DMA_TO_DEVICE);
if (ret < 0)
return -EIO;
- trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset);
+ trace_svcrdma_send_wseg(handle, write_len, offset);
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
- if (write_len == seg_length - info->wi_seg_off) {
+ if (write_len == length - info->wi_seg_off) {
seg += 4;
info->wi_seg_no++;
info->wi_seg_off = 0;
@@ -684,35 +691,24 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
- unsigned int i;
int ret;
ret = -EINVAL;
info->ri_chunklen = 0;
while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) {
- u32 rs_handle, rs_length;
- u64 rs_offset;
-
- rs_handle = be32_to_cpup(p++);
- rs_length = be32_to_cpup(p++);
- p = xdr_decode_hyper(p, &rs_offset);
+ u32 handle, length;
+ u64 offset;
- ret = svc_rdma_build_read_segment(info, rqstp,
- rs_handle, rs_length,
- rs_offset);
+ p = xdr_decode_rdma_segment(p, &handle, &length, &offset);
+ ret = svc_rdma_build_read_segment(info, rqstp, handle, length,
+ offset);
if (ret < 0)
break;
- trace_svcrdma_send_rseg(rs_handle, rs_length, rs_offset);
- info->ri_chunklen += rs_length;
+ trace_svcrdma_send_rseg(handle, length, offset);
+ info->ri_chunklen += length;
}
- /* Pages under I/O have been copied to head->rc_pages.
- * Prevent their premature release by svc_xprt_release() .
- */
- for (i = 0; i < info->ri_readctxt->rc_page_count; i++)
- rqstp->rq_pages[i] = NULL;
-
return ret;
}
@@ -807,6 +803,26 @@ out:
return ret;
}
+/* Pages under I/O have been copied to head->rc_pages. Ensure they
+ * are not released by svc_xprt_release() until the I/O is complete.
+ *
+ * This has to be done after all Read WRs are constructed to properly
+ * handle a page that is part of I/O on behalf of two different RDMA
+ * segments.
+ *
+ * Do this only if I/O has been posted. Otherwise, we do indeed want
+ * svc_xprt_release() to clean things up properly.
+ */
+static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
+ const unsigned int start,
+ const unsigned int num_pages)
+{
+ unsigned int i;
+
+ for (i = start; i < num_pages + start; i++)
+ rqstp->rq_pages[i] = NULL;
+}
+
/**
* svc_rdma_recv_read_chunk - Pull a Read chunk from the client
* @rdma: controlling RDMA transport
@@ -860,6 +876,7 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
ret = svc_rdma_post_chunk_ctxt(&info->ri_cc);
if (ret < 0)
goto out_err;
+ svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count);
return 0;
out_err:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 38e7c3c8c4a9..7b94d971feb3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -106,7 +106,6 @@
#include <rdma/rdma_cm.h>
#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
@@ -123,6 +122,13 @@ svc_rdma_next_send_ctxt(struct list_head *list)
sc_list);
}
+static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
+ struct rpc_rdma_cid *cid)
+{
+ cid->ci_queue_id = rdma->sc_sq_cq->res.id;
+ cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
+}
+
static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
@@ -145,6 +151,8 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
goto fail2;
+ svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);
+
ctxt->sc_send_wr.next = NULL;
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
@@ -269,34 +277,33 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
- struct svc_rdma_send_ctxt *ctxt;
+ struct svc_rdma_send_ctxt *ctxt =
+ container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
- trace_svcrdma_wc_send(wc);
+ trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
atomic_inc(&rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
- ctxt = container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
svc_rdma_send_ctxt_put(rdma, ctxt);
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_xprt_enqueue(&rdma->sc_xprt);
}
-
- svc_xprt_put(&rdma->sc_xprt);
}
/**
* svc_rdma_send - Post a single Send WR
* @rdma: transport on which to post the WR
- * @wr: prepared Send WR to post
+ * @ctxt: send ctxt with a Send WR ready to post
*
* Returns zero the Send WR was posted successfully. Otherwise, a
* negative errno is returned.
*/
-int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
+int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
{
+ struct ib_send_wr *wr = &ctxt->sc_send_wr;
int ret;
might_sleep();
@@ -321,8 +328,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
continue;
}
- svc_xprt_get(&rdma->sc_xprt);
- trace_svcrdma_post_send(wr);
+ trace_svcrdma_post_send(ctxt);
ret = ib_post_send(rdma->sc_qp, wr, NULL);
if (ret)
break;
@@ -331,7 +337,6 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
trace_svcrdma_sq_post_err(rdma, ret);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_xprt_put(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
return ret;
}
@@ -375,11 +380,8 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src,
if (!p)
return -EMSGSIZE;
- handle = be32_to_cpup(src++);
- length = be32_to_cpup(src++);
- xdr_decode_hyper(src, &offset);
+ xdr_decode_rdma_segment(src, &handle, &length, &offset);
- *p++ = cpu_to_be32(handle);
if (*remaining < length) {
/* segment only partly filled */
length = *remaining;
@@ -388,8 +390,7 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src,
/* entire segment was consumed */
*remaining -= length;
}
- *p++ = cpu_to_be32(length);
- xdr_encode_hyper(p, offset);
+ xdr_encode_rdma_segment(p, handle, length, offset);
trace_svcrdma_encode_wseg(handle, length, offset);
return len;
@@ -801,45 +802,76 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
} else {
sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
- return svc_rdma_send(rdma, &sctxt->sc_send_wr);
+ return svc_rdma_send(rdma, sctxt);
}
-/* Given the client-provided Write and Reply chunks, the server was not
- * able to form a complete reply. Return an RDMA_ERROR message so the
- * client can retire this RPC transaction. As above, the Send completion
- * routine releases payload pages that were part of a previous RDMA Write.
- *
- * Remote Invalidation is skipped for simplicity.
+/**
+ * svc_rdma_send_error_msg - Send an RPC/RDMA v1 error response
+ * @rdma: controlling transport context
+ * @sctxt: Send context for the response
+ * @rctxt: Receive context for incoming bad message
+ * @status: negative errno indicating error that occurred
+ *
+ * Given the client-provided Read, Write, and Reply chunks, the
+ * server was not able to parse the Call or form a complete Reply.
+ * Return an RDMA_ERROR message so the client can retire the RPC
+ * transaction.
+ *
+ * The caller does not have to release @sctxt. It is released by
+ * Send completion, or by this function on error.
*/
-static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt,
- struct svc_rqst *rqstp)
+void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *sctxt,
+ struct svc_rdma_recv_ctxt *rctxt,
+ int status)
{
- struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
__be32 *rdma_argp = rctxt->rc_recv_buf;
__be32 *p;
- rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
- xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
- NULL);
+ rpcrdma_set_xdrlen(&sctxt->sc_hdrbuf, 0);
+ xdr_init_encode(&sctxt->sc_stream, &sctxt->sc_hdrbuf,
+ sctxt->sc_xprt_buf, NULL);
- p = xdr_reserve_space(&ctxt->sc_stream, RPCRDMA_HDRLEN_ERR);
+ p = xdr_reserve_space(&sctxt->sc_stream,
+ rpcrdma_fixed_maxsz * sizeof(*p));
if (!p)
- return -ENOMSG;
+ goto put_ctxt;
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
*p++ = rdma->sc_fc_credits;
- *p++ = rdma_error;
- *p = err_chunk;
- trace_svcrdma_err_chunk(*rdma_argp);
+ *p = rdma_error;
+
+ switch (status) {
+ case -EPROTONOSUPPORT:
+ p = xdr_reserve_space(&sctxt->sc_stream, 3 * sizeof(*p));
+ if (!p)
+ goto put_ctxt;
+
+ *p++ = err_vers;
+ *p++ = rpcrdma_version;
+ *p = rpcrdma_version;
+ trace_svcrdma_err_vers(*rdma_argp);
+ break;
+ default:
+ p = xdr_reserve_space(&sctxt->sc_stream, sizeof(*p));
+ if (!p)
+ goto put_ctxt;
+
+ *p = err_chunk;
+ trace_svcrdma_err_chunk(*rdma_argp);
+ }
- svc_rdma_save_io_pages(rqstp, ctxt);
+ /* Remote Invalidation is skipped for simplicity. */
+ sctxt->sc_send_wr.num_sge = 1;
+ sctxt->sc_send_wr.opcode = IB_WR_SEND;
+ sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
+ if (svc_rdma_send(rdma, sctxt))
+ goto put_ctxt;
+ return;
- ctxt->sc_send_wr.num_sge = 1;
- ctxt->sc_send_wr.opcode = IB_WR_SEND;
- ctxt->sc_sges[0].length = ctxt->sc_hdrbuf.len;
- return svc_rdma_send(rdma, &ctxt->sc_send_wr);
+put_ctxt:
+ svc_rdma_send_ctxt_put(rdma, sctxt);
}
/**
@@ -930,15 +962,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (ret != -E2BIG && ret != -EINVAL)
goto err1;
- ret = svc_rdma_send_error_msg(rdma, sctxt, rqstp);
- if (ret < 0)
- goto err1;
+ /* Send completion releases payload pages that were part
+ * of previously posted RDMA Writes.
+ */
+ svc_rdma_save_io_pages(rqstp, sctxt);
+ svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
return 0;
err1:
svc_rdma_send_ctxt_put(rdma, sctxt);
err0:
- trace_svcrdma_send_failed(rqstp, ret);
+ trace_svcrdma_send_err(rqstp, ret);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
return -ENOTCONN;
}
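
For reference, the RDMA_ERROR reply built by the consolidated svc_rdma_send_error_msg() follows the layout in RFC 8166: the four fixed header words (XID, version, credits, the rdma_error procedure), then either an ERR_VERS body carrying the supported version range or a single ERR_CHUNK word. The user-space sketch below lays out the ERR_VERS case using the RFC's constant values; these symbols and the helper name are illustrative assumptions -- the kernel streams the equivalent constants from its own rpc_rdma.h through sctxt->sc_stream as shown in the hunk above.

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

/* Constant values per RFC 8166 (assumed here; the kernel defines its
 * own symbols such as rdma_error, err_vers, and err_chunk).
 */
#define RDMA_ERROR	4	/* rdma_proc: message carries an error */
#define ERR_VERS	1	/* rpc_rdma_errcode: version mismatch */
#define ERR_CHUNK	2	/* rpc_rdma_errcode: chunk parsing failed */
#define RPCRDMA_VERSION	1

/* Build an ERR_VERS reply: fixed header plus the low/high supported
 * versions. Returns the number of 32-bit words written into buf.
 */
size_t build_err_vers(uint32_t *buf, uint32_t xid, uint32_t vers,
		      uint32_t credits)
{
	uint32_t *p = buf;

	*p++ = xid;			/* XID word, echoed from the Call */
	*p++ = vers;			/* rdma_vers word, echoed from the Call */
	*p++ = htonl(credits);		/* credit grant */
	*p++ = htonl(RDMA_ERROR);
	*p++ = htonl(ERR_VERS);
	*p++ = htonl(RPCRDMA_VERSION);	/* rdma_vers_low */
	*p++ = htonl(RPCRDMA_VERSION);	/* rdma_vers_high */
	return (size_t)(p - buf);
}

In the ERR_CHUNK case the last three words are replaced by a single ERR_CHUNK word, which is the default branch of the switch in the patch above.
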
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index d38be57b00ed..fb044792b571 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -55,7 +55,6 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/debug.h>
-#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svc_rdma.h>
@@ -238,65 +237,56 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id,
svc_xprt_enqueue(&listen_xprt->sc_xprt);
}
-/*
- * Handles events generated on the listening endpoint. These events will be
- * either be incoming connect requests or adapter removal events.
+/**
+ * svc_rdma_listen_handler - Handle CM events generated on a listening endpoint
+ * @cma_id: the server's listener rdma_cm_id
+ * @event: details of the event
+ *
+ * Return values:
+ * %0: Do not destroy @cma_id
+ * %1: Destroy @cma_id (never returned here)
+ *
+ * NB: There is never a DEVICE_REMOVAL event for INADDR_ANY listeners.
*/
-static int rdma_listen_handler(struct rdma_cm_id *cma_id,
- struct rdma_cm_event *event)
+static int svc_rdma_listen_handler(struct rdma_cm_id *cma_id,
+ struct rdma_cm_event *event)
{
switch (event->event) {
case RDMA_CM_EVENT_CONNECT_REQUEST:
- dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
- "event = %s (%d)\n", cma_id, cma_id->context,
- rdma_event_msg(event->event), event->event);
handle_connect_req(cma_id, &event->param.conn);
break;
default:
- /* NB: No device removal upcall for INADDR_ANY listeners */
- dprintk("svcrdma: Unexpected event on listening endpoint %p, "
- "event = %s (%d)\n", cma_id,
- rdma_event_msg(event->event), event->event);
break;
}
-
return 0;
}
-static int rdma_cma_handler(struct rdma_cm_id *cma_id,
- struct rdma_cm_event *event)
+/**
+ * svc_rdma_cma_handler - Handle CM events on client connections
+ * @cma_id: the server's listener rdma_cm_id
+ * @event: details of the event
+ *
+ * Return values:
+ * %0: Do not destroy @cma_id
+ * %1: Destroy @cma_id (never returned here)
+ */
+static int svc_rdma_cma_handler(struct rdma_cm_id *cma_id,
+ struct rdma_cm_event *event)
{
struct svcxprt_rdma *rdma = cma_id->context;
struct svc_xprt *xprt = &rdma->sc_xprt;
switch (event->event) {
case RDMA_CM_EVENT_ESTABLISHED:
- /* Accept complete */
- svc_xprt_get(xprt);
- dprintk("svcrdma: Connection completed on DTO xprt=%p, "
- "cm_id=%p\n", xprt, cma_id);
clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
svc_xprt_enqueue(xprt);
break;
case RDMA_CM_EVENT_DISCONNECTED:
- dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
- xprt, cma_id);
- set_bit(XPT_CLOSE, &xprt->xpt_flags);
- svc_xprt_enqueue(xprt);
- svc_xprt_put(xprt);
- break;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
- dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
- "event = %s (%d)\n", cma_id, xprt,
- rdma_event_msg(event->event), event->event);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
- svc_xprt_put(xprt);
break;
default:
- dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
- "event = %s (%d)\n", cma_id,
- rdma_event_msg(event->event), event->event);
break;
}
return 0;
@@ -322,7 +312,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");
- listen_id = rdma_create_id(net, rdma_listen_handler, cma_xprt,
+ listen_id = rdma_create_id(net, svc_rdma_listen_handler, cma_xprt,
RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
@@ -486,7 +476,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
goto errout;
/* Swap out the handler */
- newxprt->sc_cm_id->event_handler = rdma_cma_handler;
+ newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler;
/* Construct RDMA-CM private message */
pmsg.cp_magic = rpcrdma_cmp_magic;
@@ -540,24 +530,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
return NULL;
}
-/*
- * When connected, an svc_xprt has at least two references:
- *
- * - A reference held by the cm_id between the ESTABLISHED and
- * DISCONNECTED events. If the remote peer disconnected first, this
- * reference could be gone.
- *
- * - A reference held by the svc_recv code that called this function
- * as part of close processing.
- *
- * At a minimum one references should still be held.
- */
static void svc_rdma_detach(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
- /* Disconnect and flush posted WQE */
rdma_disconnect(rdma->sc_cm_id);
}
@@ -567,6 +544,7 @@ static void __svc_rdma_free(struct work_struct *work)
container_of(work, struct svcxprt_rdma, sc_work);
struct svc_xprt *xprt = &rdma->sc_xprt;
+ /* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);