diff options
Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
-rw-r--r-- | net/sunrpc/xprtrdma/rpc_rdma.c | 162 |
1 files changed, 59 insertions, 103 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index a3f2ab283aeb..162e5dd82466 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr) } static void -xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) +xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr) { - *iptr++ = cpu_to_be32(mw->mw_handle); - *iptr++ = cpu_to_be32(mw->mw_length); - xdr_encode_hyper(iptr, mw->mw_offset); + *iptr++ = cpu_to_be32(mr->mr_handle); + *iptr++ = cpu_to_be32(mr->mr_length); + xdr_encode_hyper(iptr, mr->mr_offset); } static int -encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) +encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr) { __be32 *p; @@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) if (unlikely(!p)) return -EMSGSIZE; - xdr_encode_rdma_segment(p, mw); + xdr_encode_rdma_segment(p, mr); return 0; } static int -encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, +encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr, u32 position) { __be32 *p; @@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, *p++ = xdr_one; /* Item present */ *p++ = cpu_to_be32(position); - xdr_encode_rdma_segment(p, mw); + xdr_encode_rdma_segment(p, mr); return 0; } @@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, { struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; unsigned int pos; int nsegs; @@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, do { seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - false, &mw); + false, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); - rpcrdma_push_mw(mw, &req->rl_registered); + rpcrdma_mr_push(mr, &req->rl_registered); - if (encode_read_segment(xdr, mw, pos) < 0) + if (encode_read_segment(xdr, mr, pos) < 0) return -EMSGSIZE; - dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", - rqst->rq_task->tk_pid, __func__, pos, - mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); - + trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs); r_xprt->rx_stats.read_chunk_count++; - nsegs -= mw->mw_nents; + nsegs -= mr->mr_nents; } while (nsegs); return 0; @@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, { struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; int nsegs, nchunks; __be32 *segcount; @@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, nchunks = 0; do { seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - true, &mw); + true, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); - rpcrdma_push_mw(mw, &req->rl_registered); + rpcrdma_mr_push(mr, &req->rl_registered); - if (encode_rdma_segment(xdr, mw) < 0) + if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; - dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", - rqst->rq_task->tk_pid, __func__, - mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); - + trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs); r_xprt->rx_stats.write_chunk_count++; - r_xprt->rx_stats.total_rdma_request += seg->mr_len; + r_xprt->rx_stats.total_rdma_request += mr->mr_length; nchunks++; - nsegs -= mw->mw_nents; + nsegs -= mr->mr_nents; } while (nsegs); /* Update count of segments in this Write chunk */ @@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, { struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; - struct rpcrdma_mw *mw; + struct rpcrdma_mr *mr; int nsegs, nchunks; __be32 *segcount; @@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, nchunks = 0; do { seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - true, &mw); + true, &mr); if (IS_ERR(seg)) return PTR_ERR(seg); - rpcrdma_push_mw(mw, &req->rl_registered); + rpcrdma_mr_push(mr, &req->rl_registered); - if (encode_rdma_segment(xdr, mw) < 0) + if (encode_rdma_segment(xdr, mr) < 0) return -EMSGSIZE; - dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", - rqst->rq_task->tk_pid, __func__, - mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); - + trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs); r_xprt->rx_stats.reply_chunk_count++; - r_xprt->rx_stats.total_rdma_request += seg->mr_len; + r_xprt->rx_stats.total_rdma_request += mr->mr_length; nchunks++; - nsegs -= mw->mw_nents; + nsegs -= mr->mr_nents; } while (nsegs); /* Update count of segments in the Reply chunk */ @@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) struct ib_sge *sge; unsigned int count; - dprintk("RPC: %s: unmapping %u sges for sc=%p\n", - __func__, sc->sc_unmap_count, sc); - /* The first two SGEs contain the transport header and * the inline buffer. These are always left mapped so * they can be cheaply re-used. @@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) __be32 *p; int ret; -#if defined(CONFIG_SUNRPC_BACKCHANNEL) - if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) - return rpcrdma_bc_marshal_reply(rqst); -#endif - rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); xdr_init_encode(xdr, &req->rl_hdrbuf, req->rl_rdmabuf->rg_base); @@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) rtype = rpcrdma_areadch; } + /* If this is a retransmit, discard previously registered + * chunks. Very likely the connection has been replaced, + * so these registrations are invalid and unusable. + */ + while (unlikely(!list_empty(&req->rl_registered))) { + struct rpcrdma_mr *mr; + + mr = rpcrdma_mr_pop(&req->rl_registered); + rpcrdma_mr_defer_recovery(mr); + } + /* This implementation supports the following combinations * of chunk lists in one RPC-over-RDMA Call message: * @@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) if (ret) goto out_err; - dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", - rqst->rq_task->tk_pid, __func__, - transfertypes[rtype], transfertypes[wtype], - xdr_stream_pos(xdr)); + trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype); ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), &rqst->rq_snd_buf, rtype); @@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) curlen = rqst->rq_rcv_buf.head[0].iov_len; if (curlen > copy_len) curlen = copy_len; - dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", - __func__, srcp, copy_len, curlen); + trace_xprtrdma_fixup(rqst, copy_len, curlen); srcp += curlen; copy_len -= curlen; @@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) if (curlen > pagelist_len) curlen = pagelist_len; - dprintk("RPC: %s: page %d" - " srcp 0x%p len %d curlen %d\n", - __func__, i, srcp, copy_len, curlen); + trace_xprtrdma_fixup_pg(rqst, i, srcp, + copy_len, curlen); destp = kmap_atomic(ppages[i]); memcpy(destp + page_base, srcp, curlen); flush_dcache_page(ppages[i]); @@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) return fixup_copy_count; } -/* Caller must guarantee @rep remains stable during this call. - */ -static void -rpcrdma_mark_remote_invalidation(struct list_head *mws, - struct rpcrdma_rep *rep) -{ - struct rpcrdma_mw *mw; - - if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)) - return; - - list_for_each_entry(mw, mws, mw_list) - if (mw->mw_handle == rep->rr_inv_rkey) { - mw->mw_flags = RPCRDMA_MW_F_RI; - break; /* only one invalidated MR per RPC */ - } -} - /* By convention, backchannel calls arrive via rdma_msg type * messages, and never populate the chunk lists. This makes * the RPC/RDMA header small and fixed in size, so it is @@ -1058,26 +1026,19 @@ out_short: static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) { + u32 handle; + u64 offset; __be32 *p; p = xdr_inline_decode(xdr, 4 * sizeof(*p)); if (unlikely(!p)) return -EIO; - ifdebug(FACILITY) { - u64 offset; - u32 handle; - - handle = be32_to_cpup(p++); - *length = be32_to_cpup(p++); - xdr_decode_hyper(p, &offset); - dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n", - __func__, *length, (unsigned long long)offset, - handle); - } else { - *length = be32_to_cpup(p + 1); - } + handle = be32_to_cpup(p++); + *length = be32_to_cpup(p++); + xdr_decode_hyper(p, &offset); + trace_xprtrdma_decode_seg(handle, *length, offset); return 0; } @@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length) *length += seglength; } - dprintk("RPC: %s: segcount=%u, %u bytes\n", - __func__, be32_to_cpup(p), *length); return 0; } @@ -1296,8 +1255,7 @@ out: * being marshaled. */ out_badheader: - dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", - rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc)); + trace_xprtrdma_reply_hdr(rep); r_xprt->rx_stats.bad_reply_count++; status = -EIO; goto out; @@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work) struct rpcrdma_rep *rep = container_of(work, struct rpcrdma_rep, rr_work); struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); + struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; - rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); - rpcrdma_release_rqst(rep->rr_rxprt, req); + trace_xprtrdma_defer_cmp(rep); + if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE) + r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered); + rpcrdma_release_rqst(r_xprt, req); rpcrdma_complete_rqst(rep); } @@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) u32 credits; __be32 *p; - dprintk("RPC: %s: incoming rep %p\n", __func__, rep); - if (rep->rr_hdrbuf.head[0].iov_len == 0) goto out_badstatus; @@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) rep->rr_rqst = rqst; clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); - dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", - __func__, rep, req, be32_to_cpu(rep->rr_xid)); + trace_xprtrdma_reply(rqst->rq_task, rep, req, credits); queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); return; @@ -1420,8 +1378,7 @@ out_badstatus: return; out_badversion: - dprintk("RPC: %s: invalid version %d\n", - __func__, be32_to_cpu(rep->rr_vers)); + trace_xprtrdma_reply_vers(rep); goto repost; /* The RPC transaction has already been terminated, or the header @@ -1429,12 +1386,11 @@ out_badversion: */ out_norqst: spin_unlock(&xprt->recv_lock); - dprintk("RPC: %s: no match for incoming xid 0x%08x\n", - __func__, be32_to_cpu(rep->rr_xid)); + trace_xprtrdma_reply_rqst(rep); goto repost; out_shortreply: - dprintk("RPC: %s: short/invalid reply\n", __func__); + trace_xprtrdma_reply_short(rep); /* If no pending RPC transaction was matched, post a replacement * receive buffer before returning. |