From e622f2f4ad2142d2a613a57fb85f8cf737935ef5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Oct 2015 09:16:33 +0100 Subject: IB: split struct ib_send_wr This patch splits up struct ib_send_wr so that all non-trivial verbs use their own structure which embeds struct ib_send_wr. This dramatically shrinks the size of a WR for most common operations: sizeof(struct ib_send_wr) (old): 96 sizeof(struct ib_send_wr): 48 sizeof(struct ib_rdma_wr): 64 sizeof(struct ib_atomic_wr): 96 sizeof(struct ib_ud_wr): 88 sizeof(struct ib_fast_reg_wr): 88 sizeof(struct ib_bind_mw_wr): 96 sizeof(struct ib_sig_handover_wr): 80 And with Sagi's pending MR rework the fast registration WR will also be down to a reasonable size: sizeof(struct ib_fastreg_wr): 64 Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche [srp, srpt] Reviewed-by: Chuck Lever [sunrpc] Tested-by: Haggai Eran Tested-by: Sagi Grimberg Tested-by: Steve Wise --- net/sunrpc/xprtrdma/frwr_ops.c | 23 +++++------ net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 70 ++++++++++++++++----------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 18 ++++----- 3 files changed, 56 insertions(+), 55 deletions(-) (limited to 'net/sunrpc/xprtrdma') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 5318951b3b53..0d2f46f600b6 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -312,7 +312,8 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; - struct ib_send_wr fastreg_wr, *bad_wr; + struct ib_fast_reg_wr fastreg_wr; + struct ib_send_wr *bad_wr; u8 key; int len, pageoff; int i, rc; @@ -358,23 +359,23 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, __func__, mw, i, len); memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr_id = (unsigned long)(void *)mw; - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + 
pageoff; - fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.page_list_len = page_no; - fastreg_wr.wr.fast_reg.length = len; - fastreg_wr.wr.fast_reg.access_flags = writing ? + fastreg_wr.wr.wr_id = (unsigned long)(void *)mw; + fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.iova_start = seg1->mr_dma + pageoff; + fastreg_wr.page_list = frmr->fr_pgl; + fastreg_wr.page_shift = PAGE_SHIFT; + fastreg_wr.page_list_len = page_no; + fastreg_wr.length = len; + fastreg_wr.access_flags = writing ? IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; mr = frmr->fr_mr; key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); - fastreg_wr.wr.fast_reg.rkey = mr->rkey; + fastreg_wr.rkey = mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, &fastreg_wr.wr, &bad_wr); if (rc) goto out_senderr; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index cb5174284074..7be42d0da19e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, u64 rs_offset, bool last) { - struct ib_send_wr read_wr; + struct ib_rdma_wr read_wr; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); int ret, read, pno; @@ -179,16 +179,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt, clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); memset(&read_wr, 0, sizeof(read_wr)); - read_wr.wr_id = (unsigned long)ctxt; - read_wr.opcode = IB_WR_RDMA_READ; - ctxt->wr_op = read_wr.opcode; - read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = rs_handle; - read_wr.wr.rdma.remote_addr = rs_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = pages_needed; - - ret = svc_rdma_send(xprt, 
&read_wr); + read_wr.wr.wr_id = (unsigned long)ctxt; + read_wr.wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.wr.opcode; + read_wr.wr.send_flags = IB_SEND_SIGNALED; + read_wr.rkey = rs_handle; + read_wr.remote_addr = rs_offset; + read_wr.wr.sg_list = ctxt->sge; + read_wr.wr.num_sge = pages_needed; + + ret = svc_rdma_send(xprt, &read_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -218,9 +218,9 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, u64 rs_offset, bool last) { - struct ib_send_wr read_wr; + struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; - struct ib_send_wr fastreg_wr; + struct ib_fast_reg_wr fastreg_wr; u8 key; int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); @@ -289,31 +289,31 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, /* Prepare FASTREG WR */ memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; - fastreg_wr.wr.fast_reg.page_list = frmr->page_list; - fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; - fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; - fastreg_wr.wr.fast_reg.length = frmr->map_len; - fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; - fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; - fastreg_wr.next = &read_wr; + fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.iova_start = (unsigned long)frmr->kva; + fastreg_wr.page_list = frmr->page_list; + fastreg_wr.page_list_len = frmr->page_list_len; + fastreg_wr.page_shift = PAGE_SHIFT; + fastreg_wr.length = frmr->map_len; + fastreg_wr.access_flags = frmr->access_flags; + fastreg_wr.rkey = frmr->mr->lkey; + fastreg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); - 
read_wr.send_flags = IB_SEND_SIGNALED; - read_wr.wr.rdma.rkey = rs_handle; - read_wr.wr.rdma.remote_addr = rs_offset; - read_wr.sg_list = ctxt->sge; - read_wr.num_sge = 1; + read_wr.wr.send_flags = IB_SEND_SIGNALED; + read_wr.rkey = rs_handle; + read_wr.remote_addr = rs_offset; + read_wr.wr.sg_list = ctxt->sge; + read_wr.wr.num_sge = 1; if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) { - read_wr.opcode = IB_WR_RDMA_READ_WITH_INV; - read_wr.wr_id = (unsigned long)ctxt; - read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; + read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV; + read_wr.wr.wr_id = (unsigned long)ctxt; + read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey; } else { - read_wr.opcode = IB_WR_RDMA_READ; - read_wr.next = &inv_wr; + read_wr.wr.opcode = IB_WR_RDMA_READ; + read_wr.wr.next = &inv_wr; /* Prepare invalidate */ memset(&inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = (unsigned long)ctxt; @@ -321,10 +321,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE; inv_wr.ex.invalidate_rkey = frmr->mr->lkey; } - ctxt->wr_op = read_wr.opcode; + ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ - ret = svc_rdma_send(xprt, &fastreg_wr); + ret = svc_rdma_send(xprt, &fastreg_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 1dfae8317065..969a1ab75fc3 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, u32 xdr_off, int write_len, struct svc_rdma_req_map *vec) { - struct ib_send_wr write_wr; + struct ib_rdma_wr write_wr; struct ib_sge *sge; int xdr_sge_no; int sge_no; @@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, /* Prepare WRITE WR */ memset(&write_wr, 0, sizeof 
write_wr); ctxt->wr_op = IB_WR_RDMA_WRITE; - write_wr.wr_id = (unsigned long)ctxt; - write_wr.sg_list = &sge[0]; - write_wr.num_sge = sge_no; - write_wr.opcode = IB_WR_RDMA_WRITE; - write_wr.send_flags = IB_SEND_SIGNALED; - write_wr.wr.rdma.rkey = rmr; - write_wr.wr.rdma.remote_addr = to; + write_wr.wr.wr_id = (unsigned long)ctxt; + write_wr.wr.sg_list = &sge[0]; + write_wr.wr.num_sge = sge_no; + write_wr.wr.opcode = IB_WR_RDMA_WRITE; + write_wr.wr.send_flags = IB_SEND_SIGNALED; + write_wr.rkey = rmr; + write_wr.remote_addr = to; /* Post It */ atomic_inc(&rdma_stat_write); - if (svc_rdma_send(xprt, &write_wr)) + if (svc_rdma_send(xprt, &write_wr.wr)) goto err; return write_len - bc; err: -- cgit v1.2.3 From fa20105e09e97e81aadf02f722c31195e4a75c84 Mon Sep 17 00:00:00 2001 From: Guy Shapiro Date: Thu, 22 Oct 2015 15:20:10 +0300 Subject: IB/cma: Add support for network namespaces Add support for network namespaces in the ib_cma module. This is accomplished by: 1. Adding network namespace parameter for rdma_create_id. This parameter is used to populate the network namespace field in rdma_id_private. rdma_create_id keeps a reference on the network namespace. 2. Using the network namespace from the rdma_id instead of init_net inside of ib_cma, when listening on an ID and when looking for an ID for an incoming request. 3. Decrementing the reference count for the appropriate network namespace when calling rdma_destroy_id. In order to preserve the current behavior init_net is passed when calling from other modules. 
Signed-off-by: Guy Shapiro Signed-off-by: Haggai Eran Signed-off-by: Yotam Kenneth Signed-off-by: Shachar Raindel Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 46 +++++++++++++--------- drivers/infiniband/core/ucma.c | 3 +- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h | 4 +- include/rdma/rdma_cm.h | 6 ++- net/9p/trans_rdma.c | 4 +- net/rds/ib.c | 2 +- net/rds/ib_cm.c | 2 +- net/rds/iw.c | 2 +- net/rds/iw_cm.c | 2 +- net/rds/rdma_transport.c | 4 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 +- net/sunrpc/xprtrdma/verbs.c | 3 +- 14 files changed, 52 insertions(+), 34 deletions(-) (limited to 'net/sunrpc/xprtrdma') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index ac03c32ca7f1..7e93eb1f33eb 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -600,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv, return 0; } -struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, +struct rdma_cm_id *rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type) { @@ -624,7 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, INIT_LIST_HEAD(&id_priv->listen_list); INIT_LIST_HEAD(&id_priv->mc_list); get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); - id_priv->id.route.addr.dev_addr.net = &init_net; + id_priv->id.route.addr.dev_addr.net = get_net(net); return &id_priv->id; } @@ -1278,7 +1279,7 @@ static bool cma_match_net_dev(const struct rdma_id_private *id_priv, cma_protocol_roce(&id_priv->id); return !addr->dev_addr.bound_dev_if || - (net_eq(dev_net(net_dev), &init_net) && + (net_eq(dev_net(net_dev), addr->dev_addr.net) && addr->dev_addr.bound_dev_if == net_dev->ifindex); } @@ -1339,7 +1340,7 @@ static struct rdma_id_private *cma_id_from_event(struct 
ib_cm_id *cm_id, } } - bind_list = cma_ps_find(&init_net, + bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net, rdma_ps_from_service_id(req.service_id), cma_port_from_service_id(req.service_id)); id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev); @@ -1411,6 +1412,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv, static void cma_release_port(struct rdma_id_private *id_priv) { struct rdma_bind_list *bind_list = id_priv->bind_list; + struct net *net = id_priv->id.route.addr.dev_addr.net; if (!bind_list) return; @@ -1418,7 +1420,7 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_lock(&lock); hlist_del(&id_priv->node); if (hlist_empty(&bind_list->owners)) { - cma_ps_remove(&init_net, bind_list->ps, bind_list->port); + cma_ps_remove(net, bind_list->ps, bind_list->port); kfree(bind_list); } mutex_unlock(&lock); @@ -1477,6 +1479,7 @@ void rdma_destroy_id(struct rdma_cm_id *id) cma_deref_id(id_priv->id.context); kfree(id_priv->id.route.path_rec); + put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); } EXPORT_SYMBOL(rdma_destroy_id); @@ -1607,7 +1610,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, ib_event->param.req_rcvd.primary_path->service_id; int ret; - id = rdma_create_id(listen_id->event_handler, listen_id->context, + id = rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id->event_handler, listen_id->context, listen_id->ps, ib_event->param.req_rcvd.qp_type); if (IS_ERR(id)) return NULL; @@ -1662,9 +1666,10 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id, struct rdma_id_private *id_priv; struct rdma_cm_id *id; const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family; + struct net *net = listen_id->route.addr.dev_addr.net; int ret; - id = rdma_create_id(listen_id->event_handler, listen_id->context, + id = rdma_create_id(net, listen_id->event_handler, listen_id->context, listen_id->ps, 
IB_QPT_UD); if (IS_ERR(id)) return NULL; @@ -1901,7 +1906,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, return -ECONNABORTED; /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = rdma_create_id(listen_id->id.event_handler, + new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, listen_id->id.context, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(new_cm_id)) { @@ -2029,12 +2035,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, { struct rdma_id_private *dev_id_priv; struct rdma_cm_id *id; + struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; - id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps, + id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, id_priv->id.qp_type); if (IS_ERR(id)) return; @@ -2708,7 +2715,8 @@ static int cma_alloc_port(enum rdma_port_space ps, if (!bind_list) return -ENOMEM; - ret = cma_ps_alloc(&init_net, ps, bind_list, snum); + ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list, + snum); if (ret < 0) goto err; @@ -2727,13 +2735,14 @@ static int cma_alloc_any_port(enum rdma_port_space ps, static unsigned int last_used_port; int low, high, remaining; unsigned int rover; + struct net *net = id_priv->id.route.addr.dev_addr.net; - inet_get_local_port_range(&init_net, &low, &high); + inet_get_local_port_range(net, &low, &high); remaining = (high - low) + 1; rover = prandom_u32() % remaining + low; retry: if (last_used_port != rover && - !cma_ps_find(&init_net, ps, (unsigned short)rover)) { + !cma_ps_find(net, ps, (unsigned short)rover)) { int ret = cma_alloc_port(ps, id_priv, rover); /* * Remember previously used port number in order to avoid @@ -2799,7 +2808,7 @@ static int cma_use_port(enum rdma_port_space ps, if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE)) return -EACCES; - bind_list = cma_ps_find(&init_net, ps, snum); + 
bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum); if (!bind_list) { ret = cma_alloc_port(ps, id_priv, snum); } else { @@ -2991,8 +3000,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) if (addr->sa_family == AF_INET) id_priv->afonly = 1; #if IS_ENABLED(CONFIG_IPV6) - else if (addr->sa_family == AF_INET6) - id_priv->afonly = init_net.ipv6.sysctl.bindv6only; + else if (addr->sa_family == AF_INET6) { + struct net *net = id_priv->id.route.addr.dev_addr.net; + + id_priv->afonly = net->ipv6.sysctl.bindv6only; + } #endif } ret = cma_get_port(id_priv); @@ -3797,6 +3809,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id dev_addr = &id_priv->id.route.addr.dev_addr; if ((dev_addr->bound_dev_if == ndev->ifindex) && + (net_eq(dev_net(ndev), dev_addr->net)) && memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) { printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n", ndev->name, &id_priv->id); @@ -3822,9 +3835,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event, struct rdma_id_private *id_priv; int ret = NOTIFY_DONE; - if (dev_net(ndev) != &init_net) - return NOTIFY_DONE; - if (event != NETDEV_BONDING_FAILOVER) return NOTIFY_DONE; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 3a342dfaac15..e80c107450ab 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -472,7 +472,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, return -ENOMEM; ctx->uid = cmd.uid; - ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type); + ctx->cm_id = rdma_create_id(&init_net, ucma_event_handler, ctx, cmd.ps, + qp_type); if (IS_ERR(ctx->cm_id)) { ret = PTR_ERR(ctx->cm_id); goto err1; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 85132d867bc8..f3122372d49f 100644 --- 
a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1017,7 +1017,7 @@ int iser_connect(struct iser_conn *iser_conn, ib_conn->beacon.wr_id = ISER_BEACON_WRID; ib_conn->beacon.opcode = IB_WR_SEND; - ib_conn->cma_id = rdma_create_id(iser_cma_handler, + ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler, (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ib_conn->cma_id)) { diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b58ac25ce19d..7d41c9d6d6bf 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -3096,7 +3096,7 @@ isert_setup_id(struct isert_np *isert_np) sa = (struct sockaddr *)&np->np_sockaddr; isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa); - id = rdma_create_id(isert_cma_handler, isert_np, + id = rdma_create_id(&init_net, isert_cma_handler, isert_np, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id)); diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h index f4b6c33ac318..5015a9e830bd 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h @@ -128,7 +128,9 @@ extern kib_tunables_t kiblnd_tunables; IBLND_CREDIT_HIGHWATER_V1 : \ *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */ -#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt) +#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \ + cb, dev, \ + ps, qpt) static inline int kiblnd_concurrent_sends_v1(void) diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index c92522c192d2..463ec0ccbe51 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -160,13 +160,17 @@ struct rdma_cm_id { /** * rdma_create_id - Create an RDMA identifier. 
* + * @net: The network namespace in which to create the new id. * @event_handler: User callback invoked to report events associated with the * returned rdma_id. * @context: User specified context associated with the id. * @ps: RDMA port space. * @qp_type: type of queue pair associated with the id. + * + * The id holds a reference on the network namespace until it is destroyed. */ -struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler, +struct rdma_cm_id *rdma_create_id(struct net *net, + rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, enum ib_qp_type qp_type); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index ba1210253f5e..52b4a2f993f2 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) return -ENOMEM; /* Create the RDMA CM ID */ - rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, - IB_QPT_RC); + rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(rdma->cm_id)) goto error; diff --git a/net/rds/ib.c b/net/rds/ib.c index 2d3f2ab475df..cd64ef9a4748 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -317,7 +317,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. 
*/ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 9043f5c04787..f5a98068faf0 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -565,7 +565,7 @@ int rds_ib_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); diff --git a/net/rds/iw.c b/net/rds/iw.c index 3df0295c6659..576f1825fc55 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr) /* Create a CMA ID and try to bind it. This catches both * IB and iWARP capable NICs. */ - cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); + cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) return PTR_ERR(cm_id); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index a6553a6fb2bc..aea4c911bc76 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn) /* XXX I wonder what affect the port space has */ /* delegate cm event handler to rdma_transport */ - ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(ic->i_cm_id)) { ret = PTR_ERR(ic->i_cm_id); diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index b9b40af5345b..9c1fed81bf0f 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void) struct rdma_cm_id *cm_id; int ret; - cm_id = 
rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP, - IB_QPT_RC); + cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); printk(KERN_ERR "RDS/RDMA: failed to setup listener, " diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fcc3eb80c265..4a41122d586f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -692,8 +692,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, if (!cma_xprt) return ERR_PTR(-ENOMEM); - listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP, - IB_QPT_RC); + listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); dprintk("svcrdma: rdma_create_id failed = %d\n", ret); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 81e8d31f3abc..6c06ba088fea 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -432,7 +432,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, init_completion(&ia->ri_done); - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); + id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, + IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); dprintk("RPC: %s: rdma_create_id() failed %i\n", -- cgit v1.2.3 From 4143f34e01e9cdf1882f98c54d9073e4de8c28fb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:35 +0300 Subject: xprtrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. 
Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Reviewed-by: Chuck Lever Signed-off-by: Doug Ledford --- net/sunrpc/xprtrdma/frwr_ops.c | 118 +++++++++++++++++++++++----------------- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +- 2 files changed, 69 insertions(+), 52 deletions(-) (limited to 'net/sunrpc/xprtrdma') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 0d2f46f600b6..a1434447b0d6 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device, f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth); if (IS_ERR(f->fr_mr)) goto out_mr_err; - f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth); - if (IS_ERR(f->fr_pgl)) + + f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL); + if (!f->sg) goto out_list_err; + + sg_init_table(f->sg, depth); + return 0; out_mr_err: @@ -163,9 +167,9 @@ out_mr_err: return rc; out_list_err: - rc = PTR_ERR(f->fr_pgl); - dprintk("RPC: %s: ib_alloc_fast_reg_page_list status %i\n", - __func__, rc); + rc = -ENOMEM; + dprintk("RPC: %s: sg allocation failure\n", + __func__); ib_dereg_mr(f->fr_mr); return rc; } @@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r) if (rc) dprintk("RPC: %s: ib_dereg_mr status %i\n", __func__, rc); - ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + kfree(r->r.frmr.sg); } static int @@ -312,14 +316,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, struct rpcrdma_mw *mw; struct rpcrdma_frmr *frmr; struct ib_mr *mr; - struct ib_fast_reg_wr fastreg_wr; + struct ib_reg_wr reg_wr; struct ib_send_wr *bad_wr; + int rc, i, n, dma_nents; u8 key; - int len, pageoff; - int i, rc; - int seg_len; - u64 pa; - int page_no; mw = seg1->rl_mw; seg1->rl_mw = NULL; @@ -332,64 +332,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, } while (mw->r.frmr.fr_state != FRMR_IS_INVALID); frmr 
= &mw->r.frmr; frmr->fr_state = FRMR_IS_VALID; + mr = frmr->fr_mr; - pageoff = offset_in_page(seg1->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; if (nsegs > ia->ri_max_frmr_depth) nsegs = ia->ri_max_frmr_depth; - for (page_no = i = 0; i < nsegs;) { - rpcrdma_map_one(device, seg, direction); - pa = seg->mr_dma; - for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) { - frmr->fr_pgl->page_list[page_no++] = pa; - pa += PAGE_SIZE; - } - len += seg->mr_len; + for (i = 0; i < nsegs;) { + if (seg->mr_page) + sg_set_page(&frmr->sg[i], + seg->mr_page, + seg->mr_len, + offset_in_page(seg->mr_offset)); + else + sg_set_buf(&frmr->sg[i], seg->mr_offset, + seg->mr_len); + ++seg; ++i; + /* Check for holes */ if ((i < nsegs && offset_in_page(seg->mr_offset)) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) break; } - dprintk("RPC: %s: Using frmr %p to map %d segments (%d bytes)\n", - __func__, mw, i, len); - - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr.wr_id = (unsigned long)(void *)mw; - fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.iova_start = seg1->mr_dma + pageoff; - fastreg_wr.page_list = frmr->fr_pgl; - fastreg_wr.page_shift = PAGE_SHIFT; - fastreg_wr.page_list_len = page_no; - fastreg_wr.length = len; - fastreg_wr.access_flags = writing ? - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : - IB_ACCESS_REMOTE_READ; - mr = frmr->fr_mr; + frmr->sg_nents = i; + + dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction); + if (!dma_nents) { + pr_err("RPC: %s: failed to dma map sg %p sg_nents %u\n", + __func__, frmr->sg, frmr->sg_nents); + return -ENOMEM; + } + + n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("RPC: %s: failed to map mr %p (%u/%u)\n", + __func__, frmr->fr_mr, n, frmr->sg_nents); + rc = n < 0 ? 
n : -EINVAL; + goto out_senderr; + } + + dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", + __func__, mw, frmr->sg_nents, mr->length); + key = (u8)(mr->rkey & 0x000000FF); ib_update_fast_reg_key(mr, ++key); - fastreg_wr.rkey = mr->rkey; + + reg_wr.wr.next = NULL; + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = (uintptr_t)mw; + reg_wr.wr.num_sge = 0; + reg_wr.wr.send_flags = 0; + reg_wr.mr = mr; + reg_wr.key = mr->rkey; + reg_wr.access = writing ? + IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : + IB_ACCESS_REMOTE_READ; DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_post_send(ia->ri_id->qp, &fastreg_wr.wr, &bad_wr); + rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr); if (rc) goto out_senderr; + seg1->mr_dir = direction; seg1->rl_mw = mw; seg1->mr_rkey = mr->rkey; - seg1->mr_base = seg1->mr_dma + pageoff; - seg1->mr_nsegs = i; - seg1->mr_len = len; - return i; + seg1->mr_base = mr->iova; + seg1->mr_nsegs = frmr->sg_nents; + seg1->mr_len = mr->length; + + return frmr->sg_nents; out_senderr: dprintk("RPC: %s: ib_post_send status %i\n", __func__, rc); - while (i--) - rpcrdma_unmap_one(device, --seg); + ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction); __frwr_queue_recovery(mw); return rc; } @@ -403,22 +419,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_mw *mw = seg1->rl_mw; + struct rpcrdma_frmr *frmr = &mw->r.frmr; struct ib_send_wr invalidate_wr, *bad_wr; int rc, nsegs = seg->mr_nsegs; dprintk("RPC: %s: FRMR %p\n", __func__, mw); seg1->rl_mw = NULL; - mw->r.frmr.fr_state = FRMR_IS_INVALID; + frmr->fr_state = FRMR_IS_INVALID; memset(&invalidate_wr, 0, sizeof(invalidate_wr)); invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr.opcode = IB_WR_LOCAL_INV; - invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey; + invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; DECR_CQCOUNT(&r_xprt->rx_ep); - while 
(seg1->mr_nsegs--) - rpcrdma_unmap_one(ia->ri_device, seg++); + ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); read_lock(&ia->ri_qplock); rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); read_unlock(&ia->ri_qplock); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index c09414e6f91b..c82abf44e39d 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -193,7 +193,8 @@ enum rpcrdma_frmr_state { }; struct rpcrdma_frmr { - struct ib_fast_reg_page_list *fr_pgl; + struct scatterlist *sg; + int sg_nents; struct ib_mr *fr_mr; enum rpcrdma_frmr_state fr_state; struct work_struct fr_work; -- cgit v1.2.3 From 412a15c0fe537c59c794d4e8134580b9cb984a0c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:36 +0300 Subject: svcrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. 
Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 6 +-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 76 ++++++++++++++++++-------------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 34 +++++--------- 3 files changed, 55 insertions(+), 61 deletions(-) (limited to 'net/sunrpc/xprtrdma') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..1e4438ea2380 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge { }; struct svc_rdma_fastreg_mr { struct ib_mr *mr; - void *kva; - struct ib_fast_reg_page_list *page_list; - int page_list_len; + struct scatterlist *sg; + int sg_nents; unsigned long access_flags; - unsigned long map_len; enum dma_data_direction direction; struct list_head frmr_list; }; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 7be42d0da19e..cb0991345816 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -220,12 +220,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, { struct ib_rdma_wr read_wr; struct ib_send_wr inv_wr; - struct ib_fast_reg_wr fastreg_wr; + struct ib_reg_wr reg_wr; u8 key; - int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; + int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT; struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt); struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt); - int ret, read, pno; + int ret, read, pno, dma_nents, n; u32 pg_off = *page_offset; u32 pg_no = *page_no; @@ -234,16 +234,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->direction = DMA_FROM_DEVICE; ctxt->frmr = frmr; - pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len); - read = min_t(int, pages_needed << PAGE_SHIFT, rs_length); + nents = 
min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len); + read = min_t(int, nents << PAGE_SHIFT, rs_length); - frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]); frmr->direction = DMA_FROM_DEVICE; frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); - frmr->map_len = pages_needed << PAGE_SHIFT; - frmr->page_list_len = pages_needed; + frmr->sg_nents = nents; - for (pno = 0; pno < pages_needed; pno++) { + for (pno = 0; pno < nents; pno++) { int len = min_t(int, rs_length, PAGE_SIZE - pg_off); head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no]; @@ -251,17 +249,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, head->arg.len += len; if (!pg_off) head->count++; + + sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no], + len, pg_off); + rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1]; rqstp->rq_next_page = rqstp->rq_respages + 1; - frmr->page_list->page_list[pno] = - ib_dma_map_page(xprt->sc_cm_id->device, - head->arg.pages[pg_no], 0, - PAGE_SIZE, DMA_FROM_DEVICE); - ret = ib_dma_mapping_error(xprt->sc_cm_id->device, - frmr->page_list->page_list[pno]); - if (ret) - goto err; - atomic_inc(&xprt->sc_dma_used); /* adjust offset and wrap to next page if needed */ pg_off += len; @@ -277,28 +270,42 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, else clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, + frmr->direction); + if (!dma_nents) { + pr_err("svcrdma: failed to dma map sg %p\n", + frmr->sg); + return -ENOMEM; + } + atomic_inc(&xprt->sc_dma_used); + + n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE); + if (unlikely(n != frmr->sg_nents)) { + pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n", + frmr->mr, n, frmr->sg_nents); + return n < 0 ? 
n : -EINVAL; + } + /* Bump the key */ key = (u8)(frmr->mr->lkey & 0x000000FF); ib_update_fast_reg_key(frmr->mr, ++key); - ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset; + ctxt->sge[0].addr = frmr->mr->iova; ctxt->sge[0].lkey = frmr->mr->lkey; - ctxt->sge[0].length = read; + ctxt->sge[0].length = frmr->mr->length; ctxt->count = 1; ctxt->read_hdr = head; - /* Prepare FASTREG WR */ - memset(&fastreg_wr, 0, sizeof(fastreg_wr)); - fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR; - fastreg_wr.wr.send_flags = IB_SEND_SIGNALED; - fastreg_wr.iova_start = (unsigned long)frmr->kva; - fastreg_wr.page_list = frmr->page_list; - fastreg_wr.page_list_len = frmr->page_list_len; - fastreg_wr.page_shift = PAGE_SHIFT; - fastreg_wr.length = frmr->map_len; - fastreg_wr.access_flags = frmr->access_flags; - fastreg_wr.rkey = frmr->mr->lkey; - fastreg_wr.wr.next = &read_wr.wr; + /* Prepare REG WR */ + reg_wr.wr.opcode = IB_WR_REG_MR; + reg_wr.wr.wr_id = 0; + reg_wr.wr.send_flags = IB_SEND_SIGNALED; + reg_wr.wr.num_sge = 0; + reg_wr.mr = frmr->mr; + reg_wr.key = frmr->mr->lkey; + reg_wr.access = frmr->access_flags; + reg_wr.wr.next = &read_wr.wr; /* Prepare RDMA_READ */ memset(&read_wr, 0, sizeof(read_wr)); @@ -324,7 +331,7 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, ctxt->wr_op = read_wr.wr.opcode; /* Post the chain */ - ret = svc_rdma_send(xprt, &fastreg_wr.wr); + ret = svc_rdma_send(xprt, &reg_wr.wr); if (ret) { pr_err("svcrdma: Error %d posting RDMA_READ\n", ret); set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -338,7 +345,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt, atomic_inc(&rdma_stat_read); return ret; err: - svc_rdma_unmap_dma(ctxt); + ib_dma_unmap_sg(xprt->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); svc_rdma_put_context(ctxt, 0); svc_rdma_put_frmr(xprt, frmr); return ret; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 4a41122d586f..a266e870d870 100644 ---
a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -732,7 +732,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) { struct ib_mr *mr; - struct ib_fast_reg_page_list *pl; + struct scatterlist *sg; struct svc_rdma_fastreg_mr *frmr; u32 num_sg; @@ -745,13 +745,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) if (IS_ERR(mr)) goto err_free_frmr; - pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, - num_sg); - if (IS_ERR(pl)) + sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL); + if (!sg) goto err_free_mr; + sg_init_table(sg, RPCSVC_MAXPAGES); + frmr->mr = mr; - frmr->page_list = pl; + frmr->sg = sg; INIT_LIST_HEAD(&frmr->frmr_list); return frmr; @@ -771,8 +772,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) frmr = list_entry(xprt->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); + kfree(frmr->sg); ib_dereg_mr(frmr->mr); - ib_free_fast_reg_page_list(frmr->page_list); kfree(frmr); } } @@ -786,8 +787,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) frmr = list_entry(rdma->sc_frmr_q.next, struct svc_rdma_fastreg_mr, frmr_list); list_del_init(&frmr->frmr_list); - frmr->map_len = 0; - frmr->page_list_len = 0; + frmr->sg_nents = 0; } spin_unlock_bh(&rdma->sc_frmr_q_lock); if (frmr) @@ -796,25 +796,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) return rdma_alloc_frmr(rdma); } -static void frmr_unmap_dma(struct svcxprt_rdma *xprt, - struct svc_rdma_fastreg_mr *frmr) -{ - int page_no; - for (page_no = 0; page_no < frmr->page_list_len; page_no++) { - dma_addr_t addr = frmr->page_list->page_list[page_no]; - if (ib_dma_mapping_error(frmr->mr->device, addr)) - continue; - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE, - frmr->direction); - } -} - 
void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, struct svc_rdma_fastreg_mr *frmr) { if (frmr) { - frmr_unmap_dma(rdma, frmr); + ib_dma_unmap_sg(rdma->sc_cm_id->device, + frmr->sg, frmr->sg_nents, frmr->direction); + atomic_dec(&rdma->sc_dma_used); spin_lock_bh(&rdma->sc_frmr_q_lock); WARN_ON_ONCE(!list_empty(&frmr->frmr_list)); list_add(&frmr->frmr_list, &rdma->sc_frmr_q); -- cgit v1.2.3