diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 06:03:48 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-31 06:03:48 +0300 |
commit | efd52b5d363e3e3b6224ad39949219c0df117c91 (patch) | |
tree | 2d885d2f431a324af58d8f267755240bff3e32da /net/sunrpc/xprtrdma/transport.c | |
parent | 1ed2d76e0213751c82e3a242b61b0883daf330df (diff) | |
parent | e231c6879cfd44e4fffd384bb6dd7d313249a523 (diff) | |
download | linux-efd52b5d363e3e3b6224ad39949219c0df117c91.tar.xz |
Merge tag 'nfs-for-4.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
"Highlights include:
Stable bugfixes:
- Fix breakages in the nfsstat utility due to the inclusion of the
NFSv4 LOOKUPP operation
- Fix a NULL pointer dereference in nfs_idmap_prepare_pipe_upcall()
due to nfs_idmap_legacy_upcall() being called without an 'aux'
parameter
- Fix a refcount leak in the standard O_DIRECT error path
- Fix a refcount leak in the pNFS O_DIRECT fallback to MDS path
- Fix CPU latency issues with nfs_commit_release_pages()
- Fix the LAYOUTUNAVAILABLE error case in the file layout type
- NFS: Fix a race between mmap() and O_DIRECT
Features:
- Support the statx() mask and query flags to enable optimisations
when the user is requesting only attributes that are already up to
date in the inode cache, or is specifying the AT_STATX_DONT_SYNC
flag
- Add a module alias for the SCSI pNFS layout type
Bugfixes:
- Automounting when resolving an NFSv4 referral should preserve the
RDMA transport protocol settings
- Various other RDMA bugfixes from Chuck
- pNFS block layout fixes
- Always set NFS_LOCK_LOST when a lock is lost"
* tag 'nfs-for-4.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (69 commits)
NFS: Fix a race between mmap() and O_DIRECT
NFS: Remove a redundant call to unmap_mapping_range()
pnfs/blocklayout: Ensure disk address in block device map
pnfs/blocklayout: pnfs_block_dev_map uses bytes, not sectors
lockd: Fix server refcounting
SUNRPC: Fix null rpc_clnt dereference in rpc_task_queued tracepoint
SUNRPC: Micro-optimize __rpc_execute
SUNRPC: task_run_action should display tk_callback
sunrpc: Format RPC events consistently for display
SUNRPC: Trace xprt_timer events
xprtrdma: Correct some documenting comments
xprtrdma: Fix "bytes registered" accounting
xprtrdma: Instrument allocation/release of rpcrdma_req/rep objects
xprtrdma: Add trace points to instrument QP and CQ access upcalls
xprtrdma: Add trace points in the client-side backchannel code paths
xprtrdma: Add trace points for connect events
xprtrdma: Add trace points to instrument MR allocation and recovery
xprtrdma: Add trace points to instrument memory invalidation
xprtrdma: Add trace points in reply decoder path
xprtrdma: Add trace points to instrument memory registration
...
Diffstat (limited to 'net/sunrpc/xprtrdma/transport.c')
-rw-r--r-- | net/sunrpc/xprtrdma/transport.c | 128 |
1 files changed, 55 insertions, 73 deletions
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6ee1ad8978f3..4b1ecfe979cf 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -67,8 +67,7 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; -static unsigned int xprt_rdma_inline_write_padding; -unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; +unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR; int xprt_rdma_pad_optimize; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) @@ -81,6 +80,7 @@ static unsigned int zero; static unsigned int max_padding = PAGE_SIZE; static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; static unsigned int max_memreg = RPCRDMA_LAST - 1; +static unsigned int dummy; static struct ctl_table_header *sunrpc_table_header; @@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = { }, { .procname = "rdma_inline_write_padding", - .data = &xprt_rdma_inline_write_padding, + .data = &dummy, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work) xprt_clear_connected(xprt); - dprintk("RPC: %s: %sconnect\n", __func__, - r_xprt->rx_ep.rep_connected != 0 ? 
"re" : ""); rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); if (rc) xprt_wake_pending_tasks(xprt, rc); - dprintk("RPC: %s: exit\n", __func__); xprt_clear_connecting(xprt); } @@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, rx_xprt); - pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); + trace_xprtrdma_inject_dsc(r_xprt); rdma_disconnect(r_xprt->rx_ia.ri_id); } @@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - dprintk("RPC: %s: called\n", __func__); + trace_xprtrdma_destroy(r_xprt); cancel_delayed_work_sync(&r_xprt->rx_connect_worker); @@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) rpcrdma_ia_close(&r_xprt->rx_ia); xprt_rdma_free_addresses(xprt); - xprt_free(xprt); - dprintk("RPC: %s: returning\n", __func__); - module_put(THIS_MODULE); } @@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args) /* * Set up RDMA-specific connect data. 
*/ - - sap = (struct sockaddr *)&cdata.addr; - memcpy(sap, args->dstaddr, args->addrlen); + sap = args->dstaddr; /* Ensure xprt->addr holds valid server TCP (not RDMA) * address, for any side protocols which peek at it */ @@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args) if (rpc_get_port(sap)) xprt_set_bound(xprt); + xprt_rdma_format_addresses(xprt, sap); cdata.max_requests = xprt->max_reqs; @@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args) if (cdata.inline_rsize > cdata.rsize) cdata.inline_rsize = cdata.rsize; - cdata.padding = xprt_rdma_inline_write_padding; - /* * Create new transport instance, which includes initialized * o ia @@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args) new_xprt = rpcx_to_rdmax(xprt); - rc = rpcrdma_ia_open(new_xprt, sap); + rc = rpcrdma_ia_open(new_xprt); if (rc) goto out1; @@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args) */ new_xprt->rx_data = cdata; new_ep = &new_xprt->rx_ep; - new_ep->rep_remote_addr = cdata.addr; rc = rpcrdma_ep_create(&new_xprt->rx_ep, &new_xprt->rx_ia, &new_xprt->rx_data); if (rc) goto out2; - /* - * Allocate pre-registered send and receive buffers for headers and - * any inline data. Also specify any padding which will be provided - * from a preregistered zero buffer. - */ rc = rpcrdma_buffer_create(new_xprt); if (rc) goto out3; - /* - * Register a callback for connection events. This is necessary because - * connection loss notification is async. We also catch connection loss - * when reaping receives. 
- */ INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, xprt_rdma_connect_worker); - xprt_rdma_format_addresses(xprt, sap); xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); if (xprt->max_payload == 0) goto out4; @@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args) dprintk("RPC: %s: %s:%s\n", __func__, xprt->address_strings[RPC_DISPLAY_ADDR], xprt->address_strings[RPC_DISPLAY_PORT]); + trace_xprtrdma_create(new_xprt); return xprt; out4: - xprt_rdma_free_addresses(xprt); - rc = -EINVAL; + rpcrdma_buffer_destroy(&new_xprt->rx_buf); + rc = -ENODEV; out3: rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: + trace_xprtrdma_destroy(new_xprt); + xprt_rdma_free_addresses(xprt); xprt_free(xprt); return ERR_PTR(rc); } @@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt) rpcrdma_ep_disconnect(ep, ia); } +/** + * xprt_rdma_set_port - update server port with rpcbind result + * @xprt: controlling RPC transport + * @port: new port value + * + * Transport connect status is unchanged. 
+ */ static void xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) { - struct sockaddr_in *sap; + struct sockaddr *sap = (struct sockaddr *)&xprt->addr; + char buf[8]; - sap = (struct sockaddr_in *)&xprt->addr; - sap->sin_port = htons(port); - sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; - sap->sin_port = htons(port); - dprintk("RPC: %s: %u\n", __func__, port); + dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n", + __func__, xprt, + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + port); + + rpc_set_port(sap, port); + + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + snprintf(buf, sizeof(buf), "%u", port); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); + + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + snprintf(buf, sizeof(buf), "%4hx", port); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); } /** @@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) static void xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) { - dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt); - xprt_force_disconnect(xprt); } @@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task) req = rpcrdma_buffer_get(&r_xprt->rx_buf); if (req == NULL) - return -ENOMEM; + goto out_get; flags = RPCRDMA_DEF_GFP; if (RPC_IS_SWAPPER(task)) @@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task) if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) goto out_fail; - dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n", - task->tk_pid, __func__, rqst->rq_callsize, - rqst->rq_rcvsize, req); - req->rl_cpu = smp_processor_id(); req->rl_connect_cookie = 0; /* our reserved value */ rpcrdma_set_xprtdata(rqst, req); rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base; + trace_xprtrdma_allocate(task, req); return 0; out_fail: rpcrdma_buffer_put(req); +out_get: + 
trace_xprtrdma_allocate(task, NULL); return -ENOMEM; } @@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags)) - return; - - dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) rpcrdma_release_rqst(r_xprt, req); + trace_xprtrdma_rpc_done(task, req); rpcrdma_buffer_put(req); } @@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task) * * Caller holds the transport's write lock. * - * Return values: - * 0: The request has been sent - * ENOTCONN: Caller needs to invoke connect logic then call again - * ENOBUFS: Call again later to send the request - * EIO: A permanent error occurred. The request was not sent, - * and don't try it again - * - * send_request invokes the meat of RPC RDMA. It must do the following: - * - * 1. Marshal the RPC request into an RPC RDMA request, which means - * putting a header in front of data, and creating IOVs for RDMA - * from those in the request. - * 2. In marshaling, detect opportunities for RDMA, and use them. - * 3. Post a recv message to set up asynch completion, then send - * the request (rpcrdma_ep_post). - * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). + * Returns: + * %0 if the RPC message has been sent + * %-ENOTCONN if the caller should reconnect and call again + * %-ENOBUFS if the caller should call again later + * %-EIO if a permanent error occurred and the request was not + * sent. Do not try to send this message again. 
*/ static int xprt_rdma_send_request(struct rpc_task *task) @@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int rc = 0; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + if (unlikely(!rqst->rq_buffer)) + return xprt_rdma_bc_send_reply(rqst); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + if (!xprt_connected(xprt)) goto drop_connection; - /* On retransmit, remove any previously registered chunks */ - if (unlikely(!list_empty(&req->rl_registered))) - r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, - &req->rl_registered); - rc = rpcrdma_marshal_req(r_xprt, rqst); if (rc < 0) goto failed_marshal; @@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task) goto drop_connection; req->rl_connect_cookie = xprt->connect_cookie; - set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); + __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; @@ -904,8 +887,7 @@ int xprt_rdma_init(void) "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", xprt_rdma_slot_table_entries, xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); - dprintk("\tPadding %d\n\tMemreg %d\n", - xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); + dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy); #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) if (!sunrpc_table_header) |