From edc1b01cd3b20a5fff049e98f82a2b0d24a34c89 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Oct 2015 10:53:49 -0400 Subject: SUNRPC: Move TCP receive data path into a workqueue context Stream protocols such as TCP can often build up a backlog of data to be read due to ordering. Combine this with the fact that some workloads such as NFS read()-intensive workloads need to receive a lot of data per RPC call, and it turns out that receiving the data from inside a softirq context can cause starvation. The following patch moves the TCP data receive into a workqueue context. We still end up calling tcp_read_sock(), but we do so from a process context, meaning that softirqs are enabled for most of the time. With this patch, I see a doubling of read bandwidth when running a multi-threaded iozone workload between a virtual client and server setup. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 357e44c1a46b..0ece4ba06f06 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -44,6 +44,8 @@ struct sock_xprt { */ unsigned long sock_state; struct delayed_work connect_worker; + struct work_struct recv_worker; + struct mutex recv_mutex; struct sockaddr_storage srcaddr; unsigned short srcport; -- cgit v1.2.3 From 516285ebe0efadc40b914a0e61a913a390604810 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 16:15:24 -0400 Subject: NFSv4: nfs4_async_handle_error should take a non-const nfs_server For symmetry with the synchronous handler, and so that we can potentially handle errors such as NFS4ERR_BADNAME. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- include/linux/nfs_xdr.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d044c7b11ff7..ae5cde621954 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -78,7 +78,7 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int _nfs4_recover_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *); +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *, struct nfs4_state *, long *); static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr); static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label); @@ -4982,7 +4982,7 @@ out: static int -nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server, struct nfs4_state *state, long *timeout) { struct nfs_client *clp = server->nfs_client; @@ -5559,7 +5559,7 @@ struct nfs4_unlockdata { struct nfs4_lock_state *lsp; struct nfs_open_context *ctx; struct file_lock fl; - const struct nfs_server *server; + struct nfs_server *server; unsigned long timestamp; }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..53f2acc68baf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -528,7 +528,7 @@ struct nfs4_delegreturnargs { struct nfs4_delegreturnres { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - const struct nfs_server *server; + struct nfs_server *server; }; /* @@ -601,7 +601,7 @@ struct nfs_removeargs { struct nfs_removeres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs_fattr *dir_attr; struct nfs4_change_info cinfo; }; @@ -619,7 +619,7 @@ struct nfs_renameargs { struct nfs_renameres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs4_change_info old_cinfo; struct nfs_fattr *old_fattr; struct nfs4_change_info new_cinfo; -- cgit v1.2.3 From 36022770de6cf9a403c40a68712ed2d2ea2746be Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:34 +0800 Subject: nfs42: add CLONE xdr functions xdr definitions per draft-ietf-nfsv4-minorversion2-38.txt Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/nfs4xdr.c | 1 + include/linux/nfs4.h | 2 + include/linux/nfs_xdr.h | 19 ++++++++++ 4 files changed, 118 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 0eb29e14070d..0ca482a51e53 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -34,6 +34,12 @@ 1 /* opaque devaddr4 length */ + \ XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE)) #define decode_layoutstats_maxsz (op_decode_hdr_maxsz) +#define encode_clone_maxsz (encode_stateid_maxsz + \ + encode_stateid_maxsz + \ + 2 /* src offset */ + \ + 2 /* dst offset */ + \ + 2 /* count */) +#define decode_clone_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -65,7 +71,20 @@ decode_sequence_maxsz + \ decode_putfh_maxsz + \ PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz) - +#define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_savefh_maxsz + \ + encode_putfh_maxsz + \ + encode_clone_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_clone_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_savefh_maxsz + \ + decode_putfh_maxsz + \ + decode_clone_maxsz + \ + decode_getattr_maxsz) static void encode_fallocate(struct xdr_stream *xdr, struct nfs42_falloc_args *args) @@ -128,6 +147,21 @@ static void encode_layoutstats(struct xdr_stream *xdr, encode_uint32(xdr, 0); } +static void encode_clone(struct xdr_stream *xdr, + struct nfs42_clone_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + encode_op_hdr(xdr, OP_CLONE, decode_clone_maxsz, hdr); + encode_nfs4_stateid(xdr, &args->src_stateid); + encode_nfs4_stateid(xdr, &args->dst_stateid); + p = reserve_space(xdr, 3*8); + p = xdr_encode_hyper(p, args->src_offset); + p = xdr_encode_hyper(p, args->dst_offset); + xdr_encode_hyper(p, args->count); +} + /* * Encode ALLOCATE request */ @@ -206,6 +240,27 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode CLONE request + */ +static void nfs4_xdr_enc_clone(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs42_clone_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->src_fh, &hdr); + encode_savefh(xdr, &hdr); + encode_putfh(xdr, args->dst_fh, &hdr); + encode_clone(xdr, args, &hdr); + encode_getfattr(xdr, args->dst_bitmask, &hdr); + encode_nops(&hdr); +} + static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res) { return decode_op_hdr(xdr, OP_ALLOCATE); @@ -243,6 +298,11 @@ static int decode_layoutstats(struct xdr_stream *xdr) return decode_op_hdr(xdr, OP_LAYOUTSTATS); } +static int decode_clone(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_CLONE); +} + /* * Decode ALLOCATE request */ @@ -351,4 +411,39 @@ out: return status; } +/* + * Decode CLONE request + */ +static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs42_clone_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_savefh(xdr); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_clone(xdr); + if (status) + goto out; + status = decode_getfattr(xdr, res->dst_fattr, res->server); + +out: + res->rpc_status = status; + return status; +} + #endif /* __LINUX_FS_NFS_NFS4_2XDR_H */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 788adf3897c7..868472b6b303 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -7465,6 +7465,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(ALLOCATE, enc_allocate, dec_allocate), PROC(DEALLOCATE, enc_deallocate, dec_deallocate), PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats), + PROC(CLONE, enc_clone, dec_clone), #endif /* CONFIG_NFS_V4_2 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 00121f298269..c0c695b634d0 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -130,6 +130,7 @@ enum nfs_opnum4 { OP_READ_PLUS = 68, OP_SEEK = 69, OP_WRITE_SAME = 70, + OP_CLONE = 71, OP_ILLEGAL = 10044, }; @@ -501,6 +502,7 @@ enum { NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, + NFSPROC4_CLNT_CLONE, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..ac678b7a65ed 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -359,6 +359,25 @@ struct nfs42_layoutstat_data { struct nfs42_layoutstat_res res; }; +struct nfs42_clone_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *src_fh; + struct nfs_fh *dst_fh; + nfs4_stateid src_stateid; + nfs4_stateid dst_stateid; + __u64 src_offset; + __u64 dst_offset; + __u64 count; + const u32 *dst_bitmask; +}; + +struct nfs42_clone_res { + struct nfs4_sequence_res seq_res; + unsigned int rpc_status; + struct nfs_fattr *dst_fattr; + const struct nfs_server *server; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; -- cgit v1.2.3 From e5341f3a5762d17be9cdd06257c02c0098bdcab8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:35 +0800 Subject: nfs42: add CLONE proc functions Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs42.h | 1 + fs/nfs/nfs42proc.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 3 +- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 75 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 814c1255f1d2..b587ccd31083 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -17,5 +17,6 @@ int nfs42_proc_deallocate(struct file *, loff_t, loff_t); loff_t nfs42_proc_llseek(struct file *, loff_t, int); int nfs42_proc_layoutstats_generic(struct nfs_server *, struct nfs42_layoutstat_data *); +int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t); #endif /* __LINUX_FS_NFS_NFS4_2_H */ diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 0f020e4d8421..3e92a3cde15d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -271,3 +271,74 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server, return PTR_ERR(task); return 0; } + +static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, + struct file *dst_f, loff_t src_offset, + loff_t dst_offset, loff_t count) +{ + struct inode *src_inode = file_inode(src_f); + struct inode *dst_inode = file_inode(dst_f); + struct nfs_server *server = NFS_SERVER(dst_inode); + struct nfs42_clone_args args = { + .src_fh = NFS_FH(src_inode), + .dst_fh = NFS_FH(dst_inode), + .src_offset = src_offset, + .dst_offset = dst_offset, + .dst_bitmask = server->cache_consistency_bitmask, + }; + struct nfs42_clone_res res = { + .server = server, + }; + int status; + + msg->rpc_argp = &args; + msg->rpc_resp = &res; + + status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ); + if (status) + return status; + + status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE); + if (status) + return status; + + res.dst_fattr = nfs_alloc_fattr(); + if (!res.dst_fattr) + return -ENOMEM; + + status = nfs4_call_sync(server->client, server, msg, + &args.seq_args, &res.seq_res, 0); + if (status == 0) + status = nfs_post_op_update_inode(dst_inode, res.dst_fattr); + + kfree(res.dst_fattr); + return status; +} + +int nfs42_proc_clone(struct file *src_f, struct file *dst_f, + loff_t src_offset, loff_t dst_offset, loff_t count) +{ + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLONE], + }; + struct inode *inode = file_inode(src_f); + struct nfs_server *server = NFS_SERVER(file_inode(src_f)); + struct nfs4_exception exception = { }; + int err; + + if (!nfs_server_capable(inode, NFS_CAP_CLONE)) + return -EOPNOTSUPP; + + do { + err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset, + dst_offset, count); + if (err == -ENOTSUPP || err == -EOPNOTSUPP) { + NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE; + return -EOPNOTSUPP; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + + return err; + +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5133bb18830e..9688b1a9787f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8729,7 +8729,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = { | NFS_CAP_ALLOCATE | NFS_CAP_DEALLOCATE | NFS_CAP_SEEK - | NFS_CAP_LAYOUTSTATS, + | NFS_CAP_LAYOUTSTATS + | NFS_CAP_CLONE, .init_client = nfs41_init_client, .shutdown_client = nfs41_shutdown_client, .match_stateid = nfs41_match_stateid, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 570a7df2775b..a50de1002b20 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -243,5 +243,6 @@ struct nfs_server { #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_LAYOUTSTATS (1U << 22) +#define NFS_CAP_CLONE (1U << 23) #endif -- cgit v1.2.3 From 2a92ee92d4545448066fb664674c0ae5a9d5ea99 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:37 +0800 Subject: nfs: get clone_blksize when probing fsinfo NFSv42 CLONE operation is supposed to respect it. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/nfs4xdr.c | 25 +++++++++++++++++++++++++ include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 6 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 57c5a02f6213..d6d5d2a48e83 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -764,6 +764,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->time_delta = fsinfo->time_delta; + server->clone_blksize = fsinfo->clone_blksize; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9688b1a9787f..8814fbe62623 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -239,6 +239,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_FS_LAYOUT_TYPES, FATTR4_WORD2_LAYOUT_BLKSIZE + | FATTR4_WORD2_CLONE_BLKSIZE }; const u32 nfs4_fs_locations_bitmap[3] = { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 868472b6b303..9f656791a338 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4764,6 +4764,28 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap, return 0; } +/* + * The granularity of a CLONE operation. + */ +static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap, + uint32_t *res) +{ + __be32 *p; + + dprintk("%s: bitmap is %x\n", __func__, bitmap[2]); + *res = 0; + if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) { + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) { + print_overflow_msg(__func__, xdr); + return -EIO; + } + *res = be32_to_cpup(p); + bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE; + } + return 0; +} + static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) { unsigned int savep; @@ -4796,6 +4818,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) if (status != 0) goto xdr_error; status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize); + if (status) + goto xdr_error; + status = decode_attr_clone_blksize(xdr, bitmap, &fsinfo->clone_blksize); if (status) goto xdr_error; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c0c695b634d0..e7e78537aea2 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -422,6 +422,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) +#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) /* MDS threshold bitmap bits */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a50de1002b20..2469ab0bb3a1 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -147,6 +147,7 @@ struct nfs_server { unsigned int acdirmax; unsigned int namelen; unsigned int options; /* extra options enabled by mount */ + unsigned int clone_blksize; /* granularity of a CLONE operation */ #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac678b7a65ed..92ff445e60a0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -141,6 +141,7 @@ struct nfs_fsinfo { __u32 lease_time; /* in seconds */ __u32 layouttype; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ + __u32 clone_blksize; /* granularity of a CLONE operation */ }; struct nfs_fsstat { -- cgit v1.2.3 From 42e5c3e2725ba0c0affc1fc8a6aa1d5cf31ecb75 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:27:35 -0400 Subject: SUNRPC: Abstract backchannel operations xprt_{setup,destroy}_backchannel() won't be adequate for RPC/RMDA bi-direction. In particular, receive buffers have to be pre- registered and posted in order to receive incoming backchannel requests. Add a virtual function call to allow the insertion of appropriate backchannel setup and destruction methods for each transport. In addition, freeing a backchannel request is a little different for RPC/RDMA. Introduce an rpc_xprt_op to handle the difference. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/bc_xprt.h | 5 +++++ include/linux/sunrpc/xprt.h | 5 +++++ net/sunrpc/backchannel_rqst.c | 24 ++++++++++++++++++++++-- net/sunrpc/xprtsock.c | 5 +++++ 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 8df43c9f11dc..4397a4824c81 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -38,6 +38,11 @@ void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); +/* Socket backchannel transport methods */ +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); +void xprt_free_bc_rqst(struct rpc_rqst *req); + /* * Determine if a shared backchannel is in use */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0fb9acbb4780..3f79c4a4ce74 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -136,6 +136,11 @@ struct rpc_xprt_ops { int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); void (*inject_disconnect)(struct rpc_xprt *xprt); + int (*bc_setup)(struct rpc_xprt *xprt, + unsigned int min_reqs); + void (*bc_free_rqst)(struct rpc_rqst *rqst); + void (*bc_destroy)(struct rpc_xprt *xprt, + unsigned int max_reqs); }; /* diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 6255d141133b..229956bf8457 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -137,6 +137,14 @@ out_free: * callback requests can be up to 4096 bytes in size. */ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) +{ + if (!xprt->ops->bc_setup) + return 0; + return xprt->ops->bc_setup(xprt, min_reqs); +} +EXPORT_SYMBOL_GPL(xprt_setup_backchannel); + +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) { struct rpc_rqst *req; struct list_head tmp_list; @@ -192,7 +200,6 @@ out_free: dprintk("RPC: setup backchannel transport failed\n"); return -ENOMEM; } -EXPORT_SYMBOL_GPL(xprt_setup_backchannel); /** * xprt_destroy_backchannel - Destroys the backchannel preallocated structures. @@ -204,6 +211,13 @@ EXPORT_SYMBOL_GPL(xprt_setup_backchannel); * of reqs specified by the caller. */ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) +{ + if (xprt->ops->bc_destroy) + xprt->ops->bc_destroy(xprt, max_reqs); +} +EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); + +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs) { struct rpc_rqst *req = NULL, *tmp = NULL; @@ -227,7 +241,6 @@ out: dprintk("RPC: backchannel list empty= %s\n", list_empty(&xprt->bc_pa_list) ? "true" : "false"); } -EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid) { @@ -264,6 +277,13 @@ void xprt_free_bc_request(struct rpc_rqst *req) { struct rpc_xprt *xprt = req->rq_xprt; + xprt->ops->bc_free_rqst(req); +} + +void xprt_free_bc_rqst(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + dprintk("RPC: free backchannel req=%p\n", req); req->rq_connect_cookie = xprt->connect_cookie - 1; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1a85e0ed0b48..44a81e4c6783 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2580,6 +2580,11 @@ static struct rpc_xprt_ops xs_tcp_ops = { .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, .inject_disconnect = xs_inject_disconnect, +#ifdef CONFIG_SUNRPC_BACKCHANNEL + .bc_setup = xprt_setup_bc, + .bc_free_rqst = xprt_free_bc_rqst, + .bc_destroy = xprt_destroy_bc, +#endif }; /* -- cgit v1.2.3 From 9468431962616c2449d47c482208a5967e011bf9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:16 -0400 Subject: svcrdma: Add backward direction service for RPC/RDMA transport On NFSv4.1 mount points, the Linux NFS client uses this transport endpoint to receive backward direction calls and route replies back to the NFSv4.1 server. Signed-off-by: Chuck Lever Acked-by: "J. Bruce Fields" Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc_rdma.h | 6 +++- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprtrdma/svc_rdma.c | 6 ++++ net/sunrpc/xprtrdma/svc_rdma_transport.c | 58 ++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..fb4013edcf57 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -228,9 +228,13 @@ extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); -extern struct svc_xprt_class svc_rdma_class; extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); +extern struct svc_xprt_class svc_rdma_class; +#ifdef CONFIG_SUNRPC_BACKCHANNEL +extern struct svc_xprt_class svc_rdma_bc_class; +#endif + /* svc_rdma.c */ extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3f79c4a4ce74..82c083946ef0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -158,6 +158,7 @@ enum xprt_transports { XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, }; diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 2cd252f023a5..1b7051bdbdc8 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -239,6 +239,9 @@ void svc_rdma_cleanup(void) unregister_sysctl_table(svcrdma_table_header); svcrdma_table_header = NULL; } +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + svc_unreg_xprt_class(&svc_rdma_bc_class); +#endif svc_unreg_xprt_class(&svc_rdma_class); kmem_cache_destroy(svc_rdma_map_cachep); kmem_cache_destroy(svc_rdma_ctxt_cachep); @@ -286,6 +289,9 @@ int svc_rdma_init(void) /* Register RDMA with the SVC transport switch */ svc_reg_xprt_class(&svc_rdma_class); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + svc_reg_xprt_class(&svc_rdma_bc_class); +#endif return 0; err1: kmem_cache_destroy(svc_rdma_map_cachep); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index fcc3eb80c265..a133b1e5b5f6 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -56,6 +56,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int); static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct net *net, struct sockaddr *sa, int salen, @@ -95,6 +96,63 @@ struct svc_xprt_class svc_rdma_class = { .xcl_ident = XPRT_TRANSPORT_RDMA, }; +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *, + struct sockaddr *, int, int); +static void svc_rdma_bc_detach(struct svc_xprt *); +static void svc_rdma_bc_free(struct svc_xprt *); + +static struct svc_xprt_ops svc_rdma_bc_ops = { + .xpo_create = svc_rdma_bc_create, + .xpo_detach = svc_rdma_bc_detach, + .xpo_free = svc_rdma_bc_free, + .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr, + .xpo_secure_port = svc_rdma_secure_port, +}; + +struct svc_xprt_class svc_rdma_bc_class = { + .xcl_name = "rdma-bc", + .xcl_owner = THIS_MODULE, + .xcl_ops = &svc_rdma_bc_ops, + .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN) +}; + +static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv, + struct net *net, + struct sockaddr *sa, int salen, + int flags) +{ + struct svcxprt_rdma *cma_xprt; + struct svc_xprt *xprt; + + cma_xprt = rdma_create_xprt(serv, 0); + if (!cma_xprt) + return ERR_PTR(-ENOMEM); + xprt = &cma_xprt->sc_xprt; + + svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv); + serv->sv_bc_xprt = xprt; + + dprintk("svcrdma: %s(%p)\n", __func__, xprt); + return xprt; +} + +static void svc_rdma_bc_detach(struct svc_xprt *xprt) +{ + dprintk("svcrdma: %s(%p)\n", __func__, xprt); +} + +static void svc_rdma_bc_free(struct svc_xprt *xprt) +{ + struct svcxprt_rdma *rdma = + container_of(xprt, struct svcxprt_rdma, sc_xprt); + + dprintk("svcrdma: %s(%p)\n", __func__, xprt); + if (xprt) + kfree(rdma); +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) { struct svc_rdma_op_ctxt *ctxt; -- cgit v1.2.3 From 76566773a1f1c2295ed901b6f1241cfe10d99029 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:32 -0400 Subject: NFS: Enable client side NFSv4.1 backchannel to use other transports Forechannel transports get their own "bc_up" method to create an endpoint for the backchannel service. Signed-off-by: Chuck Lever [Anna Schumaker: Add forward declaration of struct net to xprt.h] Signed-off-by: Anna Schumaker --- fs/nfs/callback.c | 40 ++++++++------------------------------- include/linux/sunrpc/xprt.h | 3 +++ net/sunrpc/xprtrdma/backchannel.c | 21 ++++++++++++++++++++ net/sunrpc/xprtrdma/transport.c | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 1 + net/sunrpc/xprtsock.c | 12 ++++++++++++ 6 files changed, 46 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 75f7c0a7538a..a7f2e6e33305 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -99,17 +99,6 @@ nfs4_callback_up(struct svc_serv *serv) } #if defined(CONFIG_NFS_V4_1) -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - /* - * Create an svc_sock for the back channel service that shares the - * fore channel connection. - * Returns the input port (0) and sets the svc_serv bc_xprt on success - */ - return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, - SVC_SOCK_ANONYMOUS); -} - /* * The callback service for NFSv4.1 callbacks */ @@ -184,11 +173,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, xprt->bc_serv = serv; } #else -static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net) -{ - return 0; -} - static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv, struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) { @@ -259,7 +243,8 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc svc_shutdown_net(serv, net); } -static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net) +static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, + struct net *net, struct rpc_xprt *xprt) { struct nfs_net *nn = net_generic(net, nfs_net_id); int ret; @@ -275,20 +260,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n goto err_bind; } - switch (minorversion) { - case 0: - ret = nfs4_callback_up_net(serv, net); - break; - case 1: - case 2: - ret = nfs41_callback_up_net(serv, net); - break; - default: - printk(KERN_ERR "NFS: unknown callback version: %d\n", - minorversion); - ret = -EINVAL; - break; - } + ret = -EPROTONOSUPPORT; + if (minorversion == 0) + ret = nfs4_callback_up_net(serv, net); + else if (xprt->ops->bc_up) + ret = xprt->ops->bc_up(serv, net); if (ret < 0) { printk(KERN_ERR "NFS: callback service start failed\n"); @@ -364,7 +340,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt) goto err_create; } - ret = nfs_callback_up_net(minorversion, serv, net); + ret = nfs_callback_up_net(minorversion, serv, net, xprt); if (ret < 0) goto err_net; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 82c083946ef0..69ef5b3ab038 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -54,6 +54,8 @@ enum rpc_display_format_t { struct rpc_task; struct rpc_xprt; struct seq_file; +struct svc_serv; +struct net; /* * This describes a complete RPC request @@ -138,6 +140,7 @@ struct rpc_xprt_ops { void (*inject_disconnect)(struct rpc_xprt *xprt); int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); + int (*bc_up)(struct svc_serv *serv, struct net *net); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 0b3387fe3f0d..2dcb44f69e53 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "xprt_rdma.h" @@ -173,6 +174,26 @@ out_err: return -ENOMEM; } +/** + * xprt_rdma_bc_up - Create transport endpoint for backchannel service + * @serv: server endpoint + * @net: network namespace + * + * The "xprt" is an implied argument: it supplies the name of the + * backchannel transport class. + * + * Returns zero on success, negative errno on failure + */ +int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0); + if (ret < 0) + return ret; + return 0; +} + /** * rpcrdma_bc_marshal_reply - Send backwards direction reply * @rqst: buffer containing RPC reply data diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 845278e63be0..8c545f7d7525 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -708,6 +708,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .inject_disconnect = xprt_rdma_inject_disconnect, #if defined(CONFIG_SUNRPC_BACKCHANNEL) .bc_setup = xprt_rdma_bc_setup, + .bc_up = xprt_rdma_bc_up, .bc_free_rqst = xprt_rdma_bc_free_rqst, .bc_destroy = xprt_rdma_bc_destroy, #endif diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index eb87d96e80ca..f8dd17be9f43 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -520,6 +520,7 @@ void xprt_rdma_cleanup(void); */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); +int xprt_rdma_bc_up(struct svc_serv *, struct net *); int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); int rpcrdma_bc_marshal_reply(struct rpc_rqst *); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 44a81e4c6783..dc4706711224 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1306,6 +1306,17 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, xs_tcp_read_reply(xprt, desc) : xs_tcp_read_callback(xprt, desc); } + +static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net) +{ + int ret; + + ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0, + SVC_SOCK_ANONYMOUS); + if (ret < 0) + return ret; + return 0; +} #else static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) @@ -2582,6 +2593,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .inject_disconnect = xs_inject_disconnect, #ifdef CONFIG_SUNRPC_BACKCHANNEL .bc_setup = xprt_setup_bc, + .bc_up = xs_tcp_bc_up, .bc_free_rqst = xprt_free_bc_rqst, .bc_destroy = xprt_destroy_bc, #endif -- cgit v1.2.3 From 8fbcf237439f841e7e9c4675790e08ea1c295bd3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 Nov 2015 18:25:34 +0100 Subject: nfs: Remove unused xdr page offsets in getacl/setacl arguments The arguments passed around for getacl and setacl xdr encoding, struct nfs_setaclargs and struct nfs_getaclargs, both contain an array of pages, an offset into the first page, and the length of the page data. The offset is unused as it is always zero; remove it. Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 ++--- fs/nfs/nfs4xdr.c | 4 ++-- include/linux/nfs_xdr.h | 2 -- 3 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 87a081c6299d..7ed8f2cd97f8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4603,7 +4603,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server) #define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE) static int buf_to_pages_noslab(const void *buf, size_t buflen, - struct page **pages, unsigned int *pgbase) + struct page **pages) { struct page *newpage, **spages; int rc = 0; @@ -4747,7 +4747,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu goto out_free; args.acl_len = npages * PAGE_SIZE; - args.acl_pgbase = 0; dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); @@ -4839,7 +4838,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) return -ERANGE; - i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + i = buf_to_pages_noslab(buf, buflen, arg.acl_pages); if (i < 0) return i; nfs4_inode_return_delegation(inode); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 9f656791a338..22a1ddd4fe96 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1659,7 +1659,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun *p = cpu_to_be32(FATTR4_WORD0_ACL); p = reserve_space(xdr, 4); *p = cpu_to_be32(arg->acl_len); - xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len); } static void @@ -2491,7 +2491,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, - args->acl_pages, args->acl_pgbase, args->acl_len); + args->acl_pages, 0, args->acl_len); encode_nops(&hdr); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4728e7e5fc49..570d630f98ae 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -705,7 +705,6 @@ struct nfs_setaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; @@ -717,7 +716,6 @@ struct nfs_getaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; -- cgit v1.2.3