From 45a52a02072b2a7e265f024cfdb00127e08dd9f2 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:08 +0000 Subject: NFS move nfs_client initialization into nfs_get_client Now nfs_get_client returns an nfs_client ready to be used no matter if it was found or created. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cf9fdbdabc67..4d7b3a97e522 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -215,6 +215,10 @@ extern struct rpc_procinfo nfs4_procedures[]; /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); +extern int nfs_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, rpc_authflavor_t authflavour, + int noresvport); /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, @@ -274,6 +278,11 @@ extern int nfs_migrate_page(struct address_space *, #endif /* nfs4proc.c */ +extern int nfs4_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour, + int noresvport); extern int _nfs4_call_sync(struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, -- cgit v1.2.3 From d83217c13531fd59730d77b5c2284e90e56c0a50 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:17 +0000 Subject: NFSv4.1: data server connection Introduce a data server set_client and init session following the nfs4_set_client and nfs4_init_session convention. Once a new nfs_client is on the nfs_client_list, the nfs_client cl_cons_state serializes access to creating an nfs_client struct with matching properties. Use the new nfs_get_client() that initializes new clients. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 41 +++++++++++++++++++++++++++++++ fs/nfs/internal.h | 5 ++++ fs/nfs/nfs4_fs.h | 12 +++++++++ fs/nfs/nfs4filelayoutdev.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 29 ++++++++++++++++++++-- include/linux/nfs_xdr.h | 1 + 6 files changed, 147 insertions(+), 2 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d5c5bdfa4231..6dd50ac5b545 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1417,6 +1417,47 @@ error: return error; } +/* + * Set up a pNFS Data Server client. + * + * Return any existing nfs_client that matches server address,port,version + * and minorversion. + * + * For a new nfs_client, use a soft mount (default), a low retrans and a + * low timeout interval so that if a connection is lost, we retry through + * the MDS. + */ +struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, + const struct sockaddr *ds_addr, + int ds_addrlen, int ds_proto) +{ + struct nfs_client_initdata cl_init = { + .addr = ds_addr, + .addrlen = ds_addrlen, + .rpc_ops = &nfs_v4_clientops, + .proto = ds_proto, + .minorversion = mds_clp->cl_minorversion, + }; + struct rpc_timeout ds_timeout = { + .to_initval = 15 * HZ, + .to_maxval = 15 * HZ, + .to_retries = 1, + .to_exponential = 1, + }; + struct nfs_client *clp; + + /* + * Set an authflavor equual to the MDS value. Use the MDS nfs_client + * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS + * (section 13.1 RFC 5661). + */ + clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, + mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); + + dprintk("<-- %s %p\n", __func__, clp); + return clp; +} +EXPORT_SYMBOL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4d7b3a97e522..5cc92014259e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -148,6 +148,9 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fattr *); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern int nfs4_check_client_ready(struct nfs_client *clp); +extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, + const struct sockaddr *ds_addr, + int ds_addrlen, int ds_proto); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); @@ -213,6 +216,8 @@ extern const u32 nfs41_maxwrite_overhead; extern struct rpc_procinfo nfs4_procedures[]; #endif +extern int nfs4_init_ds_session(struct nfs_client *clp); + /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern int nfs_init_client(struct nfs_client *clp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d4cfacc40003..7058a9f75e7f 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -266,6 +266,12 @@ is_ds_only_client(struct nfs_client *clp) return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) == EXCHGID4_FLAG_USE_PNFS_DS; } + +static inline bool +is_ds_client(struct nfs_client *clp) +{ + return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS; +} #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) { @@ -289,6 +295,12 @@ is_ds_only_client(struct nfs_client *clp) { return false; } + +static inline bool +is_ds_client(struct nfs_client *clp) +{ + return false; +} #endif /* CONFIG_NFS_V4_1 */ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index b73c34375f60..8bc91fb8b6fa 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -104,6 +104,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port) return NULL; } +/* + * Create an rpc connection to the nfs4_pnfs_ds data server + * Currently only support IPv4 + */ +static int +nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) +{ + struct nfs_client *clp; + struct sockaddr_in sin; + int status = 0; + + dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = ds->ds_ip_addr; + sin.sin_port = ds->ds_port; + + clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, + sizeof(sin), IPPROTO_TCP); + if (IS_ERR(clp)) { + status = PTR_ERR(clp); + goto out; + } + + if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) { + if (!is_ds_client(clp)) { + status = -ENODEV; + goto out_put; + } + ds->ds_clp = clp; + dprintk("%s [existing] ip=%x, port=%hu\n", __func__, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + goto out; + } + + /* + * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to + * be equal to the MDS lease. Renewal is scheduled in create_session. + */ + spin_lock(&mds_srv->nfs_client->cl_lock); + clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time; + spin_unlock(&mds_srv->nfs_client->cl_lock); + clp->cl_last_renewal = jiffies; + + /* New nfs_client */ + status = nfs4_init_ds_session(clp); + if (status) + goto out_put; + + ds->ds_clp = clp; + dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), + ntohs(ds->ds_port)); +out: + return status; +out_put: + nfs_put_client(clp); + goto out; +} + static void destroy_ds(struct nfs4_pnfs_ds *ds) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55a8fc2f3df4..07d1a43f40f5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1573,9 +1573,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int nfs4_recover_expired_lease(struct nfs_server *server) +static int nfs4_client_recover_expired_lease(struct nfs_client *clp) { - struct nfs_client *clp = server->nfs_client; unsigned int loop; int ret; @@ -1592,6 +1591,11 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) return ret; } +static int nfs4_recover_expired_lease(struct nfs_server *server) +{ + return nfs4_client_recover_expired_lease(server->nfs_client); +} + /* * OPEN_EXPIRED: * reclaim state on the server after a network partition. @@ -5118,6 +5122,27 @@ int nfs4_init_session(struct nfs_server *server) return ret; } +int nfs4_init_ds_session(struct nfs_client *clp) +{ + struct nfs4_session *session = clp->cl_session; + int ret; + + if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) + return 0; + + ret = nfs4_client_recover_expired_lease(clp); + if (!ret) + /* Test for the DS role */ + if (!is_ds_client(clp)) + ret = -ENODEV; + if (!ret) + ret = nfs4_check_client_ready(clp); + return ret; + +} +EXPORT_SYMBOL_GPL(nfs4_init_ds_session); + + /* * Renew the cl_session lease. */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9d2b9dae277d..c66ff7fe1b6b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1018,6 +1018,7 @@ struct nfs_read_data { struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ struct pnfs_layout_segment *lseg; + struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *mds_ops; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From dc70d7b3189597f313df7bd2da849cfc39063b15 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:19 +0000 Subject: NFSv4.1: filelayout read Attempt a pNFS file layout read by setting up the nfs_read_data struct and calling nfs_initiate_read with the data server rpc client and the filelayout rpc call ops. Error handling is implemented in a subsequent patch. Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Mingyang Guo Signed-off-by: Oleg Drokin Signed-off-by: Ricardo Labiaga Tested-by: Guo Mingyang Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 ++ fs/nfs/nfs4_fs.h | 3 ++ fs/nfs/nfs4filelayout.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 3 +- fs/nfs/read.c | 3 +- include/linux/nfs_xdr.h | 1 + 6 files changed, 92 insertions(+), 2 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5cc92014259e..5e9df992cd73 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -271,6 +271,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif /* read.c */ +extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7058a9f75e7f..c64be1cff080 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -252,6 +252,9 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply, struct rpc_task *task); +extern int nfs41_setup_sequence(struct nfs4_session *session, + struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task); extern void nfs4_destroy_session(struct nfs4_session *session); extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); extern int nfs4_proc_create_session(struct nfs_client *); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index ed833705dcee..3608411653dc 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -100,6 +100,87 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } +/* + * Call ops for the async read/write cases + * In the case of dense layouts, the offset needs to be reset to its + * original value. + */ +static void filelayout_read_prepare(struct rpc_task *task, void *data) +{ + struct nfs_read_data *rdata = (struct nfs_read_data *)data; + + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, + &rdata->args.seq_args, &rdata->res.seq_res, + 0, task)) + return; + + rpc_call_start(task); +} + +static void filelayout_read_call_done(struct rpc_task *task, void *data) +{ + struct nfs_read_data *rdata = (struct nfs_read_data *)data; + + dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + + /* Note this may cause RPC to be resent */ + rdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_read_release(void *data) +{ + struct nfs_read_data *rdata = (struct nfs_read_data *)data; + + rdata->mds_ops->rpc_release(data); +} + +struct rpc_call_ops filelayout_read_call_ops = { + .rpc_call_prepare = filelayout_read_prepare, + .rpc_call_done = filelayout_read_call_done, + .rpc_release = filelayout_read_release, +}; + +static enum pnfs_try_status +filelayout_read_pagelist(struct nfs_read_data *data) +{ + struct pnfs_layout_segment *lseg = data->lseg; + struct nfs4_pnfs_ds *ds; + loff_t offset = data->args.offset; + u32 j, idx; + struct nfs_fh *fh; + int status; + + dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", + __func__, data->inode->i_ino, + data->args.pgbase, (size_t)data->args.count, offset); + + /* Retrieve the correct rpc_client for the byte range */ + j = nfs4_fl_calc_j_index(lseg, offset); + idx = nfs4_fl_calc_ds_index(lseg, j); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) { + printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + return PNFS_NOT_ATTEMPTED; + } + dprintk("%s USE DS:ip %x %hu\n", __func__, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + + /* No multipath support. Use first DS */ + data->ds_clp = ds->ds_clp; + fh = nfs4_fl_select_ds_fh(lseg, j); + if (fh) + data->args.fh = fh; + + data->args.offset = filelayout_get_dserver_offset(lseg, offset); + data->mds_offset = offset; + + /* Perform an asynchronous read to ds */ + status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, + &filelayout_read_call_ops); + BUG_ON(status != 0); + return PNFS_ATTEMPTED; +} + /* * filelayout_check_layout() * @@ -320,6 +401,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, .pg_test = filelayout_pg_test, + .read_pagelist = filelayout_read_pagelist, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 07d1a43f40f5..d09623933302 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -505,7 +505,7 @@ out: return ret_id; } -static int nfs41_setup_sequence(struct nfs4_session *session, +int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply, @@ -571,6 +571,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, res->sr_status = 1; return 0; } +EXPORT_SYMBOL_GPL(nfs41_setup_sequence); int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4127a1c0eec6..f4d0fcffcb5a 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -157,7 +157,7 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } -static int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, +int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops) { struct inode *inode = data->inode; @@ -195,6 +195,7 @@ static int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, rpc_put_task(task); return 0; } +EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c66ff7fe1b6b..b63faef5052e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1020,6 +1020,7 @@ struct nfs_read_data { struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *mds_ops; + __u64 mds_offset; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From cbdabc7f8bf14ca1d40ab1cb86f64b3bc09716e8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:20 +0000 Subject: NFSv4.1: filelayout async error handler Use our own async error handler. Mark the layout as failed and retry i/o through the MDS on specified errors. Update the mds_offset in nfs_readpage_retry so that a failed short-read retry to a DS gets correctly resent through the MDS. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/nfs4filelayout.c | 81 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 35 ++++++++++++++++---- fs/nfs/nfs4state.c | 1 + fs/nfs/read.c | 1 + include/linux/nfs_xdr.h | 1 + include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 8 +++++ 8 files changed, 123 insertions(+), 6 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5e9df992cd73..1a3228e9ea22 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -285,6 +285,7 @@ extern int nfs_migrate_page(struct address_space *, #endif /* nfs4proc.c */ +extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); extern int nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 3608411653dc..6a424c19abea 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -40,6 +40,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dean Hildebrand "); MODULE_DESCRIPTION("The NFSv4 file layout driver"); +#define FILELAYOUT_POLL_RETRY_MAX (15*HZ) + static int filelayout_set_layoutdriver(struct nfs_server *nfss) { @@ -100,6 +102,83 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } +/* For data server errors we don't recover from */ +static void +filelayout_set_lo_fail(struct pnfs_layout_segment *lseg) +{ + if (lseg->pls_range.iomode == IOMODE_RW) { + dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + } else { + dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + } +} + +static int filelayout_async_handle_error(struct rpc_task *task, + struct nfs4_state *state, + struct nfs_client *clp, + int *reset) +{ + if (task->tk_status >= 0) + return 0; + + *reset = 0; + + switch (task->tk_status) { + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR %d, Reset session. Exchangeid " + "flags 0x%x\n", __func__, task->tk_status, + clp->cl_exchange_flags); + nfs4_schedule_session_recovery(clp->cl_session); + break; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + case -EKEYEXPIRED: + rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); + break; + default: + dprintk("%s DS error. Retry through MDS %d\n", __func__, + task->tk_status); + *reset = 1; + break; + } + task->tk_status = 0; + return -EAGAIN; +} + +/* NFS_PROTO call done callback routines */ + +static int filelayout_read_done_cb(struct rpc_task *task, + struct nfs_read_data *data) +{ + struct nfs_client *clp = data->ds_clp; + int reset = 0; + + dprintk("%s DS read\n", __func__); + + if (filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, &reset) == -EAGAIN) { + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", + __func__, data->ds_clp, data->ds_clp->cl_session); + if (reset) { + filelayout_set_lo_fail(data->lseg); + nfs4_reset_read(task, data); + clp = NFS_SERVER(data->inode)->nfs_client; + } + nfs_restart_rpc(task, clp); + return -EAGAIN; + } + + return 0; +} + /* * Call ops for the async read/write cases * In the case of dense layouts, the offset needs to be reset to its @@ -109,6 +188,8 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = (struct nfs_read_data *)data; + rdata->read_done_cb = filelayout_read_done_cb; + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, 0, task)) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d09623933302..1dc809039448 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3074,15 +3074,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); - dprintk("--> %s\n", __func__); - - if (!nfs4_sequence_done(task, &data->res.seq_res)) - return -EAGAIN; - if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { nfs_restart_rpc(task, server->nfs_client); return -EAGAIN; @@ -3094,12 +3089,40 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +{ + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return -EAGAIN; + + return data->read_done_cb(task, data); +} + static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) { data->timestamp = jiffies; + data->read_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; } +/* Reset the the nfs_read_data to send the read to the MDS. */ +void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) +{ + dprintk("%s Reset task for i/o through\n", __func__); + put_lseg(data->lseg); + data->lseg = NULL; + /* offsets will differ in the dense stripe case */ + data->args.offset = data->mds_offset; + data->ds_clp = NULL; + data->args.fh = NFS_FH(data->inode); + data->read_done_cb = nfs4_read_done_cb; + task->tk_ops = data->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(data->inode)); +} +EXPORT_SYMBOL_GPL(nfs4_reset_read); + static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 69c836373124..ab1bf5bb021f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1453,6 +1453,7 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session) { nfs4_schedule_lease_recovery(session->clp); } +EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); void nfs41_handle_recall_slot(struct nfs_client *clp) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f4d0fcffcb5a..f40c7f4dc16b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -391,6 +391,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data return; /* Yes, so retry the read at the end of the data */ + data->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b63faef5052e..eb0e87084353 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1020,6 +1020,7 @@ struct nfs_read_data { struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *mds_ops; + int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; struct page *page_array[NFS_PAGEVEC_SIZE]; }; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ef9476a36ff7..db7bcaf7c5bd 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -129,6 +129,7 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, u32); +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8b5a6b40d37c..edaf56e2ed29 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -597,6 +597,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) } } +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt) +{ + rpc_task_release_client(task); + rpc_task_set_client(task, clnt); +} +EXPORT_SYMBOL_GPL(rpc_task_reset_client); + + static void rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) { -- cgit v1.2.3 From a69aef1496726ed88386dad65abfcc8cd3195304 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Mar 2011 15:13:47 +0000 Subject: NFSv4.1: pnfs filelayout driver write Allows the pnfs filelayout driver to write to the data servers. Note that COMMIT to data servers will be implemented in a future patch. To avoid improper behavior, for the moment any WRITE to a data server that would also require a COMMIT to the data server is sent NFS_FILE_SYNC. Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Mingyang Guo Signed-off-by: Oleg Drokin Signed-off-by: Ricardo Labiaga Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 5 +++ fs/nfs/nfs4filelayout.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/nfs4proc.c | 17 ++++++++ fs/nfs/write.c | 5 ++- include/linux/nfs_xdr.h | 2 + 5 files changed, 128 insertions(+), 2 deletions(-) (limited to 'fs/nfs/internal.h') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1a3228e9ea22..d1ddc23c404d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -276,6 +276,10 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ +extern int nfs_initiate_write(struct nfs_write_data *data, + struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops, + int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, @@ -291,6 +295,7 @@ extern int nfs4_init_client(struct nfs_client *clp, const char *ip_addr, rpc_authflavor_t authflavour, int noresvport); +extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); extern int _nfs4_call_sync(struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 9d21bfeec88f..7e1d4571b7b2 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -189,12 +189,69 @@ static void filelayout_read_release(void *data) rdata->mds_ops->rpc_release(data); } +static int filelayout_write_done_cb(struct rpc_task *task, + struct nfs_write_data *data) +{ + int reset = 0; + + if (filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, &reset) == -EAGAIN) { + struct nfs_client *clp; + + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", + __func__, data->ds_clp, data->ds_clp->cl_session); + if (reset) { + filelayout_set_lo_fail(data->lseg); + nfs4_reset_write(task, data); + clp = NFS_SERVER(data->inode)->nfs_client; + } else + clp = data->ds_clp; + nfs_restart_rpc(task, clp); + return -EAGAIN; + } + + return 0; +} + +static void filelayout_write_prepare(struct rpc_task *task, void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, &wdata->res.seq_res, + 0, task)) + return; + + rpc_call_start(task); +} + +static void filelayout_write_call_done(struct rpc_task *task, void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + /* Note this may cause RPC to be resent */ + wdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_write_release(void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + wdata->mds_ops->rpc_release(data); +} + struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, .rpc_release = filelayout_read_release, }; +struct rpc_call_ops filelayout_write_call_ops = { + .rpc_call_prepare = filelayout_write_prepare, + .rpc_call_done = filelayout_write_call_done, + .rpc_release = filelayout_write_release, +}; + static enum pnfs_try_status filelayout_read_pagelist(struct nfs_read_data *data) { @@ -238,10 +295,52 @@ filelayout_read_pagelist(struct nfs_read_data *data) return PNFS_ATTEMPTED; } +/* Perform async writes. */ static enum pnfs_try_status filelayout_write_pagelist(struct nfs_write_data *data, int sync) { - return PNFS_NOT_ATTEMPTED; + struct pnfs_layout_segment *lseg = data->lseg; + struct nfs4_pnfs_ds *ds; + loff_t offset = data->args.offset; + u32 j, idx; + struct nfs_fh *fh; + int status; + + /* Retrieve the correct rpc_client for the byte range */ + j = nfs4_fl_calc_j_index(lseg, offset); + idx = nfs4_fl_calc_ds_index(lseg, j); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) { + printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + return PNFS_NOT_ATTEMPTED; + } + dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, + data->inode->i_ino, sync, (size_t) data->args.count, offset, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + + /* We can't handle commit to ds yet */ + if (!FILELAYOUT_LSEG(lseg)->commit_through_mds) + data->args.stable = NFS_FILE_SYNC; + + data->write_done_cb = filelayout_write_done_cb; + data->ds_clp = ds->ds_clp; + fh = nfs4_fl_select_ds_fh(lseg, j); + if (fh) + data->args.fh = fh; + /* + * Get the file offset on the dserver. Set the write offset to + * this offset and save the original offset. + */ + data->args.offset = filelayout_get_dserver_offset(lseg, offset); + data->mds_offset = offset; + + /* Perform an asynchronous write */ + status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, + &filelayout_write_call_ops, sync); + BUG_ON(status != 0); + return PNFS_ATTEMPTED; } /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index da902123ec53..7b4b9f3e9842 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3145,6 +3145,23 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) return data->write_done_cb(task, data); } +/* Reset the the nfs_write_data to send the write to the MDS. */ +void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) +{ + dprintk("%s Reset task for i/o through\n", __func__); + put_lseg(data->lseg); + data->lseg = NULL; + data->ds_clp = NULL; + data->write_done_cb = nfs4_write_done_cb; + data->args.fh = NFS_FH(data->inode); + data->args.bitmask = data->res.server->cache_consistency_bitmask; + data->args.offset = data->mds_offset; + data->res.fattr = &data->fattr; + task->tk_ops = data->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(data->inode)); +} +EXPORT_SYMBOL_GPL(nfs4_reset_write); + static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->inode); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index df99c5b0ee65..ee62ddf60e7a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -783,7 +783,7 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -static int nfs_initiate_write(struct nfs_write_data *data, +int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how) @@ -833,6 +833,7 @@ static int nfs_initiate_write(struct nfs_write_data *data, out: return ret; } +EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. @@ -1194,6 +1195,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ static unsigned long complain; + /* Note this will print the MDS for a DS write */ if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", @@ -1214,6 +1216,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ + data->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c82ad33cffe1..3440f5ab0f54 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1039,11 +1039,13 @@ struct nfs_write_data { struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ struct pnfs_layout_segment *lseg; + struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *mds_ops; int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif + __u64 mds_offset; /* Filelayout dense stripe */ struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3