From 38511722446993d926861696194c39ef135d85a4 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Feb 2011 18:28:50 +0000 Subject: pnfs: avoid incorrect use of layout stateid The code could violate the following from RFC5661, section 12.5.3: "Once a client has no more layouts on a file, the layout stateid is no longer valid and MUST NOT be used." This can occur when a layout already has a lseg, starts another non-everlapping LAYOUTGET, and a CB_LAYOUTRECALL for the existing lseg is processed before we hit pnfs_layout_process(). Solve by setting, each time the client has no more lsegs for a file, a flag which blocks further use of the layout and triggers its removal. This also fixes a second bug which occurs in the same instance as above. If we actually use pnfs_layout_process, we add the new lseg to the layout, but the layout has been removed from the nfs_client list by the intervening CB_LAYOUTRECALL and will not be added back. Thus the newly acquired lseg will not be properly returned in the event of a subsequent CB_LAYOUTRECALL. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1b1bc1a0fb0a..c8d9b2148cb1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -255,6 +255,9 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, list_del_init(&lseg->pls_layout->plh_layouts); spin_unlock(&clp->cl_lock); clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags); + set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); + /* Matched by initial refcount set in alloc_init_layout_hdr */ + put_layout_hdr_locked(lseg->pls_layout); } rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); list_add(&lseg->pls_list, tmp_list); @@ -299,6 +302,11 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, dprintk("%s:Begin lo %p\n", __func__, lo); + if (list_empty(&lo->plh_segs)) { + if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) + put_layout_hdr_locked(lo); + return 0; + } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) if (should_free_lseg(lseg->pls_range.iomode, iomode)) { dprintk("%s: freeing lseg %p iomode %d " @@ -332,10 +340,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) spin_lock(&nfsi->vfs_inode.i_lock); lo = nfsi->layout; if (lo) { - set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags); + lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); - /* Matched by refcount set to 1 in alloc_init_layout_hdr */ - put_layout_hdr_locked(lo); } spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); @@ -403,6 +409,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) return true; return lo->plh_block_lgets || + test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) || test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || (list_empty(&lo->plh_segs) && (atomic_read(&lo->plh_outstanding) > lget)); -- cgit v1.2.3 From 9f52c2525e09854ed6aa4cbd83915a56226d86c1 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Feb 2011 18:28:51 +0000 Subject: pnfs: do not need to clear NFS_LAYOUT_BULK_RECALL flag We do not need to clear the NFS_LAYOUT_BULK_RECALL, as setting it guarantees that NFS_LAYOUT_DESTROYED will be set once any outstanding io is finished. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c8d9b2148cb1..c17edfbbaebf 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -254,7 +254,6 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, /* List does not take a reference, so no need for put here */ list_del_init(&lseg->pls_layout->plh_layouts); spin_unlock(&clp->cl_lock); - clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags); set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); /* Matched by initial refcount set in alloc_init_layout_hdr */ put_layout_hdr_locked(lseg->pls_layout); @@ -754,7 +753,6 @@ pnfs_update_layout(struct inode *ino, spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); spin_unlock(&clp->cl_lock); - clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); } spin_unlock(&ino->i_lock); } -- cgit v1.2.3 From f49f9baac8f63de9cbc17a0a84e04060496e8e76 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Feb 2011 18:28:52 +0000 Subject: pnfs: fix pnfs lock inversion of i_lock and cl_lock The pnfs code was using throughout the lock order i_lock, cl_lock. This conflicts with the nfs delegation code. Rework the pnfs code to avoid taking both locks simultaneously. Currently the code takes the double lock to add/remove the layout to a nfs_client list, while atomically checking that the list of lsegs is empty. To avoid this, we rely on existing serializations. When a layout is initialized with lseg count equal zero, LAYOUTGET's openstateid serialization is in effect, making it safe to assume it stays zero unless we change it. And once a layout's lseg count drops to zero, it is set as DESTROYED and so will stay at zero. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 2 +- fs/nfs/pnfs.c | 42 +++++++++++++++++++++++++----------------- 2 files changed, 26 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 89587573fe50..2f41dccea18e 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, rv = NFS4ERR_DELAY; list_del_init(&lo->plh_bulk_recall); spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&free_me_list); put_layout_hdr(lo); iput(ino); } - pnfs_free_lseg_list(&free_me_list); return rv; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c17edfbbaebf..0f5b66f90d17 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -247,13 +247,6 @@ put_lseg_locked(struct pnfs_layout_segment *lseg, BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del(&lseg->pls_list); if (list_empty(&lseg->pls_layout->plh_segs)) { - struct nfs_client *clp; - - clp = NFS_SERVER(ino)->nfs_client; - spin_lock(&clp->cl_lock); - /* List does not take a reference, so no need for put here */ - list_del_init(&lseg->pls_layout->plh_layouts); - spin_unlock(&clp->cl_lock); set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); /* Matched by initial refcount set in alloc_init_layout_hdr */ put_layout_hdr_locked(lseg->pls_layout); @@ -319,11 +312,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return invalid - removed; } +/* note free_me must contain lsegs from a single layout_hdr */ void pnfs_free_lseg_list(struct list_head *free_me) { struct pnfs_layout_segment *lseg, *tmp; + struct pnfs_layout_hdr *lo; + + if (list_empty(free_me)) + return; + lo = list_first_entry(free_me, struct pnfs_layout_segment, + pls_list)->pls_layout; + + if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) { + struct nfs_client *clp; + + clp = NFS_SERVER(lo->plh_inode)->nfs_client; + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); + } list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { list_del(&lseg->pls_list); free_lseg(lseg); @@ -705,6 +714,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; + bool first = false; if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; @@ -735,7 +745,10 @@ pnfs_update_layout(struct inode *ino, atomic_inc(&lo->plh_outstanding); get_layout_hdr(lo); - if (list_empty(&lo->plh_segs)) { + if (list_empty(&lo->plh_segs)) + first = true; + spin_unlock(&ino->i_lock); + if (first) { /* The lo must be on the clp list if there is any * chance of a CB_LAYOUTRECALL(FILE) coming in. */ @@ -744,17 +757,12 @@ pnfs_update_layout(struct inode *ino, list_add_tail(&lo->plh_layouts, &clp->cl_layouts); spin_unlock(&clp->cl_lock); } - spin_unlock(&ino->i_lock); lseg = send_layoutget(lo, ctx, iomode); - if (!lseg) { - spin_lock(&ino->i_lock); - if (list_empty(&lo->plh_segs)) { - spin_lock(&clp->cl_lock); - list_del_init(&lo->plh_layouts); - spin_unlock(&clp->cl_lock); - } - spin_unlock(&ino->i_lock); + if (!lseg && first) { + spin_lock(&clp->cl_lock); + list_del_init(&lo->plh_layouts); + spin_unlock(&clp->cl_lock); } atomic_dec(&lo->plh_outstanding); put_layout_hdr(lo); -- cgit v1.2.3 From 83762c56c1ba7c5b4b92fb32d570661633228bc6 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 11 Feb 2011 15:42:37 +0000 Subject: NFS: remove pointless if statement in nfs_direct_write_result The code was doing nothing more in either branch of the if. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9943a75bb6d1..f493bdd74f78 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -649,8 +649,7 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata) { struct nfs_write_data *data = calldata; - if (nfs_writeback_done(task, data) != 0) - return; + nfs_writeback_done(task, data); } /* -- cgit v1.2.3 From 136028967a283929c6f01518d0700b73fa622d56 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Fri, 11 Feb 2011 15:42:38 +0000 Subject: NFS: change nfs_writeback_done to return void The return values are not used by any callers. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 8 ++++---- include/linux/nfs_fs.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 42b92d7a9cc4..ae528b98b804 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1132,7 +1132,7 @@ static const struct rpc_call_ops nfs_write_full_ops = { /* * This function is called when the WRITE call is complete. */ -int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) +void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) { struct nfs_writeargs *argp = &data->args; struct nfs_writeres *resp = &data->res; @@ -1151,7 +1151,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ status = NFS_PROTO(data->inode)->write_done(task, data); if (status != 0) - return status; + return; nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -1196,7 +1196,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) argp->stable = NFS_FILE_SYNC; } nfs_restart_rpc(task, server->nfs_client); - return -EAGAIN; + return; } if (time_before(complain, jiffies)) { printk(KERN_WARNING @@ -1207,7 +1207,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } - return 0; + return; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6023efa9f5d9..f88522b10a38 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -501,7 +501,7 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc); extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); -extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); +extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); /* * Try to write back everything synchronously (but check the -- cgit v1.2.3 From bf9c1387ca80deac792c9ecf1c64dfcc5d1cc768 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:07 +0000 Subject: NFSv4.1: put_layout_hdr can remove nfsi->layout Prevents an Oops triggered by CB_LAYOUTRECALL and LAYOUTGET race on a pnfs_layout_hdr first pnfs_layout_segment. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0f5b66f90d17..7d031cd7d920 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -768,7 +768,7 @@ pnfs_update_layout(struct inode *ino, put_layout_hdr(lo); out: dprintk("%s end, state 0x%lx lseg %p\n", __func__, - nfsi->layout->plh_flags, lseg); + nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); return lseg; out_unlock: spin_unlock(&ino->i_lock); -- cgit v1.2.3 From 45a52a02072b2a7e265f024cfdb00127e08dd9f2 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:08 +0000 Subject: NFS move nfs_client initialization into nfs_get_client Now nfs_get_client returns an nfs_client ready to be used no matter if it was found or created. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 56 ++++++++++++++++++++++++++----------------------- fs/nfs/internal.h | 9 ++++++++ fs/nfs/nfs3proc.c | 1 + fs/nfs/nfs4proc.c | 1 + fs/nfs/proc.c | 1 + include/linux/nfs_xdr.h | 3 +++ 6 files changed, 45 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index bd3ca32879e7..b9ed2a8bc26a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -481,7 +481,12 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat * Look up a client by IP address and protocol version * - creates a new record if one doesn't yet exist */ -static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) +static struct nfs_client * +nfs_get_client(const struct nfs_client_initdata *cl_init, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour, + int noresvport) { struct nfs_client *clp, *new = NULL; int error; @@ -512,6 +517,13 @@ install_client: clp = new; list_add(&clp->cl_share_link, &nfs_client_list); spin_unlock(&nfs_client_lock); + + error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr, + authflavour, noresvport); + if (error < 0) { + nfs_put_client(clp); + return ERR_PTR(error); + } dprintk("--> nfs_get_client() = %p [new]\n", clp); return clp; @@ -767,9 +779,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, /* * Initialise an NFS2 or NFS3 client */ -static int nfs_init_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - const struct nfs_parsed_mount_data *data) +int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, + const char *ip_addr, rpc_authflavor_t authflavour, + int noresvport) { int error; @@ -784,7 +796,7 @@ static int nfs_init_client(struct nfs_client *clp, * - RFC 2623, sec 2.3.2 */ error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, - 0, data->flags & NFS_MOUNT_NORESVPORT); + 0, noresvport); if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); @@ -820,19 +832,17 @@ static int nfs_init_server(struct nfs_server *server, cl_init.rpc_ops = &nfs_v3_clientops; #endif + nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, + data->timeo, data->retrans); + /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init); + clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX, + data->flags & NFS_MOUNT_NORESVPORT); if (IS_ERR(clp)) { dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); return PTR_ERR(clp); } - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); - error = nfs_init_client(clp, &timeparms, data); - if (error < 0) - goto error; - server->nfs_client = clp; /* Initialise the client representation from the mount data */ @@ -1307,11 +1317,11 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp) /* * Initialise an NFS4 client record */ -static int nfs4_init_client(struct nfs_client *clp, - const struct rpc_timeout *timeparms, - const char *ip_addr, - rpc_authflavor_t authflavour, - int flags) +int nfs4_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour, + int noresvport) { int error; @@ -1325,7 +1335,7 @@ static int nfs4_init_client(struct nfs_client *clp, clp->rpc_ops = &nfs_v4_clientops; error = nfs_create_rpc_client(clp, timeparms, authflavour, - 1, flags & NFS_MOUNT_NORESVPORT); + 1, noresvport); if (error < 0) goto error; strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); @@ -1378,22 +1388,16 @@ static int nfs4_set_client(struct nfs_server *server, dprintk("--> nfs4_set_client()\n"); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(&cl_init); + clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour, + server->flags & NFS_MOUNT_NORESVPORT); if (IS_ERR(clp)) { error = PTR_ERR(clp); goto error; } - error = nfs4_init_client(clp, timeparms, ip_addr, authflavour, - server->flags); - if (error < 0) - goto error_put; server->nfs_client = clp; dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); return 0; - -error_put: - nfs_put_client(clp); error: dprintk("<-- nfs4_set_client() = xerror %d\n", error); return error; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index cf9fdbdabc67..4d7b3a97e522 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -215,6 +215,10 @@ extern struct rpc_procinfo nfs4_procedures[]; /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); +extern int nfs_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, rpc_authflavor_t authflavour, + int noresvport); /* dir.c */ extern int nfs_access_cache_shrinker(struct shrinker *shrink, @@ -274,6 +278,11 @@ extern int nfs_migrate_page(struct address_space *, #endif /* nfs4proc.c */ +extern int nfs4_init_client(struct nfs_client *clp, + const struct rpc_timeout *timeparms, + const char *ip_addr, + rpc_authflavor_t authflavour, + int noresvport); extern int _nfs4_call_sync(struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index ce939c062a52..d0c80d8b3f96 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -885,4 +885,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .lock = nfs3_proc_lock, .clear_acl_cache = nfs3_forget_cached_acls, .close_context = nfs_close_context, + .init_client = nfs_init_client, }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0a07e353a961..55a8fc2f3df4 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5648,6 +5648,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .clear_acl_cache = nfs4_zap_acl_attr, .close_context = nfs4_close_context, .open_context = nfs4_atomic_open, + .init_client = nfs4_init_client, }; static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 77d5e21c4ad6..b8ec170f2a0f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -741,4 +741,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .lock = nfs_proc_lock, .lock_check_bounds = nfs_lock_check_bounds, .close_context = nfs_close_context, + .init_client = nfs_init_client, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 51bfadbe24e2..d159fe733381 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1040,6 +1040,7 @@ struct nfs_write_data { }; struct nfs_access_entry; +struct nfs_client; /* * RPC procedure vector for NFSv2/NFSv3 demuxing @@ -1104,6 +1105,8 @@ struct nfs_rpc_ops { struct nfs_open_context *ctx, int open_flags, struct iattr *iattr); + int (*init_client) (struct nfs_client *, const struct rpc_timeout *, + const char *, rpc_authflavor_t, int); }; /* -- cgit v1.2.3 From 89d1ea65798953b251e399b17f32d31033889ae0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:09 +0000 Subject: NFSv4.1: send zero stateid seqid on v4.1 i/o Data servers require a zero stateid seqid, and there is no advantage to not doing the same for all NFSv4.1 Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 94d50e86a124..a656b6e179b0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1384,7 +1384,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr) hdr->replen += decode_putrootfh_maxsz; } -static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx) +static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid) { nfs4_stateid stateid; __be32 *p; @@ -1392,6 +1392,8 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context p = reserve_space(xdr, NFS4_STATEID_SIZE); if (ctx->state != NULL) { nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); + if (zero_seqid) + stateid.stateid.seqid = 0; xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); } else xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); @@ -1404,7 +1406,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, p = reserve_space(xdr, 4); *p = cpu_to_be32(OP_READ); - encode_stateid(xdr, args->context, args->lock_context); + encode_stateid(xdr, args->context, args->lock_context, + hdr->minorversion); p = reserve_space(xdr, 12); p = xdr_encode_hyper(p, args->offset); @@ -1592,7 +1595,8 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg p = reserve_space(xdr, 4); *p = cpu_to_be32(OP_WRITE); - encode_stateid(xdr, args->context, args->lock_context); + encode_stateid(xdr, args->context, args->lock_context, + hdr->minorversion); p = reserve_space(xdr, 16); p = xdr_encode_hyper(p, args->offset); -- cgit v1.2.3 From d3b4c9d76738df49a7db7682c2518a0ef9f7391d Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:10 +0000 Subject: NFSv4.1: new flag for state renewal check Data servers not sharing a session with the mount MDS always have an empty cl_superblocks list. Replace the cl_superblocks empty list check to see if it is time to shut down renewd with the NFS_CS_STOP_RENEW bit which is not set by such a data server. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 5 +++++ fs/nfs/nfs4renewd.c | 6 +----- include/linux/nfs_fs_sb.h | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b9ed2a8bc26a..a86698cd82fd 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1019,14 +1019,19 @@ static void nfs_server_insert_lists(struct nfs_server *server) spin_lock(&nfs_client_lock); list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); list_add_tail(&server->master_link, &nfs_volume_list); + clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); spin_unlock(&nfs_client_lock); } static void nfs_server_remove_lists(struct nfs_server *server) { + struct nfs_client *clp = server->nfs_client; + spin_lock(&nfs_client_lock); list_del_rcu(&server->client_link); + if (clp && list_empty(&clp->cl_superblocks)) + set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state); list_del(&server->master_link); spin_unlock(&nfs_client_lock); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 402143d75fc5..df8e7f3ca56d 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -64,12 +64,8 @@ nfs4_renew_state(struct work_struct *work) ops = clp->cl_mvops->state_renewal_ops; dprintk("%s: start\n", __func__); - rcu_read_lock(); - if (list_empty(&clp->cl_superblocks)) { - rcu_read_unlock(); + if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state)) goto out; - } - rcu_read_unlock(); spin_lock(&clp->cl_lock); lease = clp->cl_lease_time; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3e112de12d8d..0bac4176e505 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -30,6 +30,7 @@ struct nfs_client { #define NFS_CS_CALLBACK 1 /* - callback started */ #define NFS_CS_IDMAP 2 /* - idmap started */ #define NFS_CS_RENEWD 3 /* - renewd started */ +#define NFS_CS_STOP_RENEW 4 /* no more state to renew */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit v1.2.3 From d6fb79d433d0a34c36bdf74eaf90857193a6261f Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:11 +0000 Subject: NFSv4.1: new flag for lease time check Data servers cannot send nfs4_proc_get_lease_time. but still need to setup state renewal. Add the NFS_CS_CHECK_LEASE_TIME bit to indicate if the lease time can be checked. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 9 +++++++++ fs/nfs/nfs4state.c | 5 +++++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 15 insertions(+) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a86698cd82fd..280d41f64a57 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1400,6 +1400,15 @@ static int nfs4_set_client(struct nfs_server *server, goto error; } + /* + * Query for the lease time on clientid setup or renewal + * + * Note that this will be set on nfs_clients that were created + * only for the DS role and did not set this bit, but now will + * serve a dual role. + */ + set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); + server->nfs_client = clp; dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); return 0; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0592288f9f06..69c836373124 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -153,6 +153,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) int status; struct nfs_fsinfo fsinfo; + if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { + nfs4_schedule_state_renewal(clp); + return 0; + } + status = nfs4_proc_get_lease_time(clp, &fsinfo); if (status == 0) { /* Update lease time and schedule renewal */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 0bac4176e505..c00d4ec47ec3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -31,6 +31,7 @@ struct nfs_client { #define NFS_CS_IDMAP 2 /* - idmap started */ #define NFS_CS_RENEWD 3 /* - renewd started */ #define NFS_CS_STOP_RENEW 4 /* no more state to renew */ +#define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit v1.2.3 From 94de8b27d0dcb2608d56a7e5c2941b87e6da7ce3 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:12 +0000 Subject: NFSv4.1: add MDS mount DS only check The DS only role cannot be used to mount. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 ++++ fs/nfs/nfs4_fs.h | 13 +++++++++++++ 2 files changed, 17 insertions(+) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 280d41f64a57..d5c5bdfa4231 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1453,6 +1453,10 @@ static int nfs4_server_common_setup(struct nfs_server *server, BUG_ON(!server->nfs_client->rpc_ops); BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); + /* data servers support only a subset of NFSv4.1 */ + if (is_ds_only_client(server->nfs_client)) + return -EPROTONOSUPPORT; + fattr = nfs_alloc_fattr(); if (fattr == NULL) return -ENOMEM; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1be36cf65bfc..d4cfacc40003 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -259,6 +259,13 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *); extern int nfs4_init_session(struct nfs_server *server); extern int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo); + +static inline bool +is_ds_only_client(struct nfs_client *clp) +{ + return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) == + EXCHGID4_FLAG_USE_PNFS_DS; +} #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) { @@ -276,6 +283,12 @@ static inline int nfs4_init_session(struct nfs_server *server) { return 0; } + +static inline bool +is_ds_only_client(struct nfs_client *clp) +{ + return false; +} #endif /* CONFIG_NFS_V4_1 */ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; -- cgit v1.2.3 From d684d2ae10a4f95d3035abf698d7d611ff2cd279 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 1 Mar 2011 01:34:13 +0000 Subject: NFSv4.1: lseg refcounting Prepare put_lseg and get_lseg to be called from the pNFS I/O code. Pull common code from pnfs_lseg_locked to call from pnfs_lseg. Inline pnfs_lseg_locked into it's only caller. Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 62 +++++++++++++++++++++++++++++++++++++---------------------- fs/nfs/pnfs.h | 20 +++++++++++++++++++ 2 files changed, 59 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 7d031cd7d920..3afa82e45438 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -230,32 +230,41 @@ static void free_lseg(struct pnfs_layout_segment *lseg) put_layout_hdr(NFS_I(ino)->layout); } -/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg - * could sleep, so must be called outside of the lock. - * Returns 1 if object was removed, otherwise return 0. - */ -static int -put_lseg_locked(struct pnfs_layout_segment *lseg, - struct list_head *tmp_list) +static void +put_lseg_common(struct pnfs_layout_segment *lseg) +{ + struct inode *inode = lseg->pls_layout->plh_inode; + + BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + list_del_init(&lseg->pls_list); + if (list_empty(&lseg->pls_layout->plh_segs)) { + set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); + /* Matched by initial refcount set in alloc_init_layout_hdr */ + put_layout_hdr_locked(lseg->pls_layout); + } + rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); +} + +static void +put_lseg(struct pnfs_layout_segment *lseg) { + struct inode *inode; + + if (!lseg) + return; + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, atomic_read(&lseg->pls_refcount), test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); - if (atomic_dec_and_test(&lseg->pls_refcount)) { - struct inode *ino = lseg->pls_layout->plh_inode; + inode = lseg->pls_layout->plh_inode; + if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { + LIST_HEAD(free_me); - BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); - list_del(&lseg->pls_list); - if (list_empty(&lseg->pls_layout->plh_segs)) { - set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); - /* Matched by initial refcount set in alloc_init_layout_hdr */ - put_layout_hdr_locked(lseg->pls_layout); - } - rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); - list_add(&lseg->pls_list, tmp_list); - return 1; + put_lseg_common(lseg); + list_add(&lseg->pls_list, &free_me); + spin_unlock(&inode->i_lock); + pnfs_free_lseg_list(&free_me); } - return 0; } static bool @@ -276,7 +285,13 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, * list. It will now be removed when all * outstanding io is finished. */ - rv = put_lseg_locked(lseg, tmp_list); + dprintk("%s: lseg %p ref %d\n", __func__, lseg, + atomic_read(&lseg->pls_refcount)); + if (atomic_dec_and_test(&lseg->pls_refcount)) { + put_lseg_common(lseg); + list_add(&lseg->pls_list, tmp_list); + rv = 1; + } } return rv; } @@ -689,7 +704,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && is_matching_lseg(lseg, iomode)) { - ret = lseg; + ret = get_lseg(lseg); break; } if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) @@ -769,6 +784,7 @@ pnfs_update_layout(struct inode *ino, out: dprintk("%s end, state 0x%lx lseg %p\n", __func__, nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); + put_lseg(lseg); /* STUB - callers currently ignore return value */ return lseg; out_unlock: spin_unlock(&ino->i_lock); @@ -821,7 +837,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) } init_lseg(lo, lseg); lseg->pls_range = res->range; - *lgp->lsegpp = lseg; + *lgp->lsegpp = get_lseg(lseg); pnfs_insert_layout(lo, lseg); if (res->return_on_close) { diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index e2612ea0cbed..9a994bc9899f 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -177,6 +177,16 @@ static inline int lo_fail_bit(u32 iomode) NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; } +static inline struct pnfs_layout_segment * +get_lseg(struct pnfs_layout_segment *lseg) +{ + if (lseg) { + atomic_inc(&lseg->pls_refcount); + smp_mb__after_atomic_inc(); + } + return lseg; +} + /* Return true if a layout driver is being used for this mountpoint */ static inline int pnfs_enabled_sb(struct nfs_server *nfss) { @@ -193,6 +203,16 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) { } +static inline struct pnfs_layout_segment * +get_lseg(struct pnfs_layout_segment *lseg) +{ + return NULL; +} + +static inline void put_lseg(struct pnfs_layout_segment *lseg) +{ +} + static inline struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, enum pnfs_iomode access_type) -- cgit v1.2.3 From 94ad1c80e28f9700c84b4d28d1e5302ddf63a6fd Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 1 Mar 2011 01:34:14 +0000 Subject: NFSv4.1: coelesce across layout stripes Add a pg_test layout driver hook which is used to avoid coelescing I/O across layout stripes. Signed-off-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Boaz Harrosh Signed-off-by: Oleg Drokin Signed-off-by: Tao Guo Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 26 ++++++++++++++++++++++++++ fs/nfs/pagelist.c | 12 ++++++++++-- fs/nfs/pnfs.c | 16 ++++++++++++++++ fs/nfs/pnfs.h | 12 ++++++++++++ fs/nfs/read.c | 1 + fs/nfs/write.c | 3 +++ include/linux/nfs_page.h | 2 ++ 7 files changed, 70 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 23f930caf1e2..0efe8cbd9e3c 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -252,6 +252,31 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) _filelayout_free_lseg(fl); } +/* + * filelayout_pg_test(). Called by nfs_can_coalesce_requests() + * + * return 1 : coalesce page + * return 0 : don't coalesce page + */ +int +filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + u64 p_stripe, r_stripe; + u32 stripe_unit; + + if (!pgio->pg_lseg) + return 1; + p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; + r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; + stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; + + do_div(p_stripe, stripe_unit); + do_div(r_stripe, stripe_unit); + + return (p_stripe == r_stripe); +} + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", @@ -260,6 +285,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .clear_layoutdriver = filelayout_clear_layoutdriver, .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, + .pg_test = filelayout_pg_test, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index e1164e3f9e69..9b9a65c9bb4f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -226,6 +226,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_doio = doio; desc->pg_ioflags = io_flags; desc->pg_error = 0; + desc->pg_lseg = NULL; } /** @@ -240,7 +241,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, * Return 'true' if this is the case, else return 'false'. */ static int nfs_can_coalesce_requests(struct nfs_page *prev, - struct nfs_page *req) + struct nfs_page *req, + struct nfs_pageio_descriptor *pgio) { if (req->wb_context->cred != prev->wb_context->cred) return 0; @@ -254,6 +256,12 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev, return 0; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) return 0; + /* + * Non-whole file layouts need to check that req is inside of + * pgio->pg_lseg. + */ + if (pgio->pg_test && !pgio->pg_test(pgio, prev, req)) + return 0; return 1; } @@ -286,7 +294,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, if (newlen > desc->pg_bsize) return 0; prev = nfs_list_entry(desc->pg_list.prev); - if (!nfs_can_coalesce_requests(prev, req)) + if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; } else desc->pg_base = req->wb_pgbase; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3afa82e45438..330cee115de0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -858,6 +858,22 @@ out_forget_reply: goto out; } +static void +pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio) +{ + struct pnfs_layoutdriver_type *ld; + + ld = NFS_SERVER(inode)->pnfs_curr_ld; + pgio->pg_test = (ld ? ld->pg_test : NULL); +} + +void +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + pnfs_set_pg_test(inode, pgio); +} + /* * Device ID cache. Currently supports one layout type per struct nfs_client. * Add layout type to the lookup key to expand to support multiple types. diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 9a994bc9899f..db52d9658570 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -30,6 +30,8 @@ #ifndef FS_NFS_PNFS_H #define FS_NFS_PNFS_H +#include + enum { NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ NFS_LSEG_ROC, /* roc bit received from server */ @@ -65,6 +67,9 @@ struct pnfs_layoutdriver_type { int (*clear_layoutdriver) (struct nfs_server *); struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); void (*free_lseg) (struct pnfs_layout_segment *lseg); + + /* test for nfs page cache coalescing */ + int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); }; struct pnfs_layout_hdr { @@ -151,6 +156,7 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, enum pnfs_iomode access_type); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); +void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); @@ -250,6 +256,12 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } +static inline void +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino) +{ + pgio->pg_test = NULL; +} + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/read.c b/fs/nfs/read.c index aedcaa7f291f..2a2765975e1f 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -626,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, goto read_complete; /* all pages were read */ pnfs_update_layout(inode, desc.ctx, IOMODE_READ); + pnfs_pageio_init_read(&pgio, inode); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae528b98b804..40143c4747a5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -28,6 +28,7 @@ #include "iostat.h" #include "nfs4_fs.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -982,6 +983,8 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, { size_t wsize = NFS_SERVER(inode)->wsize; + pgio->pg_test = NULL; + if (wsize < PAGE_CACHE_SIZE) nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); else diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index d55cee73f634..4eaf27a1282d 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -62,6 +62,8 @@ struct nfs_pageio_descriptor { int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int); int pg_ioflags; int pg_error; + struct pnfs_layout_segment *pg_lseg; + int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) -- cgit v1.2.3 From bae724ef95b0d0a1f4518f5451e7c8aabc41f820 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 1 Mar 2011 01:34:15 +0000 Subject: NFSv4.1: shift pnfs_update_layout locations Move the pnfs_update_layout call location to nfs_pageio_do_add_request(). Grab the lseg sent in the doio function to nfs_read_rpcsetup and attach it to each nfs_read_data so it can be sent to the layout driver. Signed-off-by: Andy Adamson Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Benny Halevy Signed-off-by: Boaz Harrosh Signed-off-by: Oleg Drokin Signed-off-by: Tao Guo Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 4 ---- fs/nfs/pagelist.c | 7 +++++-- fs/nfs/pnfs.c | 27 ++++++++++++++++----------- fs/nfs/pnfs.h | 1 + fs/nfs/read.c | 40 ++++++++++++++++++++++++---------------- fs/nfs/write.c | 4 ++-- include/linux/nfs_page.h | 4 ++-- include/linux/nfs_xdr.h | 1 + 8 files changed, 51 insertions(+), 37 deletions(-) (limited to 'fs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 7bf029ef4084..d85a534b15cd 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -387,10 +387,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); - pnfs_update_layout(mapping->host, - nfs_file_open_context(file), - IOMODE_RW); - start: /* * Prevent starvation issues if someone is doing a consistency diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 9b9a65c9bb4f..45b0fb8add39 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -20,6 +20,7 @@ #include #include "internal.h" +#include "pnfs.h" static struct kmem_cache *nfs_page_cachep; @@ -213,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req) */ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *), size_t bsize, int io_flags) { @@ -315,7 +316,9 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) nfs_page_array_len(desc->pg_base, desc->pg_count), desc->pg_count, - desc->pg_ioflags); + desc->pg_ioflags, + desc->pg_lseg); + desc->pg_lseg = NULL; if (error < 0) desc->pg_error = error; else diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 330cee115de0..77966ecb0a2c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -245,7 +245,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); } -static void +void put_lseg(struct pnfs_layout_segment *lseg) { struct inode *inode; @@ -784,7 +784,6 @@ pnfs_update_layout(struct inode *ino, out: dprintk("%s end, state 0x%lx lseg %p\n", __func__, nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); - put_lseg(lseg); /* STUB - callers currently ignore return value */ return lseg; out_unlock: spin_unlock(&ino->i_lock); @@ -858,20 +857,26 @@ out_forget_reply: goto out; } -static void -pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio) +static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, + struct nfs_page *req) { - struct pnfs_layoutdriver_type *ld; - - ld = NFS_SERVER(inode)->pnfs_curr_ld; - pgio->pg_test = (ld ? ld->pg_test : NULL); + if (pgio->pg_count == prev->wb_bytes) { + /* This is first coelesce call for a series of nfs_pages */ + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + prev->wb_context, + IOMODE_READ); + } + return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, - struct inode *inode) +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { - pnfs_set_pg_test(inode, pgio); + struct pnfs_layoutdriver_type *ld; + + ld = NFS_SERVER(inode)->pnfs_curr_ld; + pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; } /* diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index db52d9658570..5107d14db485 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -151,6 +151,7 @@ extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); +void put_lseg(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, enum pnfs_iomode access_type); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2a2765975e1f..6dc9eaf00e5c 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -20,17 +20,17 @@ #include #include +#include "pnfs.h" #include "nfs4_fs.h" #include "internal.h" #include "iostat.h" #include "fscache.h" -#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); -static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); +static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); +static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -69,6 +69,7 @@ void nfs_readdata_free(struct nfs_read_data *p) static void nfs_readdata_release(struct nfs_read_data *rdata) { + put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); nfs_readdata_free(rdata); } @@ -121,7 +122,6 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, len = nfs_page_length(page); if (len == 0) return nfs_return_empty_page(page); - pnfs_update_layout(inode, ctx, IOMODE_READ); new = nfs_create_request(ctx, inode, page, 0, len); if (IS_ERR(new)) { unlock_page(page); @@ -132,9 +132,9 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, nfs_list_add_request(new, &one_request); if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) - nfs_pagein_multi(inode, &one_request, 1, len, 0); + nfs_pagein_multi(inode, &one_request, 1, len, 0, NULL); else - nfs_pagein_one(inode, &one_request, 1, len, 0); + nfs_pagein_one(inode, &one_request, 1, len, 0, NULL); return 0; } @@ -160,7 +160,8 @@ static void nfs_readpage_release(struct nfs_page *req) */ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, const struct rpc_call_ops *call_ops, - unsigned int count, unsigned int offset) + unsigned int count, unsigned int offset, + struct pnfs_layout_segment *lseg) { struct inode *inode = req->wb_context->path.dentry->d_inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; @@ -183,6 +184,7 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->req = req; data->inode = inode; data->cred = msg.rpc_cred; + data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -240,7 +242,7 @@ nfs_async_read_error(struct list_head *head) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) +static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -266,6 +268,8 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne } while(nbytes != 0); atomic_set(&req->wb_complete, requests); + /* We know lseg==NULL */ + lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ); ClearPageError(page); offset = 0; nbytes = count; @@ -280,12 +284,13 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne if (nbytes < rsize) rsize = nbytes; ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, - rsize, offset); + rsize, offset, lseg); if (ret == 0) ret = ret2; offset += rsize; nbytes -= rsize; } while (nbytes != 0); + put_lseg(lseg); return ret; @@ -300,7 +305,7 @@ out_bad: return -ENOMEM; } -static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) +static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg) { struct nfs_page *req; struct page **pages; @@ -308,8 +313,10 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int ret = -ENOMEM; data = nfs_readdata_alloc(npages); - if (!data) - goto out_bad; + if (!data) { + nfs_async_read_error(head); + goto out; + } pages = data->pagevec; while (!list_empty(head)) { @@ -320,10 +327,12 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); + if ((!lseg) && list_is_singular(&data->pages)) + lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ); - return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); -out_bad: - nfs_async_read_error(head); + ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0, lseg); +out: + put_lseg(lseg); return ret; } @@ -625,7 +634,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - pnfs_update_layout(inode, desc.ctx, IOMODE_READ); pnfs_pageio_init_read(&pgio, inode); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 40143c4747a5..f033fa0d7d33 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -880,7 +880,7 @@ static void nfs_redirty_request(struct nfs_page *req) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) +static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg) { struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; @@ -947,7 +947,7 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) +static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg) { struct nfs_page *req; struct page **pages; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 4eaf27a1282d..ba88ff4f8186 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -59,7 +59,7 @@ struct nfs_pageio_descriptor { unsigned int pg_base; struct inode *pg_inode; - int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int); + int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); int pg_ioflags; int pg_error; struct pnfs_layout_segment *pg_lseg; @@ -81,7 +81,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), + int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *), size_t bsize, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d159fe733381..560923e28723 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1017,6 +1017,7 @@ struct nfs_read_data { struct nfs_readargs args; struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ + struct pnfs_layout_segment *lseg; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From 64419a9b20938d9070fdd8c58c2fa23c911915f8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:16 +0000 Subject: NFSv4.1: generic read Separate the rpc run portion of nfs_read_rpcsetup into a new function nfs_initiate_read that is called for normal NFS I/O. Add a pNFS read_pagelist function that is called instead of nfs_intitate_read for pNFS reads. Signed-off-by: Andy Adamson Signed-off-by: Boaz Harrosh Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Mike Sager Signed-off-by: Mingyang Guo Signed-off-by: Ricardo Labiaga Signed-off-by: Tao Guo Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 28 ++++++++++++++++++++ fs/nfs/pnfs.h | 20 ++++++++++++++ fs/nfs/read.c | 65 ++++++++++++++++++++++++++++------------------ include/linux/nfs_iostat.h | 1 + include/linux/nfs_xdr.h | 1 + 5 files changed, 90 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 77966ecb0a2c..86c154bad5db 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -30,6 +30,7 @@ #include #include "internal.h" #include "pnfs.h" +#include "iostat.h" #define NFSDBG_FACILITY NFSDBG_PNFS @@ -879,6 +880,33 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; } +/* + * Call the appropriate parallel I/O subsystem read function. + */ +enum pnfs_try_status +pnfs_try_to_read_data(struct nfs_read_data *rdata, + const struct rpc_call_ops *call_ops) +{ + struct inode *inode = rdata->inode; + struct nfs_server *nfss = NFS_SERVER(inode); + enum pnfs_try_status trypnfs; + + rdata->mds_ops = call_ops; + + dprintk("%s: Reading ino:%lu %u@%llu\n", + __func__, inode->i_ino, rdata->args.count, rdata->args.offset); + + trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata); + if (trypnfs == PNFS_NOT_ATTEMPTED) { + put_lseg(rdata->lseg); + rdata->lseg = NULL; + } else { + nfs_inc_stats(inode, NFSIOS_PNFS_READ); + } + dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); + return trypnfs; +} + /* * Device ID cache. Currently supports one layout type per struct nfs_client. * Add layout type to the lookup key to expand to support multiple types. diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 5107d14db485..585023fabb55 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -45,6 +45,11 @@ struct pnfs_layout_segment { struct pnfs_layout_hdr *pls_layout; }; +enum pnfs_try_status { + PNFS_ATTEMPTED = 0, + PNFS_NOT_ATTEMPTED = 1, +}; + #ifdef CONFIG_NFS_V4_1 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" @@ -70,6 +75,12 @@ struct pnfs_layoutdriver_type { /* test for nfs page cache coalescing */ int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + + /* + * Return PNFS_ATTEMPTED to indicate the layout code has attempted + * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS + */ + enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); }; struct pnfs_layout_hdr { @@ -157,6 +168,8 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, enum pnfs_iomode access_type); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); +enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, + const struct rpc_call_ops *); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); @@ -227,6 +240,13 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, return NULL; } +static inline enum pnfs_try_status +pnfs_try_to_read_data(struct nfs_read_data *data, + const struct rpc_call_ops *call_ops) +{ + return PNFS_NOT_ATTEMPTED; +} + static inline bool pnfs_roc(struct inode *ino) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6dc9eaf00e5c..4127a1c0eec6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include "pnfs.h" @@ -155,25 +157,20 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } -/* - * Set up the NFS read request struct - */ -static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, - unsigned int count, unsigned int offset, - struct pnfs_layout_segment *lseg) +static int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops) { - struct inode *inode = req->wb_context->path.dentry->d_inode; + struct inode *inode = data->inode; int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &data->args, .rpc_resp = &data->res, - .rpc_cred = req->wb_context->cred, + .rpc_cred = data->cred, }; struct rpc_task_setup task_setup_data = { .task = &data->task, - .rpc_client = NFS_CLIENT(inode), + .rpc_client = clnt, .rpc_message = &msg, .callback_ops = call_ops, .callback_data = data, @@ -181,9 +178,37 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, .flags = RPC_TASK_ASYNC | swap_flags, }; + /* Set up the initial task struct. */ + NFS_PROTO(inode)->read_setup(data, &msg); + + dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ " + "offset %llu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + data->args.count, + (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + rpc_put_task(task); + return 0; +} + +/* + * Set up the NFS read request struct + */ +static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + const struct rpc_call_ops *call_ops, + unsigned int count, unsigned int offset, + struct pnfs_layout_segment *lseg) +{ + struct inode *inode = req->wb_context->path.dentry->d_inode; + data->req = req; data->inode = inode; - data->cred = msg.rpc_cred; + data->cred = req->wb_context->cred; data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); @@ -199,21 +224,11 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.eof = 0; nfs_fattr_init(&data->fattr); - /* Set up the initial task struct. */ - NFS_PROTO(inode)->read_setup(data, &msg); - - dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - count, - (unsigned long long)data->args.offset); + if (data->lseg && + (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) + return 0; - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) - return PTR_ERR(task); - rpc_put_task(task); - return 0; + return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } static void diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h index 68b10f5f8907..37a143732d02 100644 --- a/include/linux/nfs_iostat.h +++ b/include/linux/nfs_iostat.h @@ -113,6 +113,7 @@ enum nfs_stat_eventcounters { NFSIOS_SHORTREAD, NFSIOS_SHORTWRITE, NFSIOS_DELAY, + NFSIOS_PNFS_READ, __NFSIOS_COUNTSMAX, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 560923e28723..9d2b9dae277d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1018,6 +1018,7 @@ struct nfs_read_data { struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ struct pnfs_layout_segment *lseg; + const struct rpc_call_ops *mds_ops; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From d83217c13531fd59730d77b5c2284e90e56c0a50 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:17 +0000 Subject: NFSv4.1: data server connection Introduce a data server set_client and init session following the nfs4_set_client and nfs4_init_session convention. Once a new nfs_client is on the nfs_client_list, the nfs_client cl_cons_state serializes access to creating an nfs_client struct with matching properties. Use the new nfs_get_client() that initializes new clients. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 41 +++++++++++++++++++++++++++++++ fs/nfs/internal.h | 5 ++++ fs/nfs/nfs4_fs.h | 12 +++++++++ fs/nfs/nfs4filelayoutdev.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 29 ++++++++++++++++++++-- include/linux/nfs_xdr.h | 1 + 6 files changed, 147 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index d5c5bdfa4231..6dd50ac5b545 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1417,6 +1417,47 @@ error: return error; } +/* + * Set up a pNFS Data Server client. + * + * Return any existing nfs_client that matches server address,port,version + * and minorversion. + * + * For a new nfs_client, use a soft mount (default), a low retrans and a + * low timeout interval so that if a connection is lost, we retry through + * the MDS. + */ +struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, + const struct sockaddr *ds_addr, + int ds_addrlen, int ds_proto) +{ + struct nfs_client_initdata cl_init = { + .addr = ds_addr, + .addrlen = ds_addrlen, + .rpc_ops = &nfs_v4_clientops, + .proto = ds_proto, + .minorversion = mds_clp->cl_minorversion, + }; + struct rpc_timeout ds_timeout = { + .to_initval = 15 * HZ, + .to_maxval = 15 * HZ, + .to_retries = 1, + .to_exponential = 1, + }; + struct nfs_client *clp; + + /* + * Set an authflavor equual to the MDS value. Use the MDS nfs_client + * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS + * (section 13.1 RFC 5661). + */ + clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr, + mds_clp->cl_rpcclient->cl_auth->au_flavor, 0); + + dprintk("<-- %s %p\n", __func__, clp); + return clp; +} +EXPORT_SYMBOL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4d7b3a97e522..5cc92014259e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -148,6 +148,9 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fattr *); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern int nfs4_check_client_ready(struct nfs_client *clp); +extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, + const struct sockaddr *ds_addr, + int ds_addrlen, int ds_proto); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); @@ -213,6 +216,8 @@ extern const u32 nfs41_maxwrite_overhead; extern struct rpc_procinfo nfs4_procedures[]; #endif +extern int nfs4_init_ds_session(struct nfs_client *clp); + /* proc.c */ void nfs_close_context(struct nfs_open_context *ctx, int is_sync); extern int nfs_init_client(struct nfs_client *clp, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index d4cfacc40003..7058a9f75e7f 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -266,6 +266,12 @@ is_ds_only_client(struct nfs_client *clp) return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) == EXCHGID4_FLAG_USE_PNFS_DS; } + +static inline bool +is_ds_client(struct nfs_client *clp) +{ + return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS; +} #else /* CONFIG_NFS_v4_1 */ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) { @@ -289,6 +295,12 @@ is_ds_only_client(struct nfs_client *clp) { return false; } + +static inline bool +is_ds_client(struct nfs_client *clp) +{ + return false; +} #endif /* CONFIG_NFS_V4_1 */ extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index b73c34375f60..8bc91fb8b6fa 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -104,6 +104,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port) return NULL; } +/* + * Create an rpc connection to the nfs4_pnfs_ds data server + * Currently only support IPv4 + */ +static int +nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) +{ + struct nfs_client *clp; + struct sockaddr_in sin; + int status = 0; + + dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); + + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = ds->ds_ip_addr; + sin.sin_port = ds->ds_port; + + clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, + sizeof(sin), IPPROTO_TCP); + if (IS_ERR(clp)) { + status = PTR_ERR(clp); + goto out; + } + + if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) { + if (!is_ds_client(clp)) { + status = -ENODEV; + goto out_put; + } + ds->ds_clp = clp; + dprintk("%s [existing] ip=%x, port=%hu\n", __func__, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + goto out; + } + + /* + * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to + * be equal to the MDS lease. Renewal is scheduled in create_session. + */ + spin_lock(&mds_srv->nfs_client->cl_lock); + clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time; + spin_unlock(&mds_srv->nfs_client->cl_lock); + clp->cl_last_renewal = jiffies; + + /* New nfs_client */ + status = nfs4_init_ds_session(clp); + if (status) + goto out_put; + + ds->ds_clp = clp; + dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), + ntohs(ds->ds_port)); +out: + return status; +out_put: + nfs_put_client(clp); + goto out; +} + static void destroy_ds(struct nfs4_pnfs_ds *ds) { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 55a8fc2f3df4..07d1a43f40f5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1573,9 +1573,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int nfs4_recover_expired_lease(struct nfs_server *server) +static int nfs4_client_recover_expired_lease(struct nfs_client *clp) { - struct nfs_client *clp = server->nfs_client; unsigned int loop; int ret; @@ -1592,6 +1591,11 @@ static int nfs4_recover_expired_lease(struct nfs_server *server) return ret; } +static int nfs4_recover_expired_lease(struct nfs_server *server) +{ + return nfs4_client_recover_expired_lease(server->nfs_client); +} + /* * OPEN_EXPIRED: * reclaim state on the server after a network partition. @@ -5118,6 +5122,27 @@ int nfs4_init_session(struct nfs_server *server) return ret; } +int nfs4_init_ds_session(struct nfs_client *clp) +{ + struct nfs4_session *session = clp->cl_session; + int ret; + + if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state)) + return 0; + + ret = nfs4_client_recover_expired_lease(clp); + if (!ret) + /* Test for the DS role */ + if (!is_ds_client(clp)) + ret = -ENODEV; + if (!ret) + ret = nfs4_check_client_ready(clp); + return ret; + +} +EXPORT_SYMBOL_GPL(nfs4_init_ds_session); + + /* * Renew the cl_session lease. */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9d2b9dae277d..c66ff7fe1b6b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1018,6 +1018,7 @@ struct nfs_read_data { struct nfs_readres res; unsigned long timestamp; /* For lease renewal */ struct pnfs_layout_segment *lseg; + struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *mds_ops; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From cfe7f4120f8b1b9465c333d1e42efd4669b1799f Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Tue, 1 Mar 2011 01:34:18 +0000 Subject: NFSv4.1: filelayout i/o helpers Prepare for filelayout_read_pagelist with helper functions that find the correct data server, filehandle, and offset. Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Marc Eshel Signed-off-by: Mike Sager Signed-off-by: Oleg Drokin Signed-off-by: Tao Guo Signed-off-by: Tigran Mkrtchyan Signed-off-by: Tigran Mkrtchyan Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 34 +++++++++++++++++++++++ fs/nfs/nfs4filelayout.h | 7 +++++ fs/nfs/nfs4filelayoutdev.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 0efe8cbd9e3c..ed833705dcee 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -66,6 +66,40 @@ filelayout_clear_layoutdriver(struct nfs_server *nfss) return 0; } +static loff_t +filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg, + loff_t offset) +{ + u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count; + u64 tmp; + + offset -= flseg->pattern_offset; + tmp = offset; + do_div(tmp, stripe_width); + + return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit); +} + +/* This function is used by the layout driver to calculate the + * offset of the file on the dserver based on whether the + * layout type is STRIPE_DENSE or STRIPE_SPARSE + */ +static loff_t +filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + + switch (flseg->stripe_type) { + case STRIPE_SPARSE: + return offset; + + case STRIPE_DENSE: + return filelayout_get_dense_offset(flseg, offset); + } + + BUG(); +} + /* * filelayout_check_layout() * diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index bbf60dd2ab9d..9fef76e04936 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -83,9 +83,16 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) generic_hdr); } +extern struct nfs_fh * +nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); + extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); extern void print_ds(struct nfs4_pnfs_ds *ds); extern void print_deviceid(struct nfs4_deviceid *dev_id); +u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); +u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); +struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, + u32 ds_idx); extern struct nfs4_file_layout_dsaddr * nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); struct nfs4_file_layout_dsaddr * diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 8bc91fb8b6fa..f466fed2f466 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -516,3 +516,70 @@ nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) return (d == NULL) ? NULL : container_of(d, struct nfs4_file_layout_dsaddr, deviceid); } + +/* + * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit + * Then: ((res + fsi) % dsaddr->stripe_count) + */ +u32 +nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + u64 tmp; + + tmp = offset - flseg->pattern_offset; + do_div(tmp, flseg->stripe_unit); + tmp += flseg->first_stripe_index; + return do_div(tmp, flseg->dsaddr->stripe_count); +} + +u32 +nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) +{ + return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; +} + +struct nfs_fh * +nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) +{ + struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); + u32 i; + + if (flseg->stripe_type == STRIPE_SPARSE) { + if (flseg->num_fh == 1) + i = 0; + else if (flseg->num_fh == 0) + /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ + return NULL; + else + i = nfs4_fl_calc_ds_index(lseg, j); + } else + i = j; + return flseg->fh_array[i]; +} + +struct nfs4_pnfs_ds * +nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) +{ + struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; + struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; + + if (ds == NULL) { + printk(KERN_ERR "%s: No data server for offset index %d\n", + __func__, ds_idx); + return NULL; + } + + if (!ds->ds_clp) { + int err; + + err = nfs4_ds_connect(NFS_SERVER(lseg->pls_layout->plh_inode), + dsaddr->ds_list[ds_idx]); + if (err) { + printk(KERN_ERR "%s nfs4_ds_connect error %d\n", + __func__, err); + return NULL; + } + } + return ds; +} -- cgit v1.2.3 From dc70d7b3189597f313df7bd2da849cfc39063b15 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:19 +0000 Subject: NFSv4.1: filelayout read Attempt a pNFS file layout read by setting up the nfs_read_data struct and calling nfs_initiate_read with the data server rpc client and the filelayout rpc call ops. Error handling is implemented in a subsequent patch. Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Fred Isaman Signed-off-by: Mingyang Guo Signed-off-by: Oleg Drokin Signed-off-by: Ricardo Labiaga Tested-by: Guo Mingyang Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 ++ fs/nfs/nfs4_fs.h | 3 ++ fs/nfs/nfs4filelayout.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 3 +- fs/nfs/read.c | 3 +- include/linux/nfs_xdr.h | 1 + 6 files changed, 92 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5cc92014259e..5e9df992cd73 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -271,6 +271,8 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif /* read.c */ +extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 7058a9f75e7f..c64be1cff080 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -252,6 +252,9 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser extern int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply, struct rpc_task *task); +extern int nfs41_setup_sequence(struct nfs4_session *session, + struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, + int cache_reply, struct rpc_task *task); extern void nfs4_destroy_session(struct nfs4_session *session); extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); extern int nfs4_proc_create_session(struct nfs_client *); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index ed833705dcee..3608411653dc 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -100,6 +100,87 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } +/* + * Call ops for the async read/write cases + * In the case of dense layouts, the offset needs to be reset to its + * original value. + */ +static void filelayout_read_prepare(struct rpc_task *task, void *data) +{ + struct nfs_read_data *rdata = (struct nfs_read_data *)data; + + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, + &rdata->args.seq_args, &rdata->res.seq_res, + 0, task)) + return; + + rpc_call_start(task); +} + +static void filelayout_read_call_done(struct rpc_task *task, void *data) +{ + struct nfs_read_data *rdata = (struct nfs_read_data *)data; + + dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); + + /* Note this may cause RPC to be resent */ + rdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_read_release(void *data) +{ + struct nfs_read_data *rdata = (struct nfs_read_data *)data; + + rdata->mds_ops->rpc_release(data); +} + +struct rpc_call_ops filelayout_read_call_ops = { + .rpc_call_prepare = filelayout_read_prepare, + .rpc_call_done = filelayout_read_call_done, + .rpc_release = filelayout_read_release, +}; + +static enum pnfs_try_status +filelayout_read_pagelist(struct nfs_read_data *data) +{ + struct pnfs_layout_segment *lseg = data->lseg; + struct nfs4_pnfs_ds *ds; + loff_t offset = data->args.offset; + u32 j, idx; + struct nfs_fh *fh; + int status; + + dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", + __func__, data->inode->i_ino, + data->args.pgbase, (size_t)data->args.count, offset); + + /* Retrieve the correct rpc_client for the byte range */ + j = nfs4_fl_calc_j_index(lseg, offset); + idx = nfs4_fl_calc_ds_index(lseg, j); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) { + printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + return PNFS_NOT_ATTEMPTED; + } + dprintk("%s USE DS:ip %x %hu\n", __func__, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + + /* No multipath support. Use first DS */ + data->ds_clp = ds->ds_clp; + fh = nfs4_fl_select_ds_fh(lseg, j); + if (fh) + data->args.fh = fh; + + data->args.offset = filelayout_get_dserver_offset(lseg, offset); + data->mds_offset = offset; + + /* Perform an asynchronous read to ds */ + status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient, + &filelayout_read_call_ops); + BUG_ON(status != 0); + return PNFS_ATTEMPTED; +} + /* * filelayout_check_layout() * @@ -320,6 +401,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, .pg_test = filelayout_pg_test, + .read_pagelist = filelayout_read_pagelist, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 07d1a43f40f5..d09623933302 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -505,7 +505,7 @@ out: return ret_id; } -static int nfs41_setup_sequence(struct nfs4_session *session, +int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, int cache_reply, @@ -571,6 +571,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, res->sr_status = 1; return 0; } +EXPORT_SYMBOL_GPL(nfs41_setup_sequence); int nfs4_setup_sequence(const struct nfs_server *server, struct nfs4_sequence_args *args, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4127a1c0eec6..f4d0fcffcb5a 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -157,7 +157,7 @@ static void nfs_readpage_release(struct nfs_page *req) nfs_release_request(req); } -static int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, +int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops) { struct inode *inode = data->inode; @@ -195,6 +195,7 @@ static int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, rpc_put_task(task); return 0; } +EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c66ff7fe1b6b..b63faef5052e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1020,6 +1020,7 @@ struct nfs_read_data { struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *mds_ops; + __u64 mds_offset; struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From cbdabc7f8bf14ca1d40ab1cb86f64b3bc09716e8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:20 +0000 Subject: NFSv4.1: filelayout async error handler Use our own async error handler. Mark the layout as failed and retry i/o through the MDS on specified errors. Update the mds_offset in nfs_readpage_retry so that a failed short-read retry to a DS gets correctly resent through the MDS. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 1 + fs/nfs/nfs4filelayout.c | 81 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4proc.c | 35 ++++++++++++++++---- fs/nfs/nfs4state.c | 1 + fs/nfs/read.c | 1 + include/linux/nfs_xdr.h | 1 + include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 8 +++++ 8 files changed, 123 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5e9df992cd73..1a3228e9ea22 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -285,6 +285,7 @@ extern int nfs_migrate_page(struct address_space *, #endif /* nfs4proc.c */ +extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); extern int nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, const char *ip_addr, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 3608411653dc..6a424c19abea 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -40,6 +40,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dean Hildebrand "); MODULE_DESCRIPTION("The NFSv4 file layout driver"); +#define FILELAYOUT_POLL_RETRY_MAX (15*HZ) + static int filelayout_set_layoutdriver(struct nfs_server *nfss) { @@ -100,6 +102,83 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset) BUG(); } +/* For data server errors we don't recover from */ +static void +filelayout_set_lo_fail(struct pnfs_layout_segment *lseg) +{ + if (lseg->pls_range.iomode == IOMODE_RW) { + dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__); + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + } else { + dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + } +} + +static int filelayout_async_handle_error(struct rpc_task *task, + struct nfs4_state *state, + struct nfs_client *clp, + int *reset) +{ + if (task->tk_status >= 0) + return 0; + + *reset = 0; + + switch (task->tk_status) { + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_SEQ_FALSE_RETRY: + case -NFS4ERR_SEQ_MISORDERED: + dprintk("%s ERROR %d, Reset session. Exchangeid " + "flags 0x%x\n", __func__, task->tk_status, + clp->cl_exchange_flags); + nfs4_schedule_session_recovery(clp->cl_session); + break; + case -NFS4ERR_DELAY: + case -NFS4ERR_GRACE: + case -EKEYEXPIRED: + rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX); + break; + default: + dprintk("%s DS error. Retry through MDS %d\n", __func__, + task->tk_status); + *reset = 1; + break; + } + task->tk_status = 0; + return -EAGAIN; +} + +/* NFS_PROTO call done callback routines */ + +static int filelayout_read_done_cb(struct rpc_task *task, + struct nfs_read_data *data) +{ + struct nfs_client *clp = data->ds_clp; + int reset = 0; + + dprintk("%s DS read\n", __func__); + + if (filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, &reset) == -EAGAIN) { + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", + __func__, data->ds_clp, data->ds_clp->cl_session); + if (reset) { + filelayout_set_lo_fail(data->lseg); + nfs4_reset_read(task, data); + clp = NFS_SERVER(data->inode)->nfs_client; + } + nfs_restart_rpc(task, clp); + return -EAGAIN; + } + + return 0; +} + /* * Call ops for the async read/write cases * In the case of dense layouts, the offset needs to be reset to its @@ -109,6 +188,8 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data) { struct nfs_read_data *rdata = (struct nfs_read_data *)data; + rdata->read_done_cb = filelayout_read_done_cb; + if (nfs41_setup_sequence(rdata->ds_clp->cl_session, &rdata->args.seq_args, &rdata->res.seq_res, 0, task)) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d09623933302..1dc809039448 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3074,15 +3074,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } -static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); - dprintk("--> %s\n", __func__); - - if (!nfs4_sequence_done(task, &data->res.seq_res)) - return -EAGAIN; - if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { nfs_restart_rpc(task, server->nfs_client); return -EAGAIN; @@ -3094,12 +3089,40 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) return 0; } +static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) +{ + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return -EAGAIN; + + return data->read_done_cb(task, data); +} + static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) { data->timestamp = jiffies; + data->read_done_cb = nfs4_read_done_cb; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; } +/* Reset the the nfs_read_data to send the read to the MDS. */ +void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) +{ + dprintk("%s Reset task for i/o through\n", __func__); + put_lseg(data->lseg); + data->lseg = NULL; + /* offsets will differ in the dense stripe case */ + data->args.offset = data->mds_offset; + data->ds_clp = NULL; + data->args.fh = NFS_FH(data->inode); + data->read_done_cb = nfs4_read_done_cb; + task->tk_ops = data->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(data->inode)); +} +EXPORT_SYMBOL_GPL(nfs4_reset_read); + static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 69c836373124..ab1bf5bb021f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1453,6 +1453,7 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session) { nfs4_schedule_lease_recovery(session->clp); } +EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); void nfs41_handle_recall_slot(struct nfs_client *clp) { diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f4d0fcffcb5a..f40c7f4dc16b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -391,6 +391,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data return; /* Yes, so retry the read at the end of the data */ + data->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b63faef5052e..eb0e87084353 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1020,6 +1020,7 @@ struct nfs_read_data { struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *mds_ops; + int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; struct page *page_array[NFS_PAGEVEC_SIZE]; }; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ef9476a36ff7..db7bcaf7c5bd 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -129,6 +129,7 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, struct rpc_program *, u32); +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8b5a6b40d37c..edaf56e2ed29 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -597,6 +597,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) } } +void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt) +{ + rpc_task_release_client(task); + rpc_task_set_client(task, clnt); +} +EXPORT_SYMBOL_GPL(rpc_task_reset_client); + + static void rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg) { -- cgit v1.2.3 From ea8eecdd11ee6becd09c095c8efa88aa7df95961 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Mar 2011 01:34:21 +0000 Subject: NFSv4.1 move deviceid cache to filelayout driver No need for generic cache with only one user. Keep a simple hash of deviceids in the filelayout driver. Signed-off-by: Christoph Hellwig Acked-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 46 +++----------- fs/nfs/nfs4filelayout.h | 8 ++- fs/nfs/nfs4filelayoutdev.c | 106 +++++++++++++++++++++++--------- fs/nfs/pnfs.c | 147 +-------------------------------------------- fs/nfs/pnfs.h | 48 --------------- include/linux/nfs_fs_sb.h | 1 - 6 files changed, 92 insertions(+), 264 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 6a424c19abea..a922e75af42e 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -42,32 +42,6 @@ MODULE_DESCRIPTION("The NFSv4 file layout driver"); #define FILELAYOUT_POLL_RETRY_MAX (15*HZ) -static int -filelayout_set_layoutdriver(struct nfs_server *nfss) -{ - int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, - nfs4_fl_free_deviceid_callback); - if (status) { - printk(KERN_WARNING "%s: deviceid cache could not be " - "initialized\n", __func__); - return status; - } - dprintk("%s: deviceid cache has been initialized successfully\n", - __func__); - return 0; -} - -/* Clear out the layout by destroying its device list */ -static int -filelayout_clear_layoutdriver(struct nfs_server *nfss) -{ - dprintk("--> %s\n", __func__); - - if (nfss->nfs_client->cl_devid_cache) - pnfs_put_deviceid_cache(nfss->nfs_client); - return 0; -} - static loff_t filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg, loff_t offset) @@ -295,7 +269,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, } /* find and reference the deviceid */ - dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); + dsaddr = nfs4_fl_find_get_deviceid(id); if (dsaddr == NULL) { dsaddr = get_device_info(lo->plh_inode, id); if (dsaddr == NULL) @@ -330,7 +304,7 @@ out: dprintk("--> %s returns %d\n", __func__, status); return status; out_put: - pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); + nfs4_fl_put_deviceid(dsaddr); goto out; } @@ -439,12 +413,10 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, static void filelayout_free_lseg(struct pnfs_layout_segment *lseg) { - struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode); struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); dprintk("--> %s\n", __func__); - pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, - &fl->dsaddr->deviceid); + nfs4_fl_put_deviceid(fl->dsaddr); _filelayout_free_lseg(fl); } @@ -474,13 +446,11 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, } static struct pnfs_layoutdriver_type filelayout_type = { - .id = LAYOUT_NFSV4_1_FILES, - .name = "LAYOUT_NFSV4_1_FILES", - .owner = THIS_MODULE, - .set_layoutdriver = filelayout_set_layoutdriver, - .clear_layoutdriver = filelayout_clear_layoutdriver, - .alloc_lseg = filelayout_alloc_lseg, - .free_lseg = filelayout_free_lseg, + .id = LAYOUT_NFSV4_1_FILES, + .name = "LAYOUT_NFSV4_1_FILES", + .owner = THIS_MODULE, + .alloc_lseg = filelayout_alloc_lseg, + .free_lseg = filelayout_free_lseg, .pg_test = filelayout_pg_test, .read_pagelist = filelayout_read_pagelist, }; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 9fef76e04936..23f1e1e2a0f5 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -56,7 +56,9 @@ struct nfs4_pnfs_ds { }; struct nfs4_file_layout_dsaddr { - struct pnfs_deviceid_node deviceid; + struct hlist_node node; + struct nfs4_deviceid deviceid; + atomic_t ref; u32 stripe_count; u8 *stripe_indices; u32 ds_num; @@ -86,7 +88,6 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); -extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); extern void print_ds(struct nfs4_pnfs_ds *ds); extern void print_deviceid(struct nfs4_deviceid *dev_id); u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); @@ -94,7 +95,8 @@ u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx); extern struct nfs4_file_layout_dsaddr * -nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); +nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); +extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index f466fed2f466..f594ca35a996 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -36,6 +36,30 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD +/* + * Device ID RCU cache. A device ID is unique per client ID and layout type. + */ +#define NFS4_FL_DEVICE_ID_HASH_BITS 5 +#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS) +#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1) + +static inline u32 +nfs4_fl_deviceid_hash(struct nfs4_deviceid *id) +{ + unsigned char *cptr = (unsigned char *)id->data; + unsigned int nbytes = NFS4_DEVICEID4_SIZE; + u32 x = 0; + + while (nbytes--) { + x *= 37; + x += *cptr++; + } + return x & NFS4_FL_DEVICE_ID_HASH_MASK; +} + +static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE]; +static DEFINE_SPINLOCK(filelayout_deviceid_lock); + /* * Data server cache * @@ -183,7 +207,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) struct nfs4_pnfs_ds *ds; int i; - print_deviceid(&dsaddr->deviceid.de_id); + print_deviceid(&dsaddr->deviceid); for (i = 0; i < dsaddr->ds_num; i++) { ds = dsaddr->ds_list[i]; @@ -200,15 +224,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) kfree(dsaddr); } -void -nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device) -{ - struct nfs4_file_layout_dsaddr *dsaddr = - container_of(device, struct nfs4_file_layout_dsaddr, deviceid); - - nfs4_fl_free_deviceid(dsaddr); -} - static struct nfs4_pnfs_ds * nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) { @@ -361,7 +376,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) dsaddr->stripe_count = cnt; dsaddr->ds_num = num; - memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); + memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); /* Go back an read stripe indices */ p = indicesp; @@ -411,28 +426,37 @@ out_err: } /* - * Decode the opaque device specified in 'dev' - * and add it to the list of available devices. - * If the deviceid is already cached, nfs4_add_deviceid will return - * a pointer to the cached struct and throw away the new. + * Decode the opaque device specified in 'dev' and add it to the cache of + * available devices. */ -static struct nfs4_file_layout_dsaddr* +static struct nfs4_file_layout_dsaddr * decode_and_add_device(struct inode *inode, struct pnfs_device *dev) { - struct nfs4_file_layout_dsaddr *dsaddr; - struct pnfs_deviceid_node *d; + struct nfs4_file_layout_dsaddr *d, *new; + long hash; - dsaddr = decode_device(inode, dev); - if (!dsaddr) { + new = decode_device(inode, dev); + if (!new) { printk(KERN_WARNING "%s: Could not decode or add device\n", __func__); return NULL; } - d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, - &dsaddr->deviceid); + spin_lock(&filelayout_deviceid_lock); + d = nfs4_fl_find_get_deviceid(&new->deviceid); + if (d) { + spin_unlock(&filelayout_deviceid_lock); + nfs4_fl_free_deviceid(new); + return d; + } + + INIT_HLIST_NODE(&new->node); + atomic_set(&new->ref, 1); + hash = nfs4_fl_deviceid_hash(&new->deviceid); + hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]); + spin_unlock(&filelayout_deviceid_lock); - return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); + return new; } /* @@ -507,14 +531,38 @@ out_free: return dsaddr; } -struct nfs4_file_layout_dsaddr * -nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) +void +nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) { - struct pnfs_deviceid_node *d; + if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) { + hlist_del_rcu(&dsaddr->node); + spin_unlock(&filelayout_deviceid_lock); + + synchronize_rcu(); + nfs4_fl_free_deviceid(dsaddr); + } +} - d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); - return (d == NULL) ? NULL : - container_of(d, struct nfs4_file_layout_dsaddr, deviceid); +struct nfs4_file_layout_dsaddr * +nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id) +{ + struct nfs4_file_layout_dsaddr *d; + struct hlist_node *n; + long hash = nfs4_fl_deviceid_hash(id); + + + rcu_read_lock(); + hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) { + if (!memcmp(&d->deviceid, id, sizeof(*id))) { + if (!atomic_inc_not_zero(&d->ref)) + goto fail; + rcu_read_unlock(); + return d; + } + } +fail: + rcu_read_unlock(); + return NULL; } /* diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 86c154bad5db..1f4c153441a1 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -75,10 +75,8 @@ find_pnfs_driver(u32 id) void unset_pnfs_layoutdriver(struct nfs_server *nfss) { - if (nfss->pnfs_curr_ld) { - nfss->pnfs_curr_ld->clear_layoutdriver(nfss); + if (nfss->pnfs_curr_ld) module_put(nfss->pnfs_curr_ld->owner); - } nfss->pnfs_curr_ld = NULL; } @@ -116,13 +114,7 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id) goto out_no_driver; } server->pnfs_curr_ld = ld_type; - if (ld_type->set_layoutdriver(server)) { - printk(KERN_ERR - "%s: Error initializing mount point for layout driver %u.\n", - __func__, id); - module_put(ld_type->owner); - goto out_no_driver; - } + dprintk("%s: pNFS module for %u set\n", __func__, id); return; @@ -906,138 +898,3 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); return trypnfs; } - -/* - * Device ID cache. Currently supports one layout type per struct nfs_client. - * Add layout type to the lookup key to expand to support multiple types. - */ -int -pnfs_alloc_init_deviceid_cache(struct nfs_client *clp, - void (*free_callback)(struct pnfs_deviceid_node *)) -{ - struct pnfs_deviceid_cache *c; - - c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); - if (!c) - return -ENOMEM; - spin_lock(&clp->cl_lock); - if (clp->cl_devid_cache != NULL) { - atomic_inc(&clp->cl_devid_cache->dc_ref); - dprintk("%s [kref [%d]]\n", __func__, - atomic_read(&clp->cl_devid_cache->dc_ref)); - kfree(c); - } else { - /* kzalloc initializes hlists */ - spin_lock_init(&c->dc_lock); - atomic_set(&c->dc_ref, 1); - c->dc_free_callback = free_callback; - clp->cl_devid_cache = c; - dprintk("%s [new]\n", __func__); - } - spin_unlock(&clp->cl_lock); - return 0; -} -EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache); - -/* - * Called from pnfs_layoutdriver_type->free_lseg - * last layout segment reference frees deviceid - */ -void -pnfs_put_deviceid(struct pnfs_deviceid_cache *c, - struct pnfs_deviceid_node *devid) -{ - struct nfs4_deviceid *id = &devid->de_id; - struct pnfs_deviceid_node *d; - struct hlist_node *n; - long h = nfs4_deviceid_hash(id); - - dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); - if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) - return; - - hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) - if (!memcmp(&d->de_id, id, sizeof(*id))) { - hlist_del_rcu(&d->de_node); - spin_unlock(&c->dc_lock); - synchronize_rcu(); - c->dc_free_callback(devid); - return; - } - spin_unlock(&c->dc_lock); - /* Why wasn't it found in the list? */ - BUG(); -} -EXPORT_SYMBOL_GPL(pnfs_put_deviceid); - -/* Find and reference a deviceid */ -struct pnfs_deviceid_node * -pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id) -{ - struct pnfs_deviceid_node *d; - struct hlist_node *n; - long hash = nfs4_deviceid_hash(id); - - dprintk("--> %s hash %ld\n", __func__, hash); - rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) { - if (!memcmp(&d->de_id, id, sizeof(*id))) { - if (!atomic_inc_not_zero(&d->de_ref)) { - goto fail; - } else { - rcu_read_unlock(); - return d; - } - } - } -fail: - rcu_read_unlock(); - return NULL; -} -EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid); - -/* - * Add a deviceid to the cache. - * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new - */ -struct pnfs_deviceid_node * -pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) -{ - struct pnfs_deviceid_node *d; - long hash = nfs4_deviceid_hash(&new->de_id); - - dprintk("--> %s hash %ld\n", __func__, hash); - spin_lock(&c->dc_lock); - d = pnfs_find_get_deviceid(c, &new->de_id); - if (d) { - spin_unlock(&c->dc_lock); - dprintk("%s [discard]\n", __func__); - c->dc_free_callback(new); - return d; - } - INIT_HLIST_NODE(&new->de_node); - atomic_set(&new->de_ref, 1); - hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]); - spin_unlock(&c->dc_lock); - dprintk("%s [new]\n", __func__); - return new; -} -EXPORT_SYMBOL_GPL(pnfs_add_deviceid); - -void -pnfs_put_deviceid_cache(struct nfs_client *clp) -{ - struct pnfs_deviceid_cache *local = clp->cl_devid_cache; - - dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); - if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { - int i; - /* Verify cache is empty */ - for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) - BUG_ON(!hlist_empty(&local->dc_deviceids[i])); - clp->cl_devid_cache = NULL; - spin_unlock(&clp->cl_lock); - kfree(local); - } -} -EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 585023fabb55..acbb77802075 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -68,8 +68,6 @@ struct pnfs_layoutdriver_type { const u32 id; const char *name; struct module *owner; - int (*set_layoutdriver) (struct nfs_server *); - int (*clear_layoutdriver) (struct nfs_server *); struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); void (*free_lseg) (struct pnfs_layout_segment *lseg); @@ -106,52 +104,6 @@ struct pnfs_device { unsigned int pglen; }; -/* - * Device ID RCU cache. A device ID is unique per client ID and layout type. - */ -#define NFS4_DEVICE_ID_HASH_BITS 5 -#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) -#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) - -static inline u32 -nfs4_deviceid_hash(struct nfs4_deviceid *id) -{ - unsigned char *cptr = (unsigned char *)id->data; - unsigned int nbytes = NFS4_DEVICEID4_SIZE; - u32 x = 0; - - while (nbytes--) { - x *= 37; - x += *cptr++; - } - return x & NFS4_DEVICE_ID_HASH_MASK; -} - -struct pnfs_deviceid_node { - struct hlist_node de_node; - struct nfs4_deviceid de_id; - atomic_t de_ref; -}; - -struct pnfs_deviceid_cache { - spinlock_t dc_lock; - atomic_t dc_ref; - void (*dc_free_callback)(struct pnfs_deviceid_node *); - struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE]; -}; - -extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *, - void (*free_callback)(struct pnfs_deviceid_node *)); -extern void pnfs_put_deviceid_cache(struct nfs_client *); -extern struct pnfs_deviceid_node *pnfs_find_get_deviceid( - struct pnfs_deviceid_cache *, - struct nfs4_deviceid *); -extern struct pnfs_deviceid_node *pnfs_add_deviceid( - struct pnfs_deviceid_cache *, - struct pnfs_deviceid_node *); -extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c, - struct pnfs_deviceid_node *devid); - extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c00d4ec47ec3..0cbf109a4056 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -77,7 +77,6 @@ struct nfs_client { u32 cl_exchange_flags; struct nfs4_session *cl_session; /* sharred session */ struct list_head cl_layouts; - struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */ #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE -- cgit v1.2.3 From 568e8c494ded95a28c5dd8b79b4d3ffb95b6d845 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 1 Mar 2011 01:34:22 +0000 Subject: NFSv4.1: turn off pNFS on ds connection failure If a data server is unavailable, go through MDS. Mark the deviceid containing the data server as a negative cache entry. Do not try to connect to any data server on a deviceid marked as a negative cache entry. Mark any layout that tries to use the marked deviceid as failed. Inodes with a layout marked as fails will not use the layout for I/O, and will not perform any more layoutgets. Inodes without a layout will still do layoutget, but the layout will get marked immediately. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 4 +++- fs/nfs/nfs4filelayout.h | 4 ++++ fs/nfs/nfs4filelayoutdev.c | 28 ++++++++++++++++++++++++---- fs/nfs/pnfs.c | 9 +++++---- 4 files changed, 36 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index a922e75af42e..0040a5ee6208 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -214,7 +214,9 @@ filelayout_read_pagelist(struct nfs_read_data *data) idx = nfs4_fl_calc_ds_index(lseg, j); ds = nfs4_fl_prepare_ds(lseg, idx); if (!ds) { - printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + /* Either layout fh index faulty, or ds connect failed */ + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } dprintk("%s USE DS:ip %x %hu\n", __func__, diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 23f1e1e2a0f5..ee0c907742b5 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -55,10 +55,14 @@ struct nfs4_pnfs_ds { atomic_t ds_count; }; +/* nfs4_file_layout_dsaddr flags */ +#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001 + struct nfs4_file_layout_dsaddr { struct hlist_node node; struct nfs4_deviceid deviceid; atomic_t ref; + unsigned long flags; u32 stripe_count; u8 *stripe_indices; u32 ds_num; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index f594ca35a996..68143c162e3b 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -606,6 +606,21 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) return flseg->fh_array[i]; } +static void +filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, + int err, u32 ds_addr) +{ + u32 *p = (u32 *)&dsaddr->deviceid; + + printk(KERN_ERR "NFS: data server %x connection error %d." + " Deviceid [%x%x%x%x] marked out of use.\n", + ds_addr, err, p[0], p[1], p[2], p[3]); + + spin_lock(&filelayout_deviceid_lock); + dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; + spin_unlock(&filelayout_deviceid_lock); +} + struct nfs4_pnfs_ds * nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) { @@ -619,13 +634,18 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) } if (!ds->ds_clp) { + struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); int err; - err = nfs4_ds_connect(NFS_SERVER(lseg->pls_layout->plh_inode), - dsaddr->ds_list[ds_idx]); + if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) { + /* Already tried to connect, don't try again */ + dprintk("%s Deviceid marked out of use\n", __func__); + return NULL; + } + err = nfs4_ds_connect(s, ds); if (err) { - printk(KERN_ERR "%s nfs4_ds_connect error %d\n", - __func__, err); + filelayout_mark_devid_negative(dsaddr, err, + ntohl(ds->ds_ip_addr)); return NULL; } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 1f4c153441a1..3e545144a0b2 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -739,15 +739,16 @@ pnfs_update_layout(struct inode *ino, dprintk("%s matches recall, use MDS\n", __func__); goto out_unlock; } - /* Check to see if the layout for the given range already exists */ - lseg = pnfs_find_lseg(lo, iomode); - if (lseg) - goto out_unlock; /* if LAYOUTGET already failed once we don't try again */ if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) goto out_unlock; + /* Check to see if the layout for the given range already exists */ + lseg = pnfs_find_lseg(lo, iomode); + if (lseg) + goto out_unlock; + if (pnfs_layoutgets_blocked(lo, NULL, 0)) goto out_unlock; atomic_inc(&lo->plh_outstanding); -- cgit v1.2.3 From d138d5d17be6a60d883e8bd4e22bc218d3adfab3 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 3 Mar 2011 15:13:41 +0000 Subject: NFSv4.1: rearrange nfs_write_rpcsetup Reorder nfs_write_rpcsetup, preparing for a pnfs entry point. Signed-off-by: Andy Adamson Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 82 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 36 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f033fa0d7d33..ae035990941a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -782,25 +782,21 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -/* - * Set up the argument/result storage required for the RPC call. - */ -static int nfs_write_rpcsetup(struct nfs_page *req, - struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, - unsigned int count, unsigned int offset, - int how) +static int nfs_initiate_write(struct nfs_write_data *data, + struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops, + int how) { - struct inode *inode = req->wb_context->path.dentry->d_inode; + struct inode *inode = data->inode; int priority = flush_task_priority(how); struct rpc_task *task; struct rpc_message msg = { .rpc_argp = &data->args, .rpc_resp = &data->res, - .rpc_cred = req->wb_context->cred, + .rpc_cred = data->cred, }; struct rpc_task_setup task_setup_data = { - .rpc_client = NFS_CLIENT(inode), + .rpc_client = clnt, .task = &data->task, .rpc_message = &msg, .callback_ops = call_ops, @@ -811,12 +807,49 @@ static int nfs_write_rpcsetup(struct nfs_page *req, }; int ret = 0; + /* Set up the initial task struct. */ + NFS_PROTO(inode)->write_setup(data, &msg); + + dprintk("NFS: %5u initiated write call " + "(req %s/%lld, %u bytes @ offset %llu)\n", + data->task.tk_pid, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + data->args.count, + (unsigned long long)data->args.offset); + + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + ret = PTR_ERR(task); + goto out; + } + if (how & FLUSH_SYNC) { + ret = rpc_wait_for_completion_task(task); + if (ret == 0) + ret = task->tk_status; + } + rpc_put_task(task); +out: + return ret; +} + +/* + * Set up the argument/result storage required for the RPC call. + */ +static int nfs_write_rpcsetup(struct nfs_page *req, + struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, + unsigned int count, unsigned int offset, + int how) +{ + struct inode *inode = req->wb_context->path.dentry->d_inode; + /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ data->req = req; data->inode = inode = req->wb_context->path.dentry->d_inode; - data->cred = msg.rpc_cred; + data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -837,30 +870,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); - /* Set up the initial task struct. */ - NFS_PROTO(inode)->write_setup(data, &msg); - - dprintk("NFS: %5u initiated write call " - "(req %s/%lld, %u bytes @ offset %llu)\n", - data->task.tk_pid, - inode->i_sb->s_id, - (long long)NFS_FILEID(inode), - count, - (unsigned long long)data->args.offset); - - task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - ret = PTR_ERR(task); - goto out; - } - if (how & FLUSH_SYNC) { - ret = rpc_wait_for_completion_task(task); - if (ret == 0) - ret = task->tk_status; - } - rpc_put_task(task); -out: - return ret; + return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } /* If a nfs_flush_* function fails, it should remove reqs from @head and -- cgit v1.2.3 From b029bc9b0880cbaf999f580c0ea8f06dd274fc77 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Mar 2011 15:13:42 +0000 Subject: NFSv4.1: add callback to nfs4_write_done Add callback that pnfs layout driver can use to do its own handling of data server WRITE response. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 14 ++++++++++---- include/linux/nfs_xdr.h | 1 + 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1dc809039448..15248549c89f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3123,13 +3123,10 @@ void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data) } EXPORT_SYMBOL_GPL(nfs4_reset_read); -static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) +static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; - if (!nfs4_sequence_done(task, &data->res.seq_res)) - return -EAGAIN; - if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; @@ -3141,11 +3138,20 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) return 0; } +static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) +{ + if (!nfs4_sequence_done(task, &data->res.seq_res)) + return -EAGAIN; + return data->write_done_cb(task, data); +} + static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->inode); data->args.bitmask = server->cache_consistency_bitmask; + if (!data->write_done_cb) + data->write_done_cb = nfs4_write_done_cb; data->res.server = server; data->timestamp = jiffies; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index eb0e87084353..21cd41ddbca6 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1038,6 +1038,7 @@ struct nfs_write_data { unsigned int npages; /* Max length of pagevec */ struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ + int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif -- cgit v1.2.3 From 5053aa568d4017aeb1fa35247d4ad96be262920f Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Mar 2011 15:13:43 +0000 Subject: NFSv4.1: Send lseg down into nfs_write_rpcsetup We grab the lseg sent in from the doio function and attach it to each struct nfs_write_data created. This is how the lseg will be sent to the layout driver. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 7 +++++-- include/linux/nfs_xdr.h | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ae035990941a..72b0ec0bb0e1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -97,6 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p) static void nfs_writedata_release(struct nfs_write_data *wdata) { + put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } @@ -840,6 +841,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, + struct pnfs_layout_segment *lseg, int how) { struct inode *inode = req->wb_context->path.dentry->d_inode; @@ -850,6 +852,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->req = req; data->inode = inode = req->wb_context->path.dentry->d_inode; data->cred = req->wb_context->cred; + data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -930,7 +933,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned if (nbytes < wsize) wsize = nbytes; ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, - wsize, offset, how); + wsize, offset, lseg, how); if (ret == 0) ret = ret2; offset += wsize; @@ -978,7 +981,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i req = nfs_list_entry(data->pages.next); /* Set up the argument struct */ - return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); + return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how); out_bad: while (!list_empty(head)) { req = nfs_list_entry(head->next); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 21cd41ddbca6..09d96812d6d0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1038,6 +1038,7 @@ struct nfs_write_data { unsigned int npages; /* Max length of pagevec */ struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ + struct pnfs_layout_segment *lseg; int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ -- cgit v1.2.3 From 44b83799a922a153957c65ccfc985a8c902958c8 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Mar 2011 15:13:44 +0000 Subject: NFSv4.1: trigger LAYOUTGET for writes Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 22 ++++++++++++++++++++++ fs/nfs/pnfs.h | 7 +++++++ fs/nfs/write.c | 32 ++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 3e545144a0b2..5f205d31d96c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -873,6 +873,28 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL; } +static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, + struct nfs_page *req) +{ + if (pgio->pg_count == prev->wb_bytes) { + /* This is first coelesce call for a series of nfs_pages */ + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + prev->wb_context, + IOMODE_RW); + } + return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); +} + +void +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld; + + ld = NFS_SERVER(inode)->pnfs_curr_ld; + pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; +} + /* * Call the appropriate parallel I/O subsystem read function. */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index acbb77802075..1d4e6317fa95 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -123,6 +123,7 @@ void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); +void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); @@ -235,6 +236,12 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino) pgio->pg_test = NULL; } +static inline void +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) +{ + pgio->pg_test = NULL; +} + #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 72b0ec0bb0e1..49c4784c24e5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -919,6 +919,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned } while (nbytes != 0); atomic_set(&req->wb_complete, requests); + BUG_ON(lseg); + lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW); ClearPageError(page); offset = 0; nbytes = count; @@ -940,6 +942,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned nbytes -= wsize; } while (nbytes != 0); + put_lseg(lseg); return ret; out_bad: @@ -965,11 +968,18 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i struct nfs_page *req; struct page **pages; struct nfs_write_data *data; + int ret; data = nfs_writedata_alloc(npages); - if (!data) - goto out_bad; - + if (!data) { + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_redirty_request(req); + } + ret = -ENOMEM; + goto out; + } pages = data->pagevec; while (!list_empty(head)) { req = nfs_list_entry(head->next); @@ -979,16 +989,14 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); + if ((!lseg) && list_is_singular(&data->pages)) + lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW); /* Set up the argument struct */ - return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how); - out_bad: - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_redirty_request(req); - } - return -ENOMEM; + ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how); +out: + put_lseg(lseg); /* Cleans any gotten in ->pg_test */ + return ret; } static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, @@ -996,7 +1004,7 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, { size_t wsize = NFS_SERVER(inode)->wsize; - pgio->pg_test = NULL; + pnfs_pageio_init_write(pgio, inode); if (wsize < PAGE_CACHE_SIZE) nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); -- cgit v1.2.3 From 0382b74409c6b9ef12c952b50bb44f557a361a43 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 3 Mar 2011 15:13:45 +0000 Subject: NFSv4.1: implement generic pnfs layer write switch Signed-off-by: Andy Adamson Signed-off-by: Boaz Harrosh Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: J. Bruce Fields Signed-off-by: Mike Sager Signed-off-by: Ricardo Labiaga Signed-off-by: Tao Guo Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 7 +++++++ fs/nfs/pnfs.c | 24 ++++++++++++++++++++++++ fs/nfs/pnfs.h | 10 ++++++++++ fs/nfs/write.c | 4 ++++ include/linux/nfs_iostat.h | 1 + include/linux/nfs_xdr.h | 1 + 6 files changed, 47 insertions(+) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 0040a5ee6208..9d21bfeec88f 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -238,6 +238,12 @@ filelayout_read_pagelist(struct nfs_read_data *data) return PNFS_ATTEMPTED; } +static enum pnfs_try_status +filelayout_write_pagelist(struct nfs_write_data *data, int sync) +{ + return PNFS_NOT_ATTEMPTED; +} + /* * filelayout_check_layout() * @@ -455,6 +461,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .free_lseg = filelayout_free_lseg, .pg_test = filelayout_pg_test, .read_pagelist = filelayout_read_pagelist, + .write_pagelist = filelayout_write_pagelist, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5f205d31d96c..f38813a0a295 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -895,6 +895,30 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; } +enum pnfs_try_status +pnfs_try_to_write_data(struct nfs_write_data *wdata, + const struct rpc_call_ops *call_ops, int how) +{ + struct inode *inode = wdata->inode; + enum pnfs_try_status trypnfs; + struct nfs_server *nfss = NFS_SERVER(inode); + + wdata->mds_ops = call_ops; + + dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, + inode->i_ino, wdata->args.count, wdata->args.offset, how); + + trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) { + put_lseg(wdata->lseg); + wdata->lseg = NULL; + } else + nfs_inc_stats(inode, NFSIOS_PNFS_WRITE); + + dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); + return trypnfs; +} + /* * Call the appropriate parallel I/O subsystem read function. */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1d4e6317fa95..6380b9405bcd 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -79,6 +79,7 @@ struct pnfs_layoutdriver_type { * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS */ enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); + enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); }; struct pnfs_layout_hdr { @@ -120,6 +121,8 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, enum pnfs_iomode access_type); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); +enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, + const struct rpc_call_ops *, int); enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); @@ -200,6 +203,13 @@ pnfs_try_to_read_data(struct nfs_read_data *data, return PNFS_NOT_ATTEMPTED; } +static inline enum pnfs_try_status +pnfs_try_to_write_data(struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, int how) +{ + return PNFS_NOT_ATTEMPTED; +} + static inline bool pnfs_roc(struct inode *ino) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 49c4784c24e5..df99c5b0ee65 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -873,6 +873,10 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); + if (data->lseg && + (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) + return 0; + return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } diff --git a/include/linux/nfs_iostat.h b/include/linux/nfs_iostat.h index 37a143732d02..8866bb3502ee 100644 --- a/include/linux/nfs_iostat.h +++ b/include/linux/nfs_iostat.h @@ -114,6 +114,7 @@ enum nfs_stat_eventcounters { NFSIOS_SHORTWRITE, NFSIOS_DELAY, NFSIOS_PNFS_READ, + NFSIOS_PNFS_WRITE, __NFSIOS_COUNTSMAX, }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 09d96812d6d0..c82ad33cffe1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1039,6 +1039,7 @@ struct nfs_write_data { struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ struct pnfs_layout_segment *lseg; + const struct rpc_call_ops *mds_ops; int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ -- cgit v1.2.3 From 7ffd10640dc008f6d5a375bd6450755745c63c7d Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Mar 2011 15:13:46 +0000 Subject: NFSv4.1: remove GETATTR from ds writes Any WRITE compound directed to a data server needs to have the GETATTR calls suppressed. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++++- fs/nfs/nfs4xdr.c | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15248549c89f..da902123ec53 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3149,7 +3149,11 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag { struct nfs_server *server = NFS_SERVER(data->inode); - data->args.bitmask = server->cache_consistency_bitmask; + if (data->lseg) { + data->args.bitmask = NULL; + data->res.fattr = NULL; + } else + data->args.bitmask = server->cache_consistency_bitmask; if (!data->write_done_cb) data->write_done_cb = nfs4_write_done_cb; data->res.server = server; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index a656b6e179b0..0f2dcfb41f29 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2275,7 +2275,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_write(xdr, args, &hdr); req->rq_snd_buf.flags |= XDRBUF_WRITE; - encode_getfattr(xdr, args->bitmask, &hdr); + if (args->bitmask) + encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); } @@ -5694,8 +5695,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr, status = decode_write(xdr, res); if (status) goto out; - decode_getfattr(xdr, res->fattr, res->server, - !RPC_IS_ASYNC(rqstp->rq_task)); + if (res->fattr) + decode_getfattr(xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); if (!status) status = res->count; out: -- cgit v1.2.3 From a69aef1496726ed88386dad65abfcc8cd3195304 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Mar 2011 15:13:47 +0000 Subject: NFSv4.1: pnfs filelayout driver write Allows the pnfs filelayout driver to write to the data servers. Note that COMMIT to data servers will be implemented in a future patch. To avoid improper behavior, for the moment any WRITE to a data server that would also require a COMMIT to the data server is sent NFS_FILE_SYNC. Signed-off-by: Andy Adamson Signed-off-by: Dean Hildebrand Signed-off-by: Fred Isaman Signed-off-by: Mingyang Guo Signed-off-by: Oleg Drokin Signed-off-by: Ricardo Labiaga Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 5 +++ fs/nfs/nfs4filelayout.c | 101 +++++++++++++++++++++++++++++++++++++++++++++++- fs/nfs/nfs4proc.c | 17 ++++++++ fs/nfs/write.c | 5 ++- include/linux/nfs_xdr.h | 2 + 5 files changed, 128 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1a3228e9ea22..d1ddc23c404d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -276,6 +276,10 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ +extern int nfs_initiate_write(struct nfs_write_data *data, + struct rpc_clnt *clnt, + const struct rpc_call_ops *call_ops, + int how); extern void nfs_write_prepare(struct rpc_task *task, void *calldata); #ifdef CONFIG_MIGRATION extern int nfs_migrate_page(struct address_space *, @@ -291,6 +295,7 @@ extern int nfs4_init_client(struct nfs_client *clp, const char *ip_addr, rpc_authflavor_t authflavour, int noresvport); +extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); extern int _nfs4_call_sync(struct nfs_server *server, struct rpc_message *msg, struct nfs4_sequence_args *args, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 9d21bfeec88f..7e1d4571b7b2 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -189,12 +189,69 @@ static void filelayout_read_release(void *data) rdata->mds_ops->rpc_release(data); } +static int filelayout_write_done_cb(struct rpc_task *task, + struct nfs_write_data *data) +{ + int reset = 0; + + if (filelayout_async_handle_error(task, data->args.context->state, + data->ds_clp, &reset) == -EAGAIN) { + struct nfs_client *clp; + + dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", + __func__, data->ds_clp, data->ds_clp->cl_session); + if (reset) { + filelayout_set_lo_fail(data->lseg); + nfs4_reset_write(task, data); + clp = NFS_SERVER(data->inode)->nfs_client; + } else + clp = data->ds_clp; + nfs_restart_rpc(task, clp); + return -EAGAIN; + } + + return 0; +} + +static void filelayout_write_prepare(struct rpc_task *task, void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + if (nfs41_setup_sequence(wdata->ds_clp->cl_session, + &wdata->args.seq_args, &wdata->res.seq_res, + 0, task)) + return; + + rpc_call_start(task); +} + +static void filelayout_write_call_done(struct rpc_task *task, void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + /* Note this may cause RPC to be resent */ + wdata->mds_ops->rpc_call_done(task, data); +} + +static void filelayout_write_release(void *data) +{ + struct nfs_write_data *wdata = (struct nfs_write_data *)data; + + wdata->mds_ops->rpc_release(data); +} + struct rpc_call_ops filelayout_read_call_ops = { .rpc_call_prepare = filelayout_read_prepare, .rpc_call_done = filelayout_read_call_done, .rpc_release = filelayout_read_release, }; +struct rpc_call_ops filelayout_write_call_ops = { + .rpc_call_prepare = filelayout_write_prepare, + .rpc_call_done = filelayout_write_call_done, + .rpc_release = filelayout_write_release, +}; + static enum pnfs_try_status filelayout_read_pagelist(struct nfs_read_data *data) { @@ -238,10 +295,52 @@ filelayout_read_pagelist(struct nfs_read_data *data) return PNFS_ATTEMPTED; } +/* Perform async writes. */ static enum pnfs_try_status filelayout_write_pagelist(struct nfs_write_data *data, int sync) { - return PNFS_NOT_ATTEMPTED; + struct pnfs_layout_segment *lseg = data->lseg; + struct nfs4_pnfs_ds *ds; + loff_t offset = data->args.offset; + u32 j, idx; + struct nfs_fh *fh; + int status; + + /* Retrieve the correct rpc_client for the byte range */ + j = nfs4_fl_calc_j_index(lseg, offset); + idx = nfs4_fl_calc_ds_index(lseg, j); + ds = nfs4_fl_prepare_ds(lseg, idx); + if (!ds) { + printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); + set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); + set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); + return PNFS_NOT_ATTEMPTED; + } + dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, + data->inode->i_ino, sync, (size_t) data->args.count, offset, + ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + + /* We can't handle commit to ds yet */ + if (!FILELAYOUT_LSEG(lseg)->commit_through_mds) + data->args.stable = NFS_FILE_SYNC; + + data->write_done_cb = filelayout_write_done_cb; + data->ds_clp = ds->ds_clp; + fh = nfs4_fl_select_ds_fh(lseg, j); + if (fh) + data->args.fh = fh; + /* + * Get the file offset on the dserver. Set the write offset to + * this offset and save the original offset. + */ + data->args.offset = filelayout_get_dserver_offset(lseg, offset); + data->mds_offset = offset; + + /* Perform an asynchronous write */ + status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, + &filelayout_write_call_ops, sync); + BUG_ON(status != 0); + return PNFS_ATTEMPTED; } /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index da902123ec53..7b4b9f3e9842 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3145,6 +3145,23 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) return data->write_done_cb(task, data); } +/* Reset the the nfs_write_data to send the write to the MDS. */ +void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data) +{ + dprintk("%s Reset task for i/o through\n", __func__); + put_lseg(data->lseg); + data->lseg = NULL; + data->ds_clp = NULL; + data->write_done_cb = nfs4_write_done_cb; + data->args.fh = NFS_FH(data->inode); + data->args.bitmask = data->res.server->cache_consistency_bitmask; + data->args.offset = data->mds_offset; + data->res.fattr = &data->fattr; + task->tk_ops = data->mds_ops; + rpc_task_reset_client(task, NFS_CLIENT(data->inode)); +} +EXPORT_SYMBOL_GPL(nfs4_reset_write); + static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) { struct nfs_server *server = NFS_SERVER(data->inode); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index df99c5b0ee65..ee62ddf60e7a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -783,7 +783,7 @@ static int flush_task_priority(int how) return RPC_PRIORITY_NORMAL; } -static int nfs_initiate_write(struct nfs_write_data *data, +int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops, int how) @@ -833,6 +833,7 @@ static int nfs_initiate_write(struct nfs_write_data *data, out: return ret; } +EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. @@ -1194,6 +1195,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ static unsigned long complain; + /* Note this will print the MDS for a DS write */ if (time_before(complain, jiffies)) { dprintk("NFS: faulty NFS server %s:" " (committed = %d) != (stable = %d)\n", @@ -1214,6 +1216,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Was this an NFSv2 write or an NFSv3 stable write? */ if (resp->verf->committed != NFS_UNSTABLE) { /* Resend from where the server left off */ + data->mds_offset += resp->count; argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index c82ad33cffe1..3440f5ab0f54 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1039,11 +1039,13 @@ struct nfs_write_data { struct nfs_writeargs args; /* argument struct */ struct nfs_writeres res; /* result struct */ struct pnfs_layout_segment *lseg; + struct nfs_client *ds_clp; /* pNFS data server */ const struct rpc_call_ops *mds_ops; int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data); #ifdef CONFIG_NFS_V4 unsigned long timestamp; /* For lease renewal */ #endif + __u64 mds_offset; /* Filelayout dense stripe */ struct page *page_array[NFS_PAGEVEC_SIZE]; }; -- cgit v1.2.3 From c76069bda0f17cd3e153e54d9ac01242909c6b15 Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Mar 2011 15:13:48 +0000 Subject: NFSv4.1: rearrange ->doio args This will make it possible to clear the lseg pointer in the same function as it is put, instead of in the caller nfs_pageio_doio(). Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 10 ++-------- fs/nfs/read.c | 42 +++++++++++++++++++++++++----------------- fs/nfs/write.c | 28 ++++++++++++++++------------ include/linux/nfs_page.h | 4 ++-- 4 files changed, 45 insertions(+), 39 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 45b0fb8add39..9f628746f5c8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -214,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req) */ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *), + int (*doio)(struct nfs_pageio_descriptor *), size_t bsize, int io_flags) { @@ -311,13 +311,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { if (!list_empty(&desc->pg_list)) { - int error = desc->pg_doio(desc->pg_inode, - &desc->pg_list, - nfs_page_array_len(desc->pg_base, - desc->pg_count), - desc->pg_count, - desc->pg_ioflags, - desc->pg_lseg); + int error = desc->pg_doio(desc); desc->pg_lseg = NULL; if (error < 0) desc->pg_error = error; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f40c7f4dc16b..ab9c7768b7c6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -31,8 +31,8 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); -static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -117,9 +117,9 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { - LIST_HEAD(one_request); struct nfs_page *new; unsigned int len; + struct nfs_pageio_descriptor pgio; len = nfs_page_length(page); if (len == 0) @@ -132,11 +132,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_list_add_request(new, &one_request); + nfs_pageio_init(&pgio, inode, NULL, 0, 0); + nfs_list_add_request(new, &pgio.pg_list); + pgio.pg_count = len; + if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) - nfs_pagein_multi(inode, &one_request, 1, len, 0, NULL); + nfs_pagein_multi(&pgio); else - nfs_pagein_one(inode, &one_request, 1, len, 0, NULL); + nfs_pagein_one(&pgio); return 0; } @@ -258,20 +261,21 @@ nfs_async_read_error(struct list_head *head) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg) +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) { - struct nfs_page *req = nfs_list_entry(head->next); + struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_read_data *data; - size_t rsize = NFS_SERVER(inode)->rsize, nbytes; + size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; + struct pnfs_layout_segment *lseg; LIST_HEAD(list); nfs_list_remove_request(req); - nbytes = count; + nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); @@ -284,11 +288,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne } while(nbytes != 0); atomic_set(&req->wb_complete, requests); - /* We know lseg==NULL */ - lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ); + BUG_ON(desc->pg_lseg != NULL); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); ClearPageError(page); offset = 0; - nbytes = count; + nbytes = desc->pg_count; do { int ret2; @@ -321,14 +325,17 @@ out_bad: return -ENOMEM; } -static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags, struct pnfs_layout_segment *lseg) +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) { struct nfs_page *req; struct page **pages; struct nfs_read_data *data; + struct list_head *head = &desc->pg_list; + struct pnfs_layout_segment *lseg = desc->pg_lseg; int ret = -ENOMEM; - data = nfs_readdata_alloc(npages); + data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, + desc->pg_count)); if (!data) { nfs_async_read_error(head); goto out; @@ -344,9 +351,10 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); - ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0, lseg); + ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, + 0, lseg); out: put_lseg(lseg); return ret; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ee62ddf60e7a..b74200a2f753 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -898,20 +898,21 @@ static void nfs_redirty_request(struct nfs_page *req) * Generate multiple small requests to write out a single * contiguous dirty area on one page. */ -static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg) +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) { - struct nfs_page *req = nfs_list_entry(head->next); + struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_write_data *data; - size_t wsize = NFS_SERVER(inode)->wsize, nbytes; + size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; + struct pnfs_layout_segment *lseg; LIST_HEAD(list); nfs_list_remove_request(req); - nbytes = count; + nbytes = desc->pg_count; do { size_t len = min(nbytes, wsize); @@ -924,11 +925,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - BUG_ON(lseg); - lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW); + BUG_ON(desc->pg_lseg); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); ClearPageError(page); offset = 0; - nbytes = count; + nbytes = desc->pg_count; do { int ret2; @@ -940,7 +941,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned if (nbytes < wsize) wsize = nbytes; ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, - wsize, offset, lseg, how); + wsize, offset, lseg, desc->pg_ioflags); if (ret == 0) ret = ret2; offset += wsize; @@ -968,14 +969,17 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. */ -static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how, struct pnfs_layout_segment *lseg) +static int nfs_flush_one(struct nfs_pageio_descriptor *desc) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; + struct list_head *head = &desc->pg_list; + struct pnfs_layout_segment *lseg = desc->pg_lseg; int ret; - data = nfs_writedata_alloc(npages); + data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, + desc->pg_count)); if (!data) { while (!list_empty(head)) { req = nfs_list_entry(head->next); @@ -995,10 +999,10 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); /* Set up the argument struct */ - ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, lseg, how); + ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); out: put_lseg(lseg); /* Cleans any gotten in ->pg_test */ return ret; diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index ba88ff4f8186..90907ada6d52 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -59,7 +59,7 @@ struct nfs_pageio_descriptor { unsigned int pg_base; struct inode *pg_inode; - int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *); + int (*pg_doio)(struct nfs_pageio_descriptor *); int pg_ioflags; int pg_error; struct pnfs_layout_segment *pg_lseg; @@ -81,7 +81,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int, struct pnfs_layout_segment *), + int (*doio)(struct nfs_pageio_descriptor *desc), size_t bsize, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, -- cgit v1.2.3 From 36fe432d33e078caee5c954e15e929819c2cacae Mon Sep 17 00:00:00 2001 From: Fred Isaman Date: Thu, 3 Mar 2011 15:13:49 +0000 Subject: NFSv4.1: Clear lseg pointer in ->doio function Now that we have access to the pointer, clear it immediately after the put, instead of in caller. Signed-off-by: Fred Isaman Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 1 - fs/nfs/read.c | 2 ++ fs/nfs/write.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 9f628746f5c8..23e794410669 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -312,7 +312,6 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { if (!list_empty(&desc->pg_list)) { int error = desc->pg_doio(desc); - desc->pg_lseg = NULL; if (error < 0) desc->pg_error = error; else diff --git a/fs/nfs/read.c b/fs/nfs/read.c index ab9c7768b7c6..4b764c6048db 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -311,6 +311,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) nbytes -= rsize; } while (nbytes != 0); put_lseg(lseg); + desc->pg_lseg = NULL; return ret; @@ -357,6 +358,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) 0, lseg); out: put_lseg(lseg); + desc->pg_lseg = NULL; return ret; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b74200a2f753..47a3ad63e0d5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -949,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) } while (nbytes != 0); put_lseg(lseg); + desc->pg_lseg = NULL; return ret; out_bad: @@ -1005,6 +1006,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); out: put_lseg(lseg); /* Cleans any gotten in ->pg_test */ + desc->pg_lseg = NULL; return ret; } -- cgit v1.2.3 From 75247affd7930cc3dcf57f850f0d7898379ef3b3 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 22 Feb 2011 15:56:01 -0800 Subject: NFSv4.1: reject zero layout with zeroed stripe unit Allowing stripe_unit==0 causes the client to crash later on when dividing by zero. Reported-by: Marc Eshel Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 7e1d4571b7b2..428558464817 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -369,8 +369,8 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } - if (fl->stripe_unit % PAGE_SIZE) { - dprintk("%s Stripe unit (%u) not page aligned\n", + if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) { + dprintk("%s Invalid stripe unit (%u)\n", __func__, fl->stripe_unit); goto out; } -- cgit v1.2.3 From 5cf36cfdc8caa2724738ad0842c5c3dd02f309dc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Feb 2011 15:44:31 -0800 Subject: NFSv4: If the server sends us a numeric uid/gid then accept it Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 18696882f1c6..cbe6e2fa8cef 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -33,6 +33,24 @@ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include +#include +#include + +static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) +{ + unsigned long val; + char buf[16]; + + if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf)) + return 0; + memcpy(buf, name, namelen); + buf[namelen] = '\0'; + if (strict_strtoul(buf, 0, &val) != 0) + return 0; + *res = val; + return 1; +} #ifdef CONFIG_NFS_USE_NEW_IDMAPPER @@ -42,7 +60,6 @@ #include #include #include -#include #include #include @@ -221,11 +238,15 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) { + if (nfs_map_string_to_numeric(name, namelen, uid)) + return 0; return nfs_idmap_lookup_id(name, namelen, "uid", uid); } int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) { + if (nfs_map_string_to_numeric(name, namelen, gid)) + return 0; return nfs_idmap_lookup_id(name, namelen, "gid", gid); } @@ -243,7 +264,6 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu #include #include #include -#include #include #include #include @@ -699,6 +719,8 @@ int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen { struct idmap *idmap = clp->cl_idmap; + if (nfs_map_string_to_numeric(name, namelen, uid)) + return 0; return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); } @@ -706,6 +728,8 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele { struct idmap *idmap = clp->cl_idmap; + if (nfs_map_string_to_numeric(name, namelen, uid)) + return 0; return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -- cgit v1.2.3 From f0b851689a5da2354f19bcbbac30cd2cab45c4a1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Feb 2011 15:44:31 -0800 Subject: NFSv4: Send unmapped uid/gids to the server if the idmapper fails Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index cbe6e2fa8cef..8518573c3ffc 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -52,6 +52,11 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re return 1; } +static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) +{ + return snprintf(buf, buflen, "%u", id); +} + #ifdef CONFIG_NFS_USE_NEW_IDMAPPER #include @@ -252,11 +257,20 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) { - return nfs_idmap_lookup_name(uid, "user", buf, buflen); + int ret; + ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); + if (ret < 0) + ret = nfs_map_numeric_to_string(uid, buf, buflen); + return ret; } int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) { - return nfs_idmap_lookup_name(gid, "group", buf, buflen); + int ret; + + ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); + if (ret < 0) + ret = nfs_map_numeric_to_string(gid, buf, buflen); + return ret; } #else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ @@ -736,14 +750,22 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = clp->cl_idmap; + int ret; - return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); + ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); + if (ret < 0) + ret = nfs_map_numeric_to_string(uid, buf, buflen); + return ret; } int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = clp->cl_idmap; + int ret; - return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); + ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); + if (ret < 0) + ret = nfs_map_numeric_to_string(uid, buf, buflen); + return ret; } #endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ -- cgit v1.2.3 From e4fd72a17d2703cfd626c55893ac4ca7e7d81ce9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Feb 2011 15:44:31 -0800 Subject: NFSv4: cleanup idmapper functions to take an nfs_server argument ...instead of the nfs_client. Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 24 ++++++++++++------------ fs/nfs/nfs4xdr.c | 18 ++++++++---------- include/linux/nfs_idmap.h | 9 +++++---- 3 files changed, 25 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 8518573c3ffc..e2d579d458f1 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -241,21 +241,21 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, return ret; } -int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) { if (nfs_map_string_to_numeric(name, namelen, uid)) return 0; return nfs_idmap_lookup_id(name, namelen, "uid", uid); } -int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) +int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid) { if (nfs_map_string_to_numeric(name, namelen, gid)) return 0; return nfs_idmap_lookup_id(name, namelen, "gid", gid); } -int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) +int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) { int ret; ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); @@ -263,7 +263,7 @@ int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buf ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; } -int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) +int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) { int ret; @@ -729,27 +729,27 @@ static unsigned int fnvhash32(const void *buf, size_t buflen) return hash; } -int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) { - struct idmap *idmap = clp->cl_idmap; + struct idmap *idmap = server->nfs_client->cl_idmap; if (nfs_map_string_to_numeric(name, namelen, uid)) return 0; return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); } -int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) +int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) { - struct idmap *idmap = clp->cl_idmap; + struct idmap *idmap = server->nfs_client->cl_idmap; if (nfs_map_string_to_numeric(name, namelen, uid)) return 0; return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); } -int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) +int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) { - struct idmap *idmap = clp->cl_idmap; + struct idmap *idmap = server->nfs_client->cl_idmap; int ret; ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); @@ -757,9 +757,9 @@ int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buf ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; } -int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) +int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) { - struct idmap *idmap = clp->cl_idmap; + struct idmap *idmap = server->nfs_client->cl_idmap; int ret; ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0f2dcfb41f29..686c21d8c523 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -844,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ); + owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); if (owner_namelen < 0) { dprintk("nfs: couldn't resolve uid %d to string\n", iap->ia_uid); @@ -856,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ); + owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ); if (owner_grouplen < 0) { dprintk("nfs: couldn't resolve gid %d to string\n", iap->ia_gid); @@ -3387,7 +3387,7 @@ out_overflow: } static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, - struct nfs_client *clp, uint32_t *uid, int may_sleep) + const struct nfs_server *server, uint32_t *uid, int may_sleep) { uint32_t len; __be32 *p; @@ -3407,7 +3407,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, if (!may_sleep) { /* do nothing */ } else if (len < XDR_MAX_NETOBJ) { - if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) + if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0) ret = NFS_ATTR_FATTR_OWNER; else dprintk("%s: nfs_map_name_to_uid failed!\n", @@ -3425,7 +3425,7 @@ out_overflow: } static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, - struct nfs_client *clp, uint32_t *gid, int may_sleep) + const struct nfs_server *server, uint32_t *gid, int may_sleep) { uint32_t len; __be32 *p; @@ -3445,7 +3445,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, if (!may_sleep) { /* do nothing */ } else if (len < XDR_MAX_NETOBJ) { - if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) + if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0) ret = NFS_ATTR_FATTR_GROUP; else dprintk("%s: nfs_map_group_to_gid failed!\n", @@ -3944,14 +3944,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_owner(xdr, bitmap, server->nfs_client, - &fattr->uid, may_sleep); + status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep); if (status < 0) goto xdr_error; fattr->valid |= status; - status = decode_attr_group(xdr, bitmap, server->nfs_client, - &fattr->gid, may_sleep); + status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep); if (status < 0) goto xdr_error; fattr->valid |= status; diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index e8352dc5afb5..ae7d6a380dae 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h @@ -65,6 +65,7 @@ struct idmap_msg { /* Forward declaration to make this header independent of others */ struct nfs_client; +struct nfs_server; #ifdef CONFIG_NFS_USE_NEW_IDMAPPER @@ -96,10 +97,10 @@ void nfs_idmap_delete(struct nfs_client *); #endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ -int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *); -int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *); -int nfs_map_uid_to_name(struct nfs_client *, __u32, char *, size_t); -int nfs_map_gid_to_group(struct nfs_client *, __u32, char *, size_t); +int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(const struct nfs_server *, __u32, char *, size_t); +int nfs_map_gid_to_group(const struct nfs_server *, __u32, char *, size_t); extern unsigned int nfs_idmap_cache_timeout; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 3ddeb7c5c61d0d6bfd837487d3454ffdb788bb91 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Feb 2011 15:44:31 -0800 Subject: NFSv4: Propagate the error NFS4ERR_BADOWNER to nfs4_do_setattr This will be required in order to switch uid/gid mapping back on if the admin has tried to disable it. Note that we also propagate NFS4ERR_BADNAME at the same time, in order to work around a Linux server bug. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +++ fs/nfs/nfs4xdr.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7b4b9f3e9842..8f3ada04ea19 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -85,6 +85,9 @@ static int nfs4_map_errors(int err) switch (err) { case -NFS4ERR_RESOURCE: return -EREMOTEIO; + case -NFS4ERR_BADOWNER: + case -NFS4ERR_BADNAME: + return -EINVAL; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 686c21d8c523..0cf560f77884 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -6171,8 +6171,6 @@ static struct { { NFS4ERR_DQUOT, -EDQUOT }, { NFS4ERR_STALE, -ESTALE }, { NFS4ERR_BADHANDLE, -EBADHANDLE }, - { NFS4ERR_BADOWNER, -EINVAL }, - { NFS4ERR_BADNAME, -EINVAL }, { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, { NFS4ERR_NOTSUPP, -ENOTSUPP }, { NFS4ERR_TOOSMALL, -ETOOSMALL }, -- cgit v1.2.3 From b064eca2cf6440bf9d5843b24cc4010624031694 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Feb 2011 15:44:32 -0800 Subject: NFSv4: Send unmapped uid/gids to the server when using auth_sys The new behaviour is enabled using the new module parameter 'nfs4_disable_idmapping'. Note that if the server rejects an unmapped uid or gid, then the client will automatically switch back to using the idmapper. Signed-off-by: Trond Myklebust --- Documentation/kernel-parameters.txt | 8 ++++++++ fs/nfs/client.c | 16 ++++++++++++++++ fs/nfs/idmap.c | 24 ++++++++++++++++-------- fs/nfs/nfs4proc.c | 15 ++++++++++++++- include/linux/nfs_fs_sb.h | 1 + 5 files changed, 55 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index f4a04c0c7edc..14dcf1b7dd9c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1580,6 +1580,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. of returning the full 64-bit number. The default is to return 64-bit inode numbers. + nfs.nfs4_disable_idmapping= + [NFSv4] When set, this option disables the NFSv4 + idmapper on the client, but only if the mount + is using the 'sec=sys' security flavour. This may + make migration from legacy NFSv2/v3 systems easier + provided that the server has the appropriate support. + The default is to always enable NFSv4 idmapping. + nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take when a NMI is triggered. Format: [state][,regs][,debounce][,die] diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 6dd50ac5b545..139be9647d80 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -81,6 +81,11 @@ retry: } #endif /* CONFIG_NFS_V4 */ +/* + * Turn off NFSv4 uid/gid mapping when using AUTH_SYS + */ +static int nfs4_disable_idmapping = 0; + /* * RPC cruft for NFS */ @@ -1567,6 +1572,13 @@ static int nfs4_init_server(struct nfs_server *server, if (error < 0) goto error; + /* + * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower + * authentication. + */ + if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX) + server->caps |= NFS_CAP_UIDGID_NOMAP; + if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); if (data->wsize) @@ -1984,3 +1996,7 @@ void nfs_fs_proc_exit(void) } #endif /* CONFIG_PROC_FS */ + +module_param(nfs4_disable_idmapping, bool, 0644); +MODULE_PARM_DESC(nfs4_disable_idmapping, + "Turn off NFSv4 idmapping when using 'sec=sys'"); diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index e2d579d458f1..79664a1025af 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -61,6 +61,9 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) #include #include +#include +#include +#include #include #include #include @@ -257,17 +260,20 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) { - int ret; - ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); + int ret = -EINVAL; + + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) + ret = nfs_idmap_lookup_name(uid, "user", buf, buflen); if (ret < 0) ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; } int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen) { - int ret; + int ret = -EINVAL; - ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) + ret = nfs_idmap_lookup_name(gid, "group", buf, buflen); if (ret < 0) ret = nfs_map_numeric_to_string(gid, buf, buflen); return ret; @@ -750,9 +756,10 @@ int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = server->nfs_client->cl_idmap; - int ret; + int ret = -EINVAL; - ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) + ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); if (ret < 0) ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; @@ -760,9 +767,10 @@ int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, s int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen) { struct idmap *idmap = server->nfs_client->cl_idmap; - int ret; + int ret = -EINVAL; - ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); + if (!(server->caps & NFS_CAP_UIDGID_NOMAP)) + ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); if (ret < 0) ret = nfs_map_numeric_to_string(uid, buf, buflen); return ret; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8f3ada04ea19..1d84e7088af9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -244,7 +244,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout) /* This is the error handling routine for processes that are allowed * to sleep. */ -static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) +static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception) { struct nfs_client *clp = server->nfs_client; struct nfs4_state *state = exception->state; @@ -296,6 +296,19 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, break; case -NFS4ERR_OLD_STATEID: exception->retry = 1; + break; + case -NFS4ERR_BADOWNER: + /* The following works around a Linux server bug! */ + case -NFS4ERR_BADNAME: + if (server->caps & NFS_CAP_UIDGID_NOMAP) { + server->caps &= ~NFS_CAP_UIDGID_NOMAP; + exception->retry = 1; + printk(KERN_WARNING "NFS: v4 server %s " + "does not accept raw " + "uid/gids. " + "Reenabling the idmapper.\n", + server->nfs_client->cl_hostname); + } } /* We failed to handle the error */ return nfs4_map_errors(ret); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 0cbf109a4056..216cea5db0aa 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -177,6 +177,7 @@ struct nfs_server { #define NFS_CAP_CTIME (1U << 12) #define NFS_CAP_MTIME (1U << 13) #define NFS_CAP_POSIX_LOCK (1U << 14) +#define NFS_CAP_UIDGID_NOMAP (1U << 15) /* maximum number of slots to use */ -- cgit v1.2.3 From 7ec10f26e1fd5fcceb9c96e508c1292a816199f7 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 22 Feb 2011 00:28:34 +0300 Subject: NFS: account direct-io into task io accounting Account NFS direct-io reads and writes into Task I/O Accounting. Do it before complition to handle aio. NFS have unusual direct-io implementation, thus accounting in generic code does not work. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f493bdd74f78..8eea25366717 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -937,6 +938,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, if (retval) goto out; + task_io_account_read(count); + retval = nfs_direct_read(iocb, iov, nr_segs, pos); if (retval > 0) iocb->ki_pos = pos + retval; @@ -998,6 +1001,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, if (retval) goto out; + task_io_account_write(count); + retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); if (retval > 0) -- cgit v1.2.3 From c12bacec458bef16d843c052f38422862f3da8fe Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Wed, 9 Mar 2011 15:54:13 -0600 Subject: cleanup: save 60 lines/100 bytes by combining two mostly duplicate functions. Eliminate two mostly duplicate functions (nfs_parse_simple_hostname() and nfs_parse_protected_hostname()) and instead just make the calling function (nfs_parse_devname()) do everything. Signed-off-by: Rob Landley Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 129 +++++++++++++++------------------------------------------ 1 file changed, 33 insertions(+), 96 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b68c8607770f..a74e9740190f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1665,99 +1665,59 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, return nfs_walk_authlist(args, &request); } -static int nfs_parse_simple_hostname(const char *dev_name, - char **hostname, size_t maxnamlen, - char **export_path, size_t maxpathlen) +/* + * Split "dev_name" into "hostname:export_path". + * + * The leftmost colon demarks the split between the server's hostname + * and the export path. If the hostname starts with a left square + * bracket, then it may contain colons. + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_devname(const char *dev_name, + char **hostname, size_t maxnamlen, + char **export_path, size_t maxpathlen) { size_t len; - char *colon, *comma; - - colon = strchr(dev_name, ':'); - if (colon == NULL) - goto out_bad_devname; - - len = colon - dev_name; - if (len > maxnamlen) - goto out_hostname; - - /* N.B. caller will free nfs_server.hostname in all cases */ - *hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (!*hostname) - goto out_nomem; + char *end; - /* kill possible hostname list: not supported */ - comma = strchr(*hostname, ','); - if (comma != NULL) { - if (comma == *hostname) + /* Is the host name protected with square brakcets? */ + if (*dev_name == '[') { + end = strchr(++dev_name, ']'); + if (end == NULL || end[1] != ':') goto out_bad_devname; - *comma = '\0'; - } - - colon++; - len = strlen(colon); - if (len > maxpathlen) - goto out_path; - *export_path = kstrndup(colon, len, GFP_KERNEL); - if (!*export_path) - goto out_nomem; - - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); - return 0; -out_bad_devname: - dfprintk(MOUNT, "NFS: device name not in host:path format\n"); - return -EINVAL; - -out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); - return -ENOMEM; - -out_hostname: - dfprintk(MOUNT, "NFS: server hostname too long\n"); - return -ENAMETOOLONG; - -out_path: - dfprintk(MOUNT, "NFS: export pathname too long\n"); - return -ENAMETOOLONG; -} - -/* - * Hostname has square brackets around it because it contains one or - * more colons. We look for the first closing square bracket, and a - * colon must follow it. - */ -static int nfs_parse_protected_hostname(const char *dev_name, - char **hostname, size_t maxnamlen, - char **export_path, size_t maxpathlen) -{ - size_t len; - char *start, *end; + len = end - dev_name; + end++; + } else { + char *comma; - start = (char *)(dev_name + 1); + end = strchr(dev_name, ':'); + if (end == NULL) + goto out_bad_devname; + len = end - dev_name; - end = strchr(start, ']'); - if (end == NULL) - goto out_bad_devname; - if (*(end + 1) != ':') - goto out_bad_devname; + /* kill possible hostname list: not supported */ + comma = strchr(dev_name, ','); + if (comma != NULL && comma < end) + *comma = 0; + } - len = end - start; if (len > maxnamlen) goto out_hostname; /* N.B. caller will free nfs_server.hostname in all cases */ - *hostname = kstrndup(start, len, GFP_KERNEL); + *hostname = kstrndup(dev_name, len, GFP_KERNEL); if (*hostname == NULL) goto out_nomem; - - end += 2; - len = strlen(end); + len = strlen(++end); if (len > maxpathlen) goto out_path; *export_path = kstrndup(end, len, GFP_KERNEL); if (!*export_path) goto out_nomem; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); return 0; out_bad_devname: @@ -1777,29 +1737,6 @@ out_path: return -ENAMETOOLONG; } -/* - * Split "dev_name" into "hostname:export_path". - * - * The leftmost colon demarks the split between the server's hostname - * and the export path. If the hostname starts with a left square - * bracket, then it may contain colons. - * - * Note: caller frees hostname and export path, even on error. - */ -static int nfs_parse_devname(const char *dev_name, - char **hostname, size_t maxnamlen, - char **export_path, size_t maxpathlen) -{ - if (*dev_name == '[') - return nfs_parse_protected_hostname(dev_name, - hostname, maxnamlen, - export_path, maxpathlen); - - return nfs_parse_simple_hostname(dev_name, - hostname, maxnamlen, - export_path, maxpathlen); -} - /* * Validate the NFS2/NFS3 mount data * - fills in the mount root filehandle -- cgit v1.2.3 From c5cb09b6f898609922f9b873661f6cbc26cb29e1 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Wed, 9 Mar 2011 16:02:37 -0600 Subject: Cleanup: Factor out some cut-and-paste code. Factor out some cut-and-paste code in options parsing. Saves about 800 bytes on x86-64. Signed-off-by: Rob Landley Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 155 ++++++++++++++++----------------------------------------- 1 file changed, 44 insertions(+), 111 deletions(-) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a74e9740190f..7e13e1a6b391 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -979,6 +979,27 @@ static int nfs_parse_security_flavors(char *value, return 1; } +static int nfs_get_option_str(substring_t args[], char **option) +{ + kfree(*option); + *option = match_strdup(args); + return !option; +} + +static int nfs_get_option_ul(substring_t args[], unsigned long *option) +{ + int rc; + char *string; + + string = match_strdup(args); + if (string == NULL) + return -ENOMEM; + rc = strict_strtoul(string, 10, option); + kfree(string); + + return rc; +} + /* * Error-check and convert a string of mount options from user space into * a data structure. The whole mount string is processed; bad options are @@ -1127,155 +1148,82 @@ static int nfs_parse_mount_options(char *raw, * options that take numeric values */ case Opt_port: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0 || option > USHRT_MAX) + if (nfs_get_option_ul(args, &option) || + option > USHRT_MAX) goto out_invalid_value; mnt->nfs_server.port = option; break; case Opt_rsize: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->rsize = option; break; case Opt_wsize: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->wsize = option; break; case Opt_bsize: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->bsize = option; break; case Opt_timeo: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0 || option == 0) + if (nfs_get_option_ul(args, &option) || option == 0) goto out_invalid_value; mnt->timeo = option; break; case Opt_retrans: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0 || option == 0) + if (nfs_get_option_ul(args, &option) || option == 0) goto out_invalid_value; mnt->retrans = option; break; case Opt_acregmin: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->acregmin = option; break; case Opt_acregmax: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->acregmax = option; break; case Opt_acdirmin: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->acdirmin = option; break; case Opt_acdirmax: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->acdirmax = option; break; case Opt_actimeo: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->acregmin = mnt->acregmax = mnt->acdirmin = mnt->acdirmax = option; break; case Opt_namelen: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; mnt->namlen = option; break; case Opt_mountport: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0 || option > USHRT_MAX) + if (nfs_get_option_ul(args, &option) || + option > USHRT_MAX) goto out_invalid_value; mnt->mount_server.port = option; break; case Opt_mountvers: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0 || + if (nfs_get_option_ul(args, &option) || option < NFS_MNT_VERSION || option > NFS_MNT3_VERSION) goto out_invalid_value; mnt->mount_server.version = option; break; case Opt_nfsvers: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; switch (option) { case NFS2_VERSION: @@ -1295,12 +1243,7 @@ static int nfs_parse_mount_options(char *raw, } break; case Opt_minorversion: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = strict_strtoul(string, 10, &option); - kfree(string); - if (rc != 0) + if (nfs_get_option_ul(args, &option)) goto out_invalid_value; if (option > NFS4_MAX_MINOR_VERSION) goto out_invalid_value; @@ -1336,21 +1279,18 @@ static int nfs_parse_mount_options(char *raw, case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - kfree(string); break; case Opt_xprt_tcp6: protofamily = AF_INET6; case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - kfree(string); break; case Opt_xprt_rdma: /* vector side protocols to TCP */ mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; xprt_load_transport(string); - kfree(string); break; default: dfprintk(MOUNT, "NFS: unrecognized " @@ -1358,6 +1298,7 @@ static int nfs_parse_mount_options(char *raw, kfree(string); return 0; } + kfree(string); break; case Opt_mountproto: string = match_strdup(args); @@ -1400,18 +1341,13 @@ static int nfs_parse_mount_options(char *raw, goto out_invalid_address; break; case Opt_clientaddr: - string = match_strdup(args); - if (string == NULL) + if (nfs_get_option_str(args, &mnt->client_address)) goto out_nomem; - kfree(mnt->client_address); - mnt->client_address = string; break; case Opt_mounthost: - string = match_strdup(args); - if (string == NULL) + if (nfs_get_option_str(args, + &mnt->mount_server.hostname)) goto out_nomem; - kfree(mnt->mount_server.hostname); - mnt->mount_server.hostname = string; break; case Opt_mountaddr: string = match_strdup(args); @@ -1451,11 +1387,8 @@ static int nfs_parse_mount_options(char *raw, }; break; case Opt_fscache_uniq: - string = match_strdup(args); - if (string == NULL) + if (nfs_get_option_str(args, &mnt->fscache_uniq)) goto out_nomem; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = string; mnt->options |= NFS_OPTION_FSCACHE; break; case Opt_local_lock: -- cgit v1.2.3 From 8f68cd42d85f31fb58dd2cabf3ff4aad0a2bafd9 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 15 Mar 2011 18:37:09 +1100 Subject: nfs: BKL is no longer needed, so remove the include Signed-off-by: Stephen Rothwell Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 4b764c6048db..7cded2b12a05 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3