From 1daef0a868370c5a96d031b9202e3354bea060e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Jul 2008 18:19:01 -0400 Subject: NFS: Clean up nfs_sb_active/nfs_sb_deactive Instead of causing umount requests to block on server->active_wq while the asynchronous sillyrename deletes are executing, we can use the sb->s_active counter to obtain a reference to the super_block, and then release that reference in nfs_async_unlink_release(). Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c9beacd16c00..4e477ae58699 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -119,7 +119,6 @@ struct nfs_server { void (*destroy)(struct nfs_server *); atomic_t active; /* Keep trace of any activity to this server */ - wait_queue_head_t active_wq; /* Wait for any activity to stop */ /* mountd-related mount options */ struct sockaddr_storage mountd_address; -- cgit v1.2.3 From 4eec952e42314b53e48fef1f54dd89cbf9789734 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2008 17:58:13 -0400 Subject: NFS: Add options for finer control of the lookup cache Add the flag NFS_MOUNT_LOOKUP_CACHE_NONEG to turn off the caching of negative dentries. In reality what we do is to force nfs_lookup_revalidate() to always discard negative dentries. Add the flag NFS_MOUNT_LOOKUP_CACHE_NONE for enforcing stricter revalidation of dentries. It forces the revalidate code to always do a lookup instead of just checking the cached mtime of the parent directory. 
Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 ++++ include/linux/nfs_mount.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 74f92b717f78..49d565412827 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -667,6 +667,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) + return 0; if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ @@ -750,6 +752,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, /* Don't revalidate a negative dentry if we're creating a new file */ if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) return 0; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) + return 1; return !nfs_check_verifier(dir, dentry); } diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index df7c6b7a7ebb..6549a06ac16e 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -65,4 +65,8 @@ struct nfs_mount_data { #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */ #define NFS_MOUNT_FLAGMASK 0xFFFF +/* The following are for internal use only */ +#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000 +#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000 + #endif -- cgit v1.2.3 From 691beb13cdc88358334ef0ba867c080a247a760f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Oct 2008 14:48:22 -0400 Subject: NFS: Allow concurrent inode revalidation Currently, if two processes are both trying to revalidate metadata for the same inode, they will find themselves being serialised. There is no good justification for this now that we have improved our ability to detect stale attribute data, so we should remove that serialisation. 
Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 43 ++----------------------------------------- include/linux/nfs_fs.h | 9 ++++----- 2 files changed, 6 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f3b8ed904df7..e25009f35cc2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -472,37 +472,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } } -static int nfs_wait_schedule(void *word) -{ - if (signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; -} - -/* - * Wait for the inode to get unlocked. - */ -static int nfs_wait_on_inode(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int error; - - error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, - nfs_wait_schedule, TASK_KILLABLE); - - return error; -} - -static void nfs_wake_up_inode(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); - smp_mb__after_clear_bit(); - wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); -} - int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; @@ -697,20 +666,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); - nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (is_bad_inode(inode)) - goto out_nowait; + goto out; if (NFS_STALE(inode)) - goto out_nowait; - - status = nfs_wait_on_inode(inode); - if (status < 0) goto out; - status = -ESTALE; if (NFS_STALE(inode)) goto out; + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", @@ -740,9 +704,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode)); 
out: - nfs_wake_up_inode(inode); - - out_nowait: return status; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 78a5922a2f11..ca563ee13e32 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -200,11 +200,10 @@ struct nfs_inode { /* * Bit offsets in flags field */ -#define NFS_INO_REVALIDATING (0) /* revalidating attrs */ -#define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ -#define NFS_INO_STALE (2) /* possible stale inode */ -#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ -#define NFS_INO_MOUNTPOINT (4) /* inode is remote mountpoint */ +#define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus */ +#define NFS_INO_STALE (1) /* possible stale inode */ +#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */ +#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3 From 9fa8d66f1e55bf197568c8c689043c2aad1ffc97 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Tue, 26 Aug 2008 16:23:20 +0100 Subject: NFS: remove 8 bytes of padding from struct nfs_fattr on 64 bit builds remove 8 bytes of padding from struct nfs_fattr on 64 bit builds This also removes padding from several nfs structures, including 16 bytes from nfs4_opendata, nfs4_createdata,nfs3_createdata & 8 bytes from nfs_read_data,nfs_write_data,nfs_removeres,nfs4_closedata This also reduces the reported stack usage of many nfs functions (30+). Signed-off-by: Richard Kennedy ---- This patch is against the latest git 2.6.27-rc4. I've built & run this on my AMD64 desktop, & successfully run _simple_ tests with a 64 bit client => 32 bit server & 32 bit client to 64 bit server. On fedora with gcc (GCC) 4.3.0 20080428 (Red Hat 4.3.0-8) checkpatch reports 33 functions with reduced stack usage. e.g. 
__nfs_revalidate_inode [nfs] 216 => 200 _nfs4_proc_access [nfs] 304 => 288 _nfs4_proc_link [nfs] 536 => 504 _nfs4_proc_remove [nfs] 304 => 288 _nfs4_proc_rename [nfs] 584 => 552 nfs3_proc_access [nfs] 272 => 256 nfs3_proc_getacl [nfs] 384 => 368 nfs3_proc_link [nfs] 496 => 464 etc I can supply the complete list if anyone is interested. regards Richard Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8c77c11224d1..9cabbb3a9e6d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -36,6 +36,7 @@ struct nfs_fattr { __u32 nlink; __u32 uid; __u32 gid; + dev_t rdev; __u64 size; union { struct { @@ -46,7 +47,6 @@ struct nfs_fattr { __u64 used; } nfs3; } du; - dev_t rdev; struct nfs_fsid fsid; __u64 fileid; struct timespec atime; -- cgit v1.2.3 From d1ce02e1689dff9d413138f60a79b4e3affb4708 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 Sep 2008 11:57:12 -0400 Subject: NFS: SETCLIENTID truncates client ID and netid The sc_name field is currently 56 bytes long. This is not large enough to hold a pair of IPv6 addresses, the authentication type, the protocol name, and a uniquifier number. The maximum possible size of the name string using IPv6 addresses is just under 110 bytes, so I increased the size of the sc_name field to accommodate this maximum. In addition, the strings in the nfs4_setclientid structure are constructed with scnprintf(), which wants to terminate its output with '\0'. The sc_netid field was large enough only for a three byte netid string and a '\0' so inet6 netids were being truncated. Perhaps we don't need the overhead of scnprintf() to do a simple string copy, but I fixed this by increasing the size of the buffer by one byte. 
Since all three of the string buffers in nfs4_setclientid are constructed with scnprintf(), I increased the size of all three by one byte to document the requirement, although I don't think either the universal address field or the name field will be so small that these strings get truncated in this way. The size of the Linux client's client ID on the wire will be larger than before. RFC 3530 suggests the size limit for client IDs is 1024, and we are still well below that. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9cabbb3a9e6d..f6e95bfad5de 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -672,16 +672,16 @@ struct nfs4_rename_res { struct nfs_fattr * new_fattr; }; -#define NFS4_SETCLIENTID_NAMELEN (56) +#define NFS4_SETCLIENTID_NAMELEN (128) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; unsigned int sc_name_len; - char sc_name[NFS4_SETCLIENTID_NAMELEN]; + char sc_name[NFS4_SETCLIENTID_NAMELEN + 1]; u32 sc_prog; unsigned int sc_netid_len; - char sc_netid[RPCBIND_MAXNETIDLEN]; + char sc_netid[RPCBIND_MAXNETIDLEN + 1]; unsigned int sc_uaddr_len; - char sc_uaddr[RPCBIND_MAXUADDRLEN]; + char sc_uaddr[RPCBIND_MAXUADDRLEN + 1]; u32 sc_cb_ident; }; -- cgit v1.2.3 From 19d771f3caccaf66ce2fb539319222139e5b4e88 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 8 Oct 2008 13:54:52 -0400 Subject: NFS: Save padding bytes in struct nfs4_setclientid Peter Staubach suggested reducing NFS4_SETCLIENTID_NAMELEN by one byte so as to avoid 7 bytes of unnecessary padding. 
Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f6e95bfad5de..6ee6ae3f095c 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -672,7 +672,7 @@ struct nfs4_rename_res { struct nfs_fattr * new_fattr; }; -#define NFS4_SETCLIENTID_NAMELEN (128) +#define NFS4_SETCLIENTID_NAMELEN (127) struct nfs4_setclientid { const nfs4_verifier * sc_verifier; unsigned int sc_name_len; -- cgit v1.2.3 From fe9053b30bb48b99f7b45541249f5cfe96bdf7f7 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 14:59:59 -0400 Subject: RPC/RDMA: add data types and new FRMR memory registration enum. Internal RPC/RDMA structure updates in preparation for FRMR support. Signed-off-by: Tom Talpey Acked-by: Tom Tucker Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtrdma.h | 1 + net/sunrpc/xprtrdma/xprt_rdma.h | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 4de56b1d372b..55a5d92ca1e2 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -78,6 +78,7 @@ enum rpcrdma_memreg { RPCRDMA_MEMWINDOWS, RPCRDMA_MEMWINDOWS_ASYNC, RPCRDMA_MTHCAFMR, + RPCRDMA_FRMR, RPCRDMA_ALLPHYSICAL, RPCRDMA_LAST }; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2427822f8bd4..05b7898e1f4b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -58,6 +58,8 @@ struct rpcrdma_ia { struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_bind_mem; + u32 ri_dma_lkey; + int ri_have_dma_lkey; struct completion ri_done; int ri_async_rc; enum rpcrdma_memreg ri_memreg_strategy; @@ -156,6 +158,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ union { struct ib_mw *mw; struct ib_fmr *fmr; + struct { + struct 
ib_fast_reg_page_list *fr_pgl; + struct ib_mr *fr_mr; + } frmr; } r; struct list_head mw_list; } *rl_mw; @@ -198,7 +204,7 @@ struct rpcrdma_buffer { atomic_t rb_credits; /* most recent server credits */ unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ int rb_max_requests;/* client max requests */ - struct list_head rb_mws; /* optional memory windows/fmrs */ + struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ int rb_send_index; struct rpcrdma_req **rb_send_bufs; int rb_recv_index; -- cgit v1.2.3 From 5675add36e76b9487e7f9e689f854cb8d6afd9b4 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 9 Oct 2008 15:01:41 -0400 Subject: RPC/RDMA: harden connection logic against missing/late rdma_cm upcalls. Add defensive timeouts to wait_for_completion() calls in RDMA address resolution, and make them interruptible. Fix the timeout units to milliseconds (formerly jiffies) and move to private header. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtrdma.h | 3 --- net/sunrpc/xprtrdma/verbs.c | 11 +++++++---- net/sunrpc/xprtrdma/xprt_rdma.h | 3 +++ 3 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 55a5d92ca1e2..54a379c9e8eb 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -66,9 +66,6 @@ #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ -#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */ -#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ - /* memory registration strategies */ #define RPCRDMA_PERSISTENT_REGISTRATION (1) diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index a63d0c0ec017..f46fb93f421b 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) switch (event->event) { 
case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED: + ia->ri_async_rc = 0; complete(&ia->ri_done); break; case RDMA_CM_EVENT_ADDR_ERROR: @@ -363,26 +364,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, return id; } - ia->ri_async_rc = 0; + ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); if (rc) { dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", __func__, rc); goto out; } - wait_for_completion(&ia->ri_done); + wait_for_completion_interruptible_timeout(&ia->ri_done, + msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); rc = ia->ri_async_rc; if (rc) goto out; - ia->ri_async_rc = 0; + ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); if (rc) { dprintk("RPC: %s: rdma_resolve_route() failed %i\n", __func__, rc); goto out; } - wait_for_completion(&ia->ri_done); + wait_for_completion_interruptible_timeout(&ia->ri_done, + msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); rc = ia->ri_async_rc; if (rc) goto out; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index fde6499a53b2..c7a7eba991bc 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -51,6 +51,9 @@ #include /* RPC/RDMA protocol */ #include /* xprt parameters */ +#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ +#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ + /* * Interface Adapter -- one per transport instance */ -- cgit v1.2.3 From 4704f0e274829e3af00737d2d9adace2d71a9605 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Oct 2008 19:16:07 -0400 Subject: NFS: Fix the resolution problem with nfs_inode_attrs_need_update() It appears that 'jiffies' timestamps do not have high enough resolution for nfs_inode_attrs_need_update(). One problem is that a GETATTR can be launched within < 1 jiffy of the last operation that updated the attribute. Another problem is that RPC calls can take < 1 jiffy to execute. 
We can fix this by switching the variables to use a simple global counter that gets incremented every time we start another GETATTR call. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 14 ++++++++++---- fs/nfs/inode.c | 37 ++++++++++++++++++++++++++++++------- include/linux/nfs_fs.h | 10 +++------- include/linux/nfs_xdr.h | 1 + 4 files changed, 44 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 49d565412827..4807074ada8c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -156,6 +156,7 @@ typedef struct { decode_dirent_t decode; int plus; unsigned long timestamp; + unsigned long gencount; int timestamp_valid; } nfs_readdir_descriptor_t; @@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) struct file *file = desc->file; struct inode *inode = file->f_path.dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); - unsigned long timestamp; + unsigned long timestamp, gencount; int error; dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", @@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) again: timestamp = jiffies; + gencount = nfs_inc_attr_generation_counter(); error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { @@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) goto error; } desc->timestamp = timestamp; + desc->gencount = gencount; desc->timestamp_valid = 1; SetPageUptodate(page); /* Ensure consistent page alignment of the data. 
@@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc) if (IS_ERR(p)) return PTR_ERR(p); desc->ptr = p; - if (desc->timestamp_valid) + if (desc->timestamp_valid) { desc->entry->fattr->time_start = desc->timestamp; - else + desc->entry->fattr->gencount = desc->gencount; + } else desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; return 0; } @@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct rpc_cred *cred = nfs_file_cred(file); struct page *page = NULL; int status; - unsigned long timestamp; + unsigned long timestamp, gencount; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); @@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } timestamp = jiffies; + gencount = nfs_inc_attr_generation_counter(); status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, @@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (status >= 0) { desc->timestamp = timestamp; + desc->gencount = gencount; desc->timestamp_valid = 1; if ((status = dir_decode(desc)) == 0) desc->entry->prev_cookie = *desc->dir_cookie; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index de3f11e6234e..116a3bd2bc9b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -305,7 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) init_special_inode(inode, inode->i_mode, fattr->rdev); nfsi->read_cache_jiffies = fattr->time_start; - nfsi->last_updated = now; + nfsi->attr_gencount = fattr->gencount; nfsi->cache_change_attribute = now; inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; @@ -909,6 +909,30 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); } +static unsigned long 
nfs_attr_generation_counter; + +static unsigned long nfs_read_attr_generation_counter(void) +{ + smp_rmb(); + return nfs_attr_generation_counter; +} + +unsigned long nfs_inc_attr_generation_counter(void) +{ + unsigned long ret; + smp_rmb(); + ret = ++nfs_attr_generation_counter; + smp_wmb(); + return ret; +} + +void nfs_fattr_init(struct nfs_fattr *fattr) +{ + fattr->valid = 0; + fattr->time_start = jiffies; + fattr->gencount = nfs_inc_attr_generation_counter(); +} + /** * nfs_inode_attrs_need_update - check if the inode attributes need updating * @inode - pointer to inode @@ -922,8 +946,7 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt * catch the case where ctime either didn't change, or went backwards * (if someone reset the clock on the server) by looking at whether * or not this RPC call was started after the inode was last updated. - * Note also the check for jiffy wraparound if the last_updated timestamp - * is later than 'jiffies'. + * Note also the check for wraparound of 'attr_gencount' * * The function returns 'true' if it thinks the attributes in 'fattr' are * more recent than the ones cached in the inode. 
@@ -933,10 +956,10 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n { const struct nfs_inode *nfsi = NFS_I(inode); - return time_after(fattr->time_start, nfsi->last_updated) || + return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || nfs_ctime_need_update(inode, fattr) || nfs_size_need_update(inode, fattr) || - time_after(nfsi->last_updated, jiffies); + ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) @@ -1107,7 +1130,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) - invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } else if (nfsi->change_attr != fattr->change_attr) { dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); @@ -1163,7 +1186,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; - nfsi->last_updated = now; + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index ca563ee13e32..ac8d0233b05c 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -137,7 +137,7 @@ struct nfs_inode { unsigned long attrtimeo_timestamp; __u64 change_attr; /* v4 only */ - unsigned long last_updated; + unsigned long attr_gencount; /* "Generation counter" for the attribute cache. This is * bumped whenever we update the metadata on the * server. 
@@ -344,15 +344,11 @@ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ct extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); extern u64 nfs_compat_user_ino64(u64 fileid); +extern void nfs_fattr_init(struct nfs_fattr *fattr); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ - -static inline void nfs_fattr_init(struct nfs_fattr *fattr) -{ - fattr->valid = 0; - fattr->time_start = jiffies; -} +extern unsigned long nfs_inc_attr_generation_counter(void); /* * linux/fs/nfs/file.c diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 6ee6ae3f095c..c1c31acb8a2b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -56,6 +56,7 @@ struct nfs_fattr { __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ unsigned long time_start; + unsigned long gencount; }; #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ -- cgit v1.2.3