From 4373ea84c84d8a96e99d3da99e813d3e36d1bd11 Mon Sep 17 00:00:00 2001 From: Wendy Cheng Date: Thu, 17 Jan 2008 11:10:12 -0500 Subject: lockd: unlock lockd locks associated with a given server ip For high-availability NFS service, we generally need to be able to drop file locks held on the exported filesystem before moving clients to a new server. Currently the only way to do that is by shutting down lockd entirely, which is often undesireable (for example, if you want to continue exporting other filesystems). This patch allows the administrator to release all locks held by clients accessing the client through a given server ip address, by echoing that address to a new file, /proc/fs/nfsd/unlock_ip, as in: shell> echo 10.1.1.2 > /proc/fs/nfsd/unlock_ip The expected sequence of events can be: 1. Tear down the IP address 2. Unexport the path 3. Write IP to /proc/fs/nfsd/unlock_ip to unlock files 4. Signal peer to begin take-over. For now we only support IPv4 addresses and NFSv2/v3 (NFSv4 locks are not affected). Also, if unmounting the filesystem is required, we assume at step 3 that clients using the given server ip are the only clients holding locks on the given filesystem; otherwise, an additional patch is required to allow revoking all locks held by lockd on a given filesystem. Signed-off-by: S. Wendy Cheng Cc: Lon Hohberger Cc: Christoph Hellwig Signed-off-by: J. Bruce Fields fs/lockd/svcsubs.c | 66 +++++++++++++++++++++++++++++++++++++++----- fs/nfsd/nfsctl.c | 65 +++++++++++++++++++++++++++++++++++++++++++ include/linux/lockd/lockd.h | 7 ++++ 3 files changed, 131 insertions(+), 7 deletions(-) --- include/linux/lockd/lockd.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 94649a8da014..bcb93fc1fce7 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -194,7 +194,7 @@ void nsm_release(struct nsm_handle *); * This is used in garbage collection and resource reclaim * A return value != 0 means destroy the lock/block/share */ -typedef int (*nlm_host_match_fn_t)(struct nlm_host *cur, struct nlm_host *ref); +typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref); /* * Server-side lock handling @@ -220,6 +220,11 @@ void nlmsvc_mark_resources(void); void nlmsvc_free_host_resources(struct nlm_host *); void nlmsvc_invalidate_all(void); +/* + * Cluster failover support + */ +int nlmsvc_unlock_all_by_ip(__be32 server_addr); + static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) { return file->f_file->f_path.dentry->d_inode; -- cgit v1.2.3 From 17efa372cfe4d189705edf6cd4fbe283827a5dc7 Mon Sep 17 00:00:00 2001 From: Wendy Cheng Date: Thu, 17 Jan 2008 11:10:12 -0500 Subject: lockd: unlock lockd locks held for a certain filesystem Add /proc/fs/nfsd/unlock_filesystem, which allows e.g.: shell> echo /mnt/sfs1 > /proc/fs/nfsd/unlock_filesystem so that a filesystem can be unmounted before allowing a peer nfsd to take over nfs service for the filesystem. Signed-off-by: S. Wendy Cheng Cc: Lon Hohberger Cc: Christoph Hellwig Signed-off-by: J. Bruce Fields fs/lockd/svcsubs.c | 66 +++++++++++++++++++++++++++++++++++++++----- fs/nfsd/nfsctl.c | 65 +++++++++++++++++++++++++++++++++++++++++++ include/linux/lockd/lockd.h | 7 ++++ 3 files changed, 131 insertions(+), 7 deletions(-) --- fs/lockd/svcsubs.c | 37 ++++++++++++++++++++++++++++++++----- fs/nfsd/nfsctl.c | 32 ++++++++++++++++++++++++++++++++ include/linux/lockd/lockd.h | 1 + 3 files changed, 65 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 1adcf93cb6e6..d1c48b539df8 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -196,6 +196,12 @@ again: return 0; } +static int +nlmsvc_always_match(void *dummy1, struct nlm_host *dummy2) +{ + return 1; +} + /* * Inspect a single file */ @@ -232,7 +238,8 @@ nlm_file_inuse(struct nlm_file *file) * Loop over all files in the file table. */ static int -nlm_traverse_files(void *data, nlm_host_match_fn_t match) +nlm_traverse_files(void *data, nlm_host_match_fn_t match, + int (*is_failover_file)(void *data, struct nlm_file *file)) { struct hlist_node *pos, *next; struct nlm_file *file; @@ -241,6 +248,8 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match) mutex_lock(&nlm_file_mutex); for (i = 0; i < FILE_NRHASH; i++) { hlist_for_each_entry_safe(file, pos, next, &nlm_files[i], f_list) { + if (is_failover_file && !is_failover_file(data, file)) + continue; file->f_count++; mutex_unlock(&nlm_file_mutex); @@ -345,7 +354,7 @@ void nlmsvc_mark_resources(void) { dprintk("lockd: nlmsvc_mark_resources\n"); - nlm_traverse_files(NULL, nlmsvc_mark_host); + nlm_traverse_files(NULL, nlmsvc_mark_host, NULL); } /* @@ -356,7 +365,7 @@ nlmsvc_free_host_resources(struct nlm_host *host) { dprintk("lockd: nlmsvc_free_host_resources\n"); - if (nlm_traverse_files(host, nlmsvc_same_host)) { + if (nlm_traverse_files(host, nlmsvc_same_host, NULL)) { printk(KERN_WARNING "lockd: couldn't remove all locks held by %s\n", host->h_name); @@ -376,8 +385,26 @@ nlmsvc_invalidate_all(void) * turn, which is about as inefficient as it gets. * Now we just do it once in nlm_traverse_files. */ - nlm_traverse_files(NULL, nlmsvc_is_client); + nlm_traverse_files(NULL, nlmsvc_is_client, NULL); +} + +static int +nlmsvc_match_sb(void *datap, struct nlm_file *file) +{ + struct super_block *sb = datap; + + return sb == file->f_file->f_path.mnt->mnt_sb; +} + +int +nlmsvc_unlock_all_by_sb(struct super_block *sb) +{ + int ret; + + ret = nlm_traverse_files(sb, nlmsvc_always_match, nlmsvc_match_sb); + return ret ? -EIO : 0; } +EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) @@ -391,7 +418,7 @@ int nlmsvc_unlock_all_by_ip(__be32 server_addr) { int ret; - ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip); + ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL); return ret ? -EIO : 0; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7706f6c9ab8a..42f3820ee8f5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -56,6 +56,7 @@ enum { NFSD_List, NFSD_Fh, NFSD_FO_UnlockIP, + NFSD_FO_UnlockFS, NFSD_Threads, NFSD_Pool_Threads, NFSD_Versions, @@ -93,6 +94,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size); +static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size); static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Svc] = write_svc, @@ -104,6 +106,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { [NFSD_Getfs] = write_getfs, [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = failover_unlock_ip, + [NFSD_FO_UnlockFS] = failover_unlock_fs, [NFSD_Threads] = write_threads, [NFSD_Pool_Threads] = write_pool_threads, [NFSD_Versions] = write_versions, @@ -329,6 +332,33 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size) return nlmsvc_unlock_all_by_ip(server_ip); } +static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) +{ + struct nameidata nd; + char *fo_path; + int error; + + /* sanity check */ + if (size == 0) + return -EINVAL; + + if (buf[size-1] != '\n') + return -EINVAL; + + fo_path = buf; + if (qword_get(&buf, fo_path, size) < 0) + return -EINVAL; + + error = path_lookup(fo_path, 0, &nd); + if (error) + return error; + + error = nlmsvc_unlock_all_by_sb(nd.path.mnt->mnt_sb); + + path_put(&nd.path); + return error; +} + static ssize_t write_filehandle(struct file *file, char *buf, size_t size) { /* request is: @@ -733,6 +763,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_FO_UnlockFS] = {"unlock_filesystem", + &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index bcb93fc1fce7..102d928f7206 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -223,6 +223,7 @@ void nlmsvc_invalidate_all(void); /* * Cluster failover support */ +int nlmsvc_unlock_all_by_sb(struct super_block *sb); int nlmsvc_unlock_all_by_ip(__be32 server_addr); static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) -- cgit v1.2.3 From 1a747ee0cc11a198f9e2435add821bd0dfedb7c1 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 24 Apr 2008 10:08:22 -0400 Subject: locks: don't call ->copy_lock methods on return of conflicting locks The file_lock structure is used both as a heavy-weight representation of an active lock, with pointers to reference-counted structures, etc., and as a simple container for parameters that describe a file lock. The conflicting lock returned from __posix_lock_file is an example of the latter; so don't call the filesystem or lock manager callbacks when copying to it. This also saves the need for an unnecessary locks_init_lock in the nfsv4 server. Thanks to Trond for pointing out the error. Signed-off-by: J. Bruce Fields Cc: Trond Myklebust --- fs/lockd/svclock.c | 2 +- fs/locks.c | 4 ++-- fs/nfsd/nfs4state.c | 3 --- include/linux/fs.h | 1 + 4 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 1f122c1940af..4d81553d2948 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -632,7 +632,7 @@ nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf, block->b_flags |= B_TIMED_OUT; if (conf) { if (block->b_fl) - locks_copy_lock(block->b_fl, conf); + __locks_copy_lock(block->b_fl, conf); } } diff --git a/fs/locks.c b/fs/locks.c index 2e0fa661e423..e1ea2fe03681 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -224,7 +224,7 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) /* * Initialize a new lock from an existing file_lock structure. */ -static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) +void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) { new->fl_owner = fl->fl_owner; new->fl_pid = fl->fl_pid; @@ -833,7 +833,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (!posix_locks_conflict(request, fl)) continue; if (conflock) - locks_copy_lock(conflock, fl); + __locks_copy_lock(conflock, fl); error = -EAGAIN; if (!(request->fl_flags & FL_SLEEP)) goto out; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 55dfdd71f1b0..8799b8708188 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2712,9 +2712,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * Note: locks.c uses the BKL to protect the inode's lock list. */ - /* XXX?: Just to divert the locks_release_private at the start of - * locks_copy_lock: */ - locks_init_lock(&conflock); err = vfs_lock_file(filp, cmd, &file_lock, &conflock); switch (-err) { case 0: /* success! */ diff --git a/include/linux/fs.h b/include/linux/fs.h index cc2be2cf7d41..6556f2f967e5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -973,6 +973,7 @@ extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, /* fs/locks.c */ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); +extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); extern void posix_test_lock(struct file *, struct file_lock *); -- cgit v1.2.3 From e36cd4a2873c398ba188f16e4087cce7f00a1506 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 24 Apr 2008 16:59:30 -0400 Subject: nfsd: don't allow setting ctime over v4 Presumably this is left over from earlier drafts of v4, which listed TIME_METADATA as writeable. It's read-only in rfc 3530, and shouldn't be modifiable anyway. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 14 -------------- include/linux/nfsd/nfsd.h | 2 +- 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1ba7ad981935..c513bbdf2d36 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -376,20 +376,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia goto xdr_error; } } - if (bmval[1] & FATTR4_WORD1_TIME_METADATA) { - /* We require the high 32 bits of 'seconds' to be 0, and we ignore - all 32 bits of 'nseconds'. */ - READ_BUF(12); - len += 12; - READ32(dummy32); - if (dummy32) - return nfserr_inval; - READ32(iattr->ia_ctime.tv_sec); - READ32(iattr->ia_ctime.tv_nsec); - if (iattr->ia_ctime.tv_nsec >= (u32)1000000000) - return nfserr_inval; - iattr->ia_valid |= ATTR_CTIME; - } if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) { READ_BUF(4); len += 4; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 21ee440dd3e7..41d30c9c9de6 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -329,7 +329,7 @@ extern struct timeval nfssvc_boot; (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) #define NFSD_WRITEABLE_ATTRS_WORD1 \ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ - | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY_SET) + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) #endif /* CONFIG_NFSD_V4 */ -- cgit v1.2.3