From e851db5b05408b89b9a9429a66814b79fabee2a1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Jun 2008 18:45:30 -0400 Subject: SUNRPC: Add address family field to svc_serv data structure Introduce and initialize an address family field in the svc_serv structure. This field will determine what family to use for the service's listener sockets and what families are advertised via the local rpcbind daemon. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index dc69068d94c7..23143f38b121 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -66,6 +66,7 @@ struct svc_serv { struct list_head sv_tempsocks; /* all temporary sockets */ int sv_tmpcnt; /* count of temporary sockets */ struct timer_list sv_temptimer; /* timer for aging temporary sockets */ + sa_family_t sv_family; /* listener's address family */ char * sv_name; /* service name */ @@ -381,14 +382,14 @@ struct svc_procedure { /* * Function prototypes. */ -struct svc_serv * svc_create(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv*)); +struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t, + void (*shutdown)(struct svc_serv *)); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool); void svc_exit_thread(struct svc_rqst *); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - void (*shutdown)(struct svc_serv*), svc_thread_fn, - struct module *); + sa_family_t, void (*shutdown)(struct svc_serv *), + svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); -- cgit v1.2.3 From 04716e6621ff4abb422d64ba7b48718f52716a3e Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 7 Aug 2008 13:00:20 -0400 Subject: nfsd: permit unauthenticated stat of export root RFC 2623 section 2.3.2 permits the server to bypass gss authentication checks for certain operations that a client may perform when mounting. In the case of a client that doesn't have some form of credentials available to it on boot, this allows it to perform the mount unattended. (Presumably real file access won't be needed until a user with credentials logs in.) Being slightly more lenient allows lots of old clients to access krb5-only exports, with the only loss being a small amount of information leaked about the root directory of the export. This affects only v2 and v3; v4 still requires authentication for all access. Thanks to Peter Staubach testing against a Solaris client, which suggesting addition of v3 getattr, to the list, and to Trond for noting that doing so exposes no additional information. Signed-off-by: J. Bruce Fields Cc: Peter Staubach Cc: Trond Myklebust --- fs/nfsd/nfs3proc.c | 8 +++++--- fs/nfsd/nfsfh.c | 30 ++++++++++++++++++++---------- fs/nfsd/nfsproc.c | 6 ++++-- fs/nfsd/vfs.c | 4 ++-- include/linux/nfsd/nfsd.h | 3 ++- 5 files changed, 33 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 4d617ea28cfc..9dbd2eb91281 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -63,7 +63,8 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); if (nfserr) RETURN_STATUS(nfserr); @@ -530,7 +531,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: FSSTAT(3) %s\n", SVCFH_fmt(&argp->fh)); - nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); fh_put(&argp->fh); RETURN_STATUS(nfserr); } @@ -558,7 +559,8 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, resp->f_maxfilesize = ~(u32) 0; resp->f_properties = NFS3_FSF_DEFAULT; - nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); + nfserr = fh_verify(rqstp, &argp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); /* Check special features of the file system. May request * different read/write sizes for file systems known to have diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ea37c96f0445..cd25d91895a1 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -302,17 +302,27 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) if (error) goto out; - if (!(access & NFSD_MAY_LOCK)) { - /* - * pseudoflavor restrictions are not enforced on NLM, - * which clients virtually always use auth_sys for, - * even while using RPCSEC_GSS for NFS. - */ - error = check_nfsd_access(exp, rqstp); - if (error) - goto out; - } + /* + * pseudoflavor restrictions are not enforced on NLM, + * which clients virtually always use auth_sys for, + * even while using RPCSEC_GSS for NFS. + */ + if (access & NFSD_MAY_LOCK) + goto skip_pseudoflavor_check; + /* + * Clients may expect to be able to use auth_sys during mount, + * even if they use gss for everything else; see section 2.3.2 + * of rfc 2623. + */ + if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT + && exp->ex_path.dentry == dentry) + goto skip_pseudoflavor_check; + + error = check_nfsd_access(exp, rqstp); + if (error) + goto out; +skip_pseudoflavor_check: /* Finally, check access permissions. */ error = nfsd_permission(rqstp, exp, dentry, access); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0766f95d236a..5cffeca7acef 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -65,7 +65,8 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); - nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); + nfserr = fh_verify(rqstp, &resp->fh, 0, + NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); return nfsd_return_attrs(nfserr, resp); } @@ -521,7 +522,8 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); - nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); + nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, + NFSD_MAY_BYPASS_GSS_ON_ROOT); fh_put(&argp->fh); return nfserr; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 18060bed5267..1319e8027d55 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1866,9 +1866,9 @@ out: * N.B. After this call fhp needs an fh_put */ __be32 -nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) +nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) { - __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); + __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); if (!err && vfs_statfs(fhp->fh_dentry,stat)) err = nfserr_io; return err; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 108f47e5fd95..21269405ffe2 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -38,6 +38,7 @@ #define NFSD_MAY_LOCK 32 #define NFSD_MAY_OWNER_OVERRIDE 64 #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ +#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) @@ -125,7 +126,7 @@ int nfsd_truncate(struct svc_rqst *, struct svc_fh *, __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, loff_t *, struct readdir_cd *, filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, - struct kstatfs *); + struct kstatfs *, int access); int nfsd_notify_change(struct inode *, struct iattr *); __be32 nfsd_permission(struct svc_rqst *, struct svc_export *, -- cgit v1.2.3 From bfcd17a6c5529bc37234cfa720a047cf9397bcfc Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Wed, 6 Aug 2008 15:12:22 +0200 Subject: Configure out file locking features This patch adds the CONFIG_FILE_LOCKING option which allows to remove support for advisory locks. With this patch enabled, the flock() system call, the F_GETLK, F_SETLK and F_SETLKW operations of fcntl() and NFS support are disabled. These features are not necessarly needed on embedded systems. It allows to save ~11 Kb of kernel code and data: text data bss dec hex filename 1125436 118764 212992 1457192 163c28 vmlinux.old 1114299 118564 212992 1445855 160fdf vmlinux -11137 -200 0 -11337 -2C49 +/- This patch has originally been written by Matt Mackall , and is part of the Linux Tiny project. Signed-off-by: Thomas Petazzoni Signed-off-by: Matt Mackall Cc: matthew@wil.cx Cc: linux-fsdevel@vger.kernel.org Cc: mpm@selenic.com Cc: akpm@linux-foundation.org Signed-off-by: J. Bruce Fields --- fs/Kconfig | 8 ++++++++ fs/Makefile | 3 ++- fs/proc/proc_misc.c | 4 ++++ include/linux/fs.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++------- kernel/sys_ni.c | 1 + kernel/sysctl.c | 6 +++++- 6 files changed, 70 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/Kconfig b/fs/Kconfig index abccb5dab9a8..c6ae4d4842eb 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -419,6 +419,14 @@ config FS_POSIX_ACL bool default n +config FILE_LOCKING + bool "Enable POSIX file locking API" if EMBEDDED + default y + help + This option enables standard file locking support, required + for filesystems like NFS and for the flock() system + call. Disabling this option saves about 11k. + source "fs/xfs/Kconfig" source "fs/gfs2/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff15..4b86d433baaf 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -7,7 +7,7 @@ obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ - ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ + ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ @@ -27,6 +27,7 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o obj-$(CONFIG_SIGNALFD) += signalfd.o obj-$(CONFIG_TIMERFD) += timerfd.o obj-$(CONFIG_EVENTFD) += eventfd.o +obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o nfsd-$(CONFIG_NFSD) := nfsctl.o diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 29e20c6b1f7f..1aabbe2592e1 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -684,6 +684,7 @@ static int cmdline_read_proc(char *page, char **start, off_t off, return proc_calc_metrics(page, start, off, count, eof, len); } +#ifdef CONFIG_FILE_LOCKING static int locks_open(struct inode *inode, struct file *filp) { return seq_open(filp, &locks_seq_operations); @@ -695,6 +696,7 @@ static const struct file_operations proc_locks_operations = { .llseek = seq_lseek, .release = seq_release, }; +#endif /* CONFIG_FILE_LOCKING */ static int execdomains_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) @@ -888,7 +890,9 @@ void __init proc_misc_init(void) #ifdef CONFIG_PRINTK proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); #endif +#ifdef CONFIG_FILE_LOCKING proc_create("locks", 0, NULL, &proc_locks_operations); +#endif proc_create("devices", 0, NULL, &proc_devinfo_operations); proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); #ifdef CONFIG_BLOCK diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513668fe..9f540165a078 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -983,6 +983,13 @@ struct file_lock { #include +extern void send_sigio(struct fown_struct *fown, int fd, int band); + +/* fs/sync.c */ +extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, + loff_t endbyte, unsigned int flags); + +#ifdef CONFIG_FILE_LOCKING extern int fcntl_getlk(struct file *, struct flock __user *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, struct flock __user *); @@ -993,14 +1000,9 @@ extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 __user *); #endif -extern void send_sigio(struct fown_struct *fown, int fd, int band); extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); -/* fs/sync.c */ -extern int do_sync_mapping_range(struct address_space *mapping, loff_t offset, - loff_t endbyte, unsigned int flags); - /* fs/locks.c */ extern void locks_init_lock(struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *); @@ -1023,6 +1025,37 @@ extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); extern struct seq_operations locks_seq_operations; +#else /* !CONFIG_FILE_LOCKING */ +#define fcntl_getlk(a, b) ({ -EINVAL; }) +#define fcntl_setlk(a, b, c, d) ({ -EACCES; }) +#if BITS_PER_LONG == 32 +#define fcntl_getlk64(a, b) ({ -EINVAL; }) +#define fcntl_setlk64(a, b, c, d) ({ -EACCES; }) +#endif +#define fcntl_setlease(a, b, c) ({ 0; }) +#define fcntl_getlease(a) ({ 0; }) +#define locks_init_lock(a) ({ }) +#define __locks_copy_lock(a, b) ({ }) +#define locks_copy_lock(a, b) ({ }) +#define locks_remove_posix(a, b) ({ }) +#define locks_remove_flock(a) ({ }) +#define posix_test_lock(a, b) ({ 0; }) +#define posix_lock_file(a, b, c) ({ -ENOLCK; }) +#define posix_lock_file_wait(a, b) ({ -ENOLCK; }) +#define posix_unblock_lock(a, b) (-ENOENT) +#define vfs_test_lock(a, b) ({ 0; }) +#define vfs_lock_file(a, b, c, d) (-ENOLCK) +#define vfs_cancel_lock(a, b) ({ 0; }) +#define flock_lock_file_wait(a, b) ({ -ENOLCK; }) +#define __break_lease(a, b) ({ 0; }) +#define lease_get_mtime(a, b) ({ }) +#define generic_setlease(a, b, c) ({ -EINVAL; }) +#define vfs_setlease(a, b, c) ({ -EINVAL; }) +#define lease_modify(a, b) ({ -EINVAL; }) +#define lock_may_read(a, b, c) ({ 1; }) +#define lock_may_write(a, b, c) ({ 1; }) +#endif /* !CONFIG_FILE_LOCKING */ + struct fasync_struct { int magic; @@ -1554,9 +1587,12 @@ extern int vfs_statfs(struct dentry *, struct kstatfs *); /* /sys/fs */ extern struct kobject *fs_kobj; +extern int rw_verify_area(int, struct file *, loff_t *, size_t); + #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 +#ifdef CONFIG_FILE_LOCKING extern int locks_mandatory_locked(struct inode *); extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); @@ -1587,8 +1623,6 @@ static inline int locks_verify_locked(struct inode *inode) return 0; } -extern int rw_verify_area(int, struct file *, loff_t *, size_t); - static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) @@ -1609,6 +1643,15 @@ static inline int break_lease(struct inode *inode, unsigned int mode) return __break_lease(inode, mode); return 0; } +#else /* !CONFIG_FILE_LOCKING */ +#define locks_mandatory_locked(a) ({ 0; }) +#define locks_mandatory_area(a, b, c, d, e) ({ 0; }) +#define __mandatory_lock(a) ({ 0; }) +#define mandatory_lock(a) ({ 0; }) +#define locks_verify_locked(a) ({ 0; }) +#define locks_verify_truncate(a, b, c) ({ 0; }) +#define break_lease(a, b) ({ 0; }) +#endif /* CONFIG_FILE_LOCKING */ /* fs/open.c */ diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 08d6e1bb99ac..503d8d4eb80a 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -125,6 +125,7 @@ cond_syscall(sys_vm86old); cond_syscall(sys_vm86); cond_syscall(compat_sys_ipc); cond_syscall(compat_sys_sysctl); +cond_syscall(sys_flock); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 50ec0886fa3d..4588b2cf2ecb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -97,7 +97,7 @@ static int sixty = 60; static int neg_one = -1; #endif -#ifdef CONFIG_MMU +#if defined(CONFIG_MMU) && defined(CONFIG_FILE_LOCKING) static int two = 2; #endif @@ -1261,6 +1261,7 @@ static struct ctl_table fs_table[] = { .extra1 = &minolduid, .extra2 = &maxolduid, }, +#ifdef CONFIG_FILE_LOCKING { .ctl_name = FS_LEASES, .procname = "leases-enable", @@ -1269,6 +1270,7 @@ static struct ctl_table fs_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, +#endif #ifdef CONFIG_DNOTIFY { .ctl_name = FS_DIR_NOTIFY, @@ -1280,6 +1282,7 @@ static struct ctl_table fs_table[] = { }, #endif #ifdef CONFIG_MMU +#ifdef CONFIG_FILE_LOCKING { .ctl_name = FS_LEASE_TIME, .procname = "lease-break-time", @@ -1291,6 +1294,7 @@ static struct ctl_table fs_table[] = { .extra1 = &zero, .extra2 = &two, }, +#endif { .procname = "aio-nr", .data = &aio_nr, -- cgit v1.2.3 From 14aeb2118d6e9fd9ee988324c740a00c80979093 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 Aug 2008 19:34:00 -0400 Subject: SUNRPC: Simplify rpcb_register() API Bruce suggested there's no need to expose the difference between an error sending the PMAP_SET request and an error reply from the portmapper to rpcb_register's callers. The user space equivalent of rpcb_register() is pmap_set(3), which returns a bool_t : either the PMAP set worked, or it didn't. Simple. So let's remove the "*okay" argument from rpcb_register() and rpcb_v4_register(), and simply return an error if any part of the call didn't work. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/clnt.h | 4 +-- net/sunrpc/rpcb_clnt.c | 65 +++++++++++++++++++-------------------------- net/sunrpc/svc.c | 8 ++---- 3 files changed, 32 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index e5bfe01ee305..8ac8e75243a7 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -124,10 +124,10 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); -int rpcb_register(u32, u32, int, unsigned short, int *); +int rpcb_register(u32, u32, int, unsigned short); int rpcb_v4_register(const u32 program, const u32 version, const struct sockaddr *address, - const char *netid, int *result); + const char *netid); int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int); void rpcb_getport_async(struct rpc_task *); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 24db2b4d12d3..cc7250d4659b 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -176,13 +176,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, } static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, - u32 version, struct rpc_message *msg, - int *result) + u32 version, struct rpc_message *msg) { struct rpc_clnt *rpcb_clnt; - int error = 0; + int result, error = 0; - *result = 0; + msg->rpc_resp = &result; rpcb_clnt = rpcb_create_local(addr, addrlen, version); if (!IS_ERR(rpcb_clnt)) { @@ -191,12 +190,19 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, } else error = PTR_ERR(rpcb_clnt); - if (error < 0) + if (error < 0) { printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); - dprintk("RPC: registration status %d/%d\n", error, *result); + return error; + } + + if (!result) { + dprintk("RPC: registration failed\n"); + return -EACCES; + } - return error; + dprintk("RPC: registration succeeded\n"); + return 0; } /** @@ -205,7 +211,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, * @vers: RPC version number to bind * @prot: transport protocol to register * @port: port value to register - * @okay: OUT: result code + * + * Returns zero if the registration request was dispatched successfully + * and the rpcbind daemon returned success. Otherwise, returns an errno + * value that reflects the nature of the error (request could not be + * dispatched, timed out, or rpcbind returned an error). * * RPC services invoke this function to advertise their contact * information via the system's rpcbind daemon. RPC services @@ -217,15 +227,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, * all registered transports for [program, version] from the local * rpcbind database. * - * Returns zero if the registration request was dispatched - * successfully and a reply was received. The rpcbind daemon's - * boolean result code is stored in *okay. - * - * Returns an errno value and sets *result to zero if there was - * some problem that prevented the rpcbind request from being - * dispatched, or if the rpcbind daemon did not respond within - * the timeout. - * * This function uses rpcbind protocol version 2 to contact the * local rpcbind daemon. * @@ -236,7 +237,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 * addresses). */ -int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) +int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) { struct rpcbind_args map = { .r_prog = prog, @@ -246,7 +247,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) }; struct rpc_message msg = { .rpc_argp = &map, - .rpc_resp = okay, }; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " @@ -259,7 +259,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, sizeof(rpcb_inaddr_loopback), - RPCBVERS_2, &msg, okay); + RPCBVERS_2, &msg); } /* @@ -290,7 +290,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, sizeof(rpcb_inaddr_loopback), - RPCBVERS_4, msg, msg->rpc_resp); + RPCBVERS_4, msg); } /* @@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, sizeof(rpcb_in6addr_loopback), - RPCBVERS_4, msg, msg->rpc_resp); + RPCBVERS_4, msg); } /** @@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, * @version: RPC version number of service to (un)register * @address: address family, IP address, and port to (un)register * @netid: netid of transport protocol to (un)register - * @result: result code from rpcbind RPC call + * + * Returns zero if the registration request was dispatched successfully + * and the rpcbind daemon returned success. Otherwise, returns an errno + * value that reflects the nature of the error (request could not be + * dispatched, timed out, or rpcbind returned an error). * * RPC services invoke this function to advertise their contact * information via the system's rpcbind daemon. RPC services @@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, * to zero. Callers pass a netid of "" to unregister all * transport netids associated with [program, version, address]. * - * Returns zero if the registration request was dispatched - * successfully and a reply was received. The rpcbind daemon's - * result code is stored in *result. - * - * Returns an errno value and sets *result to zero if there was - * some problem that prevented the rpcbind request from being - * dispatched, or if the rpcbind daemon did not respond within - * the timeout. - * * This function uses rpcbind protocol version 4 to contact the * local rpcbind daemon. The local rpcbind daemon must support * version 4 of the rpcbind protocol in order for these functions @@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, * advertises the service on all IPv4 and IPv6 addresses. */ int rpcb_v4_register(const u32 program, const u32 version, - const struct sockaddr *address, const char *netid, - int *result) + const struct sockaddr *address, const char *netid) { struct rpcbind_args map = { .r_prog = program, @@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version, }; struct rpc_message msg = { .rpc_argp = &map, - .rpc_resp = result, }; - *result = 0; - switch (address->sa_family) { case AF_INET: return rpcb_register_netid4((struct sockaddr_in *)address, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9ba17044109d..9805143d0660 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -730,7 +730,7 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) struct svc_program *progp; unsigned long flags; unsigned int i; - int error = 0, dummy; + int error = 0; if (!port) clear_thread_flag(TIF_SIGPENDING); @@ -751,13 +751,9 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) if (progp->pg_vers[i]->vs_hidden) continue; - error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); + error = rpcb_register(progp->pg_prog, i, proto, port); if (error < 0) break; - if (port && !dummy) { - error = -EACCES; - break; - } } } -- cgit v1.2.3 From a26cfad6e0a308a2c68df1f1ef50aabd48b17e6d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 Aug 2008 19:34:16 -0400 Subject: SUNRPC: Support IPv6 when registering kernel RPC services In order to advertise NFS-related services on IPv6 interfaces via rpcbind, the kernel RPC server implementation must use rpcb_v4_register() instead of rpcb_register(). A new kernel build option allows distributions to use the legacy v2 call until they integrate an appropriate user-space rpcbind daemon that can support IPv6 RPC services. I tried adding some automatic logic to fall back if registering with a v4 protocol request failed, but there are too many corner cases. So I just made it a compile-time switch that distributions can throw when they've replaced portmapper with rpcbind. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/Kconfig | 22 +++++++++++ include/linux/sunrpc/svc.h | 4 +- net/sunrpc/svc.c | 95 ++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 113 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/Kconfig b/fs/Kconfig index c6ae4d4842eb..ed57a5a37250 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1773,6 +1773,28 @@ config SUNRPC_XPRT_RDMA If unsure, say N. +config SUNRPC_REGISTER_V4 + bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + default n + help + Sun added support for registering RPC services at an IPv6 + address by creating two new versions of the rpcbind protocol + (RFC 1833). + + This option enables support in the kernel RPC server for + registering kernel RPC services via version 4 of the rpcbind + protocol. If you enable this option, you must run a portmapper + daemon that supports rpcbind protocol version 4. + + Serving NFS over IPv6 from knfsd (the kernel's NFS server) + requires that you enable this option and use a portmapper that + supports rpcbind version 4. + + If unsure, say N to get traditional behavior (register kernel + RPC services using only rpcbind version 2). Distributions + using the legacy Linux portmapper daemon must say N here. + config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 23143f38b121..54a79e1ad634 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -393,7 +393,9 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); -int svc_register(struct svc_serv *, int, unsigned short); +int svc_register(const struct svc_serv *, const unsigned short, + const unsigned short); + void svc_wake_up(struct svc_serv *); void svc_reserve(struct svc_rqst *rqstp, int space); struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9eb78a771da5..c43ccb628052 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -719,13 +719,92 @@ svc_exit_thread(struct svc_rqst *rqstp) } EXPORT_SYMBOL(svc_exit_thread); +#ifdef CONFIG_SUNRPC_REGISTER_V4 /* - * Register an RPC service with the local portmapper. - * To unregister a service, call this routine with - * proto and port == 0. + * Registering kernel RPC services with rpcbind version 2 will work + * over either IPv4 or IPv6, since the Linux kernel always registers + * services for the "any" address. + * + * However, the local rpcbind daemon listens on either only AF_INET + * or AF_INET6 (never both). When it listens on AF_INET6, an rpcbind + * version 2 registration will result in registering the service at + * IN6ADDR_ANY, even if the RPC service being registered is not + * IPv6-enabled. + * + * Rpcbind version 4 allows us to be a little more specific. Kernel + * RPC services that don't yet support AF_INET6 can register + * themselves as IPv4-only with the local rpcbind daemon, even if the + * daemon is listening only on AF_INET6. + * + * And, registering IPv6-enabled kernel RPC services via AF_INET6 + * verifies that the local user space rpcbind daemon is properly + * configured to support remote AF_INET6 rpcbind requests. + * + * An AF_INET6 registration request will fail if the local rpcbind + * daemon is not set up to listen on AF_INET6. Likewise, we fail + * AF_INET6 registration requests if svc_register() is configured to + * support only rpcbind version 2. */ -int -svc_register(struct svc_serv *serv, int proto, unsigned short port) +static int __svc_register(const u32 program, const u32 version, + const sa_family_t family, + const unsigned short protocol, + const unsigned short port) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + .sin_port = htons(port), + }; + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = htons(port), + }; + struct sockaddr *sap; + char *netid; + + switch (family) { + case AF_INET: + sap = (struct sockaddr *)&sin; + netid = RPCBIND_NETID_TCP; + if (protocol == IPPROTO_UDP) + netid = RPCBIND_NETID_UDP; + break; + case AF_INET6: + sap = (struct sockaddr *)&sin6; + netid = RPCBIND_NETID_TCP6; + if (protocol == IPPROTO_UDP) + netid = RPCBIND_NETID_UDP6; + break; + default: + return -EAFNOSUPPORT; + } + + return rpcb_v4_register(program, version, sap, netid); +} +#else +static int __svc_register(const u32 program, const u32 version, + sa_family_t family, + const unsigned short protocol, + const unsigned short port) +{ + if (family != AF_INET) + return -EAFNOSUPPORT; + + return rpcb_register(program, version, protocol, port); +} +#endif + +/** + * svc_register - register an RPC service with the local portmapper + * @serv: svc_serv struct for the service to register + * @proto: transport protocol number to advertise + * @port: port to advertise + * + * Service is registered for any address in serv's address family + */ +int svc_register(const struct svc_serv *serv, const unsigned short proto, + const unsigned short port) { struct svc_program *progp; unsigned int i; @@ -738,8 +817,9 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) if (progp->pg_vers[i] == NULL) continue; - dprintk("svc: svc_register(%s, %s, %d, %d)%s\n", + dprintk("svc: svc_register(%s, %u, %s, %u, %d)%s\n", progp->pg_name, + serv->sv_family, proto == IPPROTO_UDP? "udp" : "tcp", port, i, @@ -749,7 +829,8 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port) if (progp->pg_vers[i]->vs_hidden) continue; - error = rpcb_register(progp->pg_prog, i, proto, port); + error = __svc_register(progp->pg_prog, i, + serv->sv_family, proto, port); if (error < 0) break; } -- cgit v1.2.3 From 1b333c54a15a746ff6b04a684b0845a66daacef2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Aug 2008 16:57:23 -0400 Subject: lockd: address-family independent printable addresses Knowing which source address is used for communicating with remote NLM services can be helpful for debugging configuration problems on hosts with multiple addresses. Keep the dprintk debugging here, but adapt it so it displays AF_INET6 addresses properly. There are also a couple of dprintk clean-ups as well. At some point we will aggregate the helpers that display presentation format addresses into a single set of shared helpers. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 78 +++++++++++++++++++++++++++++++++++---------- include/linux/lockd/lockd.h | 4 +++ 2 files changed, 65 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index cb26e3d952a2..e5dcfa57e099 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -11,12 +11,14 @@ #include #include #include +#include #include #include #include #include #include +#include #define NLMDBG_FACILITY NLMDBG_HOSTCACHE #define NLM_HOST_NRHASH 32 @@ -38,6 +40,32 @@ static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, const char *hostname, unsigned int hostname_len); +static void nlm_display_address(const struct sockaddr *sap, + char *buf, const size_t len) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + + switch (sap->sa_family) { + case AF_UNSPEC: + snprintf(buf, len, "unspecified"); + break; + case AF_INET: + snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); + break; + case AF_INET6: + if (ipv6_addr_v4mapped(&sin6->sin6_addr)) + snprintf(buf, len, NIPQUAD_FMT, + NIPQUAD(sin6->sin6_addr.s6_addr32[3])); + else + snprintf(buf, len, NIP6_FMT, NIP6(sin6->sin6_addr)); + break; + default: + snprintf(buf, len, "unsupported address family"); + break; + } +} + /* * Common host lookup routine for server & client */ @@ -54,14 +82,10 @@ static struct nlm_host *nlm_lookup_host(int server, struct nsm_handle *nsm = NULL; int hash; - dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT - ", p=%d, v=%u, my role=%s, name=%.*s)\n", - NIPQUAD(ssin->sin_addr.s_addr), - NIPQUAD(sin->sin_addr.s_addr), proto, version, - server? "server" : "client", - hostname_len, - hostname? hostname : ""); - + dprintk("lockd: nlm_lookup_host(proto=%d, vers=%u," + " my role is %s, hostname=%.*s)\n", + proto, version, server ? "server" : "client", + hostname_len, hostname ? hostname : ""); hash = NLM_ADDRHASH(sin->sin_addr.s_addr); @@ -101,6 +125,8 @@ static struct nlm_host *nlm_lookup_host(int server, hlist_add_head(&host->h_hash, chain); nlm_get_host(host); + dprintk("lockd: nlm_lookup_host found host %s (%s)\n", + host->h_name, host->h_addrbuf); goto out; } @@ -113,13 +139,17 @@ static struct nlm_host *nlm_lookup_host(int server, else { host = NULL; nsm = nsm_find(sin, hostname, hostname_len); - if (!nsm) + if (!nsm) { + dprintk("lockd: nlm_lookup_host failed; " + "no nsm handle\n"); goto out; + } } host = kzalloc(sizeof(*host), GFP_KERNEL); if (!host) { nsm_release(nsm); + dprintk("lockd: nlm_lookup_host failed; no memory\n"); goto out; } host->h_name = nsm->sm_name; @@ -146,6 +176,15 @@ static struct nlm_host *nlm_lookup_host(int server, INIT_LIST_HEAD(&host->h_reclaim); nrhosts++; + + nlm_display_address((struct sockaddr *)&host->h_addr, + host->h_addrbuf, sizeof(host->h_addrbuf)); + nlm_display_address((struct sockaddr *)&host->h_saddr, + host->h_saddrbuf, sizeof(host->h_saddrbuf)); + + dprintk("lockd: nlm_lookup_host created host %s\n", + host->h_name); + out: mutex_unlock(&nlm_host_mutex); return host; @@ -210,9 +249,8 @@ nlm_bind_host(struct nlm_host *host) { struct rpc_clnt *clnt; - dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n", - NIPQUAD(host->h_saddr.sin_addr), - NIPQUAD(host->h_addr.sin_addr)); + dprintk("lockd: nlm_bind_host %s (%s), my addr=%s\n", + host->h_name, host->h_addrbuf, host->h_saddrbuf); /* Lock host handle */ mutex_lock(&host->h_mutex); @@ -224,7 +262,7 @@ nlm_bind_host(struct nlm_host *host) if (time_after_eq(jiffies, host->h_nextrebind)) { rpc_force_rebind(clnt); host->h_nextrebind = jiffies + NLM_HOST_REBIND; - dprintk("lockd: next rebind in %ld jiffies\n", + dprintk("lockd: next rebind in %lu jiffies\n", host->h_nextrebind - jiffies); } } else { @@ -327,12 +365,16 @@ void nlm_host_rebooted(const struct sockaddr_in *sin, struct nsm_handle *nsm; struct nlm_host *host; - dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n", - hostname, NIPQUAD(sin->sin_addr)); - /* Find the NSM handle for this peer */ - if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0))) + nsm = __nsm_find(sin, hostname, hostname_len, 0); + if (nsm == NULL) { + dprintk("lockd: never saw rebooted peer '%.*s' before\n", + hostname_len, hostname); return; + } + + dprintk("lockd: nlm_host_rebooted(%.*s, %s)\n", + hostname_len, hostname, nsm->sm_addrbuf); /* When reclaiming locks on this peer, make sure that * we set up a new notification */ @@ -516,6 +558,8 @@ retry: nsm->sm_name = (char *) (nsm + 1); memcpy(nsm->sm_name, hostname, hostname_len); nsm->sm_name[hostname_len] = '\0'; + nlm_display_address((struct sockaddr *)&nsm->sm_addr, + nsm->sm_addrbuf, sizeof(nsm->sm_addrbuf)); atomic_set(&nsm->sm_count, 1); goto retry; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index dbb87ab282e8..0691efbd0b34 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -61,6 +61,9 @@ struct nlm_host { struct list_head h_granted; /* Locks in GRANTED state */ struct list_head h_reclaim; /* Locks in RECLAIM state */ struct nsm_handle * h_nsmhandle; /* NSM status handle */ + + char h_addrbuf[48], /* address eyecatchers */ + h_saddrbuf[48]; }; struct nsm_handle { @@ -70,6 +73,7 @@ struct nsm_handle { struct sockaddr_in sm_addr; unsigned int sm_monitored : 1, sm_sticky : 1; /* don't unmonitor */ + char sm_addrbuf[48]; /* address eyecatcher */ }; /* -- cgit v1.2.3 From 5344b12d4f97d4a9a62d806425977a6ff64b6baf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 27 Aug 2008 16:57:46 -0400 Subject: SUNRPC: Make svc_addr's argument a constant Clean up: Add extra type safety and squelch a few compiler complaints in upcoming patches. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 54a79e1ad634..3afe7fb403b2 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -266,17 +266,17 @@ struct svc_rqst { /* * Rigorous type checking on sockaddr type conversions */ -static inline struct sockaddr_in *svc_addr_in(struct svc_rqst *rqst) +static inline struct sockaddr_in *svc_addr_in(const struct svc_rqst *rqst) { return (struct sockaddr_in *) &rqst->rq_addr; } -static inline struct sockaddr_in6 *svc_addr_in6(struct svc_rqst *rqst) +static inline struct sockaddr_in6 *svc_addr_in6(const struct svc_rqst *rqst) { return (struct sockaddr_in6 *) &rqst->rq_addr; } -static inline struct sockaddr *svc_addr(struct svc_rqst *rqst) +static inline struct sockaddr *svc_addr(const struct svc_rqst *rqst) { return (struct sockaddr *) &rqst->rq_addr; } -- cgit v1.2.3 From b4ed58fd34d4def88bda59f9cc566ec9fca6a096 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Sep 2008 14:35:39 -0400 Subject: lockd: Use sockaddr_storage + length for h_addr field To store larger addresses in the nlm_host structure, make h_addr a sockaddr_storage, and add an address length field. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 2 +- fs/lockd/host.c | 11 ++++++----- include/linux/lockd/lockd.h | 16 +++++++++++++++- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 0b45fd3a4bfd..0df5587f804e 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) + if (!nlm_cmp_addr(nlm_addr_in(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 008e4026f540..8c7022eeae65 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -116,7 +116,7 @@ static struct nlm_host *nlm_lookup_host(int server, */ chain = &nlm_hosts[hash]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (!nlm_cmp_addr(&host->h_addr, sin)) + if (!nlm_cmp_addr(nlm_addr_in(host), sin)) continue; /* See if we have an NSM handle for this client */ @@ -165,8 +165,9 @@ static struct nlm_host *nlm_lookup_host(int server, goto out; } host->h_name = nsm->sm_name; - host->h_addr = *sin; - nlm_clear_port((struct sockaddr *)&host->h_addr); + memcpy(nlm_addr(host), sin, sizeof(*sin)); + host->h_addrlen = sizeof(*sin); + nlm_clear_port(nlm_addr(host)); host->h_saddr = *ssin; host->h_version = version; host->h_proto = proto; @@ -291,8 +292,8 @@ nlm_bind_host(struct nlm_host *host) }; struct rpc_create_args args = { .protocol = host->h_proto, - .address = (struct sockaddr *)&host->h_addr, - .addrsize = sizeof(host->h_addr), + .address = nlm_addr(host), + .addrsize = host->h_addrlen, .saddress = (struct sockaddr *)&host->h_saddr, .timeout = &timeparms, .servername = host->h_name, diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0691efbd0b34..41d7a8e61cea 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -38,7 +38,8 @@ */ struct nlm_host { struct hlist_node h_hash; /* doubly linked list */ - struct sockaddr_in h_addr; /* peer address */ + struct sockaddr_storage h_addr; /* peer address */ + size_t h_addrlen; struct sockaddr_in h_saddr; /* our address (optional) */ struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ char * h_name; /* remote hostname */ @@ -76,6 +77,19 @@ struct nsm_handle { char sm_addrbuf[48]; /* address eyecatcher */ }; +/* + * Rigorous type checking on sockaddr type conversions + */ +static inline struct sockaddr_in *nlm_addr_in(const struct nlm_host *host) +{ + return (struct sockaddr_in *)&host->h_addr; +} + +static inline struct sockaddr *nlm_addr(const struct nlm_host *host) +{ + return (struct sockaddr *)&host->h_addr; +} + /* * Map an fl_owner_t into a unique 32-bit "pid" */ -- cgit v1.2.3 From 90151e6e4d00a3150d03d52170c246734b274622 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Sep 2008 14:35:46 -0400 Subject: lockd: Use sockaddr_storage for h_saddr field To store larger addresses in the nlm_host structure, make h_saddr a sockaddr_storage. And let's call it something more self-explanatory: "saddr" could easily be mistaken for "server address". Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 12 ++++++------ fs/lockd/svcsubs.c | 2 +- include/linux/lockd/lockd.h | 14 ++++++++++++-- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 8c7022eeae65..3ce2702d0368 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -129,7 +129,7 @@ static struct nlm_host *nlm_lookup_host(int server, continue; if (host->h_server != server) continue; - if (!nlm_cmp_addr(&host->h_saddr, ssin)) + if (!nlm_cmp_addr(nlm_srcaddr_in(host), ssin)) continue; /* Move to head of hash chain. */ @@ -168,7 +168,7 @@ static struct nlm_host *nlm_lookup_host(int server, memcpy(nlm_addr(host), sin, sizeof(*sin)); host->h_addrlen = sizeof(*sin); nlm_clear_port(nlm_addr(host)); - host->h_saddr = *ssin; + memcpy(nlm_srcaddr(host), ssin, sizeof(*ssin)); host->h_version = version; host->h_proto = proto; host->h_rpcclnt = NULL; @@ -192,8 +192,8 @@ static struct nlm_host *nlm_lookup_host(int server, nlm_display_address((struct sockaddr *)&host->h_addr, host->h_addrbuf, sizeof(host->h_addrbuf)); - nlm_display_address((struct sockaddr *)&host->h_saddr, - host->h_saddrbuf, sizeof(host->h_saddrbuf)); + nlm_display_address((struct sockaddr *)&host->h_srcaddr, + host->h_srcaddrbuf, sizeof(host->h_srcaddrbuf)); dprintk("lockd: nlm_lookup_host created host %s\n", host->h_name); @@ -267,7 +267,7 @@ nlm_bind_host(struct nlm_host *host) struct rpc_clnt *clnt; dprintk("lockd: nlm_bind_host %s (%s), my addr=%s\n", - host->h_name, host->h_addrbuf, host->h_saddrbuf); + host->h_name, host->h_addrbuf, host->h_srcaddrbuf); /* Lock host handle */ mutex_lock(&host->h_mutex); @@ -294,7 +294,7 @@ nlm_bind_host(struct nlm_host *host) .protocol = host->h_proto, .address = nlm_addr(host), .addrsize = host->h_addrlen, - .saddress = (struct sockaddr *)&host->h_saddr, + .saddress = nlm_srcaddr(host), .timeout = &timeparms, .servername = host->h_name, .program = &nlm_program, diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 198b4e55b373..d3d1330d7c27 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - return nlm_cmp_addr(&host->h_saddr, datap); + return nlm_cmp_addr(nlm_srcaddr_in(host), datap); } /** diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 41d7a8e61cea..964e6c93830f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -40,7 +40,7 @@ struct nlm_host { struct hlist_node h_hash; /* doubly linked list */ struct sockaddr_storage h_addr; /* peer address */ size_t h_addrlen; - struct sockaddr_in h_saddr; /* our address (optional) */ + struct sockaddr_storage h_srcaddr; /* our address (optional) */ struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ char * h_name; /* remote hostname */ u32 h_version; /* interface version */ @@ -64,7 +64,7 @@ struct nlm_host { struct nsm_handle * h_nsmhandle; /* NSM status handle */ char h_addrbuf[48], /* address eyecatchers */ - h_saddrbuf[48]; + h_srcaddrbuf[48]; }; struct nsm_handle { @@ -90,6 +90,16 @@ static inline struct sockaddr *nlm_addr(const struct nlm_host *host) return (struct sockaddr *)&host->h_addr; } +static inline struct sockaddr_in *nlm_srcaddr_in(const struct nlm_host *host) +{ + return (struct sockaddr_in *)&host->h_srcaddr; +} + +static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) +{ + return (struct sockaddr *)&host->h_srcaddr; +} + /* * Map an fl_owner_t into a unique 32-bit "pid" */ -- cgit v1.2.3 From 7e9d7746bfd40121438b155023793796499497d8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Sep 2008 14:35:54 -0400 Subject: NSM: Use sockaddr_storage for sm_addr field To store larger addresses in the nsm_handle structure, make sm_addr a sockaddr_storage. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 5 +++-- fs/lockd/mon.c | 2 +- include/linux/lockd/lockd.h | 13 ++++++++++++- 3 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 3ce2702d0368..510ebcf485f0 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -551,7 +551,7 @@ retry: if (strlen(pos->sm_name) != hostname_len || memcmp(pos->sm_name, hostname, hostname_len)) continue; - } else if (!nlm_cmp_addr(&pos->sm_addr, sin)) + } else if (!nlm_cmp_addr(nsm_addr_in(pos), sin)) continue; atomic_inc(&pos->sm_count); kfree(nsm); @@ -571,7 +571,8 @@ retry: if (nsm == NULL) return NULL; - nsm->sm_addr = *sin; + memcpy(nsm_addr(nsm), sin, sizeof(*sin)); + nsm->sm_addrlen = sizeof(*sin); nsm->sm_name = (char *) (nsm + 1); memcpy(nsm->sm_name, hostname, hostname_len); nsm->sm_name[hostname_len] = '\0'; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e4d563543b11..4e7e958e8f67 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -51,7 +51,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) memset(&args, 0, sizeof(args)); args.mon_name = nsm->sm_name; - args.addr = nsm->sm_addr.sin_addr.s_addr; + args.addr = nsm_addr_in(nsm)->sin_addr.s_addr; args.prog = NLM_PROGRAM; args.vers = 3; args.proc = NLMPROC_NSM_NOTIFY; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 964e6c93830f..b1dfa0b1d1bc 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -71,7 +71,8 @@ struct nsm_handle { struct list_head sm_link; atomic_t sm_count; char * sm_name; - struct sockaddr_in sm_addr; + struct sockaddr_storage sm_addr; + size_t sm_addrlen; unsigned int sm_monitored : 1, sm_sticky : 1; /* don't unmonitor */ char sm_addrbuf[48]; /* address eyecatcher */ @@ -100,6 +101,16 @@ static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) return (struct sockaddr *)&host->h_srcaddr; } +static inline struct sockaddr_in *nsm_addr_in(const struct nsm_handle *handle) +{ + return (struct sockaddr_in *)&handle->sm_addr; +} + +static inline struct sockaddr *nsm_addr(const struct nsm_handle *handle) +{ + return (struct sockaddr *)&handle->sm_addr; +} + /* * Map an fl_owner_t into a unique 32-bit "pid" */ -- cgit v1.2.3 From 781b61a6f4ff94cb8c14cf598b547f5d5c490969 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 3 Sep 2008 14:36:01 -0400 Subject: lockd: Teach nlm_cmp_addr() to support AF_INET6 addresses Update the nlm_cmp_addr() helper to support AF_INET6 as well as AF_INET addresses. New version takes two "struct sockaddr *" arguments instead of "struct sockaddr_in *" arguments. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 3 ++- fs/lockd/host.c | 6 +++--- fs/lockd/svcsubs.c | 2 +- include/linux/lockd/lockd.h | 36 ++++++++++++++++++++++++++++++++---- 4 files changed, 38 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 0df5587f804e..237224a3c420 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -166,7 +166,8 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(nlm_addr_in(block->b_host), addr)) + if (!nlm_cmp_addr(nlm_addr(block->b_host), + (struct sockaddr *)addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 510ebcf485f0..dbf3fe620a0c 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -116,7 +116,7 @@ static struct nlm_host *nlm_lookup_host(int server, */ chain = &nlm_hosts[hash]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (!nlm_cmp_addr(nlm_addr_in(host), sin)) + if (!nlm_cmp_addr(nlm_addr(host), (struct sockaddr *)sin)) continue; /* See if we have an NSM handle for this client */ @@ -129,7 +129,7 @@ static struct nlm_host *nlm_lookup_host(int server, continue; if (host->h_server != server) continue; - if (!nlm_cmp_addr(nlm_srcaddr_in(host), ssin)) + if (!nlm_cmp_addr(nlm_srcaddr(host), (struct sockaddr *)ssin)) continue; /* Move to head of hash chain. */ @@ -551,7 +551,7 @@ retry: if (strlen(pos->sm_name) != hostname_len || memcmp(pos->sm_name, hostname, hostname_len)) continue; - } else if (!nlm_cmp_addr(nsm_addr_in(pos), sin)) + } else if (!nlm_cmp_addr(nsm_addr(pos), (struct sockaddr *)sin)) continue; atomic_inc(&pos->sm_count); kfree(nsm); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index d3d1330d7c27..34c2766e27c7 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - return nlm_cmp_addr(nlm_srcaddr_in(host), datap); + return nlm_cmp_addr(nlm_srcaddr(host), datap); } /** diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b1dfa0b1d1bc..ec8af115843d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -12,6 +12,8 @@ #ifdef __KERNEL__ #include +#include +#include #include #include #include @@ -272,13 +274,39 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) return file->f_file->f_path.dentry->d_inode; } +static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); +} + /* - * Compare two host addresses (needs modifying for ipv6) + * Compare two host addresses + * + * Return TRUE if the addresses are the same; otherwise FALSE. */ -static inline int nlm_cmp_addr(const struct sockaddr_in *sin1, - const struct sockaddr_in *sin2) +static inline int nlm_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) { - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __nlm_cmp_addr4(sap1, sap2); + case AF_INET6: + return __nlm_cmp_addr6(sap1, sap2); + } + } + return 0; } /* -- cgit v1.2.3 From d5b337b4877f7c4e1d761434ee04d045b0201e03 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 28 Sep 2008 09:21:26 +0300 Subject: nfsd: use nfs client rpc callback program since commit ff7d9756b501744540be65e172d27ee321d86103 "nfsd: use static memory for callback program and stats" do_probe_callback uses a static callback program (NFS4_CALLBACK) rather than the one set in clp->cl_callback.cb_prog as passed in by the client in setclientid (4.0) or create_session (4.1). This patches introduces rpc_create_args.prognumber that allows overriding program->number when creating rpc_clnt. Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 1 + include/linux/sunrpc/clnt.h | 1 + net/sunrpc/clnt.c | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index f7c793a5b803..094747a1227c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -380,6 +380,7 @@ static int do_probe_callback(void *data) .addrsize = sizeof(addr), .timeout = &timeparms, .program = &cb_program, + .prognumber = cb->cb_prog, .version = nfs_cb_version[1]->number, .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 8ac8e75243a7..6f0ee1b84a4f 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -104,6 +104,7 @@ struct rpc_create_args { const struct rpc_timeout *timeout; char *servername; struct rpc_program *program; + u32 prognumber; /* overrides program->number */ u32 version; rpc_authflavor_t authflavor; unsigned long flags; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 76739e928d0d..da0789fa1b88 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; - clnt->cl_prog = program->number; + clnt->cl_prog = args->prognumber ? : program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); -- cgit v1.2.3 From af558e33bedab672f5cfd3260bce7445e353fe21 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Sep 2007 12:34:25 -0400 Subject: nfsd: common grace period control Rewrite grace period code to unify management of grace period across lockd and nfsd. The current code has lockd and nfsd cooperate to compute a grace period which is satisfactory to them both, and then individually enforce it. This creates a slight race condition, since the enforcement is not coordinated. It's also more complicated than necessary. Here instead we have lockd and nfsd each inform common code when they enter the grace period, and when they're ready to leave the grace period, and allow normal locking only after both of them are ready to leave. We also expect the locks_start_grace()/locks_end_grace() interface here to be simpler to build on for future cluster/high-availability work, which may require (for example) putting individual filesystems into grace, or enforcing grace periods across multiple cluster nodes. Signed-off-by: J. Bruce Fields --- fs/lockd/Makefile | 2 +- fs/lockd/grace.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++ fs/lockd/svc.c | 20 ++++------------ fs/lockd/svc4proc.c | 12 +++++----- fs/lockd/svcproc.c | 12 +++++----- fs/nfsd/lockd.c | 1 - fs/nfsd/nfs4proc.c | 8 +++---- fs/nfsd/nfs4state.c | 34 ++++++++++++-------------- include/linux/fs.h | 8 +++++++ include/linux/lockd/bind.h | 9 ------- 10 files changed, 104 insertions(+), 61 deletions(-) create mode 100644 fs/lockd/grace.c (limited to 'include') diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index 7725a0a9a555..97f6073ab339 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_LOCKD) += lockd.o lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ - svcproc.o svcsubs.o mon.o xdr.o + svcproc.o svcsubs.o mon.o xdr.o grace.o lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o lockd-objs := $(lockd-objs-y) diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c new file mode 100644 index 000000000000..183cc1f0af1c --- /dev/null +++ b/fs/lockd/grace.c @@ -0,0 +1,59 @@ +/* + * Common code for control of lockd and nfsv4 grace periods. + */ + +#include +#include + +static LIST_HEAD(grace_list); +static DEFINE_SPINLOCK(grace_lock); + +/** + * locks_start_grace + * @lm: who this grace period is for + * + * A grace period is a period during which locks should not be given + * out. Currently grace periods are only enforced by the two lock + * managers (lockd and nfsd), using the locks_in_grace() function to + * check when they are in a grace period. + * + * This function is called to start a grace period. + */ +void locks_start_grace(struct lock_manager *lm) +{ + spin_lock(&grace_lock); + list_add(&lm->list, &grace_list); + spin_unlock(&grace_lock); +} +EXPORT_SYMBOL_GPL(locks_start_grace); + +/** + * locks_end_grace + * @lm: who this grace period is for + * + * Call this function to state that the given lock manager is ready to + * resume regular locking. The grace period will not end until all lock + * managers that called locks_start_grace() also call locks_end_grace(). + * Note that callers count on it being safe to call this more than once, + * and the second call should be a no-op. + */ +void locks_end_grace(struct lock_manager *lm) +{ + spin_lock(&grace_lock); + list_del_init(&lm->list); + spin_unlock(&grace_lock); +} +EXPORT_SYMBOL_GPL(locks_end_grace); + +/** + * locks_in_grace + * + * Lock managers call this function to determine when it is OK for them + * to answer ordinary lock requests, and when they should accept only + * lock reclaims. + */ +int locks_in_grace(void) +{ + return !list_empty(&grace_list); +} +EXPORT_SYMBOL_GPL(locks_in_grace); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f345ef7fb8ae..f013aed11533 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -51,7 +51,6 @@ static DEFINE_MUTEX(nlmsvc_mutex); static unsigned int nlmsvc_users; static struct task_struct *nlmsvc_task; static struct svc_rqst *nlmsvc_rqst; -int nlmsvc_grace_period; unsigned long nlmsvc_timeout; /* @@ -85,30 +84,21 @@ static unsigned long get_lockd_grace_period(void) return nlm_timeout * 5 * HZ; } -unsigned long get_nfs_grace_period(void) -{ - unsigned long lockdgrace = get_lockd_grace_period(); - unsigned long nfsdgrace = 0; - - if (nlmsvc_ops) - nfsdgrace = nlmsvc_ops->get_grace_period(); - - return max(lockdgrace, nfsdgrace); -} -EXPORT_SYMBOL(get_nfs_grace_period); +static struct lock_manager lockd_manager = { +}; static void grace_ender(struct work_struct *not_used) { - nlmsvc_grace_period = 0; + locks_end_grace(&lockd_manager); } static DECLARE_DELAYED_WORK(grace_period_end, grace_ender); static void set_grace_period(void) { - unsigned long grace_period = get_nfs_grace_period() + jiffies; + unsigned long grace_period = get_lockd_grace_period(); - nlmsvc_grace_period = 1; + locks_start_grace(&lockd_manager); cancel_delayed_work_sync(&grace_period_end); schedule_delayed_work(&grace_period_end, grace_period); } diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4a714f64515b..7ca617367b3e 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -89,7 +89,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept test requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rc; } @@ -123,7 +123,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if (locks_in_grace() && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rc; } @@ -169,7 +169,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -202,7 +202,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -341,7 +341,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if (locks_in_grace() && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -374,7 +374,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 76262c1986f2..1b013e198804 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -118,7 +118,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept test requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rc; } @@ -153,7 +153,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if (locks_in_grace() && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rc; } @@ -199,7 +199,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -232,7 +232,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -373,7 +373,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept new lock requests during grace period */ - if (nlmsvc_grace_period && !argp->reclaim) { + if (locks_in_grace() && !argp->reclaim) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } @@ -406,7 +406,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; /* Don't accept requests during grace period */ - if (nlmsvc_grace_period) { + if (locks_in_grace()) { resp->status = nlm_lck_denied_grace_period; return rpc_success; } diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 15c6faeec77c..b2786a5f9afe 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -70,7 +70,6 @@ nlm_fclose(struct file *filp) static struct nlmsvc_binding nfsd_nlm_ops = { .fopen = nlm_fopen, /* open file for locking */ .fclose = nlm_fclose, /* close file */ - .get_grace_period = get_nfs4_grace_period, }; void diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e5b51ffafc6c..669461e291ae 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -201,10 +201,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* Openowner is now set, so sequence id will get bumped. Now we need * these checks before we do any creates: */ status = nfserr_grace; - if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) goto out; status = nfserr_no_grace; - if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) + if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) goto out; switch (open->op_claim_type) { @@ -575,7 +575,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, { __be32 status; - if (nfs4_in_grace()) + if (locks_in_grace()) return nfserr_grace; status = nfsd_unlink(rqstp, &cstate->current_fh, 0, remove->rm_name, remove->rm_namelen); @@ -596,7 +596,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!cstate->save_fh.fh_dentry) return status; - if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags + if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK)) return nfserr_grace; status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1578d7a2667e..0cc7ff5d5ab5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -61,7 +61,6 @@ static time_t lease_time = 90; /* default lease time */ static time_t user_lease_time = 90; static time_t boot_time; -static int in_grace = 1; static u32 current_ownerid = 1; static u32 current_fileid = 1; static u32 current_delegid = 1; @@ -1640,7 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta case NFS4_OPEN_CLAIM_NULL: /* Let's not give out any delegations till everyone's * had the chance to reclaim theirs.... */ - if (nfs4_in_grace()) + if (locks_in_grace()) goto out; if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) goto out; @@ -1816,12 +1815,15 @@ out: return status; } +struct lock_manager nfsd4_manager = { +}; + static void -end_grace(void) +nfsd4_end_grace(void) { dprintk("NFSD: end of grace period\n"); nfsd4_recdir_purge_old(); - in_grace = 0; + locks_end_grace(&nfsd4_manager); } static time_t @@ -1838,8 +1840,8 @@ nfs4_laundromat(void) nfs4_lock_state(); dprintk("NFSD: laundromat service - starting\n"); - if (in_grace) - end_grace(); + if (locks_in_grace()) + nfsd4_end_grace(); list_for_each_safe(pos, next, &client_lru) { clp = list_entry(pos, struct nfs4_client, cl_lru); if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { @@ -1974,7 +1976,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) return nfserr_bad_stateid; else if (ONE_STATEID(stateid) && (flags & RD_STATE)) return nfs_ok; - else if (nfs4_in_grace()) { + else if (locks_in_grace()) { /* Answer in remaining cases depends on existance of * conflicting state; so we must wait out the grace period. */ return nfserr_grace; @@ -1993,7 +1995,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) static inline int io_during_grace_disallowed(struct inode *inode, int flags) { - return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) + return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) && mandatory_lock(inode); } @@ -2693,10 +2695,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, filp = lock_stp->st_vfs_file; status = nfserr_grace; - if (nfs4_in_grace() && !lock->lk_reclaim) + if (locks_in_grace() && !lock->lk_reclaim) goto out; status = nfserr_no_grace; - if (!nfs4_in_grace() && lock->lk_reclaim) + if (!locks_in_grace() && lock->lk_reclaim) goto out; locks_init_lock(&file_lock); @@ -2779,7 +2781,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, int error; __be32 status; - if (nfs4_in_grace()) + if (locks_in_grace()) return nfserr_grace; if (check_lock_length(lockt->lt_offset, lockt->lt_length)) @@ -3192,9 +3194,9 @@ __nfs4_state_start(void) unsigned long grace_time; boot_time = get_seconds(); - grace_time = get_nfs_grace_period(); + grace_time = get_nfs4_grace_period(); lease_time = user_lease_time; - in_grace = 1; + locks_start_grace(&nfsd4_manager); printk(KERN_INFO "NFSD: starting %ld-second grace period\n", grace_time/HZ); laundry_wq = create_singlethread_workqueue("nfsd4"); @@ -3213,12 +3215,6 @@ nfs4_state_start(void) return; } -int -nfs4_in_grace(void) -{ - return in_grace; -} - time_t nfs4_lease_time(void) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 9f540165a078..27cfa723b92a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -942,6 +942,14 @@ struct lock_manager_operations { int (*fl_change)(struct file_lock **, int); }; +struct lock_manager { + struct list_head list; +}; + +void locks_start_grace(struct lock_manager *); +void locks_end_grace(struct lock_manager *); +int locks_in_grace(void); + /* that will die - we need it for nfs_lock_info */ #include diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 3d25bcd139d1..1f0465c374dc 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -27,7 +27,6 @@ struct nlmsvc_binding { struct nfs_fh *, struct file **); void (*fclose)(struct file *); - unsigned long (*get_grace_period)(void); }; extern struct nlmsvc_binding * nlmsvc_ops; @@ -56,12 +55,4 @@ extern int nlmclnt_proc(struct nlm_host *host, int cmd, extern int lockd_up(int proto); extern void lockd_down(void); -unsigned long get_nfs_grace_period(void); - -#ifdef CONFIG_NFSD_V4 -unsigned long get_nfs4_grace_period(void); -#else -static inline unsigned long get_nfs4_grace_period(void) {return 0;} -#endif - #endif /* LINUX_LOCKD_BIND_H */ -- cgit v1.2.3 From b2b5028905226f85075a408b1118857c9aa48bb3 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 6 Feb 2008 13:59:23 -0500 Subject: lockd: move grace period checks to common code Do all the grace period checks in svclock.c. This simplifies the code a bit, and will ease some later changes. Signed-off-by: J. Bruce Fields --- fs/lockd/svc4proc.c | 15 ++------------- fs/lockd/svclock.c | 14 +++++++++++++- fs/lockd/svcproc.c | 15 ++------------- include/linux/lockd/lockd.h | 2 +- 4 files changed, 18 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 7ca617367b3e..f6f18fa5cf8b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -88,12 +88,6 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, dprintk("lockd: TEST4 called\n"); resp->cookie = argp->cookie; - /* Don't accept test requests during grace period */ - if (locks_in_grace()) { - resp->status = nlm_lck_denied_grace_period; - return rc; - } - /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; @@ -122,12 +116,6 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; - /* Don't accept new lock requests during grace period */ - if (locks_in_grace() && !argp->reclaim) { - resp->status = nlm_lck_denied_grace_period; - return rc; - } - /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; @@ -146,7 +134,8 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, /* Now try to lock the file */ resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, - argp->block, &argp->cookie); + argp->block, &argp->cookie, + argp->reclaim); if (resp->status == nlm_drop_reply) rc = rpc_drop_reply; else diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index cf0d5c2c318d..808d246ada4d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -360,7 +360,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block) __be32 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_host *host, struct nlm_lock *lock, int wait, - struct nlm_cookie *cookie) + struct nlm_cookie *cookie, int reclaim) { struct nlm_block *block = NULL; int error; @@ -406,6 +406,11 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } + if (locks_in_grace() && !reclaim) { + ret = nlm_lck_denied_grace_period; + goto out; + } + if (!wait) lock->fl.fl_flags &= ~FL_SLEEP; error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); @@ -502,6 +507,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; } + if (locks_in_grace()) { + ret = nlm_lck_denied_grace_period; + goto out; + } error = vfs_test_lock(file->f_file, &lock->fl); if (error == FILE_LOCK_DEFERRED) { ret = nlmsvc_defer_lock_rqst(rqstp, block); @@ -582,6 +591,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); + if (locks_in_grace()) + return nlm_lck_denied_grace_period; + mutex_lock(&file->f_mutex); block = nlmsvc_lookup_block(file, lock); mutex_unlock(&file->f_mutex); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 1b013e198804..a587b81338b1 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -117,12 +117,6 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, dprintk("lockd: TEST called\n"); resp->cookie = argp->cookie; - /* Don't accept test requests during grace period */ - if (locks_in_grace()) { - resp->status = nlm_lck_denied_grace_period; - return rc; - } - /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; @@ -152,12 +146,6 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; - /* Don't accept new lock requests during grace period */ - if (locks_in_grace() && !argp->reclaim) { - resp->status = nlm_lck_denied_grace_period; - return rc; - } - /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; @@ -176,7 +164,8 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, /* Now try to lock the file */ resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, - argp->block, &argp->cookie)); + argp->block, &argp->cookie, + argp->reclaim)); if (resp->status == nlm_drop_reply) rc = rpc_drop_reply; else diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ec8af115843d..973ab1d6e862 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -242,7 +242,7 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref); */ __be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, int, - struct nlm_cookie *); + struct nlm_cookie *, int); __be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *, struct nlm_host *, struct nlm_lock *, -- cgit v1.2.3 From 0d3ebb9ae9f9c887518fd4c81a68084111d154d7 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Tue, 30 Sep 2008 13:06:13 -0500 Subject: svcrdma: Add Fast Reg MR Data Types Add data types to track Fast Reg Memory Regions. The core data type is svc_rdma_fastreg_mr that associates a device MR with a host kva and page list. A field is added to the WR context to keep track of the FRMR used to map the local memory for an RPC. An FRMR list and spin lock are added to the transport instance to keep track of all FRMR allocated for the transport. Also added are device capability flags to indicate what the memory registration capabilities are for the underlying device and whether or not fast memory registration is supported. Signed-off-by: Tom Tucker --- include/linux/sunrpc/svc_rdma.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index dc05b54bd3a3..49e458d98945 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -72,6 +72,7 @@ extern atomic_t rdma_stat_sq_prod; */ struct svc_rdma_op_ctxt { struct svc_rdma_op_ctxt *read_hdr; + struct svc_rdma_fastreg_mr *frmr; int hdr_count; struct xdr_buf arg; struct list_head dto_q; @@ -103,16 +104,30 @@ struct svc_rdma_chunk_sge { int start; /* sge no for this chunk */ int count; /* sge count for this chunk */ }; +struct svc_rdma_fastreg_mr { + struct ib_mr *mr; + void *kva; + struct ib_fast_reg_page_list *page_list; + int page_list_len; + unsigned long access_flags; + unsigned long map_len; + enum dma_data_direction direction; + struct list_head frmr_list; +}; struct svc_rdma_req_map { + struct svc_rdma_fastreg_mr *frmr; unsigned long count; union { struct kvec sge[RPCSVC_MAXPAGES]; struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; }; }; - +#define RDMACTXT_F_FAST_UNREG 1 #define RDMACTXT_F_LAST_CTXT 2 +#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ +#define SVCRDMA_DEVCAP_READ_W_INV 2 /* read w/ invalidate */ + struct svcxprt_rdma { struct svc_xprt sc_xprt; /* SVC transport structure */ struct rdma_cm_id *sc_cm_id; /* RDMA connection id */ @@ -136,6 +151,11 @@ struct svcxprt_rdma { struct ib_cq *sc_rq_cq; struct ib_cq *sc_sq_cq; struct ib_mr *sc_phys_mr; /* MR for server memory */ + u32 sc_dev_caps; /* distilled device caps */ + u32 sc_dma_lkey; /* local dma key */ + unsigned int sc_frmr_pg_list_len; + struct list_head sc_frmr_q; + spinlock_t sc_frmr_q_lock; spinlock_t sc_lock; /* transport lock */ -- cgit v1.2.3 From d7d204403b31beb83b1aefef7bd76f5209369555 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 12:50:21 -0400 Subject: lockd: Adjust nlmclnt_lookup_host() signature to accomodate non-AF_INET Pass a struct sockaddr * and a length to nlmclnt_lookup_host() to accomodate non-AF_INET family addresses. As a side benefit, eliminate the hostname_len argument, as the hostname is always NUL-terminated. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 5 ++--- fs/lockd/host.c | 32 +++++++++++++++++++++----------- include/linux/lockd/lockd.h | 9 +++++---- 3 files changed, 28 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 237224a3c420..9eaf306d15fa 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -58,10 +58,9 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) if (status < 0) return ERR_PTR(status); - host = nlmclnt_lookup_host((struct sockaddr_in *)nlm_init->address, + host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, nlm_init->protocol, nlm_version, - nlm_init->hostname, - strlen(nlm_init->hostname)); + nlm_init->hostname); if (host == NULL) { lockd_down(); return ERR_PTR(-ENOLCK); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 5876b0e4c0be..cbd2398e594c 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -264,32 +264,42 @@ nlm_destroy_host(struct nlm_host *host) kfree(host); } -/* - * Find an NLM server handle in the cache. If there is none, create it. +/** + * nlmclnt_lookup_host - Find an NLM host handle matching a remote server + * @sap: network address of server + * @salen: length of server address + * @protocol: transport protocol to use + * @version: NLM protocol version + * @hostname: '\0'-terminated hostname of server + * + * Returns an nlm_host structure that matches the passed-in + * [server address, transport protocol, NLM version, server hostname]. + * If one doesn't already exist in the host cache, a new handle is + * created and returned. */ -struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, - int proto, u32 version, - const char *hostname, - unsigned int hostname_len) +struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, + const size_t salen, + const unsigned short protocol, + const u32 version, const char *hostname) { const struct sockaddr source = { .sa_family = AF_UNSPEC, }; struct nlm_lookup_host_info ni = { .server = 0, - .sap = (struct sockaddr *)sin, - .salen = sizeof(*sin), - .protocol = proto, + .sap = sap, + .salen = salen, + .protocol = protocol, .version = version, .hostname = hostname, - .hostname_len = hostname_len, + .hostname_len = strlen(hostname), .src_sap = &source, .src_len = sizeof(source), }; dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, (hostname ? hostname : ""), version, - (proto == IPPROTO_UDP ? "udp" : "tcp")); + (protocol == IPPROTO_UDP ? "udp" : "tcp")); return nlm_lookup_host(&ni); } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 973ab1d6e862..90a996d2f005 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -215,10 +215,11 @@ void nlmclnt_next_cookie(struct nlm_cookie *); /* * Host cache */ -struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, - int proto, u32 version, - const char *hostname, - unsigned int hostname_len); +struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, + const size_t salen, + const unsigned short protocol, + const u32 version, + const char *hostname); struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *, unsigned int); struct rpc_clnt * nlm_bind_host(struct nlm_host *); -- cgit v1.2.3 From 6bfbe8af4674458e6d88aef8f0136bd1b8855b11 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 12:50:29 -0400 Subject: lockd: Adjust nlmsvc_lookup_host() to accomodate AF_INET6 addresses Fix up nlmsvc_lookup_host() to pass AF_INET6 source addresses to nlm_lookup_host(). Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/host.c | 47 ++++++++++++++++++++++++++++++++++++--------- include/linux/lockd/lockd.h | 5 +++-- 2 files changed, 41 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index cbd2398e594c..9fd8889097b7 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -304,16 +304,33 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, return nlm_lookup_host(&ni); } -/* - * Find an NLM client handle in the cache. If there is none, create it. +/** + * nlmsvc_lookup_host - Find an NLM host handle matching a remote client + * @rqstp: incoming NLM request + * @hostname: name of client host + * @hostname_len: length of client hostname + * + * Returns an nlm_host structure that matches the [client address, + * transport protocol, NLM version, client hostname] of the passed-in + * NLM request. If one doesn't already exist in the host cache, a + * new handle is created and returned. + * + * Before possibly creating a new nlm_host, construct a sockaddr + * for a specific source address in case the local system has + * multiple network addresses. The family of the address in + * rq_daddr is guaranteed to be the same as the family of the + * address in rq_addr, so it's safe to use the same family for + * the source address. */ -struct nlm_host * -nlmsvc_lookup_host(struct svc_rqst *rqstp, - const char *hostname, unsigned int hostname_len) +struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, + const char *hostname, + const size_t hostname_len) { - const struct sockaddr_in source = { + struct sockaddr_in sin = { .sin_family = AF_INET, - .sin_addr = rqstp->rq_daddr.addr, + }; + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, }; struct nlm_lookup_host_info ni = { .server = 1, @@ -323,14 +340,26 @@ nlmsvc_lookup_host(struct svc_rqst *rqstp, .version = rqstp->rq_vers, .hostname = hostname, .hostname_len = hostname_len, - .src_sap = (struct sockaddr *)&source, - .src_len = sizeof(source), + .src_len = rqstp->rq_addrlen, }; dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, (int)hostname_len, hostname, rqstp->rq_vers, (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); + switch (ni.sap->sa_family) { + case AF_INET: + sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr; + ni.src_sap = (struct sockaddr *)&sin; + break; + case AF_INET6: + ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6); + ni.src_sap = (struct sockaddr *)&sin6; + break; + default: + return NULL; + } + return nlm_lookup_host(&ni); } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 90a996d2f005..16ff2e88f05d 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -220,8 +220,9 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, const unsigned short protocol, const u32 version, const char *hostname); -struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *, - unsigned int); +struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, + const char *hostname, + const size_t hostname_len); struct rpc_clnt * nlm_bind_host(struct nlm_host *); void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); -- cgit v1.2.3 From dcff09f124f71d1d4fe61eb63c79e52f488ac22e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 12:50:36 -0400 Subject: lockd: change nlmclnt_grant() to take a "struct sockaddr *" Adjust the signature and callers of nlmclnt_grant() to pass a "struct sockaddr *" instead of a "struct sockaddr_in *" in order to support IPv6 addresses. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 5 ++--- fs/lockd/svc4proc.c | 2 +- fs/lockd/svcproc.c | 2 +- include/linux/lockd/lockd.h | 3 ++- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 9eaf306d15fa..2976bf0f4147 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -141,7 +141,7 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) /* * The server lockd has called us back to tell us the lock was granted */ -__be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) +__be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) { const struct file_lock *fl = &lock->fl; const struct nfs_fh *fh = &lock->fh; @@ -165,8 +165,7 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(nlm_addr(block->b_host), - (struct sockaddr *)addr)) + if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index f6f18fa5cf8b..50ee8eb139ab 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -220,7 +220,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; dprintk("lockd: GRANTED called\n"); - resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); + resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock); dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); return rpc_success; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index a587b81338b1..935ce967a6a1 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -250,7 +250,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, resp->cookie = argp->cookie; dprintk("lockd: GRANTED called\n"); - resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); + resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock); dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); return rpc_success; } diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 16ff2e88f05d..e6b070979287 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -207,7 +207,8 @@ int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_wait *block); int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); -__be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); +__be32 nlmclnt_grant(const struct sockaddr *addr, + const struct nlm_lock *lock); void nlmclnt_recovery(struct nlm_host *); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); void nlmclnt_next_cookie(struct nlm_cookie *); -- cgit v1.2.3 From b85e4676344fc4d7ec5e0f62c3d3712e48bbe223 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 12:50:44 -0400 Subject: lockd: Add helper to sanity check incoming NOTIFY requests lockd accepts SM_NOTIFY calls only from a privileged process on the local system. If lockd uses an AF_INET6 listener, the sender's address (ie the local rpc.statd) will be the IPv6 loopback address, not the IPv4 loopback address. Make sure the privilege test in nlmsvc_proc_sm_notify() and nlm4svc_proc_sm_notify() works for both AF_INET and AF_INET6 family addresses by refactoring the test into a helper and adding support for IPv6 addresses. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/svc4proc.c | 6 ++---- fs/lockd/svcproc.c | 6 ++---- include/linux/lockd/lockd.h | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 50ee8eb139ab..014f6ce48172 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -421,11 +421,9 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, { struct sockaddr_in saddr; - memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr)); - dprintk("lockd: SM_NOTIFY called\n"); - if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) - || ntohs(saddr.sin_port) >= 1024) { + + if (!nlm_privileged_requester(rqstp)) { char buf[RPC_MAX_ADDRBUFLEN]; printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", svc_print_addr(rqstp, buf, sizeof(buf))); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 935ce967a6a1..548b0bb2b84d 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -453,11 +453,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, { struct sockaddr_in saddr; - memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr)); - dprintk("lockd: SM_NOTIFY called\n"); - if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) - || ntohs(saddr.sin_port) >= 1024) { + + if (!nlm_privileged_requester(rqstp)) { char buf[RPC_MAX_ADDRBUFLEN]; printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", svc_print_addr(rqstp, buf, sizeof(buf))); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index e6b070979287..b56d5aa9b194 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -277,6 +277,47 @@ static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) return file->f_file->f_path.dentry->d_inode; } +static inline int __nlm_privileged_request4(const struct sockaddr *sap) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + return (sin->sin_addr.s_addr == htonl(INADDR_LOOPBACK)) && + (ntohs(sin->sin_port) < 1024); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline int __nlm_privileged_request6(const struct sockaddr *sap) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + return (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK) && + (ntohs(sin6->sin6_port) < 1024); +} +#else /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ +static inline int __nlm_privileged_request6(const struct sockaddr *sap) +{ + return 0; +} +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ + +/* + * Ensure incoming requests are from local privileged callers. + * + * Return TRUE if sender is local and is connecting via a privileged port; + * otherwise return FALSE. + */ +static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) +{ + const struct sockaddr *sap = svc_addr(rqstp); + + switch (sap->sa_family) { + case AF_INET: + return __nlm_privileged_request4(sap); + case AF_INET6: + return __nlm_privileged_request6(sap); + default: + return 0; + } +} + static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, const struct sockaddr *sap2) { -- cgit v1.2.3 From 9a38a83880c224c6a3fd973ac9ae30a043487f0f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 12:50:51 -0400 Subject: lockd: Remove unused fields in the nlm_reboot structure The nlm_reboot structure is used to store information provided by the NSM_NOTIFY procedure. This procedure is not specified by the NLM or NSM protocols, other than to say that the procedure can be used to transmit information private to a particular NLM/NSM implementation. For Linux, the callback arguments include the name of the monitored host, the new NSM state of the host, and a 16-byte private opaque. As a clean up, remove the unused fields and the server-side XDR logic that decodes them. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/xdr.c | 2 -- fs/lockd/xdr4.c | 2 -- include/linux/lockd/xdr.h | 2 -- 3 files changed, 6 deletions(-) (limited to 'include') diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 3e459e18cc31..1f226290c67c 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -351,8 +351,6 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp) argp->state = ntohl(*p++); /* Preserve the address in network byte order */ argp->addr = *p++; - argp->vers = *p++; - argp->proto = *p++; return xdr_argsize_check(rqstp, p); } diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 43ff9397e6c6..50c493a8ad8e 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -358,8 +358,6 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp argp->state = ntohl(*p++); /* Preserve the address in network byte order */ argp->addr = *p++; - argp->vers = *p++; - argp->proto = *p++; return xdr_argsize_check(rqstp, p); } diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index df18fa053bcd..d6b3a802c046 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -81,8 +81,6 @@ struct nlm_reboot { unsigned int len; u32 state; __be32 addr; - __be32 vers; - __be32 proto; }; /* -- cgit v1.2.3 From 26a414092353590ceaa5955bcb53f863d6ea7549 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 17:15:30 -0400 Subject: NLM: Remove "proto" argument from lockd_up() Clean up: Now that lockd_up() starts listeners for both transports, the "proto" argument is no longer needed. Signed-off-by: Chuck Lever Cc: Neil Brown Signed-off-by: J. Bruce Fields --- fs/lockd/clntlock.c | 4 ++-- fs/lockd/svc.c | 3 +-- fs/nfsd/nfsctl.c | 5 ++--- fs/nfsd/nfssvc.c | 19 +++++++------------ include/linux/lockd/bind.h | 2 +- 5 files changed, 13 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 2976bf0f4147..8307dd64bf46 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -54,7 +54,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; int status; - status = lockd_up(nlm_init->protocol); + status = lockd_up(); if (status < 0) return ERR_PTR(status); @@ -215,7 +215,7 @@ reclaimer(void *ptr) /* This one ensures that our parent doesn't terminate while the * reclaim is in progress */ lock_kernel(); - lockd_up(0); /* note: this cannot fail as lockd is already running */ + lockd_up(); /* note: this cannot fail as lockd is already running */ dprintk("lockd: reclaiming locks for host %s\n", host->h_name); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 36396fc058c5..c631a83931ce 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -230,8 +230,7 @@ static int make_socks(struct svc_serv *serv) /* * Bring up the lockd process if it's not already up. */ -int -lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ +int lockd_up(void) { struct svc_serv *serv; int error = 0; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c53e65f8f3a2..862dff5247f7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -614,10 +614,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) return -EINVAL; err = nfsd_create_serv(); if (!err) { - int proto = 0; - err = svc_addsock(nfsd_serv, fd, buf, &proto); + err = svc_addsock(nfsd_serv, fd, buf, NULL); if (err >= 0) { - err = lockd_up(proto); + err = lockd_up(); if (err < 0) svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 7f3d76a7839d..59eeb46f82c5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -244,25 +244,20 @@ static int nfsd_init_socks(int port) if (!list_empty(&nfsd_serv->sv_permsocks)) return 0; - error = lockd_up(IPPROTO_UDP); - if (error >= 0) { - error = svc_create_xprt(nfsd_serv, "udp", port, + error = svc_create_xprt(nfsd_serv, "udp", port, SVC_SOCK_DEFAULTS); - if (error < 0) - lockd_down(); - } if (error < 0) return error; - error = lockd_up(IPPROTO_TCP); - if (error >= 0) { - error = svc_create_xprt(nfsd_serv, "tcp", port, + error = svc_create_xprt(nfsd_serv, "tcp", port, SVC_SOCK_DEFAULTS); - if (error < 0) - lockd_down(); - } if (error < 0) return error; + + error = lockd_up(); + if (error < 0) + return error; + return 0; } diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 1f0465c374dc..e5872dc994c0 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -52,7 +52,7 @@ extern void nlmclnt_done(struct nlm_host *host); extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl); -extern int lockd_up(int proto); +extern int lockd_up(void); extern void lockd_down(void); #endif /* LINUX_LOCKD_BIND_H */ -- cgit v1.2.3 From 2937391385807b3da9cd7a39345259caf550b032 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 3 Oct 2008 17:15:38 -0400 Subject: NLM: Remove unused argument from svc_addsock() function Clean up: The svc_addsock() function no longer uses its "proto" argument, so remove it. Signed-off-by: Chuck Lever Cc: Neil Brown Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsctl.c | 2 +- include/linux/sunrpc/svcsock.h | 5 +---- net/sunrpc/svcsock.c | 4 +--- 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 862dff5247f7..97543df58242 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -614,7 +614,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) return -EINVAL; err = nfsd_create_serv(); if (!err) { - err = svc_addsock(nfsd_serv, fd, buf, NULL); + err = svc_addsock(nfsd_serv, fd, buf); if (err >= 0) { err = lockd_up(); if (err < 0) diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 8cff696dedf5..483e10380aae 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -39,10 +39,7 @@ int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); int svc_sock_names(char *buf, struct svc_serv *serv, char *toclose); -int svc_addsock(struct svc_serv *serv, - int fd, - char *name_return, - int *proto); +int svc_addsock(struct svc_serv *serv, int fd, char *name_return); void svc_init_xprt_sock(void); void svc_cleanup_xprt_sock(void); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f91377c14951..95293f549e9c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1167,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, int svc_addsock(struct svc_serv *serv, int fd, - char *name_return, - int *proto) + char *name_return) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1203,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv, sockfd_put(so); return err; } - if (proto) *proto = so->sk->sk_protocol; return one_sock_name(name_return, svsk); } EXPORT_SYMBOL_GPL(svc_addsock); -- cgit v1.2.3 From 64be8608c163bd480cf5ec4b34366f11e0f3c87f Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Mon, 6 Oct 2008 14:45:18 -0500 Subject: svcrdma: Add FRMR get/put services Add services for the allocating, freeing, and unmapping Fast Reg MR. These services will be used by the transport connection setup, send and receive routines. Signed-off-by: Tom Tucker --- include/linux/sunrpc/svc_rdma.h | 3 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 116 +++++++++++++++++++++++++++++-- 2 files changed, 114 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 49e458d98945..34252683671c 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -214,6 +214,9 @@ extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); +extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); +extern void svc_rdma_put_frmr(struct svcxprt_rdma *, + struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); extern struct svc_xprt_class svc_rdma_class; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 900cb69728c6..f0b5c5f2f629 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -100,6 +100,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) ctxt->xprt = xprt; INIT_LIST_HEAD(&ctxt->dto_q); ctxt->count = 0; + ctxt->frmr = NULL; atomic_inc(&xprt->sc_ctxt_used); return ctxt; } @@ -109,11 +110,19 @@ static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) struct svcxprt_rdma *xprt = ctxt->xprt; int i; for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_single(xprt->sc_cm_id->device, - ctxt->sge[i].addr, - ctxt->sge[i].length, - ctxt->direction); + /* + * Unmap the DMA addr in the SGE if the lkey matches + * the sc_dma_lkey, otherwise, ignore it since it is + * an FRMR lkey and will be unmapped later when the + * last WR that uses it completes. + */ + if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(xprt->sc_cm_id->device, + ctxt->sge[i].addr, + ctxt->sge[i].length, + ctxt->direction); + } } } @@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); } map->count = 0; + map->frmr = NULL; return map; } @@ -425,10 +435,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); + INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); + spin_lock_init(&cma_xprt->sc_frmr_q_lock); cma_xprt->sc_ord = svcrdma_ord; @@ -686,6 +698,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, return ERR_PTR(ret); } +static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) +{ + struct ib_mr *mr; + struct ib_fast_reg_page_list *pl; + struct svc_rdma_fastreg_mr *frmr; + + frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); + if (!frmr) + goto err; + + mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); + if (!mr) + goto err_free_frmr; + + pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, + RPCSVC_MAXPAGES); + if (!pl) + goto err_free_mr; + + frmr->mr = mr; + frmr->page_list = pl; + INIT_LIST_HEAD(&frmr->frmr_list); + return frmr; + + err_free_mr: + ib_dereg_mr(mr); + err_free_frmr: + kfree(frmr); + err: + return ERR_PTR(-ENOMEM); +} + +static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) +{ + struct svc_rdma_fastreg_mr *frmr; + + while (!list_empty(&xprt->sc_frmr_q)) { + frmr = list_entry(xprt->sc_frmr_q.next, + struct svc_rdma_fastreg_mr, frmr_list); + list_del_init(&frmr->frmr_list); + ib_dereg_mr(frmr->mr); + ib_free_fast_reg_page_list(frmr->page_list); + kfree(frmr); + } +} + +struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) +{ + struct svc_rdma_fastreg_mr *frmr = NULL; + + spin_lock_bh(&rdma->sc_frmr_q_lock); + if (!list_empty(&rdma->sc_frmr_q)) { + frmr = list_entry(rdma->sc_frmr_q.next, + struct svc_rdma_fastreg_mr, frmr_list); + list_del_init(&frmr->frmr_list); + frmr->map_len = 0; + frmr->page_list_len = 0; + } + spin_unlock_bh(&rdma->sc_frmr_q_lock); + if (frmr) + return frmr; + + return rdma_alloc_frmr(rdma); +} + +static void frmr_unmap_dma(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + int page_no; + for (page_no = 0; page_no < frmr->page_list_len; page_no++) { + dma_addr_t addr = frmr->page_list->page_list[page_no]; + if (ib_dma_mapping_error(frmr->mr->device, addr)) + continue; + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, + frmr->direction); + } +} + +void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, + struct svc_rdma_fastreg_mr *frmr) +{ + if (frmr) { + frmr_unmap_dma(rdma, frmr); + spin_lock_bh(&rdma->sc_frmr_q_lock); + BUG_ON(!list_empty(&frmr->frmr_list)); + list_add(&frmr->frmr_list, &rdma->sc_frmr_q); + spin_unlock_bh(&rdma->sc_frmr_q_lock); + } +} + /* * This is the xpo_recvfrom function for listening endpoints. Its * purpose is to accept incoming connections. The CMA callback handler @@ -961,6 +1064,9 @@ static void __svc_rdma_free(struct work_struct *work) WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + /* De-allocate fastreg mr */ + rdma_dealloc_frmr_q(rdma); + /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ib_destroy_qp(rdma->sc_qp); -- cgit v1.2.3 From e1183210625cc8e02ce13eec78fb7a246567fc59 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Fri, 3 Oct 2008 15:22:18 -0500 Subject: svcrdma: Add a service to register a Fast Reg MR with the device Fast Reg MR introduces a new WR type. Add a service to register the region with the adapter and update the completion handling to support completions with a NULL WR context. Signed-off-by: Tom Tucker --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_transport.c | 111 +++++++++++++++++++++---------- 2 files changed, 77 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 34252683671c..1402d193b393 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -214,6 +214,7 @@ extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); +extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index a8ec4b1eec58..c3e8db0eeb3b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -325,6 +325,45 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) svc_xprt_enqueue(&xprt->sc_xprt); } +/* + * Processs a completion context + */ +static void process_context(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt) +{ + svc_rdma_unmap_dma(ctxt); + + switch (ctxt->wr_op) { + case IB_WR_SEND: + svc_rdma_put_context(ctxt, 1); + break; + + case IB_WR_RDMA_WRITE: + svc_rdma_put_context(ctxt, 0); + break; + + case IB_WR_RDMA_READ: + if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; + BUG_ON(!read_hdr); + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + svc_xprt_enqueue(&xprt->sc_xprt); + } + svc_rdma_put_context(ctxt, 0); + break; + + default: + printk(KERN_ERR "svcrdma: unexpected completion type, " + "opcode=%d\n", + ctxt->wr_op); + break; + } +} + /* * Send Queue Completion Handler - potentially called on interrupt context. * @@ -337,17 +376,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) struct ib_cq *cq = xprt->sc_sq_cq; int ret; - if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - xprt = ctxt->xprt; - - svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -356,35 +390,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) atomic_dec(&xprt->sc_sq_count); wake_up(&xprt->sc_send_wait); - switch (ctxt->wr_op) { - case IB_WR_SEND: - svc_rdma_put_context(ctxt, 1); - break; - - case IB_WR_RDMA_WRITE: - svc_rdma_put_context(ctxt, 0); - break; - - case IB_WR_RDMA_READ: - if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { - struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); - svc_xprt_enqueue(&xprt->sc_xprt); - } - svc_rdma_put_context(ctxt, 0); - break; + ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; + if (ctxt) + process_context(xprt, ctxt); - default: - printk(KERN_ERR "svcrdma: unexpected completion type, " - "opcode=%d, status=%d\n", - wc.opcode, wc.status); - break; - } svc_xprt_put(&xprt->sc_xprt); } @@ -1184,6 +1193,40 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +/* + * Attempt to register the kvec representing the RPC memory with the + * device. + * + * Returns: + * NULL : The device does not support fastreg or there were no more + * fastreg mr. + * frmr : The kvec register request was successfully posted. + * <0 : An error was encountered attempting to register the kvec. + */ +int svc_rdma_fastreg(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + struct ib_send_wr fastreg_wr; + u8 key; + + /* Bump the key */ + key = (u8)(frmr->mr->lkey & 0x000000FF); + ib_update_fast_reg_key(frmr->mr, ++key); + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof fastreg_wr); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; + fastreg_wr.wr.fast_reg.page_list = frmr->page_list; + fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = frmr->map_len; + fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; + fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; + return svc_rdma_send(xprt, &fastreg_wr); +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { struct ib_send_wr *bad_wr; @@ -1193,8 +1236,6 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) return -ENOTCONN; BUG_ON(wr->send_flags != IB_SEND_SIGNALED); - BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != - wr->opcode); /* If the SQ is full, wait until an SQ entry is available */ while (1) { spin_lock_bh(&xprt->sc_lock); -- cgit v1.2.3 From 146b6df6a537939570c5772ebd7db826fdbd5d82 Mon Sep 17 00:00:00 2001 From: Tom Tucker Date: Tue, 12 Aug 2008 15:12:10 -0500 Subject: svcrdma: Modify the RPC recv path to use FRMR when available RPCRDMA requests that specify a read-list are fetched with RDMA_READ. Using an FRMR to map the data sink improves NFSRDMA security on transports that place the RDMA_READ data sink LKEY on the wire because the valid lifetime of the MR is only the duration of the RDMA_READ. The LKEY is invalidated when the last RDMA_READ WR completes. Mapping the data sink also allows for very large amounts to data to be fetched with a single WR, so if the client is also using FRMR, the entire RPC read-list can be fetched with a single WR. Signed-off-by: Tom Tucker --- include/linux/sunrpc/svc_rdma.h | 1 + net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 187 +++++++++++++++++++++++++++---- net/sunrpc/xprtrdma/svc_rdma_transport.c | 5 +- 3 files changed, 171 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 1402d193b393..c14fe86dac59 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -212,6 +212,7 @@ extern int svc_rdma_post_recv(struct svcxprt_rdma *); extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); +extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); extern struct svc_rdma_req_map *svc_rdma_get_req_map(void); extern void svc_rdma_put_req_map(struct svc_rdma_req_map *); extern int svc_rdma_fastreg(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 74de31a06616..a4756576d687 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, * * Assumptions: * - chunk[0]->position points to pages[0] at an offset of 0 - * - pages[] is not physically or virtually contigous and consists of + * - pages[] is not physically or virtually contiguous and consists of * PAGE_SIZE elements. * * Output: @@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, * chunk in the read list * */ -static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, +static int map_read_chunks(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, struct rpcrdma_msg *rmsgp, @@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, return sge_no; } -static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, - struct svc_rdma_op_ctxt *ctxt, - struct kvec *vec, - u64 *sgl_offset, - int count) +/* Map a read-chunk-list to an XDR and fast register the page-list. + * + * Assumptions: + * - chunk[0] position points to pages[0] at an offset of 0 + * - pages[] will be made physically contiguous by creating a one-off memory + * region using the fastreg verb. + * - byte_count is # of bytes in read-chunk-list + * - ch_count is # of chunks in read-chunk-list + * + * Output: + * - sge array pointing into pages[] array. + * - chunk_sge array specifying sge index and count for each + * chunk in the read list + */ +static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + struct rpcrdma_msg *rmsgp, + struct svc_rdma_req_map *rpl_map, + struct svc_rdma_req_map *chl_map, + int ch_count, + int byte_count) +{ + int page_no; + int ch_no; + u32 offset; + struct rpcrdma_read_chunk *ch; + struct svc_rdma_fastreg_mr *frmr; + int ret = 0; + + frmr = svc_rdma_get_frmr(xprt); + if (IS_ERR(frmr)) + return -ENOMEM; + + head->frmr = frmr; + head->arg.head[0] = rqstp->rq_arg.head[0]; + head->arg.tail[0] = rqstp->rq_arg.tail[0]; + head->arg.pages = &head->pages[head->count]; + head->hdr_count = head->count; /* save count of hdr pages */ + head->arg.page_base = 0; + head->arg.page_len = byte_count; + head->arg.len = rqstp->rq_arg.len + byte_count; + head->arg.buflen = rqstp->rq_arg.buflen + byte_count; + + /* Fast register the page list */ + frmr->kva = page_address(rqstp->rq_arg.pages[0]); + frmr->direction = DMA_FROM_DEVICE; + frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); + frmr->map_len = byte_count; + frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; + for (page_no = 0; page_no < frmr->page_list_len; page_no++) { + frmr->page_list->page_list[page_no] = + ib_dma_map_single(xprt->sc_cm_id->device, + page_address(rqstp->rq_arg.pages[page_no]), + PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + atomic_inc(&xprt->sc_dma_used); + head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; + } + head->count += page_no; + + /* rq_respages points one past arg pages */ + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; + + /* Create the reply and chunk maps */ + offset = 0; + ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + for (ch_no = 0; ch_no < ch_count; ch_no++) { + rpl_map->sge[ch_no].iov_base = frmr->kva + offset; + rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length; + chl_map->ch[ch_no].count = 1; + chl_map->ch[ch_no].start = ch_no; + offset += ch->rc_target.rs_length; + ch++; + } + + ret = svc_rdma_fastreg(xprt, frmr); + if (ret) + goto fatal_err; + + return ch_no; + + fatal_err: + printk("svcrdma: error fast registering xdr for xprt %p", xprt); + svc_rdma_put_frmr(xprt, frmr); + return -EIO; +} + +static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt, + struct svc_rdma_fastreg_mr *frmr, + struct kvec *vec, + u64 *sgl_offset, + int count) { int i; ctxt->count = count; ctxt->direction = DMA_FROM_DEVICE; for (i = 0; i < count; i++) { - atomic_inc(&xprt->sc_dma_used); - ctxt->sge[i].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - vec[i].iov_base, vec[i].iov_len, - DMA_FROM_DEVICE); + ctxt->sge[i].length = 0; /* in case map fails */ + if (!frmr) { + ctxt->sge[i].addr = + ib_dma_map_single(xprt->sc_cm_id->device, + vec[i].iov_base, + vec[i].iov_len, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + ctxt->sge[i].addr)) + return -EINVAL; + ctxt->sge[i].lkey = xprt->sc_dma_lkey; + atomic_inc(&xprt->sc_dma_used); + } else { + ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; + ctxt->sge[i].lkey = frmr->mr->lkey; + } ctxt->sge[i].length = vec[i].iov_len; - ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey; *sgl_offset = *sgl_offset + vec[i].iov_len; } + return 0; } static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) @@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, struct svc_rdma_op_ctxt *hdr_ctxt) { struct ib_send_wr read_wr; + struct ib_send_wr inv_wr; int err = 0; int ch_no; int ch_count; @@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; - sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, - rpl_map, chl_map, - ch_count, byte_count); + + if (!xprt->sc_frmr_pg_list_len) + sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, + rpl_map, chl_map, ch_count, + byte_count); + else + sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, + rpl_map, chl_map, ch_count, + byte_count); + if (sge_count < 0) { + err = -EIO; + goto out; + } + sgl_offset = 0; ch_no = 0; @@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, next_sge: ctxt = svc_rdma_get_context(xprt); ctxt->direction = DMA_FROM_DEVICE; + ctxt->frmr = hdr_ctxt->frmr; + ctxt->read_hdr = NULL; clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare READ WR */ memset(&read_wr, 0, sizeof read_wr); - ctxt->wr_op = IB_WR_RDMA_READ; read_wr.wr_id = (unsigned long)ctxt; read_wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.opcode; read_wr.send_flags = IB_SEND_SIGNALED; read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; read_wr.wr.rdma.remote_addr = @@ -327,10 +444,15 @@ next_sge: read_wr.sg_list = ctxt->sge; read_wr.num_sge = rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); - rdma_set_ctxt_sge(xprt, ctxt, - &rpl_map->sge[chl_map->ch[ch_no].start], - &sgl_offset, - read_wr.num_sge); + err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, + &rpl_map->sge[chl_map->ch[ch_no].start], + &sgl_offset, + read_wr.num_sge); + if (err) { + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + goto out; + } if (((ch+1)->rc_discrim == 0) && (read_wr.num_sge == chl_map->ch[ch_no].count)) { /* @@ -339,6 +461,29 @@ next_sge: * the client and the RPC needs to be enqueued. */ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + if (hdr_ctxt->frmr) { + set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + /* + * Invalidate the local MR used to map the data + * sink. + */ + if (xprt->sc_dev_caps & + SVCRDMA_DEVCAP_READ_W_INV) { + read_wr.opcode = + IB_WR_RDMA_READ_WITH_INV; + ctxt->wr_op = read_wr.opcode; + read_wr.ex.invalidate_rkey = + ctxt->frmr->mr->lkey; + } else { + /* Prepare INVALIDATE WR */ + memset(&inv_wr, 0, sizeof inv_wr); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = + hdr_ctxt->frmr->mr->lkey; + read_wr.next = &inv_wr; + } + } ctxt->read_hdr = hdr_ctxt; } /* Post the read */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f22f58767661..fb0dff5e53ea 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -105,7 +105,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) return ctxt; } -static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) +void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) { struct svcxprt_rdma *xprt = ctxt->xprt; int i; @@ -343,9 +343,12 @@ static void process_context(struct svcxprt_rdma *xprt, break; case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; BUG_ON(!read_hdr); + if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) + svc_rdma_put_frmr(xprt, ctxt->frmr); spin_lock_bh(&xprt->sc_rq_dto_lock); set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); list_add_tail(&read_hdr->dto_q, -- cgit v1.2.3