diff options
36 files changed, 2359 insertions, 1163 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7936b801fe6a..c08813dbfce2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2395,6 +2395,18 @@ and is between 256 and 4096 characters. It is defined in the file stifb= [HW] Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]] + sunrpc.min_resvport= + sunrpc.max_resvport= + [NFS,SUNRPC] + SunRPC servers often require that client requests + originate from a privileged port (i.e. a port in the + range 0 < portnr < 1024). + An administrator who wishes to reserve some of these + ports for other uses may adjust the range that the + kernel's sunrpc client considers to be privileged + using these two parameters to set the minimum and + maximum port values. + sunrpc.pool_mode= [NFS] Control how the NFS server code allocates CPUs to @@ -2411,6 +2423,15 @@ and is between 256 and 4096 characters. It is defined in the file pernode one pool for each NUMA node (equivalent to global on non-NUMA machines) + sunrpc.tcp_slot_table_entries= + sunrpc.udp_slot_table_entries= + [NFS,SUNRPC] + Sets the upper limit on the number of simultaneous + RPC calls that can be sent from the client to a + server. Increasing these values may allow you to + improve throughput, but will also increase the + amount of memory reserved for use by the client. + swiotlb= [IA-64] Number of I/O TLB slabs switches= [HW,M68k] diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 99d737bd4325..7cb076ac6b45 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -87,18 +87,6 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap) return hash & (NLM_HOST_NRHASH - 1); } -static void nlm_clear_port(struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sap)->sin_port = 0; - break; - case AF_INET6: - ((struct sockaddr_in6 *)sap)->sin6_port = 0; - break; - } -} - /* * Common host lookup routine for server & client */ @@ -177,7 +165,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) host->h_addrbuf = nsm->sm_addrbuf; memcpy(nlm_addr(host), ni->sap, ni->salen); host->h_addrlen = ni->salen; - nlm_clear_port(nlm_addr(host)); + rpc_set_port(nlm_addr(host), 0); memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); host->h_version = ni->version; host->h_proto = ni->protocol; diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 7fce1b525849..30c933188dd7 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -61,43 +61,6 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm) return (struct sockaddr *)&nsm->sm_addr; } -static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf, - const size_t len) -{ - const struct sockaddr_in *sin = (struct sockaddr_in *)sap; - snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr); -} - -static void nsm_display_ipv6_address(const struct sockaddr *sap, char *buf, - const size_t len) -{ - const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - - if (ipv6_addr_v4mapped(&sin6->sin6_addr)) - snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]); - else if (sin6->sin6_scope_id != 0) - snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr, - sin6->sin6_scope_id); - else - snprintf(buf, len, "%pI6", &sin6->sin6_addr); -} - -static void nsm_display_address(const struct sockaddr *sap, - char *buf, const size_t len) -{ - switch (sap->sa_family) { - case AF_INET: - nsm_display_ipv4_address(sap, buf, len); - break; - case AF_INET6: - nsm_display_ipv6_address(sap, buf, len); - break; - default: - snprintf(buf, len, "unsupported address family"); - break; - } -} - static struct rpc_clnt *nsm_create(void) { struct sockaddr_in sin = { @@ -307,8 +270,11 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap, memcpy(nsm_addr(new), sap, salen); new->sm_addrlen = salen; nsm_init_private(new); - nsm_display_address((const struct sockaddr *)&new->sm_addr, - new->sm_addrbuf, sizeof(new->sm_addrbuf)); + + if (rpc_ntop(nsm_addr(new), new->sm_addrbuf, + sizeof(new->sm_addrbuf)) == 0) + (void)snprintf(new->sm_addrbuf, sizeof(new->sm_addrbuf), + "unsupported address family"); memcpy(new->sm_name, hostname, hostname_len); new->sm_name[hostname_len] = '\0'; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 7f604c7941fb..293fa0528a6e 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -43,21 +43,29 @@ static struct svc_program nfs4_callback_program; unsigned int nfs_callback_set_tcpport; unsigned short nfs_callback_tcpport; unsigned short nfs_callback_tcpport6; -static const int nfs_set_port_min = 0; -static const int nfs_set_port_max = 65535; +#define NFS_CALLBACK_MAXPORTNR (65535U) -static int param_set_port(const char *val, struct kernel_param *kp) +static int param_set_portnr(const char *val, struct kernel_param *kp) { - char *endp; - int num = simple_strtol(val, &endp, 0); - if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) + unsigned long num; + int ret; + + if (!val) + return -EINVAL; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) return -EINVAL; - *((int *)kp->arg) = num; + *((unsigned int *)kp->arg) = num; return 0; } -module_param_call(callback_tcpport, param_set_port, param_get_int, - &nfs_callback_set_tcpport, 0644); +static int param_get_portnr(char *buffer, struct kernel_param *kp) +{ + return param_get_uint(buffer, kp); +} +#define param_check_portnr(name, p) __param_check(name, p, unsigned int); + +module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); /* * This is the NFSv4 callback kernel thread. diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8d25ccb2d51d..d36925f9b952 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -809,6 +809,9 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; server->options = data->options; + server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| + NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); @@ -1074,10 +1077,6 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - spin_lock(&nfs_client_lock); list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); list_add_tail(&server->master_link, &nfs_volume_list); @@ -1274,7 +1273,7 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; server->options = data->options; /* Get a client record */ @@ -1359,10 +1358,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) server->namelen = NFS4_MAXNAMLEN; - BUG_ON(!server->nfs_client); - BUG_ON(!server->nfs_client->rpc_ops); - BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); - spin_lock(&nfs_client_lock); list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); list_add_tail(&server->master_link, &nfs_volume_list); @@ -1400,7 +1395,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; /* Get a client representation. * Note: NFSv4 always uses TCP, */ diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 05062329b678..5021b75d2d1e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -328,6 +328,42 @@ nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) } /* + * Decide whether a read/modify/write cycle may be more efficient + * then a modify/write/read cycle when writing to a page in the + * page cache. + * + * The modify/write/read cycle may occur if a page is read before + * being completely filled by the writer. In this situation, the + * page must be completely written to stable storage on the server + * before it can be refilled by reading in the page from the server. + * This can lead to expensive, small, FILE_SYNC mode writes being + * done. + * + * It may be more efficient to read the page first if the file is + * open for reading in addition to writing, the page is not marked + * as Uptodate, it is not dirty or waiting to be committed, + * indicating that it was previously allocated and then modified, + * that there were valid bytes of data in that range of the file, + * and that the new data won't completely replace the old data in + * that range of the file. + */ +static int nfs_want_read_modify_write(struct file *file, struct page *page, + loff_t pos, unsigned len) +{ + unsigned int pglen = nfs_page_length(page); + unsigned int offset = pos & (PAGE_CACHE_SIZE - 1); + unsigned int end = offset + len; + + if ((file->f_mode & FMODE_READ) && /* open for read? */ + !PageUptodate(page) && /* Uptodate? */ + !PagePrivate(page) && /* i/o request already? */ + pglen && /* valid bytes of file? */ + (end < pglen || offset)) /* replace all valid bytes? */ + return 1; + return 0; +} + +/* * This does the "real" work of the write. We must allocate and lock the * page to be sent back to the generic routine, which then copies the * data from user space. @@ -340,15 +376,16 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { int ret; - pgoff_t index; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; struct page *page; - index = pos >> PAGE_CACHE_SHIFT; + int once_thru = 0; dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", file->f_path.dentry->d_parent->d_name.name, file->f_path.dentry->d_name.name, mapping->host->i_ino, len, (long long) pos); +start: /* * Prevent starvation issues if someone is doing a consistency * sync-to-disk @@ -367,6 +404,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, if (ret) { unlock_page(page); page_cache_release(page); + } else if (!once_thru && + nfs_want_read_modify_write(file, page, pos, len)) { + once_thru = 1; + ret = nfs_readpage(file, page); + page_cache_release(page); + if (!ret) + goto start; } return ret; } @@ -479,6 +523,7 @@ const struct address_space_operations nfs_file_aops = { .invalidatepage = nfs_invalidate_page, .releasepage = nfs_release_page, .direct_IO = nfs_direct_IO, + .migratepage = nfs_migrate_page, .launder_page = nfs_launder_page, }; diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 86147b0ab2cf..21a84d45916f 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static unsigned int fnvhash32(const void *, size_t); -static struct rpc_pipe_ops idmap_upcall_ops = { +static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = idmap_pipe_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, @@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; - idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", - idmap, &idmap_upcall_ops, 0); + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, + "idmap", idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd7938eda6a8..fe5a8b45d867 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -286,6 +286,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; + if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 + && nfs_server_capable(inode, NFS_CAP_MODE)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -330,20 +335,46 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; + else if (nfs_server_capable(inode, NFS_CAP_ATIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; + else if (nfs_server_capable(inode, NFS_CAP_MTIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA; if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; + else if (nfs_server_capable(inode, NFS_CAP_CTIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_CHANGE) nfsi->change_attr = fattr->change_attr; + else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA; if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); + else + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_REVAL_PAGECACHE; if (fattr->valid & NFS_ATTR_FATTR_NLINK) inode->i_nlink = fattr->nlink; + else if (nfs_server_capable(inode, NFS_CAP_NLINK)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -1145,6 +1176,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) loff_t cur_isize, new_isize; unsigned long invalid = 0; unsigned long now = jiffies; + unsigned long save_cache_validity; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1171,10 +1203,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; - if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_PAGECACHE); + save_cache_validity = nfsi->cache_validity; + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED + | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ -1189,7 +1222,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); nfsi->change_attr = fattr->change_attr; } - } + } else if (server->caps & NFS_CAP_CHANGE_ATTR) + invalid |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { /* NFSv2/v3: Check if the mtime agrees */ @@ -1201,7 +1235,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } - } + } else if (server->caps & NFS_CAP_MTIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_CTIME) { /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { @@ -1215,7 +1254,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } - } + } else if (server->caps & NFS_CAP_CTIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1231,30 +1274,50 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: isize change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } - } + } else + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + else if (server->caps & NFS_CAP_ATIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; } - } + } else if (server->caps & NFS_CAP_MODE) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (inode->i_uid != fattr->uid) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } - } + } else if (server->caps & NFS_CAP_OWNER) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (inode->i_gid != fattr->gid) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } - } + } else if (server->caps & NFS_CAP_OWNER_GROUP) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { @@ -1263,7 +1326,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_DATA; inode->i_nlink = fattr->nlink; } - } + } else if (server->caps & NFS_CAP_NLINK) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* @@ -1293,9 +1358,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; if (!nfs_have_delegation(inode, FMODE_READ) || - (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) + (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; - nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; return 0; out_changed: diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7dd90a6769d0..2e485677019c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -102,6 +102,7 @@ struct nfs_mount_request { }; extern int nfs_mount(struct nfs_mount_request *info); +extern void nfs_umount(const struct nfs_mount_request *info); /* client.c */ extern struct rpc_program nfs_program; @@ -213,7 +214,6 @@ void nfs_zap_acl_cache(struct inode *inode); extern int nfs_wait_bit_killable(void *word); /* super.c */ -void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 extern struct file_system_type nfs4_xdev_fs_type; @@ -248,6 +248,12 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata); /* write.c */ extern void nfs_write_prepare(struct rpc_task *task, void *calldata); +#ifdef CONFIG_MIGRATION +extern int nfs_migrate_page(struct address_space *, + struct page *, struct page *); +#else +#define nfs_migrate_page NULL +#endif /* nfs4proc.c */ extern int _nfs4_call_sync(struct nfs_server *server, @@ -368,24 +374,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) return ((unsigned long)len + (unsigned long)base + PAGE_SIZE - 1) >> PAGE_SHIFT; } - -#define IPV6_SCOPE_DELIMITER '%' - -/* - * Set the port number in an address. Be agnostic about the address - * family. - */ -static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) -{ - struct sockaddr_in *ap = (struct sockaddr_in *)sap; - struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap; - - switch (sap->sa_family) { - case AF_INET: - ap->sin_port = htons(port); - break; - case AF_INET6: - ap6->sin6_port = htons(port); - break; - } -} diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 38ef9eaec407..0adefc40cc89 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -209,6 +209,71 @@ out_mnt_err: goto out; } +/** + * nfs_umount - Notify a server that we have unmounted this export + * @info: pointer to umount request arguments + * + * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always + * use UDP. + */ +void nfs_umount(const struct nfs_mount_request *info) +{ + static const struct rpc_timeout nfs_umnt_timeout = { + .to_initval = 1 * HZ, + .to_maxval = 3 * HZ, + .to_retries = 2, + }; + struct rpc_create_args args = { + .protocol = IPPROTO_UDP, + .address = info->sap, + .addrsize = info->salen, + .timeout = &nfs_umnt_timeout, + .servername = info->hostname, + .program = &mnt_program, + .version = info->version, + .authflavor = RPC_AUTH_UNIX, + .flags = RPC_CLNT_CREATE_NOPING, + }; + struct mountres result; + struct rpc_message msg = { + .rpc_argp = info->dirpath, + .rpc_resp = &result, + }; + struct rpc_clnt *clnt; + int status; + + if (info->noresvport) + args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; + + clnt = rpc_create(&args); + if (unlikely(IS_ERR(clnt))) + goto out_clnt_err; + + dprintk("NFS: sending UMNT request for %s:%s\n", + (info->hostname ? info->hostname : "server"), info->dirpath); + + if (info->version == NFS_MNT3_VERSION) + msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT]; + else + msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT]; + + status = rpc_call_sync(clnt, &msg, 0); + rpc_shutdown_client(clnt); + + if (unlikely(status < 0)) + goto out_call_err; + + return; + +out_clnt_err: + dprintk("NFS: failed to create UMNT RPC client, status=%ld\n", + PTR_ERR(clnt)); + return; + +out_call_err: + dprintk("NFS: UMNT request failed, status=%d\n", status); +} + /* * XDR encode/decode functions for MOUNT */ @@ -258,7 +323,7 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res) return -EIO; status = ntohl(*p); - for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) { + for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) { if (mnt_errtbl[i].status == status) { res->errno = mnt_errtbl[i].errno; return 0; @@ -309,7 +374,7 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res) return -EIO; status = ntohl(*p); - for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) { + for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) { if (mnt3_errtbl[i].status == status) { res->errno = mnt3_errtbl[i].errno; return 0; @@ -407,6 +472,13 @@ static struct rpc_procinfo mnt_procedures[] = { .p_statidx = MOUNTPROC_MNT, .p_name = "MOUNT", }, + [MOUNTPROC_UMNT] = { + .p_proc = MOUNTPROC_UMNT, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_arglen = MNT_enc_dirpath_sz, + .p_statidx = MOUNTPROC_UMNT, + .p_name = "UMOUNT", + }, }; static struct rpc_procinfo mnt3_procedures[] = { @@ -419,6 +491,13 @@ static struct rpc_procinfo mnt3_procedures[] = { .p_statidx = MOUNTPROC3_MNT, .p_name = "MOUNT", }, + [MOUNTPROC3_UMNT] = { + .p_proc = MOUNTPROC3_UMNT, + .p_encode = (kxdrproc_t)mnt_enc_dirpath, + .p_arglen = MNT_enc_dirpath_sz, + .p_statidx = MOUNTPROC3_UMNT, + .p_name = "UMOUNT", + }, }; diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 2a2a0a7143ad..ef22ee89aa77 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -121,11 +121,11 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) continue; - nfs_parse_ip_address(buf->data, buf->len, - mountdata->addr, &mountdata->addrlen); - if (mountdata->addr->sa_family == AF_UNSPEC) + mountdata->addrlen = rpc_pton(buf->data, buf->len, + mountdata->addr, mountdata->addrlen); + if (mountdata->addrlen == 0) continue; - nfs_set_port(mountdata->addr, NFS_PORT); + rpc_set_port(mountdata->addr, NFS_PORT); memcpy(page2, buf->data, buf->len); page2[buf->len] = '\0'; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6917311f201c..be6544aef41f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -61,6 +61,8 @@ #define NFS4_POLL_RETRY_MIN (HZ/10) #define NFS4_POLL_RETRY_MAX (15*HZ) +#define NFS4_MAX_LOOP_ON_RECOVER (10) + struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); @@ -426,17 +428,19 @@ out: static int nfs4_recover_session(struct nfs4_session *session) { struct nfs_client *clp = session->clp; + unsigned int loop; int ret; - for (;;) { + for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { ret = nfs4_wait_clnt_recover(clp); if (ret != 0) - return ret; + break; if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) break; nfs4_schedule_state_manager(clp); + ret = -EIO; } - return 0; + return ret; } static int nfs41_setup_sequence(struct nfs4_session *session, @@ -1444,18 +1448,20 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; + unsigned int loop; int ret; - for (;;) { + for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { ret = nfs4_wait_clnt_recover(clp); if (ret != 0) - return ret; + break; if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) break; nfs4_schedule_state_recovery(clp); + ret = -EIO; } - return 0; + return ret; } /* @@ -1997,12 +2003,34 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server, &msg, &args, &res, 0); if (status == 0) { memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); + server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| + NFS_CAP_SYMLINKS|NFS_CAP_FILEID| + NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| + NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| + NFS_CAP_CTIME|NFS_CAP_MTIME); if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) server->caps |= NFS_CAP_ACLS; if (res.has_links != 0) server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; + if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID) + server->caps |= NFS_CAP_FILEID; + if (res.attr_bitmask[1] & FATTR4_WORD1_MODE) + server->caps |= NFS_CAP_MODE; + if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS) + server->caps |= NFS_CAP_NLINK; + if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER) + server->caps |= NFS_CAP_OWNER; + if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP) + server->caps |= NFS_CAP_OWNER_GROUP; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS) + server->caps |= NFS_CAP_ATIME; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA) + server->caps |= NFS_CAP_CTIME; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) + server->caps |= NFS_CAP_MTIME; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 14b6f513648f..cfc30d362f94 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3148,7 +3148,8 @@ out_overflow: return -EIO; } -static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) +static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs_client *clp, uint32_t *uid, int may_sleep) { uint32_t len; __be32 *p; @@ -3165,7 +3166,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf p = xdr_inline_decode(xdr, len); if (unlikely(!p)) goto out_overflow; - if (len < XDR_MAX_NETOBJ) { + if (!may_sleep) { + /* do nothing */ + } else if (len < XDR_MAX_NETOBJ) { if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) ret = NFS_ATTR_FATTR_OWNER; else @@ -3183,7 +3186,8 @@ out_overflow: return -EIO; } -static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) +static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs_client *clp, uint32_t *gid, int may_sleep) { uint32_t len; __be32 *p; @@ -3200,7 +3204,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf p = xdr_inline_decode(xdr, len); if (unlikely(!p)) goto out_overflow; - if (len < XDR_MAX_NETOBJ) { + if (!may_sleep) { + /* do nothing */ + } else if (len < XDR_MAX_NETOBJ) { if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) ret = NFS_ATTR_FATTR_GROUP; else @@ -3616,7 +3622,8 @@ xdr_error: return status; } -static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) +static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, + const struct nfs_server *server, int may_sleep) { __be32 *savep; uint32_t attrlen, @@ -3688,12 +3695,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons goto xdr_error; fattr->valid |= status; - status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid); + status = decode_attr_owner(xdr, bitmap, server->nfs_client, + &fattr->uid, may_sleep); if (status < 0) goto xdr_error; fattr->valid |= status; - status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid); + status = decode_attr_group(xdr, bitmap, server->nfs_client, + &fattr->gid, may_sleep); if (status < 0) goto xdr_error; fattr->valid |= status; @@ -4616,7 +4625,8 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct status = decode_open_downgrade(&xdr, res); if (status != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4643,7 +4653,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac status = decode_access(&xdr, res); if (status != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4670,7 +4681,8 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo goto out; if ((status = decode_getfh(&xdr, res->fh)) != 0) goto out; - status = decode_getfattr(&xdr, res->fattr, res->server); + status = decode_getfattr(&xdr, res->fattr, res->server + ,!RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4694,7 +4706,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf if ((status = decode_putrootfh(&xdr)) != 0) goto out; if ((status = decode_getfh(&xdr, res->fh)) == 0) - status = decode_getfattr(&xdr, res->fattr, res->server); + status = decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4719,7 +4732,8 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem goto out; if ((status = decode_remove(&xdr, &res->cinfo)) != 0) goto out; - decode_getfattr(&xdr, &res->dir_attr, res->server); + decode_getfattr(&xdr, &res->dir_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4749,11 +4763,13 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) goto out; /* Current FH is target directory */ - if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0) + if (decode_getfattr(&xdr, res->new_fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) goto out; if ((status = decode_restorefh(&xdr)) != 0) goto out; - decode_getfattr(&xdr, res->old_fattr, res->server); + decode_getfattr(&xdr, res->old_fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4786,11 +4802,13 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link * Note order: OP_LINK leaves the directory as the current * filehandle. */ - if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0) + if (decode_getfattr(&xdr, res->dir_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) goto out; if ((status = decode_restorefh(&xdr)) != 0) goto out; - decode_getfattr(&xdr, res->fattr, res->server); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4819,11 +4837,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr goto out; if ((status = decode_getfh(&xdr, res->fh)) != 0) goto out; - if (decode_getfattr(&xdr, res->fattr, res->server) != 0) + if (decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) goto out; if ((status = decode_restorefh(&xdr)) != 0) goto out; - decode_getfattr(&xdr, res->dir_fattr, res->server); + decode_getfattr(&xdr, res->dir_fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4855,7 +4875,8 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g status = decode_putfh(&xdr); if (status) goto out; - status = decode_getfattr(&xdr, res->fattr, res->server); + status = decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4962,7 +4983,8 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos * an ESTALE error. Shouldn't be a problem, * though, since fattr->valid will remain unset. */ - decode_getfattr(&xdr, res->fattr, res->server); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -4994,11 +5016,13 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr goto out; if (decode_getfh(&xdr, &res->fh) != 0) goto out; - if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) + if (decode_getfattr(&xdr, res->f_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)) != 0) goto out; if (decode_restorefh(&xdr) != 0) goto out; - decode_getfattr(&xdr, res->dir_attr, res->server); + decode_getfattr(&xdr, res->dir_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -5046,7 +5070,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf status = decode_open(&xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->f_attr, res->server); + decode_getfattr(&xdr, res->f_attr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -5073,7 +5098,8 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se status = decode_setattr(&xdr); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -5247,7 +5273,8 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ status = decode_write(&xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); if (!status) status = res->count; out: @@ -5276,7 +5303,8 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri status = decode_commit(&xdr, res); if (status) goto out; - decode_getfattr(&xdr, res->fattr, res->server); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -5440,7 +5468,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf if (status != 0) goto out; status = decode_delegreturn(&xdr); - decode_getfattr(&xdr, res->fattr, res->server); + decode_getfattr(&xdr, res->fattr, res->server, + !RPC_IS_ASYNC(rqstp->rq_task)); out: return status; } @@ -5468,7 +5497,8 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, goto out; xdr_enter_page(&xdr, PAGE_SIZE); status = decode_getfattr(&xdr, &res->fs_locations->fattr, - res->fs_locations->server); + res->fs_locations->server, + !RPC_IS_ASYNC(req->rq_task)); out: return status; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0b4cbdc60abd..9c85cdb353aa 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -158,7 +158,7 @@ static const match_table_t nfs_mount_option_tokens = { { Opt_mountvers, "mountvers=%s" }, { Opt_nfsvers, "nfsvers=%s" }, { Opt_nfsvers, "vers=%s" }, - { Opt_minorversion, "minorversion=%u" }, + { Opt_minorversion, "minorversion=%s" }, { Opt_sec, "sec=%s" }, { Opt_proto, "proto=%s" }, @@ -742,129 +742,10 @@ static int nfs_verify_server_address(struct sockaddr *addr) } } + dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); return 0; } -static void nfs_parse_ipv4_address(char *string, size_t str_len, - struct sockaddr *sap, size_t *addr_len) -{ - struct sockaddr_in *sin = (struct sockaddr_in *)sap; - u8 *addr = (u8 *)&sin->sin_addr.s_addr; - - if (str_len <= INET_ADDRSTRLEN) { - dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n", - (int)str_len, string); - - sin->sin_family = AF_INET; - *addr_len = sizeof(*sin); - if (in4_pton(string, str_len, addr, '\0', NULL)) - return; - } - - sap->sa_family = AF_UNSPEC; - *addr_len = 0; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, - const char *delim, - struct sockaddr_in6 *sin6) -{ - char *p; - size_t len; - - if ((string + str_len) == delim) - return 1; - - if (*delim != IPV6_SCOPE_DELIMITER) - return 0; - - if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) - return 0; - - len = (string + str_len) - delim - 1; - p = kstrndup(delim + 1, len, GFP_KERNEL); - if (p) { - unsigned long scope_id = 0; - struct net_device *dev; - - dev = dev_get_by_name(&init_net, p); - if (dev != NULL) { - scope_id = dev->ifindex; - dev_put(dev); - } else { - if (strict_strtoul(p, 10, &scope_id) == 0) { - kfree(p); - return 0; - } - } - - kfree(p); - - sin6->sin6_scope_id = scope_id; - dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); - return 1; - } - - return 0; -} - -static void nfs_parse_ipv6_address(char *string, size_t str_len, - struct sockaddr *sap, size_t *addr_len) -{ - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; - const char *delim; - - if (str_len <= INET6_ADDRSTRLEN) { - dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n", - (int)str_len, string); - - sin6->sin6_family = AF_INET6; - *addr_len = sizeof(*sin6); - if (in6_pton(string, str_len, addr, - IPV6_SCOPE_DELIMITER, &delim) != 0) { - if (nfs_parse_ipv6_scope_id(string, str_len, - delim, sin6) != 0) - return; - } - } - - sap->sa_family = AF_UNSPEC; - *addr_len = 0; -} -#else -static void nfs_parse_ipv6_address(char *string, size_t str_len, - struct sockaddr *sap, size_t *addr_len) -{ - sap->sa_family = AF_UNSPEC; - *addr_len = 0; -} -#endif - -/* - * Construct a sockaddr based on the contents of a string that contains - * an IP address in presentation format. - * - * If there is a problem constructing the new sockaddr, set the address - * family to AF_UNSPEC. - */ -void nfs_parse_ip_address(char *string, size_t str_len, - struct sockaddr *sap, size_t *addr_len) -{ - unsigned int i, colons; - - colons = 0; - for (i = 0; i < str_len; i++) - if (string[i] == ':') - colons++; - - if (colons >= 2) - nfs_parse_ipv6_address(string, str_len, sap, addr_len); - else - nfs_parse_ipv4_address(string, str_len, sap, addr_len); -} - /* * Sanity check the NFS transport protocol. * @@ -904,8 +785,6 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) /* * Parse the value of the 'sec=' option. - * - * The flavor_len setting is for v4 mounts. */ static int nfs_parse_security_flavors(char *value, struct nfs_parsed_mount_data *mnt) @@ -916,53 +795,43 @@ static int nfs_parse_security_flavors(char *value, switch (match_token(value, nfs_secflavor_tokens, args)) { case Opt_sec_none: - mnt->auth_flavor_len = 0; mnt->auth_flavors[0] = RPC_AUTH_NULL; break; case Opt_sec_sys: - mnt->auth_flavor_len = 0; mnt->auth_flavors[0] = RPC_AUTH_UNIX; break; case Opt_sec_krb5: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; break; case Opt_sec_krb5i: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; break; case Opt_sec_krb5p: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; break; case Opt_sec_lkey: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; break; case Opt_sec_lkeyi: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; break; case Opt_sec_lkeyp: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; break; case Opt_sec_spkm: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; break; case Opt_sec_spkmi: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; break; case Opt_sec_spkmp: - mnt->auth_flavor_len = 1; mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; break; default: return 0; } + mnt->auth_flavor_len = 1; return 1; } @@ -1001,7 +870,6 @@ static int nfs_parse_mount_options(char *raw, while ((p = strsep(&raw, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; unsigned long option; - int int_option; int token; if (!*p) @@ -1273,11 +1141,16 @@ static int nfs_parse_mount_options(char *raw, } break; case Opt_minorversion: - if (match_int(args, &int_option)) - return 0; - if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION) - return 0; - mnt->minorversion = int_option; + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + rc = strict_strtoul(string, 10, &option); + kfree(string); + if (rc != 0) + goto out_invalid_value; + if (option > NFS4_MAX_MINOR_VERSION) + goto out_invalid_value; + mnt->minorversion = option; break; /* @@ -1352,11 +1225,14 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - nfs_parse_ip_address(string, strlen(string), - (struct sockaddr *) - &mnt->nfs_server.address, - &mnt->nfs_server.addrlen); + mnt->nfs_server.addrlen = + rpc_pton(string, strlen(string), + (struct sockaddr *) + &mnt->nfs_server.address, + sizeof(mnt->nfs_server.address)); kfree(string); + if (mnt->nfs_server.addrlen == 0) + goto out_invalid_address; break; case Opt_clientaddr: string = match_strdup(args); @@ -1376,11 +1252,14 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - nfs_parse_ip_address(string, strlen(string), - (struct sockaddr *) - &mnt->mount_server.address, - &mnt->mount_server.addrlen); + mnt->mount_server.addrlen = + rpc_pton(string, strlen(string), + (struct sockaddr *) + &mnt->mount_server.address, + sizeof(mnt->mount_server.address)); kfree(string); + if (mnt->mount_server.addrlen == 0) + goto out_invalid_address; break; case Opt_lookupcache: string = match_strdup(args); @@ -1432,8 +1311,11 @@ static int nfs_parse_mount_options(char *raw, return 1; +out_invalid_address: + printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); + return 0; out_invalid_value: - printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p); + printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); return 0; out_nomem: printk(KERN_INFO "NFS: not enough memory to parse option\n"); @@ -1445,13 +1327,50 @@ out_security_failure: } /* + * Match the requested auth flavors with the list returned by + * the server. Returns zero and sets the mount's authentication + * flavor on success; returns -EACCES if server does not support + * the requested flavor. + */ +static int nfs_walk_authlist(struct nfs_parsed_mount_data *args, + struct nfs_mount_request *request) +{ + unsigned int i, j, server_authlist_len = *(request->auth_flav_len); + + /* + * We avoid sophisticated negotiating here, as there are + * plenty of cases where we can get it wrong, providing + * either too little or too much security. + * + * RFC 2623, section 2.7 suggests we SHOULD prefer the + * flavor listed first. However, some servers list + * AUTH_NULL first. Our caller plants AUTH_SYS, the + * preferred default, in args->auth_flavors[0] if user + * didn't specify sec= mount option. + */ + for (i = 0; i < args->auth_flavor_len; i++) + for (j = 0; j < server_authlist_len; j++) + if (args->auth_flavors[i] == request->auth_flavs[j]) { + dfprintk(MOUNT, "NFS: using auth flavor %d\n", + request->auth_flavs[j]); + args->auth_flavors[0] = request->auth_flavs[j]; + return 0; + } + + dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n"); + nfs_umount(request); + return -EACCES; +} + +/* * Use the remote server's MOUNT service to request the NFS file handle * corresponding to the provided path. */ static int nfs_try_mount(struct nfs_parsed_mount_data *args, struct nfs_fh *root_fh) { - unsigned int auth_flavor_len = 0; + rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS]; + unsigned int server_authlist_len = ARRAY_SIZE(server_authlist); struct nfs_mount_request request = { .sap = (struct sockaddr *) &args->mount_server.address, @@ -1459,7 +1378,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, .protocol = args->mount_server.protocol, .fh = root_fh, .noresvport = args->flags & NFS_MOUNT_NORESVPORT, - .auth_flav_len = &auth_flavor_len, + .auth_flav_len = &server_authlist_len, + .auth_flavs = server_authlist, }; int status; @@ -1489,19 +1409,25 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, /* * autobind will be used if mount_server.port == 0 */ - nfs_set_port(request.sap, args->mount_server.port); + rpc_set_port(request.sap, args->mount_server.port); /* * Now ask the mount server to map our export path * to a file handle. */ status = nfs_mount(&request); - if (status == 0) - return 0; + if (status != 0) { + dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", + request.hostname, status); + return status; + } - dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", - request.hostname, status); - return status; + /* + * MNTv1 (NFSv2) does not support auth flavor negotiation. + */ + if (args->mount_server.version != NFS_MNT3_VERSION) + return 0; + return nfs_walk_authlist(args, &request); } static int nfs_parse_simple_hostname(const char *dev_name, @@ -1676,6 +1602,7 @@ static int nfs_validate_mount_data(void *options, args->nfs_server.port = 0; /* autobind unless user sets port */ args->nfs_server.protocol = XPRT_TRANSPORT_TCP; args->auth_flavors[0] = RPC_AUTH_UNIX; + args->auth_flavor_len = 1; switch (data->version) { case 1: @@ -1776,7 +1703,7 @@ static int nfs_validate_mount_data(void *options, &args->nfs_server.address)) goto out_no_address; - nfs_set_port((struct sockaddr *)&args->nfs_server.address, + rpc_set_port((struct sockaddr *)&args->nfs_server.address, args->nfs_server.port); nfs_set_mount_transport_protocol(args); @@ -2339,7 +2266,7 @@ static int nfs4_validate_mount_data(void *options, args->acdirmax = NFS_DEF_ACDIRMAX; args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ args->auth_flavors[0] = RPC_AUTH_UNIX; - args->auth_flavor_len = 0; + args->auth_flavor_len = 1; args->minorversion = 0; switch (data->version) { @@ -2409,7 +2336,7 @@ static int nfs4_validate_mount_data(void *options, &args->nfs_server.address)) return -EINVAL; - nfs_set_port((struct sockaddr *)&args->nfs_server.address, + rpc_set_port((struct sockaddr *)&args->nfs_server.address, args->nfs_server.port); nfs_validate_transport_protocol(args); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a34fae21fe10..120acadc6a84 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -13,6 +13,7 @@ #include <linux/file.h> #include <linux/writeback.h> #include <linux/swap.h> +#include <linux/migrate.h> #include <linux/sunrpc/clnt.h> #include <linux/nfs_fs.h> @@ -26,6 +27,7 @@ #include "internal.h" #include "iostat.h" #include "nfs4_fs.h" +#include "fscache.h" #define NFSDBG_FACILITY NFSDBG_PAGECACHE @@ -218,24 +220,17 @@ static void nfs_end_page_writeback(struct page *page) clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } -/* - * Find an associated nfs write request, and prepare to flush it out - * May return an error if the user signalled nfs_wait_on_request(). - */ -static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page) +static struct nfs_page *nfs_find_and_lock_request(struct page *page) { struct inode *inode = page->mapping->host; struct nfs_page *req; int ret; spin_lock(&inode->i_lock); - for(;;) { + for (;;) { req = nfs_page_find_request_locked(page); - if (req == NULL) { - spin_unlock(&inode->i_lock); - return 0; - } + if (req == NULL) + break; if (nfs_set_page_tag_locked(req)) break; /* Note: If we hold the page lock, as is the case in nfs_writepage, @@ -247,23 +242,40 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, ret = nfs_wait_on_request(req); nfs_release_request(req); if (ret != 0) - return ret; + return ERR_PTR(ret); spin_lock(&inode->i_lock); } - if (test_bit(PG_CLEAN, &req->wb_flags)) { - spin_unlock(&inode->i_lock); - BUG(); - } - if (nfs_set_page_writeback(page) != 0) { - spin_unlock(&inode->i_lock); - BUG(); - } spin_unlock(&inode->i_lock); + return req; +} + +/* + * Find an associated nfs write request, and prepare to flush it out + * May return an error if the user signalled nfs_wait_on_request(). + */ +static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, + struct page *page) +{ + struct nfs_page *req; + int ret = 0; + + req = nfs_find_and_lock_request(page); + if (!req) + goto out; + ret = PTR_ERR(req); + if (IS_ERR(req)) + goto out; + + ret = nfs_set_page_writeback(page); + BUG_ON(ret != 0); + BUG_ON(test_bit(PG_CLEAN, &req->wb_flags)); + if (!nfs_pageio_add_request(pgio, req)) { nfs_redirty_request(req); - return pgio->pg_error; + ret = pgio->pg_error; } - return 0; +out: + return ret; } static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) @@ -1580,6 +1592,41 @@ int nfs_wb_page(struct inode *inode, struct page* page) return nfs_wb_page_priority(inode, page, FLUSH_STABLE); } +#ifdef CONFIG_MIGRATION +int nfs_migrate_page(struct address_space *mapping, struct page *newpage, + struct page *page) +{ + struct nfs_page *req; + int ret; + + if (PageFsCache(page)) + nfs_fscache_release_page(page, GFP_KERNEL); + + req = nfs_find_and_lock_request(page); + ret = PTR_ERR(req); + if (IS_ERR(req)) + goto out; + + ret = migrate_page(mapping, newpage, page); + if (!req) + goto out; + if (ret) + goto out_unlock; + page_cache_get(newpage); + req->wb_page = newpage; + SetPagePrivate(newpage); + set_page_private(newpage, page_private(page)); + ClearPagePrivate(page); + set_page_private(page, 0); + page_cache_release(page); +out_unlock: + nfs_clear_page_tag_locked(req); + nfs_release_request(req); +out: + return ret; +} +#endif + int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index b92a27629fb7..d9462643155c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -85,6 +85,11 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, expkey_request); +} + static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); static struct cache_detail svc_expkey_cache; @@ -259,7 +264,7 @@ static struct cache_detail svc_expkey_cache = { .hash_table = expkey_table, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_request = expkey_request, + .cache_upcall = expkey_upcall, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -355,6 +360,11 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); +} + static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -724,7 +734,7 @@ struct cache_detail svc_export_cache = { .hash_table = export_table, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_request = svc_export_request, + .cache_upcall = svc_export_upcall, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5b398421b051..cdfa86fa1471 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -146,6 +146,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int +idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) +{ + return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); +} + +static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -175,10 +181,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) } static void -warn_no_idmapd(struct cache_detail *detail) +warn_no_idmapd(struct cache_detail *detail, int has_died) { printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", - detail->last_close? "died" : "not been started"); + has_died ? "died" : "not been started"); } @@ -192,7 +198,7 @@ static struct cache_detail idtoname_cache = { .hash_table = idtoname_table, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_request = idtoname_request, + .cache_upcall = idtoname_upcall, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -325,6 +331,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int +nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) +{ + return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); +} + +static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -363,7 +375,7 @@ static struct cache_detail nametoid_cache = { .hash_table = nametoid_table, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_request = nametoid_request, + .cache_upcall = nametoid_upcall, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 6d0847562d87..7e906c5b7671 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -37,6 +37,7 @@ #include <linux/nfsd/xdr.h> #include <linux/nfsd/syscall.h> #include <linux/lockd/lockd.h> +#include <linux/sunrpc/clnt.h> #include <asm/uaccess.h> #include <net/ipv6.h> @@ -490,22 +491,18 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) * * Input: * buf: '\n'-terminated C string containing a - * presentation format IPv4 address + * presentation format IP address * size: length of C string in @buf * Output: * On success: returns zero if all specified locks were released; * returns one if one or more locks were not released * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in */ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) { - struct sockaddr_in sin = { - .sin_family = AF_INET, - }; - int b1, b2, b3, b4; - char c; + struct sockaddr_storage address; + struct sockaddr *sap = (struct sockaddr *)&address; + size_t salen = sizeof(address); char *fo_path; /* sanity check */ @@ -519,14 +516,10 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - /* get ipv4 address */ - if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) - return -EINVAL; - if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255) + if (rpc_pton(fo_path, size, sap, salen) == 0) return -EINVAL; - sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4); - return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin); + return nlmsvc_unlock_all_by_ip(sap); } /** diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 19fe15d12042..320569eabe3b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -167,6 +167,15 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) +#define NFS_CAP_CHANGE_ATTR (1U << 5) +#define NFS_CAP_FILEID (1U << 6) +#define NFS_CAP_MODE (1U << 7) +#define NFS_CAP_NLINK (1U << 8) +#define NFS_CAP_OWNER (1U << 9) +#define NFS_CAP_OWNER_GROUP (1U << 10) +#define NFS_CAP_ATIME (1U << 11) +#define NFS_CAP_CTIME (1U << 12) +#define NFS_CAP_MTIME (1U << 13) /* maximum number of slots to use */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 2d8b211b9324..6f52b4d7c447 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -59,6 +59,15 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ +struct cache_detail_procfs { + struct proc_dir_entry *proc_ent; + struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; +}; + +struct cache_detail_pipefs { + struct dentry *dir; +}; + struct cache_detail { struct module * owner; int hash_size; @@ -70,15 +79,17 @@ struct cache_detail { char *name; void (*cache_put)(struct kref *); - void (*cache_request)(struct cache_detail *cd, - struct cache_head *h, - char **bpp, int *blen); + int (*cache_upcall)(struct cache_detail *, + struct cache_head *); + int (*cache_parse)(struct cache_detail *, char *buf, int len); int (*cache_show)(struct seq_file *m, struct cache_detail *cd, struct cache_head *h); + void (*warn_no_listener)(struct cache_detail *cd, + int has_died); struct cache_head * (*alloc)(void); int (*match)(struct cache_head *orig, struct cache_head *new); @@ -96,13 +107,15 @@ struct cache_detail { /* fields for communication over channel */ struct list_head queue; - struct proc_dir_entry *proc_ent; - struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; atomic_t readers; /* how many time is /chennel open */ time_t last_close; /* if no readers, when did last close */ time_t last_warn; /* when we last warned about no readers */ - void (*warn_no_listener)(struct cache_detail *cd); + + union { + struct cache_detail_procfs procfs; + struct cache_detail_pipefs pipefs; + } u; }; @@ -127,6 +140,10 @@ struct cache_deferred_req { }; +extern const struct file_operations cache_file_operations_pipefs; +extern const struct file_operations content_file_operations_pipefs; +extern const struct file_operations cache_flush_operations_pipefs; + extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash); @@ -134,6 +151,13 @@ extern struct cache_head * sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); +extern int +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, + void (*cache_request)(struct cache_detail *, + struct cache_head *, + char **, + int *)); + extern void cache_clean_deferred(void *owner); @@ -171,6 +195,10 @@ extern void cache_purge(struct cache_detail *detail); extern int cache_register(struct cache_detail *cd); extern void cache_unregister(struct cache_detail *cd); +extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, + mode_t, struct cache_detail *); +extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); + extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); extern int qword_get(char **bpp, char *dest, int bufsize); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 37881f1a0bd7..ab3f6e90caa5 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -9,6 +9,10 @@ #ifndef _LINUX_SUNRPC_CLNT_H #define _LINUX_SUNRPC_CLNT_H +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> + #include <linux/sunrpc/msg_prot.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprt.h> @@ -17,6 +21,7 @@ #include <linux/sunrpc/xdr.h> #include <linux/sunrpc/timer.h> #include <asm/signal.h> +#include <linux/path.h> struct rpc_inode; @@ -50,9 +55,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - char cl_pathname[30];/* Path in rpc_pipe_fs */ - struct vfsmount * cl_vfsmnt; - struct dentry * cl_dentry; /* inode */ + struct path cl_path; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; @@ -151,5 +154,39 @@ void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *); +size_t rpc_uaddr2sockaddr(const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 70df4f1d8847..77e624883393 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -189,7 +189,22 @@ typedef __be32 rpc_fraghdr; * Additionally, the two alternative forms specified in Section 2.2 of * [RFC2373] are also acceptable. */ -#define RPCBIND_MAXUADDRLEN (56u) + +#include <linux/inet.h> + +/* Maximum size of the port number part of a universal address */ +#define RPCBIND_MAXUADDRPLEN sizeof(".255.255") + +/* Maximum size of an IPv4 universal address */ +#define RPCBIND_MAXUADDR4LEN \ + (INET_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) + +/* Maximum size of an IPv6 universal address */ +#define RPCBIND_MAXUADDR6LEN \ + (INET6_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) + +/* Assume INET6_ADDRSTRLEN will always be larger than INET_ADDRSTRLEN... */ +#define RPCBIND_MAXUADDRLEN RPCBIND_MAXUADDR6LEN #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index cea764c2359f..a92571a34556 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -32,8 +32,8 @@ struct rpc_inode { wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; - struct rpc_pipe_ops *ops; struct delayed_work queue_timeout; + const struct rpc_pipe_ops *ops; }; static inline struct rpc_inode * @@ -44,9 +44,19 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); -extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); -extern int rpc_rmdir(struct dentry *); -extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags); +struct rpc_clnt; +extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); +extern int rpc_remove_client_dir(struct dentry *); + +struct cache_detail; +extern struct dentry *rpc_create_cache_dir(struct dentry *, + struct qstr *, + mode_t umode, + struct cache_detail *); +extern void rpc_remove_cache_dir(struct dentry *); + +extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, + const struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); extern struct vfsmount *rpc_get_mount(void); extern void rpc_put_mount(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 1175d58efc2e..c090df442572 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -38,10 +38,8 @@ enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, - RPC_DISPLAY_ALL, RPC_DISPLAY_HEX_ADDR, RPC_DISPLAY_HEX_PORT, - RPC_DISPLAY_UNIVERSAL_ADDR, RPC_DISPLAY_NETID, RPC_DISPLAY_MAX, }; diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index db73fd2a3f0e..9d2fca5ad14a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ - rpcb_clnt.o timer.o xdr.o \ + addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c new file mode 100644 index 000000000000..22e8fd89477f --- /dev/null +++ b/net/sunrpc/addr.c @@ -0,0 +1,364 @@ +/* + * Copyright 2009, Oracle. All rights reserved. + * + * Convert socket addresses to presentation addresses and universal + * addresses, and vice versa. + * + * Universal addresses are introduced by RFC 1833 and further refined by + * recent RFCs describing NFSv4. The universal address format is part + * of the external (network) interface provided by rpcbind version 3 + * and 4, and by NFSv4. Such an address is a string containing a + * presentation format IP address followed by a port number in + * "hibyte.lobyte" format. + * + * IPv6 addresses can also include a scope ID, typically denoted by + * a '%' followed by a device name or a non-negative integer. Refer to + * RFC 4291, Section 2.2 for details on IPv6 presentation formats. + */ + +#include <net/ipv6.h> +#include <linux/sunrpc/clnt.h> + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + * + * To keep the result as short as possible, especially + * since we don't shorthand, we don't want leading zeros + * in each halfword, so avoid %pI6. + */ + return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", + ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), + ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), + ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), + ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + char scopebuf[IPV6_SCOPE_ID_LEN]; + size_t len; + int rc; + + len = rpc_ntop6_noscopeid(sap, buf, buflen); + if (unlikely(len == 0)) + return len; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return len; + + rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", + IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); + if (unlikely((size_t)rc > sizeof(scopebuf))) + return 0; + + len += rc; + if (unlikely(len > buflen)) + return 0; + + strcat(buf, scopebuf); + return len; +} + +#else /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + return 0; +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + return 0; +} + +#endif /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static int rpc_ntop4(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +/** + * rpc_ntop - construct a presentation address in @buf + * @sap: socket address + * @buf: construction area + * @buflen: size of @buf, in bytes + * + * Plants a %NUL-terminated string in @buf and returns the length + * of the string, excluding the %NUL. Otherwise zero is returned. + */ +size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + switch (sap->sa_family) { + case AF_INET: + return rpc_ntop4(sap, buf, buflen); + case AF_INET6: + return rpc_ntop6(sap, buf, buflen); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_ntop); + +static size_t rpc_pton4(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&sin->sin_addr.s_addr; + + if (buflen > INET_ADDRSTRLEN || salen < sizeof(struct sockaddr_in)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in)); + + if (in4_pton(buf, buflen, addr, '\0', NULL) == 0) + return 0; + + sin->sin_family = AF_INET; + return sizeof(struct sockaddr_in);; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int rpc_parse_scope_id(const char *buf, const size_t buflen, + const char *delim, struct sockaddr_in6 *sin6) +{ + char *p; + size_t len; + + if ((buf + buflen) == delim) + return 1; + + if (*delim != IPV6_SCOPE_DELIMITER) + return 0; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return 0; + + len = (buf + buflen) - delim - 1; + p = kstrndup(delim + 1, len, GFP_KERNEL); + if (p) { + unsigned long scope_id = 0; + struct net_device *dev; + + dev = dev_get_by_name(&init_net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + if (strict_strtoul(p, 10, &scope_id) == 0) { + kfree(p); + return 0; + } + } + + kfree(p); + + sin6->sin6_scope_id = scope_id; + return 1; + } + + return 0; +} + +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + const char *delim; + + if (buflen > (INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN) || + salen < sizeof(struct sockaddr_in6)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in6)); + + if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) + return 0; + + if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) + return 0; + + sin6->sin6_family = AF_INET6; + return sizeof(struct sockaddr_in6); +} +#else +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + return 0; +} +#endif + +/** + * rpc_pton - Construct a sockaddr in @sap + * @buf: C string containing presentation format IP address + * @buflen: length of presentation address in bytes + * @sap: buffer into which to plant socket address + * @salen: size of buffer in bytes + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + * + * Plants a socket address in @sap and returns the size of the + * socket address, if successful. Returns zero if an error + * occurred. + */ +size_t rpc_pton(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + unsigned int i; + + for (i = 0; i < buflen; i++) + if (buf[i] == ':') + return rpc_pton6(buf, buflen, sap, salen); + return rpc_pton4(buf, buflen, sap, salen); +} +EXPORT_SYMBOL_GPL(rpc_pton); + +/** + * rpc_sockaddr2uaddr - Construct a universal address string from @sap. + * @sap: socket address + * + * Returns a %NUL-terminated string in dynamically allocated memory; + * otherwise NULL is returned if an error occurred. Caller must + * free the returned string. + */ +char *rpc_sockaddr2uaddr(const struct sockaddr *sap) +{ + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + unsigned short port; + + switch (sap->sa_family) { + case AF_INET: + if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + break; + case AF_INET6: + if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + break; + default: + return NULL; + } + + if (snprintf(portbuf, sizeof(portbuf), + ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) + return NULL; + + if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) + return NULL; + + return kstrdup(addrbuf, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); + +/** + * rpc_uaddr2sockaddr - convert a universal address to a socket address. + * @uaddr: C string containing universal address to convert + * @uaddr_len: length of universal address string + * @sap: buffer into which to plant socket address + * @salen: size of buffer + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + */ +size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, + struct sockaddr *sap, const size_t salen) +{ + char *c, buf[RPCBIND_MAXUADDRLEN]; + unsigned long portlo, porthi; + unsigned short port; + + if (uaddr_len > sizeof(buf)) + return 0; + + memcpy(buf, uaddr, uaddr_len); + + buf[uaddr_len] = '\n'; + buf[uaddr_len + 1] = '\0'; + + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) + return 0; + if (unlikely(portlo > 255)) + return 0; + + c[0] = '\n'; + c[1] = '\0'; + + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) + return 0; + if (unlikely(porthi > 255)) + return 0; + + port = (unsigned short)((porthi << 8) | portlo); + + c[0] = '\0'; + + if (rpc_pton(buf, strlen(buf), sap, salen) == 0) + return 0; + + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + return sizeof(struct sockaddr_in); + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + return sizeof(struct sockaddr_in6); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_uaddr2sockaddr); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 66d458fc6920..fc6a43ccd950 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -89,8 +89,8 @@ static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); static void gss_free_ctx(struct gss_cl_ctx *); -static struct rpc_pipe_ops gss_upcall_ops_v0; -static struct rpc_pipe_ops gss_upcall_ops_v1; +static const struct rpc_pipe_ops gss_upcall_ops_v0; +static const struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) @@ -777,7 +777,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. */ - gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, "gssd", clnt, &gss_upcall_ops_v1, RPC_PIPE_WAIT_FOR_OPEN); @@ -786,7 +786,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } - gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops_v0, RPC_PIPE_WAIT_FOR_OPEN); @@ -1507,7 +1507,7 @@ static const struct rpc_credops gss_nullops = { .crunwrap_resp = gss_unwrap_resp, }; -static struct rpc_pipe_ops gss_upcall_ops_v0 = { +static const struct rpc_pipe_ops gss_upcall_ops_v0 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, @@ -1515,7 +1515,7 @@ static struct rpc_pipe_ops gss_upcall_ops_v0 = { .release_pipe = gss_pipe_release, }; -static struct rpc_pipe_ops gss_upcall_ops_v1 = { +static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 2278a50c6444..2e6a148d277c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -181,6 +181,11 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, rsi_request); +} + static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -270,7 +275,7 @@ static struct cache_detail rsi_cache = { .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_request = rsi_request, + .cache_upcall = rsi_upcall, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ff0c23053d2f..db7720e453c3 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -27,10 +27,12 @@ #include <linux/net.h> #include <linux/workqueue.h> #include <linux/mutex.h> +#include <linux/pagemap.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/stats.h> +#include <linux/sunrpc/rpc_pipe_fs.h> #define RPCDBG_FACILITY RPCDBG_CACHE @@ -175,7 +177,13 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_update); -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); +static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) +{ + if (!cd->cache_upcall) + return -EINVAL; + return cd->cache_upcall(cd, h); +} + /* * This is the generic cache management routine for all * the authentication caches. @@ -284,76 +292,11 @@ static DEFINE_SPINLOCK(cache_list_lock); static struct cache_detail *current_detail; static int current_index; -static const struct file_operations cache_file_operations; -static const struct file_operations content_file_operations; -static const struct file_operations cache_flush_operations; - static void do_cache_clean(struct work_struct *work); static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); -static void remove_cache_proc_entries(struct cache_detail *cd) -{ - if (cd->proc_ent == NULL) - return; - if (cd->flush_ent) - remove_proc_entry("flush", cd->proc_ent); - if (cd->channel_ent) - remove_proc_entry("channel", cd->proc_ent); - if (cd->content_ent) - remove_proc_entry("content", cd->proc_ent); - cd->proc_ent = NULL; - remove_proc_entry(cd->name, proc_net_rpc); -} - -#ifdef CONFIG_PROC_FS -static int create_cache_proc_entries(struct cache_detail *cd) -{ - struct proc_dir_entry *p; - - cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); - if (cd->proc_ent == NULL) - goto out_nomem; - cd->channel_ent = cd->content_ent = NULL; - - p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_flush_operations, cd); - cd->flush_ent = p; - if (p == NULL) - goto out_nomem; - - if (cd->cache_request || cd->cache_parse) { - p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_file_operations, cd); - cd->channel_ent = p; - if (p == NULL) - goto out_nomem; - } - if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &content_file_operations, cd); - cd->content_ent = p; - if (p == NULL) - goto out_nomem; - } - return 0; -out_nomem: - remove_cache_proc_entries(cd); - return -ENOMEM; -} -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd) -{ - return 0; -} -#endif - -int cache_register(struct cache_detail *cd) +static void sunrpc_init_cache_detail(struct cache_detail *cd) { - int ret; - - ret = create_cache_proc_entries(cd); - if (ret) - return ret; rwlock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); @@ -367,11 +310,9 @@ int cache_register(struct cache_detail *cd) /* start the cleaning process */ schedule_delayed_work(&cache_cleaner, 0); - return 0; } -EXPORT_SYMBOL_GPL(cache_register); -void cache_unregister(struct cache_detail *cd) +static void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); @@ -386,7 +327,6 @@ void cache_unregister(struct cache_detail *cd) list_del_init(&cd->others); write_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); - remove_cache_proc_entries(cd); if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ cancel_delayed_work_sync(&cache_cleaner); @@ -395,7 +335,6 @@ void cache_unregister(struct cache_detail *cd) out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } -EXPORT_SYMBOL_GPL(cache_unregister); /* clean cache tries to find something to clean * and cleans it. @@ -687,18 +626,18 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; -static ssize_t -cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) +static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, + loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + struct inode *inode = filp->f_path.dentry->d_inode; int err; if (count == 0) return 0; - mutex_lock(&queue_io_mutex); /* protect against multiple concurrent + mutex_lock(&inode->i_mutex); /* protect against multiple concurrent * readers on this file */ again: spin_lock(&queue_lock); @@ -711,7 +650,7 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) } if (rp->q.list.next == &cd->queue) { spin_unlock(&queue_lock); - mutex_unlock(&queue_io_mutex); + mutex_unlock(&inode->i_mutex); BUG_ON(rp->offset); return 0; } @@ -758,49 +697,90 @@ cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) } if (err == -EAGAIN) goto again; - mutex_unlock(&queue_io_mutex); + mutex_unlock(&inode->i_mutex); return err ? err : count; } -static char write_buf[8192]; /* protected by queue_io_mutex */ +static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, + size_t count, struct cache_detail *cd) +{ + ssize_t ret; + + if (copy_from_user(kaddr, buf, count)) + return -EFAULT; + kaddr[count] = '\0'; + ret = cd->cache_parse(cd, kaddr, count); + if (!ret) + ret = count; + return ret; +} -static ssize_t -cache_write(struct file *filp, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t cache_slow_downcall(const char __user *buf, + size_t count, struct cache_detail *cd) { - int err; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + static char write_buf[8192]; /* protected by queue_io_mutex */ + ssize_t ret = -EINVAL; - if (count == 0) - return 0; if (count >= sizeof(write_buf)) - return -EINVAL; - + goto out; mutex_lock(&queue_io_mutex); + ret = cache_do_downcall(write_buf, buf, count, cd); + mutex_unlock(&queue_io_mutex); +out: + return ret; +} - if (copy_from_user(write_buf, buf, count)) { - mutex_unlock(&queue_io_mutex); - return -EFAULT; - } - write_buf[count] = '\0'; - if (cd->cache_parse) - err = cd->cache_parse(cd, write_buf, count); - else - err = -EINVAL; +static ssize_t cache_downcall(struct address_space *mapping, + const char __user *buf, + size_t count, struct cache_detail *cd) +{ + struct page *page; + char *kaddr; + ssize_t ret = -ENOMEM; + + if (count >= PAGE_CACHE_SIZE) + goto out_slow; + + page = find_or_create_page(mapping, 0, GFP_KERNEL); + if (!page) + goto out_slow; + + kaddr = kmap(page); + ret = cache_do_downcall(kaddr, buf, count, cd); + kunmap(page); + unlock_page(page); + page_cache_release(page); + return ret; +out_slow: + return cache_slow_downcall(buf, count, cd); +} - mutex_unlock(&queue_io_mutex); - return err ? err : count; +static ssize_t cache_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) +{ + struct address_space *mapping = filp->f_mapping; + struct inode *inode = filp->f_path.dentry->d_inode; + ssize_t ret = -EINVAL; + + if (!cd->cache_parse) + goto out; + + mutex_lock(&inode->i_mutex); + ret = cache_downcall(mapping, buf, count, cd); + mutex_unlock(&inode->i_mutex); +out: + return ret; } static DECLARE_WAIT_QUEUE_HEAD(queue_wait); -static unsigned int -cache_poll(struct file *filp, poll_table *wait) +static unsigned int cache_poll(struct file *filp, poll_table *wait, + struct cache_detail *cd) { unsigned int mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; poll_wait(filp, &queue_wait, wait); @@ -822,14 +802,13 @@ cache_poll(struct file *filp, poll_table *wait) return mask; } -static int -cache_ioctl(struct inode *ino, struct file *filp, - unsigned int cmd, unsigned long arg) +static int cache_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg, + struct cache_detail *cd) { int len = 0; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(ino)->data; if (cmd != FIONREAD || !rp) return -EINVAL; @@ -852,15 +831,13 @@ cache_ioctl(struct inode *ino, struct file *filp, return put_user(len, (int __user *)arg); } -static int -cache_open(struct inode *inode, struct file *filp) +static int cache_open(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = NULL; nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { - struct cache_detail *cd = PDE(inode)->data; - rp = kmalloc(sizeof(*rp), GFP_KERNEL); if (!rp) return -ENOMEM; @@ -875,11 +852,10 @@ cache_open(struct inode *inode, struct file *filp) return 0; } -static int -cache_release(struct inode *inode, struct file *filp) +static int cache_release(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; - struct cache_detail *cd = PDE(inode)->data; if (rp) { spin_lock(&queue_lock); @@ -908,18 +884,6 @@ cache_release(struct inode *inode, struct file *filp) -static const struct file_operations cache_file_operations = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = cache_read, - .write = cache_write, - .poll = cache_poll, - .ioctl = cache_ioctl, /* for FIONREAD */ - .open = cache_open, - .release = cache_release, -}; - - static void queue_loose(struct cache_detail *detail, struct cache_head *ch) { struct cache_queue *cq; @@ -1020,15 +984,21 @@ static void warn_no_listener(struct cache_detail *detail) if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; if (detail->warn_no_listener) - detail->warn_no_listener(detail); + detail->warn_no_listener(detail, detail->last_close != 0); } } /* - * register an upcall request to user-space. + * register an upcall request to user-space and queue it up for read() by the + * upcall daemon. + * * Each request is at most one page long. */ -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, + void (*cache_request)(struct cache_detail *, + struct cache_head *, + char **, + int *)) { char *buf; @@ -1036,9 +1006,6 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) char *bp; int len; - if (detail->cache_request == NULL) - return -EINVAL; - if (atomic_read(&detail->readers) == 0 && detail->last_close < get_seconds() - 30) { warn_no_listener(detail); @@ -1057,7 +1024,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) bp = buf; len = PAGE_SIZE; - detail->cache_request(detail, h, &bp, &len); + cache_request(detail, h, &bp, &len); if (len < 0) { kfree(buf); @@ -1075,6 +1042,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) wake_up(&queue_wait); return 0; } +EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); /* * parse a message from user-space and pass it @@ -1242,10 +1210,10 @@ static const struct seq_operations cache_content_op = { .show = c_show, }; -static int content_open(struct inode *inode, struct file *file) +static int content_open(struct inode *inode, struct file *file, + struct cache_detail *cd) { struct handle *han; - struct cache_detail *cd = PDE(inode)->data; han = __seq_open_private(file, &cache_content_op, sizeof(*han)); if (han == NULL) @@ -1255,17 +1223,10 @@ static int content_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations content_file_operations = { - .open = content_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static ssize_t read_flush(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; unsigned long p = *ppos; size_t len; @@ -1283,10 +1244,10 @@ static ssize_t read_flush(struct file *file, char __user *buf, return len; } -static ssize_t write_flush(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t write_flush(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; char *ep; long flushtime; @@ -1307,8 +1268,299 @@ static ssize_t write_flush(struct file * file, const char __user * buf, return count; } -static const struct file_operations cache_flush_operations = { +static ssize_t cache_read_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_procfs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_open(inode, filp, cd); +} + +static int cache_release_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_release(inode, filp, cd); +} + +static const struct file_operations cache_file_operations_procfs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_procfs, + .write = cache_write_procfs, + .poll = cache_poll_procfs, + .ioctl = cache_ioctl_procfs, /* for FIONREAD */ + .open = cache_open_procfs, + .release = cache_release_procfs, +}; + +static int content_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return content_open(inode, filp, cd); +} + +static const struct file_operations content_file_operations_procfs = { + .open = content_open_procfs, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static ssize_t read_flush_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_procfs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return write_flush(filp, buf, count, ppos, cd); +} + +static const struct file_operations cache_flush_operations_procfs = { + .open = nonseekable_open, + .read = read_flush_procfs, + .write = write_flush_procfs, +}; + +static void remove_cache_proc_entries(struct cache_detail *cd) +{ + if (cd->u.procfs.proc_ent == NULL) + return; + if (cd->u.procfs.flush_ent) + remove_proc_entry("flush", cd->u.procfs.proc_ent); + if (cd->u.procfs.channel_ent) + remove_proc_entry("channel", cd->u.procfs.proc_ent); + if (cd->u.procfs.content_ent) + remove_proc_entry("content", cd->u.procfs.proc_ent); + cd->u.procfs.proc_ent = NULL; + remove_proc_entry(cd->name, proc_net_rpc); +} + +#ifdef CONFIG_PROC_FS +static int create_cache_proc_entries(struct cache_detail *cd) +{ + struct proc_dir_entry *p; + + cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); + if (cd->u.procfs.proc_ent == NULL) + goto out_nomem; + cd->u.procfs.channel_ent = NULL; + cd->u.procfs.content_ent = NULL; + + p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_flush_operations_procfs, cd); + cd->u.procfs.flush_ent = p; + if (p == NULL) + goto out_nomem; + + if (cd->cache_upcall || cd->cache_parse) { + p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_file_operations_procfs, cd); + cd->u.procfs.channel_ent = p; + if (p == NULL) + goto out_nomem; + } + if (cd->cache_show) { + p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &content_file_operations_procfs, cd); + cd->u.procfs.content_ent = p; + if (p == NULL) + goto out_nomem; + } + return 0; +out_nomem: + remove_cache_proc_entries(cd); + return -ENOMEM; +} +#else /* CONFIG_PROC_FS */ +static int create_cache_proc_entries(struct cache_detail *cd) +{ + return 0; +} +#endif + +int cache_register(struct cache_detail *cd) +{ + int ret; + + sunrpc_init_cache_detail(cd); + ret = create_cache_proc_entries(cd); + if (ret) + sunrpc_destroy_cache_detail(cd); + return ret; +} +EXPORT_SYMBOL_GPL(cache_register); + +void cache_unregister(struct cache_detail *cd) +{ + remove_cache_proc_entries(cd); + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(cache_unregister); + +static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_open(inode, filp, cd); +} + +static int cache_release_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_release(inode, filp, cd); +} + +const struct file_operations cache_file_operations_pipefs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_pipefs, + .write = cache_write_pipefs, + .poll = cache_poll_pipefs, + .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ + .open = cache_open_pipefs, + .release = cache_release_pipefs, +}; + +static int content_open_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return content_open(inode, filp, cd); +} + +const struct file_operations content_file_operations_pipefs = { + .open = content_open_pipefs, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_pipefs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return write_flush(filp, buf, count, ppos, cd); +} + +const struct file_operations cache_flush_operations_pipefs = { .open = nonseekable_open, - .read = read_flush, - .write = write_flush, + .read = read_flush_pipefs, + .write = write_flush_pipefs, }; + +int sunrpc_cache_register_pipefs(struct dentry *parent, + const char *name, mode_t umode, + struct cache_detail *cd) +{ + struct qstr q; + struct dentry *dir; + int ret = 0; + + sunrpc_init_cache_detail(cd); + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len); + dir = rpc_create_cache_dir(parent, &q, umode, cd); + if (!IS_ERR(dir)) + cd->u.pipefs.dir = dir; + else { + sunrpc_destroy_cache_detail(cd); + ret = PTR_ERR(dir); + } + return ret; +} +EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); + +void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) +{ + rpc_remove_cache_dir(cd->u.pipefs.dir); + cd->u.pipefs.dir = NULL; + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); + diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ebfcf9b89909..c1e467e1b07d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -27,6 +27,8 @@ #include <linux/types.h> #include <linux/kallsyms.h> #include <linux/mm.h> +#include <linux/namei.h> +#include <linux/mount.h> #include <linux/slab.h> #include <linux/utsname.h> #include <linux/workqueue.h> @@ -97,33 +99,49 @@ static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; + struct nameidata nd; + struct path path; + char name[15]; + struct qstr q = { + .name = name, + }; int error; - clnt->cl_vfsmnt = ERR_PTR(-ENOENT); - clnt->cl_dentry = ERR_PTR(-ENOENT); + clnt->cl_path.mnt = ERR_PTR(-ENOENT); + clnt->cl_path.dentry = ERR_PTR(-ENOENT); if (dir_name == NULL) return 0; - clnt->cl_vfsmnt = rpc_get_mount(); - if (IS_ERR(clnt->cl_vfsmnt)) - return PTR_ERR(clnt->cl_vfsmnt); + path.mnt = rpc_get_mount(); + if (IS_ERR(path.mnt)) + return PTR_ERR(path.mnt); + error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd); + if (error) + goto err; for (;;) { - snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), - "%s/clnt%x", dir_name, - (unsigned int)clntid++); - clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); - if (!IS_ERR(clnt->cl_dentry)) - return 0; - error = PTR_ERR(clnt->cl_dentry); + q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); + name[sizeof(name) - 1] = '\0'; + q.hash = full_name_hash(q.name, q.len); + path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt); + if (!IS_ERR(path.dentry)) + break; + error = PTR_ERR(path.dentry); if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", - clnt->cl_pathname, error); - rpc_put_mount(); - return error; + printk(KERN_INFO "RPC: Couldn't create pipefs entry" + " %s/%s, error %d\n", + dir_name, name, error); + goto err_path_put; } } + path_put(&nd.path); + clnt->cl_path = path; + return 0; +err_path_put: + path_put(&nd.path); +err: + rpc_put_mount(); + return error; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) @@ -231,8 +249,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } out_no_path: @@ -423,8 +441,8 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9ced0628d69c..8dd81535e08f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -26,6 +26,7 @@ #include <linux/sunrpc/clnt.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> +#include <linux/sunrpc/cache.h> static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; @@ -125,7 +126,7 @@ static void rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_pipe_ops *ops; + const struct rpc_pipe_ops *ops; int need_release; mutex_lock(&inode->i_mutex); @@ -398,66 +399,12 @@ static const struct file_operations rpc_info_operations = { /* - * We have a single directory with 1 node in it. - */ -enum { - RPCAUTH_Root = 1, - RPCAUTH_lockd, - RPCAUTH_mount, - RPCAUTH_nfs, - RPCAUTH_portmap, - RPCAUTH_statd, - RPCAUTH_nfsd4_cb, - RPCAUTH_RootEOF -}; - -/* * Description of fs contents. */ struct rpc_filelist { - char *name; + const char *name; const struct file_operations *i_fop; - int mode; -}; - -static struct rpc_filelist files[] = { - [RPCAUTH_lockd] = { - .name = "lockd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_mount] = { - .name = "mount", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_nfs] = { - .name = "nfs", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_portmap] = { - .name = "portmap", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_statd] = { - .name = "statd", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, - [RPCAUTH_nfsd4_cb] = { - .name = "nfsd4_cb", - .mode = S_IFDIR | S_IRUGO | S_IXUGO, - }, -}; - -enum { - RPCAUTH_info = 2, - RPCAUTH_EOF -}; - -static struct rpc_filelist authfiles[] = { - [RPCAUTH_info] = { - .name = "info", - .i_fop = &rpc_info_operations, - .mode = S_IFREG | S_IRUSR, - }, + umode_t mode; }; struct vfsmount *rpc_get_mount(void) @@ -484,39 +431,8 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; -static int -rpc_lookup_parent(char *path, struct nameidata *nd) -{ - struct vfsmount *mnt; - - if (path[0] == '\0') - return -ENOENT; - - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) { - printk(KERN_WARNING "%s: %s failed to mount " - "pseudofilesystem \n", __FILE__, __func__); - return PTR_ERR(mnt); - } - - if (vfs_path_lookup(mnt->mnt_root, mnt, path, LOOKUP_PARENT, nd)) { - printk(KERN_WARNING "%s: %s failed to find path %s\n", - __FILE__, __func__, path); - rpc_put_mount(); - return -ENOENT; - } - return 0; -} - -static void -rpc_release_path(struct nameidata *nd) -{ - path_put(&nd->path); - rpc_put_mount(); -} - static struct inode * -rpc_get_inode(struct super_block *sb, int mode) +rpc_get_inode(struct super_block *sb, umode_t mode) { struct inode *inode = new_inode(sb); if (!inode) @@ -534,212 +450,274 @@ rpc_get_inode(struct super_block *sb, int mode) return inode; } -/* - * FIXME: This probably has races. - */ -static void rpc_depopulate(struct dentry *parent, - unsigned long start, unsigned long eof) +static int __rpc_create_common(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *dir = parent->d_inode; - struct list_head *pos, *next; - struct dentry *dentry, *dvec[10]; - int n = 0; + struct inode *inode; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); -repeat: - spin_lock(&dcache_lock); - list_for_each_safe(pos, next, &parent->d_subdirs) { - dentry = list_entry(pos, struct dentry, d_u.d_child); - if (!dentry->d_inode || - dentry->d_inode->i_ino < start || - dentry->d_inode->i_ino >= eof) - continue; - spin_lock(&dentry->d_lock); - if (!d_unhashed(dentry)) { - dget_locked(dentry); - __d_drop(dentry); - spin_unlock(&dentry->d_lock); - dvec[n++] = dentry; - if (n == ARRAY_SIZE(dvec)) - break; - } else - spin_unlock(&dentry->d_lock); - } - spin_unlock(&dcache_lock); - if (n) { - do { - dentry = dvec[--n]; - if (S_ISREG(dentry->d_inode->i_mode)) - simple_unlink(dir, dentry); - else if (S_ISDIR(dentry->d_inode->i_mode)) - simple_rmdir(dir, dentry); - d_delete(dentry); - dput(dentry); - } while (n); - goto repeat; - } - mutex_unlock(&dir->i_mutex); + BUG_ON(!d_unhashed(dentry)); + inode = rpc_get_inode(dir->i_sb, mode); + if (!inode) + goto out_err; + inode->i_ino = iunique(dir->i_sb, 100); + if (i_fop) + inode->i_fop = i_fop; + if (private) + rpc_inode_setowner(inode, private); + d_add(dentry, inode); + return 0; +out_err: + printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", + __FILE__, __func__, dentry->d_name.name); + dput(dentry); + return -ENOMEM; } -static int -rpc_populate(struct dentry *parent, - struct rpc_filelist *files, - int start, int eof) +static int __rpc_create(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *inode, *dir = parent->d_inode; - void *private = RPC_I(dir)->private; - struct dentry *dentry; - int mode, i; + int err; - mutex_lock(&dir->i_mutex); - for (i = start; i < eof; i++) { - dentry = d_alloc_name(parent, files[i].name); - if (!dentry) - goto out_bad; - dentry->d_op = &rpc_dentry_operations; - mode = files[i].mode; - inode = rpc_get_inode(dir->i_sb, mode); - if (!inode) { - dput(dentry); - goto out_bad; - } - inode->i_ino = i; - if (files[i].i_fop) - inode->i_fop = files[i].i_fop; - if (private) - rpc_inode_setowner(inode, private); - if (S_ISDIR(mode)) - inc_nlink(dir); - d_add(dentry, inode); - fsnotify_create(dir, dentry); - } - mutex_unlock(&dir->i_mutex); + err = __rpc_create_common(dir, dentry, S_IFREG | mode, i_fop, private); + if (err) + return err; + fsnotify_create(dir, dentry); return 0; -out_bad: - mutex_unlock(&dir->i_mutex); - printk(KERN_WARNING "%s: %s failed to populate directory %s\n", - __FILE__, __func__, parent->d_name.name); - return -ENOMEM; } -static int -__rpc_mkdir(struct inode *dir, struct dentry *dentry) +static int __rpc_mkdir(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private) { - struct inode *inode; + int err; - inode = rpc_get_inode(dir->i_sb, S_IFDIR | S_IRUGO | S_IXUGO); - if (!inode) - goto out_err; - inode->i_ino = iunique(dir->i_sb, 100); - d_instantiate(dentry, inode); + err = __rpc_create_common(dir, dentry, S_IFDIR | mode, i_fop, private); + if (err) + return err; inc_nlink(dir); fsnotify_mkdir(dir, dentry); return 0; -out_err: - printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", - __FILE__, __func__, dentry->d_name.name); - return -ENOMEM; } -static int -__rpc_rmdir(struct inode *dir, struct dentry *dentry) +static int __rpc_mkpipe(struct inode *dir, struct dentry *dentry, + umode_t mode, + const struct file_operations *i_fop, + void *private, + const struct rpc_pipe_ops *ops, + int flags) { - int error; - error = simple_rmdir(dir, dentry); - if (!error) - d_delete(dentry); - return error; + struct rpc_inode *rpci; + int err; + + err = __rpc_create_common(dir, dentry, S_IFIFO | mode, i_fop, private); + if (err) + return err; + rpci = RPC_I(dentry->d_inode); + rpci->nkern_readwriters = 1; + rpci->private = private; + rpci->flags = flags; + rpci->ops = ops; + fsnotify_create(dir, dentry); + return 0; } -static struct dentry * -rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive) +static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) +{ + int ret; + + dget(dentry); + ret = simple_rmdir(dir, dentry); + d_delete(dentry); + dput(dentry); + return ret; +} + +static int __rpc_unlink(struct inode *dir, struct dentry *dentry) +{ + int ret; + + dget(dentry); + ret = simple_unlink(dir, dentry); + d_delete(dentry); + dput(dentry); + return ret; +} + +static int __rpc_rmpipe(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + struct rpc_inode *rpci = RPC_I(inode); + + rpci->nkern_readwriters--; + if (rpci->nkern_readwriters != 0) + return 0; + rpc_close_pipes(inode); + return __rpc_unlink(dir, dentry); +} + +static struct dentry *__rpc_lookup_create(struct dentry *parent, + struct qstr *name) { - struct inode *dir = parent->d_inode; struct dentry *dentry; - mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(name, parent, len); - if (IS_ERR(dentry)) - goto out_err; + dentry = d_lookup(parent, name); + if (!dentry) { + dentry = d_alloc(parent, name); + if (!dentry) { + dentry = ERR_PTR(-ENOMEM); + goto out_err; + } + } if (!dentry->d_inode) dentry->d_op = &rpc_dentry_operations; - else if (exclusive) { - dput(dentry); - dentry = ERR_PTR(-EEXIST); - goto out_err; - } - return dentry; out_err: - mutex_unlock(&dir->i_mutex); return dentry; } -static struct dentry * -rpc_lookup_negative(char *path, struct nameidata *nd) +static struct dentry *__rpc_lookup_create_exclusive(struct dentry *parent, + struct qstr *name) { struct dentry *dentry; - int error; - if ((error = rpc_lookup_parent(path, nd)) != 0) - return ERR_PTR(error); - dentry = rpc_lookup_create(nd->path.dentry, nd->last.name, nd->last.len, - 1); - if (IS_ERR(dentry)) - rpc_release_path(nd); - return dentry; + dentry = __rpc_lookup_create(parent, name); + if (dentry->d_inode == NULL) + return dentry; + dput(dentry); + return ERR_PTR(-EEXIST); } -/** - * rpc_mkdir - Create a new directory in rpc_pipefs - * @path: path from the rpc_pipefs root to the new directory - * @rpc_client: rpc client to associate with this directory - * - * This creates a directory at the given @path associated with - * @rpc_clnt, which will contain a file named "info" with some basic - * information about the client, together with any "pipes" that may - * later be created using rpc_mkpipe(). +/* + * FIXME: This probably has races. */ -struct dentry * -rpc_mkdir(char *path, struct rpc_clnt *rpc_client) +static void __rpc_depopulate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof) { - struct nameidata nd; + struct inode *dir = parent->d_inode; struct dentry *dentry; - struct inode *dir; + struct qstr name; + int i; + + for (i = start; i < eof; i++) { + name.name = files[i].name; + name.len = strlen(files[i].name); + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + + if (dentry == NULL) + continue; + if (dentry->d_inode == NULL) + goto next; + switch (dentry->d_inode->i_mode & S_IFMT) { + default: + BUG(); + case S_IFREG: + __rpc_unlink(dir, dentry); + break; + case S_IFDIR: + __rpc_rmdir(dir, dentry); + } +next: + dput(dentry); + } +} + +static void rpc_depopulate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof) +{ + struct inode *dir = parent->d_inode; + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_CHILD); + __rpc_depopulate(parent, files, start, eof); + mutex_unlock(&dir->i_mutex); +} + +static int rpc_populate(struct dentry *parent, + const struct rpc_filelist *files, + int start, int eof, + void *private) +{ + struct inode *dir = parent->d_inode; + struct dentry *dentry; + int i, err; + + mutex_lock(&dir->i_mutex); + for (i = start; i < eof; i++) { + struct qstr q; + + q.name = files[i].name; + q.len = strlen(files[i].name); + q.hash = full_name_hash(q.name, q.len); + dentry = __rpc_lookup_create_exclusive(parent, &q); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_bad; + switch (files[i].mode & S_IFMT) { + default: + BUG(); + case S_IFREG: + err = __rpc_create(dir, dentry, + files[i].mode, + files[i].i_fop, + private); + break; + case S_IFDIR: + err = __rpc_mkdir(dir, dentry, + files[i].mode, + NULL, + private); + } + if (err != 0) + goto out_bad; + } + mutex_unlock(&dir->i_mutex); + return 0; +out_bad: + __rpc_depopulate(parent, files, start, eof); + mutex_unlock(&dir->i_mutex); + printk(KERN_WARNING "%s: %s failed to populate directory %s\n", + __FILE__, __func__, parent->d_name.name); + return err; +} + +static struct dentry *rpc_mkdir_populate(struct dentry *parent, + struct qstr *name, umode_t mode, void *private, + int (*populate)(struct dentry *, void *), void *args_populate) +{ + struct dentry *dentry; + struct inode *dir = parent->d_inode; int error; - dentry = rpc_lookup_negative(path, &nd); + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = __rpc_lookup_create_exclusive(parent, name); if (IS_ERR(dentry)) - return dentry; - dir = nd.path.dentry->d_inode; - if ((error = __rpc_mkdir(dir, dentry)) != 0) - goto err_dput; - RPC_I(dentry->d_inode)->private = rpc_client; - error = rpc_populate(dentry, authfiles, - RPCAUTH_info, RPCAUTH_EOF); - if (error) - goto err_depopulate; - dget(dentry); + goto out; + error = __rpc_mkdir(dir, dentry, mode, NULL, private); + if (error != 0) + goto out_err; + if (populate != NULL) { + error = populate(dentry, args_populate); + if (error) + goto err_rmdir; + } out: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); return dentry; -err_depopulate: - rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); +err_rmdir: __rpc_rmdir(dir, dentry); -err_dput: - dput(dentry); - printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %d)\n", - __FILE__, __func__, path, error); +out_err: dentry = ERR_PTR(error); goto out; } -/** - * rpc_rmdir - Remove a directory created with rpc_mkdir() - * @dentry: directory to remove - */ -int -rpc_rmdir(struct dentry *dentry) +static int rpc_rmdir_depopulate(struct dentry *dentry, + void (*depopulate)(struct dentry *)) { struct dentry *parent; struct inode *dir; @@ -748,9 +726,9 @@ rpc_rmdir(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF); + if (depopulate != NULL) + depopulate(dentry); error = __rpc_rmdir(dir, dentry); - dput(dentry); mutex_unlock(&dir->i_mutex); dput(parent); return error; @@ -776,50 +754,54 @@ rpc_rmdir(struct dentry *dentry) * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. */ -struct dentry * -rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) +struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, + void *private, const struct rpc_pipe_ops *ops, + int flags) { struct dentry *dentry; - struct inode *dir, *inode; - struct rpc_inode *rpci; + struct inode *dir = parent->d_inode; + umode_t umode = S_IFIFO | S_IRUSR | S_IWUSR; + struct qstr q; + int err; - dentry = rpc_lookup_create(parent, name, strlen(name), 0); + if (ops->upcall == NULL) + umode &= ~S_IRUGO; + if (ops->downcall == NULL) + umode &= ~S_IWUGO; + + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len), + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + dentry = __rpc_lookup_create(parent, &q); if (IS_ERR(dentry)) - return dentry; - dir = parent->d_inode; + goto out; if (dentry->d_inode) { - rpci = RPC_I(dentry->d_inode); + struct rpc_inode *rpci = RPC_I(dentry->d_inode); if (rpci->private != private || rpci->ops != ops || rpci->flags != flags) { dput (dentry); - dentry = ERR_PTR(-EBUSY); + err = -EBUSY; + goto out_err; } rpci->nkern_readwriters++; goto out; } - inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR); - if (!inode) - goto err_dput; - inode->i_ino = iunique(dir->i_sb, 100); - inode->i_fop = &rpc_pipe_fops; - d_instantiate(dentry, inode); - rpci = RPC_I(inode); - rpci->private = private; - rpci->flags = flags; - rpci->ops = ops; - rpci->nkern_readwriters = 1; - fsnotify_create(dir, dentry); - dget(dentry); + + err = __rpc_mkpipe(dir, dentry, umode, &rpc_pipe_fops, + private, ops, flags); + if (err) + goto out_err; out: mutex_unlock(&dir->i_mutex); return dentry; -err_dput: - dput(dentry); - dentry = ERR_PTR(-ENOMEM); +out_err: + dentry = ERR_PTR(err); printk(KERN_WARNING "%s: %s() failed to create pipe %s/%s (errno = %d)\n", __FILE__, __func__, parent->d_name.name, name, - -ENOMEM); + err); goto out; } EXPORT_SYMBOL_GPL(rpc_mkpipe); @@ -842,19 +824,107 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); - if (!error) - d_delete(dentry); - } - dput(dentry); + error = __rpc_rmpipe(dir, dentry); mutex_unlock(&dir->i_mutex); dput(parent); return error; } EXPORT_SYMBOL_GPL(rpc_unlink); +enum { + RPCAUTH_info, + RPCAUTH_EOF +}; + +static const struct rpc_filelist authfiles[] = { + [RPCAUTH_info] = { + .name = "info", + .i_fop = &rpc_info_operations, + .mode = S_IFREG | S_IRUSR, + }, +}; + +static int rpc_clntdir_populate(struct dentry *dentry, void *private) +{ + return rpc_populate(dentry, + authfiles, RPCAUTH_info, RPCAUTH_EOF, + private); +} + +static void rpc_clntdir_depopulate(struct dentry *dentry) +{ + rpc_depopulate(dentry, authfiles, RPCAUTH_info, RPCAUTH_EOF); +} + +/** + * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs + * @path: path from the rpc_pipefs root to the new directory + * @rpc_client: rpc client to associate with this directory + * + * This creates a directory at the given @path associated with + * @rpc_clnt, which will contain a file named "info" with some basic + * information about the client, together with any "pipes" that may + * later be created using rpc_mkpipe(). + */ +struct dentry *rpc_create_client_dir(struct dentry *dentry, + struct qstr *name, + struct rpc_clnt *rpc_client) +{ + return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, NULL, + rpc_clntdir_populate, rpc_client); +} + +/** + * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() + * @dentry: directory to remove + */ +int rpc_remove_client_dir(struct dentry *dentry) +{ + return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); +} + +static const struct rpc_filelist cache_pipefs_files[3] = { + [0] = { + .name = "channel", + .i_fop = &cache_file_operations_pipefs, + .mode = S_IFIFO|S_IRUSR|S_IWUSR, + }, + [1] = { + .name = "content", + .i_fop = &content_file_operations_pipefs, + .mode = S_IFREG|S_IRUSR, + }, + [2] = { + .name = "flush", + .i_fop = &cache_flush_operations_pipefs, + .mode = S_IFREG|S_IRUSR|S_IWUSR, + }, +}; + +static int rpc_cachedir_populate(struct dentry *dentry, void *private) +{ + return rpc_populate(dentry, + cache_pipefs_files, 0, 3, + private); +} + +static void rpc_cachedir_depopulate(struct dentry *dentry) +{ + rpc_depopulate(dentry, cache_pipefs_files, 0, 3); +} + +struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, + mode_t umode, struct cache_detail *cd) +{ + return rpc_mkdir_populate(parent, name, umode, NULL, + rpc_cachedir_populate, cd); +} + +void rpc_remove_cache_dir(struct dentry *dentry) +{ + rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); +} + /* * populate the filesystem */ @@ -866,6 +936,46 @@ static struct super_operations s_ops = { #define RPCAUTH_GSSMAGIC 0x67596969 +/* + * We have a single directory with 1 node in it. + */ +enum { + RPCAUTH_lockd, + RPCAUTH_mount, + RPCAUTH_nfs, + RPCAUTH_portmap, + RPCAUTH_statd, + RPCAUTH_nfsd4_cb, + RPCAUTH_RootEOF +}; + +static const struct rpc_filelist files[] = { + [RPCAUTH_lockd] = { + .name = "lockd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_mount] = { + .name = "mount", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_nfs] = { + .name = "nfs", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_portmap] = { + .name = "portmap", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_statd] = { + .name = "statd", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, + [RPCAUTH_nfsd4_cb] = { + .name = "nfsd4_cb", + .mode = S_IFDIR | S_IRUGO | S_IXUGO, + }, +}; + static int rpc_fill_super(struct super_block *sb, void *data, int silent) { @@ -886,7 +996,7 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) iput(inode); return -ENOMEM; } - if (rpc_populate(root, files, RPCAUTH_Root + 1, RPCAUTH_RootEOF)) + if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) goto out; sb->s_root = root; return 0; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index beee6da33035..830faf4d9997 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -75,6 +75,37 @@ enum { #define RPCB_OWNER_STRING "0" #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) +/* + * XDR data type sizes + */ +#define RPCB_program_sz (1) +#define RPCB_version_sz (1) +#define RPCB_protocol_sz (1) +#define RPCB_port_sz (1) +#define RPCB_boolean_sz (1) + +#define RPCB_netid_sz (1 + XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) +#define RPCB_addr_sz (1 + XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) +#define RPCB_ownerstring_sz (1 + XDR_QUADLEN(RPCB_MAXOWNERLEN)) + +/* + * XDR argument and result sizes + */ +#define RPCB_mappingargs_sz (RPCB_program_sz + RPCB_version_sz + \ + RPCB_protocol_sz + RPCB_port_sz) +#define RPCB_getaddrargs_sz (RPCB_program_sz + RPCB_version_sz + \ + RPCB_netid_sz + RPCB_addr_sz + \ + RPCB_ownerstring_sz) + +#define RPCB_getportres_sz RPCB_port_sz +#define RPCB_setres_sz RPCB_boolean_sz + +/* + * Note that RFC 1833 does not put any size restrictions on the + * address string returned by the remote rpcbind database. + */ +#define RPCB_getaddrres_sz RPCB_addr_sz + static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; @@ -122,6 +153,7 @@ static void rpcb_map_release(void *data) rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); + kfree(map->r_addr); kfree(map); } @@ -268,12 +300,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); - char buf[32]; + int result; - /* Construct AF_INET universal address */ - snprintf(buf, sizeof(buf), "%pI4.%u.%u", - &sin->sin_addr.s_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -284,7 +313,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } /* @@ -296,16 +327,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); - char buf[64]; + int result; - /* Construct AF_INET6 universal address */ - if (ipv6_addr_any(&sin6->sin6_addr)) - snprintf(buf, sizeof(buf), "::.%u.%u", - port >> 8, port & 0xff); - else - snprintf(buf, sizeof(buf), "%pI6.%u.%u", - &sin6->sin6_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -316,7 +340,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) @@ -428,7 +454,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot) struct rpc_message msg = { .rpc_proc = &rpcb_procedures2[RPCBPROC_GETPORT], .rpc_argp = &map, - .rpc_resp = &map.r_port, + .rpc_resp = &map, }; struct rpc_clnt *rpcb_clnt; int status; @@ -458,7 +484,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi struct rpc_message msg = { .rpc_proc = proc, .rpc_argp = map, - .rpc_resp = &map->r_port, + .rpc_resp = map, }; struct rpc_task_setup task_setup_data = { .rpc_client = rpcb_clnt, @@ -539,6 +565,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } + /* Parent transport's destination address */ salen = rpc_peeraddr(clnt, sap, sizeof(addr)); /* Don't ever use rpcbind v2 for AF_INET6 requests */ @@ -589,11 +616,22 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); - map->r_owner = ""; map->r_status = -EIO; + switch (bind_version) { + case RPCBVERS_4: + case RPCBVERS_3: + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_owner = ""; + break; + case RPCBVERS_2: + map->r_addr = NULL; + break; + default: + BUG(); + } + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { @@ -656,176 +694,278 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) * XDR functions for rpcbind */ -static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, - struct rpcbind_args *rpcb) +static int rpcb_enc_mapping(struct rpc_rqst *req, __be32 *p, + const struct rpcbind_args *rpcb) { - dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n", + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + + dprintk("RPC: %5u encoding PMAP_%s call (%u, %u, %d, %u)\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + + p = xdr_reserve_space(&xdr, sizeof(__be32) * RPCB_mappingargs_sz); + if (unlikely(p == NULL)) + return -EIO; + *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); *p++ = htonl(rpcb->r_prot); - *p++ = htonl(rpcb->r_port); + *p = htonl(rpcb->r_port); - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); return 0; } -static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, - unsigned short *portp) +static int rpcb_dec_getport(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) { - *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb getport result: %u\n", - *portp); + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + unsigned long port; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + rpcb->r_port = 0; + + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + return -EIO; + + port = ntohl(*p); + dprintk("RPC: %5u PMAP_%s result: %lu\n", task->tk_pid, + task->tk_msg.rpc_proc->p_name, port); + if (unlikely(port > USHORT_MAX)) + return -EIO; + + rpcb->r_port = port; return 0; } -static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, - unsigned int *boolp) +static int rpcb_dec_set(struct rpc_rqst *req, __be32 *p, + unsigned int *boolp) { - *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb set/unset call %s\n", + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + return -EIO; + + *boolp = 0; + if (*p) + *boolp = 1; + + dprintk("RPC: %5u RPCB_%s call %s\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, (*boolp ? "succeeded" : "failed")); return 0; } -static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, - struct rpcbind_args *rpcb) +static int encode_rpcb_string(struct xdr_stream *xdr, const char *string, + const u32 maxstrlen) { - dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", - rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); - *p++ = htonl(rpcb->r_prog); - *p++ = htonl(rpcb->r_vers); + u32 len; + __be32 *p; - p = xdr_encode_string(p, rpcb->r_netid); - p = xdr_encode_string(p, rpcb->r_addr); - p = xdr_encode_string(p, rpcb->r_owner); + if (unlikely(string == NULL)) + return -EIO; + len = strlen(string); + if (unlikely(len > maxstrlen)) + return -EIO; - req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + p = xdr_reserve_space(xdr, sizeof(__be32) + len); + if (unlikely(p == NULL)) + return -EIO; + xdr_encode_opaque(p, string, len); return 0; } -static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, - unsigned short *portp) +static int rpcb_enc_getaddr(struct rpc_rqst *req, __be32 *p, + const struct rpcbind_args *rpcb) { - char *addr; - u32 addr_len; - int c, i, f, first, val; + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; - *portp = 0; - addr_len = ntohl(*p++); + dprintk("RPC: %5u encoding RPCB_%s call (%u, %u, '%s', '%s')\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name, + rpcb->r_prog, rpcb->r_vers, + rpcb->r_netid, rpcb->r_addr); - if (addr_len == 0) { - dprintk("RPC: rpcb_decode_getaddr: " - "service is not registered\n"); - return 0; - } + xdr_init_encode(&xdr, &req->rq_snd_buf, p); - /* - * Simple sanity check. - */ - if (addr_len > RPCBIND_MAXUADDRLEN) - goto out_err; - - /* - * Start at the end and walk backwards until the first dot - * is encountered. When the second dot is found, we have - * both parts of the port number. - */ - addr = (char *)p; - val = 0; - first = 1; - f = 1; - for (i = addr_len - 1; i > 0; i--) { - c = addr[i]; - if (c >= '0' && c <= '9') { - val += (c - '0') * f; - f *= 10; - } else if (c == '.') { - if (first) { - *portp = val; - val = first = 0; - f = 1; - } else { - *portp |= (val << 8); - break; - } - } - } + p = xdr_reserve_space(&xdr, + sizeof(__be32) * (RPCB_program_sz + RPCB_version_sz)); + if (unlikely(p == NULL)) + return -EIO; + *p++ = htonl(rpcb->r_prog); + *p = htonl(rpcb->r_vers); - /* - * Simple sanity check. If we never saw a dot in the reply, - * then this was probably just garbage. - */ - if (first) - goto out_err; + if (encode_rpcb_string(&xdr, rpcb->r_netid, RPCBIND_MAXNETIDLEN)) + return -EIO; + if (encode_rpcb_string(&xdr, rpcb->r_addr, RPCBIND_MAXUADDRLEN)) + return -EIO; + if (encode_rpcb_string(&xdr, rpcb->r_owner, RPCB_MAXOWNERLEN)) + return -EIO; - dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); return 0; - -out_err: - dprintk("RPC: rpcbind server returned malformed reply\n"); - return -EIO; } -#define RPCB_program_sz (1u) -#define RPCB_version_sz (1u) -#define RPCB_protocol_sz (1u) -#define RPCB_port_sz (1u) -#define RPCB_boolean_sz (1u) +static int rpcb_dec_getaddr(struct rpc_rqst *req, __be32 *p, + struct rpcbind_args *rpcb) +{ + struct sockaddr_storage address; + struct sockaddr *sap = (struct sockaddr *)&address; + struct rpc_task *task = req->rq_task; + struct xdr_stream xdr; + u32 len; -#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) -#define RPCB_addr_sz (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN)) -#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) + rpcb->r_port = 0; -#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \ - RPCB_protocol_sz+RPCB_port_sz -#define RPCB_getaddrargs_sz RPCB_program_sz+RPCB_version_sz+ \ - RPCB_netid_sz+RPCB_addr_sz+ \ - RPCB_ownerstring_sz + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); -#define RPCB_setres_sz RPCB_boolean_sz -#define RPCB_getportres_sz RPCB_port_sz - -/* - * Note that RFC 1833 does not put any size restrictions on the - * address string returned by the remote rpcbind database. - */ -#define RPCB_getaddrres_sz RPCB_addr_sz + p = xdr_inline_decode(&xdr, sizeof(__be32)); + if (unlikely(p == NULL)) + goto out_fail; + len = ntohl(*p); -#define PROC(proc, argtype, restype) \ - [RPCBPROC_##proc] = { \ - .p_proc = RPCBPROC_##proc, \ - .p_encode = (kxdrproc_t) rpcb_encode_##argtype, \ - .p_decode = (kxdrproc_t) rpcb_decode_##restype, \ - .p_arglen = RPCB_##argtype##args_sz, \ - .p_replen = RPCB_##restype##res_sz, \ - .p_statidx = RPCBPROC_##proc, \ - .p_timer = 0, \ - .p_name = #proc, \ + /* + * If the returned universal address is a null string, + * the requested RPC service was not registered. + */ + if (len == 0) { + dprintk("RPC: %5u RPCB reply: program not registered\n", + task->tk_pid); + return 0; } + if (unlikely(len > RPCBIND_MAXUADDRLEN)) + goto out_fail; + + p = xdr_inline_decode(&xdr, len); + if (unlikely(p == NULL)) + goto out_fail; + dprintk("RPC: %5u RPCB_%s reply: %s\n", task->tk_pid, + task->tk_msg.rpc_proc->p_name, (char *)p); + + if (rpc_uaddr2sockaddr((char *)p, len, sap, sizeof(address)) == 0) + goto out_fail; + rpcb->r_port = rpc_get_port(sap); + + return 0; + +out_fail: + dprintk("RPC: %5u malformed RPCB_%s reply\n", + task->tk_pid, task->tk_msg.rpc_proc->p_name); + return -EIO; +} + /* * Not all rpcbind procedures described in RFC 1833 are implemented * since the Linux kernel RPC code requires only these. */ + static struct rpc_procinfo rpcb_procedures2[] = { - PROC(SET, mapping, set), - PROC(UNSET, mapping, set), - PROC(GETPORT, mapping, getport), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETPORT] = { + .p_proc = RPCBPROC_GETPORT, + .p_encode = (kxdrproc_t)rpcb_enc_mapping, + .p_decode = (kxdrproc_t)rpcb_dec_getport, + .p_arglen = RPCB_mappingargs_sz, + .p_replen = RPCB_getportres_sz, + .p_statidx = RPCBPROC_GETPORT, + .p_timer = 0, + .p_name = "GETPORT", + }, }; static struct rpc_procinfo rpcb_procedures3[] = { - PROC(SET, getaddr, set), - PROC(UNSET, getaddr, set), - PROC(GETADDR, getaddr, getaddr), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETADDR] = { + .p_proc = RPCBPROC_GETADDR, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_getaddr, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_getaddrres_sz, + .p_statidx = RPCBPROC_GETADDR, + .p_timer = 0, + .p_name = "GETADDR", + }, }; static struct rpc_procinfo rpcb_procedures4[] = { - PROC(SET, getaddr, set), - PROC(UNSET, getaddr, set), - PROC(GETADDR, getaddr, getaddr), - PROC(GETVERSADDR, getaddr, getaddr), + [RPCBPROC_SET] = { + .p_proc = RPCBPROC_SET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_SET, + .p_timer = 0, + .p_name = "SET", + }, + [RPCBPROC_UNSET] = { + .p_proc = RPCBPROC_UNSET, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_set, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_setres_sz, + .p_statidx = RPCBPROC_UNSET, + .p_timer = 0, + .p_name = "UNSET", + }, + [RPCBPROC_GETADDR] = { + .p_proc = RPCBPROC_GETADDR, + .p_encode = (kxdrproc_t)rpcb_enc_getaddr, + .p_decode = (kxdrproc_t)rpcb_dec_getaddr, + .p_arglen = RPCB_getaddrargs_sz, + .p_replen = RPCB_getaddrres_sz, + .p_statidx = RPCBPROC_GETADDR, + .p_timer = 0, + .p_name = "GETADDR", + }, }; static struct rpcb_info rpcb_next_version[] = { diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 5c865e2d299e..6caffa34ac01 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -171,6 +171,11 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); +} + static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -289,7 +294,7 @@ struct cache_detail ip_map_cache = { .hash_table = ip_table, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_request = ip_map_request, + .cache_upcall = ip_map_upcall, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, @@ -523,6 +528,11 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); +} + static struct unix_gid *unix_gid_lookup(uid_t uid); extern struct cache_detail unix_gid_cache; @@ -622,7 +632,7 @@ struct cache_detail unix_gid_cache = { .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_request = unix_gid_request, + .cache_upcall = unix_gid_upcall, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, diff --git a/net/sunrpc/timer.c b/net/sunrpc/timer.c index 31becbf09263..dd824341c349 100644 --- a/net/sunrpc/timer.c +++ b/net/sunrpc/timer.c @@ -25,8 +25,13 @@ #define RPC_RTO_INIT (HZ/5) #define RPC_RTO_MIN (HZ/10) -void -rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) +/** + * rpc_init_rtt - Initialize an RPC RTT estimator context + * @rt: context to initialize + * @timeo: initial timeout value, in jiffies + * + */ +void rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) { unsigned long init = 0; unsigned i; @@ -43,12 +48,16 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) } EXPORT_SYMBOL_GPL(rpc_init_rtt); -/* +/** + * rpc_update_rtt - Update an RPC RTT estimator context + * @rt: context to update + * @timer: timer array index (request type) + * @m: recent actual RTT, in jiffies + * * NB: When computing the smoothed RTT and standard deviation, * be careful not to produce negative intermediate results. */ -void -rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) +void rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) { long *srtt, *sdrtt; @@ -79,21 +88,25 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) } EXPORT_SYMBOL_GPL(rpc_update_rtt); -/* - * Estimate rto for an nfs rpc sent via. an unreliable datagram. - * Use the mean and mean deviation of rtt for the appropriate type of rpc - * for the frequent rpcs and a default for the others. - * The justification for doing "other" this way is that these rpcs - * happen so infrequently that timer est. would probably be stale. - * Also, since many of these rpcs are - * non-idempotent, a conservative timeout is desired. +/** + * rpc_calc_rto - Provide an estimated timeout value + * @rt: context to use for calculation + * @timer: timer array index (request type) + * + * Estimate RTO for an NFS RPC sent via an unreliable datagram. Use + * the mean and mean deviation of RTT for the appropriate type of RPC + * for frequently issued RPCs, and a fixed default for the others. + * + * The justification for doing "other" this way is that these RPCs + * happen so infrequently that timer estimation would probably be + * stale. Also, since many of these RPCs are non-idempotent, a + * conservative timeout is desired. + * * getattr, lookup, * read, write, commit - A+4D * other - timeo */ - -unsigned long -rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) +unsigned long rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) { unsigned long res; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1dd6123070e9..9a63f669ece4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -168,47 +168,25 @@ static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ static void xprt_rdma_format_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) + struct sockaddr *sap = (struct sockaddr *) &rpcx_to_rdmad(xprt).addr; - char *buf; + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + char buf[64]; - buf = kzalloc(20, GFP_KERNEL); - if (buf) - snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%u", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_PORT] = buf; + (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - buf = kzalloc(48, GFP_KERNEL); - if (buf) - snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), "rdma"); - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(10, GFP_KERNEL); - if (buf) - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; + (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", + NIPQUAD(sin->sin_addr.s_addr)); + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); + + (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); /* netid */ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 83c73c4d017a..62438f3a914d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -248,8 +248,8 @@ struct sock_xprt { * Connection of transports */ struct delayed_work connect_worker; - struct sockaddr_storage addr; - unsigned short port; + struct sockaddr_storage srcaddr; + unsigned short srcport; /* * UDP socket buffer size parameters @@ -296,117 +296,60 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) return (struct sockaddr_in6 *) &xprt->addr; } -static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, - const char *protocol, - const char *netid) +static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = xs_addr_in(xprt); - char *buf; + struct sockaddr *sap = xs_addr(xprt); + struct sockaddr_in6 *sin6; + struct sockaddr_in *sin; + char buf[128]; - buf = kzalloc(20, GFP_KERNEL); - if (buf) { - snprintf(buf, 20, "%pI4", &addr->sin_addr.s_addr); - } - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%u", - ntohs(addr->sin_port)); - } - xprt->address_strings[RPC_DISPLAY_PORT] = buf; - - xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; - - buf = kzalloc(48, GFP_KERNEL); - if (buf) { - snprintf(buf, 48, "addr=%pI4 port=%u proto=%s", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port), - protocol); - } - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(10, GFP_KERNEL); - if (buf) { - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - } - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%4hx", - ntohs(addr->sin_port)); - } - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) { - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); + switch (sap->sa_family) { + case AF_INET: + sin = xs_addr_in(xprt); + (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", + NIPQUAD(sin->sin_addr.s_addr)); + break; + case AF_INET6: + sin6 = xs_addr_in6(xprt); + (void)snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); + break; + default: + BUG(); } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - - xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } -static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, - const char *protocol, - const char *netid) +static void xs_format_common_peer_ports(struct rpc_xprt *xprt) { - struct sockaddr_in6 *addr = xs_addr_in6(xprt); - char *buf; + struct sockaddr *sap = xs_addr(xprt); + char buf[128]; - buf = kzalloc(40, GFP_KERNEL); - if (buf) { - snprintf(buf, 40, "%pI6",&addr->sin6_addr); - } - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%u", - ntohs(addr->sin6_port)); - } - xprt->address_strings[RPC_DISPLAY_PORT] = buf; + (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); +} +static void xs_format_peer_addresses(struct rpc_xprt *xprt, + const char *protocol, + const char *netid) +{ xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; + xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xs_format_common_peer_addresses(xprt); + xs_format_common_peer_ports(xprt); +} - buf = kzalloc(64, GFP_KERNEL); - if (buf) { - snprintf(buf, 64, "addr=%pI6 port=%u proto=%s", - &addr->sin6_addr, - ntohs(addr->sin6_port), - protocol); - } - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(36, GFP_KERNEL); - if (buf) - snprintf(buf, 36, "%pi6", &addr->sin6_addr); - - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%4hx", - ntohs(addr->sin6_port)); - } - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(50, GFP_KERNEL); - if (buf) { - snprintf(buf, 50, "%pI6.%u.%u", - &addr->sin6_addr, - ntohs(addr->sin6_port) >> 8, - ntohs(addr->sin6_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; +static void xs_update_peer_port(struct rpc_xprt *xprt) +{ + kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - xprt->address_strings[RPC_DISPLAY_NETID] = netid; + xs_format_common_peer_ports(xprt); } static void xs_free_peer_addresses(struct rpc_xprt *xprt) @@ -1587,25 +1530,15 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr *addr = xs_addr(xprt); - dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - switch (addr->sa_family) { - case AF_INET: - ((struct sockaddr_in *)addr)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); - break; - default: - BUG(); - } + rpc_set_port(xs_addr(xprt), port); + xs_update_peer_port(xprt); } static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) { - unsigned short port = transport->port; + unsigned short port = transport->srcport; if (port == 0 && transport->xprt.resvport) port = xs_get_random_port(); @@ -1614,8 +1547,8 @@ static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) { - if (transport->port != 0) - transport->port = 0; + if (transport->srcport != 0) + transport->srcport = 0; if (!transport->xprt.resvport) return 0; if (port <= xprt_min_resvport || port > xprt_max_resvport) @@ -1633,7 +1566,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) unsigned short port = xs_get_srcport(transport, sock); unsigned short last; - sa = (struct sockaddr_in *)&transport->addr; + sa = (struct sockaddr_in *)&transport->srcaddr; myaddr.sin_addr = sa->sin_addr; do { myaddr.sin_port = htons(port); @@ -1642,7 +1575,7 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) if (port == 0) break; if (err == 0) { - transport->port = port; + transport->srcport = port; break; } last = port; @@ -1666,7 +1599,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) unsigned short port = xs_get_srcport(transport, sock); unsigned short last; - sa = (struct sockaddr_in6 *)&transport->addr; + sa = (struct sockaddr_in6 *)&transport->srcaddr; myaddr.sin6_addr = sa->sin6_addr; do { myaddr.sin6_port = htons(port); @@ -1675,7 +1608,7 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) if (port == 0) break; if (err == 0) { - transport->port = port; + transport->srcport = port; break; } last = port; @@ -1780,8 +1713,11 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1822,8 +1758,11 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1948,8 +1887,11 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, goto out_eagain; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", @@ -2120,7 +2062,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", - transport->port, + transport->srcport, xprt->stat.bind_count, xprt->stat.sends, xprt->stat.recvs, @@ -2144,7 +2086,7 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) idle_time = (long)(jiffies - xprt->last_used) / HZ; seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n", - transport->port, + transport->srcport, xprt->stat.bind_count, xprt->stat.connect_count, xprt->stat.connect_time, @@ -2223,7 +2165,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, memcpy(&xprt->addr, args->dstaddr, args->addrlen); xprt->addrlen = args->addrlen; if (args->srcaddr) - memcpy(&new->addr, args->srcaddr, args->addrlen); + memcpy(&new->srcaddr, args->srcaddr, args->addrlen); return xprt; } @@ -2272,7 +2214,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); + xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) @@ -2280,15 +2222,22 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); + xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); break; default: kfree(xprt); return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); if (try_module_get(THIS_MODULE)) return xprt; @@ -2337,23 +2286,33 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) if (((struct sockaddr_in *)addr)->sin_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_tcp_connect_worker4); + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); break; case AF_INET6: if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_tcp_connect_worker6); + xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); break; default: kfree(xprt); return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + if (try_module_get(THIS_MODULE)) return xprt; @@ -2412,3 +2371,55 @@ void cleanup_socket_xprt(void) xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); } + +static int param_set_uint_minmax(const char *val, struct kernel_param *kp, + unsigned int min, unsigned int max) +{ + unsigned long num; + int ret; + + if (!val) + return -EINVAL; + ret = strict_strtoul(val, 0, &num); + if (ret == -EINVAL || num < min || num > max) + return -EINVAL; + *((unsigned int *)kp->arg) = num; + return 0; +} + +static int param_set_portnr(const char *val, struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_RESVPORT, + RPC_MAX_RESVPORT); +} + +static int param_get_portnr(char *buffer, struct kernel_param *kp) +{ + return param_get_uint(buffer, kp); +} +#define param_check_portnr(name, p) \ + __param_check(name, p, unsigned int); + +module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); +module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); + +static int param_set_slot_table_size(const char *val, struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_SLOT_TABLE, + RPC_MAX_SLOT_TABLE); +} + +static int param_get_slot_table_size(char *buffer, struct kernel_param *kp) +{ + return param_get_uint(buffer, kp); +} +#define param_check_slot_table_size(name, p) \ + __param_check(name, p, unsigned int); + +module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, + slot_table_size, 0644); +module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, + slot_table_size, 0644); + |