diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-02-08 04:39:56 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-02-08 04:39:56 +0300 |
commit | f43574d0ac80d76537e265548a13b1bc46aa751b (patch) | |
tree | 8667f7a2fd0825ebf7a636eca3341a8a866f32b0 | |
parent | 41dcd67e88688afbeb3b2bd23960eed5daec74e7 (diff) | |
parent | 7dc2993a9e51dd2eee955944efec65bef90265b7 (diff) | |
download | linux-f43574d0ac80d76537e265548a13b1bc46aa751b.tar.xz |
Merge tag 'nfs-for-5.6-1' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client updates from Anna Schumaker:
"Stable bugfixes:
- Fix memory leaks and corruption in readdir # v2.6.37+
- Directory page cache needs to be locked when read # v2.6.37+
New features:
- Convert NFS to use the new mount API
- Add "softreval" mount option to let clients use cache if server goes down
- Add a config option to compile without UDP support
- Limit the number of inactive delegations the client can cache at once
- Improved readdir concurrency using iterate_shared()
Other bugfixes and cleanups:
- More 64-bit time conversions
- Add additional diagnostic tracepoints
- Check for holes in swapfiles, and add dependency on CONFIG_SWAP
- Various xprtrdma cleanups to prepare for 5.7's changes
- Several fixes for NFS writeback and commit handling
- Fix acls over krb5i/krb5p mounts
- Recover from premature loss of openstateids
- Fix NFS v3 chacl and chmod bug
- Compare creds using cred_fscmp()
- Use kmemdup_nul() in more places
- Optimize readdir cache page invalidation
- Lease renewal and recovery fixes"
* tag 'nfs-for-5.6-1' of git://git.linux-nfs.org/projects/anna/linux-nfs: (93 commits)
NFSv4.0: nfs4_do_fsinfo() should not do implicit lease renewals
NFSv4: try lease recovery on NFS4ERR_EXPIRED
NFS: Fix memory leaks
nfs: optimise readdir cache page invalidation
NFS: Switch readdir to using iterate_shared()
NFS: Use kmemdup_nul() in nfs_readdir_make_qstr()
NFS: Directory page cache pages need to be locked when read
NFS: Fix memory leaks and corruption in readdir
SUNRPC: Use kmemdup_nul() in rpc_parse_scope_id()
NFS: Replace various occurrences of kstrndup() with kmemdup_nul()
NFSv4: Limit the total number of cached delegations
NFSv4: Add accounting for the number of active delegations held
NFSv4: Try to return the delegation immediately when marked for return on close
NFS: Clear NFS_DELEGATION_RETURN_IF_CLOSED when the delegation is returned
NFSv4: nfs_inode_evict_delegation() should set NFS_DELEGATION_RETURNING
NFS: nfs_find_open_context() should use cred_fscmp()
NFS: nfs_access_get_cached_rcu() should use cred_fscmp()
NFSv4: pnfs_roc() must use cred_fscmp() to compare creds
NFS: remove unused macros
nfs: Return EINVAL rather than ERANGE for mount parse errors
...
63 files changed, 3225 insertions, 2909 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 295a7a21b774..40b6c5ac46c0 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -90,7 +90,7 @@ config NFS_V4 config NFS_SWAP bool "Provide swap over NFS support" default n - depends on NFS_FS + depends on NFS_FS && SWAP select SUNRPC_SWAP help This option enables swapon to work on files located on NFS mounts. @@ -196,3 +196,12 @@ config NFS_DEBUG depends on NFS_FS && SUNRPC_DEBUG select CRC32 default y + +config NFS_DISABLE_UDP_SUPPORT + bool "NFS: Disable NFS UDP protocol support" + depends on NFS_FS + default y + help + Choose Y here to disable the use of NFS over UDP. NFS over UDP + on modern networks (1Gb+) can lead to data corruption caused by + fragmentation during high loads. diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 34cdeaecccf6..2433c3e03cfa 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -9,7 +9,7 @@ CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ io.o direct.o pagelist.o read.o symlink.o unlink.o \ write.o namespace.o mount_clnt.o nfstrace.o \ - export.o sysfs.o + export.o sysfs.o fs_context.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 03a20f5716c7..79ff172eb1c8 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -18,6 +18,7 @@ #include "callback.h" #include "internal.h" #include "nfs4session.h" +#include "nfs4trace.h" #define CB_OP_TAGLEN_MAXSZ (512) #define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status @@ -946,9 +947,13 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) if (hdr_arg.minorversion == 0) { cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident); - if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) { - if (cps.clp) - nfs_put_client(cps.clp); + if (!cps.clp) { + trace_nfs_cb_no_clp(rqstp->rq_xid, hdr_arg.cb_ident); + goto out_invalidcred; + } 
+ if (!check_gss_callback_principal(cps.clp, rqstp)) { + trace_nfs_cb_badprinc(rqstp->rq_xid, hdr_arg.cb_ident); + nfs_put_client(cps.clp); goto out_invalidcred; } } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 02110a30a49e..989c30c98511 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -474,6 +474,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_maxval = to->to_initval; to->to_exponential = 0; break; +#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT case XPRT_TRANSPORT_UDP: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_UDP_RETRANS; @@ -484,6 +485,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_maxval = NFS_MAX_UDP_TIMEOUT; to->to_exponential = 1; break; +#endif default: BUG(); } @@ -580,8 +582,10 @@ static int nfs_start_lockd(struct nfs_server *server) default: nlm_init.protocol = IPPROTO_TCP; break; +#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT case XPRT_TRANSPORT_UDP: nlm_init.protocol = IPPROTO_UDP; +#endif } host = nlmclnt_init(&nlm_init); @@ -658,28 +662,28 @@ EXPORT_SYMBOL_GPL(nfs_init_client); * Create a version 2 or 3 client */ static int nfs_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data, - struct nfs_subversion *nfs_mod) + const struct fs_context *fc) { + const struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; struct nfs_client_initdata cl_init = { - .hostname = data->nfs_server.hostname, - .addr = (const struct sockaddr *)&data->nfs_server.address, - .addrlen = data->nfs_server.addrlen, - .nfs_mod = nfs_mod, - .proto = data->nfs_server.protocol, - .net = data->net, + .hostname = ctx->nfs_server.hostname, + .addr = (const struct sockaddr *)&ctx->nfs_server.address, + .addrlen = ctx->nfs_server.addrlen, + .nfs_mod = ctx->nfs_mod, + .proto = ctx->nfs_server.protocol, + .net = fc->net_ns, .timeparms = &timeparms, .cred = server->cred, - .nconnect = data->nfs_server.nconnect, + .nconnect = ctx->nfs_server.nconnect, .init_flags = 
(1UL << NFS_CS_REUSEPORT), }; struct nfs_client *clp; int error; - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, data->retrans); - if (data->flags & NFS_MOUNT_NORESVPORT) + nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, + ctx->timeo, ctx->retrans); + if (ctx->flags & NFS_MOUNT_NORESVPORT) set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); /* Allocate or find a client reference we can use */ @@ -690,46 +694,46 @@ static int nfs_init_server(struct nfs_server *server, server->nfs_client = clp; /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->options = data->options; + server->flags = ctx->flags; + server->options = ctx->options; server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); + if (ctx->rsize) + server->rsize = nfs_block_size(ctx->rsize, NULL); + if (ctx->wsize) + server->wsize = nfs_block_size(ctx->wsize, NULL); - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; + server->acregmin = ctx->acregmin * HZ; + server->acregmax = ctx->acregmax * HZ; + server->acdirmin = ctx->acdirmin * HZ; + server->acdirmax = ctx->acdirmax * HZ; /* Start lockd here, before we might error out */ error = nfs_start_lockd(server); if (error < 0) goto error; - server->port = data->nfs_server.port; - server->auth_info = data->auth_info; + server->port = ctx->nfs_server.port; + server->auth_info = ctx->auth_info; error = nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); + ctx->selected_flavor); if (error < 0) goto error; /* Preserve the values of mount_server-related mount options */ - if 
(data->mount_server.addrlen) { - memcpy(&server->mountd_address, &data->mount_server.address, - data->mount_server.addrlen); - server->mountd_addrlen = data->mount_server.addrlen; + if (ctx->mount_server.addrlen) { + memcpy(&server->mountd_address, &ctx->mount_server.address, + ctx->mount_server.addrlen); + server->mountd_addrlen = ctx->mount_server.addrlen; } - server->mountd_version = data->mount_server.version; - server->mountd_port = data->mount_server.port; - server->mountd_protocol = data->mount_server.protocol; + server->mountd_version = ctx->mount_server.version; + server->mountd_port = ctx->mount_server.port; + server->mountd_protocol = ctx->mount_server.protocol; - server->namelen = data->namlen; + server->namelen = ctx->namlen; return 0; error: @@ -951,9 +955,9 @@ EXPORT_SYMBOL_GPL(nfs_free_server); * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs_create_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_server *server; struct nfs_fattr *fattr; int error; @@ -970,18 +974,18 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, goto error; /* Get a client representation */ - error = nfs_init_server(server, mount_info->parsed, nfs_mod); + error = nfs_init_server(server, fc); if (error < 0) goto error; /* Probe the root fh to retrieve its FSID */ - error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr); + error = nfs_probe_fsinfo(server, ctx->mntfh, fattr); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; - if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS)) + if (!(ctx->flags & NFS_MOUNT_NORDIRPLUS)) server->caps |= NFS_CAP_READDIRPLUS; } else { if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) @@ 
-989,8 +993,8 @@ struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info, } if (!(fattr->valid & NFS_ATTR_FATTR)) { - error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, - fattr, NULL, NULL); + error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh, + fattr, NULL, NULL); if (error < 0) { dprintk("nfs_create_server: getattr error = %d\n", -error); goto error; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index fe57b2b5314a..4a841071d8a7 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -25,13 +25,32 @@ #include "internal.h" #include "nfs4trace.h" -static void nfs_free_delegation(struct nfs_delegation *delegation) +#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U) + +static atomic_long_t nfs_active_delegations; +static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK; + +static void __nfs_free_delegation(struct nfs_delegation *delegation) { put_cred(delegation->cred); delegation->cred = NULL; kfree_rcu(delegation, rcu); } +static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation) +{ + if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) { + delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; + atomic_long_dec(&nfs_active_delegations); + } +} + +static void nfs_free_delegation(struct nfs_delegation *delegation) +{ + nfs_mark_delegation_revoked(delegation); + __nfs_free_delegation(delegation); +} + /** * nfs_mark_delegation_referenced - set delegation's REFERENCED flag * @delegation: delegation to process @@ -343,7 +362,8 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation, delegation->stateid.seqid = update->stateid.seqid; smp_wmb(); delegation->type = update->type; - clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags); + if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) + atomic_long_inc(&nfs_active_delegations); } } @@ -423,6 +443,8 @@ add_new: rcu_assign_pointer(nfsi->delegation, delegation); delegation = NULL; + 
atomic_long_inc(&nfs_active_delegations); + trace_nfs4_set_delegation(inode, type); spin_lock(&inode->i_lock); @@ -432,7 +454,7 @@ add_new: out: spin_unlock(&clp->cl_lock); if (delegation != NULL) - nfs_free_delegation(delegation); + __nfs_free_delegation(delegation); if (freeme != NULL) { nfs_do_return_delegation(inode, freeme, 0); nfs_free_delegation(freeme); @@ -479,7 +501,7 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags)) ret = true; - if (test_and_clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) && !ret) { + else if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags)) { struct inode *inode; spin_lock(&delegation->lock); @@ -488,6 +510,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation) ret = true; spin_unlock(&delegation->lock); } + if (ret) + clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) || test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) ret = false; @@ -607,6 +631,7 @@ void nfs_inode_evict_delegation(struct inode *inode) delegation = nfs_inode_detach_delegation(inode); if (delegation != NULL) { + set_bit(NFS_DELEGATION_RETURNING, &delegation->flags); set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); nfs_do_return_delegation(inode, delegation, 1); nfs_free_delegation(delegation); @@ -637,6 +662,40 @@ int nfs4_inode_return_delegation(struct inode *inode) } /** + * nfs_inode_return_delegation_on_close - asynchronously return a delegation + * @inode: inode to process + * + * This routine is called on file close in order to determine if the + * inode delegation needs to be returned immediately. 
+ */ +void nfs4_inode_return_delegation_on_close(struct inode *inode) +{ + struct nfs_delegation *delegation; + struct nfs_delegation *ret = NULL; + + if (!inode) + return; + rcu_read_lock(); + delegation = nfs4_get_valid_delegation(inode); + if (!delegation) + goto out; + if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) || + atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) { + spin_lock(&delegation->lock); + if (delegation->inode && + list_empty(&NFS_I(inode)->open_files) && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + clear_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); + ret = delegation; + } + spin_unlock(&delegation->lock); + } +out: + rcu_read_unlock(); + nfs_end_delegation_return(inode, ret, 0); +} + +/** * nfs4_inode_make_writeable * @inode: pointer to inode * @@ -760,13 +819,6 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl rcu_read_unlock(); } -static void nfs_mark_delegation_revoked(struct nfs_server *server, - struct nfs_delegation *delegation) -{ - set_bit(NFS_DELEGATION_REVOKED, &delegation->flags); - delegation->stateid.type = NFS4_INVALID_STATEID_TYPE; -} - static void nfs_revoke_delegation(struct inode *inode, const nfs4_stateid *stateid) { @@ -794,7 +846,7 @@ static void nfs_revoke_delegation(struct inode *inode, } spin_unlock(&delegation->lock); } - nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + nfs_mark_delegation_revoked(delegation); ret = true; out: rcu_read_unlock(); @@ -833,7 +885,7 @@ void nfs_delegation_mark_returned(struct inode *inode, delegation->stateid.seqid = stateid->seqid; } - nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation); + nfs_mark_delegation_revoked(delegation); out_clear_returning: clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags); @@ -1317,3 +1369,5 @@ out: rcu_read_unlock(); return ret; } + +module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644); diff --git 
a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 15d3484be028..31b84604d383 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -42,6 +42,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred, void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, fmode_t type, const nfs4_stateid *stateid, unsigned long pagemod_limit); int nfs4_inode_return_delegation(struct inode *inode); +void nfs4_inode_return_delegation_on_close(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_evict_delegation(struct inode *inode); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e180033e35cf..1320288ff9ec 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -58,7 +58,7 @@ static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, - .iterate = nfs_readdir, + .iterate_shared = nfs_readdir, .open = nfs_opendir, .release = nfs_closedir, .fsync = nfs_fsync_dir, @@ -162,6 +162,17 @@ typedef struct { bool eof; } nfs_readdir_descriptor_t; +static +void nfs_readdir_init_array(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; + kunmap_atomic(array); +} + /* * we are freeing strings created by nfs_add_to_readdir_array() */ @@ -174,6 +185,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); + array->size = 0; kunmap_atomic(array); } @@ -186,7 +198,7 @@ static int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len) { string->len = len; - string->name = kmemdup(name, len, GFP_KERNEL); + string->name = kmemdup_nul(name, len, GFP_KERNEL); if (string->name == NULL) return -ENOMEM; /* @@ -437,7 +449,8 @@ void nfs_force_use_readdirplus(struct inode *dir) if 
(nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && !list_empty(&nfsi->open_files)) { set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); - invalidate_mapping_pages(dir->i_mapping, 0, -1); + invalidate_mapping_pages(dir->i_mapping, + nfsi->page_index + 1, -1); } } @@ -610,6 +623,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); + nfs_readdir_init_array(page); + entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -626,8 +641,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, } array = kmap(page); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); if (status < 0) @@ -682,6 +695,7 @@ int nfs_readdir_filler(void *data, struct page* page) unlock_page(page); return 0; error: + nfs_readdir_clear_array(page); unlock_page(page); return ret; } @@ -689,8 +703,6 @@ int nfs_readdir_filler(void *data, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - if (!desc->page->mapping) - nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -704,19 +716,32 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) /* * Returns 0 if desc->dir_cookie was found on page desc->page_index + * and locks the page to prevent removal from the page cache. 
*/ static -int find_cache_page(nfs_readdir_descriptor_t *desc) +int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) { + struct inode *inode = file_inode(desc->file); + struct nfs_inode *nfsi = NFS_I(inode); int res; desc->page = get_cache_page(desc); if (IS_ERR(desc->page)) return PTR_ERR(desc->page); - - res = nfs_readdir_search_array(desc); + res = lock_page_killable(desc->page); if (res != 0) - cache_page_release(desc); + goto error; + res = -EAGAIN; + if (desc->page->mapping != NULL) { + res = nfs_readdir_search_array(desc); + if (res == 0) { + nfsi->page_index = desc->page_index; + return 0; + } + } + unlock_page(desc->page); +error: + cache_page_release(desc); return res; } @@ -731,7 +756,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) desc->last_cookie = 0; } do { - res = find_cache_page(desc); + res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; } @@ -770,7 +795,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) desc->eof = true; kunmap(desc->page); - cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; @@ -816,13 +840,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) status = nfs_do_filldir(desc); + out_release: + nfs_readdir_clear_array(desc->page); + cache_page_release(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; - out_release: - cache_page_release(desc); - goto out; } /* The file offset position represents the dirent entry number. 
A @@ -887,6 +911,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; res = nfs_do_filldir(desc); + unlock_page(desc->page); + cache_page_release(desc); if (res < 0) break; } while (!desc->eof); @@ -1142,10 +1168,17 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, if (fhandle == NULL || fattr == NULL || IS_ERR(label)) goto out; - ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); if (ret < 0) { - if (ret == -ESTALE || ret == -ENOENT) + switch (ret) { + case -ESTALE: + case -ENOENT: ret = 0; + break; + case -ETIMEDOUT: + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + ret = 1; + } goto out; } ret = 0; @@ -1408,7 +1441,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in goto out; trace_nfs_lookup_enter(dir, dentry, flags); - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label); if (error == -ENOENT) goto no_entry; if (error < 0) { @@ -1683,7 +1716,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, d_drop(dentry); if (fhandle->size == 0) { - error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL); + error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL); if (error) goto out_error; } @@ -2312,11 +2345,11 @@ static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, s /* Found an entry, is our attribute cache valid? 
*/ if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) break; + if (!retry) + break; err = -ECHILD; if (!may_block) goto out; - if (!retry) - goto out_zap; spin_unlock(&inode->i_lock); err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); if (err) @@ -2353,7 +2386,7 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre lh = rcu_dereference(nfsi->access_cache_entry_lru.prev); cache = list_entry(lh, struct nfs_access_entry, lru); if (lh == &nfsi->access_cache_entry_lru || - cred != cache->cred) + cred_fscmp(cred, cache->cred) != 0) cache = NULL; if (cache == NULL) goto out; @@ -2476,7 +2509,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; - int cache_mask; + int cache_mask = -1; int status; trace_nfs_access_enter(inode); @@ -2515,7 +2548,7 @@ out_cached: if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: - trace_nfs_access_exit(inode, status); + trace_nfs_access_exit(inode, mask, cache_mask, status); return status; } diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 040a50fd9bf3..b768a0b42e82 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -245,10 +245,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, data->ds_commit_index); /* verifier not set so always fail */ - if (verfp->committed < 0) + if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) return 1; - return nfs_direct_cmp_verf(verfp, &data->verf); + return nfs_direct_cmp_verf(verfp, data->res.verf); } /** @@ -824,7 +824,8 @@ static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) dreq->flags = NFS_ODIRECT_RESCHED_WRITES; /* fake unstable write to let common nfs resend pages */ hdr->verf.committed = NFS_UNSTABLE; - hdr->good_bytes = hdr->args.count; + hdr->good_bytes = hdr->args.offset + hdr->args.count - + hdr->io_start; } spin_unlock(&dreq->lock); } 
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index aec769a500a1..89bd5581f317 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -93,7 +93,7 @@ static void nfs_dns_ent_init(struct cache_head *cnew, key = container_of(ckey, struct nfs_dns_ent, h); kfree(new->hostname); - new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL); + new->hostname = kmemdup_nul(key->hostname, key->namelen, GFP_KERNEL); if (new->hostname) { new->namelen = key->namelen; nfs_dns_ent_update(cnew, ckey); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8eb731d9be3e..f96367a2463e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -204,44 +204,39 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap); static int nfs_file_fsync_commit(struct file *file, int datasync) { - struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); - int do_resend, status; - int ret = 0; + int ret; dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - status = nfs_commit_inode(inode, FLUSH_SYNC); - if (status == 0) - status = file_check_and_advance_wb_err(file); - if (status < 0) { - ret = status; - goto out; - } - do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); - if (do_resend) - ret = -EAGAIN; -out: - return ret; + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + return ret; + return file_check_and_advance_wb_err(file); } int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - int ret; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file_inode(file); + int ret; trace_nfs_fsync_enter(inode); - do { + for (;;) { ret = file_write_and_wait_range(file, start, end); if (ret != 0) break; ret = nfs_file_fsync_commit(file, datasync); - if (!ret) - ret = pnfs_sync_inode(inode, !!datasync); + if (ret != 0) + break; + ret = pnfs_sync_inode(inode, !!datasync); + 
if (ret != 0) + break; + if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags)) + break; /* * If nfs_file_fsync_commit detected a server reboot, then * resend all dirty pages that might have been covered by @@ -249,7 +244,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ start = 0; end = LLONG_MAX; - } while (ret == -EAGAIN); + } trace_nfs_fsync_exit(inode, ret); return ret; @@ -489,7 +484,19 @@ static int nfs_launder_page(struct page *page) static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { + unsigned long blocks; + long long isize; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); + struct inode *inode = file->f_mapping->host; + + spin_lock(&inode->i_lock); + blocks = inode->i_blocks; + isize = inode->i_size; + spin_unlock(&inode->i_lock); + if (blocks*512 < isize) { + pr_warn("swap activate: swapfile has holes\n"); + return -EINVAL; + } *span = sis->pages; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 5657b7f2611f..bb9148b83166 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1266,9 +1266,10 @@ static int ff_layout_async_handle_error(struct rpc_task *task, static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, int idx, u64 offset, u64 length, - u32 status, int opnum, int error) + u32 *op_status, int opnum, int error) { struct nfs4_ff_layout_mirror *mirror; + u32 status = *op_status; int err; if (status == 0) { @@ -1286,10 +1287,10 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -ENOBUFS: case -EPIPE: case -EPERM: - status = NFS4ERR_NXIO; + *op_status = status = NFS4ERR_NXIO; break; case -EACCES: - status = NFS4ERR_ACCESS; + *op_status = status = NFS4ERR_ACCESS; break; default: return; @@ -1321,16 +1322,19 @@ static int ff_layout_read_done_cb(struct rpc_task *task, int new_idx = hdr->pgio_mirror_idx; int err; - 
trace_nfs4_pnfs_read(hdr, task->tk_status); - if (task->tk_status < 0) + if (task->tk_status < 0) { ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, hdr->args.offset, hdr->args.count, - hdr->res.op_status, OP_READ, + &hdr->res.op_status, OP_READ, task->tk_status); + trace_ff_layout_read_error(hdr); + } + err = ff_layout_async_handle_error(task, hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + trace_nfs4_pnfs_read(hdr, err); clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { @@ -1494,16 +1498,19 @@ static int ff_layout_write_done_cb(struct rpc_task *task, loff_t end_offs = 0; int err; - trace_nfs4_pnfs_write(hdr, task->tk_status); - if (task->tk_status < 0) + if (task->tk_status < 0) { ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, hdr->args.offset, hdr->args.count, - hdr->res.op_status, OP_WRITE, + &hdr->res.op_status, OP_WRITE, task->tk_status); + trace_ff_layout_write_error(hdr); + } + err = ff_layout_async_handle_error(task, hdr->args.context->state, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + trace_nfs4_pnfs_write(hdr, err); clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { @@ -1537,15 +1544,18 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, { int err; - trace_nfs4_pnfs_commit_ds(data, task->tk_status); - if (task->tk_status < 0) + if (task->tk_status < 0) { ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, data->args.offset, data->args.count, - data->res.op_status, OP_COMMIT, + &data->res.op_status, OP_COMMIT, task->tk_status); + trace_ff_layout_commit_error(data); + } + err = ff_layout_async_handle_error(task, NULL, data->ds_clp, data->lseg, data->ds_commit_index); + trace_nfs4_pnfs_commit_ds(data, err); switch (err) { case -NFS4ERR_RESET_TO_PNFS: pnfs_generic_prepare_to_resend_writes(data); diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c new file mode 
100644 index 000000000000..2c6dc1b6cc92 --- /dev/null +++ b/fs/nfs/fs_context.c @@ -0,0 +1,1437 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/fs/nfs/fs_context.c + * + * Copyright (C) 1992 Rick Sladkey + * Conversion to new mount api Copyright (C) David Howells + * + * NFS mount handling. + * + * Split from fs/nfs/super.c by David Howells <dhowells@redhat.com> + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/fs_context.h> +#include <linux/fs_parser.h> +#include <linux/nfs_fs.h> +#include <linux/nfs_mount.h> +#include <linux/nfs4_mount.h> +#include "nfs.h" +#include "internal.h" + +#define NFSDBG_FACILITY NFSDBG_MOUNT + +#if IS_ENABLED(CONFIG_NFS_V3) +#define NFS_DEFAULT_VERSION 3 +#else +#define NFS_DEFAULT_VERSION 2 +#endif + +#define NFS_MAX_CONNECTIONS 16 + +enum nfs_param { + Opt_ac, + Opt_acdirmax, + Opt_acdirmin, + Opt_acl, + Opt_acregmax, + Opt_acregmin, + Opt_actimeo, + Opt_addr, + Opt_bg, + Opt_bsize, + Opt_clientaddr, + Opt_cto, + Opt_fg, + Opt_fscache, + Opt_hard, + Opt_intr, + Opt_local_lock, + Opt_lock, + Opt_lookupcache, + Opt_migration, + Opt_minorversion, + Opt_mountaddr, + Opt_mounthost, + Opt_mountport, + Opt_mountproto, + Opt_mountvers, + Opt_namelen, + Opt_nconnect, + Opt_port, + Opt_posix, + Opt_proto, + Opt_rdirplus, + Opt_rdma, + Opt_resvport, + Opt_retrans, + Opt_retry, + Opt_rsize, + Opt_sec, + Opt_sharecache, + Opt_sloppy, + Opt_soft, + Opt_softerr, + Opt_softreval, + Opt_source, + Opt_tcp, + Opt_timeo, + Opt_udp, + Opt_v, + Opt_vers, + Opt_wsize, +}; + +static const struct fs_parameter_spec nfs_param_specs[] = { + fsparam_flag_no("ac", Opt_ac), + fsparam_u32 ("acdirmax", Opt_acdirmax), + fsparam_u32 ("acdirmin", Opt_acdirmin), + fsparam_flag_no("acl", Opt_acl), + fsparam_u32 ("acregmax", Opt_acregmax), + fsparam_u32 ("acregmin", Opt_acregmin), + fsparam_u32 ("actimeo", Opt_actimeo), + fsparam_string("addr", Opt_addr), + fsparam_flag ("bg", Opt_bg), + fsparam_u32 ("bsize", Opt_bsize), + 
fsparam_string("clientaddr", Opt_clientaddr), + fsparam_flag_no("cto", Opt_cto), + fsparam_flag ("fg", Opt_fg), + __fsparam(fs_param_is_string, "fsc", Opt_fscache, + fs_param_neg_with_no|fs_param_v_optional), + fsparam_flag ("hard", Opt_hard), + __fsparam(fs_param_is_flag, "intr", Opt_intr, + fs_param_neg_with_no|fs_param_deprecated), + fsparam_enum ("local_lock", Opt_local_lock), + fsparam_flag_no("lock", Opt_lock), + fsparam_enum ("lookupcache", Opt_lookupcache), + fsparam_flag_no("migration", Opt_migration), + fsparam_u32 ("minorversion", Opt_minorversion), + fsparam_string("mountaddr", Opt_mountaddr), + fsparam_string("mounthost", Opt_mounthost), + fsparam_u32 ("mountport", Opt_mountport), + fsparam_string("mountproto", Opt_mountproto), + fsparam_u32 ("mountvers", Opt_mountvers), + fsparam_u32 ("namlen", Opt_namelen), + fsparam_u32 ("nconnect", Opt_nconnect), + fsparam_string("nfsvers", Opt_vers), + fsparam_u32 ("port", Opt_port), + fsparam_flag_no("posix", Opt_posix), + fsparam_string("proto", Opt_proto), + fsparam_flag_no("rdirplus", Opt_rdirplus), + fsparam_flag ("rdma", Opt_rdma), + fsparam_flag_no("resvport", Opt_resvport), + fsparam_u32 ("retrans", Opt_retrans), + fsparam_string("retry", Opt_retry), + fsparam_u32 ("rsize", Opt_rsize), + fsparam_string("sec", Opt_sec), + fsparam_flag_no("sharecache", Opt_sharecache), + fsparam_flag ("sloppy", Opt_sloppy), + fsparam_flag ("soft", Opt_soft), + fsparam_flag ("softerr", Opt_softerr), + /* negatable: the Opt_softreval handler tests result.negated */ + fsparam_flag_no("softreval", Opt_softreval), + fsparam_string("source", Opt_source), + fsparam_flag ("tcp", Opt_tcp), + fsparam_u32 ("timeo", Opt_timeo), + fsparam_flag ("udp", Opt_udp), + fsparam_flag ("v2", Opt_v), + fsparam_flag ("v3", Opt_v), + fsparam_flag ("v4", Opt_v), + fsparam_flag ("v4.0", Opt_v), + fsparam_flag ("v4.1", Opt_v), + fsparam_flag ("v4.2", Opt_v), + fsparam_string("vers", Opt_vers), + fsparam_u32 ("wsize", Opt_wsize), + {} +}; + +enum { + Opt_local_lock_all, + Opt_local_lock_flock, + Opt_local_lock_none, + 
Opt_local_lock_posix, +}; + +enum { + Opt_lookupcache_all, + Opt_lookupcache_none, + Opt_lookupcache_positive, +}; + +static const struct fs_parameter_enum nfs_param_enums[] = { + { Opt_local_lock, "all", Opt_local_lock_all }, + { Opt_local_lock, "flock", Opt_local_lock_flock }, + { Opt_local_lock, "none", Opt_local_lock_none }, + { Opt_local_lock, "posix", Opt_local_lock_posix }, + { Opt_lookupcache, "all", Opt_lookupcache_all }, + { Opt_lookupcache, "none", Opt_lookupcache_none }, + { Opt_lookupcache, "pos", Opt_lookupcache_positive }, + { Opt_lookupcache, "positive", Opt_lookupcache_positive }, + {} +}; + +static const struct fs_parameter_description nfs_fs_parameters = { + .name = "nfs", + .specs = nfs_param_specs, + .enums = nfs_param_enums, +}; + +enum { + Opt_vers_2, + Opt_vers_3, + Opt_vers_4, + Opt_vers_4_0, + Opt_vers_4_1, + Opt_vers_4_2, +}; + +static const struct constant_table nfs_vers_tokens[] = { + { "2", Opt_vers_2 }, + { "3", Opt_vers_3 }, + { "4", Opt_vers_4 }, + { "4.0", Opt_vers_4_0 }, + { "4.1", Opt_vers_4_1 }, + { "4.2", Opt_vers_4_2 }, +}; + +enum { + Opt_xprt_rdma, + Opt_xprt_rdma6, + Opt_xprt_tcp, + Opt_xprt_tcp6, + Opt_xprt_udp, + Opt_xprt_udp6, + nr__Opt_xprt +}; + +static const struct constant_table nfs_xprt_protocol_tokens[nr__Opt_xprt] = { + { "rdma", Opt_xprt_rdma }, + { "rdma6", Opt_xprt_rdma6 }, + { "tcp", Opt_xprt_tcp }, + { "tcp6", Opt_xprt_tcp6 }, + { "udp", Opt_xprt_udp }, + { "udp6", Opt_xprt_udp6 }, +}; + +enum { + Opt_sec_krb5, + Opt_sec_krb5i, + Opt_sec_krb5p, + Opt_sec_lkey, + Opt_sec_lkeyi, + Opt_sec_lkeyp, + Opt_sec_none, + Opt_sec_spkm, + Opt_sec_spkmi, + Opt_sec_spkmp, + Opt_sec_sys, + nr__Opt_sec +}; + +static const struct constant_table nfs_secflavor_tokens[] = { + { "krb5", Opt_sec_krb5 }, + { "krb5i", Opt_sec_krb5i }, + { "krb5p", Opt_sec_krb5p }, + { "lkey", Opt_sec_lkey }, + { "lkeyi", Opt_sec_lkeyi }, + { "lkeyp", Opt_sec_lkeyp }, + { "none", Opt_sec_none }, + { "null", Opt_sec_none }, + { "spkm3", Opt_sec_spkm 
}, + { "spkm3i", Opt_sec_spkmi }, + { "spkm3p", Opt_sec_spkmp }, + { "sys", Opt_sec_sys }, +}; + +/* + * Sanity-check a server address provided by the mount command. + * + * Address family must be initialized, and address must not be + * the ANY address for that family. + */ +static int nfs_verify_server_address(struct sockaddr *addr) +{ + switch (addr->sa_family) { + case AF_INET: { + struct sockaddr_in *sa = (struct sockaddr_in *)addr; + return sa->sin_addr.s_addr != htonl(INADDR_ANY); + } + case AF_INET6: { + struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; + return !ipv6_addr_any(sa); + } + } + + dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); + return 0; +} + +/* + * Sanity check the NFS transport protocol. + * + */ +static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx) +{ + switch (ctx->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + break; + default: + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * For text based NFSv2/v3 mounts, the mount protocol transport default + * settings should depend upon the specified NFS transport. + */ +static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx) +{ + nfs_validate_transport_protocol(ctx); + + if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP || + ctx->mount_server.protocol == XPRT_TRANSPORT_TCP) + return; + switch (ctx->nfs_server.protocol) { + case XPRT_TRANSPORT_UDP: + ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case XPRT_TRANSPORT_TCP: + case XPRT_TRANSPORT_RDMA: + ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; + } +} + +/* + * Add 'flavor' to 'auth_info' if not already present. 
+ * Returns 0 if 'flavor' ends up in the list, otherwise the error from nfs_invalf() + */ +static int nfs_auth_info_add(struct fs_context *fc, + struct nfs_auth_info *auth_info, + rpc_authflavor_t flavor) +{ + unsigned int i; + unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); + + /* make sure this flavor isn't already in the list */ + for (i = 0; i < auth_info->flavor_len; i++) { + if (flavor == auth_info->flavors[i]) + return 0; + } + + if (auth_info->flavor_len + 1 >= max_flavor_len) + return nfs_invalf(fc, "NFS: too many sec= flavors"); + + auth_info->flavors[auth_info->flavor_len++] = flavor; + return 0; +} + +/* + * Parse the value of the 'sec=' option. + */ +static int nfs_parse_security_flavors(struct fs_context *fc, + struct fs_parameter *param) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + rpc_authflavor_t pseudoflavor; + char *string = param->string, *p; + int ret; + + dfprintk(MOUNT, "NFS: parsing %s=%s option\n", param->key, param->string); + + while ((p = strsep(&string, ":")) != NULL) { + if (!*p) + continue; + switch (lookup_constant(nfs_secflavor_tokens, p, -1)) { + case Opt_sec_none: + pseudoflavor = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + pseudoflavor = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + pseudoflavor = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + pseudoflavor = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + pseudoflavor = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + pseudoflavor = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + pseudoflavor = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + pseudoflavor = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + pseudoflavor = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + pseudoflavor = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + pseudoflavor = RPC_AUTH_GSS_SPKMP; + break; + default: + return nfs_invalf(fc, "NFS: sec=%s option not recognized", p); + } + + ret = nfs_auth_info_add(fc, &ctx->auth_info, pseudoflavor); + if (ret < 0) + return 
ret; + } + + return 0; +} + +static int nfs_parse_version_string(struct fs_context *fc, + const char *string) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + + ctx->flags &= ~NFS_MOUNT_VER3; + switch (lookup_constant(nfs_vers_tokens, string, -1)) { + case Opt_vers_2: + ctx->version = 2; + break; + case Opt_vers_3: + ctx->flags |= NFS_MOUNT_VER3; + ctx->version = 3; + break; + case Opt_vers_4: + /* Backward compatibility option. In future, + * the mount program should always supply + * a NFSv4 minor version number. + */ + ctx->version = 4; + break; + case Opt_vers_4_0: + ctx->version = 4; + ctx->minorversion = 0; + break; + case Opt_vers_4_1: + ctx->version = 4; + ctx->minorversion = 1; + break; + case Opt_vers_4_2: + ctx->version = 4; + ctx->minorversion = 2; + break; + default: + return nfs_invalf(fc, "NFS: Unsupported NFS version"); + } + return 0; +} + +/* + * Parse a single mount parameter. + */ +static int nfs_fs_context_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + struct fs_parse_result result; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + unsigned short protofamily, mountfamily; + unsigned int len; + int ret, opt; + + dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", param->key); + + opt = fs_parse(fc, &nfs_fs_parameters, param, &result); + if (opt < 0) + return ctx->sloppy ? 
1 : opt; + + switch (opt) { + case Opt_source: + if (fc->source) + return nfs_invalf(fc, "NFS: Multiple sources not supported"); + fc->source = param->string; + param->string = NULL; + break; + + /* + * boolean options: foo/nofoo + */ + case Opt_soft: + ctx->flags |= NFS_MOUNT_SOFT; + ctx->flags &= ~NFS_MOUNT_SOFTERR; + break; + case Opt_softerr: + ctx->flags |= NFS_MOUNT_SOFTERR | NFS_MOUNT_SOFTREVAL; + ctx->flags &= ~NFS_MOUNT_SOFT; + break; + case Opt_hard: + ctx->flags &= ~(NFS_MOUNT_SOFT | + NFS_MOUNT_SOFTERR | + NFS_MOUNT_SOFTREVAL); + break; + case Opt_softreval: + /* set or clear only the SOFTREVAL bit; all other flags stay untouched */ + if (result.negated) + ctx->flags &= ~NFS_MOUNT_SOFTREVAL; + else + ctx->flags |= NFS_MOUNT_SOFTREVAL; + break; + case Opt_posix: + if (result.negated) + ctx->flags &= ~NFS_MOUNT_POSIX; + else + ctx->flags |= NFS_MOUNT_POSIX; + break; + case Opt_cto: + if (result.negated) + ctx->flags |= NFS_MOUNT_NOCTO; + else + ctx->flags &= ~NFS_MOUNT_NOCTO; + break; + case Opt_ac: + if (result.negated) + ctx->flags |= NFS_MOUNT_NOAC; + else + ctx->flags &= ~NFS_MOUNT_NOAC; + break; + case Opt_lock: + if (result.negated) { + ctx->flags |= NFS_MOUNT_NONLM; + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } else { + ctx->flags &= ~NFS_MOUNT_NONLM; + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | NFS_MOUNT_LOCAL_FCNTL); + } + break; + case Opt_udp: + ctx->flags &= ~NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_tcp: + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_rdma: + ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ + ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(param->key); + break; + case Opt_acl: + if (result.negated) + ctx->flags |= NFS_MOUNT_NOACL; + else + ctx->flags &= ~NFS_MOUNT_NOACL; + break; + case Opt_rdirplus: + if (result.negated) + ctx->flags |= NFS_MOUNT_NORDIRPLUS; + else + ctx->flags &= ~NFS_MOUNT_NORDIRPLUS; + break; + case Opt_sharecache: + if (result.negated) + 
ctx->flags |= NFS_MOUNT_UNSHARED; + else + ctx->flags &= ~NFS_MOUNT_UNSHARED; + break; + case Opt_resvport: + if (result.negated) + ctx->flags |= NFS_MOUNT_NORESVPORT; + else + ctx->flags &= ~NFS_MOUNT_NORESVPORT; + break; + case Opt_fscache: + kfree(ctx->fscache_uniq); + ctx->fscache_uniq = param->string; + param->string = NULL; + if (result.negated) + ctx->options &= ~NFS_OPTION_FSCACHE; + else + ctx->options |= NFS_OPTION_FSCACHE; + break; + case Opt_migration: + if (result.negated) + ctx->options &= ~NFS_OPTION_MIGRATION; + else + ctx->options |= NFS_OPTION_MIGRATION; + break; + + /* + * options that take numeric values + */ + case Opt_port: + if (result.uint_32 > USHRT_MAX) + goto out_of_bounds; + ctx->nfs_server.port = result.uint_32; + break; + case Opt_rsize: + ctx->rsize = result.uint_32; + break; + case Opt_wsize: + ctx->wsize = result.uint_32; + break; + case Opt_bsize: + ctx->bsize = result.uint_32; + break; + case Opt_timeo: + if (result.uint_32 < 1 || result.uint_32 > INT_MAX) + goto out_of_bounds; + ctx->timeo = result.uint_32; + break; + case Opt_retrans: + if (result.uint_32 > INT_MAX) + goto out_of_bounds; + ctx->retrans = result.uint_32; + break; + case Opt_acregmin: + ctx->acregmin = result.uint_32; + break; + case Opt_acregmax: + ctx->acregmax = result.uint_32; + break; + case Opt_acdirmin: + ctx->acdirmin = result.uint_32; + break; + case Opt_acdirmax: + ctx->acdirmax = result.uint_32; + break; + case Opt_actimeo: + ctx->acregmin = result.uint_32; + ctx->acregmax = result.uint_32; + ctx->acdirmin = result.uint_32; + ctx->acdirmax = result.uint_32; + break; + case Opt_namelen: + ctx->namlen = result.uint_32; + break; + case Opt_mountport: + if (result.uint_32 > USHRT_MAX) + goto out_of_bounds; + ctx->mount_server.port = result.uint_32; + break; + case Opt_mountvers: + if (result.uint_32 < NFS_MNT_VERSION || + result.uint_32 > NFS_MNT3_VERSION) + goto out_of_bounds; + ctx->mount_server.version = result.uint_32; + break; + case Opt_minorversion: 
+ if (result.uint_32 > NFS4_MAX_MINOR_VERSION) + goto out_of_bounds; + ctx->minorversion = result.uint_32; + break; + + /* + * options that take text values + */ + case Opt_v: + ret = nfs_parse_version_string(fc, param->key + 1); + if (ret < 0) + return ret; + break; + case Opt_vers: + ret = nfs_parse_version_string(fc, param->string); + if (ret < 0) + return ret; + break; + case Opt_sec: + ret = nfs_parse_security_flavors(fc, param); + if (ret < 0) + return ret; + break; + + case Opt_proto: + protofamily = AF_INET; + switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { + case Opt_xprt_udp6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_udp: + ctx->flags &= ~NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_xprt_tcp6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_tcp: + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_xprt_rdma6: + protofamily = AF_INET6; + /* fall through */ + case Opt_xprt_rdma: + /* vector side protocols to TCP */ + ctx->flags |= NFS_MOUNT_TCP; + ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + xprt_load_transport(param->string); + break; + default: + return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); + } + + ctx->protofamily = protofamily; + break; + + case Opt_mountproto: + mountfamily = AF_INET; + switch (lookup_constant(nfs_xprt_protocol_tokens, param->string, -1)) { + case Opt_xprt_udp6: + mountfamily = AF_INET6; + /* fall through */ + case Opt_xprt_udp: + ctx->mount_server.protocol = XPRT_TRANSPORT_UDP; + break; + case Opt_xprt_tcp6: + mountfamily = AF_INET6; + /* fall through */ + case Opt_xprt_tcp: + ctx->mount_server.protocol = XPRT_TRANSPORT_TCP; + break; + case Opt_xprt_rdma: /* not used for side protocols */ + default: + return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); + } + ctx->mountfamily = mountfamily; + break; + + case Opt_addr: + len = rpc_pton(fc->net_ns, 
param->string, param->size, + &ctx->nfs_server.address, + sizeof(ctx->nfs_server._address)); + if (len == 0) + goto out_invalid_address; + ctx->nfs_server.addrlen = len; + break; + case Opt_clientaddr: + kfree(ctx->client_address); + ctx->client_address = param->string; + param->string = NULL; + break; + case Opt_mounthost: + kfree(ctx->mount_server.hostname); + ctx->mount_server.hostname = param->string; + param->string = NULL; + break; + case Opt_mountaddr: + len = rpc_pton(fc->net_ns, param->string, param->size, + &ctx->mount_server.address, + sizeof(ctx->mount_server._address)); + if (len == 0) + goto out_invalid_address; + ctx->mount_server.addrlen = len; + break; + case Opt_nconnect: + if (result.uint_32 < 1 || result.uint_32 > NFS_MAX_CONNECTIONS) + goto out_of_bounds; + ctx->nfs_server.nconnect = result.uint_32; + break; + case Opt_lookupcache: + switch (result.uint_32) { + case Opt_lookupcache_all: + ctx->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); + break; + case Opt_lookupcache_positive: + ctx->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; + ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; + break; + case Opt_lookupcache_none: + ctx->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; + break; + default: + goto out_invalid_value; + } + break; + case Opt_local_lock: + switch (result.uint_32) { + case Opt_local_lock_all: + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + case Opt_local_lock_flock: + ctx->flags |= NFS_MOUNT_LOCAL_FLOCK; + break; + case Opt_local_lock_posix: + ctx->flags |= NFS_MOUNT_LOCAL_FCNTL; + break; + case Opt_local_lock_none: + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + break; + default: + goto out_invalid_value; + } + break; + + /* + * Special options + */ + case Opt_sloppy: + ctx->sloppy = true; + dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); + break; + } + + return 0; + +out_invalid_value: + return nfs_invalf(fc, "NFS: Bad mount option value 
specified"); +out_invalid_address: + return nfs_invalf(fc, "NFS: Bad IP address specified"); +out_of_bounds: + return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); +} + +/* + * Split fc->source into "hostname:export_path". + * + * The leftmost colon demarks the split between the server's hostname + * and the export path. If the hostname starts with a left square + * bracket, then it may contain colons. + * + * Note: caller frees hostname and export path, even on error. + */ +static int nfs_parse_source(struct fs_context *fc, + size_t maxnamlen, size_t maxpathlen) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + const char *dev_name = fc->source; + size_t len; + const char *end; + + if (unlikely(!dev_name || !*dev_name)) { + dfprintk(MOUNT, "NFS: device name not specified\n"); + return -EINVAL; + } + + /* Is the host name protected with square brackets? */ + if (*dev_name == '[') { + end = strchr(++dev_name, ']'); + if (end == NULL || end[1] != ':') + goto out_bad_devname; + + len = end - dev_name; + end++; + } else { + const char *comma; + + end = strchr(dev_name, ':'); + if (end == NULL) + goto out_bad_devname; + len = end - dev_name; + + /* kill possible hostname list: not supported */ + comma = memchr(dev_name, ',', len); + if (comma) + len = comma - dev_name; + } + + if (len > maxnamlen) + goto out_hostname; + + /* N.B. 
caller will free nfs_server.hostname in all cases */ + ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL); + if (!ctx->nfs_server.hostname) + goto out_nomem; + len = strlen(++end); + if (len > maxpathlen) + goto out_path; + ctx->nfs_server.export_path = kmemdup_nul(end, len, GFP_KERNEL); + if (!ctx->nfs_server.export_path) + goto out_nomem; + + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", ctx->nfs_server.export_path); + return 0; + +out_bad_devname: + return nfs_invalf(fc, "NFS: device name not in host:path format"); +out_nomem: + nfs_errorf(fc, "NFS: not enough memory to parse device name"); + return -ENOMEM; +out_hostname: + nfs_errorf(fc, "NFS: server hostname too long"); + return -ENAMETOOLONG; +out_path: + nfs_errorf(fc, "NFS: export pathname too long"); + return -ENAMETOOLONG; +} + +static inline bool is_remount_fc(struct fs_context *fc) +{ + return fc->root != NULL; +} + +/* + * Parse monolithic NFS2/NFS3 mount data + * - fills in the mount root filehandle + * + * For option strings, user space handles the following behaviors: + * + * + DNS: mapping server host name to IP address ("addr=" option) + * + * + failure mode: how to behave if a mount request can't be handled + * immediately ("fg/bg" option) + * + * + retry: how often to retry a mount request ("retry=" option) + * + * + breaking back: trying proto=udp after proto=tcp, v2 after v3, + * mountproto=tcp after mountproto=udp, and so on + */ +static int nfs23_parse_monolithic(struct fs_context *fc, + struct nfs_mount_data *data) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_fh *mntfh = ctx->mntfh; + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; + + if (data == NULL) + goto out_no_data; + + ctx->version = NFS_DEFAULT_VERSION; + switch (data->version) { + case 1: + data->namlen = 0; /* fall through */ + case 2: + data->bsize = 0; /* fall through */ + case 3: + if (data->flags & NFS_MOUNT_VER3) + goto 
out_no_v3; + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + /* Turn off security negotiation */ + extra_flags |= NFS_MOUNT_SECFLAVOUR; + /* fall through */ + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) + goto out_no_sec; + /* fall through */ + case 5: + memset(data->context, 0, sizeof(data->context)); + /* fall through */ + case 6: + if (data->flags & NFS_MOUNT_VER3) { + if (data->root.size > NFS3_FHSIZE || data->root.size == 0) + goto out_invalid_fh; + mntfh->size = data->root.size; + ctx->version = 3; + } else { + mntfh->size = NFS2_FHSIZE; + ctx->version = 2; + } + + + memcpy(mntfh->data, data->root.data, mntfh->size); + if (mntfh->size < sizeof(mntfh->data)) + memset(mntfh->data + mntfh->size, 0, + sizeof(mntfh->data) - mntfh->size); + + /* + * Translate to nfs_fs_context, which nfs_fill_super + * can deal with. + */ + ctx->flags = data->flags & NFS_MOUNT_FLAGMASK; + ctx->flags |= extra_flags; + ctx->rsize = data->rsize; + ctx->wsize = data->wsize; + ctx->timeo = data->timeo; + ctx->retrans = data->retrans; + ctx->acregmin = data->acregmin; + ctx->acregmax = data->acregmax; + ctx->acdirmin = data->acdirmin; + ctx->acdirmax = data->acdirmax; + ctx->need_mount = false; + + memcpy(sap, &data->addr, sizeof(data->addr)); + ctx->nfs_server.addrlen = sizeof(data->addr); + ctx->nfs_server.port = ntohs(data->addr.sin_port); + if (sap->sa_family != AF_INET || + !nfs_verify_server_address(sap)) + goto out_no_address; + + if (!(data->flags & NFS_MOUNT_TCP)) + ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; + /* N.B. 
caller will free nfs_server.hostname in all cases */ + ctx->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); + if (!ctx->nfs_server.hostname) + goto out_nomem; + + ctx->namlen = data->namlen; + ctx->bsize = data->bsize; + + if (data->flags & NFS_MOUNT_SECFLAVOUR) + ctx->selected_flavor = data->pseudoflavor; + else + ctx->selected_flavor = RPC_AUTH_UNIX; + + if (!(data->flags & NFS_MOUNT_NONLM)) + ctx->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); + else + ctx->flags |= (NFS_MOUNT_LOCAL_FLOCK| + NFS_MOUNT_LOCAL_FCNTL); + + /* + * The legacy version 6 binary mount data from userspace has a + * field used only to transport selinux information into the + * the kernel. To continue to support that functionality we + * have a touch of selinux knowledge here in the NFS code. The + * userspace code converted context=blah to just blah so we are + * converting back to the full string selinux understands. + */ + if (data->context[0]){ +#ifdef CONFIG_SECURITY_SELINUX + int ret; + + data->context[NFS_MAX_CONTEXT_LEN] = '\0'; + ret = vfs_parse_fs_string(fc, "context", + data->context, strlen(data->context)); + if (ret < 0) + return ret; +#else + return -EINVAL; +#endif + } + + break; + default: + goto generic; + } + + ctx->skip_reconfig_option_check = true; + return 0; + +generic: + return generic_parse_monolithic(fc, data); + +out_no_data: + if (is_remount_fc(fc)) { + ctx->skip_reconfig_option_check = true; + return 0; + } + return nfs_invalf(fc, "NFS: mount program didn't pass any mount data"); + +out_no_v3: + return nfs_invalf(fc, "NFS: nfs_mount_data version does not support v3"); + +out_no_sec: + return nfs_invalf(fc, "NFS: nfs_mount_data version supports only AUTH_SYS"); + +out_nomem: + dfprintk(MOUNT, "NFS: not enough memory to handle mount options"); + return -ENOMEM; + +out_no_address: + return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); + +out_invalid_fh: + return nfs_invalf(fc, "NFS: invalid root filehandle"); +} + +#if 
IS_ENABLED(CONFIG_NFS_V4) +/* + * Validate NFSv4 mount options + */ +static int nfs4_parse_monolithic(struct fs_context *fc, + struct nfs4_mount_data *data) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + char *c; + + if (data == NULL) + goto out_no_data; + + ctx->version = 4; + + switch (data->version) { + case 1: + if (data->host_addrlen > sizeof(ctx->nfs_server.address)) + goto out_no_address; + if (data->host_addrlen == 0) + goto out_no_address; + ctx->nfs_server.addrlen = data->host_addrlen; + if (copy_from_user(sap, data->host_addr, data->host_addrlen)) + return -EFAULT; + if (!nfs_verify_server_address(sap)) + goto out_no_address; + ctx->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); + + if (data->auth_flavourlen) { + rpc_authflavor_t pseudoflavor; + if (data->auth_flavourlen > 1) + goto out_inval_auth; + if (copy_from_user(&pseudoflavor, + data->auth_flavours, + sizeof(pseudoflavor))) + return -EFAULT; + ctx->selected_flavor = pseudoflavor; + } else + ctx->selected_flavor = RPC_AUTH_UNIX; + + c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + ctx->nfs_server.hostname = c; + + c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); + if (IS_ERR(c)) + return PTR_ERR(c); + ctx->nfs_server.export_path = c; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); + + c = strndup_user(data->client_addr.data, 16); + if (IS_ERR(c)) + return PTR_ERR(c); + ctx->client_address = c; + + /* + * Translate to nfs_fs_context, which nfs_fill_super + * can deal with. 
+ */ + + ctx->flags = data->flags & NFS4_MOUNT_FLAGMASK; + ctx->rsize = data->rsize; + ctx->wsize = data->wsize; + ctx->timeo = data->timeo; + ctx->retrans = data->retrans; + ctx->acregmin = data->acregmin; + ctx->acregmax = data->acregmax; + ctx->acdirmin = data->acdirmin; + ctx->acdirmax = data->acdirmax; + ctx->nfs_server.protocol = data->proto; + nfs_validate_transport_protocol(ctx); + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; + + break; + default: + goto generic; + } + + ctx->skip_reconfig_option_check = true; + return 0; + +generic: + return generic_parse_monolithic(fc, data); + +out_no_data: + if (is_remount_fc(fc)) { + ctx->skip_reconfig_option_check = true; + return 0; + } + return nfs_invalf(fc, "NFS4: mount program didn't pass any mount data"); + +out_inval_auth: + return nfs_invalf(fc, "NFS4: Invalid number of RPC auth flavours %d", + data->auth_flavourlen); + +out_no_address: + return nfs_invalf(fc, "NFS4: mount program didn't pass remote address"); + +out_invalid_transport_udp: + return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp"); +} +#endif + +/* + * Parse a monolithic block of data from sys_mount(). + */ +static int nfs_fs_context_parse_monolithic(struct fs_context *fc, + void *data) +{ + if (fc->fs_type == &nfs_fs_type) + return nfs23_parse_monolithic(fc, data); + +#if IS_ENABLED(CONFIG_NFS_V4) + if (fc->fs_type == &nfs4_fs_type) + return nfs4_parse_monolithic(fc, data); +#endif + + return nfs_invalf(fc, "NFS: Unsupported monolithic data version"); +} + +/* + * Validate the preparsed information in the config. 
+ */ +static int nfs_fs_context_validate(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_subversion *nfs_mod; + struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address; + int max_namelen = PAGE_SIZE; + int max_pathlen = NFS_MAXPATHLEN; + int port = 0; + int ret; + + if (!fc->source) + goto out_no_device_name; + + /* Check for sanity first. */ + if (ctx->minorversion && ctx->version != 4) + goto out_minorversion_mismatch; + + if (ctx->options & NFS_OPTION_MIGRATION && + (ctx->version != 4 || ctx->minorversion != 0)) + goto out_migration_misuse; + + /* Verify that any proto=/mountproto= options match the address + * families in the addr=/mountaddr= options. + */ + if (ctx->protofamily != AF_UNSPEC && + ctx->protofamily != ctx->nfs_server.address.sa_family) + goto out_proto_mismatch; + + if (ctx->mountfamily != AF_UNSPEC) { + if (ctx->mount_server.addrlen) { + if (ctx->mountfamily != ctx->mount_server.address.sa_family) + goto out_mountproto_mismatch; + } else { + if (ctx->mountfamily != ctx->nfs_server.address.sa_family) + goto out_mountproto_mismatch; + } + } + + if (!nfs_verify_server_address(sap)) + goto out_no_address; + + if (ctx->version == 4) { + if (IS_ENABLED(CONFIG_NFS_V4)) { + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + else + port = NFS_PORT; + max_namelen = NFS4_MAXNAMLEN; + max_pathlen = NFS4_MAXPATHLEN; + nfs_validate_transport_protocol(ctx); + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; + ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL | + NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK | + NFS_MOUNT_LOCAL_FCNTL); + } else { + goto out_v4_not_compiled; + } + } else { + nfs_set_mount_transport_protocol(ctx); +#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP) + goto out_invalid_transport_udp; +#endif + if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA) + port = NFS_RDMA_PORT; + } + + 
nfs_set_port(sap, &ctx->nfs_server.port, port); + + ret = nfs_parse_source(fc, max_namelen, max_pathlen); + if (ret < 0) + return ret; + + /* Load the NFS protocol module if we haven't done so yet */ + if (!ctx->nfs_mod) { + nfs_mod = get_nfs_version(ctx->version); + if (IS_ERR(nfs_mod)) { + ret = PTR_ERR(nfs_mod); + goto out_version_unavailable; + } + ctx->nfs_mod = nfs_mod; + } + return 0; + +out_no_device_name: + return nfs_invalf(fc, "NFS: Device name not specified"); +out_v4_not_compiled: + nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel"); + return -EPROTONOSUPPORT; +out_invalid_transport_udp: + return nfs_invalf(fc, "NFSv4: Unsupported transport protocol udp"); +out_no_address: + return nfs_invalf(fc, "NFS: mount program didn't pass remote address"); +out_mountproto_mismatch: + return nfs_invalf(fc, "NFS: Mount server address does not match mountproto= option"); +out_proto_mismatch: + return nfs_invalf(fc, "NFS: Server address does not match proto= option"); +out_minorversion_mismatch: + return nfs_invalf(fc, "NFS: Mount option vers=%u does not support minorversion=%u", + ctx->version, ctx->minorversion); +out_migration_misuse: + return nfs_invalf(fc, "NFS: 'Migration' not supported for this NFS version"); +out_version_unavailable: + nfs_errorf(fc, "NFS: Version unavailable"); + return ret; +} + +/* + * Create an NFS superblock by the appropriate method. + */ +static int nfs_get_tree(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err = nfs_fs_context_validate(fc); + + if (err) + return err; + if (!ctx->internal) + return ctx->nfs_mod->rpc_ops->try_get_tree(fc); + else + return nfs_get_tree_common(fc); +} + +/* + * Handle duplication of a configuration. The caller copied *src into *sc, but + * it can't deal with resource pointers in the filesystem context, so we have + * to do that. We need to clear pointers, copy data or get extra refs as + * appropriate. 
+ */ +static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc) +{ + struct nfs_fs_context *src = nfs_fc2context(src_fc), *ctx; + + ctx = kmemdup(src, sizeof(struct nfs_fs_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->mntfh = nfs_alloc_fhandle(); + if (!ctx->mntfh) { + kfree(ctx); + return -ENOMEM; + } + nfs_copy_fh(ctx->mntfh, src->mntfh); + + __module_get(ctx->nfs_mod->owner); + ctx->client_address = NULL; + ctx->mount_server.hostname = NULL; + ctx->nfs_server.export_path = NULL; + ctx->nfs_server.hostname = NULL; + ctx->fscache_uniq = NULL; + ctx->clone_data.fattr = NULL; + fc->fs_private = ctx; + return 0; +} + +static void nfs_fs_context_free(struct fs_context *fc) +{ + struct nfs_fs_context *ctx = nfs_fc2context(fc); + + if (ctx) { + if (ctx->server) + nfs_free_server(ctx->server); + if (ctx->nfs_mod) + put_nfs_version(ctx->nfs_mod); + kfree(ctx->client_address); + kfree(ctx->mount_server.hostname); + kfree(ctx->nfs_server.export_path); + kfree(ctx->nfs_server.hostname); + kfree(ctx->fscache_uniq); + nfs_free_fhandle(ctx->mntfh); + nfs_free_fattr(ctx->clone_data.fattr); + kfree(ctx); + } +} + +static const struct fs_context_operations nfs_fs_context_ops = { + .free = nfs_fs_context_free, + .dup = nfs_fs_context_dup, + .parse_param = nfs_fs_context_parse_param, + .parse_monolithic = nfs_fs_context_parse_monolithic, + .get_tree = nfs_get_tree, + .reconfigure = nfs_reconfigure, +}; + +/* + * Prepare superblock configuration. We use the namespaces attached to the + * context. This may be the current process's namespaces, or it may be a + * container's namespaces. 
+ */ +static int nfs_init_fs_context(struct fs_context *fc) +{ + struct nfs_fs_context *ctx; + + ctx = kzalloc(sizeof(struct nfs_fs_context), GFP_KERNEL); + if (unlikely(!ctx)) + return -ENOMEM; + + ctx->mntfh = nfs_alloc_fhandle(); + if (unlikely(!ctx->mntfh)) { + kfree(ctx); + return -ENOMEM; + } + + ctx->protofamily = AF_UNSPEC; + ctx->mountfamily = AF_UNSPEC; + ctx->mount_server.port = NFS_UNSPEC_PORT; + + if (fc->root) { + /* reconfigure, start with the current config */ + struct nfs_server *nfss = fc->root->d_sb->s_fs_info; + struct net *net = nfss->nfs_client->cl_net; + + ctx->flags = nfss->flags; + ctx->rsize = nfss->rsize; + ctx->wsize = nfss->wsize; + ctx->retrans = nfss->client->cl_timeout->to_retries; + ctx->selected_flavor = nfss->client->cl_auth->au_flavor; + ctx->acregmin = nfss->acregmin / HZ; + ctx->acregmax = nfss->acregmax / HZ; + ctx->acdirmin = nfss->acdirmin / HZ; + ctx->acdirmax = nfss->acdirmax / HZ; + ctx->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; + ctx->nfs_server.port = nfss->port; + ctx->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + ctx->version = nfss->nfs_client->rpc_ops->version; + ctx->minorversion = nfss->nfs_client->cl_minorversion; + + memcpy(&ctx->nfs_server.address, &nfss->nfs_client->cl_addr, + ctx->nfs_server.addrlen); + + if (fc->net_ns != net) { + put_net(fc->net_ns); + fc->net_ns = get_net(net); + } + + ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod; + __module_get(ctx->nfs_mod->owner); + } else { + /* defaults */ + ctx->timeo = NFS_UNSPEC_TIMEO; + ctx->retrans = NFS_UNSPEC_RETRANS; + ctx->acregmin = NFS_DEF_ACREGMIN; + ctx->acregmax = NFS_DEF_ACREGMAX; + ctx->acdirmin = NFS_DEF_ACDIRMIN; + ctx->acdirmax = NFS_DEF_ACDIRMAX; + ctx->nfs_server.port = NFS_UNSPEC_PORT; + ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; + ctx->selected_flavor = RPC_AUTH_MAXFLAVOR; + ctx->minorversion = 0; + ctx->need_mount = true; + } + fc->fs_private = ctx; + fc->ops = &nfs_fs_context_ops; + return 0; +} + +struct 
file_system_type nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .init_fs_context = nfs_init_fs_context, + .parameters = &nfs_fs_parameters, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, +}; +MODULE_ALIAS_FS("nfs"); +EXPORT_SYMBOL_GPL(nfs_fs_type); + +#if IS_ENABLED(CONFIG_NFS_V4) +struct file_system_type nfs4_fs_type = { + .owner = THIS_MODULE, + .name = "nfs4", + .init_fs_context = nfs_init_fs_context, + .parameters = &nfs_fs_parameters, + .kill_sb = nfs_kill_super, + .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, +}; +MODULE_ALIAS_FS("nfs4"); +MODULE_ALIAS("nfs4"); +EXPORT_SYMBOL_GPL(nfs4_fs_type); +#endif /* CONFIG_NFS_V4 */ diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 7def925d3af5..52270bfac120 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -128,7 +128,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int return; key->nfs_client = nfss->nfs_client; - key->key.super.s_flags = sb->s_flags & NFS_MS_MASK; + key->key.super.s_flags = sb->s_flags & NFS_SB_MASK; key->key.nfs_server.flags = nfss->flags; key->key.nfs_server.rsize = nfss->rsize; key->key.nfs_server.wsize = nfss->wsize; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 878c4c5982d9..b012c2668a1f 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -64,66 +64,71 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i /* * get an NFS2/NFS3 root dentry from the root filehandle */ -struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh, - const char *devname) +int nfs_get_root(struct super_block *s, struct fs_context *fc) { - struct nfs_server *server = NFS_SB(sb); + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct nfs_server *server = NFS_SB(s); struct nfs_fsinfo fsinfo; - struct dentry *ret; + struct dentry *root; struct inode *inode; - void *name = kstrdup(devname, GFP_KERNEL); - int error; + char *name; + int error = -ENOMEM; + name = 
kstrdup(fc->source, GFP_KERNEL); if (!name) - return ERR_PTR(-ENOMEM); + goto out; /* get the actual root for this mount */ fsinfo.fattr = nfs_alloc_fattr(); - if (fsinfo.fattr == NULL) { - kfree(name); - return ERR_PTR(-ENOMEM); - } + if (fsinfo.fattr == NULL) + goto out_name; - error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); + error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo); if (error < 0) { dprintk("nfs_get_root: getattr error = %d\n", -error); - ret = ERR_PTR(error); - goto out; + nfs_errorf(fc, "NFS: Couldn't getattr on root"); + goto out_fattr; } - inode = nfs_fhget(sb, mntfh, fsinfo.fattr, NULL); + inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL); if (IS_ERR(inode)) { dprintk("nfs_get_root: get root inode failed\n"); - ret = ERR_CAST(inode); - goto out; + error = PTR_ERR(inode); + nfs_errorf(fc, "NFS: Couldn't get root inode"); + goto out_fattr; } - error = nfs_superblock_set_dummy_root(sb, inode); - if (error != 0) { - ret = ERR_PTR(error); - goto out; - } + error = nfs_superblock_set_dummy_root(s, inode); + if (error != 0) + goto out_fattr; /* root dentries normally start off anonymous and get spliced in later * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - ret = d_obtain_root(inode); - if (IS_ERR(ret)) { + root = d_obtain_root(inode); + if (IS_ERR(root)) { dprintk("nfs_get_root: get root dentry failed\n"); - goto out; + error = PTR_ERR(root); + nfs_errorf(fc, "NFS: Couldn't get root dentry"); + goto out_fattr; } - security_d_instantiate(ret, inode); - spin_lock(&ret->d_lock); - if (IS_ROOT(ret) && !ret->d_fsdata && - !(ret->d_flags & DCACHE_NFSFS_RENAMED)) { - ret->d_fsdata = name; + security_d_instantiate(root, inode); + spin_lock(&root->d_lock); + if (IS_ROOT(root) && !root->d_fsdata && + !(root->d_flags & DCACHE_NFSFS_RENAMED)) { + root->d_fsdata = name; name = NULL; } - spin_unlock(&ret->d_lock); -out: - 
kfree(name); + spin_unlock(&root->d_lock); + fc->root = root; + error = 0; + +out_fattr: nfs_free_fattr(fsinfo.fattr); - return ret; +out_name: + kfree(name); +out: + return error; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b0b4b9f303fd..1309e6f47f3d 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1061,7 +1061,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct rcu_read_lock(); list_for_each_entry_rcu(pos, &nfsi->open_files, list) { - if (cred != NULL && pos->cred != cred) + if (cred != NULL && cred_fscmp(pos->cred, cred) != 0) continue; if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode) continue; @@ -1156,7 +1156,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n", inode->i_sb->s_id, (unsigned long long)NFS_FILEID(inode), status); - if (status == -ESTALE) { + switch (status) { + case -ETIMEDOUT: + /* A soft timeout occurred. Use cached information? 
*/ + if (server->flags & NFS_MOUNT_SOFTREVAL) + status = 0; + break; + case -ESTALE: nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 24a65da58aa9..f80c47d5ff27 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -4,17 +4,19 @@ */ #include "nfs4_fs.h" -#include <linux/mount.h> +#include <linux/fs_context.h> #include <linux/security.h> #include <linux/crc32.h> +#include <linux/sunrpc/addr.h> #include <linux/nfs_page.h> #include <linux/wait_bit.h> -#define NFS_MS_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) +#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS) extern const struct export_operations nfs_export_ops; struct nfs_string; +struct nfs_pageio_descriptor; static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) { @@ -31,17 +33,14 @@ static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr) return 1; } -struct nfs_clone_mount { - const struct super_block *sb; - const struct dentry *dentry; - struct nfs_fh *fh; - struct nfs_fattr *fattr; - char *hostname; - char *mnt_path; - struct sockaddr *addr; - size_t addrlen; - rpc_authflavor_t authflavor; -}; +static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) +{ + if (!(NFS_SB(dentry->d_sb)->flags & NFS_MOUNT_SOFTREVAL)) + return false; + if (!d_is_positive(dentry) || !NFS_FH(d_inode(dentry))->size) + return false; + return true; +} /* * Note: RFC 1813 doesn't limit the number of auth flavors that @@ -82,12 +81,16 @@ struct nfs_client_initdata { /* * In-kernel mount arguments */ -struct nfs_parsed_mount_data { - int flags; +struct nfs_fs_context { + bool internal; + bool skip_reconfig_option_check; + bool need_mount; + bool sloppy; + unsigned int flags; /* NFS{,4}_MOUNT_* flags */ unsigned int rsize, wsize; unsigned int timeo, retrans; - unsigned int acregmin, acregmax, - acdirmin, 
acdirmax; + unsigned int acregmin, acregmax; + unsigned int acdirmin, acdirmax; unsigned int namlen; unsigned int options; unsigned int bsize; @@ -97,10 +100,14 @@ struct nfs_parsed_mount_data { unsigned int version; unsigned int minorversion; char *fscache_uniq; - bool need_mount; + unsigned short protofamily; + unsigned short mountfamily; struct { - struct sockaddr_storage address; + union { + struct sockaddr address; + struct sockaddr_storage _address; + }; size_t addrlen; char *hostname; u32 version; @@ -109,19 +116,41 @@ struct nfs_parsed_mount_data { } mount_server; struct { - struct sockaddr_storage address; + union { + struct sockaddr address; + struct sockaddr_storage _address; + }; size_t addrlen; char *hostname; char *export_path; int port; unsigned short protocol; unsigned short nconnect; + unsigned short export_path_len; } nfs_server; - void *lsm_opts; - struct net *net; + struct nfs_fh *mntfh; + struct nfs_server *server; + struct nfs_subversion *nfs_mod; + + /* Information for a cloned mount. */ + struct nfs_clone_mount { + struct super_block *sb; + struct dentry *dentry; + struct nfs_fattr *fattr; + unsigned int inherited_bsize; + } clone_data; }; +#define nfs_errorf(fc, fmt, ...) errorf(fc, fmt, ## __VA_ARGS__) +#define nfs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__) +#define nfs_warnf(fc, fmt, ...) 
warnf(fc, fmt, ## __VA_ARGS__) + +static inline struct nfs_fs_context *nfs_fc2context(const struct fs_context *fc) +{ + return fc->fs_private; +} + /* mount_clnt.c */ struct nfs_mount_request { struct sockaddr *sap; @@ -137,14 +166,6 @@ struct nfs_mount_request { struct net *net; }; -struct nfs_mount_info { - void (*fill_super)(struct super_block *, struct nfs_mount_info *); - int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *); - struct nfs_parsed_mount_data *parsed; - struct nfs_clone_mount *cloned; - struct nfs_fh *mntfh; -}; - extern int nfs_mount(struct nfs_mount_request *info); extern void nfs_umount(const struct nfs_mount_request *info); @@ -170,13 +191,9 @@ extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * nfs4_find_client_sessionid(struct net *, const struct sockaddr *, struct nfs4_sessionid *, u32); -extern struct nfs_server *nfs_create_server(struct nfs_mount_info *, - struct nfs_subversion *); -extern struct nfs_server *nfs4_create_server( - struct nfs_mount_info *, - struct nfs_subversion *); -extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, - struct nfs_fh *); +extern struct nfs_server *nfs_create_server(struct fs_context *); +extern struct nfs_server *nfs4_create_server(struct fs_context *); +extern struct nfs_server *nfs4_create_referral_server(struct fs_context *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, struct sockaddr *sap, size_t salen, struct net *net); @@ -227,7 +244,9 @@ static inline void nfs_fs_proc_exit(void) extern const struct svc_version nfs4_callback_version1; extern const struct svc_version nfs4_callback_version4; -struct nfs_pageio_descriptor; +/* fs_context.c */ +extern struct file_system_type nfs_fs_type; + /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); @@ -387,23 +406,10 @@ extern int nfs_wait_atomic_killable(atomic_t *p, 
unsigned int mode); /* super.c */ extern const struct super_operations nfs_sops; -extern struct file_system_type nfs_fs_type; -extern struct file_system_type nfs_xdev_fs_type; -#if IS_ENABLED(CONFIG_NFS_V4) -extern struct file_system_type nfs4_referral_fs_type; -#endif bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); -struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, - struct nfs_subversion *); -int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *); -struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *, - struct nfs_mount_info *, struct nfs_subversion *); -struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *); -struct dentry * nfs_xdev_mount_common(struct file_system_type *, int, - const char *, struct nfs_mount_info *); +int nfs_try_get_tree(struct fs_context *); +int nfs_get_tree_common(struct fs_context *); void nfs_kill_super(struct super_block *); -void nfs_fill_super(struct super_block *, struct nfs_mount_info *); extern struct rpc_stat nfs_rpcstat; @@ -430,18 +436,12 @@ static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi) extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, unsigned flags); extern struct vfsmount *nfs_d_automount(struct path *path); -struct vfsmount *nfs_submount(struct nfs_server *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *); -struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *, - struct nfs_fattr *, rpc_authflavor_t); +int nfs_submount(struct fs_context *, struct nfs_server *); +int nfs_do_submount(struct fs_context *); /* getroot.c */ -extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, - const char *); +extern int nfs_get_root(struct super_block *s, struct fs_context *fc); #if IS_ENABLED(CONFIG_NFS_V4) -extern struct dentry 
*nfs4_get_root(struct super_block *, struct nfs_fh *, - const char *); - extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool); #endif @@ -460,7 +460,7 @@ int nfs_show_options(struct seq_file *, struct dentry *); int nfs_show_devname(struct seq_file *, struct dentry *); int nfs_show_path(struct seq_file *, struct dentry *); int nfs_show_stats(struct seq_file *, struct dentry *); -int nfs_remount(struct super_block *sb, int *flags, char *raw_data); +int nfs_reconfigure(struct fs_context *); /* write.c */ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, @@ -706,9 +706,9 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) } /* - * Convert a struct timespec into a 64-bit change attribute + * Convert a struct timespec64 into a 64-bit change attribute * - * This does approximately the same thing as timespec_to_ns(), + * This does approximately the same thing as timespec64_to_ns(), * but for calculation efficiency, we multiply the seconds by * 1024*1024*1024. */ @@ -777,3 +777,16 @@ static inline bool nfs_error_is_fatal_on_server(int err) } return nfs_error_is_fatal(err); } + +/* + * Select between a default port value and a user-specified port value. + * If a zero value is set, then autobind will be used. 
+ */ +static inline void nfs_set_port(struct sockaddr *sap, int *port, + const unsigned short default_port) +{ + if (*port == NFS_UNSPEC_PORT) + *port = default_port; + + rpc_set_port(sap, *port); +} diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index cb7c10e9721e..35c8cb2d7637 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -29,9 +29,7 @@ */ #define encode_dirpath_sz (1 + XDR_QUADLEN(MNTPATHLEN)) #define MNT_status_sz (1) -#define MNT_fhs_status_sz (1) #define MNT_fhandle_sz XDR_QUADLEN(NFS2_FHSIZE) -#define MNT_fhandle3_sz (1 + XDR_QUADLEN(NFS3_FHSIZE)) #define MNT_authflav3_sz (1 + NFS_MAX_SECFLAVORS) /* diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 5e0e9d29f5c5..ad6077404947 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -19,6 +19,7 @@ #include <linux/vfs.h> #include <linux/sunrpc/gss_api.h> #include "internal.h" +#include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -139,34 +140,65 @@ EXPORT_SYMBOL_GPL(nfs_path); */ struct vfsmount *nfs_d_automount(struct path *path) { - struct vfsmount *mnt; + struct nfs_fs_context *ctx; + struct fs_context *fc; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct nfs_server *server = NFS_SERVER(d_inode(path->dentry)); - struct nfs_fh *fh = NULL; - struct nfs_fattr *fattr = NULL; + struct nfs_client *client = server->nfs_client; + int ret; if (IS_ROOT(path->dentry)) return ERR_PTR(-ESTALE); - mnt = ERR_PTR(-ENOMEM); - fh = nfs_alloc_fhandle(); - fattr = nfs_alloc_fattr(); - if (fh == NULL || fattr == NULL) - goto out; + /* Open a new filesystem context, transferring parameters from the + * parent superblock, including the network namespace. 
+ */ + fc = fs_context_for_submount(&nfs_fs_type, path->dentry); + if (IS_ERR(fc)) + return ERR_CAST(fc); - mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr); + ctx = nfs_fc2context(fc); + ctx->clone_data.dentry = path->dentry; + ctx->clone_data.sb = path->dentry->d_sb; + ctx->clone_data.fattr = nfs_alloc_fattr(); + if (!ctx->clone_data.fattr) + goto out_fc; + + if (fc->net_ns != client->cl_net) { + put_net(fc->net_ns); + fc->net_ns = get_net(client->cl_net); + } + + /* for submounts we want the same server; referrals will reassign */ + memcpy(&ctx->nfs_server.address, &client->cl_addr, client->cl_addrlen); + ctx->nfs_server.addrlen = client->cl_addrlen; + ctx->nfs_server.port = server->port; + + ctx->version = client->rpc_ops->version; + ctx->minorversion = client->cl_minorversion; + ctx->nfs_mod = client->cl_nfs_mod; + __module_get(ctx->nfs_mod->owner); + + ret = client->rpc_ops->submount(fc, server); + if (ret < 0) { + mnt = ERR_PTR(ret); + goto out_fc; + } + + up_write(&fc->root->d_sb->s_umount); + mnt = vfs_create_mount(fc); if (IS_ERR(mnt)) - goto out; + goto out_fc; if (nfs_mountpoint_expiry_timeout < 0) - goto out; + goto out_fc; mntget(mnt); /* prevent immediate expiration */ mnt_set_expiry(mnt, &nfs_automount_list); schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); -out: - nfs_free_fattr(fattr); - nfs_free_fhandle(fh); +out_fc: + put_fs_context(fc); return mnt; } @@ -213,16 +245,6 @@ void nfs_release_automount_timer(void) cancel_delayed_work(&nfs_automount_task); } -/* - * Clone a mountpoint of the appropriate type - */ -static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, - const char *devname, - struct nfs_clone_mount *mountdata) -{ - return vfs_submount(mountdata->dentry, &nfs_xdev_fs_type, devname, mountdata); -} - /** * nfs_do_submount - set up mountpoint when crossing a filesystem boundary * @dentry: parent directory @@ -231,46 +253,62 @@ static struct vfsmount 
*nfs_do_clone_mount(struct nfs_server *server, * @authflavor: security flavor to use when performing the mount * */ -struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh, - struct nfs_fattr *fattr, rpc_authflavor_t authflavor) +int nfs_do_submount(struct fs_context *fc) { - struct nfs_clone_mount mountdata = { - .sb = dentry->d_sb, - .dentry = dentry, - .fh = fh, - .fattr = fattr, - .authflavor = authflavor, - }; - struct vfsmount *mnt; - char *page = (char *) __get_free_page(GFP_USER); - char *devname; - - if (page == NULL) - return ERR_PTR(-ENOMEM); - - devname = nfs_devname(dentry, page, PAGE_SIZE); - if (IS_ERR(devname)) - mnt = ERR_CAST(devname); - else - mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata); - - free_page((unsigned long)page); - return mnt; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry = ctx->clone_data.dentry; + struct nfs_server *server; + char *buffer, *p; + int ret; + + /* create a new volume representation */ + server = ctx->nfs_mod->rpc_ops->clone_server(NFS_SB(ctx->clone_data.sb), + ctx->mntfh, + ctx->clone_data.fattr, + ctx->selected_flavor); + + if (IS_ERR(server)) + return PTR_ERR(server); + + ctx->server = server; + + buffer = kmalloc(4096, GFP_USER); + if (!buffer) + return -ENOMEM; + + ctx->internal = true; + ctx->clone_data.inherited_bsize = ctx->clone_data.sb->s_blocksize_bits; + + p = nfs_devname(dentry, buffer, 4096); + if (IS_ERR(p)) { + nfs_errorf(fc, "NFS: Couldn't determine submount pathname"); + ret = PTR_ERR(p); + } else { + ret = vfs_parse_fs_string(fc, "source", p, buffer + 4096 - p); + if (!ret) + ret = vfs_get_tree(fc); + } + kfree(buffer); + return ret; } EXPORT_SYMBOL_GPL(nfs_do_submount); -struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry, - struct nfs_fh *fh, struct nfs_fattr *fattr) +int nfs_submount(struct fs_context *fc, struct nfs_server *server) { - int err; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + 
struct dentry *dentry = ctx->clone_data.dentry; struct dentry *parent = dget_parent(dentry); + int err; /* Look it up again to get its attributes */ - err = server->nfs_client->rpc_ops->lookup(d_inode(parent), &dentry->d_name, fh, fattr, NULL); + err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry, + ctx->mntfh, ctx->clone_data.fattr, + NULL); dput(parent); if (err != 0) - return ERR_PTR(err); + return err; - return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor); + ctx->selected_flavor = server->client->cl_auth->au_flavor; + return nfs_do_submount(fc); } EXPORT_SYMBOL_GPL(nfs_submount); diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index d94c7abdf25a..f6676af37d5d 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -360,17 +360,17 @@ static void encode_sattr(struct xdr_stream *xdr, const struct iattr *attr, else *p++ = cpu_to_be32(NFS2_SATTR_NOT_SET); - if (attr->ia_valid & ATTR_ATIME_SET) { + if (attr->ia_valid & ATTR_ATIME_SET) p = xdr_encode_time(p, &attr->ia_atime); - } else if (attr->ia_valid & ATTR_ATIME) { + else if (attr->ia_valid & ATTR_ATIME) p = xdr_encode_current_server_time(p, &attr->ia_atime); - } else + else p = xdr_time_not_set(p); - if (attr->ia_valid & ATTR_MTIME_SET) { + if (attr->ia_valid & ATTR_MTIME_SET) xdr_encode_time(p, &attr->ia_mtime); - } else if (attr->ia_valid & ATTR_MTIME) { + else if (attr->ia_valid & ATTR_MTIME) xdr_encode_current_server_time(p, &attr->ia_mtime); - } else + else xdr_time_not_set(p); } diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index f82e11c4cb56..1b950b66b3bb 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -27,7 +27,7 @@ static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, #endif /* CONFIG_NFS_V3_ACL */ /* nfs3client.c */ -struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); +struct nfs_server *nfs3_create_server(struct fs_context *); struct nfs_server *nfs3_clone_server(struct nfs_server *, 
struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 223904bc40a7..5601e47360c2 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -46,10 +46,10 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server) } #endif -struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs3_create_server(struct fs_context *fc) { - struct nfs_server *server = nfs_create_server(mount_info, nfs_mod); + struct nfs_server *server = nfs_create_server(fc); + /* Create a client RPC handle for the NFS v3 ACL management interface */ if (!IS_ERR(server)) nfs_init_server_aclclient(server); diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 9eb2f1a503ab..a46d1d5d16d8 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -110,10 +110,15 @@ nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = fattr, }; int status; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? 
*/ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, task_flags); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -140,23 +145,23 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status == 0) { + nfs_setattr_update_inode(inode, sattr, fattr); if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); - nfs_setattr_update_inode(inode, sattr, fattr); } dprintk("NFS reply setattr: %d\n", status); return status; } static int -nfs3_proc_lookup(struct inode *dir, const struct qstr *name, +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs3_diropargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len + .name = dentry->d_name.name, + .len = dentry->d_name.len }; struct nfs3_diropres res = { .fh = fhandle, @@ -168,20 +173,25 @@ nfs3_proc_lookup(struct inode *dir, const struct qstr *name, .rpc_resp = &res, }; int status; + unsigned short task_flags = 0; - dprintk("NFS call lookup %s\n", name->name); + /* Is this is an attribute revalidation, subject to softreval? 
*/ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; + + dprintk("NFS call lookup %pd2\n", dentry); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) return -ENOMEM; nfs_fattr_init(fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); nfs_refresh_inode(dir, res.dir_attr); if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_argp = fhandle; msg.rpc_resp = fattr; - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); } nfs_free_fattr(res.dir_attr); dprintk("NFS reply lookup: %d\n", status); @@ -990,7 +1000,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .nlmclnt_ops = &nlmclnt_fl_close_lock_ops, .getroot = nfs3_proc_get_root, .submount = nfs_submount, - .try_mount = nfs_try_mount, + .try_get_tree = nfs_try_get_tree, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 927eb680f161..69971f6c840d 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2334,6 +2334,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, void *data) { struct nfs_commitres *result = data; + struct nfs_writeverf *verf = result->verf; enum nfs_stat status; int error; @@ -2346,7 +2347,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, result->op_status = status; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, &result->verf->verifier); + error = decode_writeverf3(xdr, &verf->verifier); + if (!error) + verf->committed = NFS_FILE_SYNC; out: return error; out_status: diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 1fe83e0f663e..e2ae54b35dfe 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -61,8 +61,11 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep, status = 
nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context, lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } res.falloc_fattr = nfs_alloc_fattr(); if (!res.falloc_fattr) @@ -287,8 +290,11 @@ static ssize_t _nfs42_proc_copy(struct file *src, } else { status = nfs4_set_rw_stateid(&args->src_stateid, src_lock->open_context, src_lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } } status = nfs_filemap_write_and_wait_range(file_inode(src)->i_mapping, pos_src, pos_src + (loff_t)count - 1); @@ -297,8 +303,11 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = nfs4_set_rw_stateid(&args->dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs_sync_inode(dst_inode); if (status) @@ -334,14 +343,14 @@ static ssize_t _nfs42_proc_copy(struct file *src, status = handle_async_copy(res, dst_server, src_server, src, dst, &args->src_stateid, restart); if (status) - return status; + goto out; } if ((!res->synchronous || !args->sync) && res->write_res.verifier.committed != NFS_FILE_SYNC) { status = process_copy_commit(dst, pos_dst, res); if (status) - return status; + goto out; } truncate_pagecache_range(dst_inode, pos_dst, @@ -546,8 +555,11 @@ static int _nfs42_proc_copy_notify(struct file *src, struct file *dst, status = nfs4_set_rw_stateid(&args->cna_src_stateid, ctx, l_ctx, FMODE_READ); nfs_put_lock_context(l_ctx); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs4_call_sync(src_server->client, src_server, &msg, &args->cna_seq_args, &res->cnr_seq_res, 0); @@ -618,8 +630,11 @@ static loff_t _nfs42_proc_llseek(struct file *filep, status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context, lock, FMODE_READ); - if (status) + 
if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } status = nfs_filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); @@ -994,13 +1009,18 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context, src_lock, FMODE_READ); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; - + } status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context, dst_lock, FMODE_WRITE); - if (status) + if (status) { + if (status == -EAGAIN) + status = -NFS4ERR_BAD_STATEID; return status; + } res.dst_fattr = nfs_alloc_fattr(); if (!res.dst_fattr) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a7a73b1d1fec..8be1ba7c62bb 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -268,14 +268,13 @@ extern const struct dentry_operations nfs4_dentry_operations; int nfs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t); -/* super.c */ +/* fs_context.c */ extern struct file_system_type nfs4_fs_type; /* nfs4namespace.c */ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, const struct qstr *); -struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *); +int nfs4_submount(struct fs_context *, struct nfs_server *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); @@ -303,8 +302,10 @@ extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struc extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, struct page *page, const struct cred *); extern int nfs4_proc_fsid_present(struct inode *, const struct cred *); -extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, const struct qstr *, - struct nfs_fh *, struct nfs_fattr *); +extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, 
+ struct dentry *, + struct nfs_fh *, + struct nfs_fattr *); extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern const struct xattr_handler *nfs4_xattr_handlers[]; extern int nfs4_set_rw_stateid(nfs4_stateid *stateid, @@ -446,9 +447,7 @@ extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); extern void nfs4_kill_renewd(struct nfs_client *); extern void nfs4_renew_state(struct work_struct *); -extern void nfs4_set_lease_period(struct nfs_client *clp, - unsigned long lease, - unsigned long lastrenewed); +extern void nfs4_set_lease_period(struct nfs_client *clp, unsigned long lease); /* nfs4state.c */ @@ -526,7 +525,6 @@ extern const nfs4_stateid invalid_stateid; /* nfs4super.c */ struct nfs_mount_info; extern struct nfs_subversion nfs_v4; -struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); extern bool nfs4_disable_idmapping; extern unsigned short max_session_slots; extern unsigned short max_session_cb_slots; @@ -536,6 +534,9 @@ extern bool recover_lost_locks; #define NFS4_CLIENT_ID_UNIQ_LEN (64) extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN]; +extern int nfs4_try_get_tree(struct fs_context *); +extern int nfs4_get_referral_tree(struct fs_context *); + /* nfs4sysctl.c */ #ifdef CONFIG_SYSCTL int nfs4_register_sysctl(void); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 460d6251c405..0cd767e5c977 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1055,66 +1055,64 @@ out: /* * Create a version 4 volume record */ -static int nfs4_init_server(struct nfs_server *server, - struct nfs_parsed_mount_data *data) +static int nfs4_init_server(struct nfs_server *server, struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct rpc_timeout timeparms; int error; - nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, - data->timeo, 
data->retrans); + nfs_init_timeout_values(&timeparms, ctx->nfs_server.protocol, + ctx->timeo, ctx->retrans); /* Initialise the client representation from the mount data */ - server->flags = data->flags; - server->options = data->options; - server->auth_info = data->auth_info; + server->flags = ctx->flags; + server->options = ctx->options; + server->auth_info = ctx->auth_info; /* Use the first specified auth flavor. If this flavor isn't * allowed by the server, use the SECINFO path to try the * other specified flavors */ - if (data->auth_info.flavor_len >= 1) - data->selected_flavor = data->auth_info.flavors[0]; + if (ctx->auth_info.flavor_len >= 1) + ctx->selected_flavor = ctx->auth_info.flavors[0]; else - data->selected_flavor = RPC_AUTH_UNIX; + ctx->selected_flavor = RPC_AUTH_UNIX; /* Get a client record */ error = nfs4_set_client(server, - data->nfs_server.hostname, - (const struct sockaddr *)&data->nfs_server.address, - data->nfs_server.addrlen, - data->client_address, - data->nfs_server.protocol, - &timeparms, - data->minorversion, - data->nfs_server.nconnect, - data->net); + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, + ctx->client_address, + ctx->nfs_server.protocol, + &timeparms, + ctx->minorversion, + ctx->nfs_server.nconnect, + fc->net_ns); if (error < 0) return error; - if (data->rsize) - server->rsize = nfs_block_size(data->rsize, NULL); - if (data->wsize) - server->wsize = nfs_block_size(data->wsize, NULL); + if (ctx->rsize) + server->rsize = nfs_block_size(ctx->rsize, NULL); + if (ctx->wsize) + server->wsize = nfs_block_size(ctx->wsize, NULL); - server->acregmin = data->acregmin * HZ; - server->acregmax = data->acregmax * HZ; - server->acdirmin = data->acdirmin * HZ; - server->acdirmax = data->acdirmax * HZ; - server->port = data->nfs_server.port; + server->acregmin = ctx->acregmin * HZ; + server->acregmax = ctx->acregmax * HZ; + server->acdirmin = ctx->acdirmin * HZ; + server->acdirmax = ctx->acdirmax * HZ; + 
server->port = ctx->nfs_server.port; return nfs_init_server_rpcclient(server, &timeparms, - data->selected_flavor); + ctx->selected_flavor); } /* * Create a version 4 volume record * - keyed on server and FSID */ -/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh)*/ -struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +struct nfs_server *nfs4_create_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_server *server; bool auth_probe; int error; @@ -1125,14 +1123,14 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, server->cred = get_cred(current_cred()); - auth_probe = mount_info->parsed->auth_info.flavor_len < 1; + auth_probe = ctx->auth_info.flavor_len < 1; /* set up the general RPC client */ - error = nfs4_init_server(server, mount_info->parsed); + error = nfs4_init_server(server, fc); if (error < 0) goto error; - error = nfs4_server_common_setup(server, mount_info->mntfh, auth_probe); + error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe); if (error < 0) goto error; @@ -1146,9 +1144,9 @@ error: /* * Create an NFS4 referral server record */ -struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, - struct nfs_fh *mntfh) +struct nfs_server *nfs4_create_referral_server(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_client *parent_client; struct nfs_server *server, *parent_server; bool auth_probe; @@ -1158,7 +1156,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (!server) return ERR_PTR(-ENOMEM); - parent_server = NFS_SB(data->sb); + parent_server = NFS_SB(ctx->clone_data.sb); parent_client = parent_server->nfs_client; server->cred = get_cred(parent_server->cred); @@ -1168,10 +1166,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Get a client 
representation */ #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) - rpc_set_port(data->addr, NFS_RDMA_PORT); - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, + rpc_set_port(&ctx->nfs_server.address, NFS_RDMA_PORT); + error = nfs4_set_client(server, + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, parent_client->cl_ipaddr, XPRT_TRANSPORT_RDMA, parent_server->client->cl_timeout, @@ -1182,10 +1181,11 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, goto init_server; #endif /* IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) */ - rpc_set_port(data->addr, NFS_PORT); - error = nfs4_set_client(server, data->hostname, - data->addr, - data->addrlen, + rpc_set_port(&ctx->nfs_server.address, NFS_PORT); + error = nfs4_set_client(server, + ctx->nfs_server.hostname, + &ctx->nfs_server.address, + ctx->nfs_server.addrlen, parent_client->cl_ipaddr, XPRT_TRANSPORT_TCP, parent_server->client->cl_timeout, @@ -1198,13 +1198,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, #if IS_ENABLED(CONFIG_SUNRPC_XPRT_RDMA) init_server: #endif - error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor); + error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, + ctx->selected_flavor); if (error < 0) goto error; auth_probe = parent_server->auth_info.flavor_len < 1; - error = nfs4_server_common_setup(server, mntfh, auth_probe); + error = nfs4_server_common_setup(server, ctx->mntfh, auth_probe); if (error < 0) goto error; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 620de905cba9..be4eb720d5b6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -7,6 +7,7 @@ #include <linux/fs.h> #include <linux/file.h> #include <linux/falloc.h> +#include <linux/mount.h> #include <linux/nfs_fs.h> #include "delegation.h" #include "internal.h" diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 2e460c33ae48..84026e7b8a5f 
100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -8,6 +8,7 @@ * NFSv4 namespace */ +#include <linux/module.h> #include <linux/dcache.h> #include <linux/mount.h> #include <linux/namei.h> @@ -21,37 +22,64 @@ #include <linux/inet.h> #include "internal.h" #include "nfs4_fs.h" +#include "nfs.h" #include "dns_resolve.h" #define NFSDBG_FACILITY NFSDBG_VFS /* + * Work out the length that an NFSv4 path would render to as a standard posix + * path, with a leading slash but no terminating slash. + */ +static ssize_t nfs4_pathname_len(const struct nfs4_pathname *pathname) +{ + ssize_t len = 0; + int i; + + for (i = 0; i < pathname->ncomponents; i++) { + const struct nfs4_string *component = &pathname->components[i]; + + if (component->len > NAME_MAX) + goto too_long; + len += 1 + component->len; /* Adding "/foo" */ + if (len > PATH_MAX) + goto too_long; + } + return len; + +too_long: + return -ENAMETOOLONG; +} + +/* * Convert the NFSv4 pathname components into a standard posix path. 
- * - * Note that the resulting string will be placed at the end of the buffer */ -static inline char *nfs4_pathname_string(const struct nfs4_pathname *pathname, - char *buffer, ssize_t buflen) +static char *nfs4_pathname_string(const struct nfs4_pathname *pathname, + unsigned short *_len) { - char *end = buffer + buflen; - int n; + ssize_t len; + char *buf, *p; + int i; + + len = nfs4_pathname_len(pathname); + if (len < 0) + return ERR_PTR(len); + *_len = len; + + p = buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < pathname->ncomponents; i++) { + const struct nfs4_string *component = &pathname->components[i]; - *--end = '\0'; - buflen--; - - n = pathname->ncomponents; - while (--n >= 0) { - const struct nfs4_string *component = &pathname->components[n]; - buflen -= component->len + 1; - if (buflen < 0) - goto Elong; - end -= component->len; - memcpy(end, component->data, component->len); - *--end = '/'; + *p++ = '/'; + memcpy(p, component->data, component->len); + p += component->len; } - return end; -Elong: - return ERR_PTR(-ENAMETOOLONG); + + *p = 0; + return buf; } /* @@ -100,21 +128,36 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) */ static int nfs4_validate_fspath(struct dentry *dentry, const struct nfs4_fs_locations *locations, - char *page, char *page2) + struct nfs_fs_context *ctx) { - const char *path, *fs_path; + const char *path; + char *fs_path; + unsigned short len; + char *buf; + int n; - path = nfs4_path(dentry, page, PAGE_SIZE); - if (IS_ERR(path)) + buf = kmalloc(4096, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + path = nfs4_path(dentry, buf, 4096); + if (IS_ERR(path)) { + kfree(buf); return PTR_ERR(path); + } - fs_path = nfs4_pathname_string(&locations->fs_path, page2, PAGE_SIZE); - if (IS_ERR(fs_path)) + fs_path = nfs4_pathname_string(&locations->fs_path, &len); + if (IS_ERR(fs_path)) { + kfree(buf); return PTR_ERR(fs_path); + } - if (strncmp(path, fs_path, 
strlen(fs_path)) != 0) { + n = strncmp(path, fs_path, len); + kfree(buf); + kfree(fs_path); + if (n != 0) { dprintk("%s: path %s does not begin with fsroot %s\n", - __func__, path, fs_path); + __func__, path, ctx->nfs_server.export_path); return -ENOENT; } @@ -236,55 +279,77 @@ out: return new; } -static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, - char *page, char *page2, - const struct nfs4_fs_location *location) +static int try_location(struct fs_context *fc, + const struct nfs4_fs_location *location) { - const size_t addr_bufsize = sizeof(struct sockaddr_storage); - struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client); - struct vfsmount *mnt = ERR_PTR(-ENOENT); - char *mnt_path; - unsigned int maxbuflen; - unsigned int s; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + unsigned int len, s; + char *export_path, *source, *p; + int ret = -ENOENT; + + /* Allocate a buffer big enough to hold any of the hostnames plus a + * terminating char and also a buffer big enough to hold the hostname + * plus a colon plus the path. 
+ */ + len = 0; + for (s = 0; s < location->nservers; s++) { + const struct nfs4_string *buf = &location->servers[s]; + if (buf->len > len) + len = buf->len; + } - mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); - if (IS_ERR(mnt_path)) - return ERR_CAST(mnt_path); - mountdata->mnt_path = mnt_path; - maxbuflen = mnt_path - 1 - page2; + kfree(ctx->nfs_server.hostname); + ctx->nfs_server.hostname = kmalloc(len + 1, GFP_KERNEL); + if (!ctx->nfs_server.hostname) + return -ENOMEM; - mountdata->addr = kmalloc(addr_bufsize, GFP_KERNEL); - if (mountdata->addr == NULL) - return ERR_PTR(-ENOMEM); + export_path = nfs4_pathname_string(&location->rootpath, + &ctx->nfs_server.export_path_len); + if (IS_ERR(export_path)) + return PTR_ERR(export_path); + + ctx->nfs_server.export_path = export_path; + + source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1, + GFP_KERNEL); + if (!source) + return -ENOMEM; + kfree(fc->source); + fc->source = source; for (s = 0; s < location->nservers; s++) { const struct nfs4_string *buf = &location->servers[s]; - if (buf->len <= 0 || buf->len >= maxbuflen) - continue; - if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) continue; - mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize, net); - if (mountdata->addrlen == 0) + ctx->nfs_server.addrlen = + nfs_parse_server_name(buf->data, buf->len, + &ctx->nfs_server.address, + sizeof(ctx->nfs_server._address), + fc->net_ns); + if (ctx->nfs_server.addrlen == 0) continue; - memcpy(page2, buf->data, buf->len); - page2[buf->len] = '\0'; - mountdata->hostname = page2; + rpc_set_port(&ctx->nfs_server.address, NFS_PORT); - snprintf(page, PAGE_SIZE, "%s:%s", - mountdata->hostname, - mountdata->mnt_path); + memcpy(ctx->nfs_server.hostname, buf->data, buf->len); + ctx->nfs_server.hostname[buf->len] = '\0'; - mnt = vfs_submount(mountdata->dentry, &nfs4_referral_fs_type, page, mountdata); - if (!IS_ERR(mnt)) - break; + p = source; + 
memcpy(p, buf->data, buf->len); + p += buf->len; + *p++ = ':'; + memcpy(p, ctx->nfs_server.export_path, ctx->nfs_server.export_path_len); + p += ctx->nfs_server.export_path_len; + *p = 0; + + ret = nfs4_get_referral_tree(fc); + if (ret == 0) + return 0; } - kfree(mountdata->addr); - return mnt; + + return ret; } /** @@ -293,38 +358,23 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, * @locations: array of NFSv4 server location information * */ -static struct vfsmount *nfs_follow_referral(struct dentry *dentry, - const struct nfs4_fs_locations *locations) +static int nfs_follow_referral(struct fs_context *fc, + const struct nfs4_fs_locations *locations) { - struct vfsmount *mnt = ERR_PTR(-ENOENT); - struct nfs_clone_mount mountdata = { - .sb = dentry->d_sb, - .dentry = dentry, - .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor, - }; - char *page = NULL, *page2 = NULL; + struct nfs_fs_context *ctx = nfs_fc2context(fc); int loc, error; if (locations == NULL || locations->nlocations <= 0) - goto out; - - dprintk("%s: referral at %pd2\n", __func__, dentry); - - page = (char *) __get_free_page(GFP_USER); - if (!page) - goto out; + return -ENOENT; - page2 = (char *) __get_free_page(GFP_USER); - if (!page2) - goto out; + dprintk("%s: referral at %pd2\n", __func__, ctx->clone_data.dentry); /* Ensure fs path is a prefix of current dentry path */ - error = nfs4_validate_fspath(dentry, locations, page, page2); - if (error < 0) { - mnt = ERR_PTR(error); - goto out; - } + error = nfs4_validate_fspath(ctx->clone_data.dentry, locations, ctx); + if (error < 0) + return error; + error = -ENOENT; for (loc = 0; loc < locations->nlocations; loc++) { const struct nfs4_fs_location *location = &locations->locations[loc]; @@ -332,15 +382,12 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry, location->rootpath.ncomponents == 0) continue; - mnt = try_location(&mountdata, page, page2, location); - if (!IS_ERR(mnt)) - break; + error = 
try_location(fc, location); + if (error == 0) + return 0; } -out: - free_page((unsigned long) page); - free_page((unsigned long) page2); - return mnt; + return error; } /* @@ -348,71 +395,72 @@ out: * @dentry - dentry of referral * */ -static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) +static int nfs_do_refmount(struct fs_context *fc, struct rpc_clnt *client) { - struct vfsmount *mnt = ERR_PTR(-ENOMEM); - struct dentry *parent; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry, *parent; struct nfs4_fs_locations *fs_locations = NULL; struct page *page; - int err; + int err = -ENOMEM; /* BUG_ON(IS_ROOT(dentry)); */ page = alloc_page(GFP_KERNEL); - if (page == NULL) - return mnt; + if (!page) + return -ENOMEM; fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (fs_locations == NULL) + if (!fs_locations) goto out_free; /* Get locations */ - mnt = ERR_PTR(-ENOENT); - + dentry = ctx->clone_data.dentry; parent = dget_parent(dentry); dprintk("%s: getting locations for %pd2\n", __func__, dentry); err = nfs4_proc_fs_locations(client, d_inode(parent), &dentry->d_name, fs_locations, page); dput(parent); - if (err != 0 || - fs_locations->nlocations <= 0 || + if (err != 0) + goto out_free_2; + + err = -ENOENT; + if (fs_locations->nlocations <= 0 || fs_locations->fs_path.ncomponents <= 0) - goto out_free; + goto out_free_2; - mnt = nfs_follow_referral(dentry, fs_locations); + err = nfs_follow_referral(fc, fs_locations); +out_free_2: + kfree(fs_locations); out_free: __free_page(page); - kfree(fs_locations); - return mnt; + return err; } -struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, - struct nfs_fh *fh, struct nfs_fattr *fattr) +int nfs4_submount(struct fs_context *fc, struct nfs_server *server) { - rpc_authflavor_t flavor = server->client->cl_auth->au_flavor; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct dentry *dentry = ctx->clone_data.dentry; 
struct dentry *parent = dget_parent(dentry); struct inode *dir = d_inode(parent); - const struct qstr *name = &dentry->d_name; struct rpc_clnt *client; - struct vfsmount *mnt; + int ret; /* Look it up again to get its attributes and sec flavor */ - client = nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); + client = nfs4_proc_lookup_mountpoint(dir, dentry, ctx->mntfh, + ctx->clone_data.fattr); dput(parent); if (IS_ERR(client)) - return ERR_CAST(client); + return PTR_ERR(client); - if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { - mnt = nfs_do_refmount(client, dentry); - goto out; + ctx->selected_flavor = client->cl_auth->au_flavor; + if (ctx->clone_data.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { + ret = nfs_do_refmount(fc, client); + } else { + ret = nfs_do_submount(fc); } - if (client->cl_auth->au_flavor != flavor) - flavor = client->cl_auth->au_flavor; - mnt = nfs_do_submount(dentry, fh, fattr, flavor); -out: rpc_shutdown_client(client); - return mnt; + return ret; } /* @@ -453,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, rpc_set_port(sap, NFS_PORT); error = -ENOMEM; - hostname = kstrndup(buf->data, buf->len, GFP_KERNEL); + hostname = kmemdup_nul(buf->data, buf->len, GFP_KERNEL); if (hostname == NULL) break; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 76d37161409a..95d07a3dc5d1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1097,11 +1097,12 @@ static int nfs4_call_sync_custom(struct rpc_task_setup *task_setup) return ret; } -static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, - struct nfs_server *server, - struct rpc_message *msg, - struct nfs4_sequence_args *args, - struct nfs4_sequence_res *res) +static int nfs4_do_call_sync(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res, + unsigned short task_flags) { struct nfs_client *clp = server->nfs_client; struct nfs4_call_sync_data data = { @@ 
-1113,12 +1114,23 @@ static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, .rpc_client = clnt, .rpc_message = msg, .callback_ops = clp->cl_mvops->call_sync_ops, - .callback_data = &data + .callback_data = &data, + .flags = task_flags, }; return nfs4_call_sync_custom(&task_setup); } +static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, + struct nfs_server *server, + struct rpc_message *msg, + struct nfs4_sequence_args *args, + struct nfs4_sequence_res *res) +{ + return nfs4_do_call_sync(clnt, server, msg, args, res, 0); +} + + int nfs4_call_sync(struct rpc_clnt *clnt, struct nfs_server *server, struct rpc_message *msg, @@ -3187,6 +3199,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } + if (status == -NFS4ERR_EXPIRED) { + nfs4_schedule_lease_recovery(server->nfs_client); + exception.retry = 1; + continue; + } if (status == -EAGAIN) { /* We must have found a delegation */ exception.retry = 1; @@ -3239,6 +3256,8 @@ static int _nfs4_do_setattr(struct inode *inode, nfs_put_lock_context(l_ctx); if (status == -EIO) return -EBADF; + else if (status == -EAGAIN) + goto zero_stateid; } else { zero_stateid: nfs4_stateid_copy(&arg->stateid, &zero_stateid); @@ -4064,11 +4083,18 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + /* Is this an attribute revalidation, subject to softreval? 
*/ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode); nfs_fattr_init(fattr); - return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + return nfs4_do_call_sync(server->client, server, &msg, + &args.seq_args, &res.seq_res, task_flags); } int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, @@ -4156,7 +4182,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, + struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_server *server = NFS_SERVER(dir); @@ -4164,7 +4190,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct nfs4_lookup_arg args = { .bitmask = server->attr_bitmask, .dir_fh = NFS_FH(dir), - .name = name, + .name = &dentry->d_name, }; struct nfs4_lookup_res res = { .server = server, @@ -4177,13 +4203,20 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + /* Is this an attribute revalidation, subject to softreval? 
*/ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; args.bitmask = nfs4_bitmask(server, label); nfs_fattr_init(fattr); - dprintk("NFS call lookup %s\n", name->name); - status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); + dprintk("NFS call lookup %pd2\n", dentry); + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0); + status = nfs4_do_call_sync(clnt, server, &msg, + &args.seq_args, &res.seq_res, task_flags); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -4197,16 +4230,17 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) } static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, - const struct qstr *name, struct nfs_fh *fhandle, + struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs4_exception exception = { .interruptible = true, }; struct rpc_clnt *client = *clnt; + const struct qstr *name = &dentry->d_name; int err; do { - err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr, label); + err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label); trace_nfs4_lookup(dir, name, err); switch (err) { case -NFS4ERR_BADNAME: @@ -4241,14 +4275,14 @@ out: return err; } -static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name, +static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { int status; struct rpc_clnt *client = NFS_CLIENT(dir); - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, label); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label); if (client != NFS_CLIENT(dir)) { rpc_shutdown_client(client); nfs_fixup_secinfo_attributes(fattr); @@ -4257,13 +4291,13 @@ static int nfs4_proc_lookup(struct inode *dir, const struct qstr *name, } struct rpc_clnt * -nfs4_proc_lookup_mountpoint(struct inode *dir, const struct qstr *name, 
+nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct rpc_clnt *client = NFS_CLIENT(dir); int status; - status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL); + status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL); if (status < 0) return ERR_PTR(status); return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client; @@ -5019,16 +5053,13 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str struct nfs4_exception exception = { .interruptible = true, }; - unsigned long now = jiffies; int err; do { err = _nfs4_do_fsinfo(server, fhandle, fsinfo); trace_nfs4_fsinfo(server, fhandle, fsinfo->fattr, err); if (err == 0) { - nfs4_set_lease_period(server->nfs_client, - fsinfo->lease_time * HZ, - now); + nfs4_set_lease_period(server->nfs_client, fsinfo->lease_time * HZ); break; } err = nfs4_handle_exception(server, err, &exception); @@ -5582,10 +5613,9 @@ out: */ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) { - struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, }; + struct page **pages; struct nfs_getaclargs args = { .fh = NFS_FH(inode), - .acl_pages = pages, .acl_len = buflen, }; struct nfs_getaclres res = { @@ -5596,11 +5626,19 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu .rpc_argp = &args, .rpc_resp = &res, }; - unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; + unsigned int npages; int ret = -ENOMEM, i; + struct nfs_server *server = NFS_SERVER(inode); - if (npages > ARRAY_SIZE(pages)) - return -ERANGE; + if (buflen == 0) + buflen = server->rsize; + + npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1; + pages = kmalloc_array(npages, sizeof(struct page *), GFP_NOFS); + if (!pages) + return -ENOMEM; + + args.acl_pages = pages; for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); @@ -5646,6 +5684,7 @@ out_free: 
__free_page(pages[i]); if (res.acl_scratch) __free_page(res.acl_scratch); + kfree(pages); return ret; } @@ -6084,6 +6123,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, .callback_data = &setclientid, .flags = RPC_TASK_TIMEOUT | RPC_TASK_NO_ROUND_ROBIN, }; + unsigned long now = jiffies; int status; /* nfs_client_id4 */ @@ -6116,6 +6156,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } + + if (status == 0) + do_renew_lease(clp, now); out: trace_nfs4_setclientid(clp, status); dprintk("NFS reply setclientid: %d\n", status); @@ -6859,7 +6902,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_ case -NFS4ERR_STALE_STATEID: lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; nfs4_schedule_lease_recovery(server->nfs_client); - }; + } } static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type) @@ -8203,6 +8246,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre struct rpc_task *task; struct nfs41_exchange_id_args *argp; struct nfs41_exchange_id_res *resp; + unsigned long now = jiffies; int status; task = nfs4_run_exchange_id(clp, cred, sp4_how, NULL); @@ -8223,6 +8267,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre if (status != 0) goto out; + do_renew_lease(clp, now); + clp->cl_clientid = resp->clientid; clp->cl_exchange_flags = resp->flags; clp->cl_seqid = resp->seqid; @@ -8626,7 +8672,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp, case -EACCES: case -EAGAIN: goto out; - }; + } clp->cl_seqid++; if (!status) { @@ -10001,7 +10047,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { .file_ops = &nfs4_file_operations, .getroot = nfs4_proc_get_root, .submount = nfs4_submount, - .try_mount = nfs4_try_mount, + .try_get_tree = nfs4_try_get_tree, .getattr = nfs4_proc_getattr, .setattr 
= nfs4_proc_setattr, .lookup = nfs4_proc_lookup, diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 6ea431b067dd..ff876dda7f06 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -138,15 +138,12 @@ nfs4_kill_renewd(struct nfs_client *clp) * * @clp: pointer to nfs_client * @lease: new value for lease period - * @lastrenewed: time at which lease was last renewed */ void nfs4_set_lease_period(struct nfs_client *clp, - unsigned long lease, - unsigned long lastrenewed) + unsigned long lease) { spin_lock(&clp->cl_lock); clp->cl_lease_time = lease; - clp->cl_last_renewal = lastrenewed; spin_unlock(&clp->cl_lock); /* Cap maximum reconnect timeout at 1/2 lease period */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 34552329233d..f7723d221945 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -92,17 +92,15 @@ static int nfs4_setup_state_renewal(struct nfs_client *clp) { int status; struct nfs_fsinfo fsinfo; - unsigned long now; if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) { nfs4_schedule_state_renewal(clp); return 0; } - now = jiffies; status = nfs4_proc_get_lease_time(clp, &fsinfo); if (status == 0) { - nfs4_set_lease_period(clp, fsinfo.lease_time * HZ, now); + nfs4_set_lease_period(clp, fsinfo.lease_time * HZ); nfs4_schedule_state_renewal(clp); } @@ -766,6 +764,7 @@ void nfs4_put_open_state(struct nfs4_state *state) list_del(&state->open_states); spin_unlock(&inode->i_lock); spin_unlock(&owner->so_lock); + nfs4_inode_return_delegation_on_close(inode); iput(inode); nfs4_free_open_state(state); nfs4_put_state_owner(owner); @@ -1135,7 +1134,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid) case -NFS4ERR_MOVED: /* Non-seqid mutating errors */ return; - }; + } /* * Note: no locking needed as we are guaranteed to be first * on the sequence list diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 2c9cbade561a..1475f932d7da 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -4,6 +4,7 
@@ */ #include <linux/init.h> #include <linux/module.h> +#include <linux/mount.h> #include <linux/nfs4_mount.h> #include <linux/nfs_fs.h> #include "delegation.h" @@ -18,36 +19,6 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc); static void nfs4_evict_inode(struct inode *inode); -static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); -static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); - -static struct file_system_type nfs4_remote_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; - -static struct file_system_type nfs4_remote_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_remote_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; - -struct file_system_type nfs4_referral_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs4_referral_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; static const struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, @@ -61,16 +32,15 @@ static const struct super_operations nfs4_sops = { .show_devname = nfs_show_devname, .show_path = nfs_show_path, .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, }; struct nfs_subversion nfs_v4 = { - .owner = THIS_MODULE, - .nfs_fs = &nfs4_fs_type, - .rpc_vers = &nfs_version4, - .rpc_ops = &nfs_v4_clientops, - .sops = &nfs4_sops, - .xattr = nfs4_xattr_handlers, + .owner = THIS_MODULE, + .nfs_fs = &nfs4_fs_type, + .rpc_vers = &nfs_version4, + .rpc_ops = &nfs_v4_clientops, + .sops = &nfs4_sops, + .xattr 
= nfs4_xattr_handlers, }; static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -101,53 +71,6 @@ static void nfs4_evict_inode(struct inode *inode) nfs_clear_inode(inode); } -/* - * Get the superblock for the NFS4 root partition - */ -static struct dentry * -nfs4_remote_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *info) -{ - struct nfs_mount_info *mount_info = info; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - mount_info->set_security = nfs_set_sb_security; - - /* Get a volume representation */ - server = nfs4_create_server(mount_info, &nfs_v4); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; - } - - mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4); - -out: - return mntroot; -} - -static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, - int flags, void *data, const char *hostname) -{ - struct vfsmount *root_mnt; - char *root_devname; - size_t len; - - len = strlen(hostname) + 5; - root_devname = kmalloc(len, GFP_KERNEL); - if (root_devname == NULL) - return ERR_PTR(-ENOMEM); - /* Does hostname needs to be enclosed in brackets? 
*/ - if (strchr(hostname, ':')) - snprintf(root_devname, len, "[%s]:/", hostname); - else - snprintf(root_devname, len, "%s:/", hostname); - root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); - kfree(root_devname); - return root_mnt; -} - struct nfs_referral_count { struct list_head list; const struct task_struct *task; @@ -214,111 +137,125 @@ static void nfs_referral_loop_unprotect(void) kfree(p); } -static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, - const char *export_path) +static int do_nfs4_mount(struct nfs_server *server, + struct fs_context *fc, + const char *hostname, + const char *export_path) { + struct nfs_fs_context *root_ctx; + struct fs_context *root_fc; + struct vfsmount *root_mnt; struct dentry *dentry; - int err; + size_t len; + int ret; - if (IS_ERR(root_mnt)) - return ERR_CAST(root_mnt); + struct fs_parameter param = { + .key = "source", + .type = fs_value_is_string, + .dirfd = -1, + }; - err = nfs_referral_loop_protect(); - if (err) { - mntput(root_mnt); - return ERR_PTR(err); + if (IS_ERR(server)) + return PTR_ERR(server); + + root_fc = vfs_dup_fs_context(fc); + if (IS_ERR(root_fc)) { + nfs_free_server(server); + return PTR_ERR(root_fc); } + kfree(root_fc->source); + root_fc->source = NULL; - dentry = mount_subtree(root_mnt, export_path); - nfs_referral_loop_unprotect(); + root_ctx = nfs_fc2context(root_fc); + root_ctx->internal = true; + root_ctx->server = server; + /* We leave export_path unset as it's not used to find the root. 
*/ - return dentry; -} + len = strlen(hostname) + 5; + param.string = kmalloc(len, GFP_KERNEL); + if (param.string == NULL) { + put_fs_context(root_fc); + return -ENOMEM; + } -struct dentry *nfs4_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) -{ - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; - struct nfs_parsed_mount_data *data = mount_info->parsed; + /* Does hostname needs to be enclosed in brackets? */ + if (strchr(hostname, ':')) + param.size = snprintf(param.string, len, "[%s]:/", hostname); + else + param.size = snprintf(param.string, len, "%s:/", hostname); + ret = vfs_parse_fs_param(root_fc, &param); + kfree(param.string); + if (ret < 0) { + put_fs_context(root_fc); + return ret; + } + root_mnt = fc_mount(root_fc); + put_fs_context(root_fc); + + if (IS_ERR(root_mnt)) + return PTR_ERR(root_mnt); - dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + ret = nfs_referral_loop_protect(); + if (ret) { + mntput(root_mnt); + return ret; + } - export_path = data->nfs_server.export_path; - data->nfs_server.export_path = "/"; - root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, - data->nfs_server.hostname); - data->nfs_server.export_path = export_path; + dentry = mount_subtree(root_mnt, export_path); + nfs_referral_loop_unprotect(); - res = nfs_follow_remote_path(root_mnt, export_path); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", - PTR_ERR_OR_ZERO(res), - IS_ERR(res) ? 
" [error]" : ""); - return res; + fc->root = dentry; + return 0; } -static struct dentry * -nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) +int nfs4_try_get_tree(struct fs_context *fc) { - struct nfs_mount_info mount_info = { - .fill_super = nfs_fill_super, - .set_security = nfs_clone_sb_security, - .cloned = raw_data, - }; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - - dprintk("--> nfs4_referral_get_sb()\n"); + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err; - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.cloned == NULL || mount_info.mntfh == NULL) - goto out; + dfprintk(MOUNT, "--> nfs4_try_get_tree()\n"); - /* create a new volume representation */ - server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh); - if (IS_ERR(server)) { - mntroot = ERR_CAST(server); - goto out; + /* We create a mount for the server's root, walk to the requested + * location and then create another mount for that. 
+ */ + err= do_nfs4_mount(nfs4_create_server(fc), + fc, ctx->nfs_server.hostname, + ctx->nfs_server.export_path); + if (err) { + nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + dfprintk(MOUNT, "<-- nfs4_try_get_tree() = %d [error]\n", err); + } else { + dfprintk(MOUNT, "<-- nfs4_try_get_tree() = 0\n"); } - - mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4); -out: - nfs_free_fhandle(mount_info.mntfh); - return mntroot; + return err; } /* * Create an NFS4 server record on referral traversal */ -static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +int nfs4_get_referral_tree(struct fs_context *fc) { - struct nfs_clone_mount *data = raw_data; - char *export_path; - struct vfsmount *root_mnt; - struct dentry *res; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + int err; dprintk("--> nfs4_referral_mount()\n"); - export_path = data->mnt_path; - data->mnt_path = "/"; - - root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type, - flags, data, data->hostname); - data->mnt_path = export_path; - - res = nfs_follow_remote_path(root_mnt, export_path); - dprintk("<-- nfs4_referral_mount() = %d%s\n", - PTR_ERR_OR_ZERO(res), - IS_ERR(res) ? 
" [error]" : ""); - return res; + /* create a new volume representation */ + err = do_nfs4_mount(nfs4_create_referral_server(fc), + fc, ctx->nfs_server.hostname, + ctx->nfs_server.export_path); + if (err) { + nfs_errorf(fc, "NFS4: Couldn't follow remote path"); + dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = %d [error]\n", err); + } else { + dfprintk(MOUNT, "<-- nfs4_get_referral_tree() = 0\n"); + } + return err; } - static int __init init_nfs_v4(void) { int err; diff --git a/fs/nfs/nfs4trace.c b/fs/nfs/nfs4trace.c index 1a8f376b3f73..d9ac556bebcf 100644 --- a/fs/nfs/nfs4trace.c +++ b/fs/nfs/nfs4trace.c @@ -24,4 +24,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_done); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_read_pagelist); EXPORT_TRACEPOINT_SYMBOL_GPL(pnfs_mds_fallback_write_pagelist); + +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_read_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_write_error); +EXPORT_TRACEPOINT_SYMBOL_GPL(ff_layout_commit_error); #endif diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index e60b6fbd5ada..1e97e5e04cb4 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -155,6 +155,9 @@ TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED); TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE); TRACE_DEFINE_ENUM(NFS4ERR_XDEV); +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS); +TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS); + #define show_nfsv4_errors(error) \ __print_symbolic(error, \ { NFS4_OK, "OK" }, \ @@ -305,7 +308,10 @@ TRACE_DEFINE_ENUM(NFS4ERR_XDEV); { NFS4ERR_WRONGSEC, "WRONGSEC" }, \ { NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \ { NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \ - { NFS4ERR_XDEV, "XDEV" }) + { NFS4ERR_XDEV, "XDEV" }, \ + /* ***** Internal to Linux NFS client ***** */ \ + { NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \ + { NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" }) #define show_open_flags(flags) \ __print_flags(flags, "|", \ @@ -352,7 +358,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event, ), 
TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __assign_str(dstaddr, clp->cl_hostname); ), @@ -432,7 +438,8 @@ TRACE_EVENT(nfs4_sequence_done, __entry->target_highest_slotid = res->sr_target_highest_slotid; __entry->status_flags = res->sr_status_flags; - __entry->error = res->sr_status; + __entry->error = res->sr_status < 0 ? + -res->sr_status : 0; ), TP_printk( "error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u " @@ -640,7 +647,7 @@ TRACE_EVENT(nfs4_state_mgr_failed, ), TP_fast_assign( - __entry->error = status; + __entry->error = status < 0 ? -status : 0; __entry->state = clp->cl_state; __assign_str(hostname, clp->cl_hostname); __assign_str(section, section); @@ -659,7 +666,7 @@ TRACE_EVENT(nfs4_xdr_status, TP_PROTO( const struct xdr_stream *xdr, u32 op, - int error + u32 error ), TP_ARGS(xdr, op, error), @@ -691,6 +698,41 @@ TRACE_EVENT(nfs4_xdr_status, ) ); +DECLARE_EVENT_CLASS(nfs4_cb_error_class, + TP_PROTO( + __be32 xid, + u32 cb_ident + ), + + TP_ARGS(xid, cb_ident), + + TP_STRUCT__entry( + __field(u32, xid) + __field(u32, cbident) + ), + + TP_fast_assign( + __entry->xid = be32_to_cpu(xid); + __entry->cbident = cb_ident; + ), + + TP_printk( + "xid=0x%08x cb_ident=0x%08x", + __entry->xid, __entry->cbident + ) +); + +#define DEFINE_CB_ERROR_EVENT(name) \ + DEFINE_EVENT(nfs4_cb_error_class, nfs_cb_##name, \ + TP_PROTO( \ + __be32 xid, \ + u32 cb_ident \ + ), \ + TP_ARGS(xid, cb_ident)) + +DEFINE_CB_ERROR_EVENT(no_clp); +DEFINE_CB_ERROR_EVENT(badprinc); + DECLARE_EVENT_CLASS(nfs4_open_event, TP_PROTO( const struct nfs_open_context *ctx, @@ -849,7 +891,7 @@ TRACE_EVENT(nfs4_close, __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); __entry->fmode = (__force unsigned int)state->state; - __entry->error = error; + __entry->error = error < 0 ? 
-error : 0; __entry->stateid_seq = be32_to_cpu(args->stateid.seqid); __entry->stateid_hash = @@ -914,7 +956,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; __entry->type = request->fl_type; __entry->start = request->fl_start; @@ -986,7 +1028,7 @@ TRACE_EVENT(nfs4_set_lock, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->cmd = cmd; __entry->type = request->fl_type; __entry->start = request->fl_start; @@ -1164,7 +1206,7 @@ TRACE_EVENT(nfs4_delegreturn_exit, TP_fast_assign( __entry->dev = res->server->s_dev; __entry->fhandle = nfs_fhandle_hash(args->fhandle); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(args->stateid->seqid); __entry->stateid_hash = @@ -1204,7 +1246,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event, TP_fast_assign( const struct inode *inode = state->inode; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); @@ -1306,7 +1348,7 @@ TRACE_EVENT(nfs4_lookupp, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = NFS_FILEID(inode); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( @@ -1342,7 +1384,7 @@ TRACE_EVENT(nfs4_rename, __entry->dev = olddir->i_sb->s_dev; __entry->olddir = NFS_FILEID(olddir); __entry->newdir = NFS_FILEID(newdir); - __entry->error = error; + __entry->error = error < 0 ? 
-error : 0; __assign_str(oldname, oldname->name); __assign_str(newname, newname->name); ), @@ -1433,7 +1475,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event, __entry->dev = inode->i_sb->s_dev; __entry->fileid = NFS_FILEID(inode); __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(stateid->seqid); __entry->stateid_hash = @@ -1489,7 +1531,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event, __entry->valid = fattr->valid; __entry->fhandle = nfs_fhandle_hash(fhandle); __entry->fileid = (fattr->valid & NFS_ATTR_FATTR_FILEID) ? fattr->fileid : 0; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; ), TP_printk( @@ -1536,7 +1578,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->fhandle = nfs_fhandle_hash(fhandle); if (!IS_ERR_OR_NULL(inode)) { __entry->fileid = NFS_FILEID(inode); @@ -1593,7 +1635,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event, ), TP_fast_assign( - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->fhandle = nfs_fhandle_hash(fhandle); if (!IS_ERR_OR_NULL(inode)) { __entry->fileid = NFS_FILEID(inode); @@ -1694,7 +1736,8 @@ DECLARE_EVENT_CLASS(nfs4_read_event, __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) - __field(size_t, count) + __field(u32, arg_count) + __field(u32, res_count) __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) @@ -1702,13 +1745,18 @@ DECLARE_EVENT_CLASS(nfs4_read_event, TP_fast_assign( const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? 
+ hdr->args.fh : &nfsi->fh; const struct nfs4_state *state = hdr->args.context->state; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = hdr->args.offset; - __entry->count = hdr->args.count; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); @@ -1718,14 +1766,14 @@ DECLARE_EVENT_CLASS(nfs4_read_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu stateid=%d:0x%08x", + "offset=%lld count=%u res=%u stateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, - __entry->count, + __entry->arg_count, __entry->res_count, __entry->stateid_seq, __entry->stateid_hash ) ); @@ -1754,7 +1802,8 @@ DECLARE_EVENT_CLASS(nfs4_write_event, __field(u32, fhandle) __field(u64, fileid) __field(loff_t, offset) - __field(size_t, count) + __field(u32, arg_count) + __field(u32, res_count) __field(unsigned long, error) __field(int, stateid_seq) __field(u32, stateid_hash) @@ -1762,13 +1811,18 @@ DECLARE_EVENT_CLASS(nfs4_write_event, TP_fast_assign( const struct inode *inode = hdr->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? 
+ hdr->args.fh : &nfsi->fh; const struct nfs4_state *state = hdr->args.context->state; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = hdr->args.offset; - __entry->count = hdr->args.count; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); @@ -1778,14 +1832,14 @@ DECLARE_EVENT_CLASS(nfs4_write_event, TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu stateid=%d:0x%08x", + "offset=%lld count=%u res=%u stateid=%d:0x%08x", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, (long long)__entry->offset, - __entry->count, + __entry->arg_count, __entry->res_count, __entry->stateid_seq, __entry->stateid_hash ) ); @@ -1814,24 +1868,28 @@ DECLARE_EVENT_CLASS(nfs4_commit_event, __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) - __field(loff_t, offset) - __field(size_t, count) __field(unsigned long, error) + __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( const struct inode *inode = data->inode; + const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? + data->args.fh : &nfsi->fh; + __entry->dev = inode->i_sb->s_dev; - __entry->fileid = NFS_FILEID(inode); - __entry->fhandle = nfs_fhandle_hash(NFS_FH(inode)); + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = data->args.offset; __entry->count = data->args.count; - __entry->error = error; + __entry->error = error < 0 ? 
-error : 0; ), TP_printk( "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zu", + "offset=%lld count=%u", -__entry->error, show_nfsv4_errors(__entry->error), MAJOR(__entry->dev), MINOR(__entry->dev), @@ -1896,7 +1954,7 @@ TRACE_EVENT(nfs4_layoutget, __entry->iomode = args->iomode; __entry->offset = args->offset; __entry->count = args->length; - __entry->error = error; + __entry->error = error < 0 ? -error : 0; __entry->stateid_seq = be32_to_cpu(state->stateid.seqid); __entry->stateid_hash = @@ -2094,6 +2152,115 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist); +DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, + TP_PROTO( + const struct nfs_pgio_header *hdr + ), + + TP_ARGS(hdr), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __field(int, stateid_seq) + __field(u32, stateid_hash) + __string(dstaddr, hdr->ds_clp ? + rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + ), + + TP_fast_assign( + const struct inode *inode = hdr->inode; + + __entry->error = hdr->res.op_status; + __entry->fhandle = nfs_fhandle_hash(hdr->args.fh); + __entry->fileid = NFS_FILEID(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; + __entry->stateid_seq = + be32_to_cpu(hdr->args.stateid.seqid); + __entry->stateid_hash = + nfs_stateid_hash(&hdr->args.stateid); + __assign_str(dstaddr, hdr->ds_clp ? 
+ rpc_peeraddr2str(hdr->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown"); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s", + -__entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count, + __entry->stateid_seq, __entry->stateid_hash, + __get_str(dstaddr) + ) +); + +#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \ + DEFINE_EVENT(nfs4_flexfiles_io_event, name, \ + TP_PROTO( \ + const struct nfs_pgio_header *hdr \ + ), \ + TP_ARGS(hdr)) +DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error); +DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error); + +TRACE_EVENT(ff_layout_commit_error, + TP_PROTO( + const struct nfs_commit_data *data + ), + + TP_ARGS(data), + + TP_STRUCT__entry( + __field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __string(dstaddr, data->ds_clp ? + rpc_peeraddr2str(data->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown") + ), + + TP_fast_assign( + const struct inode *inode = data->inode; + + __entry->error = data->res.op_status; + __entry->fhandle = nfs_fhandle_hash(data->args.fh); + __entry->fileid = NFS_FILEID(inode); + __entry->dev = inode->i_sb->s_dev; + __entry->offset = data->args.offset; + __entry->count = data->args.count; + __assign_str(dstaddr, data->ds_clp ? 
+ rpc_peeraddr2str(data->ds_clp->cl_rpcclient, + RPC_DISPLAY_ADDR) : "unknown"); + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "offset=%llu count=%u dstaddr=%s", + -__entry->error, + show_nfsv4_errors(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->offset, __entry->count, + __get_str(dstaddr) + ) +); + + #endif /* CONFIG_NFS_V4_1 */ #endif /* _TRACE_NFS4_H */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 728d88b6a698..47817ef0aadb 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1061,7 +1061,7 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve static __be32 * xdr_encode_nfstime4(__be32 *p, const struct timespec64 *t) { - p = xdr_encode_hyper(p, (__s64)t->tv_sec); + p = xdr_encode_hyper(p, t->tv_sec); *p++ = cpu_to_be32(t->tv_nsec); return p; } @@ -4313,11 +4313,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { + struct nfs_writeverf *verf = res->verf; int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_write_verifier(xdr, &res->verf->verifier); + status = decode_write_verifier(xdr, &verf->verifier); + if (!status) + verf->committed = NFS_FILE_SYNC; return status; } diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 2a82dcce5fc1..a9588d19a5ae 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -198,7 +198,66 @@ DEFINE_NFS_INODE_EVENT_DONE(nfs_writeback_inode_exit); DEFINE_NFS_INODE_EVENT(nfs_fsync_enter); DEFINE_NFS_INODE_EVENT_DONE(nfs_fsync_exit); DEFINE_NFS_INODE_EVENT(nfs_access_enter); -DEFINE_NFS_INODE_EVENT_DONE(nfs_access_exit); + +TRACE_EVENT(nfs_access_exit, + TP_PROTO( + const struct inode *inode, + unsigned int mask, + unsigned int permitted, + int error + ), + + TP_ARGS(inode, mask, permitted, error), + + TP_STRUCT__entry( + 
__field(unsigned long, error) + __field(dev_t, dev) + __field(u32, fhandle) + __field(unsigned char, type) + __field(u64, fileid) + __field(u64, version) + __field(loff_t, size) + __field(unsigned long, nfsi_flags) + __field(unsigned long, cache_validity) + __field(unsigned int, mask) + __field(unsigned int, permitted) + ), + + TP_fast_assign( + const struct nfs_inode *nfsi = NFS_I(inode); + __entry->error = error < 0 ? -error : 0; + __entry->dev = inode->i_sb->s_dev; + __entry->fileid = nfsi->fileid; + __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->type = nfs_umode_to_dtype(inode->i_mode); + __entry->version = inode_peek_iversion_raw(inode); + __entry->size = i_size_read(inode); + __entry->nfsi_flags = nfsi->flags; + __entry->cache_validity = nfsi->cache_validity; + __entry->mask = mask; + __entry->permitted = permitted; + ), + + TP_printk( + "error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x " + "type=%u (%s) version=%llu size=%lld " + "cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) " + "mask=0x%x permitted=0x%x", + -__entry->error, nfs_show_status(__entry->error), + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->fileid, + __entry->fhandle, + __entry->type, + nfs_show_file_type(__entry->type), + (unsigned long long)__entry->version, + (long long)__entry->size, + __entry->cache_validity, + nfs_show_cache_validity(__entry->cache_validity), + __entry->nfsi_flags, + nfs_show_nfsi_flags(__entry->nfsi_flags), + __entry->mask, __entry->permitted + ) +); TRACE_DEFINE_ENUM(LOOKUP_FOLLOW); TRACE_DEFINE_ENUM(LOOKUP_DIRECTORY); @@ -818,75 +877,85 @@ TRACE_EVENT(nfs_sillyrename_unlink, TRACE_EVENT(nfs_initiate_read, TP_PROTO( - const struct inode *inode, - loff_t offset, unsigned long count + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, offset, count), + TP_ARGS(hdr), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, 
offset) + __field(u32, count) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? + hdr->args.fh : &nfsi->fh; - __entry->offset = offset; - __entry->count = count; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu", + "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count + (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_readpage_done, TP_PROTO( - const struct inode *inode, - int status, loff_t offset, bool eof + const struct rpc_task *task, + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, status, offset, eof), + TP_ARGS(task, hdr), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(bool, eof) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(bool, eof) + __field(int, status) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); - - __entry->status = status; - __entry->offset = offset; - __entry->eof = eof; + const struct nfs_fh *fh = hdr->args.fh ? 
+ hdr->args.fh : &nfsi->fh; + + __entry->status = task->tk_status; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->eof = hdr->res.eof; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d%s", + "offset=%lld count=%u res=%u status=%d%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, + (long long)__entry->offset, __entry->arg_count, + __entry->res_count, __entry->status, __entry->eof ? " eof" : "" ) ); @@ -903,90 +972,144 @@ TRACE_DEFINE_ENUM(NFS_FILE_SYNC); TRACE_EVENT(nfs_initiate_write, TP_PROTO( - const struct inode *inode, - loff_t offset, unsigned long count, - enum nfs3_stable_how stable + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, offset, count, stable), + TP_ARGS(hdr), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) - __field(enum nfs3_stable_how, stable) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) + __field(enum nfs3_stable_how, stable) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = hdr->args.fh ? 
+ hdr->args.fh : &nfsi->fh; - __entry->offset = offset; - __entry->count = count; - __entry->stable = stable; + __entry->offset = hdr->args.offset; + __entry->count = hdr->args.count; + __entry->stable = hdr->args.stable; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu stable=%s", + "offset=%lld count=%u stable=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count, + (long long)__entry->offset, __entry->count, nfs_show_stable(__entry->stable) ) ); TRACE_EVENT(nfs_writeback_done, TP_PROTO( - const struct inode *inode, - int status, - loff_t offset, - struct nfs_writeverf *writeverf + const struct rpc_task *task, + const struct nfs_pgio_header *hdr ), - TP_ARGS(inode, status, offset, writeverf), + TP_ARGS(task, hdr), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(enum nfs3_stable_how, stable) - __field(unsigned long long, verifier) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, arg_count) + __field(u32, res_count) + __field(int, status) + __field(enum nfs3_stable_how, stable) + __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( + const struct inode *inode = hdr->inode; const struct nfs_inode *nfsi = NFS_I(inode); - - __entry->status = status; - __entry->offset = offset; - __entry->stable = writeverf->committed; - memcpy(&__entry->verifier, &writeverf->verifier, - sizeof(__entry->verifier)); + const struct nfs_fh *fh = hdr->args.fh ? 
+ hdr->args.fh : &nfsi->fh; + const struct nfs_writeverf *verf = hdr->res.verf; + + __entry->status = task->tk_status; + __entry->offset = hdr->args.offset; + __entry->arg_count = hdr->args.count; + __entry->res_count = hdr->res.count; + __entry->stable = verf->committed; + memcpy(__entry->verifier, + &verf->verifier, + NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d stable=%s " - "verifier 0x%016llx", + "offset=%lld count=%u res=%u status=%d stable=%s " + "verifier=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, + (long long)__entry->offset, __entry->arg_count, + __entry->res_count, __entry->status, nfs_show_stable(__entry->stable), - __entry->verifier + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) ) ); +DECLARE_EVENT_CLASS(nfs_page_error_class, + TP_PROTO( + const struct nfs_page *req, + int error + ), + + TP_ARGS(req, error), + + TP_STRUCT__entry( + __field(const void *, req) + __field(pgoff_t, index) + __field(unsigned int, offset) + __field(unsigned int, pgbase) + __field(unsigned int, bytes) + __field(int, error) + ), + + TP_fast_assign( + __entry->req = req; + __entry->index = req->wb_index; + __entry->offset = req->wb_offset; + __entry->pgbase = req->wb_pgbase; + __entry->bytes = req->wb_bytes; + __entry->error = error; + ), + + TP_printk( + "req=%p index=%lu offset=%u pgbase=%u bytes=%u error=%d", + __entry->req, __entry->index, __entry->offset, + __entry->pgbase, __entry->bytes, __entry->error + ) +); + +#define DEFINE_NFS_PAGEERR_EVENT(name) \ + DEFINE_EVENT(nfs_page_error_class, name, \ + TP_PROTO( \ + const struct nfs_page *req, \ + int error \ + ), \ + TP_ARGS(req, error)) + +DEFINE_NFS_PAGEERR_EVENT(nfs_write_error); 
+DEFINE_NFS_PAGEERR_EVENT(nfs_comp_error); +DEFINE_NFS_PAGEERR_EVENT(nfs_commit_error); + TRACE_EVENT(nfs_initiate_commit, TP_PROTO( const struct nfs_commit_data *data @@ -995,71 +1118,81 @@ TRACE_EVENT(nfs_initiate_commit, TP_ARGS(data), TP_STRUCT__entry( - __field(loff_t, offset) - __field(unsigned long, count) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(u32, count) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? + data->args.fh : &nfsi->fh; __entry->offset = data->args.offset; __entry->count = data->args.count; __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%lu", + "offset=%lld count=%u", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->count + (long long)__entry->offset, __entry->count ) ); TRACE_EVENT(nfs_commit_done, TP_PROTO( + const struct rpc_task *task, const struct nfs_commit_data *data ), - TP_ARGS(data), + TP_ARGS(task, data), TP_STRUCT__entry( - __field(int, status) - __field(loff_t, offset) - __field(unsigned long long, verifier) __field(dev_t, dev) __field(u32, fhandle) __field(u64, fileid) + __field(loff_t, offset) + __field(int, status) + __field(enum nfs3_stable_how, stable) + __array(char, verifier, NFS4_VERIFIER_SIZE) ), TP_fast_assign( const struct inode *inode = data->inode; const struct nfs_inode *nfsi = NFS_I(inode); + const struct nfs_fh *fh = data->args.fh ? 
+ data->args.fh : &nfsi->fh; + const struct nfs_writeverf *verf = data->res.verf; - __entry->status = data->res.op_status; + __entry->status = task->tk_status; __entry->offset = data->args.offset; - memcpy(&__entry->verifier, &data->verf.verifier, - sizeof(__entry->verifier)); + __entry->stable = verf->committed; + memcpy(__entry->verifier, + &verf->verifier, + NFS4_VERIFIER_SIZE); __entry->dev = inode->i_sb->s_dev; __entry->fileid = nfsi->fileid; - __entry->fhandle = nfs_fhandle_hash(&nfsi->fh); + __entry->fhandle = nfs_fhandle_hash(fh); ), TP_printk( "fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld status=%d verifier 0x%016llx", + "offset=%lld status=%d stable=%s verifier=%s", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, - __entry->offset, __entry->status, - __entry->verifier + (long long)__entry->offset, __entry->status, + nfs_show_stable(__entry->stable), + __print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE) ) ); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index cec3070ab577..542ea8dfd1bc 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1425,7 +1425,7 @@ retry: /* lo ref dropped in pnfs_roc_release() */ layoutreturn = pnfs_prepare_layoutreturn(lo, &stateid, &iomode); /* If the creds don't match, we can't compound the layoutreturn */ - if (!layoutreturn || cred != lo->plh_lc_cred) + if (!layoutreturn || cred_fscmp(cred, lo->plh_lc_cred) != 0) goto out_noroc; roc = layoutreturn; @@ -1998,8 +1998,6 @@ lookup_again: trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg, PNFS_UPDATE_LAYOUT_INVALID_OPEN); - if (status != -EAGAIN) - goto out_unlock; spin_unlock(&ino->i_lock); nfs4_schedule_stateid_recovery(server, ctx->state); pnfs_clear_first_layoutget(lo); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f8a38065c7e4..0fafdadc9c8d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -79,6 +79,10 @@ enum pnfs_try_status { PNFS_TRY_AGAIN = 2, }; +/* error codes for internal use */ +#define 
NFS4ERR_RESET_TO_MDS 12001 +#define NFS4ERR_RESET_TO_PNFS 12002 + #ifdef CONFIG_NFS_V4_1 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" @@ -91,10 +95,6 @@ enum pnfs_try_status { #define NFS4_DEF_DS_RETRANS 5 #define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) -/* error codes for internal use */ -#define NFS4ERR_RESET_TO_MDS 12001 -#define NFS4ERR_RESET_TO_PNFS 12002 - enum { NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 82af4809b869..8b37e7f8e789 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -31,12 +31,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); /* Fake up some data that will cause nfs_commit_release to retry the writes. */ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) { - struct nfs_page *first = nfs_list_entry(data->pages.next); + struct nfs_writeverf *verf = data->res.verf; data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ + memset(&verf->verifier, 0, sizeof(verf->verifier)); + verf->committed = NFS_UNSTABLE; } EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 0f7288b94633..15c865cc837f 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -108,10 +108,15 @@ nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, .rpc_resp = fattr, }; int status; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? 
*/ + if (inode && (server->flags & NFS_MOUNT_SOFTREVAL)) + task_flags |= RPC_TASK_TIMEOUT; dprintk("NFS call getattr\n"); nfs_fattr_init(fattr); - status = rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, task_flags); dprintk("NFS reply getattr: %d\n", status); return status; } @@ -147,14 +152,14 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs_proc_lookup(struct inode *dir, const struct qstr *name, +nfs_proc_lookup(struct inode *dir, struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label) { struct nfs_diropargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len + .name = dentry->d_name.name, + .len = dentry->d_name.len }; struct nfs_diropok res = { .fh = fhandle, @@ -166,10 +171,15 @@ nfs_proc_lookup(struct inode *dir, const struct qstr *name, .rpc_resp = &res, }; int status; + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? 
*/ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; - dprintk("NFS call lookup %s\n", name->name); + dprintk("NFS call lookup %pd2\n", dentry); nfs_fattr_init(fattr); - status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); dprintk("NFS reply lookup: %d\n", status); return status; } @@ -710,7 +720,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = { .file_ops = &nfs_file_operations, .getroot = nfs_proc_get_root, .submount = nfs_submount, - .try_mount = nfs_try_mount, + .try_get_tree = nfs_try_get_tree, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, .lookup = nfs_proc_lookup, diff --git a/fs/nfs/read.c b/fs/nfs/read.c index cfe0b586eadd..34bb9add2302 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -214,7 +214,7 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr, task_setup_data->flags |= swap_flags; rpc_ops->read_setup(hdr, msg); - trace_nfs_initiate_read(inode, hdr->io_start, hdr->good_bytes); + trace_nfs_initiate_read(hdr); } static void @@ -247,8 +247,7 @@ static int nfs_readpage_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count); - trace_nfs_readpage_done(inode, task->tk_status, - hdr->args.offset, hdr->res.eof); + trace_nfs_readpage_done(task, hdr); if (task->tk_status == -ESTALE) { set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); @@ -282,6 +281,8 @@ static void nfs_readpage_retry(struct rpc_task *task, argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; + resp->count = 0; + resp->eof = 0; rpc_restart_call_prepare(task); } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 8d8d04bb9d64..dada09b391c6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -69,250 +69,6 @@ #include "nfs.h" #define NFSDBG_FACILITY NFSDBG_VFS -#define NFS_TEXT_DATA 1 - -#if IS_ENABLED(CONFIG_NFS_V3) -#define NFS_DEFAULT_VERSION 3 -#else -#define NFS_DEFAULT_VERSION 2 -#endif - -#define 
NFS_MAX_CONNECTIONS 16 - -enum { - /* Mount options that take no arguments */ - Opt_soft, Opt_softerr, Opt_hard, - Opt_posix, Opt_noposix, - Opt_cto, Opt_nocto, - Opt_ac, Opt_noac, - Opt_lock, Opt_nolock, - Opt_udp, Opt_tcp, Opt_rdma, - Opt_acl, Opt_noacl, - Opt_rdirplus, Opt_nordirplus, - Opt_sharecache, Opt_nosharecache, - Opt_resvport, Opt_noresvport, - Opt_fscache, Opt_nofscache, - Opt_migration, Opt_nomigration, - - /* Mount options that take integer arguments */ - Opt_port, - Opt_rsize, Opt_wsize, Opt_bsize, - Opt_timeo, Opt_retrans, - Opt_acregmin, Opt_acregmax, - Opt_acdirmin, Opt_acdirmax, - Opt_actimeo, - Opt_namelen, - Opt_mountport, - Opt_mountvers, - Opt_minorversion, - - /* Mount options that take string arguments */ - Opt_nfsvers, - Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, - Opt_addr, Opt_mountaddr, Opt_clientaddr, - Opt_nconnect, - Opt_lookupcache, - Opt_fscache_uniq, - Opt_local_lock, - - /* Special mount options */ - Opt_userspace, Opt_deprecated, Opt_sloppy, - - Opt_err -}; - -static const match_table_t nfs_mount_option_tokens = { - { Opt_userspace, "bg" }, - { Opt_userspace, "fg" }, - { Opt_userspace, "retry=%s" }, - - { Opt_sloppy, "sloppy" }, - - { Opt_soft, "soft" }, - { Opt_softerr, "softerr" }, - { Opt_hard, "hard" }, - { Opt_deprecated, "intr" }, - { Opt_deprecated, "nointr" }, - { Opt_posix, "posix" }, - { Opt_noposix, "noposix" }, - { Opt_cto, "cto" }, - { Opt_nocto, "nocto" }, - { Opt_ac, "ac" }, - { Opt_noac, "noac" }, - { Opt_lock, "lock" }, - { Opt_nolock, "nolock" }, - { Opt_udp, "udp" }, - { Opt_tcp, "tcp" }, - { Opt_rdma, "rdma" }, - { Opt_acl, "acl" }, - { Opt_noacl, "noacl" }, - { Opt_rdirplus, "rdirplus" }, - { Opt_nordirplus, "nordirplus" }, - { Opt_sharecache, "sharecache" }, - { Opt_nosharecache, "nosharecache" }, - { Opt_resvport, "resvport" }, - { Opt_noresvport, "noresvport" }, - { Opt_fscache, "fsc" }, - { Opt_nofscache, "nofsc" }, - { Opt_migration, "migration" }, - { Opt_nomigration, "nomigration" }, - - { 
Opt_port, "port=%s" }, - { Opt_rsize, "rsize=%s" }, - { Opt_wsize, "wsize=%s" }, - { Opt_bsize, "bsize=%s" }, - { Opt_timeo, "timeo=%s" }, - { Opt_retrans, "retrans=%s" }, - { Opt_acregmin, "acregmin=%s" }, - { Opt_acregmax, "acregmax=%s" }, - { Opt_acdirmin, "acdirmin=%s" }, - { Opt_acdirmax, "acdirmax=%s" }, - { Opt_actimeo, "actimeo=%s" }, - { Opt_namelen, "namlen=%s" }, - { Opt_mountport, "mountport=%s" }, - { Opt_mountvers, "mountvers=%s" }, - { Opt_minorversion, "minorversion=%s" }, - - { Opt_nfsvers, "nfsvers=%s" }, - { Opt_nfsvers, "vers=%s" }, - - { Opt_sec, "sec=%s" }, - { Opt_proto, "proto=%s" }, - { Opt_mountproto, "mountproto=%s" }, - { Opt_addr, "addr=%s" }, - { Opt_clientaddr, "clientaddr=%s" }, - { Opt_mounthost, "mounthost=%s" }, - { Opt_mountaddr, "mountaddr=%s" }, - - { Opt_nconnect, "nconnect=%s" }, - - { Opt_lookupcache, "lookupcache=%s" }, - { Opt_fscache_uniq, "fsc=%s" }, - { Opt_local_lock, "local_lock=%s" }, - - /* The following needs to be listed after all other options */ - { Opt_nfsvers, "v%s" }, - - { Opt_err, NULL } -}; - -enum { - Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, - Opt_xprt_rdma6, - - Opt_xprt_err -}; - -static const match_table_t nfs_xprt_protocol_tokens = { - { Opt_xprt_udp, "udp" }, - { Opt_xprt_udp6, "udp6" }, - { Opt_xprt_tcp, "tcp" }, - { Opt_xprt_tcp6, "tcp6" }, - { Opt_xprt_rdma, "rdma" }, - { Opt_xprt_rdma6, "rdma6" }, - - { Opt_xprt_err, NULL } -}; - -enum { - Opt_sec_none, Opt_sec_sys, - Opt_sec_krb5, Opt_sec_krb5i, Opt_sec_krb5p, - Opt_sec_lkey, Opt_sec_lkeyi, Opt_sec_lkeyp, - Opt_sec_spkm, Opt_sec_spkmi, Opt_sec_spkmp, - - Opt_sec_err -}; - -static const match_table_t nfs_secflavor_tokens = { - { Opt_sec_none, "none" }, - { Opt_sec_none, "null" }, - { Opt_sec_sys, "sys" }, - - { Opt_sec_krb5, "krb5" }, - { Opt_sec_krb5i, "krb5i" }, - { Opt_sec_krb5p, "krb5p" }, - - { Opt_sec_lkey, "lkey" }, - { Opt_sec_lkeyi, "lkeyi" }, - { Opt_sec_lkeyp, "lkeyp" }, - - { Opt_sec_spkm, "spkm3" }, - 
{ Opt_sec_spkmi, "spkm3i" }, - { Opt_sec_spkmp, "spkm3p" }, - - { Opt_sec_err, NULL } -}; - -enum { - Opt_lookupcache_all, Opt_lookupcache_positive, - Opt_lookupcache_none, - - Opt_lookupcache_err -}; - -static match_table_t nfs_lookupcache_tokens = { - { Opt_lookupcache_all, "all" }, - { Opt_lookupcache_positive, "pos" }, - { Opt_lookupcache_positive, "positive" }, - { Opt_lookupcache_none, "none" }, - - { Opt_lookupcache_err, NULL } -}; - -enum { - Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix, - Opt_local_lock_none, - - Opt_local_lock_err -}; - -static match_table_t nfs_local_lock_tokens = { - { Opt_local_lock_all, "all" }, - { Opt_local_lock_flock, "flock" }, - { Opt_local_lock_posix, "posix" }, - { Opt_local_lock_none, "none" }, - - { Opt_local_lock_err, NULL } -}; - -enum { - Opt_vers_2, Opt_vers_3, Opt_vers_4, Opt_vers_4_0, - Opt_vers_4_1, Opt_vers_4_2, - - Opt_vers_err -}; - -static match_table_t nfs_vers_tokens = { - { Opt_vers_2, "2" }, - { Opt_vers_3, "3" }, - { Opt_vers_4, "4" }, - { Opt_vers_4_0, "4.0" }, - { Opt_vers_4_1, "4.1" }, - { Opt_vers_4_2, "4.2" }, - - { Opt_vers_err, NULL } -}; - -static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data); - -struct file_system_type nfs_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; -MODULE_ALIAS_FS("nfs"); -EXPORT_SYMBOL_GPL(nfs_fs_type); - -struct file_system_type nfs_xdev_fs_type = { - .owner = THIS_MODULE, - .name = "nfs", - .mount = nfs_xdev_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, @@ -326,26 +82,10 @@ const struct super_operations nfs_sops = { .show_devname = nfs_show_devname, .show_path = nfs_show_path, .show_stats = nfs_show_stats, - .remount_fs = nfs_remount, }; 
EXPORT_SYMBOL_GPL(nfs_sops); #if IS_ENABLED(CONFIG_NFS_V4) -static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *); -static int nfs4_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, const char *dev_name); - -struct file_system_type nfs4_fs_type = { - .owner = THIS_MODULE, - .name = "nfs4", - .mount = nfs_fs_mount, - .kill_sb = nfs_kill_super, - .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA, -}; -MODULE_ALIAS_FS("nfs4"); -MODULE_ALIAS("nfs4"); -EXPORT_SYMBOL_GPL(nfs4_fs_type); - static int __init register_nfs4_fs(void) { return register_filesystem(&nfs4_fs_type); @@ -635,6 +375,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", "" }, { NFS_MOUNT_SOFTERR, ",softerr", "" }, + { NFS_MOUNT_SOFTREVAL, ",softreval", "" }, { NFS_MOUNT_POSIX, ",posix", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, @@ -931,141 +672,6 @@ void nfs_umount_begin(struct super_block *sb) } EXPORT_SYMBOL_GPL(nfs_umount_begin); -static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) -{ - struct nfs_parsed_mount_data *data; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (data) { - data->timeo = NFS_UNSPEC_TIMEO; - data->retrans = NFS_UNSPEC_RETRANS; - data->acregmin = NFS_DEF_ACREGMIN; - data->acregmax = NFS_DEF_ACREGMAX; - data->acdirmin = NFS_DEF_ACDIRMIN; - data->acdirmax = NFS_DEF_ACDIRMAX; - data->mount_server.port = NFS_UNSPEC_PORT; - data->nfs_server.port = NFS_UNSPEC_PORT; - data->nfs_server.protocol = XPRT_TRANSPORT_TCP; - data->selected_flavor = RPC_AUTH_MAXFLAVOR; - data->minorversion = 0; - data->need_mount = true; - data->net = current->nsproxy->net_ns; - data->lsm_opts = NULL; - } - return data; -} - -static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data) -{ - if (data) { - kfree(data->client_address); - kfree(data->mount_server.hostname); - kfree(data->nfs_server.export_path); - 
kfree(data->nfs_server.hostname); - kfree(data->fscache_uniq); - security_free_mnt_opts(&data->lsm_opts); - kfree(data); - } -} - -/* - * Sanity-check a server address provided by the mount command. - * - * Address family must be initialized, and address must not be - * the ANY address for that family. - */ -static int nfs_verify_server_address(struct sockaddr *addr) -{ - switch (addr->sa_family) { - case AF_INET: { - struct sockaddr_in *sa = (struct sockaddr_in *)addr; - return sa->sin_addr.s_addr != htonl(INADDR_ANY); - } - case AF_INET6: { - struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr; - return !ipv6_addr_any(sa); - } - } - - dfprintk(MOUNT, "NFS: Invalid IP address specified\n"); - return 0; -} - -/* - * Select between a default port value and a user-specified port value. - * If a zero value is set, then autobind will be used. - */ -static void nfs_set_port(struct sockaddr *sap, int *port, - const unsigned short default_port) -{ - if (*port == NFS_UNSPEC_PORT) - *port = default_port; - - rpc_set_port(sap, *port); -} - -/* - * Sanity check the NFS transport protocol. - * - */ -static void nfs_validate_transport_protocol(struct nfs_parsed_mount_data *mnt) -{ - switch (mnt->nfs_server.protocol) { - case XPRT_TRANSPORT_UDP: - case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_RDMA: - break; - default: - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - } -} - -/* - * For text based NFSv2/v3 mounts, the mount protocol transport default - * settings should depend upon the specified NFS transport. 
- */ -static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) -{ - nfs_validate_transport_protocol(mnt); - - if (mnt->mount_server.protocol == XPRT_TRANSPORT_UDP || - mnt->mount_server.protocol == XPRT_TRANSPORT_TCP) - return; - switch (mnt->nfs_server.protocol) { - case XPRT_TRANSPORT_UDP: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; - break; - case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_RDMA: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; - } -} - -/* - * Add 'flavor' to 'auth_info' if not already present. - * Returns true if 'flavor' ends up in the list, false otherwise - */ -static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, - rpc_authflavor_t flavor) -{ - unsigned int i; - unsigned int max_flavor_len = ARRAY_SIZE(auth_info->flavors); - - /* make sure this flavor isn't already in the list */ - for (i = 0; i < auth_info->flavor_len; i++) { - if (flavor == auth_info->flavors[i]) - return true; - } - - if (auth_info->flavor_len + 1 >= max_flavor_len) { - dfprintk(MOUNT, "NFS: too many sec= flavors\n"); - return false; - } - - auth_info->flavors[auth_info->flavor_len++] = flavor; - return true; -} - /* * Return true if 'match' is in auth_info or auth_info is empty. * Return false otherwise. @@ -1087,633 +693,13 @@ bool nfs_auth_info_match(const struct nfs_auth_info *auth_info, EXPORT_SYMBOL_GPL(nfs_auth_info_match); /* - * Parse the value of the 'sec=' option. 
- */ -static int nfs_parse_security_flavors(char *value, - struct nfs_parsed_mount_data *mnt) -{ - substring_t args[MAX_OPT_ARGS]; - rpc_authflavor_t pseudoflavor; - char *p; - - dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); - - while ((p = strsep(&value, ":")) != NULL) { - switch (match_token(p, nfs_secflavor_tokens, args)) { - case Opt_sec_none: - pseudoflavor = RPC_AUTH_NULL; - break; - case Opt_sec_sys: - pseudoflavor = RPC_AUTH_UNIX; - break; - case Opt_sec_krb5: - pseudoflavor = RPC_AUTH_GSS_KRB5; - break; - case Opt_sec_krb5i: - pseudoflavor = RPC_AUTH_GSS_KRB5I; - break; - case Opt_sec_krb5p: - pseudoflavor = RPC_AUTH_GSS_KRB5P; - break; - case Opt_sec_lkey: - pseudoflavor = RPC_AUTH_GSS_LKEY; - break; - case Opt_sec_lkeyi: - pseudoflavor = RPC_AUTH_GSS_LKEYI; - break; - case Opt_sec_lkeyp: - pseudoflavor = RPC_AUTH_GSS_LKEYP; - break; - case Opt_sec_spkm: - pseudoflavor = RPC_AUTH_GSS_SPKM; - break; - case Opt_sec_spkmi: - pseudoflavor = RPC_AUTH_GSS_SPKMI; - break; - case Opt_sec_spkmp: - pseudoflavor = RPC_AUTH_GSS_SPKMP; - break; - default: - dfprintk(MOUNT, - "NFS: sec= option '%s' not recognized\n", p); - return 0; - } - - if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor)) - return 0; - } - - return 1; -} - -static int nfs_parse_version_string(char *string, - struct nfs_parsed_mount_data *mnt, - substring_t *args) -{ - mnt->flags &= ~NFS_MOUNT_VER3; - switch (match_token(string, nfs_vers_tokens, args)) { - case Opt_vers_2: - mnt->version = 2; - break; - case Opt_vers_3: - mnt->flags |= NFS_MOUNT_VER3; - mnt->version = 3; - break; - case Opt_vers_4: - /* Backward compatibility option. In future, - * the mount program should always supply - * a NFSv4 minor version number. 
- */ - mnt->version = 4; - break; - case Opt_vers_4_0: - mnt->version = 4; - mnt->minorversion = 0; - break; - case Opt_vers_4_1: - mnt->version = 4; - mnt->minorversion = 1; - break; - case Opt_vers_4_2: - mnt->version = 4; - mnt->minorversion = 2; - break; - default: - return 0; - } - return 1; -} - -static int nfs_get_option_str(substring_t args[], char **option) -{ - kfree(*option); - *option = match_strdup(args); - return !*option; -} - -static int nfs_get_option_ul(substring_t args[], unsigned long *option) -{ - int rc; - char *string; - - string = match_strdup(args); - if (string == NULL) - return -ENOMEM; - rc = kstrtoul(string, 10, option); - kfree(string); - - return rc; -} - -static int nfs_get_option_ul_bound(substring_t args[], unsigned long *option, - unsigned long l_bound, unsigned long u_bound) -{ - int ret; - - ret = nfs_get_option_ul(args, option); - if (ret != 0) - return ret; - if (*option < l_bound || *option > u_bound) - return -ERANGE; - return 0; -} - -/* - * Error-check and convert a string of mount options from user space into - * a data structure. The whole mount string is processed; bad options are - * skipped as they are encountered. If there were no errors, return 1; - * otherwise return 0 (zero). 
- */ -static int nfs_parse_mount_options(char *raw, - struct nfs_parsed_mount_data *mnt) -{ - char *p, *string; - int rc, sloppy = 0, invalid_option = 0; - unsigned short protofamily = AF_UNSPEC; - unsigned short mountfamily = AF_UNSPEC; - - if (!raw) { - dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); - return 1; - } - dfprintk(MOUNT, "NFS: nfs mount opts='%s'\n", raw); - - rc = security_sb_eat_lsm_opts(raw, &mnt->lsm_opts); - if (rc) - goto out_security_failure; - - while ((p = strsep(&raw, ",")) != NULL) { - substring_t args[MAX_OPT_ARGS]; - unsigned long option; - int token; - - if (!*p) - continue; - - dfprintk(MOUNT, "NFS: parsing nfs mount option '%s'\n", p); - - token = match_token(p, nfs_mount_option_tokens, args); - switch (token) { - - /* - * boolean options: foo/nofoo - */ - case Opt_soft: - mnt->flags |= NFS_MOUNT_SOFT; - mnt->flags &= ~NFS_MOUNT_SOFTERR; - break; - case Opt_softerr: - mnt->flags |= NFS_MOUNT_SOFTERR; - mnt->flags &= ~NFS_MOUNT_SOFT; - break; - case Opt_hard: - mnt->flags &= ~(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR); - break; - case Opt_posix: - mnt->flags |= NFS_MOUNT_POSIX; - break; - case Opt_noposix: - mnt->flags &= ~NFS_MOUNT_POSIX; - break; - case Opt_cto: - mnt->flags &= ~NFS_MOUNT_NOCTO; - break; - case Opt_nocto: - mnt->flags |= NFS_MOUNT_NOCTO; - break; - case Opt_ac: - mnt->flags &= ~NFS_MOUNT_NOAC; - break; - case Opt_noac: - mnt->flags |= NFS_MOUNT_NOAC; - break; - case Opt_lock: - mnt->flags &= ~NFS_MOUNT_NONLM; - mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_nolock: - mnt->flags |= NFS_MOUNT_NONLM; - mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_udp: - mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_tcp: - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_rdma: - mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ - mnt->nfs_server.protocol 
= XPRT_TRANSPORT_RDMA; - xprt_load_transport(p); - break; - case Opt_acl: - mnt->flags &= ~NFS_MOUNT_NOACL; - break; - case Opt_noacl: - mnt->flags |= NFS_MOUNT_NOACL; - break; - case Opt_rdirplus: - mnt->flags &= ~NFS_MOUNT_NORDIRPLUS; - break; - case Opt_nordirplus: - mnt->flags |= NFS_MOUNT_NORDIRPLUS; - break; - case Opt_sharecache: - mnt->flags &= ~NFS_MOUNT_UNSHARED; - break; - case Opt_nosharecache: - mnt->flags |= NFS_MOUNT_UNSHARED; - break; - case Opt_resvport: - mnt->flags &= ~NFS_MOUNT_NORESVPORT; - break; - case Opt_noresvport: - mnt->flags |= NFS_MOUNT_NORESVPORT; - break; - case Opt_fscache: - mnt->options |= NFS_OPTION_FSCACHE; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = NULL; - break; - case Opt_nofscache: - mnt->options &= ~NFS_OPTION_FSCACHE; - kfree(mnt->fscache_uniq); - mnt->fscache_uniq = NULL; - break; - case Opt_migration: - mnt->options |= NFS_OPTION_MIGRATION; - break; - case Opt_nomigration: - mnt->options &= ~NFS_OPTION_MIGRATION; - break; - - /* - * options that take numeric values - */ - case Opt_port: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) - goto out_invalid_value; - mnt->nfs_server.port = option; - break; - case Opt_rsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->rsize = option; - break; - case Opt_wsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->wsize = option; - break; - case Opt_bsize: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->bsize = option; - break; - case Opt_timeo: - if (nfs_get_option_ul_bound(args, &option, 1, INT_MAX)) - goto out_invalid_value; - mnt->timeo = option; - break; - case Opt_retrans: - if (nfs_get_option_ul_bound(args, &option, 0, INT_MAX)) - goto out_invalid_value; - mnt->retrans = option; - break; - case Opt_acregmin: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acregmin = option; - break; - case Opt_acregmax: - if (nfs_get_option_ul(args, &option)) - goto 
out_invalid_value; - mnt->acregmax = option; - break; - case Opt_acdirmin: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acdirmin = option; - break; - case Opt_acdirmax: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acdirmax = option; - break; - case Opt_actimeo: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->acregmin = mnt->acregmax = - mnt->acdirmin = mnt->acdirmax = option; - break; - case Opt_namelen: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - mnt->namlen = option; - break; - case Opt_mountport: - if (nfs_get_option_ul(args, &option) || - option > USHRT_MAX) - goto out_invalid_value; - mnt->mount_server.port = option; - break; - case Opt_mountvers: - if (nfs_get_option_ul(args, &option) || - option < NFS_MNT_VERSION || - option > NFS_MNT3_VERSION) - goto out_invalid_value; - mnt->mount_server.version = option; - break; - case Opt_minorversion: - if (nfs_get_option_ul(args, &option)) - goto out_invalid_value; - if (option > NFS4_MAX_MINOR_VERSION) - goto out_invalid_value; - mnt->minorversion = option; - break; - - /* - * options that take text values - */ - case Opt_nfsvers: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_version_string(string, mnt, args); - kfree(string); - if (!rc) - goto out_invalid_value; - break; - case Opt_sec: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - rc = nfs_parse_security_flavors(string, mnt); - kfree(string); - if (!rc) { - dfprintk(MOUNT, "NFS: unrecognized " - "security flavor\n"); - return 0; - } - break; - case Opt_proto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); - - protofamily = AF_INET; - switch (token) { - case Opt_xprt_udp6: - protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_udp: - mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = 
XPRT_TRANSPORT_UDP; - break; - case Opt_xprt_tcp6: - protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_tcp: - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_xprt_rdma6: - protofamily = AF_INET6; - /* fall through */ - case Opt_xprt_rdma: - /* vector side protocols to TCP */ - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(string); - break; - default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); - kfree(string); - return 0; - } - kfree(string); - break; - case Opt_mountproto: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_xprt_protocol_tokens, args); - kfree(string); - - mountfamily = AF_INET; - switch (token) { - case Opt_xprt_udp6: - mountfamily = AF_INET6; - /* fall through */ - case Opt_xprt_udp: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; - break; - case Opt_xprt_tcp6: - mountfamily = AF_INET6; - /* fall through */ - case Opt_xprt_tcp: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; - break; - case Opt_xprt_rdma: /* not used for side protocols */ - default: - dfprintk(MOUNT, "NFS: unrecognized " - "transport protocol\n"); - return 0; - } - break; - case Opt_addr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - mnt->nfs_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), - (struct sockaddr *) - &mnt->nfs_server.address, - sizeof(mnt->nfs_server.address)); - kfree(string); - if (mnt->nfs_server.addrlen == 0) - goto out_invalid_address; - break; - case Opt_clientaddr: - if (nfs_get_option_str(args, &mnt->client_address)) - goto out_nomem; - break; - case Opt_mounthost: - if (nfs_get_option_str(args, - &mnt->mount_server.hostname)) - goto out_nomem; - break; - case Opt_mountaddr: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - mnt->mount_server.addrlen = - rpc_pton(mnt->net, string, strlen(string), - 
(struct sockaddr *) - &mnt->mount_server.address, - sizeof(mnt->mount_server.address)); - kfree(string); - if (mnt->mount_server.addrlen == 0) - goto out_invalid_address; - break; - case Opt_nconnect: - if (nfs_get_option_ul_bound(args, &option, 1, NFS_MAX_CONNECTIONS)) - goto out_invalid_value; - mnt->nfs_server.nconnect = option; - break; - case Opt_lookupcache: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, - nfs_lookupcache_tokens, args); - kfree(string); - switch (token) { - case Opt_lookupcache_all: - mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); - break; - case Opt_lookupcache_positive: - mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; - mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; - break; - case Opt_lookupcache_none: - mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; - break; - default: - dfprintk(MOUNT, "NFS: invalid " - "lookupcache argument\n"); - return 0; - } - break; - case Opt_fscache_uniq: - if (nfs_get_option_str(args, &mnt->fscache_uniq)) - goto out_nomem; - mnt->options |= NFS_OPTION_FSCACHE; - break; - case Opt_local_lock: - string = match_strdup(args); - if (string == NULL) - goto out_nomem; - token = match_token(string, nfs_local_lock_tokens, - args); - kfree(string); - switch (token) { - case Opt_local_lock_all: - mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - case Opt_local_lock_flock: - mnt->flags |= NFS_MOUNT_LOCAL_FLOCK; - break; - case Opt_local_lock_posix: - mnt->flags |= NFS_MOUNT_LOCAL_FCNTL; - break; - case Opt_local_lock_none: - mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | - NFS_MOUNT_LOCAL_FCNTL); - break; - default: - dfprintk(MOUNT, "NFS: invalid " - "local_lock argument\n"); - return 0; - } - break; - - /* - * Special options - */ - case Opt_sloppy: - sloppy = 1; - dfprintk(MOUNT, "NFS: relaxing parsing rules\n"); - break; - case Opt_userspace: - case Opt_deprecated: - dfprintk(MOUNT, "NFS: ignoring mount 
option " - "'%s'\n", p); - break; - - default: - invalid_option = 1; - dfprintk(MOUNT, "NFS: unrecognized mount option " - "'%s'\n", p); - } - } - - if (!sloppy && invalid_option) - return 0; - - if (mnt->minorversion && mnt->version != 4) - goto out_minorversion_mismatch; - - if (mnt->options & NFS_OPTION_MIGRATION && - (mnt->version != 4 || mnt->minorversion != 0)) - goto out_migration_misuse; - - /* - * verify that any proto=/mountproto= options match the address - * families in the addr=/mountaddr= options. - */ - if (protofamily != AF_UNSPEC && - protofamily != mnt->nfs_server.address.ss_family) - goto out_proto_mismatch; - - if (mountfamily != AF_UNSPEC) { - if (mnt->mount_server.addrlen) { - if (mountfamily != mnt->mount_server.address.ss_family) - goto out_mountproto_mismatch; - } else { - if (mountfamily != mnt->nfs_server.address.ss_family) - goto out_mountproto_mismatch; - } - } - - return 1; - -out_mountproto_mismatch: - printk(KERN_INFO "NFS: mount server address does not match mountproto= " - "option\n"); - return 0; -out_proto_mismatch: - printk(KERN_INFO "NFS: server address does not match proto= option\n"); - return 0; -out_invalid_address: - printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); - return 0; -out_invalid_value: - printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p); - return 0; -out_minorversion_mismatch: - printk(KERN_INFO "NFS: mount option vers=%u does not support " - "minorversion=%u\n", mnt->version, mnt->minorversion); - return 0; -out_migration_misuse: - printk(KERN_INFO - "NFS: 'migration' not supported for this NFS version\n"); - return 0; -out_nomem: - printk(KERN_INFO "NFS: not enough memory to parse option\n"); - return 0; -out_security_failure: - printk(KERN_INFO "NFS: security options invalid: %d\n", rc); - return 0; -} - -/* - * Ensure that a specified authtype in args->auth_info is supported by - * the server. 
Returns 0 and sets args->selected_flavor if it's ok, and + * Ensure that a specified authtype in ctx->auth_info is supported by + * the server. Returns 0 and sets ctx->selected_flavor if it's ok, and * -EACCES if not. */ -static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, - rpc_authflavor_t *server_authlist, unsigned int count) +static int nfs_verify_authflavors(struct nfs_fs_context *ctx, + rpc_authflavor_t *server_authlist, + unsigned int count) { rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; bool found_auth_null = false; @@ -1734,7 +720,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, for (i = 0; i < count; i++) { flavor = server_authlist[i]; - if (nfs_auth_info_match(&args->auth_info, flavor)) + if (nfs_auth_info_match(&ctx->auth_info, flavor)) goto out; if (flavor == RPC_AUTH_NULL) @@ -1742,7 +728,7 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, } if (found_auth_null) { - flavor = args->auth_info.flavors[0]; + flavor = ctx->auth_info.flavors[0]; goto out; } @@ -1751,8 +737,8 @@ static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, return -EACCES; out: - args->selected_flavor = flavor; - dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor); + ctx->selected_flavor = flavor; + dfprintk(MOUNT, "NFS: using auth flavor %u\n", ctx->selected_flavor); return 0; } @@ -1760,50 +746,51 @@ out: * Use the remote server's MOUNT service to request the NFS file handle * corresponding to the provided path. 
*/ -static int nfs_request_mount(struct nfs_parsed_mount_data *args, +static int nfs_request_mount(struct fs_context *fc, struct nfs_fh *root_fh, rpc_authflavor_t *server_authlist, unsigned int *server_authlist_len) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct nfs_mount_request request = { .sap = (struct sockaddr *) - &args->mount_server.address, - .dirpath = args->nfs_server.export_path, - .protocol = args->mount_server.protocol, + &ctx->mount_server.address, + .dirpath = ctx->nfs_server.export_path, + .protocol = ctx->mount_server.protocol, .fh = root_fh, - .noresvport = args->flags & NFS_MOUNT_NORESVPORT, + .noresvport = ctx->flags & NFS_MOUNT_NORESVPORT, .auth_flav_len = server_authlist_len, .auth_flavs = server_authlist, - .net = args->net, + .net = fc->net_ns, }; int status; - if (args->mount_server.version == 0) { - switch (args->version) { + if (ctx->mount_server.version == 0) { + switch (ctx->version) { default: - args->mount_server.version = NFS_MNT3_VERSION; + ctx->mount_server.version = NFS_MNT3_VERSION; break; case 2: - args->mount_server.version = NFS_MNT_VERSION; + ctx->mount_server.version = NFS_MNT_VERSION; } } - request.version = args->mount_server.version; + request.version = ctx->mount_server.version; - if (args->mount_server.hostname) - request.hostname = args->mount_server.hostname; + if (ctx->mount_server.hostname) + request.hostname = ctx->mount_server.hostname; else - request.hostname = args->nfs_server.hostname; + request.hostname = ctx->nfs_server.hostname; /* * Construct the mount server's address. 
*/ - if (args->mount_server.address.ss_family == AF_UNSPEC) { - memcpy(request.sap, &args->nfs_server.address, - args->nfs_server.addrlen); - args->mount_server.addrlen = args->nfs_server.addrlen; + if (ctx->mount_server.address.sa_family == AF_UNSPEC) { + memcpy(request.sap, &ctx->nfs_server.address, + ctx->nfs_server.addrlen); + ctx->mount_server.addrlen = ctx->nfs_server.addrlen; } - request.salen = args->mount_server.addrlen; - nfs_set_port(request.sap, &args->mount_server.port, 0); + request.salen = ctx->mount_server.addrlen; + nfs_set_port(request.sap, &ctx->mount_server.port, 0); /* * Now ask the mount server to map our export path @@ -1819,20 +806,18 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args, return 0; } -static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +static struct nfs_server *nfs_try_mount_request(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); int status; unsigned int i; bool tried_auth_unix = false; bool auth_null_in_list = false; struct nfs_server *server = ERR_PTR(-EACCES); - struct nfs_parsed_mount_data *args = mount_info->parsed; rpc_authflavor_t authlist[NFS_MAX_SECFLAVORS]; unsigned int authlist_len = ARRAY_SIZE(authlist); - status = nfs_request_mount(args, mount_info->mntfh, authlist, - &authlist_len); + status = nfs_request_mount(fc, ctx->mntfh, authlist, &authlist_len); if (status) return ERR_PTR(status); @@ -1840,13 +825,13 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf * Was a sec= authflavor specified in the options? First, verify * whether the server supports it, and then just try to use it if so. 
*/ - if (args->auth_info.flavor_len > 0) { - status = nfs_verify_authflavors(args, authlist, authlist_len); + if (ctx->auth_info.flavor_len > 0) { + status = nfs_verify_authflavors(ctx, authlist, authlist_len); dfprintk(MOUNT, "NFS: using auth flavor %u\n", - args->selected_flavor); + ctx->selected_flavor); if (status) return ERR_PTR(status); - return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + return ctx->nfs_mod->rpc_ops->create_server(fc); } /* @@ -1872,8 +857,8 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Fallthrough */ } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); - args->selected_flavor = flavor; - server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + ctx->selected_flavor = flavor; + server = ctx->nfs_mod->rpc_ops->create_server(fc); if (!IS_ERR(server)) return server; } @@ -1888,348 +873,23 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Last chance! Try AUTH_UNIX */ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); - args->selected_flavor = RPC_AUTH_UNIX; - return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); + ctx->selected_flavor = RPC_AUTH_UNIX; + return ctx->nfs_mod->rpc_ops->create_server(fc); } -struct dentry *nfs_try_mount(int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +int nfs_try_get_tree(struct fs_context *fc) { - struct nfs_server *server; + struct nfs_fs_context *ctx = nfs_fc2context(fc); - if (mount_info->parsed->need_mount) - server = nfs_try_mount_request(mount_info, nfs_mod); + if (ctx->need_mount) + ctx->server = nfs_try_mount_request(fc); else - server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); - - if (IS_ERR(server)) - return ERR_CAST(server); - - return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod); -} -EXPORT_SYMBOL_GPL(nfs_try_mount); - -/* - * Split "dev_name" into "hostname:export_path". 
- * - * The leftmost colon demarks the split between the server's hostname - * and the export path. If the hostname starts with a left square - * bracket, then it may contain colons. - * - * Note: caller frees hostname and export path, even on error. - */ -static int nfs_parse_devname(const char *dev_name, - char **hostname, size_t maxnamlen, - char **export_path, size_t maxpathlen) -{ - size_t len; - char *end; - - if (unlikely(!dev_name || !*dev_name)) { - dfprintk(MOUNT, "NFS: device name not specified\n"); - return -EINVAL; - } - - /* Is the host name protected with square brakcets? */ - if (*dev_name == '[') { - end = strchr(++dev_name, ']'); - if (end == NULL || end[1] != ':') - goto out_bad_devname; - - len = end - dev_name; - end++; - } else { - char *comma; - - end = strchr(dev_name, ':'); - if (end == NULL) - goto out_bad_devname; - len = end - dev_name; - - /* kill possible hostname list: not supported */ - comma = strchr(dev_name, ','); - if (comma != NULL && comma < end) - len = comma - dev_name; - } - - if (len > maxnamlen) - goto out_hostname; - - /* N.B. 
caller will free nfs_server.hostname in all cases */ - *hostname = kstrndup(dev_name, len, GFP_KERNEL); - if (*hostname == NULL) - goto out_nomem; - len = strlen(++end); - if (len > maxpathlen) - goto out_path; - *export_path = kstrndup(end, len, GFP_KERNEL); - if (!*export_path) - goto out_nomem; - - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path); - return 0; - -out_bad_devname: - dfprintk(MOUNT, "NFS: device name not in host:path format\n"); - return -EINVAL; + ctx->server = ctx->nfs_mod->rpc_ops->create_server(fc); -out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); - return -ENOMEM; - -out_hostname: - dfprintk(MOUNT, "NFS: server hostname too long\n"); - return -ENAMETOOLONG; - -out_path: - dfprintk(MOUNT, "NFS: export pathname too long\n"); - return -ENAMETOOLONG; + return nfs_get_tree_common(fc); } +EXPORT_SYMBOL_GPL(nfs_try_get_tree); -/* - * Validate the NFS2/NFS3 mount data - * - fills in the mount root filehandle - * - * For option strings, user space handles the following behaviors: - * - * + DNS: mapping server host name to IP address ("addr=" option) - * - * + failure mode: how to behave if a mount request can't be handled - * immediately ("fg/bg" option) - * - * + retry: how often to retry a mount request ("retry=" option) - * - * + breaking back: trying proto=udp after proto=tcp, v2 after v3, - * mountproto=tcp after mountproto=udp, and so on - */ -static int nfs23_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - struct nfs_mount_data *data = (struct nfs_mount_data *)options; - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - int extra_flags = NFS_MOUNT_LEGACY_INTERFACE; - - if (data == NULL) - goto out_no_data; - - args->version = NFS_DEFAULT_VERSION; - switch (data->version) { - case 1: - data->namlen = 0; /* fall through */ - case 2: - data->bsize = 0; /* fall through */ - case 3: - if (data->flags & NFS_MOUNT_VER3) 
- goto out_no_v3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - /* Turn off security negotiation */ - extra_flags |= NFS_MOUNT_SECFLAVOUR; - /* fall through */ - case 4: - if (data->flags & NFS_MOUNT_SECFLAVOUR) - goto out_no_sec; - /* fall through */ - case 5: - memset(data->context, 0, sizeof(data->context)); - /* fall through */ - case 6: - if (data->flags & NFS_MOUNT_VER3) { - if (data->root.size > NFS3_FHSIZE || data->root.size == 0) - goto out_invalid_fh; - mntfh->size = data->root.size; - args->version = 3; - } else { - mntfh->size = NFS2_FHSIZE; - args->version = 2; - } - - - memcpy(mntfh->data, data->root.data, mntfh->size); - if (mntfh->size < sizeof(mntfh->data)) - memset(mntfh->data + mntfh->size, 0, - sizeof(mntfh->data) - mntfh->size); - - /* - * Translate to nfs_parsed_mount_data, which nfs_fill_super - * can deal with. - */ - args->flags = data->flags & NFS_MOUNT_FLAGMASK; - args->flags |= extra_flags; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->need_mount = false; - - memcpy(sap, &data->addr, sizeof(data->addr)); - args->nfs_server.addrlen = sizeof(data->addr); - args->nfs_server.port = ntohs(data->addr.sin_port); - if (sap->sa_family != AF_INET || - !nfs_verify_server_address(sap)) - goto out_no_address; - - if (!(data->flags & NFS_MOUNT_TCP)) - args->nfs_server.protocol = XPRT_TRANSPORT_UDP; - /* N.B. 
caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); - args->namlen = data->namlen; - args->bsize = data->bsize; - - if (data->flags & NFS_MOUNT_SECFLAVOUR) - args->selected_flavor = data->pseudoflavor; - else - args->selected_flavor = RPC_AUTH_UNIX; - if (!args->nfs_server.hostname) - goto out_nomem; - - if (!(data->flags & NFS_MOUNT_NONLM)) - args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| - NFS_MOUNT_LOCAL_FCNTL); - else - args->flags |= (NFS_MOUNT_LOCAL_FLOCK| - NFS_MOUNT_LOCAL_FCNTL); - /* - * The legacy version 6 binary mount data from userspace has a - * field used only to transport selinux information into the - * the kernel. To continue to support that functionality we - * have a touch of selinux knowledge here in the NFS code. The - * userspace code converted context=blah to just blah so we are - * converting back to the full string selinux understands. - */ - if (data->context[0]){ -#ifdef CONFIG_SECURITY_SELINUX - int rc; - data->context[NFS_MAX_CONTEXT_LEN] = '\0'; - rc = security_add_mnt_opt("context", data->context, - strlen(data->context), &args->lsm_opts); - if (rc) - return rc; -#else - return -EINVAL; -#endif - } - - break; - default: - return NFS_TEXT_DATA; - } - - return 0; - -out_no_data: - dfprintk(MOUNT, "NFS: mount program didn't pass any mount data\n"); - return -EINVAL; - -out_no_v3: - dfprintk(MOUNT, "NFS: nfs_mount_data version %d does not support v3\n", - data->version); - return -EINVAL; - -out_no_sec: - dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n"); - return -EINVAL; - -out_nomem: - dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); - return -ENOMEM; - -out_no_address: - dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); - return -EINVAL; - -out_invalid_fh: - dfprintk(MOUNT, "NFS: invalid root filehandle\n"); - return -EINVAL; -} - -#if IS_ENABLED(CONFIG_NFS_V4) -static int nfs_validate_mount_data(struct 
file_system_type *fs_type, - void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - if (fs_type == &nfs_fs_type) - return nfs23_validate_mount_data(options, args, mntfh, dev_name); - return nfs4_validate_mount_data(options, args, dev_name); -} -#else -static int nfs_validate_mount_data(struct file_system_type *fs_type, - void *options, - struct nfs_parsed_mount_data *args, - struct nfs_fh *mntfh, - const char *dev_name) -{ - return nfs23_validate_mount_data(options, args, mntfh, dev_name); -} -#endif - -static int nfs_validate_text_mount_data(void *options, - struct nfs_parsed_mount_data *args, - const char *dev_name) -{ - int port = 0; - int max_namelen = PAGE_SIZE; - int max_pathlen = NFS_MAXPATHLEN; - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - - if (nfs_parse_mount_options((char *)options, args) == 0) - return -EINVAL; - - if (!nfs_verify_server_address(sap)) - goto out_no_address; - - if (args->version == 4) { -#if IS_ENABLED(CONFIG_NFS_V4) - if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) - port = NFS_RDMA_PORT; - else - port = NFS_PORT; - max_namelen = NFS4_MAXNAMLEN; - max_pathlen = NFS4_MAXPATHLEN; - nfs_validate_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) - goto out_invalid_transport_udp; - nfs4_validate_mount_flags(args); -#else - goto out_v4_not_compiled; -#endif /* CONFIG_NFS_V4 */ - } else { - nfs_set_mount_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_RDMA) - port = NFS_RDMA_PORT; - } - - nfs_set_port(sap, &args->nfs_server.port, port); - - return nfs_parse_devname(dev_name, - &args->nfs_server.hostname, - max_namelen, - &args->nfs_server.export_path, - max_pathlen); - -#if !IS_ENABLED(CONFIG_NFS_V4) -out_v4_not_compiled: - dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n"); - return -EPROTONOSUPPORT; -#else -out_invalid_transport_udp: - dfprintk(MOUNT, "NFSv4: Unsupported transport protocol 
udp\n"); - return -EINVAL; -#endif /* !CONFIG_NFS_V4 */ - -out_no_address: - dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); - return -EINVAL; -} #define NFS_REMOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \ | NFS_MOUNT_SECURE \ @@ -2246,39 +906,35 @@ out_no_address: static int nfs_compare_remount_data(struct nfs_server *nfss, - struct nfs_parsed_mount_data *data) -{ - if ((data->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK || - data->rsize != nfss->rsize || - data->wsize != nfss->wsize || - data->version != nfss->nfs_client->rpc_ops->version || - data->minorversion != nfss->nfs_client->cl_minorversion || - data->retrans != nfss->client->cl_timeout->to_retries || - !nfs_auth_info_match(&data->auth_info, nfss->client->cl_auth->au_flavor) || - data->acregmin != nfss->acregmin / HZ || - data->acregmax != nfss->acregmax / HZ || - data->acdirmin != nfss->acdirmin / HZ || - data->acdirmax != nfss->acdirmax / HZ || - data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || - (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) || - data->nfs_server.port != nfss->port || - data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || - !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address, + struct nfs_fs_context *ctx) +{ + if ((ctx->flags ^ nfss->flags) & NFS_REMOUNT_CMP_FLAGMASK || + ctx->rsize != nfss->rsize || + ctx->wsize != nfss->wsize || + ctx->version != nfss->nfs_client->rpc_ops->version || + ctx->minorversion != nfss->nfs_client->cl_minorversion || + ctx->retrans != nfss->client->cl_timeout->to_retries || + !nfs_auth_info_match(&ctx->auth_info, nfss->client->cl_auth->au_flavor) || + ctx->acregmin != nfss->acregmin / HZ || + ctx->acregmax != nfss->acregmax / HZ || + ctx->acdirmin != nfss->acdirmin / HZ || + ctx->acdirmax != nfss->acdirmax / HZ || + ctx->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || + (ctx->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) || + 
ctx->nfs_server.port != nfss->port || + ctx->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || + !rpc_cmp_addr((struct sockaddr *)&ctx->nfs_server.address, (struct sockaddr *)&nfss->nfs_client->cl_addr)) return -EINVAL; return 0; } -int -nfs_remount(struct super_block *sb, int *flags, char *raw_data) +int nfs_reconfigure(struct fs_context *fc) { - int error; + struct nfs_fs_context *ctx = nfs_fc2context(fc); + struct super_block *sb = fc->root->d_sb; struct nfs_server *nfss = sb->s_fs_info; - struct nfs_parsed_mount_data *data; - struct nfs_mount_data *options = (struct nfs_mount_data *)raw_data; - struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; - u32 nfsvers = nfss->nfs_client->rpc_ops->version; sync_filesystem(sb); @@ -2288,92 +944,38 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) * ones were explicitly specified. Fall back to legacy behavior and * just return success. */ - if ((nfsvers == 4 && (!options4 || options4->version == 1)) || - (nfsvers <= 3 && (!options || (options->version >= 1 && - options->version <= 6)))) + if (ctx->skip_reconfig_option_check) return 0; - data = nfs_alloc_parsed_mount_data(); - if (data == NULL) - return -ENOMEM; - - /* fill out struct with values from existing mount */ - data->flags = nfss->flags; - data->rsize = nfss->rsize; - data->wsize = nfss->wsize; - data->retrans = nfss->client->cl_timeout->to_retries; - data->selected_flavor = nfss->client->cl_auth->au_flavor; - data->acregmin = nfss->acregmin / HZ; - data->acregmax = nfss->acregmax / HZ; - data->acdirmin = nfss->acdirmin / HZ; - data->acdirmax = nfss->acdirmax / HZ; - data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; - data->nfs_server.port = nfss->port; - data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; - data->version = nfsvers; - data->minorversion = nfss->nfs_client->cl_minorversion; - data->net = current->nsproxy->net_ns; - memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, - 
data->nfs_server.addrlen); - - /* overwrite those values with any that were specified */ - error = -EINVAL; - if (!nfs_parse_mount_options((char *)options, data)) - goto out; - /* * noac is a special case. It implies -o sync, but that's not - * necessarily reflected in the mtab options. do_remount_sb + * necessarily reflected in the mtab options. reconfigure_super * will clear SB_SYNCHRONOUS if -o sync wasn't specified in the * remount options, so we have to explicitly reset it. */ - if (data->flags & NFS_MOUNT_NOAC) - *flags |= SB_SYNCHRONOUS; + if (ctx->flags & NFS_MOUNT_NOAC) { + fc->sb_flags |= SB_SYNCHRONOUS; + fc->sb_flags_mask |= SB_SYNCHRONOUS; + } /* compare new mount options with old ones */ - error = nfs_compare_remount_data(nfss, data); - if (!error) - error = security_sb_remount(sb, data->lsm_opts); -out: - nfs_free_parsed_mount_data(data); - return error; -} -EXPORT_SYMBOL_GPL(nfs_remount); - -/* - * Initialise the common bits of the superblock - */ -static void nfs_initialise_sb(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - sb->s_magic = NFS_SUPER_MAGIC; - - /* We probably want something more informative here */ - snprintf(sb->s_id, sizeof(sb->s_id), - "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - - if (sb->s_blocksize == 0) - sb->s_blocksize = nfs_block_bits(server->wsize, - &sb->s_blocksize_bits); - - nfs_super_set_maxbytes(sb, server->maxfilesize); + return nfs_compare_remount_data(nfss, ctx); } +EXPORT_SYMBOL_GPL(nfs_reconfigure); /* - * Finish setting up an NFS2/3 superblock + * Finish setting up an NFS superblock */ -void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) +static void nfs_fill_super(struct super_block *sb, struct nfs_fs_context *ctx) { - struct nfs_parsed_mount_data *data = mount_info->parsed; struct nfs_server *server = NFS_SB(sb); sb->s_blocksize_bits = 0; sb->s_blocksize = 0; sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr; sb->s_op = 
server->nfs_client->cl_nfs_mod->sops; - if (data && data->bsize) - sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits); + if (ctx && ctx->bsize) + sb->s_blocksize = nfs_block_size(ctx->bsize, &sb->s_blocksize_bits); if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do @@ -2393,53 +995,27 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) sb->s_time_max = S64_MAX; } - nfs_initialise_sb(sb); -} -EXPORT_SYMBOL_GPL(nfs_fill_super); - -/* - * Finish setting up a cloned NFS2/3/4 superblock - */ -static void nfs_clone_super(struct super_block *sb, - struct nfs_mount_info *mount_info) -{ - const struct super_block *old_sb = mount_info->cloned->sb; - struct nfs_server *server = NFS_SB(sb); - - sb->s_blocksize_bits = old_sb->s_blocksize_bits; - sb->s_blocksize = old_sb->s_blocksize; - sb->s_maxbytes = old_sb->s_maxbytes; - sb->s_xattr = old_sb->s_xattr; - sb->s_op = old_sb->s_op; - sb->s_export_op = old_sb->s_export_op; + sb->s_magic = NFS_SUPER_MAGIC; - if (server->nfs_client->rpc_ops->version != 2) { - /* The VFS shouldn't apply the umask to mode bits. We will do - * so ourselves when necessary. 
- */ - sb->s_flags |= SB_POSIXACL; - sb->s_time_gran = 1; - } else - sb->s_time_gran = 1000; + /* We probably want something more informative here */ + snprintf(sb->s_id, sizeof(sb->s_id), + "%u:%u", MAJOR(sb->s_dev), MINOR(sb->s_dev)); - if (server->nfs_client->rpc_ops->version != 4) { - sb->s_time_min = 0; - sb->s_time_max = U32_MAX; - } else { - sb->s_time_min = S64_MIN; - sb->s_time_max = S64_MAX; - } + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); - nfs_initialise_sb(sb); + nfs_super_set_maxbytes(sb, server->maxfilesize); } -static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) +static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, + const struct fs_context *fc) { const struct nfs_server *a = s->s_fs_info; const struct rpc_clnt *clnt_a = a->client; const struct rpc_clnt *clnt_b = b->client; - if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK)) + if ((s->s_flags & NFS_SB_MASK) != (fc->sb_flags & NFS_SB_MASK)) goto Ebusy; if (a->nfs_client != b->nfs_client) goto Ebusy; @@ -2464,19 +1040,11 @@ Ebusy: return 0; } -struct nfs_sb_mountdata { - struct nfs_server *server; - int mntflags; -}; - -static int nfs_set_super(struct super_block *s, void *data) +static int nfs_set_super(struct super_block *s, struct fs_context *fc) { - struct nfs_sb_mountdata *sb_mntdata = data; - struct nfs_server *server = sb_mntdata->server; + struct nfs_server *server = fc->s_fs_info; int ret; - s->s_flags = sb_mntdata->mntflags; - s->s_fs_info = server; s->s_d_op = server->nfs_client->rpc_ops->dentry_ops; ret = set_anon_super(s, server); if (ret == 0) @@ -2541,11 +1109,9 @@ static int nfs_compare_userns(const struct nfs_server *old, return 1; } -static int nfs_compare_super(struct super_block *sb, void *data) +static int nfs_compare_super(struct super_block *sb, struct fs_context *fc) { - struct nfs_sb_mountdata *sb_mntdata = data; - 
struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb); - int mntflags = sb_mntdata->mntflags; + struct nfs_server *server = fc->s_fs_info, *old = NFS_SB(sb); if (!nfs_compare_super_address(old, server)) return 0; @@ -2556,13 +1122,12 @@ static int nfs_compare_super(struct super_block *sb, void *data) return 0; if (!nfs_compare_userns(old, server)) return 0; - return nfs_compare_mount_options(sb, server, mntflags); + return nfs_compare_mount_options(sb, server, fc); } #ifdef CONFIG_NFS_FSCACHE static void nfs_get_cache_cookie(struct super_block *sb, - struct nfs_parsed_mount_data *parsed, - struct nfs_clone_mount *cloned) + struct nfs_fs_context *ctx) { struct nfs_server *nfss = NFS_SB(sb); char *uniq = NULL; @@ -2571,80 +1136,36 @@ static void nfs_get_cache_cookie(struct super_block *sb, nfss->fscache_key = NULL; nfss->fscache = NULL; - if (parsed) { - if (!(parsed->options & NFS_OPTION_FSCACHE)) - return; - if (parsed->fscache_uniq) { - uniq = parsed->fscache_uniq; - ulen = strlen(parsed->fscache_uniq); - } - } else if (cloned) { - struct nfs_server *mnt_s = NFS_SB(cloned->sb); + if (!ctx) + return; + + if (ctx->clone_data.sb) { + struct nfs_server *mnt_s = NFS_SB(ctx->clone_data.sb); if (!(mnt_s->options & NFS_OPTION_FSCACHE)) return; if (mnt_s->fscache_key) { uniq = mnt_s->fscache_key->key.uniquifier; ulen = mnt_s->fscache_key->key.uniq_len; } - } else + } else { + if (!(ctx->options & NFS_OPTION_FSCACHE)) + return; + if (ctx->fscache_uniq) { + uniq = ctx->fscache_uniq; + ulen = strlen(ctx->fscache_uniq); + } return; + } nfs_fscache_get_super_cookie(sb, uniq, ulen); } #else static void nfs_get_cache_cookie(struct super_block *sb, - struct nfs_parsed_mount_data *parsed, - struct nfs_clone_mount *cloned) + struct nfs_fs_context *ctx) { } #endif -int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) -{ - int error; - unsigned long kflags = 0, kflags_out = 0; - if (NFS_SB(s)->caps & 
NFS_CAP_SECURITY_LABEL) - kflags |= SECURITY_LSM_NATIVE_LABELS; - - error = security_sb_set_mnt_opts(s, mount_info->parsed->lsm_opts, - kflags, &kflags_out); - if (error) - goto err; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && - !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) - NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; -err: - return error; -} -EXPORT_SYMBOL_GPL(nfs_set_sb_security); - -int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot, - struct nfs_mount_info *mount_info) -{ - int error; - unsigned long kflags = 0, kflags_out = 0; - - /* clone any lsm security options from the parent to the new sb */ - if (d_inode(mntroot)->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) - return -ESTALE; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) - kflags |= SECURITY_LSM_NATIVE_LABELS; - - error = security_sb_clone_mnt_opts(mount_info->cloned->sb, s, kflags, - &kflags_out); - if (error) - return error; - - if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && - !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) - NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; - return 0; -} -EXPORT_SYMBOL_GPL(nfs_clone_sb_security); - static void nfs_set_readahead(struct backing_dev_info *bdi, unsigned long iomax_pages) { @@ -2652,35 +1173,40 @@ static void nfs_set_readahead(struct backing_dev_info *bdi, bdi->io_pages = iomax_pages; } -struct dentry *nfs_fs_mount_common(struct nfs_server *server, - int flags, const char *dev_name, - struct nfs_mount_info *mount_info, - struct nfs_subversion *nfs_mod) +int nfs_get_tree_common(struct fs_context *fc) { + struct nfs_fs_context *ctx = nfs_fc2context(fc); struct super_block *s; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - int (*compare_super)(struct super_block *, void *) = nfs_compare_super; - struct nfs_sb_mountdata sb_mntdata = { - .mntflags = flags, - .server = server, - }; + int (*compare_super)(struct super_block *, struct fs_context *) = nfs_compare_super; + struct nfs_server *server = ctx->server; + unsigned long 
kflags = 0, kflags_out = 0; int error; + ctx->server = NULL; + if (IS_ERR(server)) + return PTR_ERR(server); + if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; /* -o noac implies -o sync */ if (server->flags & NFS_MOUNT_NOAC) - sb_mntdata.mntflags |= SB_SYNCHRONOUS; + fc->sb_flags |= SB_SYNCHRONOUS; - if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL) - if (mount_info->cloned->sb->s_flags & SB_SYNCHRONOUS) - sb_mntdata.mntflags |= SB_SYNCHRONOUS; + if (ctx->clone_data.sb) + if (ctx->clone_data.sb->s_flags & SB_SYNCHRONOUS) + fc->sb_flags |= SB_SYNCHRONOUS; + + if (server->caps & NFS_CAP_SECURITY_LABEL) + fc->lsm_flags |= SECURITY_LSM_NATIVE_LABELS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata); + fc->s_fs_info = server; + s = sget_fc(fc, compare_super, nfs_set_super); + fc->s_fs_info = NULL; if (IS_ERR(s)) { - mntroot = ERR_CAST(s); + error = PTR_ERR(s); + nfs_errorf(fc, "NFS: Couldn't get superblock"); goto out_err_nosb; } @@ -2690,88 +1216,66 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, } else { error = super_setup_bdi_name(s, "%u:%u", MAJOR(server->s_dev), MINOR(server->s_dev)); - if (error) { - mntroot = ERR_PTR(error); + if (error) goto error_splat_super; - } nfs_set_readahead(s->s_bdi, server->rpages); server->super = s; } if (!s->s_root) { + unsigned bsize = ctx->clone_data.inherited_bsize; /* initial superblock/root creation */ - mount_info->fill_super(s, mount_info); - nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); - if (!(server->flags & NFS_MOUNT_UNSHARED)) - s->s_iflags |= SB_I_MULTIROOT; + nfs_fill_super(s, ctx); + if (bsize) { + s->s_blocksize_bits = bsize; + s->s_blocksize = 1U << bsize; + } + nfs_get_cache_cookie(s, ctx); } - mntroot = nfs_get_root(s, mount_info->mntfh, dev_name); - if (IS_ERR(mntroot)) + error = nfs_get_root(s, fc); + if (error < 0) { + nfs_errorf(fc, 
"NFS: Couldn't get root dentry"); goto error_splat_super; + } - error = mount_info->set_security(s, mntroot, mount_info); + if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL) + kflags |= SECURITY_LSM_NATIVE_LABELS; + if (ctx->clone_data.sb) { + if (d_inode(fc->root)->i_fop != &nfs_dir_operations) { + error = -ESTALE; + goto error_splat_root; + } + /* clone any lsm security options from the parent to the new sb */ + error = security_sb_clone_mnt_opts(ctx->clone_data.sb, s, kflags, + &kflags_out); + } else { + error = security_sb_set_mnt_opts(s, fc->security, + kflags, &kflags_out); + } if (error) goto error_splat_root; + if (NFS_SB(s)->caps & NFS_CAP_SECURITY_LABEL && + !(kflags_out & SECURITY_LSM_NATIVE_LABELS)) + NFS_SB(s)->caps &= ~NFS_CAP_SECURITY_LABEL; s->s_flags |= SB_ACTIVE; + error = 0; out: - return mntroot; + return error; out_err_nosb: nfs_free_server(server); goto out; error_splat_root: - dput(mntroot); - mntroot = ERR_PTR(error); + dput(fc->root); + fc->root = NULL; error_splat_super: deactivate_locked_super(s); goto out; } -EXPORT_SYMBOL_GPL(nfs_fs_mount_common); - -struct dentry *nfs_fs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) -{ - struct nfs_mount_info mount_info = { - .fill_super = nfs_fill_super, - .set_security = nfs_set_sb_security, - }; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - struct nfs_subversion *nfs_mod; - int error; - - mount_info.parsed = nfs_alloc_parsed_mount_data(); - mount_info.mntfh = nfs_alloc_fhandle(); - if (mount_info.parsed == NULL || mount_info.mntfh == NULL) - goto out; - - /* Validate the mount data */ - error = nfs_validate_mount_data(fs_type, raw_data, mount_info.parsed, mount_info.mntfh, dev_name); - if (error == NFS_TEXT_DATA) - error = nfs_validate_text_mount_data(raw_data, mount_info.parsed, dev_name); - if (error < 0) { - mntroot = ERR_PTR(error); - goto out; - } - - nfs_mod = get_nfs_version(mount_info.parsed->version); - if (IS_ERR(nfs_mod)) { - mntroot = 
ERR_CAST(nfs_mod); - goto out; - } - - mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod); - - put_nfs_version(nfs_mod); -out: - nfs_free_parsed_mount_data(mount_info.parsed); - nfs_free_fhandle(mount_info.mntfh); - return mntroot; -} -EXPORT_SYMBOL_GPL(nfs_fs_mount); /* * Destroy an NFS2/3 superblock @@ -2790,150 +1294,8 @@ void nfs_kill_super(struct super_block *s) } EXPORT_SYMBOL_GPL(nfs_kill_super); -/* - * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) - */ -static struct dentry * -nfs_xdev_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *raw_data) -{ - struct nfs_clone_mount *data = raw_data; - struct nfs_mount_info mount_info = { - .fill_super = nfs_clone_super, - .set_security = nfs_clone_sb_security, - .cloned = data, - }; - struct nfs_server *server; - struct dentry *mntroot = ERR_PTR(-ENOMEM); - struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; - - dprintk("--> nfs_xdev_mount()\n"); - - mount_info.mntfh = mount_info.cloned->fh; - - /* create a new volume representation */ - server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); - - if (IS_ERR(server)) - mntroot = ERR_CAST(server); - else - mntroot = nfs_fs_mount_common(server, flags, - dev_name, &mount_info, nfs_mod); - - dprintk("<-- nfs_xdev_mount() = %ld\n", - IS_ERR(mntroot) ? 
PTR_ERR(mntroot) : 0L); - return mntroot; -} - #if IS_ENABLED(CONFIG_NFS_V4) -static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) -{ - args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| - NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); -} - -/* - * Validate NFSv4 mount options - */ -static int nfs4_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, - const char *dev_name) -{ - struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address; - struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; - char *c; - - if (data == NULL) - goto out_no_data; - - args->version = 4; - - switch (data->version) { - case 1: - if (data->host_addrlen > sizeof(args->nfs_server.address)) - goto out_no_address; - if (data->host_addrlen == 0) - goto out_no_address; - args->nfs_server.addrlen = data->host_addrlen; - if (copy_from_user(sap, data->host_addr, data->host_addrlen)) - return -EFAULT; - if (!nfs_verify_server_address(sap)) - goto out_no_address; - args->nfs_server.port = ntohs(((struct sockaddr_in *)sap)->sin_port); - - if (data->auth_flavourlen) { - rpc_authflavor_t pseudoflavor; - if (data->auth_flavourlen > 1) - goto out_inval_auth; - if (copy_from_user(&pseudoflavor, - data->auth_flavours, - sizeof(pseudoflavor))) - return -EFAULT; - args->selected_flavor = pseudoflavor; - } else - args->selected_flavor = RPC_AUTH_UNIX; - - c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); - if (IS_ERR(c)) - return PTR_ERR(c); - args->nfs_server.hostname = c; - - c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); - if (IS_ERR(c)) - return PTR_ERR(c); - args->nfs_server.export_path = c; - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); - - c = strndup_user(data->client_addr.data, 16); - if (IS_ERR(c)) - return PTR_ERR(c); - args->client_address = c; - - /* - * Translate to nfs_parsed_mount_data, which nfs4_fill_super - * can deal with. 
- */ - - args->flags = data->flags & NFS4_MOUNT_FLAGMASK; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->nfs_server.protocol = data->proto; - nfs_validate_transport_protocol(args); - if (args->nfs_server.protocol == XPRT_TRANSPORT_UDP) - goto out_invalid_transport_udp; - - break; - default: - return NFS_TEXT_DATA; - } - - return 0; - -out_no_data: - dfprintk(MOUNT, "NFS4: mount program didn't pass any mount data\n"); - return -EINVAL; - -out_inval_auth: - dfprintk(MOUNT, "NFS4: Invalid number of RPC auth flavours %d\n", - data->auth_flavourlen); - return -EINVAL; - -out_no_address: - dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); - return -EINVAL; - -out_invalid_transport_udp: - dfprintk(MOUNT, "NFSv4: Unsupported transport protocol udp\n"); - return -EINVAL; -} - /* * NFS v4 module parameters need to stay in the * NFS client for backwards compatibility diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 52cab65f91cf..c478b772cc49 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -243,13 +243,24 @@ out: /* A writeback failed: mark the page as bad, and invalidate the page cache */ static void nfs_set_pageerror(struct address_space *mapping) { + struct inode *inode = mapping->host; + nfs_zap_mapping(mapping->host, mapping); + /* Force file size revalidation */ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED | + NFS_INO_REVAL_PAGECACHE | + NFS_INO_INVALID_SIZE; + spin_unlock(&inode->i_lock); } static void nfs_mapping_set_error(struct page *page, int error) { + struct address_space *mapping = page_file_mapping(page); + SetPageError(page); - mapping_set_error(page_file_mapping(page), error); + mapping_set_error(mapping, error); + nfs_set_pageerror(mapping); } /* @@ -592,7 +603,7 @@ 
release_request: static void nfs_write_error(struct nfs_page *req, int error) { - nfs_set_pageerror(page_file_mapping(req->wb_page)); + trace_nfs_write_error(req, error); nfs_mapping_set_error(req->wb_page, error); nfs_inode_remove_request(req); nfs_end_page_writeback(req); @@ -998,7 +1009,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) nfs_list_remove_request(req); if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && (hdr->good_bytes < bytes)) { - nfs_set_pageerror(page_file_mapping(req->wb_page)); + trace_nfs_comp_error(req, hdr->error); nfs_mapping_set_error(req->wb_page, hdr->error); goto remove_req; } @@ -1403,8 +1414,7 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr, task_setup_data->priority = priority; rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client); - trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes, - hdr->args.stable); + trace_nfs_initiate_write(hdr); } /* If a nfs_flush_* function fails, it should remove reqs from @head and @@ -1568,8 +1578,7 @@ static int nfs_writeback_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); - trace_nfs_writeback_done(inode, task->tk_status, - hdr->args.offset, hdr->res.verf); + trace_nfs_writeback_done(task, hdr); if (hdr->res.verf->committed < hdr->args.stable && task->tk_status >= 0) { @@ -1649,6 +1658,8 @@ static void nfs_writeback_result(struct rpc_task *task, */ argp->stable = NFS_FILE_SYNC; } + resp->count = 0; + resp->verf->committed = 0; rpc_restart_call_prepare(task); } } @@ -1824,11 +1835,12 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) /* Call the NFS version-specific code */ NFS_PROTO(data->inode)->commit_done(task, data); - trace_nfs_commit_done(data); + trace_nfs_commit_done(task, data); } static void nfs_commit_release_pages(struct nfs_commit_data *data) { + const struct nfs_writeverf *verf = data->res.verf; struct nfs_page *req; int status = data->task.tk_status; struct 
nfs_commit_info cinfo; @@ -1847,6 +1859,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) (long long)req_offset(req)); if (status < 0) { if (req->wb_page) { + trace_nfs_commit_error(req, status); nfs_mapping_set_error(req->wb_page, status); nfs_inode_remove_request(req); } @@ -1856,7 +1869,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { + if (verf->committed > NFS_UNSTABLE && + !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) { /* We have a match */ if (req->wb_page) nfs_inode_remove_request(req); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c06b1fd130f3..a5f8f03ecd59 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -168,6 +168,9 @@ struct nfs_inode { struct rw_semaphore rmdir_sem; struct mutex commit_mutex; + /* track last access to cached pages */ + unsigned long page_index; + #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index c176f705bf98..465fa98258a3 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -152,6 +152,7 @@ struct nfs_server { #define NFS_MOUNT_LOCAL_FLOCK 0x100000 #define NFS_MOUNT_LOCAL_FCNTL 0x200000 #define NFS_MOUNT_SOFTERR 0x400000 +#define NFS_MOUNT_SOFTREVAL 0x800000 unsigned int caps; /* server capabilities */ unsigned int rsize; /* read size */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 72d5695c1b47..94c77ed55ce1 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1639,6 +1639,7 @@ struct nfs_subversion; struct nfs_mount_info; struct nfs_client_initdata; struct nfs_pageio_descriptor; +struct fs_context; /* * RPC procedure vector for NFSv2/NFSv3 demuxing @@ -1653,16 +1654,14 @@ struct nfs_rpc_ops { 
int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); - struct vfsmount *(*submount) (struct nfs_server *, struct dentry *, - struct nfs_fh *, struct nfs_fattr *); - struct dentry *(*try_mount) (int, const char *, struct nfs_mount_info *, - struct nfs_subversion *); + int (*submount) (struct fs_context *, struct nfs_server *); + int (*try_get_tree) (struct fs_context *); int (*getattr) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *, struct inode *); int (*setattr) (struct dentry *, struct nfs_fattr *, struct iattr *); - int (*lookup) (struct inode *, const struct qstr *, + int (*lookup) (struct inode *, struct dentry *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *); int (*lookupp) (struct inode *, struct nfs_fh *, @@ -1723,7 +1722,7 @@ struct nfs_rpc_ops { struct nfs_client *(*init_client) (struct nfs_client *, const struct nfs_client_initdata *); void (*free_client) (struct nfs_client *); - struct nfs_server *(*create_server)(struct nfs_mount_info *, struct nfs_subversion *); + struct nfs_server *(*create_server)(struct fs_context *); struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); }; diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index e9ec742796e7..4f6b28487f28 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -113,7 +113,6 @@ struct rpc_authops { int (*hash_cred)(struct auth_cred *, unsigned int); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); - int (*list_pseudoflavors)(rpc_authflavor_t *, int); rpc_authflavor_t (*info2flavor)(struct rpcsec_gss_info *); int (*flavor2info)(rpc_authflavor_t, struct rpcsec_gss_info *); @@ -158,7 +157,6 @@ rpc_authflavor_t rpcauth_get_pseudoflavor(rpc_authflavor_t, struct rpcsec_gss_info *); int rpcauth_get_gssinfo(rpc_authflavor_t, struct 
rpcsec_gss_info *); -int rpcauth_list_flavors(rpc_authflavor_t *, int); struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int, gfp_t); void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *); struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int); diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index 1cc6cefb1220..48c1b1674cbf 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -150,9 +150,6 @@ struct gss_api_mech *gss_mech_get_by_name(const char *); /* Similar, but get by pseudoflavor. */ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); -/* Fill in an array with a list of supported pseudoflavors */ -int gss_mech_list_pseudoflavors(rpc_authflavor_t *, int); - struct gss_api_mech * gss_mech_get(struct gss_api_mech *); /* For every successful gss_mech_get or gss_mech_get_by_* call there must be a diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 18790582d2a5..c0e4c93324f5 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -729,6 +729,7 @@ TRACE_EVENT(xprtrdma_post_send, TP_STRUCT__entry( __field(const void *, req) + __field(const void *, sc) __field(unsigned int, task_id) __field(unsigned int, client_id) __field(int, num_sge) @@ -743,14 +744,15 @@ TRACE_EVENT(xprtrdma_post_send, __entry->client_id = rqst->rq_task->tk_client ? rqst->rq_task->tk_client->cl_clid : -1; __entry->req = req; + __entry->sc = req->rl_sendctx; __entry->num_sge = req->rl_wr.num_sge; __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; __entry->status = status; ), - TP_printk("task:%u@%u req=%p (%d SGE%s) %sstatus=%d", + TP_printk("task:%u@%u req=%p sc=%p (%d SGE%s) %sstatus=%d", __entry->task_id, __entry->client_id, - __entry->req, __entry->num_sge, + __entry->req, __entry->sc, __entry->num_sge, (__entry->num_sge == 1 ? "" : "s"), (__entry->signaled ? 
"signaled " : ""), __entry->status @@ -849,6 +851,7 @@ TRACE_EVENT(xprtrdma_wc_send, TP_STRUCT__entry( __field(const void *, req) + __field(const void *, sc) __field(unsigned int, unmap_count) __field(unsigned int, status) __field(unsigned int, vendor_err) @@ -856,13 +859,14 @@ TRACE_EVENT(xprtrdma_wc_send, TP_fast_assign( __entry->req = sc->sc_req; + __entry->sc = sc; __entry->unmap_count = sc->sc_unmap_count; __entry->status = wc->status; __entry->vendor_err = __entry->status ? wc->vendor_err : 0; ), - TP_printk("req=%p, unmapped %u pages: %s (%u/0x%x)", - __entry->req, __entry->unmap_count, + TP_printk("req=%p sc=%p unmapped=%u: %s (%u/0x%x)", + __entry->req, __entry->sc, __entry->unmap_count, rdma_show_wc_status(__entry->status), __entry->status, __entry->vendor_err ) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 8c73ffb5f7fd..ee993575d2fa 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -185,6 +185,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_RPC_RUNNING_EVENT(begin); DEFINE_RPC_RUNNING_EVENT(run_action); DEFINE_RPC_RUNNING_EVENT(complete); +DEFINE_RPC_RUNNING_EVENT(signalled); DEFINE_RPC_RUNNING_EVENT(end); DECLARE_EVENT_CLASS(rpc_task_queued, diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d024af4be85e..8b4d72b1a066 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -175,7 +175,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, return 0; len = (buf + buflen) - delim - 1; - p = kstrndup(delim + 1, len, GFP_KERNEL); + p = kmemdup_nul(delim + 1, len, GFP_KERNEL); if (p) { u32 scope_id = 0; struct net_device *dev; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index cdb05b48de44..5748ad0ba1bd 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -221,55 +221,6 @@ rpcauth_get_gssinfo(rpc_authflavor_t pseudoflavor, struct rpcsec_gss_info *info) } EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo); -/** - * rpcauth_list_flavors - discover registered flavors and 
pseudoflavors - * @array: array to fill in - * @size: size of "array" - * - * Returns the number of array items filled in, or a negative errno. - * - * The returned array is not sorted by any policy. Callers should not - * rely on the order of the items in the returned array. - */ -int -rpcauth_list_flavors(rpc_authflavor_t *array, int size) -{ - const struct rpc_authops *ops; - rpc_authflavor_t flavor, pseudos[4]; - int i, len, result = 0; - - rcu_read_lock(); - for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) { - ops = rcu_dereference(auth_flavors[flavor]); - if (result >= size) { - result = -ENOMEM; - break; - } - - if (ops == NULL) - continue; - if (ops->list_pseudoflavors == NULL) { - array[result++] = ops->au_flavor; - continue; - } - len = ops->list_pseudoflavors(pseudos, ARRAY_SIZE(pseudos)); - if (len < 0) { - result = len; - break; - } - for (i = 0; i < len; i++) { - if (result >= size) { - result = -ENOMEM; - break; - } - array[result++] = pseudos[i]; - } - } - rcu_read_unlock(); - return result; -} -EXPORT_SYMBOL_GPL(rpcauth_list_flavors); - struct rpc_auth * rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d75fddca44c9..24ca861815b1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2118,7 +2118,6 @@ static const struct rpc_authops authgss_ops = { .hash_cred = gss_hash_cred, .lookup_cred = gss_lookup_cred, .crcreate = gss_create_cred, - .list_pseudoflavors = gss_mech_list_pseudoflavors, .info2flavor = gss_mech_info2flavor, .flavor2info = gss_mech_flavor2info, }; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index d3685d4ed9e0..db550bfc2642 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -220,35 +220,6 @@ gss_mech_get_by_pseudoflavor(u32 pseudoflavor) } /** - * gss_mech_list_pseudoflavors - Discover 
registered GSS pseudoflavors - * @array_ptr: array to fill in - * @size: size of "array" - * - * Returns the number of array items filled in, or a negative errno. - * - * The returned array is not sorted by any policy. Callers should not - * rely on the order of the items in the returned array. - */ -int gss_mech_list_pseudoflavors(rpc_authflavor_t *array_ptr, int size) -{ - struct gss_api_mech *pos = NULL; - int j, i = 0; - - rcu_read_lock(); - list_for_each_entry_rcu(pos, &registered_mechs, gm_list) { - for (j = 0; j < pos->gm_pf_num; j++) { - if (i >= size) { - spin_unlock(&registered_mechs_lock); - return -ENOMEM; - } - array_ptr[i++] = pos->gm_pfs[j].pseudoflavor; - } - } - rcu_read_unlock(); - return i; -} - -/** * gss_svc_to_pseudoflavor - map a GSS service number to a pseudoflavor * @gm: GSS mechanism handle * @qop: GSS quality-of-protection value diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a3379765605d..7324b21f923e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2130,6 +2130,7 @@ call_connect_status(struct rpc_task *task) case -ENETUNREACH: case -EHOSTUNREACH: case -EPIPE: + case -EPROTO: xprt_conditional_disconnect(task->tk_rqstp->rq_xprt, task->tk_rqstp->rq_connect_cookie); if (RPC_IS_SOFTCONN(task)) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 9c79548c6847..55e900255b0c 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -846,6 +846,8 @@ void rpc_signal_task(struct rpc_task *task) if (!RPC_IS_ACTIVATED(task)) return; + + trace_rpc_task_signalled(task, task->tk_action); set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); smp_mb__after_atomic(); queue = READ_ONCE(task->tk_waitqueue); @@ -949,7 +951,7 @@ static void __rpc_execute(struct rpc_task *task) * clean up after sleeping on some queue, we don't * break the loop here, but go around once more.
*/ - dprintk("RPC: %5u got signal\n", task->tk_pid); + trace_rpc_task_signalled(task, task->tk_action); set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); task->tk_rpc_status = -ERESTARTSYS; rpc_exit(task, -ERESTARTSYS); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index f3104be8ff5d..e5497dc2475b 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1079,7 +1079,7 @@ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL_GPL(xdr_enter_page); -static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; +static const struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; void xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf) diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 9d02eae353c6..1a0ae0c61353 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -194,6 +194,10 @@ create_req: req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); if (!req) return NULL; + if (rpcrdma_req_setup(r_xprt, req)) { + rpcrdma_req_destroy(req); + return NULL; + } xprt->bc_alloc_count++; rqst = &req->rl_slot; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 523722be6a16..095be887753e 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -51,28 +51,6 @@ #endif /** - * frwr_is_supported - Check if device supports FRWR - * @device: interface adapter to check - * - * Returns true if device supports FRWR, otherwise false - */ -bool frwr_is_supported(struct ib_device *device) -{ - struct ib_device_attr *attrs = &device->attrs; - - if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) - goto out_not_supported; - if (attrs->max_fast_reg_page_list_len == 0) - goto out_not_supported; - return true; - -out_not_supported: - pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n", - device->name); - return false; -} - -/** * frwr_release_mr - Destroy one MR * @mr: MR allocated by frwr_init_mr * @@ -170,26 
+148,48 @@ out_list_err: } /** - * frwr_open - Prepare an endpoint for use with FRWR - * @ia: interface adapter this endpoint will use - * @ep: endpoint to prepare + * frwr_query_device - Prepare a transport for use with FRWR + * @r_xprt: controlling transport instance + * @device: RDMA device to query * * On success, sets: - * ep->rep_attr.cap.max_send_wr - * ep->rep_attr.cap.max_recv_wr + * ep->rep_attr * ep->rep_max_requests - * ia->ri_max_segs + * ia->ri_max_rdma_segs * * And these FRWR-related fields: * ia->ri_max_frwr_depth * ia->ri_mrtype * - * On failure, a negative errno is returned. + * Return values: + * On success, returns zero. + * %-EINVAL - the device does not support FRWR memory registration + * %-ENOMEM - the device is not sufficiently capable for NFS/RDMA */ -int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) +int frwr_query_device(struct rpcrdma_xprt *r_xprt, + const struct ib_device *device) { - struct ib_device_attr *attrs = &ia->ri_id->device->attrs; + const struct ib_device_attr *attrs = &device->attrs; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; int max_qp_wr, depth, delta; + unsigned int max_sge; + + if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) || + attrs->max_fast_reg_page_list_len == 0) { + pr_err("rpcrdma: 'frwr' mode is not supported by device %s\n", + device->name); + return -EINVAL; + } + + max_sge = min_t(unsigned int, attrs->max_send_sge, + RPCRDMA_MAX_SEND_SGES); + if (max_sge < RPCRDMA_MIN_SEND_SGES) { + pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge); + return -ENOMEM; + } + ep->rep_attr.cap.max_send_sge = max_sge; + ep->rep_attr.cap.max_recv_sge = 1; ia->ri_mrtype = IB_MR_TYPE_MEM_REG; if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) @@ -199,14 +199,12 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) * capability, but perform optimally when the MRs are not larger * than a page. 
*/ - if (attrs->max_sge_rd > 1) + if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS) ia->ri_max_frwr_depth = attrs->max_sge_rd; else ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len; if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS) ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS; - dprintk("RPC: %s: max FR page list depth = %u\n", - __func__, ia->ri_max_frwr_depth); /* Add room for frwr register and invalidate WRs. * 1. FRWR reg WR for head @@ -230,7 +228,7 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) } while (delta > 0); } - max_qp_wr = ia->ri_id->device->attrs.max_qp_wr; + max_qp_wr = attrs->max_qp_wr; max_qp_wr -= RPCRDMA_BACKWARD_WRS; max_qp_wr -= 1; if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) @@ -241,7 +239,7 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) if (ep->rep_attr.cap.max_send_wr > max_qp_wr) { ep->rep_max_requests = max_qp_wr / depth; if (!ep->rep_max_requests) - return -EINVAL; + return -ENOMEM; ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth; } ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; @@ -250,30 +248,22 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep) ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ - ia->ri_max_segs = + ia->ri_max_rdma_segs = DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth); /* Reply chunks require segments for head and tail buffers */ - ia->ri_max_segs += 2; - if (ia->ri_max_segs > RPCRDMA_MAX_HDR_SEGS) - ia->ri_max_segs = RPCRDMA_MAX_HDR_SEGS; - return 0; -} - -/** - * frwr_maxpages - Compute size of largest payload - * @r_xprt: transport - * - * Returns maximum size of an RPC message, in pages. - * - * FRWR mode conveys a list of pages per chunk segment. The - * maximum length of that list is the FRWR page list depth. 
- */ -size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + ia->ri_max_rdma_segs += 2; + if (ia->ri_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS) + ia->ri_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS; + + /* Ensure the underlying device is capable of conveying the + * largest r/wsize NFS will ask for. This guarantees that + * failing over from one RDMA device to another will not + * break NFS I/O. + */ + if ((ia->ri_max_rdma_segs * ia->ri_max_frwr_depth) < RPCRDMA_MAX_SEGS) + return -ENOMEM; - return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, - (ia->ri_max_segs - 2) * ia->ri_max_frwr_depth); + return 0; } /** diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index aec3beb93b25..28020ec104d4 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -111,7 +111,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) */ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) { - unsigned int maxsegs = r_xprt->rx_ia.ri_max_segs; + unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs; struct rpcrdma_ep *ep = &r_xprt->rx_ep; ep->rep_max_inline_send = @@ -145,7 +145,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, remaining -= min_t(unsigned int, PAGE_SIZE - offset, remaining); offset = 0; - if (++count > r_xprt->rx_ia.ri_max_send_sges) + if (++count > r_xprt->rx_ep.rep_attr.cap.max_send_sge) return false; } } @@ -580,22 +580,19 @@ void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc) /* Prepare an SGE for the RPC-over-RDMA transport header. 
*/ -static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt, +static void rpcrdma_prepare_hdr_sge(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, u32 len) { struct rpcrdma_sendctx *sc = req->rl_sendctx; struct rpcrdma_regbuf *rb = req->rl_rdmabuf; struct ib_sge *sge = &sc->sc_sges[req->rl_wr.num_sge++]; - if (!rpcrdma_regbuf_dma_map(r_xprt, rb)) - return false; sge->addr = rdmab_addr(rb); sge->length = len; sge->lkey = rdmab_lkey(rb); ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, DMA_TO_DEVICE); - return true; } /* The head iovec is straightforward, as it is usually already @@ -836,10 +833,9 @@ inline int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, req->rl_wr.num_sge = 0; req->rl_wr.opcode = IB_WR_SEND; - ret = -EIO; - if (!rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen)) - goto out_unmap; + rpcrdma_prepare_hdr_sge(r_xprt, req, hdrlen); + ret = -EIO; switch (rtype) { case rpcrdma_noch_pullup: if (!rpcrdma_prepare_noch_pullup(r_xprt, req, xdr)) @@ -909,7 +905,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) goto out_err; *p++ = rqst->rq_xid; *p++ = rpcrdma_version; - *p++ = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); + *p++ = r_xprt->rx_buf.rb_max_requests; /* When the ULP employs a GSS flavor that guarantees integrity * or privacy, direct data placement of individual data items @@ -1480,8 +1476,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) if (credits == 0) credits = 1; /* don't deadlock */ - else if (credits > buf->rb_max_requests) - credits = buf->rb_max_requests; + else if (credits > r_xprt->rx_ep.rep_max_requests) + credits = r_xprt->rx_ep.rep_max_requests; if (buf->rb_credits != credits) rpcrdma_update_cwnd(r_xprt, credits); rpcrdma_post_recvs(r_xprt, false); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 7395eb2cfdeb..3cfeba68ee9a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -316,7 +316,8 @@ 
xprt_setup_rdma(struct xprt_create *args) if (args->addrlen > sizeof(xprt->addr)) return ERR_PTR(-EBADF); - xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, 0); + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, + xprt_rdma_slot_table_entries); if (!xprt) return ERR_PTR(-ENOMEM); @@ -358,19 +359,13 @@ xprt_setup_rdma(struct xprt_create *args) if (rc) goto out3; - INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, - xprt_rdma_connect_worker); - - xprt->max_payload = frwr_maxpages(new_xprt); - if (xprt->max_payload == 0) - goto out4; - xprt->max_payload <<= PAGE_SHIFT; - dprintk("RPC: %s: transport data payload maximum: %zu bytes\n", - __func__, xprt->max_payload); - if (!try_module_get(THIS_MODULE)) goto out4; + INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, + xprt_rdma_connect_worker); + xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; + dprintk("RPC: %s: %s:%s\n", __func__, xprt->address_strings[RPC_DISPLAY_ADDR], xprt->address_strings[RPC_DISPLAY_PORT]); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fda3889993cb..353f61ac8d51 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -74,9 +74,13 @@ /* * internal functions */ +static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); +static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, struct rpcrdma_sendctx *sc); +static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); +static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); @@ -174,7 +178,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; out_flushed: - rpcrdma_recv_buffer_put(rep); + rpcrdma_rep_destroy(rep); } static void 
rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, @@ -366,18 +370,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) goto out_err; } - switch (xprt_rdma_memreg_strategy) { - case RPCRDMA_FRWR: - if (frwr_is_supported(ia->ri_id->device)) - break; - /*FALLTHROUGH*/ - default: - pr_err("rpcrdma: Device %s does not support memreg mode %d\n", - ia->ri_id->device->name, xprt_rdma_memreg_strategy); - rc = -EINVAL; - goto out_err; - } - return 0; out_err: @@ -391,6 +383,8 @@ out_err: * * Divest transport H/W resources associated with this adapter, * but allow it to be restored later. + * + * Caller must hold the transport send lock. */ void rpcrdma_ia_remove(struct rpcrdma_ia *ia) @@ -398,8 +392,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_req *req; /* This is similar to rpcrdma_ep_destroy, but: * - Don't cancel the connect worker. @@ -422,12 +414,9 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) * mappings and MRs are gone. 
*/ rpcrdma_reps_unmap(r_xprt); - list_for_each_entry(req, &buf->rb_allreqs, rl_all) { - rpcrdma_regbuf_dma_unmap(req->rl_rdmabuf); - rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); - rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); - } + rpcrdma_reqs_reset(r_xprt); rpcrdma_mrs_destroy(r_xprt); + rpcrdma_sendctxs_destroy(r_xprt); ib_dealloc_pd(ia->ri_pd); ia->ri_pd = NULL; @@ -470,30 +459,20 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; struct ib_cq *sendcq, *recvcq; - unsigned int max_sge; int rc; - ep->rep_max_requests = xprt_rdma_slot_table_entries; + ep->rep_max_requests = r_xprt->rx_xprt.max_reqs; ep->rep_inline_send = xprt_rdma_max_inline_write; ep->rep_inline_recv = xprt_rdma_max_inline_read; - max_sge = min_t(unsigned int, ia->ri_id->device->attrs.max_send_sge, - RPCRDMA_MAX_SEND_SGES); - if (max_sge < RPCRDMA_MIN_SEND_SGES) { - pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge); - return -ENOMEM; - } - ia->ri_max_send_sges = max_sge; - - rc = frwr_open(ia, ep); + rc = frwr_query_device(r_xprt, ia->ri_id->device); if (rc) return rc; + r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->rep_max_requests); ep->rep_attr.event_handler = rpcrdma_qp_event_handler; ep->rep_attr.qp_context = ep; ep->rep_attr.srq = NULL; - ep->rep_attr.cap.max_send_sge = max_sge; - ep->rep_attr.cap.max_recv_sge = 1; ep->rep_attr.cap.max_inline_data = 0; ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; ep->rep_attr.qp_type = IB_QPT_RC; @@ -716,6 +695,10 @@ retry: rpcrdma_reset_cwnd(r_xprt); rpcrdma_post_recvs(r_xprt, true); + rc = rpcrdma_sendctxs_create(r_xprt); + if (rc) + goto out; + rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); if (rc) goto out; @@ -730,6 +713,11 @@ retry: goto out; } + rc = rpcrdma_reqs_setup(r_xprt); + if (rc) { + rpcrdma_ep_disconnect(ep, ia); + goto out; + } rpcrdma_mrs_create(r_xprt); out: @@ -768,6 +756,7 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, 
struct rpcrdma_ia *ia) rpcrdma_xprt_drain(r_xprt); rpcrdma_reqs_reset(r_xprt); rpcrdma_mrs_destroy(r_xprt); + rpcrdma_sendctxs_destroy(r_xprt); } /* Fixed-size circular FIFO queue. This implementation is wait-free and @@ -787,20 +776,24 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) * queue activity, and rpcrdma_xprt_drain has flushed all remaining * Send requests. */ -static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf) +static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) { + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; unsigned long i; + if (!buf->rb_sc_ctxs) + return; for (i = 0; i <= buf->rb_sc_last; i++) kfree(buf->rb_sc_ctxs[i]); kfree(buf->rb_sc_ctxs); + buf->rb_sc_ctxs = NULL; } -static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia) +static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) { struct rpcrdma_sendctx *sc; - sc = kzalloc(struct_size(sc, sc_sges, ia->ri_max_send_sges), + sc = kzalloc(struct_size(sc, sc_sges, ep->rep_attr.cap.max_send_sge), GFP_KERNEL); if (!sc) return NULL; @@ -820,21 +813,22 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) * the ->send_request call to fail temporarily before too many * Sends are posted. 
*/ - i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; - dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i); + i = r_xprt->rx_ep.rep_max_requests + RPCRDMA_MAX_BC_REQUESTS; buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); if (!buf->rb_sc_ctxs) return -ENOMEM; buf->rb_sc_last = i - 1; for (i = 0; i <= buf->rb_sc_last; i++) { - sc = rpcrdma_sendctx_create(&r_xprt->rx_ia); + sc = rpcrdma_sendctx_create(&r_xprt->rx_ep); if (!sc) return -ENOMEM; buf->rb_sc_ctxs[i] = sc; } + buf->rb_sc_head = 0; + buf->rb_sc_tail = 0; return 0; } @@ -933,7 +927,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ia *ia = &r_xprt->rx_ia; unsigned int count; - for (count = 0; count < ia->ri_max_segs; count++) { + for (count = 0; count < ia->ri_max_rdma_segs; count++) { struct rpcrdma_mr *mr; int rc; @@ -1005,32 +999,19 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, gfp_t flags) { struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; - struct rpcrdma_regbuf *rb; struct rpcrdma_req *req; - size_t maxhdrsize; req = kzalloc(sizeof(*req), flags); if (req == NULL) goto out1; - /* Compute maximum header buffer size in bytes */ - maxhdrsize = rpcrdma_fixed_maxsz + 3 + - r_xprt->rx_ia.ri_max_segs * rpcrdma_readchunk_maxsz; - maxhdrsize *= sizeof(__be32); - rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), - DMA_TO_DEVICE, flags); - if (!rb) - goto out2; - req->rl_rdmabuf = rb; - xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); - req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); if (!req->rl_sendbuf) - goto out3; + goto out2; req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); if (!req->rl_recvbuf) - goto out4; + goto out3; INIT_LIST_HEAD(&req->rl_free_mrs); INIT_LIST_HEAD(&req->rl_registered); @@ -1039,10 +1020,8 @@ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, spin_unlock(&buffer->rb_lock); return req; -out4: - kfree(req->rl_sendbuf); out3: - 
kfree(req->rl_rdmabuf); + kfree(req->rl_sendbuf); out2: kfree(req); out1: @@ -1050,27 +1029,90 @@ out1: } /** - * rpcrdma_reqs_reset - Reset all reqs owned by a transport + * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object * @r_xprt: controlling transport instance + * @req: rpcrdma_req object to set up * - * ASSUMPTION: the rb_allreqs list is stable for the duration, + * Returns zero on success, and a negative errno on failure. + */ +int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) +{ + struct rpcrdma_regbuf *rb; + size_t maxhdrsize; + + /* Compute maximum header buffer size in bytes */ + maxhdrsize = rpcrdma_fixed_maxsz + 3 + + r_xprt->rx_ia.ri_max_rdma_segs * rpcrdma_readchunk_maxsz; + maxhdrsize *= sizeof(__be32); + rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), + DMA_TO_DEVICE, GFP_KERNEL); + if (!rb) + goto out; + + if (!__rpcrdma_regbuf_dma_map(r_xprt, rb)) + goto out_free; + + req->rl_rdmabuf = rb; + xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); + return 0; + +out_free: + rpcrdma_regbuf_free(rb); +out: + return -ENOMEM; +} + +/* ASSUMPTION: the rb_allreqs list is stable for the duration, * and thus can be walked without holding rb_lock. Eg. the * caller is holding the transport send lock to exclude * device removal or disconnection. 
*/ -static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) +static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_req *req; + int rc; list_for_each_entry(req, &buf->rb_allreqs, rl_all) { - /* Credits are valid only for one connection */ - req->rl_slot.rq_cong = 0; + rc = rpcrdma_req_setup(r_xprt, req); + if (rc) + return rc; } + return 0; } -static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, - bool temp) +static void rpcrdma_req_reset(struct rpcrdma_req *req) +{ + /* Credits are valid for only one connection */ + req->rl_slot.rq_cong = 0; + + rpcrdma_regbuf_free(req->rl_rdmabuf); + req->rl_rdmabuf = NULL; + + rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); + rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); +} + +/* ASSUMPTION: the rb_allreqs list is stable for the duration, + * and thus can be walked without holding rb_lock. Eg. the + * caller is holding the transport send lock to exclude + * device removal or disconnection. + */ +static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_req *req; + + list_for_each_entry(req, &buf->rb_allreqs, rl_all) + rpcrdma_req_reset(req); +} + +/* No locking needed here. This function is called only by the + * Receive completion handler. 
+ */ +static noinline +struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, + bool temp) { struct rpcrdma_rep *rep; @@ -1083,6 +1125,9 @@ static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, if (!rep->rr_rdmabuf) goto out_free; + if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) + goto out_free_regbuf; + xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), rdmab_length(rep->rr_rdmabuf)); rep->rr_cqe.done = rpcrdma_wc_receive; @@ -1095,12 +1140,17 @@ static struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps); return rep; +out_free_regbuf: + rpcrdma_regbuf_free(rep->rr_rdmabuf); out_free: kfree(rep); out: return NULL; } +/* No locking needed here. This function is invoked only by the + * Receive completion handler, or during transport shutdown. + */ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) { list_del(&rep->rr_all); @@ -1130,8 +1180,10 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_rep *rep; - list_for_each_entry(rep, &buf->rb_all_reps, rr_all) + list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); + rep->rr_temp = true; + } } static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) @@ -1153,7 +1205,6 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; int i, rc; - buf->rb_max_requests = r_xprt->rx_ep.rep_max_requests; buf->rb_bc_srv_max_requests = 0; spin_lock_init(&buf->rb_lock); INIT_LIST_HEAD(&buf->rb_mrs); @@ -1165,7 +1216,7 @@ int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) INIT_LIST_HEAD(&buf->rb_all_reps); rc = -ENOMEM; - for (i = 0; i < buf->rb_max_requests; i++) { + for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { struct rpcrdma_req *req; req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, @@ -1177,10 +1228,6 @@ int 
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) init_llist_head(&buf->rb_free_reps); - rc = rpcrdma_sendctxs_create(r_xprt); - if (rc) - goto out; - return 0; out: rpcrdma_buffer_destroy(buf); @@ -1256,7 +1303,6 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) void rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { - rpcrdma_sendctxs_destroy(buf); rpcrdma_reps_destroy(buf); while (!list_empty(&buf->rb_send_bufs)) { @@ -1497,7 +1543,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct ib_recv_wr *i, *wr, *bad_wr; + struct ib_recv_wr *wr, *bad_wr; struct rpcrdma_rep *rep; int needed, count, rc; @@ -1524,23 +1570,15 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) if (!rep) break; + trace_xprtrdma_post_recv(rep); rep->rr_recv_wr.next = wr; wr = &rep->rr_recv_wr; --needed; + ++count; } if (!wr) goto out; - for (i = wr; i; i = i->next) { - rep = container_of(i, struct rpcrdma_rep, rr_recv_wr); - - if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) - goto release_wrs; - - trace_xprtrdma_post_recv(rep); - ++count; - } - rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, (const struct ib_recv_wr **)&bad_wr); out: @@ -1557,11 +1595,4 @@ out: } ep->rep_receive_count += count; return; - -release_wrs: - for (i = wr; i;) { - rep = container_of(i, struct rpcrdma_rep, rr_recv_wr); - i = i->next; - rpcrdma_recv_buffer_put(rep); - } } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index d796d68609ed..37d5080c250b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -71,9 +71,8 @@ struct rpcrdma_ia { struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; int ri_async_rc; - unsigned int ri_max_segs; + unsigned int ri_max_rdma_segs; unsigned int ri_max_frwr_depth; - unsigned int ri_max_send_sges; bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; unsigned long ri_flags; @@ -99,7 
+98,7 @@ struct rpcrdma_ep { wait_queue_head_t rep_connect_wait; struct rpcrdma_connect_private rep_cm_private; struct rdma_conn_param rep_remote_cma; - unsigned int rep_max_requests; /* set by /proc */ + unsigned int rep_max_requests; /* depends on device */ unsigned int rep_inline_send; /* negotiated */ unsigned int rep_inline_recv; /* negotiated */ int rep_receive_count; @@ -373,7 +372,7 @@ struct rpcrdma_buffer { struct llist_head rb_free_reps; - u32 rb_max_requests; + __be32 rb_max_requests; u32 rb_credits; /* most recent credit grant */ u32 rb_bc_srv_max_requests; @@ -479,6 +478,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); */ struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, gfp_t flags); +int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void rpcrdma_req_destroy(struct rpcrdma_req *req); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); @@ -535,12 +535,11 @@ rpcrdma_data_dir(bool writing) /* Memory registration calls xprtrdma/frwr_ops.c */ -bool frwr_is_supported(struct ib_device *device); void frwr_reset(struct rpcrdma_req *req); -int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep); +int frwr_query_device(struct rpcrdma_xprt *r_xprt, + const struct ib_device *device); int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); void frwr_release_mr(struct rpcrdma_mr *mr); -size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt); struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing, __be32 xid, @@ -583,7 +582,6 @@ static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) /* RPC/RDMA module init - xprtrdma/transport.c */ -extern unsigned int xprt_rdma_slot_table_entries; extern unsigned int xprt_rdma_max_inline_read; extern unsigned int xprt_rdma_max_inline_write; void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap); |