summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-09-01 01:32:18 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-09-01 01:32:18 +0300
commitf35d1706159e015848ec7421e91b44b614c02dc2 (patch)
tree8a6604d318f604cddf8b84fd809cb1e43dd4553b
parent8ae5d298ef2005da5454fc1680f983e85d3e1622 (diff)
parentb38a6023da6a12b561f0421c6a5a1f7624a1529c (diff)
downloadlinux-f35d1706159e015848ec7421e91b44b614c02dc2.tar.xz
Merge tag 'nfsd-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever: "I'm thrilled to announce that the Linux in-kernel NFS server now offers NFSv4 write delegations. A write delegation enables a client to cache data and metadata for a single file more aggressively, reducing network round trips and server workload. Many thanks to Dai Ngo for contributing this facility, and to Jeff Layton and Neil Brown for reviewing and testing it. This release also sees the removal of all support for DES- and triple-DES-based Kerberos encryption types in the kernel's SunRPC implementation. These encryption types have been deprecated by the Internet community for years and are considered insecure. This change affects both the in-kernel NFS client and server. The server's UDP and TCP socket transports have now fully adopted David Howells' new bio_vec iterator so that no more than one sendmsg() call is needed to transmit each RPC message. In particular, this helps kTLS optimize record boundaries when sending RPC-with-TLS replies, and it takes the server a baby step closer to handling file I/O via folios. We've begun work on overhauling the SunRPC thread scheduler to remove a costly linked-list walk when looking for an idle RPC service thread to wake. The pre-requisites are included in this release. Thanks to Neil Brown for his ongoing work on this improvement" * tag 'nfsd-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (56 commits) Documentation: Add missing documentation for EXPORT_OP flags SUNRPC: Remove unused declaration rpc_modcount() SUNRPC: Remove unused declarations NFSD: da_addr_body field missing in some GETDEVICEINFO replies SUNRPC: Remove return value of svc_pool_wake_idle_thread() SUNRPC: make rqst_should_sleep() idempotent() SUNRPC: Clean up svc_set_num_threads SUNRPC: Count ingress RPC messages per svc_pool SUNRPC: Deduplicate thread wake-up code SUNRPC: Move trace_svc_xprt_enqueue SUNRPC: Add enum svc_auth_status SUNRPC: change svc_xprt::xpt_flags bits to enum SUNRPC: change svc_rqst::rq_flags bits to enum SUNRPC: change svc_pool::sp_flags bits to enum SUNRPC: change cache_head.flags bits to enum SUNRPC: remove timeout arg from svc_recv() SUNRPC: change svc_recv() to return void. SUNRPC: call svc_process() from svc_recv(). nfsd: separate nfsd_last_thread() from nfsd_put() nfsd: Simplify code around svc_exit_thread() call in nfsd() ...
-rw-r--r--Documentation/filesystems/nfs/exporting.rst26
-rw-r--r--fs/exportfs/expfs.c1
-rw-r--r--fs/lockd/mon.c3
-rw-r--r--fs/lockd/svc.c52
-rw-r--r--fs/lockd/svclock.c18
-rw-r--r--fs/locks.c7
-rw-r--r--fs/nfs/callback.c23
-rw-r--r--fs/nfsd/blocklayoutxdr.c9
-rw-r--r--fs/nfsd/cache.h8
-rw-r--r--fs/nfsd/flexfilelayoutxdr.c9
-rw-r--r--fs/nfsd/nfs3proc.c4
-rw-r--r--fs/nfsd/nfs4acl.c34
-rw-r--r--fs/nfsd/nfs4proc.c51
-rw-r--r--fs/nfsd/nfs4state.c162
-rw-r--r--fs/nfsd/nfs4xdr.c39
-rw-r--r--fs/nfsd/nfscache.c204
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/nfsd.h7
-rw-r--r--fs/nfsd/nfsfh.c26
-rw-r--r--fs/nfsd/nfsfh.h6
-rw-r--r--fs/nfsd/nfssvc.c111
-rw-r--r--fs/nfsd/state.h3
-rw-r--r--fs/nfsd/stats.c2
-rw-r--r--fs/nfsd/stats.h7
-rw-r--r--fs/nfsd/trace.h27
-rw-r--r--fs/nfsd/vfs.c52
-rw-r--r--fs/nfsd/xdr4.h11
-rw-r--r--include/linux/lockd/lockd.h4
-rw-r--r--include/linux/sunrpc/cache.h12
-rw-r--r--include/linux/sunrpc/stats.h23
-rw-r--r--include/linux/sunrpc/svc.h52
-rw-r--r--include/linux/sunrpc/svc_xprt.h38
-rw-r--r--include/linux/sunrpc/svcauth.h53
-rw-r--r--include/linux/sunrpc/svcsock.h9
-rw-r--r--include/linux/sunrpc/xdr.h2
-rw-r--r--include/trace/events/sunrpc.h80
-rw-r--r--net/sunrpc/.kunitconfig1
-rw-r--r--net/sunrpc/Kconfig35
-rw-r--r--net/sunrpc/auth_gss/Makefile2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_internal.h23
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_keys.c84
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c257
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seal.c69
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_seqnum.c106
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_test.c196
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_unseal.c77
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c287
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c7
-rw-r--r--net/sunrpc/svc.c97
-rw-r--r--net/sunrpc/svc_xprt.c126
-rw-r--r--net/sunrpc/svcauth.c35
-rw-r--r--net/sunrpc/svcauth_unix.c9
-rw-r--r--net/sunrpc/svcsock.c131
-rw-r--r--net/sunrpc/xdr.c50
54 files changed, 969 insertions, 1799 deletions
diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst
index 3d97b8d8f735..4b30daee399a 100644
--- a/Documentation/filesystems/nfs/exporting.rst
+++ b/Documentation/filesystems/nfs/exporting.rst
@@ -215,3 +215,29 @@ following flags are defined:
This flag causes nfsd to close any open files for this inode _before_
calling into the vfs to do an unlink or a rename that would replace
an existing file.
+
+ EXPORT_OP_REMOTE_FS - Backing storage for this filesystem is remote
+ PF_LOCAL_THROTTLE exists for loopback NFSD, where a thread needs to
+ write to one bdi (the final bdi) in order to free up writes queued
+ to another bdi (the client bdi). Such threads get a private balance
+ of dirty pages so that dirty pages for the client bdi do not imact
+ the daemon writing to the final bdi. For filesystems whose durable
+ storage is not local (such as exported NFS filesystems), this
+ constraint has negative consequences. EXPORT_OP_REMOTE_FS enables
+ an export to disable writeback throttling.
+
+ EXPORT_OP_NOATOMIC_ATTR - Filesystem does not update attributes atomically
+ EXPORT_OP_NOATOMIC_ATTR indicates that the exported filesystem
+ cannot provide the semantics required by the "atomic" boolean in
+ NFSv4's change_info4. This boolean indicates to a client whether the
+ returned before and after change attributes were obtained atomically
+ with the respect to the requested metadata operation (UNLINK,
+ OPEN/CREATE, MKDIR, etc).
+
+ EXPORT_OP_FLUSH_ON_CLOSE - Filesystem flushes file data on close(2)
+ On most filesystems, inodes can remain under writeback after the
+ file is closed. NFSD relies on client activity or local flusher
+ threads to handle writeback. Certain filesystems, such as NFS, flush
+ all of an inode's dirty data on last close. Exports that behave this
+ way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip
+ waiting for writeback when closing such files.
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index d1dbe47c7975..c20704aa21b3 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -386,6 +386,7 @@ static int export_encode_fh(struct inode *inode, struct fid *fid,
* @inode: the object to encode
* @fid: where to store the file handle fragment
* @max_len: maximum length to store there
+ * @parent: parent directory inode, if wanted
* @flags: properties of the requested file handle
*
* Returns an enum fid_type or a negative errno.
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1d9488cf0534..87a0f207df0b 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -276,6 +276,9 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
{
struct nsm_handle *new;
+ if (!hostname)
+ return NULL;
+
new = kzalloc(sizeof(*new) + hostname_len + 1, GFP_KERNEL);
if (unlikely(new == NULL))
return NULL;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 22d3ff3818f5..6579948070a4 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -45,7 +45,6 @@
#define NLMDBG_FACILITY NLMDBG_SVC
#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
-#define ALLOWED_SIGS (sigmask(SIGKILL))
static struct svc_program nlmsvc_program;
@@ -57,6 +56,12 @@ static unsigned int nlmsvc_users;
static struct svc_serv *nlmsvc_serv;
unsigned long nlmsvc_timeout;
+static void nlmsvc_request_retry(struct timer_list *tl)
+{
+ svc_wake_up(nlmsvc_serv);
+}
+DEFINE_TIMER(nlmsvc_retry, nlmsvc_request_retry);
+
unsigned int lockd_net_id;
/*
@@ -111,26 +116,12 @@ static void set_grace_period(struct net *net)
schedule_delayed_work(&ln->grace_period_end, grace_period);
}
-static void restart_grace(void)
-{
- if (nlmsvc_ops) {
- struct net *net = &init_net;
- struct lockd_net *ln = net_generic(net, lockd_net_id);
-
- cancel_delayed_work_sync(&ln->grace_period_end);
- locks_end_grace(&ln->lockd_manager);
- nlmsvc_invalidate_all();
- set_grace_period(net);
- }
-}
-
/*
* This is the lockd kernel thread
*/
static int
lockd(void *vrqstp)
{
- int err = 0;
struct svc_rqst *rqstp = vrqstp;
struct net *net = &init_net;
struct lockd_net *ln = net_generic(net, lockd_net_id);
@@ -138,9 +129,6 @@ lockd(void *vrqstp)
/* try_to_freeze() is called from svc_recv() */
set_freezable();
- /* Allow SIGKILL to tell lockd to drop all of its locks */
- allow_signal(SIGKILL);
-
dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
/*
@@ -148,33 +136,12 @@ lockd(void *vrqstp)
* NFS mount or NFS daemon has gone away.
*/
while (!kthread_should_stop()) {
- long timeout = MAX_SCHEDULE_TIMEOUT;
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
-
/* update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nlm_max_connections;
- if (signalled()) {
- flush_signals(current);
- restart_grace();
- continue;
- }
-
- timeout = nlmsvc_retry_blocked();
-
- /*
- * Find a socket with data available and call its
- * recvfrom routine.
- */
- err = svc_recv(rqstp, timeout);
- if (err == -EAGAIN || err == -EINTR)
- continue;
- dprintk("lockd: request from %s\n",
- svc_print_addr(rqstp, buf, sizeof(buf)));
-
- svc_process(rqstp);
+ nlmsvc_retry_blocked();
+ svc_recv(rqstp);
}
- flush_signals(current);
if (nlmsvc_ops)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
@@ -407,6 +374,7 @@ static void lockd_put(void)
#endif
svc_set_num_threads(nlmsvc_serv, NULL, 0);
+ timer_delete_sync(&nlmsvc_retry);
nlmsvc_serv = NULL;
dprintk("lockd_down: service destroyed\n");
}
@@ -538,7 +506,7 @@ static inline int is_callback(u32 proc)
}
-static int lockd_authenticate(struct svc_rqst *rqstp)
+static enum svc_auth_status lockd_authenticate(struct svc_rqst *rqstp)
{
rqstp->rq_client = NULL;
switch (rqstp->rq_authop->flavour) {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c43ccdf28ed9..43aeba9de55c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -131,12 +131,14 @@ static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
static inline void
nlmsvc_remove_block(struct nlm_block *block)
{
+ spin_lock(&nlm_blocked_lock);
if (!list_empty(&block->b_list)) {
- spin_lock(&nlm_blocked_lock);
list_del_init(&block->b_list);
spin_unlock(&nlm_blocked_lock);
nlmsvc_release_block(block);
+ return;
}
+ spin_unlock(&nlm_blocked_lock);
}
/*
@@ -152,6 +154,7 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
file, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end, lock->fl.fl_type);
+ spin_lock(&nlm_blocked_lock);
list_for_each_entry(block, &nlm_blocked, b_list) {
fl = &block->b_call->a_args.lock.fl;
dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
@@ -161,9 +164,11 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
nlmdbg_cookie2a(&block->b_call->a_args.cookie));
if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
kref_get(&block->b_count);
+ spin_unlock(&nlm_blocked_lock);
return block;
}
}
+ spin_unlock(&nlm_blocked_lock);
return NULL;
}
@@ -185,16 +190,19 @@ nlmsvc_find_block(struct nlm_cookie *cookie)
{
struct nlm_block *block;
+ spin_lock(&nlm_blocked_lock);
list_for_each_entry(block, &nlm_blocked, b_list) {
if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie))
goto found;
}
+ spin_unlock(&nlm_blocked_lock);
return NULL;
found:
dprintk("nlmsvc_find_block(%s): block=%p\n", nlmdbg_cookie2a(cookie), block);
kref_get(&block->b_count);
+ spin_unlock(&nlm_blocked_lock);
return block;
}
@@ -317,6 +325,7 @@ void nlmsvc_traverse_blocks(struct nlm_host *host,
restart:
mutex_lock(&file->f_mutex);
+ spin_lock(&nlm_blocked_lock);
list_for_each_entry_safe(block, next, &file->f_blocks, b_flist) {
if (!match(block->b_host, host))
continue;
@@ -325,11 +334,13 @@ restart:
if (list_empty(&block->b_list))
continue;
kref_get(&block->b_count);
+ spin_unlock(&nlm_blocked_lock);
mutex_unlock(&file->f_mutex);
nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
goto restart;
}
+ spin_unlock(&nlm_blocked_lock);
mutex_unlock(&file->f_mutex);
}
@@ -1008,7 +1019,7 @@ retry_deferred_block(struct nlm_block *block)
* picks up locks that can be granted, or grant notifications that must
* be retransmitted.
*/
-unsigned long
+void
nlmsvc_retry_blocked(void)
{
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -1038,5 +1049,6 @@ nlmsvc_retry_blocked(void)
}
spin_unlock(&nlm_blocked_lock);
- return timeout;
+ if (timeout < MAX_SCHEDULE_TIMEOUT)
+ mod_timer(&nlmsvc_retry, jiffies + timeout);
}
diff --git a/fs/locks.c b/fs/locks.c
index a45efc16945d..76ad05f8070a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1744,13 +1744,6 @@ generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **pri
if (is_deleg && !inode_trylock(inode))
return -EAGAIN;
- if (is_deleg && arg == F_WRLCK) {
- /* Write delegations are not currently supported: */
- inode_unlock(inode);
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
-
percpu_down_read(&file_rwsem);
spin_lock(&ctx->flc_lock);
time_out_leases(inode, &dispose);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 456af7d230cf..466ebf1d41b2 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -74,23 +74,12 @@ out_err:
static int
nfs4_callback_svc(void *vrqstp)
{
- int err;
struct svc_rqst *rqstp = vrqstp;
set_freezable();
- while (!kthread_freezable_should_stop(NULL)) {
-
- if (signal_pending(current))
- flush_signals(current);
- /*
- * Listen for a request on the socket
- */
- err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
- if (err == -EAGAIN || err == -EINTR)
- continue;
- svc_process(rqstp);
- }
+ while (!kthread_freezable_should_stop(NULL))
+ svc_recv(rqstp);
svc_exit_thread(rqstp);
return 0;
@@ -112,11 +101,7 @@ nfs41_callback_svc(void *vrqstp)
set_freezable();
while (!kthread_freezable_should_stop(NULL)) {
-
- if (signal_pending(current))
- flush_signals(current);
-
- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE);
spin_lock_bh(&serv->sv_cb_lock);
if (!list_empty(&serv->sv_cb_list)) {
req = list_first_entry(&serv->sv_cb_list,
@@ -387,7 +372,7 @@ check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
* All other checking done after NFS decoding where the nfs_client can be
* found in nfs4_callback_compound
*/
-static int nfs_callback_authenticate(struct svc_rqst *rqstp)
+static enum svc_auth_status nfs_callback_authenticate(struct svc_rqst *rqstp)
{
rqstp->rq_auth_stat = rpc_autherr_badcred;
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 8e9c1a0f8d38..1ed2f691ebb9 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -83,6 +83,15 @@ nfsd4_block_encode_getdeviceinfo(struct xdr_stream *xdr,
int len = sizeof(__be32), ret, i;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
p = xdr_reserve_space(xdr, len + sizeof(__be32));
if (!p)
return nfserr_resource;
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4c9b87850ab1..929248c6ca84 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -19,7 +19,7 @@
* typical sockaddr_storage. This is for space reasons, since sockaddr_storage
* is much larger than a sockaddr_in6.
*/
-struct svc_cacherep {
+struct nfsd_cacherep {
struct {
/* Keep often-read xid, csum in the same cache line: */
__be32 k_xid;
@@ -84,8 +84,10 @@ int nfsd_net_reply_cache_init(struct nfsd_net *nn);
void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
-int nfsd_cache_lookup(struct svc_rqst *);
-void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+int nfsd_cache_lookup(struct svc_rqst *rqstp,
+ struct nfsd_cacherep **cacherep);
+void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
+ int cachetype, __be32 *statp);
int nfsd_reply_cache_stats_show(struct seq_file *m, void *v);
#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c
index e81d2a5cf381..bb205328e043 100644
--- a/fs/nfsd/flexfilelayoutxdr.c
+++ b/fs/nfsd/flexfilelayoutxdr.c
@@ -85,6 +85,15 @@ nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr,
int addr_len;
__be32 *p;
+ /*
+ * See paragraph 5 of RFC 8881 S18.40.3.
+ */
+ if (!gdp->gd_maxcount) {
+ if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ return nfserr_resource;
+ return nfs_ok;
+ }
+
/* len + padding for two strings */
addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len;
ver_len = 20;
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index fc8d5b7db9f8..268ef57751c4 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -307,7 +307,9 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();
- fh_fill_pre_attrs(fhp);
+ status = fh_fill_pre_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
host_err = vfs_create(&nop_mnt_idmap, inode, child, iap->ia_mode, true);
if (host_err < 0) {
status = nfserrno(host_err);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 518203821790..96e786b5e544 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -441,7 +441,7 @@ struct posix_ace_state_array {
* calculated so far: */
struct posix_acl_state {
- int empty;
+ unsigned char valid;
struct posix_ace_state owner;
struct posix_ace_state group;
struct posix_ace_state other;
@@ -457,7 +457,6 @@ init_state(struct posix_acl_state *state, int cnt)
int alloc;
memset(state, 0, sizeof(struct posix_acl_state));
- state->empty = 1;
/*
* In the worst case, each individual acl could be for a distinct
* named user or group, but we don't know which, so we allocate
@@ -500,7 +499,7 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
* and effective cases: when there are no inheritable ACEs,
* calls ->set_acl with a NULL ACL structure.
*/
- if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT))
+ if (!state->valid && (flags & NFS4_ACL_TYPE_DEFAULT))
return NULL;
/*
@@ -622,11 +621,12 @@ static void process_one_v4_ace(struct posix_acl_state *state,
struct nfs4_ace *ace)
{
u32 mask = ace->access_mask;
+ short type = ace2type(ace);
int i;
- state->empty = 0;
+ state->valid |= type;
- switch (ace2type(ace)) {
+ switch (type) {
case ACL_USER_OBJ:
if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
allow_bits(&state->owner, mask);
@@ -726,6 +726,30 @@ static int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl,
if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE))
process_one_v4_ace(&effective_acl_state, ace);
}
+
+ /*
+ * At this point, the default ACL may have zeroed-out entries for owner,
+ * group and other. That usually results in a non-sensical resulting ACL
+ * that denies all access except to any ACE that was explicitly added.
+ *
+ * The setfacl command solves a similar problem with this logic:
+ *
+ * "If a Default ACL entry is created, and the Default ACL contains
+ * no owner, owning group, or others entry, a copy of the ACL
+ * owner, owning group, or others entry is added to the Default ACL."
+ *
+ * Copy any missing ACEs from the effective set, if any ACEs were
+ * explicitly set.
+ */
+ if (default_acl_state.valid) {
+ if (!(default_acl_state.valid & ACL_USER_OBJ))
+ default_acl_state.owner = effective_acl_state.owner;
+ if (!(default_acl_state.valid & ACL_GROUP_OBJ))
+ default_acl_state.group = effective_acl_state.group;
+ if (!(default_acl_state.valid & ACL_OTHER))
+ default_acl_state.other = effective_acl_state.other;
+ }
+
*pacl = posix_state_to_acl(&effective_acl_state, flags);
if (IS_ERR(*pacl)) {
ret = PTR_ERR(*pacl);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 5ae670807449..5ca748309c26 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -297,12 +297,12 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
if (d_really_is_positive(child)) {
- status = nfs_ok;
-
/* NFSv4 protocol requires change attributes even though
* no change happened.
*/
- fh_fill_both_attrs(fhp);
+ status = fh_fill_both_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
switch (open->op_createmode) {
case NFS4_CREATE_UNCHECKED:
@@ -345,7 +345,9 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!IS_POSIXACL(inode))
iap->ia_mode &= ~current_umask();
- fh_fill_pre_attrs(fhp);
+ status = fh_fill_pre_attrs(fhp);
+ if (status != nfs_ok)
+ goto out;
status = nfsd4_vfs_create(fhp, child, open);
if (status != nfs_ok)
goto out;
@@ -380,6 +382,38 @@ out:
return status;
}
+/**
+ * set_change_info - set up the change_info4 for a reply
+ * @cinfo: pointer to nfsd4_change_info to be populated
+ * @fhp: pointer to svc_fh to use as source
+ *
+ * Many operations in NFSv4 require change_info4 in the reply. This function
+ * populates that from the info that we (should!) have already collected. In
+ * the event that we didn't get any pre-attrs, just zero out both.
+ */
+static void
+set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+{
+ cinfo->atomic = (u32)(fhp->fh_pre_saved && fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
+ cinfo->before_change = fhp->fh_pre_change;
+ cinfo->after_change = fhp->fh_post_change;
+
+ /*
+ * If fetching the pre-change attributes failed, then we should
+ * have already failed the whole operation. We could have still
+ * failed to fetch post-change attributes however.
+ *
+ * If we didn't get post-op attrs, just zero-out the after
+ * field since we don't know what it should be. If the pre_saved
+ * field isn't set for some reason, throw warning and just copy
+ * whatever is in the after field.
+ */
+ if (WARN_ON_ONCE(!fhp->fh_pre_saved))
+ cinfo->before_change = 0;
+ if (!fhp->fh_post_saved)
+ cinfo->after_change = cinfo->before_change + 1;
+}
+
static __be32
do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
{
@@ -424,11 +458,11 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
} else {
status = nfsd_lookup(rqstp, current_fh,
open->op_fname, open->op_fnamelen, *resfh);
- if (!status)
+ if (status == nfs_ok)
/* NFSv4 protocol requires change attributes even though
* no change happened.
*/
- fh_fill_both_attrs(current_fh);
+ status = fh_fill_both_attrs(current_fh);
}
if (status)
goto out;
@@ -1313,12 +1347,11 @@ try_again:
/* found a match */
if (ni->nsui_busy) {
/* wait - and try again */
- prepare_to_wait(&nn->nfsd_ssc_waitq, &wait,
- TASK_INTERRUPTIBLE);
+ prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE);
spin_unlock(&nn->nfsd_ssc_lock);
/* allow 20secs for mount/unmount for now - revisit */
- if (signal_pending(current) ||
+ if (kthread_should_stop() ||
(schedule_timeout(20*HZ) == 0)) {
finish_wait(&nn->nfsd_ssc_waitq, &wait);
kfree(work);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index daf305daa751..8534693eb6a4 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -649,6 +649,18 @@ find_readable_file(struct nfs4_file *f)
return ret;
}
+static struct nfsd_file *
+find_rw_file(struct nfs4_file *f)
+{
+ struct nfsd_file *ret;
+
+ spin_lock(&f->fi_lock);
+ ret = nfsd_file_get(f->fi_fds[O_RDWR]);
+ spin_unlock(&f->fi_lock);
+
+ return ret;
+}
+
struct nfsd_file *
find_any_file(struct nfs4_file *f)
{
@@ -1144,7 +1156,7 @@ static void block_delegations(struct knfsd_fh *fh)
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
- struct nfs4_clnt_odstate *odstate)
+ struct nfs4_clnt_odstate *odstate, u32 dl_type)
{
struct nfs4_delegation *dp;
long n;
@@ -1170,7 +1182,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
INIT_LIST_HEAD(&dp->dl_recall_lru);
dp->dl_clnt_odstate = odstate;
get_clnt_odstate(odstate);
- dp->dl_type = NFS4_OPEN_DELEGATE_READ;
+ dp->dl_type = dl_type;
dp->dl_retries = 1;
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -5449,8 +5461,9 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct nfs4_file *fp = stp->st_stid.sc_file;
struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
- struct nfsd_file *nf;
+ struct nfsd_file *nf = NULL;
struct file_lock *fl;
+ u32 dl_type;
/*
* The fi_had_conflict and nfs_get_existing_delegation checks
@@ -5460,15 +5473,35 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- nf = find_readable_file(fp);
- if (!nf) {
- /*
- * We probably could attempt another open and get a read
- * delegation, but for now, don't bother until the
- * client actually sends us one.
- */
- return ERR_PTR(-EAGAIN);
+ /*
+ * Try for a write delegation first. RFC8881 section 10.4 says:
+ *
+ * "An OPEN_DELEGATE_WRITE delegation allows the client to handle,
+ * on its own, all opens."
+ *
+ * Furthermore the client can use a write delegation for most READ
+ * operations as well, so we require a O_RDWR file here.
+ *
+ * Offer a write delegation in the case of a BOTH open, and ensure
+ * we get the O_RDWR descriptor.
+ */
+ if ((open->op_share_access & NFS4_SHARE_ACCESS_BOTH) == NFS4_SHARE_ACCESS_BOTH) {
+ nf = find_rw_file(fp);
+ dl_type = NFS4_OPEN_DELEGATE_WRITE;
}
+
+ /*
+ * If the file is being opened O_RDONLY or we couldn't get a O_RDWR
+ * file for some reason, then try for a read delegation instead.
+ */
+ if (!nf && (open->op_share_access & NFS4_SHARE_ACCESS_READ)) {
+ nf = find_readable_file(fp);
+ dl_type = NFS4_OPEN_DELEGATE_READ;
+ }
+
+ if (!nf)
+ return ERR_PTR(-EAGAIN);
+
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
if (nfs4_delegation_exists(clp, fp))
@@ -5491,11 +5524,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
return ERR_PTR(status);
status = -ENOMEM;
- dp = alloc_init_deleg(clp, fp, odstate);
+ dp = alloc_init_deleg(clp, fp, odstate, dl_type);
if (!dp)
goto out_delegees;
- fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ);
+ fl = nfs4_alloc_init_lease(dp, dl_type);
if (!fl)
goto out_clnt_odstate;
@@ -5568,10 +5601,28 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
}
/*
- * Attempt to hand out a delegation.
+ * The Linux NFS server does not offer write delegations to NFSv4.0
+ * clients in order to avoid conflicts between write delegations and
+ * GETATTRs requesting CHANGE or SIZE attributes.
+ *
+ * With NFSv4.1 and later minorversions, the SEQUENCE operation that
+ * begins each COMPOUND contains a client ID. Delegation recall can
+ * be avoided when the server recognizes the client sending a
+ * GETATTR also holds write delegation it conflicts with.
+ *
+ * However, the NFSv4.0 protocol does not enable a server to
+ * determine that a GETATTR originated from the client holding the
+ * conflicting delegation versus coming from some other client. Per
+ * RFC 7530 Section 16.7.5, the server must recall or send a
+ * CB_GETATTR even when the GETATTR originates from the client that
+ * holds the conflicting delegation.
*
- * Note we don't support write delegations, and won't until the vfs has
- * proper support for them.
+ * An NFSv4.0 client can trigger a pathological situation if it
+ * always sends a DELEGRETURN preceded by a conflicting GETATTR in
+ * the same COMPOUND. COMPOUND execution will always stop at the
+ * GETATTR and the DELEGRETURN will never get executed. The server
+ * eventually revokes the delegation, which can result in loss of
+ * open or lock state.
*/
static void
nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
@@ -5590,8 +5641,6 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
case NFS4_OPEN_CLAIM_PREVIOUS:
if (!cb_up)
open->op_recall = 1;
- if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
- goto out_no_deleg;
break;
case NFS4_OPEN_CLAIM_NULL:
parent = currentfh;
@@ -5606,6 +5655,9 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
goto out_no_deleg;
if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
goto out_no_deleg;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE &&
+ !clp->cl_minorversion)
+ goto out_no_deleg;
break;
default:
goto out_no_deleg;
@@ -5616,8 +5668,13 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
- trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
- open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
+ trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
+ } else {
+ open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
+ trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
+ }
nfs4_put_stid(&dp->dl_stid);
return;
out_no_deleg:
@@ -8341,3 +8398,68 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
{
get_stateid(cstate, &u->write.wr_stateid);
}
+
+/**
+ * nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
+ * @rqstp: RPC transaction context
+ * @inode: file to be checked for a conflict
+ *
+ * This function is called when there is a conflict between a write
+ * delegation and a change/size GETATTR from another client. The server
+ * must either use the CB_GETATTR to get the current values of the
+ * attributes from the client that holds the delegation or recall the
+ * delegation before replying to the GETATTR. See RFC 8881 section
+ * 18.7.4.
+ *
+ * The current implementation does not support CB_GETATTR yet. However
+ * this can avoid recalling the delegation could be added in follow up
+ * work.
+ *
+ * Returns 0 if there is no conflict; otherwise an nfs_stat
+ * code is returned.
+ */
+__be32
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+{
+ __be32 status;
+ struct file_lock_context *ctx;
+ struct file_lock *fl;
+ struct nfs4_delegation *dp;
+
+ ctx = locks_inode_context(inode);
+ if (!ctx)
+ return 0;
+ spin_lock(&ctx->flc_lock);
+ list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+ if (fl->fl_flags == FL_LAYOUT)
+ continue;
+ if (fl->fl_lmops != &nfsd_lease_mng_ops) {
+ /*
+ * non-nfs lease, if it's a lease with F_RDLCK then
+ * we are done; there isn't any write delegation
+ * on this inode
+ */
+ if (fl->fl_type == F_RDLCK)
+ break;
+ goto break_lease;
+ }
+ if (fl->fl_type == F_WRLCK) {
+ dp = fl->fl_owner;
+ if (dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+ spin_unlock(&ctx->flc_lock);
+ return 0;
+ }
+break_lease:
+ spin_unlock(&ctx->flc_lock);
+ nfsd_stats_wdeleg_getattr_inc();
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ return 0;
+ }
+ break;
+ }
+ spin_unlock(&ctx->flc_lock);
+ return 0;
+}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index b30dca7de8cc..2e40c74d2f72 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2984,6 +2984,11 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
if (status)
goto out;
}
+ if (bmval0 & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+ status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
+ if (status)
+ goto out;
+ }
err = vfs_getattr(&path, &stat,
STATX_BASIC_STATS | STATX_BTIME | STATX_CHANGE_COOKIE,
@@ -3973,17 +3978,20 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
if (nfserr)
return nfserr;
- p = xdr_reserve_space(xdr, 32);
+
+ p = xdr_reserve_space(xdr, XDR_UNIT * 8);
if (!p)
return nfserr_resource;
*p++ = cpu_to_be32(open->op_recall);
/*
+ * Always flush on close
+ *
* TODO: space_limit's in delegations
*/
*p++ = cpu_to_be32(NFS4_LIMIT_SIZE);
- *p++ = cpu_to_be32(~(u32)0);
- *p++ = cpu_to_be32(~(u32)0);
+ *p++ = xdr_zero;
+ *p++ = xdr_zero;
/*
* TODO: ACE's in delegations
@@ -4678,20 +4686,17 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
*p++ = cpu_to_be32(gdev->gd_layout_type);
- /* If maxcount is 0 then just update notifications */
- if (gdev->gd_maxcount != 0) {
- ops = nfsd4_layout_ops[gdev->gd_layout_type];
- nfserr = ops->encode_getdeviceinfo(xdr, gdev);
- if (nfserr) {
- /*
- * We don't bother to burden the layout drivers with
- * enforcing gd_maxcount, just tell the client to
- * come back with a bigger buffer if it's not enough.
- */
- if (xdr->buf->len + 4 > gdev->gd_maxcount)
- goto toosmall;
- return nfserr;
- }
+ ops = nfsd4_layout_ops[gdev->gd_layout_type];
+ nfserr = ops->encode_getdeviceinfo(xdr, gdev);
+ if (nfserr) {
+ /*
+ * We don't bother to burden the layout drivers with
+ * enforcing gd_maxcount, just tell the client to
+ * come back with a bigger buffer if it's not enough.
+ */
+ if (xdr->buf->len + 4 > gdev->gd_maxcount)
+ goto toosmall;
+ return nfserr;
}
if (gdev->gd_notify_types) {
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index a8eda1c85829..80621a709510 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -84,11 +84,11 @@ nfsd_hashsize(unsigned int limit)
return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
}
-static struct svc_cacherep *
-nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
- struct nfsd_net *nn)
+static struct nfsd_cacherep *
+nfsd_cacherep_alloc(struct svc_rqst *rqstp, __wsum csum,
+ struct nfsd_net *nn)
{
- struct svc_cacherep *rp;
+ struct nfsd_cacherep *rp;
rp = kmem_cache_alloc(drc_slab, GFP_KERNEL);
if (rp) {
@@ -110,36 +110,64 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum,
return rp;
}
-static void
-nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
- struct nfsd_net *nn)
+static void nfsd_cacherep_free(struct nfsd_cacherep *rp)
{
- if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
- nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
+ if (rp->c_type == RC_REPLBUFF)
kfree(rp->c_replvec.iov_base);
+ kmem_cache_free(drc_slab, rp);
+}
+
+static unsigned long
+nfsd_cacherep_dispose(struct list_head *dispose)
+{
+ struct nfsd_cacherep *rp;
+ unsigned long freed = 0;
+
+ while (!list_empty(dispose)) {
+ rp = list_first_entry(dispose, struct nfsd_cacherep, c_lru);
+ list_del(&rp->c_lru);
+ nfsd_cacherep_free(rp);
+ freed++;
}
+ return freed;
+}
+
+static void
+nfsd_cacherep_unlink_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+ struct nfsd_cacherep *rp)
+{
+ if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base)
+ nfsd_stats_drc_mem_usage_sub(nn, rp->c_replvec.iov_len);
if (rp->c_state != RC_UNUSED) {
rb_erase(&rp->c_node, &b->rb_head);
list_del(&rp->c_lru);
atomic_dec(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_sub(nn, sizeof(*rp));
}
- kmem_cache_free(drc_slab, rp);
}
static void
-nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp,
+nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
+ struct nfsd_net *nn)
+{
+ nfsd_cacherep_unlink_locked(nn, b, rp);
+ nfsd_cacherep_free(rp);
+}
+
+static void
+nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
struct nfsd_net *nn)
{
spin_lock(&b->cache_lock);
- nfsd_reply_cache_free_locked(b, rp, nn);
+ nfsd_cacherep_unlink_locked(nn, b, rp);
spin_unlock(&b->cache_lock);
+ nfsd_cacherep_free(rp);
}
int nfsd_drc_slab_create(void)
{
drc_slab = kmem_cache_create("nfsd_drc",
- sizeof(struct svc_cacherep), 0, 0, NULL);
+ sizeof(struct nfsd_cacherep), 0, 0, NULL);
return drc_slab ? 0: -ENOMEM;
}
@@ -208,7 +236,7 @@ out_shrinker:
void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
{
- struct svc_cacherep *rp;
+ struct nfsd_cacherep *rp;
unsigned int i;
unregister_shrinker(&nn->nfsd_reply_cache_shrinker);
@@ -216,7 +244,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
for (i = 0; i < nn->drc_hashsize; i++) {
struct list_head *head = &nn->drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
- rp = list_first_entry(head, struct svc_cacherep, c_lru);
+ rp = list_first_entry(head, struct nfsd_cacherep, c_lru);
nfsd_reply_cache_free_locked(&nn->drc_hashtbl[i],
rp, nn);
}
@@ -233,7 +261,7 @@ void nfsd_reply_cache_shutdown(struct nfsd_net *nn)
* not already scheduled.
*/
static void
-lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
+lru_put_end(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp)
{
rp->c_timestamp = jiffies;
list_move_tail(&rp->c_lru, &b->lru_head);
@@ -247,12 +275,21 @@ nfsd_cache_bucket_find(__be32 xid, struct nfsd_net *nn)
return &nn->drc_hashtbl[hash];
}
-static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
- unsigned int max)
+/*
+ * Remove and return no more than @max expired entries in bucket @b.
+ * If @max is zero, do not limit the number of removed entries.
+ */
+static void
+nfsd_prune_bucket_locked(struct nfsd_net *nn, struct nfsd_drc_bucket *b,
+ unsigned int max, struct list_head *dispose)
{
- struct svc_cacherep *rp, *tmp;
- long freed = 0;
+ unsigned long expiry = jiffies - RC_EXPIRE;
+ struct nfsd_cacherep *rp, *tmp;
+ unsigned int freed = 0;
+
+ lockdep_assert_held(&b->cache_lock);
+ /* The bucket LRU is ordered oldest-first. */
list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
/*
* Don't free entries attached to calls that are still
@@ -260,60 +297,77 @@ static long prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn,
*/
if (rp->c_state == RC_INPROG)
continue;
+
if (atomic_read(&nn->num_drc_entries) <= nn->max_drc_entries &&
- time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
+ time_before(expiry, rp->c_timestamp))
break;
- nfsd_reply_cache_free_locked(b, rp, nn);
- if (max && freed++ > max)
+
+ nfsd_cacherep_unlink_locked(nn, b, rp);
+ list_add(&rp->c_lru, dispose);
+
+ if (max && ++freed > max)
break;
}
- return freed;
}
-static long nfsd_prune_bucket(struct nfsd_drc_bucket *b, struct nfsd_net *nn)
+/**
+ * nfsd_reply_cache_count - count_objects method for the DRC shrinker
+ * @shrink: our registered shrinker context
+ * @sc: garbage collection parameters
+ *
+ * Returns the total number of entries in the duplicate reply cache. To
+ * keep things simple and quick, this is not the number of expired entries
+ * in the cache (ie, the number that would be removed by a call to
+ * nfsd_reply_cache_scan).
+ */
+static unsigned long
+nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return prune_bucket(b, nn, 3);
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+
+ return atomic_read(&nn->num_drc_entries);
}
-/*
- * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
- * Also prune the oldest ones when the total exceeds the max number of entries.
+/**
+ * nfsd_reply_cache_scan - scan_objects method for the DRC shrinker
+ * @shrink: our registered shrinker context
+ * @sc: garbage collection parameters
+ *
+ * Free expired entries on each bucket's LRU list until we've released
+ * nr_to_scan freed objects. Nothing will be released if the cache
+ * has not exceeded it's max_drc_entries limit.
+ *
+ * Returns the number of entries released by this call.
*/
-static long
-prune_cache_entries(struct nfsd_net *nn)
+static unsigned long
+nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
+ struct nfsd_net *nn = container_of(shrink,
+ struct nfsd_net, nfsd_reply_cache_shrinker);
+ unsigned long freed = 0;
+ LIST_HEAD(dispose);
unsigned int i;
- long freed = 0;
for (i = 0; i < nn->drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &nn->drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
+
spin_lock(&b->cache_lock);
- freed += prune_bucket(b, nn, 0);
+ nfsd_prune_bucket_locked(nn, b, 0, &dispose);
spin_unlock(&b->cache_lock);
- }
- return freed;
-}
-static unsigned long
-nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
-{
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_reply_cache_shrinker);
+ freed += nfsd_cacherep_dispose(&dispose);
+ if (freed > sc->nr_to_scan)
+ break;
+ }
- return atomic_read(&nn->num_drc_entries);
+ trace_nfsd_drc_gc(nn, freed);
+ return freed;
}
-static unsigned long
-nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
-{
- struct nfsd_net *nn = container_of(shrink,
- struct nfsd_net, nfsd_reply_cache_shrinker);
-
- return prune_cache_entries(nn);
-}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
*/
@@ -348,8 +402,8 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
}
static int
-nfsd_cache_key_cmp(const struct svc_cacherep *key,
- const struct svc_cacherep *rp, struct nfsd_net *nn)
+nfsd_cache_key_cmp(const struct nfsd_cacherep *key,
+ const struct nfsd_cacherep *rp, struct nfsd_net *nn)
{
if (key->c_key.k_xid == rp->c_key.k_xid &&
key->c_key.k_csum != rp->c_key.k_csum) {
@@ -365,11 +419,11 @@ nfsd_cache_key_cmp(const struct svc_cacherep *key,
* Must be called with cache_lock held. Returns the found entry or
* inserts an empty key on failure.
*/
-static struct svc_cacherep *
-nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
+static struct nfsd_cacherep *
+nfsd_cache_insert(struct nfsd_drc_bucket *b, struct nfsd_cacherep *key,
struct nfsd_net *nn)
{
- struct svc_cacherep *rp, *ret = key;
+ struct nfsd_cacherep *rp, *ret = key;
struct rb_node **p = &b->rb_head.rb_node,
*parent = NULL;
unsigned int entries = 0;
@@ -378,7 +432,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key,
while (*p != NULL) {
++entries;
parent = *p;
- rp = rb_entry(parent, struct svc_cacherep, c_node);
+ rp = rb_entry(parent, struct nfsd_cacherep, c_node);
cmp = nfsd_cache_key_cmp(key, rp, nn);
if (cmp < 0)
@@ -411,6 +465,7 @@ out:
/**
* nfsd_cache_lookup - Find an entry in the duplicate reply cache
* @rqstp: Incoming Call to find
+ * @cacherep: OUT: DRC entry for this request
*
* Try to find an entry matching the current call in the cache. When none
* is found, we try to grab the oldest expired entry off the LRU list. If
@@ -423,16 +478,17 @@ out:
* %RC_REPLY: Reply from cache
* %RC_DROPIT: Do not process the request further
*/
-int nfsd_cache_lookup(struct svc_rqst *rqstp)
+int nfsd_cache_lookup(struct svc_rqst *rqstp, struct nfsd_cacherep **cacherep)
{
struct nfsd_net *nn;
- struct svc_cacherep *rp, *found;
+ struct nfsd_cacherep *rp, *found;
__wsum csum;
struct nfsd_drc_bucket *b;
int type = rqstp->rq_cachetype;
+ unsigned long freed;
+ LIST_HEAD(dispose);
int rtn = RC_DOIT;
- rqstp->rq_cacherep = NULL;
if (type == RC_NOCACHE) {
nfsd_stats_rc_nocache_inc();
goto out;
@@ -445,7 +501,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
* preallocate an entry.
*/
nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- rp = nfsd_reply_cache_alloc(rqstp, csum, nn);
+ rp = nfsd_cacherep_alloc(rqstp, csum, nn);
if (!rp)
goto out;
@@ -454,20 +510,18 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp)
found = nfsd_cache_insert(b, rp, nn);
if (found != rp)
goto found_entry;
-
- nfsd_stats_rc_misses_inc();
- rqstp->rq_cacherep = rp;
+ *cacherep = rp;
rp->c_state = RC_INPROG;
+ nfsd_prune_bucket_locked(nn, b, 3, &dispose);
+ spin_unlock(&b->cache_lock);
+ freed = nfsd_cacherep_dispose(&dispose);
+ trace_nfsd_drc_gc(nn, freed);
+
+ nfsd_stats_rc_misses_inc();
atomic_inc(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
-
- nfsd_prune_bucket(b, nn);
-
-out_unlock:
- spin_unlock(&b->cache_lock);
-out:
- return rtn;
+ goto out;
found_entry:
/* We found a matching entry which is either in progress or done. */
@@ -505,12 +559,16 @@ found_entry:
out_trace:
trace_nfsd_drc_found(nn, rqstp, rtn);
- goto out_unlock;
+out_unlock:
+ spin_unlock(&b->cache_lock);
+out:
+ return rtn;
}
/**
* nfsd_cache_update - Update an entry in the duplicate reply cache.
* @rqstp: svc_rqst with a finished Reply
+ * @rp: IN: DRC entry for this request
* @cachetype: which cache to update
* @statp: pointer to Reply's NFS status code, or NULL
*
@@ -528,10 +586,10 @@ out_trace:
* nfsd failed to encode a reply that otherwise would have been cached.
* In this case, nfsd_cache_update is called with statp == NULL.
*/
-void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
+void nfsd_cache_update(struct svc_rqst *rqstp, struct nfsd_cacherep *rp,
+ int cachetype, __be32 *statp)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
- struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
struct nfsd_drc_bucket *b;
int len;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 3709830f90a6..7ed02fb88a36 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1627,6 +1627,7 @@ static void __exit exit_nfsd(void)
}
MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_DESCRIPTION("In-kernel NFS server");
MODULE_LICENSE("GPL");
module_init(init_nfsd)
module_exit(exit_nfsd)
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index d88498f8b275..11c14faa6c67 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -96,7 +96,12 @@ int nfsd_pool_stats_open(struct inode *, struct file *);
int nfsd_pool_stats_release(struct inode *, struct file *);
void nfsd_shutdown_threads(struct net *net);
-void nfsd_put(struct net *net);
+static inline void nfsd_put(struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
+ svc_put(nn->nfsd_serv);
+}
bool i_am_nfsd(void);
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c291389a1d71..355bf0db3235 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -614,7 +614,7 @@ out_negative:
* @fhp: file handle to be updated
*
*/
-void fh_fill_pre_attrs(struct svc_fh *fhp)
+__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
struct inode *inode;
@@ -622,12 +622,12 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
__be32 err;
if (fhp->fh_no_wcc || fhp->fh_pre_saved)
- return;
+ return nfs_ok;
inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
if (err)
- return;
+ return err;
if (v4)
fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
@@ -636,6 +636,7 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
fhp->fh_pre_ctime = stat.ctime;
fhp->fh_pre_size = stat.size;
fhp->fh_pre_saved = true;
+ return nfs_ok;
}
/**
@@ -643,26 +644,27 @@ void fh_fill_pre_attrs(struct svc_fh *fhp)
* @fhp: file handle to be updated
*
*/
-void fh_fill_post_attrs(struct svc_fh *fhp)
+__be32 fh_fill_post_attrs(struct svc_fh *fhp)
{
bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
struct inode *inode = d_inode(fhp->fh_dentry);
__be32 err;
if (fhp->fh_no_wcc)
- return;
+ return nfs_ok;
if (fhp->fh_post_saved)
printk("nfsd: inode locked twice during operation.\n");
err = fh_getattr(fhp, &fhp->fh_post_attr);
if (err)
- return;
+ return err;
fhp->fh_post_saved = true;
if (v4)
fhp->fh_post_change =
nfsd4_change_attribute(&fhp->fh_post_attr, inode);
+ return nfs_ok;
}
/**
@@ -672,16 +674,20 @@ void fh_fill_post_attrs(struct svc_fh *fhp)
* This is used when the directory wasn't changed, but wcc attributes
* are needed anyway.
*/
-void fh_fill_both_attrs(struct svc_fh *fhp)
+__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp)
{
- fh_fill_post_attrs(fhp);
- if (!fhp->fh_post_saved)
- return;
+ __be32 err;
+
+ err = fh_fill_post_attrs(fhp);
+ if (err)
+ return err;
+
fhp->fh_pre_change = fhp->fh_post_change;
fhp->fh_pre_mtime = fhp->fh_post_attr.mtime;
fhp->fh_pre_ctime = fhp->fh_post_attr.ctime;
fhp->fh_pre_size = fhp->fh_post_attr.size;
fhp->fh_pre_saved = true;
+ return nfs_ok;
}
/*
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 4e0ecf0ae2cf..40426f899e76 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -294,7 +294,7 @@ static inline void fh_clear_pre_post_attrs(struct svc_fh *fhp)
}
u64 nfsd4_change_attribute(struct kstat *stat, struct inode *inode);
-extern void fh_fill_pre_attrs(struct svc_fh *fhp);
-extern void fh_fill_post_attrs(struct svc_fh *fhp);
-extern void fh_fill_both_attrs(struct svc_fh *fhp);
+__be32 __must_check fh_fill_pre_attrs(struct svc_fh *fhp);
+__be32 fh_fill_post_attrs(struct svc_fh *fhp);
+__be32 __must_check fh_fill_both_attrs(struct svc_fh *fhp);
#endif /* _LINUX_NFSD_NFSFH_H */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 2154fa63c5f2..1582af33e204 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -542,9 +542,14 @@ static struct notifier_block nfsd_inet6addr_notifier = {
/* Only used under nfsd_mutex, so this atomic may be overkill: */
static atomic_t nfsd_notifier_refcount = ATOMIC_INIT(0);
-static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
+static void nfsd_last_thread(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv = nn->nfsd_serv;
+
+ spin_lock(&nfsd_notifier_lock);
+ nn->nfsd_serv = NULL;
+ spin_unlock(&nfsd_notifier_lock);
/* check if the notifier still has clients */
if (atomic_dec_return(&nfsd_notifier_refcount) == 0) {
@@ -554,6 +559,8 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
#endif
}
+ svc_xprt_destroy_all(serv, net);
+
/*
* write_ports can create the server without actually starting
* any threads--if we get shut down before any threads are
@@ -644,7 +651,8 @@ void nfsd_shutdown_threads(struct net *net)
svc_get(serv);
/* Kill outstanding nfsd threads */
svc_set_num_threads(serv, NULL, 0);
- nfsd_put(net);
+ nfsd_last_thread(net);
+ svc_put(serv);
mutex_unlock(&nfsd_mutex);
}
@@ -674,9 +682,6 @@ int nfsd_create_serv(struct net *net)
serv->sv_maxconn = nn->max_connections;
error = svc_bind(serv, net);
if (error < 0) {
- /* NOT nfsd_put() as notifiers (see below) haven't
- * been set up yet.
- */
svc_put(serv);
return error;
}
@@ -719,29 +724,6 @@ int nfsd_get_nrthreads(int n, int *nthreads, struct net *net)
return 0;
}
-/* This is the callback for kref_put() below.
- * There is no code here as the first thing to be done is
- * call svc_shutdown_net(), but we cannot get the 'net' from
- * the kref. So do all the work when kref_put returns true.
- */
-static void nfsd_noop(struct kref *ref)
-{
-}
-
-void nfsd_put(struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
-
- if (kref_put(&nn->nfsd_serv->sv_refcnt, nfsd_noop)) {
- svc_xprt_destroy_all(nn->nfsd_serv, net);
- nfsd_last_thread(nn->nfsd_serv, net);
- svc_destroy(&nn->nfsd_serv->sv_refcnt);
- spin_lock(&nfsd_notifier_lock);
- nn->nfsd_serv = NULL;
- spin_unlock(&nfsd_notifier_lock);
- }
-}
-
int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
{
int i = 0;
@@ -792,7 +774,7 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
if (err)
break;
}
- nfsd_put(net);
+ svc_put(nn->nfsd_serv);
return err;
}
@@ -807,6 +789,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
int error;
bool nfsd_up_before;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct svc_serv *serv;
mutex_lock(&nfsd_mutex);
dprintk("nfsd: creating service\n");
@@ -826,22 +809,25 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
goto out;
nfsd_up_before = nn->nfsd_net_up;
+ serv = nn->nfsd_serv;
error = nfsd_startup_net(net, cred);
if (error)
goto out_put;
- error = svc_set_num_threads(nn->nfsd_serv, NULL, nrservs);
+ error = svc_set_num_threads(serv, NULL, nrservs);
if (error)
goto out_shutdown;
- error = nn->nfsd_serv->sv_nrthreads;
+ error = serv->sv_nrthreads;
+ if (error == 0)
+ nfsd_last_thread(net);
out_shutdown:
if (error < 0 && !nfsd_up_before)
nfsd_shutdown_net(net);
out_put:
/* Threads now hold service active */
if (xchg(&nn->keep_active, 0))
- nfsd_put(net);
- nfsd_put(net);
+ svc_put(serv);
+ svc_put(serv);
out:
mutex_unlock(&nfsd_mutex);
return error;
@@ -953,7 +939,6 @@ nfsd(void *vrqstp)
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- int err;
/* At this point, the thread shares current->fs
* with the init process. We need to create files with the
@@ -965,15 +950,6 @@ nfsd(void *vrqstp)
current->fs->umask = 0;
- /*
- * thread is spawned with all signals set to SIG_IGN, re-enable
- * the ones that will bring down the thread
- */
- allow_signal(SIGKILL);
- allow_signal(SIGHUP);
- allow_signal(SIGINT);
- allow_signal(SIGQUIT);
-
atomic_inc(&nfsdstats.th_cnt);
set_freezable();
@@ -981,54 +957,19 @@ nfsd(void *vrqstp)
/*
* The main request loop
*/
- for (;;) {
+ while (!kthread_should_stop()) {
/* Update sv_maxconn if it has changed */
rqstp->rq_server->sv_maxconn = nn->max_connections;
- /*
- * Find a socket with data available and call its
- * recvfrom routine.
- */
- while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
- ;
- if (err == -EINTR)
- break;
- validate_process_creds();
- svc_process(rqstp);
+ svc_recv(rqstp);
validate_process_creds();
}
- /* Clear signals before calling svc_exit_thread() */
- flush_signals(current);
-
atomic_dec(&nfsdstats.th_cnt);
out:
- /* Take an extra ref so that the svc_put in svc_exit_thread()
- * doesn't call svc_destroy()
- */
- svc_get(nn->nfsd_serv);
-
/* Release the thread */
svc_exit_thread(rqstp);
-
- /* We need to drop a ref, but may not drop the last reference
- * without holding nfsd_mutex, and we cannot wait for nfsd_mutex as that
- * could deadlock with nfsd_shutdown_threads() waiting for us.
- * So three options are:
- * - drop a non-final reference,
- * - get the mutex without waiting
- * - sleep briefly andd try the above again
- */
- while (!svc_put_not_last(nn->nfsd_serv)) {
- if (mutex_trylock(&nfsd_mutex)) {
- nfsd_put(net);
- mutex_unlock(&nfsd_mutex);
- break;
- }
- msleep(20);
- }
-
return 0;
}
@@ -1046,6 +987,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
{
const struct svc_procedure *proc = rqstp->rq_procinfo;
__be32 *statp = rqstp->rq_accept_statp;
+ struct nfsd_cacherep *rp;
/*
* Give the xdr decoder a chance to change this if it wants
@@ -1056,7 +998,8 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
goto out_decode_err;
- switch (nfsd_cache_lookup(rqstp)) {
+ rp = NULL;
+ switch (nfsd_cache_lookup(rqstp, &rp)) {
case RC_DOIT:
break;
case RC_REPLY:
@@ -1072,7 +1015,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
goto out_encode_err;
- nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
+ nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, statp + 1);
out_cached_reply:
return 1;
@@ -1082,13 +1025,13 @@ out_decode_err:
return 1;
out_update_drop:
- nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL);
out_dropit:
return 0;
out_encode_err:
trace_nfsd_cant_encode_err(rqstp);
- nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ nfsd_cache_update(rqstp, rp, RC_NOCACHE, NULL);
*statp = rpc_system_err;
return 1;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index d49d3060ed4f..cbddcf484dba 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -732,4 +732,7 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
cmpxchg(&clp->cl_state, NFSD4_COURTESY, NFSD4_EXPIRABLE);
return clp->cl_state == NFSD4_EXPIRABLE;
}
+
+extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
+ struct inode *inode);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 777e24e5da33..63797635e1c3 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -65,6 +65,8 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_printf(seq, " %lld",
percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
}
+ seq_printf(seq, "\nwdeleg_getattr %lld",
+ percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]));
seq_putc(seq, '\n');
#endif
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 9b43dc3d9991..cf5524e7ca06 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -22,6 +22,7 @@ enum {
NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
+ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
#endif
NFSD_STATS_COUNTERS_NUM
};
@@ -93,4 +94,10 @@ static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
}
+#ifdef CONFIG_NFSD_V4
+static inline void nfsd_stats_wdeleg_getattr_inc(void)
+{
+ percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]);
+}
+#endif
#endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 2af74983f146..803904348871 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -607,6 +607,7 @@ DEFINE_STATEID_EVENT(layout_recall_release);
DEFINE_STATEID_EVENT(open);
DEFINE_STATEID_EVENT(deleg_read);
+DEFINE_STATEID_EVENT(deleg_write);
DEFINE_STATEID_EVENT(deleg_return);
DEFINE_STATEID_EVENT(deleg_recall);
@@ -1240,8 +1241,8 @@ TRACE_EVENT(nfsd_drc_found,
TRACE_EVENT(nfsd_drc_mismatch,
TP_PROTO(
const struct nfsd_net *nn,
- const struct svc_cacherep *key,
- const struct svc_cacherep *rp
+ const struct nfsd_cacherep *key,
+ const struct nfsd_cacherep *rp
),
TP_ARGS(nn, key, rp),
TP_STRUCT__entry(
@@ -1261,6 +1262,28 @@ TRACE_EVENT(nfsd_drc_mismatch,
__entry->ingress)
);
+TRACE_EVENT_CONDITION(nfsd_drc_gc,
+ TP_PROTO(
+ const struct nfsd_net *nn,
+ unsigned long freed
+ ),
+ TP_ARGS(nn, freed),
+ TP_CONDITION(freed > 0),
+ TP_STRUCT__entry(
+ __field(unsigned long long, boot_time)
+ __field(unsigned long, freed)
+ __field(int, total)
+ ),
+ TP_fast_assign(
+ __entry->boot_time = nn->boot_time;
+ __entry->freed = freed;
+ __entry->total = atomic_read(&nn->num_drc_entries);
+ ),
+ TP_printk("boot_time=%16llx total=%d freed=%lu",
+ __entry->boot_time, __entry->total, __entry->freed
+ )
+);
+
TRACE_EVENT(nfsd_cb_args,
TP_PROTO(
const struct nfs4_client *clp,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 9b7acba382fe..48260cf68fde 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1540,7 +1540,9 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
dput(dchild);
if (err)
goto out_unlock;
- fh_fill_pre_attrs(fhp);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
fh_fill_post_attrs(fhp);
out_unlock:
@@ -1635,13 +1637,16 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
inode_unlock(dentry->d_inode);
goto out_drop_write;
}
- fh_fill_pre_attrs(fhp);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path);
err = nfserrno(host_err);
cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
if (!err)
nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
fh_fill_post_attrs(fhp);
+out_unlock:
inode_unlock(dentry->d_inode);
if (!err)
err = nfserrno(commit_metadata(fhp));
@@ -1703,7 +1708,9 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
err = nfserr_noent;
if (d_really_is_negative(dold))
goto out_dput;
- fh_fill_pre_attrs(ffhp);
+ err = fh_fill_pre_attrs(ffhp);
+ if (err != nfs_ok)
+ goto out_dput;
host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
fh_fill_post_attrs(ffhp);
inode_unlock(dirp);
@@ -1789,8 +1796,12 @@ retry:
}
trap = lock_rename(tdentry, fdentry);
- fh_fill_pre_attrs(ffhp);
- fh_fill_pre_attrs(tfhp);
+ err = fh_fill_pre_attrs(ffhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ err = fh_fill_pre_attrs(tfhp);
+ if (err != nfs_ok)
+ goto out_unlock;
odentry = lookup_one_len(fname, fdentry, flen);
host_err = PTR_ERR(odentry);
@@ -1857,6 +1868,7 @@ retry:
fh_fill_post_attrs(ffhp);
fh_fill_post_attrs(tfhp);
}
+out_unlock:
unlock_rename(tdentry, fdentry);
fh_drop_write(ffhp);
@@ -1916,12 +1928,14 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
goto out_unlock;
}
rinode = d_inode(rdentry);
- ihold(rinode);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ ihold(rinode);
if (!type)
type = d_inode(rdentry)->i_mode & S_IFMT;
- fh_fill_pre_attrs(fhp);
if (type != S_IFDIR) {
int retries;
@@ -2341,16 +2355,18 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
return nfserrno(ret);
inode_lock(fhp->fh_dentry->d_inode);
- fh_fill_pre_attrs(fhp);
-
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
name, NULL);
-
+ err = nfsd_xattr_errno(ret);
fh_fill_post_attrs(fhp);
+out_unlock:
inode_unlock(fhp->fh_dentry->d_inode);
fh_drop_write(fhp);
- return nfsd_xattr_errno(ret);
+ return err;
}
__be32
@@ -2368,15 +2384,17 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
if (ret)
return nfserrno(ret);
inode_lock(fhp->fh_dentry->d_inode);
- fh_fill_pre_attrs(fhp);
-
- ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry, name, buf,
- len, flags, NULL);
+ err = fh_fill_pre_attrs(fhp);
+ if (err != nfs_ok)
+ goto out_unlock;
+ ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
+ name, buf, len, flags, NULL);
fh_fill_post_attrs(fhp);
+ err = nfsd_xattr_errno(ret);
+out_unlock:
inode_unlock(fhp->fh_dentry->d_inode);
fh_drop_write(fhp);
-
- return nfsd_xattr_errno(ret);
+ return err;
}
#endif
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 510978e602da..9d918a79dc16 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -774,17 +774,6 @@ void warn_on_nonidempotent_op(struct nfsd4_op *op);
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
-static inline void
-set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
-{
- BUG_ON(!fhp->fh_pre_saved);
- cinfo->atomic = (u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
-
- cinfo->before_change = fhp->fh_pre_change;
- cinfo->after_change = fhp->fh_post_change;
-}
-
-
bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index f42594a9efe0..0f016d69c996 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -204,6 +204,8 @@ extern unsigned long nlmsvc_timeout;
extern bool nsm_use_hostnames;
extern u32 nsm_local_state;
+extern struct timer_list nlmsvc_retry;
+
/*
* Lockd client functions
*/
@@ -280,7 +282,7 @@ __be32 nlmsvc_testlock(struct svc_rqst *, struct nlm_file *,
struct nlm_host *, struct nlm_lock *,
struct nlm_lock *, struct nlm_cookie *);
__be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *);
-unsigned long nlmsvc_retry_blocked(void);
+void nlmsvc_retry_blocked(void);
void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t match);
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 518bd28f5ab8..35766963dd14 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -56,10 +56,14 @@ struct cache_head {
struct kref ref;
unsigned long flags;
};
-#define CACHE_VALID 0 /* Entry contains valid data */
-#define CACHE_NEGATIVE 1 /* Negative entry - there is no match for the key */
-#define CACHE_PENDING 2 /* An upcall has been sent but no reply received yet*/
-#define CACHE_CLEANED 3 /* Entry has been cleaned from cache */
+
+/* cache_head.flags */
+enum {
+ CACHE_VALID, /* Entry contains valid data */
+ CACHE_NEGATIVE, /* Negative entry - there is no match for the key */
+ CACHE_PENDING, /* An upcall has been sent but no reply received yet*/
+ CACHE_CLEANED, /* Entry has been cleaned from cache */
+};
#define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */
diff --git a/include/linux/sunrpc/stats.h b/include/linux/sunrpc/stats.h
index d94d4f410507..3ce1550d1beb 100644
--- a/include/linux/sunrpc/stats.h
+++ b/include/linux/sunrpc/stats.h
@@ -43,22 +43,6 @@ struct net;
#ifdef CONFIG_PROC_FS
int rpc_proc_init(struct net *);
void rpc_proc_exit(struct net *);
-#else
-static inline int rpc_proc_init(struct net *net)
-{
- return 0;
-}
-
-static inline void rpc_proc_exit(struct net *net)
-{
-}
-#endif
-
-#ifdef MODULE
-void rpc_modcount(struct inode *, int);
-#endif
-
-#ifdef CONFIG_PROC_FS
struct proc_dir_entry * rpc_proc_register(struct net *,struct rpc_stat *);
void rpc_proc_unregister(struct net *,const char *);
void rpc_proc_zero(const struct rpc_program *);
@@ -69,7 +53,14 @@ void svc_proc_unregister(struct net *, const char *);
void svc_seq_show(struct seq_file *,
const struct svc_stat *);
#else
+static inline int rpc_proc_init(struct net *net)
+{
+ return 0;
+}
+static inline void rpc_proc_exit(struct net *net)
+{
+}
static inline struct proc_dir_entry *rpc_proc_register(struct net *net, struct rpc_stat *s) { return NULL; }
static inline void rpc_proc_unregister(struct net *net, const char *p) {}
static inline void rpc_proc_zero(const struct rpc_program *p) {}
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f8751118c122..dbf5b21feafe 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -39,16 +39,20 @@ struct svc_pool {
struct list_head sp_all_threads; /* all server threads */
/* statistics on pool operation */
+ struct percpu_counter sp_messages_arrived;
struct percpu_counter sp_sockets_queued;
struct percpu_counter sp_threads_woken;
- struct percpu_counter sp_threads_timedout;
-#define SP_TASK_PENDING (0) /* still work to do even if no
- * xprt is queued. */
-#define SP_CONGESTED (1)
unsigned long sp_flags;
} ____cacheline_aligned_in_smp;
+/* bits for sp_flags */
+enum {
+ SP_TASK_PENDING, /* still work to do even if no xprt is queued */
+ SP_CONGESTED, /* all threads are busy, none idle */
+};
+
+
/*
* RPC service.
*
@@ -120,19 +124,6 @@ static inline void svc_put(struct svc_serv *serv)
kref_put(&serv->sv_refcnt, svc_destroy);
}
-/**
- * svc_put_not_last - decrement non-final reference count on SUNRPC serv
- * @serv: the svc_serv to have count decremented
- *
- * Returns: %true is refcount was decremented.
- *
- * If the refcount is 1, it is not decremented and instead failure is reported.
- */
-static inline bool svc_put_not_last(struct svc_serv *serv)
-{
- return refcount_dec_not_one(&serv->sv_refcnt.refcount);
-}
-
/*
* Maximum payload size supported by a kernel RPC server.
* This is use to determine the max number of pages nfsd is
@@ -232,16 +223,6 @@ struct svc_rqst {
u32 rq_proc; /* procedure number */
u32 rq_prot; /* IP protocol */
int rq_cachetype; /* catering to nfsd */
-#define RQ_SECURE (0) /* secure port */
-#define RQ_LOCAL (1) /* local request */
-#define RQ_USEDEFERRAL (2) /* use deferral */
-#define RQ_DROPME (3) /* drop current reply */
-#define RQ_SPLICE_OK (4) /* turned off in gss privacy
- * to prevent encrypting page
- * cache pages */
-#define RQ_VICTIM (5) /* about to be shut down */
-#define RQ_BUSY (6) /* request is busy */
-#define RQ_DATA (7) /* request has data */
unsigned long rq_flags; /* flags field */
ktime_t rq_qtime; /* enqueue time */
@@ -265,7 +246,6 @@ struct svc_rqst {
/* Catering to nfsd */
struct auth_domain * rq_client; /* RPC peer info */
struct auth_domain * rq_gssclient; /* "gss/"-style peer info */
- struct svc_cacherep * rq_cacherep; /* cache info */
struct task_struct *rq_task; /* service thread */
struct net *rq_bc_net; /* pointer to backchannel's
* net namespace
@@ -273,6 +253,19 @@ struct svc_rqst {
void ** rq_lease_breaker; /* The v4 client breaking a lease */
};
+/* bits for rq_flags */
+enum {
+ RQ_SECURE, /* secure port */
+ RQ_LOCAL, /* local request */
+ RQ_USEDEFERRAL, /* use deferral */
+ RQ_DROPME, /* drop current reply */
+ RQ_SPLICE_OK, /* turned off in gss privacy to prevent
+ * encrypting page cache pages */
+ RQ_VICTIM, /* about to be shut down */
+ RQ_BUSY, /* request is busy */
+ RQ_DATA, /* request has data */
+};
+
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
/*
@@ -344,7 +337,7 @@ struct svc_program {
char * pg_name; /* service name */
char * pg_class; /* class name: services sharing authentication */
struct svc_stat * pg_stats; /* rpc statistics */
- int (*pg_authenticate)(struct svc_rqst *);
+ enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp);
__be32 (*pg_init_request)(struct svc_rqst *,
const struct svc_program *,
struct svc_process_info *);
@@ -427,6 +420,7 @@ int svc_register(const struct svc_serv *, struct net *, const int,
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
+void svc_pool_wake_idle_thread(struct svc_pool *pool);
struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
const char * svc_proc_name(const struct svc_rqst *rqstp);
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index a6b12631db21..fa55d12dc765 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -56,23 +56,6 @@ struct svc_xprt {
struct list_head xpt_list;
struct list_head xpt_ready;
unsigned long xpt_flags;
-#define XPT_BUSY 0 /* enqueued/receiving */
-#define XPT_CONN 1 /* conn pending */
-#define XPT_CLOSE 2 /* dead or dying */
-#define XPT_DATA 3 /* data pending */
-#define XPT_TEMP 4 /* connected transport */
-#define XPT_DEAD 6 /* transport closed */
-#define XPT_CHNGBUF 7 /* need to change snd/rcv buf sizes */
-#define XPT_DEFERRED 8 /* deferred request pending */
-#define XPT_OLD 9 /* used for xprt aging mark+sweep */
-#define XPT_LISTENER 10 /* listening endpoint */
-#define XPT_CACHE_AUTH 11 /* cache auth info */
-#define XPT_LOCAL 12 /* connection from loopback interface */
-#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
-#define XPT_CONG_CTRL 14 /* has congestion control */
-#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
-#define XPT_TLS_SESSION 16 /* transport-layer security established */
-#define XPT_PEER_AUTH 17 /* peer has been authenticated */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
@@ -97,6 +80,27 @@ struct svc_xprt {
struct rpc_xprt_switch *xpt_bc_xps; /* NFSv4.1 backchannel */
};
+/* flag bits for xpt_flags */
+enum {
+ XPT_BUSY, /* enqueued/receiving */
+ XPT_CONN, /* conn pending */
+ XPT_CLOSE, /* dead or dying */
+ XPT_DATA, /* data pending */
+ XPT_TEMP, /* connected transport */
+ XPT_DEAD, /* transport closed */
+ XPT_CHNGBUF, /* need to change snd/rcv buf sizes */
+ XPT_DEFERRED, /* deferred request pending */
+ XPT_OLD, /* used for xprt aging mark+sweep */
+ XPT_LISTENER, /* listening endpoint */
+ XPT_CACHE_AUTH, /* cache auth info */
+ XPT_LOCAL, /* connection from loopback interface */
+ XPT_KILL_TEMP, /* call xpo_kill_temp_xprt before closing */
+ XPT_CONG_CTRL, /* has congestion control */
+ XPT_HANDSHAKE, /* xprt requests a handshake */
+ XPT_TLS_SESSION, /* transport-layer security established */
+ XPT_PEER_AUTH, /* peer has been authenticated */
+};
+
static inline void unregister_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u)
{
spin_lock(&xpt->xpt_lock);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 6d9cc9080aca..6f90203edbf8 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -83,6 +83,19 @@ struct auth_domain {
struct rcu_head rcu_head;
};
+enum svc_auth_status {
+ SVC_GARBAGE = 1,
+ SVC_SYSERR,
+ SVC_VALID,
+ SVC_NEGATIVE,
+ SVC_OK,
+ SVC_DROP,
+ SVC_CLOSE,
+ SVC_DENIED,
+ SVC_PENDING,
+ SVC_COMPLETE,
+};
+
/*
* Each authentication flavour registers an auth_ops
* structure.
@@ -98,6 +111,8 @@ struct auth_domain {
* is (probably) already in place. Certainly space is
* reserved for it.
* DROP - simply drop the request. It may have been deferred
+ * CLOSE - like SVC_DROP, but request is definitely lost.
+ * If there is a tcp connection, it should be closed.
* GARBAGE - rpc garbage_args error
* SYSERR - rpc system_err error
* DENIED - authp holds reason for denial.
@@ -111,14 +126,10 @@ struct auth_domain {
*
* release() is given a request after the procedure has been run.
* It should sign/encrypt the results if needed
- * It should return:
- * OK - the resbuf is ready to be sent
- * DROP - the reply should be quitely dropped
- * DENIED - authp holds a reason for MSG_DENIED
- * SYSERR - rpc system_err
*
* domain_release()
* This call releases a domain.
+ *
* set_client()
* Givens a pending request (struct svc_rqst), finds and assigns
* an appropriate 'auth_domain' as the client.
@@ -127,44 +138,28 @@ struct auth_ops {
char * name;
struct module *owner;
int flavour;
- int (*accept)(struct svc_rqst *rq);
- int (*release)(struct svc_rqst *rq);
- void (*domain_release)(struct auth_domain *);
- int (*set_client)(struct svc_rqst *rq);
-};
-#define SVC_GARBAGE 1
-#define SVC_SYSERR 2
-#define SVC_VALID 3
-#define SVC_NEGATIVE 4
-#define SVC_OK 5
-#define SVC_DROP 6
-#define SVC_CLOSE 7 /* Like SVC_DROP, but request is definitely
- * lost so if there is a tcp connection, it
- * should be closed
- */
-#define SVC_DENIED 8
-#define SVC_PENDING 9
-#define SVC_COMPLETE 10
+ enum svc_auth_status (*accept)(struct svc_rqst *rqstp);
+ int (*release)(struct svc_rqst *rqstp);
+ void (*domain_release)(struct auth_domain *dom);
+ enum svc_auth_status (*set_client)(struct svc_rqst *rqstp);
+};
struct svc_xprt;
-extern int svc_authenticate(struct svc_rqst *rqstp);
+extern enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp);
extern int svc_authorise(struct svc_rqst *rqstp);
-extern int svc_set_client(struct svc_rqst *rqstp);
+extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp);
extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
extern void svc_auth_unregister(rpc_authflavor_t flavor);
extern struct auth_domain *unix_domain_find(char *name);
extern void auth_domain_put(struct auth_domain *item);
-extern int auth_unix_add_addr(struct net *net, struct in6_addr *addr, struct auth_domain *dom);
extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
extern struct auth_domain *auth_domain_find(char *name);
-extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr);
-extern int auth_unix_forget_old(struct auth_domain *dom);
extern void svcauth_unix_purge(struct net *net);
extern void svcauth_unix_info_release(struct svc_xprt *xpt);
-extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
+extern enum svc_auth_status svcauth_unix_set_client(struct svc_rqst *rqstp);
extern int unix_gid_cache_create(struct net *net);
extern void unix_gid_cache_destroy(struct net *net);
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index a7116048a4d4..7c78ec6356b9 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -35,8 +35,8 @@ struct svc_sock {
/* Total length of the data (not including fragment headers)
* received so far in the fragments making up this rpc: */
u32 sk_datalen;
- /* Number of queued send requests */
- atomic_t sk_sendqlen;
+
+ struct page_frag_cache sk_frag_cache;
struct completion sk_handshake_done;
@@ -56,8 +56,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
/*
* Function prototypes.
*/
-void svc_close_net(struct svc_serv *, struct net *);
-int svc_recv(struct svc_rqst *, long);
+void svc_recv(struct svc_rqst *rqstp);
void svc_send(struct svc_rqst *rqstp);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
@@ -66,8 +65,6 @@ int svc_addsock(struct svc_serv *serv, struct net *net,
const struct cred *cred);
void svc_init_xprt_sock(void);
void svc_cleanup_xprt_sock(void);
-struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot);
-void svc_sock_destroy(struct svc_xprt *);
/*
* svc_makesock socket characteristics
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index f89ec4b5ea16..42f9d7eb9a1a 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -139,6 +139,8 @@ void xdr_terminate_string(const struct xdr_buf *, const u32);
size_t xdr_buf_pagecount(const struct xdr_buf *buf);
int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
void xdr_free_bvec(struct xdr_buf *buf);
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr);
static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
{
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 43711753616a..6beb38c1dcb5 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1706,7 +1706,7 @@ TRACE_DEFINE_ENUM(SVC_DENIED);
TRACE_DEFINE_ENUM(SVC_PENDING);
TRACE_DEFINE_ENUM(SVC_COMPLETE);
-#define svc_show_status(status) \
+#define show_svc_auth_status(status) \
__print_symbolic(status, \
{ SVC_GARBAGE, "SVC_GARBAGE" }, \
{ SVC_SYSERR, "SVC_SYSERR" }, \
@@ -1743,7 +1743,10 @@ TRACE_DEFINE_ENUM(SVC_COMPLETE);
__entry->xid, __get_sockaddr(server), __get_sockaddr(client)
TRACE_EVENT_CONDITION(svc_authenticate,
- TP_PROTO(const struct svc_rqst *rqst, int auth_res),
+ TP_PROTO(
+ const struct svc_rqst *rqst,
+ enum svc_auth_status auth_res
+ ),
TP_ARGS(rqst, auth_res),
@@ -1766,7 +1769,7 @@ TRACE_EVENT_CONDITION(svc_authenticate,
TP_printk(SVC_RQST_ENDPOINT_FORMAT
" auth_res=%s auth_stat=%s",
SVC_RQST_ENDPOINT_VARARGS,
- svc_show_status(__entry->svc_status),
+ show_svc_auth_status(__entry->svc_status),
rpc_show_auth_stat(__entry->auth_stat))
);
@@ -1918,25 +1921,42 @@ TRACE_EVENT(svc_stats_latency,
__get_str(procedure), __entry->execute)
);
+/*
+ * from include/linux/sunrpc/svc_xprt.h
+ */
+#define SVC_XPRT_FLAG_LIST \
+ svc_xprt_flag(BUSY) \
+ svc_xprt_flag(CONN) \
+ svc_xprt_flag(CLOSE) \
+ svc_xprt_flag(DATA) \
+ svc_xprt_flag(TEMP) \
+ svc_xprt_flag(DEAD) \
+ svc_xprt_flag(CHNGBUF) \
+ svc_xprt_flag(DEFERRED) \
+ svc_xprt_flag(OLD) \
+ svc_xprt_flag(LISTENER) \
+ svc_xprt_flag(CACHE_AUTH) \
+ svc_xprt_flag(LOCAL) \
+ svc_xprt_flag(KILL_TEMP) \
+ svc_xprt_flag(CONG_CTRL) \
+ svc_xprt_flag(HANDSHAKE) \
+ svc_xprt_flag(TLS_SESSION) \
+ svc_xprt_flag_end(PEER_AUTH)
+
+#undef svc_xprt_flag
+#undef svc_xprt_flag_end
+#define svc_xprt_flag(x) TRACE_DEFINE_ENUM(XPT_##x);
+#define svc_xprt_flag_end(x) TRACE_DEFINE_ENUM(XPT_##x);
+
+SVC_XPRT_FLAG_LIST
+
+#undef svc_xprt_flag
+#undef svc_xprt_flag_end
+#define svc_xprt_flag(x) { BIT(XPT_##x), #x },
+#define svc_xprt_flag_end(x) { BIT(XPT_##x), #x }
+
#define show_svc_xprt_flags(flags) \
- __print_flags(flags, "|", \
- { BIT(XPT_BUSY), "BUSY" }, \
- { BIT(XPT_CONN), "CONN" }, \
- { BIT(XPT_CLOSE), "CLOSE" }, \
- { BIT(XPT_DATA), "DATA" }, \
- { BIT(XPT_TEMP), "TEMP" }, \
- { BIT(XPT_DEAD), "DEAD" }, \
- { BIT(XPT_CHNGBUF), "CHNGBUF" }, \
- { BIT(XPT_DEFERRED), "DEFERRED" }, \
- { BIT(XPT_OLD), "OLD" }, \
- { BIT(XPT_LISTENER), "LISTENER" }, \
- { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
- { BIT(XPT_LOCAL), "LOCAL" }, \
- { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
- { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
- { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
- { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
- { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
+ __print_flags(flags, "|", SVC_XPRT_FLAG_LIST)
TRACE_EVENT(svc_xprt_create_err,
TP_PROTO(
@@ -1994,25 +2014,25 @@ TRACE_EVENT(svc_xprt_create_err,
TRACE_EVENT(svc_xprt_enqueue,
TP_PROTO(
const struct svc_xprt *xprt,
- const struct svc_rqst *rqst
+ unsigned long flags
),
- TP_ARGS(xprt, rqst),
+ TP_ARGS(xprt, flags),
TP_STRUCT__entry(
SVC_XPRT_ENDPOINT_FIELDS(xprt)
-
- __field(int, pid)
),
TP_fast_assign(
- SVC_XPRT_ENDPOINT_ASSIGNMENTS(xprt);
-
- __entry->pid = rqst? rqst->rq_task->pid : 0;
+ __assign_sockaddr(server, &xprt->xpt_local,
+ xprt->xpt_locallen);
+ __assign_sockaddr(client, &xprt->xpt_remote,
+ xprt->xpt_remotelen);
+ __entry->flags = flags;
+ __entry->netns_ino = xprt->xpt_net->ns.inum;
),
- TP_printk(SVC_XPRT_ENDPOINT_FORMAT " pid=%d",
- SVC_XPRT_ENDPOINT_VARARGS, __entry->pid)
+ TP_printk(SVC_XPRT_ENDPOINT_FORMAT, SVC_XPRT_ENDPOINT_VARARGS)
);
TRACE_EVENT(svc_xprt_dequeue,
diff --git a/net/sunrpc/.kunitconfig b/net/sunrpc/.kunitconfig
index a55a00fa649b..eb02b906c295 100644
--- a/net/sunrpc/.kunitconfig
+++ b/net/sunrpc/.kunitconfig
@@ -23,7 +23,6 @@ CONFIG_NFS_FS=y
CONFIG_SUNRPC=y
CONFIG_SUNRPC_GSS=y
CONFIG_RPCSEC_GSS_KRB5=y
-CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA=y
CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2=y
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 4afc5fd71d44..2d8b67dac7b5 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -34,38 +34,6 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
-config RPCSEC_GSS_KRB5_SIMPLIFIED
- bool
- depends on RPCSEC_GSS_KRB5
-
-config RPCSEC_GSS_KRB5_CRYPTOSYSTEM
- bool
- depends on RPCSEC_GSS_KRB5
-
-config RPCSEC_GSS_KRB5_ENCTYPES_DES
- bool "Enable Kerberos enctypes based on DES (deprecated)"
- depends on RPCSEC_GSS_KRB5
- depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_ECB
- depends on CRYPTO_HMAC && CRYPTO_MD5 && CRYPTO_SHA1
- depends on CRYPTO_DES
- default n
- select RPCSEC_GSS_KRB5_SIMPLIFIED
- help
- Choose Y to enable the use of deprecated Kerberos 5
- encryption types that utilize Data Encryption Standard
- (DES) based ciphers. These include des-cbc-md5,
- des-cbc-crc, and des-cbc-md4, which were deprecated by
- RFC 6649, and des3-cbc-sha1, which was deprecated by RFC
- 8429.
-
- These encryption types are known to be insecure, therefore
- the default setting of this option is N. Support for these
- encryption types is available only for compatibility with
- legacy NFS client and server implementations.
-
- Removal of support is planned for a subsequent kernel
- release.
-
config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1
bool "Enable Kerberos enctypes based on AES and SHA-1"
depends on RPCSEC_GSS_KRB5
@@ -73,7 +41,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1
depends on CRYPTO_HMAC && CRYPTO_SHA1
depends on CRYPTO_AES
default y
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Advanced Encryption Standard (AES) ciphers and
@@ -86,7 +53,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA
depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_CAMELLIA
depends on CRYPTO_CMAC
default n
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Camellia ciphers (RFC 3713) and CMAC digests
@@ -100,7 +66,6 @@ config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2
depends on CRYPTO_HMAC && CRYPTO_SHA256 && CRYPTO_SHA512
depends on CRYPTO_AES
default n
- select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
help
Choose Y to enable the use of Kerberos 5 encryption types
that utilize Advanced Encryption Standard (AES) ciphers and
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 012ae1720689..ad1736d93b76 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -12,6 +12,6 @@ auth_rpcgss-y := auth_gss.o gss_generic_token.o \
obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
- gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+ gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
obj-$(CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST) += gss_krb5_test.o
diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h
index b673e2626acb..3afd4065bf3d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_internal.h
+++ b/net/sunrpc/auth_gss/gss_krb5_internal.h
@@ -33,7 +33,6 @@ struct gss_krb5_enctype {
const u32 Ke_length; /* encryption subkey length, in octets */
const u32 Ki_length; /* integrity subkey length, in octets */
- int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask);
int (*derive_key)(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *in,
struct xdr_netobj *out,
@@ -85,24 +84,15 @@ struct krb5_ctx {
* GSS Kerberos 5 mechanism Per-Message calls.
*/
-u32 gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token);
u32 gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token);
-u32 gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
- struct xdr_netobj *read_token);
u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
struct xdr_netobj *read_token);
-u32 gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset,
- struct xdr_buf *buf, struct page **pages);
u32 gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset,
struct xdr_buf *buf, struct page **pages);
-u32 gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align);
u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
struct xdr_buf *buf, unsigned int *slack,
unsigned int *align);
@@ -113,12 +103,6 @@ u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
/* Key Derivation Functions */
-int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e,
- const struct xdr_netobj *inkey,
- struct xdr_netobj *outkey,
- const struct xdr_netobj *label,
- gfp_t gfp_mask);
-
int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *inkey,
struct xdr_netobj *outkey,
@@ -169,13 +153,6 @@ static inline int krb5_derive_key(struct krb5_ctx *kctx,
return gk5e->derive_key(gk5e, inkey, outkey, &label, gfp_mask);
}
-s32 krb5_make_seq_num(struct krb5_ctx *kctx, struct crypto_sync_skcipher *key,
- int direction, u32 seqnum, unsigned char *cksum,
- unsigned char *buf);
-
-s32 krb5_get_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
- unsigned char *buf, int *direction, u32 *seqnum);
-
void krb5_make_confounder(u8 *p, int conflen);
u32 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 5347fe1cc93f..06d8ee0db000 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -222,90 +222,6 @@ err_return:
return ret;
}
-#define smask(step) ((1<<step)-1)
-#define pstep(x, step) (((x)&smask(step))^(((x)>>step)&smask(step)))
-#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1)
-
-static void mit_des_fixup_key_parity(u8 key[8])
-{
- int i;
- for (i = 0; i < 8; i++) {
- key[i] &= 0xfe;
- key[i] |= 1^parity_char(key[i]);
- }
-}
-
-static int krb5_random_to_key_v1(const struct gss_krb5_enctype *gk5e,
- struct xdr_netobj *randombits,
- struct xdr_netobj *key)
-{
- int i, ret = -EINVAL;
-
- if (key->len != 24) {
- dprintk("%s: key->len is %d\n", __func__, key->len);
- goto err_out;
- }
- if (randombits->len != 21) {
- dprintk("%s: randombits->len is %d\n",
- __func__, randombits->len);
- goto err_out;
- }
-
- /* take the seven bytes, move them around into the top 7 bits of the
- 8 key bytes, then compute the parity bits. Do this three times. */
-
- for (i = 0; i < 3; i++) {
- memcpy(key->data + i*8, randombits->data + i*7, 7);
- key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
- ((key->data[i*8+1]&1)<<2) |
- ((key->data[i*8+2]&1)<<3) |
- ((key->data[i*8+3]&1)<<4) |
- ((key->data[i*8+4]&1)<<5) |
- ((key->data[i*8+5]&1)<<6) |
- ((key->data[i*8+6]&1)<<7));
-
- mit_des_fixup_key_parity(key->data + i*8);
- }
- ret = 0;
-err_out:
- return ret;
-}
-
-/**
- * krb5_derive_key_v1 - Derive a subkey for an RFC 3961 enctype
- * @gk5e: Kerberos 5 enctype profile
- * @inkey: base protocol key
- * @outkey: OUT: derived key
- * @label: subkey usage label
- * @gfp_mask: memory allocation control flags
- *
- * Caller sets @outkey->len to the desired length of the derived key.
- *
- * On success, returns 0 and fills in @outkey. A negative errno value
- * is returned on failure.
- */
-int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e,
- const struct xdr_netobj *inkey,
- struct xdr_netobj *outkey,
- const struct xdr_netobj *label,
- gfp_t gfp_mask)
-{
- struct xdr_netobj inblock;
- int ret;
-
- inblock.len = gk5e->keybytes;
- inblock.data = kmalloc(inblock.len, gfp_mask);
- if (!inblock.data)
- return -ENOMEM;
-
- ret = krb5_DK(gk5e, inkey, inblock.data, label, gfp_mask);
- if (!ret)
- ret = krb5_random_to_key_v1(gk5e, &inblock, outkey);
-
- kfree_sensitive(inblock.data);
- return ret;
-}
-
/*
* This is the identity function, with some sanity checking.
*/
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 20e21d08badb..e31cfdf7eadc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -30,61 +30,7 @@
static struct gss_api_mech gss_kerberos_mech;
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-static int gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask);
-static int gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask);
-#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM)
-static int gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask);
-#endif
-
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
-#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES)
- /*
- * DES (All DES enctypes are mapped to the same gss functionality)
- */
- {
- .etype = ENCTYPE_DES_CBC_RAW,
- .ctype = CKSUMTYPE_RSA_MD5,
- .name = "des-cbc-crc",
- .encrypt_name = "cbc(des)",
- .cksum_name = "md5",
- .import_ctx = gss_krb5_import_ctx_des,
- .get_mic = gss_krb5_get_mic_v1,
- .verify_mic = gss_krb5_verify_mic_v1,
- .wrap = gss_krb5_wrap_v1,
- .unwrap = gss_krb5_unwrap_v1,
- .signalg = SGN_ALG_DES_MAC_MD5,
- .sealalg = SEAL_ALG_DES,
- .keybytes = 7,
- .keylength = 8,
- .cksumlength = 8,
- .keyed_cksum = 0,
- },
- /*
- * 3DES
- */
- {
- .etype = ENCTYPE_DES3_CBC_RAW,
- .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
- .name = "des3-hmac-sha1",
- .encrypt_name = "cbc(des3_ede)",
- .cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v1,
- .derive_key = krb5_derive_key_v1,
- .get_mic = gss_krb5_get_mic_v1,
- .verify_mic = gss_krb5_verify_mic_v1,
- .wrap = gss_krb5_wrap_v1,
- .unwrap = gss_krb5_unwrap_v1,
- .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
- .sealalg = SEAL_ALG_DES3KD,
- .keybytes = 21,
- .keylength = 24,
- .cksumlength = 20,
- .keyed_cksum = 1,
- },
-#endif
-
#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1)
/*
* AES-128 with SHA-1 (RFC 3962)
@@ -96,7 +42,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.encrypt_name = "cts(cbc(aes))",
.aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_derive_key_v2,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -126,7 +71,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.encrypt_name = "cts(cbc(aes))",
.aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_derive_key_v2,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -166,7 +110,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(128),
.Ki_length = BITS2OCTETS(128),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_feedback_cmac,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -193,7 +136,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(256),
.Ki_length = BITS2OCTETS(256),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_feedback_cmac,
.encrypt = gss_krb5_aes_encrypt,
.decrypt = gss_krb5_aes_decrypt,
@@ -223,7 +165,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(128),
.Ki_length = BITS2OCTETS(128),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_hmac_sha2,
.encrypt = krb5_etm_encrypt,
.decrypt = krb5_etm_decrypt,
@@ -250,7 +191,6 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.Ke_length = BITS2OCTETS(256),
.Ki_length = BITS2OCTETS(192),
- .import_ctx = gss_krb5_import_ctx_v2,
.derive_key = krb5_kdf_hmac_sha2,
.encrypt = krb5_etm_encrypt,
.decrypt = krb5_etm_decrypt,
@@ -284,12 +224,6 @@ static void gss_krb5_prepare_enctype_priority_list(void)
ENCTYPE_AES256_CTS_HMAC_SHA1_96,
ENCTYPE_AES128_CTS_HMAC_SHA1_96,
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES)
- ENCTYPE_DES3_CBC_SHA1,
- ENCTYPE_DES_CBC_MD5,
- ENCTYPE_DES_CBC_CRC,
- ENCTYPE_DES_CBC_MD4,
-#endif
};
size_t total, i;
char buf[16];
@@ -330,185 +264,6 @@ const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype)
EXPORT_SYMBOL_IF_KUNIT(gss_krb5_lookup_enctype);
static struct crypto_sync_skcipher *
-gss_krb5_alloc_cipher_v1(struct krb5_ctx *ctx, struct xdr_netobj *key)
-{
- struct crypto_sync_skcipher *tfm;
-
- tfm = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
- if (IS_ERR(tfm))
- return NULL;
- if (crypto_sync_skcipher_setkey(tfm, key->data, key->len)) {
- crypto_free_sync_skcipher(tfm);
- return NULL;
- }
- return tfm;
-}
-
-static inline const void *
-get_key(const void *p, const void *end,
- struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
-{
- struct crypto_sync_skcipher *tfm;
- struct xdr_netobj key;
- int alg;
-
- p = simple_get_bytes(p, end, &alg, sizeof(alg));
- if (IS_ERR(p))
- goto out_err;
- switch (alg) {
- case ENCTYPE_DES_CBC_CRC:
- case ENCTYPE_DES_CBC_MD4:
- case ENCTYPE_DES_CBC_MD5:
- /* Map all these key types to ENCTYPE_DES_CBC_RAW */
- alg = ENCTYPE_DES_CBC_RAW;
- break;
- }
- if (!gss_krb5_lookup_enctype(alg)) {
- pr_warn("gss_krb5: unsupported enctype: %d\n", alg);
- goto out_err_inval;
- }
-
- p = simple_get_netobj(p, end, &key);
- if (IS_ERR(p))
- goto out_err;
- tfm = gss_krb5_alloc_cipher_v1(ctx, &key);
- kfree(key.data);
- if (!tfm) {
- pr_warn("gss_krb5: failed to initialize cipher '%s'\n",
- ctx->gk5e->encrypt_name);
- goto out_err_inval;
- }
- *res = tfm;
-
- return p;
-
-out_err_inval:
- p = ERR_PTR(-EINVAL);
-out_err:
- return p;
-}
-
-static int
-gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
-{
- u32 seq_send;
- int tmp;
- u32 time32;
-
- p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
- if (IS_ERR(p))
- goto out_err;
-
- /* Old format supports only DES! Any other enctype uses new format */
- ctx->enctype = ENCTYPE_DES_CBC_RAW;
-
- ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
- if (ctx->gk5e == NULL) {
- p = ERR_PTR(-EINVAL);
- goto out_err;
- }
-
- /* The downcall format was designed before we completely understood
- * the uses of the context fields; so it includes some stuff we
- * just give some minimal sanity-checking, and some we ignore
- * completely (like the next twenty bytes): */
- if (unlikely(p + 20 > end || p + 20 < p)) {
- p = ERR_PTR(-EFAULT);
- goto out_err;
- }
- p += 20;
- p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
- if (IS_ERR(p))
- goto out_err;
- if (tmp != SGN_ALG_DES_MAC_MD5) {
- p = ERR_PTR(-ENOSYS);
- goto out_err;
- }
- p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
- if (IS_ERR(p))
- goto out_err;
- if (tmp != SEAL_ALG_DES) {
- p = ERR_PTR(-ENOSYS);
- goto out_err;
- }
- p = simple_get_bytes(p, end, &time32, sizeof(time32));
- if (IS_ERR(p))
- goto out_err;
- /* unsigned 32-bit time overflows in year 2106 */
- ctx->endtime = (time64_t)time32;
- p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
- if (IS_ERR(p))
- goto out_err;
- atomic_set(&ctx->seq_send, seq_send);
- p = simple_get_netobj(p, end, &ctx->mech_used);
- if (IS_ERR(p))
- goto out_err;
- p = get_key(p, end, ctx, &ctx->enc);
- if (IS_ERR(p))
- goto out_err_free_mech;
- p = get_key(p, end, ctx, &ctx->seq);
- if (IS_ERR(p))
- goto out_err_free_key1;
- if (p != end) {
- p = ERR_PTR(-EFAULT);
- goto out_err_free_key2;
- }
-
- return 0;
-
-out_err_free_key2:
- crypto_free_sync_skcipher(ctx->seq);
-out_err_free_key1:
- crypto_free_sync_skcipher(ctx->enc);
-out_err_free_mech:
- kfree(ctx->mech_used.data);
-out_err:
- return PTR_ERR(p);
-}
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-static int
-gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask)
-{
- return -EINVAL;
-}
-
-static int
-gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask)
-{
- struct xdr_netobj keyin, keyout;
-
- keyin.data = ctx->Ksess;
- keyin.len = ctx->gk5e->keylength;
-
- ctx->seq = gss_krb5_alloc_cipher_v1(ctx, &keyin);
- if (ctx->seq == NULL)
- goto out_err;
- ctx->enc = gss_krb5_alloc_cipher_v1(ctx, &keyin);
- if (ctx->enc == NULL)
- goto out_free_seq;
-
- /* derive cksum */
- keyout.data = ctx->cksum;
- keyout.len = ctx->gk5e->keylength;
- if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_SIGN,
- KEY_USAGE_SEED_CHECKSUM, gfp_mask))
- goto out_free_enc;
-
- return 0;
-
-out_free_enc:
- crypto_free_sync_skcipher(ctx->enc);
-out_free_seq:
- crypto_free_sync_skcipher(ctx->seq);
-out_err:
- return -EINVAL;
-}
-#endif
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM)
-
-static struct crypto_sync_skcipher *
gss_krb5_alloc_cipher_v2(const char *cname, const struct xdr_netobj *key)
{
struct crypto_sync_skcipher *tfm;
@@ -636,8 +391,6 @@ out_free:
goto out;
}
-#endif
-
static int
gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
gfp_t gfp_mask)
@@ -671,9 +424,6 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
if (IS_ERR(p))
goto out_err;
- /* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
- if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
- ctx->enctype = ENCTYPE_DES3_CBC_RAW;
ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
@@ -700,7 +450,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
- return ctx->gk5e->import_ctx(ctx, gfp_mask);
+ return gss_krb5_import_ctx_v2(ctx, gfp_mask);
out_err:
return PTR_ERR(p);
@@ -718,10 +468,7 @@ gss_krb5_import_sec_context(const void *p, size_t len, struct gss_ctx *ctx_id,
if (ctx == NULL)
return -ENOMEM;
- if (len == 85)
- ret = gss_import_v1_context(p, end, ctx);
- else
- ret = gss_import_v2_context(p, end, ctx, gfp_mask);
+ ret = gss_import_v2_context(p, end, ctx, gfp_mask);
memzero_explicit(&ctx->Ksess, sizeof(ctx->Ksess));
if (ret) {
kfree(ctx);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 146aa755f07d..ce540df9bce4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -71,75 +71,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-
-static void *
-setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
-{
- u16 *ptr;
- void *krb5_hdr;
- int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
-
- token->len = g_token_size(&ctx->mech_used, body_size);
-
- ptr = (u16 *)token->data;
- g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
-
- /* ptr now at start of header described in rfc 1964, section 1.2.1: */
- krb5_hdr = ptr;
- *ptr++ = KG_TOK_MIC_MSG;
- /*
- * signalg is stored as if it were converted from LE to host endian, even
- * though it's an opaque pair of bytes according to the RFC.
- */
- *ptr++ = (__force u16)cpu_to_le16(ctx->gk5e->signalg);
- *ptr++ = SEAL_ALG_NONE;
- *ptr = 0xffff;
-
- return krb5_hdr;
-}
-
-u32
-gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
-{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- void *ptr;
- time64_t now;
- u32 seq_send;
- u8 *cksumkey;
-
- dprintk("RPC: %s\n", __func__);
- BUG_ON(ctx == NULL);
-
- now = ktime_get_real_seconds();
-
- ptr = setup_token(ctx, token);
-
- if (ctx->gk5e->keyed_cksum)
- cksumkey = ctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
- KG_USAGE_SIGN, &md5cksum))
- return GSS_S_FAILURE;
-
- memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
-
- seq_send = atomic_fetch_inc(&ctx->seq_send);
-
- if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
- seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
- return GSS_S_FAILURE;
-
- return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
-
-#endif
-
static void *
setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
deleted file mode 100644
index 1babc3474e10..000000000000
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * linux/net/sunrpc/gss_krb5_seqnum.c
- *
- * Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/util_seqnum.c
- *
- * Copyright (c) 2000 The Regents of the University of Michigan.
- * All rights reserved.
- *
- * Andy Adamson <andros@umich.edu>
- */
-
-/*
- * Copyright 1993 by OpenVision Technologies, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appears in all copies and
- * that both that copyright notice and this permission notice appear in
- * supporting documentation, and that the name of OpenVision not be used
- * in advertising or publicity pertaining to distribution of the software
- * without specific, written prior permission. OpenVision makes no
- * representations about the suitability of this software for any
- * purpose. It is provided "as is" without express or implied warranty.
- *
- * OPENVISION DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
- * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
- * EVENT SHALL OPENVISION BE LIABLE FOR ANY SPECIAL, INDIRECT OR
- * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
- * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
- * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
- * PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <crypto/skcipher.h>
-#include <linux/types.h>
-#include <linux/sunrpc/gss_krb5.h>
-
-#include "gss_krb5_internal.h"
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
-s32
-krb5_make_seq_num(struct krb5_ctx *kctx,
- struct crypto_sync_skcipher *key,
- int direction,
- u32 seqnum,
- unsigned char *cksum, unsigned char *buf)
-{
- unsigned char *plain;
- s32 code;
-
- plain = kmalloc(8, GFP_KERNEL);
- if (!plain)
- return -ENOMEM;
-
- plain[0] = (unsigned char) (seqnum & 0xff);
- plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
- plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
- plain[3] = (unsigned char) ((seqnum >> 24) & 0xff);
-
- plain[4] = direction;
- plain[5] = direction;
- plain[6] = direction;
- plain[7] = direction;
-
- code = krb5_encrypt(key, cksum, plain, buf, 8);
- kfree(plain);
- return code;
-}
-
-s32
-krb5_get_seq_num(struct krb5_ctx *kctx,
- unsigned char *cksum,
- unsigned char *buf,
- int *direction, u32 *seqnum)
-{
- s32 code;
- unsigned char *plain;
- struct crypto_sync_skcipher *key = kctx->seq;
-
- dprintk("RPC: krb5_get_seq_num:\n");
-
- plain = kmalloc(8, GFP_KERNEL);
- if (!plain)
- return -ENOMEM;
-
- if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
- goto out;
-
- if ((plain[4] != plain[5]) || (plain[4] != plain[6]) ||
- (plain[4] != plain[7])) {
- code = (s32)KG_BAD_SEQ;
- goto out;
- }
-
- *direction = plain[4];
-
- *seqnum = ((plain[0]) |
- (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
-
-out:
- kfree(plain);
- return code;
-}
diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c
index 95ca783795c5..85625e3f3814 100644
--- a/net/sunrpc/auth_gss/gss_krb5_test.c
+++ b/net/sunrpc/auth_gss/gss_krb5_test.c
@@ -320,208 +320,12 @@ static void rfc3961_nfold_case(struct kunit *test)
"result mismatch");
}
-/*
- * RFC 3961 Appendix A.3. DES3 DR and DK
- *
- * These tests show the derived-random and derived-key values for the
- * des3-hmac-sha1-kd encryption scheme, using the DR and DK functions
- * defined in section 6.3.1. The input keys were randomly generated;
- * the usage values are from this specification.
- *
- * This test material is copyright (C) The Internet Society (2005).
- */
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_155,
- 0x00, 0x00, 0x00, 0x01, 0x55
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_1aa,
- 0x00, 0x00, 0x00, 0x01, 0xaa
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_kerberos,
- 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_base_key,
- 0xdc, 0xe0, 0x6b, 0x1f, 0x64, 0xc8, 0x57, 0xa1,
- 0x1c, 0x3d, 0xb5, 0x7c, 0x51, 0x89, 0x9b, 0x2c,
- 0xc1, 0x79, 0x10, 0x08, 0xce, 0x97, 0x3b, 0x92
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_derived_key,
- 0x92, 0x51, 0x79, 0xd0, 0x45, 0x91, 0xa7, 0x9b,
- 0x5d, 0x31, 0x92, 0xc4, 0xa7, 0xe9, 0xc2, 0x89,
- 0xb0, 0x49, 0xc7, 0x1f, 0x6e, 0xe6, 0x04, 0xcd
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_base_key,
- 0x5e, 0x13, 0xd3, 0x1c, 0x70, 0xef, 0x76, 0x57,
- 0x46, 0x57, 0x85, 0x31, 0xcb, 0x51, 0xc1, 0x5b,
- 0xf1, 0x1c, 0xa8, 0x2c, 0x97, 0xce, 0xe9, 0xf2
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_derived_key,
- 0x9e, 0x58, 0xe5, 0xa1, 0x46, 0xd9, 0x94, 0x2a,
- 0x10, 0x1c, 0x46, 0x98, 0x45, 0xd6, 0x7a, 0x20,
- 0xe3, 0xc4, 0x25, 0x9e, 0xd9, 0x13, 0xf2, 0x07
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_base_key,
- 0x98, 0xe6, 0xfd, 0x8a, 0x04, 0xa4, 0xb6, 0x85,
- 0x9b, 0x75, 0xa1, 0x76, 0x54, 0x0b, 0x97, 0x52,
- 0xba, 0xd3, 0xec, 0xd6, 0x10, 0xa2, 0x52, 0xbc
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_derived_key,
- 0x13, 0xfe, 0xf8, 0x0d, 0x76, 0x3e, 0x94, 0xec,
- 0x6d, 0x13, 0xfd, 0x2c, 0xa1, 0xd0, 0x85, 0x07,
- 0x02, 0x49, 0xda, 0xd3, 0x98, 0x08, 0xea, 0xbf
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_base_key,
- 0x62, 0x2a, 0xec, 0x25, 0xa2, 0xfe, 0x2c, 0xad,
- 0x70, 0x94, 0x68, 0x0b, 0x7c, 0x64, 0x94, 0x02,
- 0x80, 0x08, 0x4c, 0x1a, 0x7c, 0xec, 0x92, 0xb5
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_derived_key,
- 0xf8, 0xdf, 0xbf, 0x04, 0xb0, 0x97, 0xe6, 0xd9,
- 0xdc, 0x07, 0x02, 0x68, 0x6b, 0xcb, 0x34, 0x89,
- 0xd9, 0x1f, 0xd9, 0xa4, 0x51, 0x6b, 0x70, 0x3e
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_base_key,
- 0xd3, 0xf8, 0x29, 0x8c, 0xcb, 0x16, 0x64, 0x38,
- 0xdc, 0xb9, 0xb9, 0x3e, 0xe5, 0xa7, 0x62, 0x92,
- 0x86, 0xa4, 0x91, 0xf8, 0x38, 0xf8, 0x02, 0xfb
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_derived_key,
- 0x23, 0x70, 0xda, 0x57, 0x5d, 0x2a, 0x3d, 0xa8,
- 0x64, 0xce, 0xbf, 0xdc, 0x52, 0x04, 0xd5, 0x6d,
- 0xf7, 0x79, 0xa7, 0xdf, 0x43, 0xd9, 0xda, 0x43
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_base_key,
- 0xc1, 0x08, 0x16, 0x49, 0xad, 0xa7, 0x43, 0x62,
- 0xe6, 0xa1, 0x45, 0x9d, 0x01, 0xdf, 0xd3, 0x0d,
- 0x67, 0xc2, 0x23, 0x4c, 0x94, 0x07, 0x04, 0xda
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_derived_key,
- 0x34, 0x80, 0x57, 0xec, 0x98, 0xfd, 0xc4, 0x80,
- 0x16, 0x16, 0x1c, 0x2a, 0x4c, 0x7a, 0x94, 0x3e,
- 0x92, 0xae, 0x49, 0x2c, 0x98, 0x91, 0x75, 0xf7
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_base_key,
- 0x5d, 0x15, 0x4a, 0xf2, 0x38, 0xf4, 0x67, 0x13,
- 0x15, 0x57, 0x19, 0xd5, 0x5e, 0x2f, 0x1f, 0x79,
- 0x0d, 0xd6, 0x61, 0xf2, 0x79, 0xa7, 0x91, 0x7c
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_derived_key,
- 0xa8, 0x80, 0x8a, 0xc2, 0x67, 0xda, 0xda, 0x3d,
- 0xcb, 0xe9, 0xa7, 0xc8, 0x46, 0x26, 0xfb, 0xc7,
- 0x61, 0xc2, 0x94, 0xb0, 0x13, 0x15, 0xe5, 0xc1
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_base_key,
- 0x79, 0x85, 0x62, 0xe0, 0x49, 0x85, 0x2f, 0x57,
- 0xdc, 0x8c, 0x34, 0x3b, 0xa1, 0x7f, 0x2c, 0xa1,
- 0xd9, 0x73, 0x94, 0xef, 0xc8, 0xad, 0xc4, 0x43
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_derived_key,
- 0xc8, 0x13, 0xf8, 0x8a, 0x3b, 0xe3, 0xb3, 0x34,
- 0xf7, 0x54, 0x25, 0xce, 0x91, 0x75, 0xfb, 0xe3,
- 0xc8, 0x49, 0x3b, 0x89, 0xc8, 0x70, 0x3b, 0x49
-);
-
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_base_key,
- 0x26, 0xdc, 0xe3, 0x34, 0xb5, 0x45, 0x29, 0x2f,
- 0x2f, 0xea, 0xb9, 0xa8, 0x70, 0x1a, 0x89, 0xa4,
- 0xb9, 0x9e, 0xb9, 0x94, 0x2c, 0xec, 0xd0, 0x16
-);
-DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_derived_key,
- 0xf4, 0x8f, 0xfd, 0x6e, 0x83, 0xf8, 0x3e, 0x73,
- 0x54, 0xe6, 0x94, 0xfd, 0x25, 0x2c, 0xf8, 0x3b,
- 0xfe, 0x58, 0xf7, 0xd5, 0xba, 0x37, 0xec, 0x5d
-);
-
-static const struct gss_krb5_test_param rfc3961_kdf_test_params[] = {
- {
- .desc = "des3-hmac-sha1 key derivation case 1",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test1_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test1_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 2",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test2_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test2_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 3",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test3_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test3_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 4",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test4_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test4_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 5",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test5_base_key,
- .usage = &des3_dk_usage_kerberos,
- .expected_result = &des3_dk_test5_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 6",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test6_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test6_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 7",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test7_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test7_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 8",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test8_base_key,
- .usage = &des3_dk_usage_155,
- .expected_result = &des3_dk_test8_derived_key,
- },
- {
- .desc = "des3-hmac-sha1 key derivation case 9",
- .enctype = ENCTYPE_DES3_CBC_RAW,
- .base_key = &des3_dk_test9_base_key,
- .usage = &des3_dk_usage_1aa,
- .expected_result = &des3_dk_test9_derived_key,
- },
-};
-
-/* Creates the function rfc3961_kdf_gen_params */
-KUNIT_ARRAY_PARAM(rfc3961_kdf, rfc3961_kdf_test_params, gss_krb5_get_desc);
-
static struct kunit_case rfc3961_test_cases[] = {
{
.name = "RFC 3961 n-fold",
.run_case = rfc3961_nfold_case,
.generate_params = rfc3961_nfold_gen_params,
},
- {
- .name = "RFC 3961 key derivation",
- .run_case = kdf_case,
- .generate_params = rfc3961_kdf_gen_params,
- },
{}
};
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 7d6d4ae4a3c9..4fbc50a0a2c4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -69,83 +69,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-/* read_token is a mic token, and message_buffer is the data that the mic was
- * supposedly taken over. */
-u32
-gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
- struct xdr_netobj *read_token)
-{
- int signalg;
- int sealalg;
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- s32 now;
- int direction;
- u32 seqnum;
- unsigned char *ptr = (unsigned char *)read_token->data;
- int bodysize;
- u8 *cksumkey;
-
- dprintk("RPC: krb5_read_token\n");
-
- if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
- read_token->len))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[0] != ((KG_TOK_MIC_MSG >> 8) & 0xff)) ||
- (ptr[1] != (KG_TOK_MIC_MSG & 0xff)))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* XXX sanity-check bodysize?? */
-
- signalg = ptr[2] + (ptr[3] << 8);
- if (signalg != ctx->gk5e->signalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- sealalg = ptr[4] + (ptr[5] << 8);
- if (sealalg != SEAL_ALG_NONE)
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if (ctx->gk5e->keyed_cksum)
- cksumkey = ctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(ctx, ptr, 8, message_buffer, 0,
- cksumkey, KG_USAGE_SIGN, &md5cksum))
- return GSS_S_FAILURE;
-
- if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
- ctx->gk5e->cksumlength))
- return GSS_S_BAD_SIG;
-
- /* it got through unscathed. Make sure the context is unexpired */
-
- now = ktime_get_real_seconds();
-
- if (now > ctx->endtime)
- return GSS_S_CONTEXT_EXPIRED;
-
- /* do sequencing checks */
-
- if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
- &direction, &seqnum))
- return GSS_S_FAILURE;
-
- if ((ctx->initiate && direction != 0xff) ||
- (!ctx->initiate && direction != 0))
- return GSS_S_BAD_SIG;
-
- return GSS_S_COMPLETE;
-}
-#endif
-
u32
gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
struct xdr_netobj *read_token)
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 6d6b082380b2..b3e1738ff6bf 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -40,293 +40,6 @@
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
-
-static inline int
-gss_krb5_padding(int blocksize, int length)
-{
- return blocksize - (length % blocksize);
-}
-
-static inline void
-gss_krb5_add_padding(struct xdr_buf *buf, int offset, int blocksize)
-{
- int padding = gss_krb5_padding(blocksize, buf->len - offset);
- char *p;
- struct kvec *iov;
-
- if (buf->page_len || buf->tail[0].iov_len)
- iov = &buf->tail[0];
- else
- iov = &buf->head[0];
- p = iov->iov_base + iov->iov_len;
- iov->iov_len += padding;
- buf->len += padding;
- memset(p, padding, padding);
-}
-
-static inline int
-gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
-{
- u8 *ptr;
- u8 pad;
- size_t len = buf->len;
-
- if (len <= buf->head[0].iov_len) {
- pad = *(u8 *)(buf->head[0].iov_base + len - 1);
- if (pad > buf->head[0].iov_len)
- return -EINVAL;
- buf->head[0].iov_len -= pad;
- goto out;
- } else
- len -= buf->head[0].iov_len;
- if (len <= buf->page_len) {
- unsigned int last = (buf->page_base + len - 1)
- >>PAGE_SHIFT;
- unsigned int offset = (buf->page_base + len - 1)
- & (PAGE_SIZE - 1);
- ptr = kmap_atomic(buf->pages[last]);
- pad = *(ptr + offset);
- kunmap_atomic(ptr);
- goto out;
- } else
- len -= buf->page_len;
- BUG_ON(len > buf->tail[0].iov_len);
- pad = *(u8 *)(buf->tail[0].iov_base + len - 1);
-out:
- /* XXX: NOTE: we do not adjust the page lengths--they represent
- * a range of data in the real filesystem page cache, and we need
- * to know that range so the xdr code can properly place read data.
- * However adjusting the head length, as we do above, is harmless.
- * In the case of a request that fits into a single page, the server
- * also uses length and head length together to determine the original
- * start of the request to copy the request for deferal; so it's
- * easier on the server if we adjust head and tail length in tandem.
- * It's not really a problem that we don't fool with the page and
- * tail lengths, though--at worst badly formed xdr might lead the
- * server to attempt to parse the padding.
- * XXX: Document all these weird requirements for gss mechanism
- * wrap/unwrap functions. */
- if (pad > blocksize)
- return -EINVAL;
- if (buf->len > pad)
- buf->len -= pad;
- else
- return -EINVAL;
- return 0;
-}
-
-/* Assumptions: the head and tail of inbuf are ours to play with.
- * The pages, however, may be real pages in the page cache and we replace
- * them with scratch pages from **pages before writing to them. */
-/* XXX: obviously the above should be documentation of wrap interface,
- * and shouldn't be in this kerberos-specific file. */
-
-/* XXX factor out common code with seal/unseal. */
-
-u32
-gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset,
- struct xdr_buf *buf, struct page **pages)
-{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- int blocksize = 0, plainlen;
- unsigned char *ptr, *msg_start;
- time64_t now;
- int headlen;
- struct page **tmp_pages;
- u32 seq_send;
- u8 *cksumkey;
- u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc);
-
- dprintk("RPC: %s\n", __func__);
-
- now = ktime_get_real_seconds();
-
- blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
- gss_krb5_add_padding(buf, offset, blocksize);
- BUG_ON((buf->len - offset) % blocksize);
- plainlen = conflen + buf->len - offset;
-
- headlen = g_token_size(&kctx->mech_used,
- GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
- (buf->len - offset);
-
- ptr = buf->head[0].iov_base + offset;
- /* shift data to make room for header. */
- xdr_extend_head(buf, offset, headlen);
-
- /* XXX Would be cleverer to encrypt while copying. */
- BUG_ON((buf->len - offset - headlen) % blocksize);
-
- g_make_token_header(&kctx->mech_used,
- GSS_KRB5_TOK_HDR_LEN +
- kctx->gk5e->cksumlength + plainlen, &ptr);
-
-
- /* ptr now at header described in rfc 1964, section 1.2.1: */
- ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
- ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
-
- msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
-
- /*
- * signalg and sealalg are stored as if they were converted from LE
- * to host endian, even though they're opaque pairs of bytes according
- * to the RFC.
- */
- *(__le16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
- *(__le16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
- ptr[6] = 0xff;
- ptr[7] = 0xff;
-
- krb5_make_confounder(msg_start, conflen);
-
- if (kctx->gk5e->keyed_cksum)
- cksumkey = kctx->cksum;
- else
- cksumkey = NULL;
-
- /* XXXJBF: UGH!: */
- tmp_pages = buf->pages;
- buf->pages = pages;
- if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
- cksumkey, KG_USAGE_SEAL, &md5cksum))
- return GSS_S_FAILURE;
- buf->pages = tmp_pages;
-
- memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
-
- seq_send = atomic_fetch_inc(&kctx->seq_send);
-
- /* XXX would probably be more efficient to compute checksum
- * and encrypt at the same time: */
- if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
- seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
- return GSS_S_FAILURE;
-
- if (gss_encrypt_xdr_buf(kctx->enc, buf,
- offset + headlen - conflen, pages))
- return GSS_S_FAILURE;
-
- return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
-}
-
-u32
-gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align)
-{
- int signalg;
- int sealalg;
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
- .data = cksumdata};
- time64_t now;
- int direction;
- s32 seqnum;
- unsigned char *ptr;
- int bodysize;
- void *data_start, *orig_start;
- int data_len;
- int blocksize;
- u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc);
- int crypt_offset;
- u8 *cksumkey;
- unsigned int saved_len = buf->len;
-
- dprintk("RPC: gss_unwrap_kerberos\n");
-
- ptr = (u8 *)buf->head[0].iov_base + offset;
- if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
- len - offset))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) ||
- (ptr[1] != (KG_TOK_WRAP_MSG & 0xff)))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* XXX sanity-check bodysize?? */
-
- /* get the sign and seal algorithms */
-
- signalg = ptr[2] + (ptr[3] << 8);
- if (signalg != kctx->gk5e->signalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- sealalg = ptr[4] + (ptr[5] << 8);
- if (sealalg != kctx->gk5e->sealalg)
- return GSS_S_DEFECTIVE_TOKEN;
-
- if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /*
- * Data starts after token header and checksum. ptr points
- * to the beginning of the token header
- */
- crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
- (unsigned char *)buf->head[0].iov_base;
-
- buf->len = len;
- if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
- return GSS_S_DEFECTIVE_TOKEN;
-
- if (kctx->gk5e->keyed_cksum)
- cksumkey = kctx->cksum;
- else
- cksumkey = NULL;
-
- if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
- cksumkey, KG_USAGE_SEAL, &md5cksum))
- return GSS_S_FAILURE;
-
- if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
- kctx->gk5e->cksumlength))
- return GSS_S_BAD_SIG;
-
- /* it got through unscathed. Make sure the context is unexpired */
-
- now = ktime_get_real_seconds();
-
- if (now > kctx->endtime)
- return GSS_S_CONTEXT_EXPIRED;
-
- /* do sequencing checks */
-
- if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
- ptr + 8, &direction, &seqnum))
- return GSS_S_BAD_SIG;
-
- if ((kctx->initiate && direction != 0xff) ||
- (!kctx->initiate && direction != 0))
- return GSS_S_BAD_SIG;
-
- /* Copy the data back to the right position. XXX: Would probably be
- * better to copy and encrypt at the same time. */
-
- blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
- data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
- conflen;
- orig_start = buf->head[0].iov_base + offset;
- data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
- memmove(orig_start, data_start, data_len);
- buf->head[0].iov_len -= (data_start - orig_start);
- buf->len = len - (data_start - orig_start);
-
- if (gss_krb5_remove_padding(buf, blocksize))
- return GSS_S_DEFECTIVE_TOKEN;
-
- /* slack must include room for krb5 padding */
- *slack = XDR_QUADLEN(saved_len - buf->len);
- /* The GSS blob always precedes the RPC message payload */
- *align = *slack;
- return GSS_S_COMPLETE;
-}
-
-#endif
-
/*
* We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need
* to do more than that, we shift repeatedly. Kevin Coffman reports
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index c4a566737085..18734e70c5dd 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -986,7 +986,7 @@ bad_unwrap:
return -EINVAL;
}
-static int
+static enum svc_auth_status
svcauth_gss_set_client(struct svc_rqst *rqstp)
{
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
@@ -1634,7 +1634,7 @@ svcauth_gss_decode_credbody(struct xdr_stream *xdr,
*
* The rqstp->rq_auth_stat field is also set (see RFCs 2203 and 5531).
*/
-static int
+static enum svc_auth_status
svcauth_gss_accept(struct svc_rqst *rqstp)
{
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
@@ -1945,9 +1945,6 @@ bad_wrap:
* %0: the Reply is ready to be sent
* %-ENOMEM: failed to allocate memory
* %-EINVAL: encoding error
- *
- * XXX: These return values do not match the return values documented
- * for the auth_ops ->release method in linux/sunrpc/svcauth.h.
*/
static int
svcauth_gss_release(struct svc_rqst *rqstp)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 587811a002c9..dc21e6c732db 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -513,9 +513,9 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
INIT_LIST_HEAD(&pool->sp_all_threads);
spin_lock_init(&pool->sp_lock);
+ percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
- percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL);
}
return serv;
@@ -588,9 +588,9 @@ svc_destroy(struct kref *ref)
for (i = 0; i < serv->sv_nrpools; i++) {
struct svc_pool *pool = &serv->sv_pools[i];
+ percpu_counter_destroy(&pool->sp_messages_arrived);
percpu_counter_destroy(&pool->sp_sockets_queued);
percpu_counter_destroy(&pool->sp_threads_woken);
- percpu_counter_destroy(&pool->sp_threads_timedout);
}
kfree(serv->sv_pools);
kfree(serv);
@@ -689,23 +689,44 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
return rqstp;
}
-/*
- * Choose a pool in which to create a new thread, for svc_set_num_threads
+/**
+ * svc_pool_wake_idle_thread - Awaken an idle thread in @pool
+ * @pool: service thread pool
+ *
+ * Can be called from soft IRQ or process context. Finding an idle
+ * service thread and marking it BUSY is atomic with respect to
+ * other calls to svc_pool_wake_idle_thread().
+ *
*/
-static inline struct svc_pool *
-choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+void svc_pool_wake_idle_thread(struct svc_pool *pool)
{
- if (pool != NULL)
- return pool;
+ struct svc_rqst *rqstp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
+ if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
+ continue;
+
+ WRITE_ONCE(rqstp->rq_qtime, ktime_get());
+ wake_up_process(rqstp->rq_task);
+ rcu_read_unlock();
+ percpu_counter_inc(&pool->sp_threads_woken);
+ trace_svc_wake_up(rqstp->rq_task->pid);
+ return;
+ }
+ rcu_read_unlock();
- return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+ set_bit(SP_CONGESTED, &pool->sp_flags);
}
-/*
- * Choose a thread to kill, for svc_set_num_threads
- */
-static inline struct task_struct *
-choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+static struct svc_pool *
+svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+ return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools];
+}
+
+static struct task_struct *
+svc_pool_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
unsigned int i;
struct task_struct *task = NULL;
@@ -713,7 +734,6 @@ choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
if (pool != NULL) {
spin_lock_bh(&pool->sp_lock);
} else {
- /* choose a pool in round-robin fashion */
for (i = 0; i < serv->sv_nrpools; i++) {
pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
spin_lock_bh(&pool->sp_lock);
@@ -728,21 +748,15 @@ found_pool:
if (!list_empty(&pool->sp_all_threads)) {
struct svc_rqst *rqstp;
- /*
- * Remove from the pool->sp_all_threads list
- * so we don't try to kill it again.
- */
rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
set_bit(RQ_VICTIM, &rqstp->rq_flags);
list_del_rcu(&rqstp->rq_all);
task = rqstp->rq_task;
}
spin_unlock_bh(&pool->sp_lock);
-
return task;
}
-/* create new threads */
static int
svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -754,13 +768,12 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
do {
nrservs--;
- chosen_pool = choose_pool(serv, pool, &state);
-
+ chosen_pool = svc_pool_next(serv, pool, &state);
node = svc_pool_map_get_node(chosen_pool->sp_id);
+
rqstp = svc_prepare_thread(serv, chosen_pool, node);
if (IS_ERR(rqstp))
return PTR_ERR(rqstp);
-
task = kthread_create_on_node(serv->sv_threadfn, rqstp,
node, "%s", serv->sv_name);
if (IS_ERR(task)) {
@@ -779,15 +792,6 @@ svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
-/*
- * Create or destroy enough new threads to make the number
- * of threads the given number. If `pool' is non-NULL, applies
- * only to threads in that pool, otherwise round-robins between
- * all pools. Caller must ensure that mutual exclusion between this and
- * server startup or shutdown.
- */
-
-/* destroy old threads */
static int
svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -795,9 +799,8 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
struct task_struct *task;
unsigned int state = serv->sv_nrthreads-1;
- /* destroy old threads */
do {
- task = choose_victim(serv, pool, &state);
+ task = svc_pool_victim(serv, pool, &state);
if (task == NULL)
break;
rqstp = kthread_data(task);
@@ -809,6 +812,23 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
return 0;
}
+/**
+ * svc_set_num_threads - adjust number of threads per RPC service
+ * @serv: RPC service to adjust
+ * @pool: Specific pool from which to choose threads, or NULL
+ * @nrservs: New number of threads for @serv (0 or less means kill all threads)
+ *
+ * Create or destroy threads to make the number of threads for @serv the
+ * given number. If @pool is non-NULL, change only threads in that pool;
+ * otherwise, round-robin between all pools for @serv. @serv's
+ * sv_nrthreads is adjusted for each thread created or destroyed.
+ *
+ * Caller must ensure mutual exclusion between this and server startup or
+ * shutdown.
+ *
+ * Returns zero on success or a negative errno if an error occurred while
+ * starting a thread.
+ */
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
@@ -1277,8 +1297,9 @@ svc_process_common(struct svc_rqst *rqstp)
const struct svc_procedure *procp = NULL;
struct svc_serv *serv = rqstp->rq_server;
struct svc_process_info process;
- int auth_res, rc;
+ enum svc_auth_status auth_res;
unsigned int aoffset;
+ int rc;
__be32 *p;
/* Will be turned off by GSS integrity and privacy services */
@@ -1333,6 +1354,9 @@ svc_process_common(struct svc_rqst *rqstp)
goto dropit;
case SVC_COMPLETE:
goto sendit;
+ default:
+ pr_warn_once("Unexpected svc_auth_status (%d)\n", auth_res);
+ goto err_system_err;
}
if (progp == NULL)
@@ -1516,7 +1540,6 @@ out_baddir:
out_drop:
svc_drop(rqstp);
}
-EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 62c7919ea610..4cfe9640df48 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -434,6 +434,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
smp_rmb();
xpt_flags = READ_ONCE(xprt->xpt_flags);
+ trace_svc_xprt_enqueue(xprt, xpt_flags);
if (xpt_flags & BIT(XPT_BUSY))
return false;
if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
@@ -456,7 +457,6 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
void svc_xprt_enqueue(struct svc_xprt *xprt)
{
struct svc_pool *pool;
- struct svc_rqst *rqstp = NULL;
if (!svc_xprt_ready(xprt))
return;
@@ -476,21 +476,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
spin_unlock_bh(&pool->sp_lock);
- /* find a thread for this xprt */
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
- percpu_counter_inc(&pool->sp_threads_woken);
- rqstp->rq_qtime = ktime_get();
- wake_up_process(rqstp->rq_task);
- goto out_unlock;
- }
- set_bit(SP_CONGESTED, &pool->sp_flags);
- rqstp = NULL;
-out_unlock:
- rcu_read_unlock();
- trace_svc_xprt_enqueue(xprt, rqstp);
+ svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_xprt_enqueue);
@@ -581,7 +567,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
svc_xprt_put(xprt);
}
-/*
+/**
+ * svc_wake_up - Wake up a service thread for non-transport work
+ * @serv: RPC service
+ *
* Some svc_serv's will have occasional work to do, even when a xprt is not
* waiting to be serviced. This function is there to "kick" a task in one of
* those services so that it can wake up and do that work. Note that we only
@@ -590,27 +579,10 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
*/
void svc_wake_up(struct svc_serv *serv)
{
- struct svc_rqst *rqstp;
- struct svc_pool *pool;
-
- pool = &serv->sv_pools[0];
+ struct svc_pool *pool = &serv->sv_pools[0];
- rcu_read_lock();
- list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
- /* skip any that aren't queued */
- if (test_bit(RQ_BUSY, &rqstp->rq_flags))
- continue;
- rcu_read_unlock();
- wake_up_process(rqstp->rq_task);
- trace_svc_wake_up(rqstp->rq_task->pid);
- return;
- }
- rcu_read_unlock();
-
- /* No free entries available */
set_bit(SP_TASK_PENDING, &pool->sp_flags);
- smp_wmb();
- trace_svc_wake_up(0);
+ svc_pool_wake_idle_thread(pool);
}
EXPORT_SYMBOL_GPL(svc_wake_up);
@@ -679,7 +651,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-static int svc_alloc_arg(struct svc_rqst *rqstp)
+static bool svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg = &rqstp->rq_arg;
@@ -701,10 +673,10 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
/* Made progress, don't sleep yet */
continue;
- set_current_state(TASK_INTERRUPTIBLE);
- if (signalled() || kthread_should_stop()) {
+ set_current_state(TASK_IDLE);
+ if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
- return -EINTR;
+ return false;
}
trace_svc_alloc_arg_err(pages, ret);
memalloc_retry_wait(GFP_KERNEL);
@@ -723,7 +695,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
arg->tail[0].iov_len = 0;
rqstp->rq_xid = xdr_zero;
- return 0;
+ return true;
}
static bool
@@ -732,7 +704,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
struct svc_pool *pool = rqstp->rq_pool;
/* did someone call svc_wake_up? */
- if (test_and_clear_bit(SP_TASK_PENDING, &pool->sp_flags))
+ if (test_bit(SP_TASK_PENDING, &pool->sp_flags))
return false;
/* was a socket queued? */
@@ -740,7 +712,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return false;
/* are we shutting down? */
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
return false;
/* are we freezing? */
@@ -750,10 +722,9 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return true;
}
-static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp)
{
struct svc_pool *pool = rqstp->rq_pool;
- long time_left = 0;
/* rq_xprt should be clear on entry */
WARN_ON_ONCE(rqstp->rq_xprt);
@@ -762,18 +733,14 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (rqstp->rq_xprt)
goto out_found;
- /*
- * We have to be able to interrupt this wait
- * to bring down the daemons ...
- */
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_IDLE);
smp_mb__before_atomic();
clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
smp_mb__after_atomic();
if (likely(rqst_should_sleep(rqstp)))
- time_left = schedule_timeout(timeout);
+ schedule();
else
__set_current_state(TASK_RUNNING);
@@ -781,17 +748,16 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
set_bit(RQ_BUSY, &rqstp->rq_flags);
smp_mb__after_atomic();
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
rqstp->rq_xprt = svc_xprt_dequeue(pool);
if (rqstp->rq_xprt)
goto out_found;
- if (!time_left)
- percpu_counter_inc(&pool->sp_threads_timedout);
-
- if (signalled() || kthread_should_stop())
- return ERR_PTR(-EINTR);
- return ERR_PTR(-EAGAIN);
+ if (kthread_should_stop())
+ return NULL;
+ return NULL;
out_found:
+ clear_bit(SP_TASK_PENDING, &pool->sp_flags);
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
*/
@@ -867,37 +833,35 @@ out:
return len;
}
-/*
- * Receive the next request on any transport. This code is carefully
- * organised not to touch any cachelines in the shared svc_serv
- * structure, only cachelines in the local svc_pool.
+/**
+ * svc_recv - Receive and process the next request on any transport
+ * @rqstp: an idle RPC service thread
+ *
+ * This code is carefully organised not to touch any cachelines in
+ * the shared svc_serv structure, only cachelines in the local
+ * svc_pool.
*/
-int svc_recv(struct svc_rqst *rqstp, long timeout)
+void svc_recv(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt = NULL;
struct svc_serv *serv = rqstp->rq_server;
- int len, err;
+ int len;
- err = svc_alloc_arg(rqstp);
- if (err)
+ if (!svc_alloc_arg(rqstp))
goto out;
try_to_freeze();
cond_resched();
- err = -EINTR;
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
goto out;
- xprt = svc_get_next_xprt(rqstp, timeout);
- if (IS_ERR(xprt)) {
- err = PTR_ERR(xprt);
+ xprt = svc_get_next_xprt(rqstp);
+ if (!xprt)
goto out;
- }
len = svc_handle_xprt(rqstp, xprt);
/* No data, incomplete (TCP) read, or accept() */
- err = -EAGAIN;
if (len <= 0)
goto out_release;
@@ -909,13 +873,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (serv->sv_stats)
serv->sv_stats->netcnt++;
+ percpu_counter_inc(&rqstp->rq_pool->sp_messages_arrived);
rqstp->rq_stime = ktime_get();
- return len;
+ svc_process(rqstp);
+out:
+ return;
out_release:
rqstp->rq_res.len = 0;
svc_xprt_release(rqstp);
-out:
- return err;
}
EXPORT_SYMBOL_GPL(svc_recv);
@@ -1456,12 +1421,11 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
return 0;
}
- seq_printf(m, "%u %llu %llu %llu %llu\n",
- pool->sp_id,
- percpu_counter_sum_positive(&pool->sp_sockets_queued),
- percpu_counter_sum_positive(&pool->sp_sockets_queued),
- percpu_counter_sum_positive(&pool->sp_threads_woken),
- percpu_counter_sum_positive(&pool->sp_threads_timedout));
+ seq_printf(m, "%u %llu %llu %llu 0\n",
+ pool->sp_id,
+ percpu_counter_sum_positive(&pool->sp_messages_arrived),
+ percpu_counter_sum_positive(&pool->sp_sockets_queued),
+ percpu_counter_sum_positive(&pool->sp_threads_woken));
return 0;
}
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 67d8245a08af..aa4429d0b810 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -60,8 +60,19 @@ svc_put_auth_ops(struct auth_ops *aops)
module_put(aops->owner);
}
-int
-svc_authenticate(struct svc_rqst *rqstp)
+/**
+ * svc_authenticate - Initialize an outgoing credential
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ * %SVC_OK: XDR encoding of the result can begin
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_COMPLETE: GSS context lifetime event; no further action
+ * %SVC_DROP: Drop this request; no further action
+ * %SVC_CLOSE: Like drop, but also close transport connection
+ */
+enum svc_auth_status svc_authenticate(struct svc_rqst *rqstp)
{
struct auth_ops *aops;
u32 flavor;
@@ -89,16 +100,28 @@ svc_authenticate(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_authenticate);
-int svc_set_client(struct svc_rqst *rqstp)
+/**
+ * svc_set_client - Assign an appropriate 'auth_domain' as the client
+ * @rqstp: RPC execution context
+ *
+ * Return values:
+ * %SVC_OK: Client was found and assigned
+ * %SVC_DENY: Client was explicitly denied
+ * %SVC_DROP: Ignore this request
+ * %SVC_CLOSE: Ignore this request and close the connection
+ */
+enum svc_auth_status svc_set_client(struct svc_rqst *rqstp)
{
rqstp->rq_client = NULL;
return rqstp->rq_authop->set_client(rqstp);
}
EXPORT_SYMBOL_GPL(svc_set_client);
-/* A request, which was authenticated, has now executed.
- * Time to finalise the credentials and verifier
- * and release and resources
+/**
+ * svc_authorise - Finalize credentials/verifier and release resources
+ * @rqstp: RPC execution context
+ *
+ * Returns zero on success, or a negative errno.
*/
int svc_authorise(struct svc_rqst *rqstp)
{
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 174783f804fa..04b45588ae6f 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -665,7 +665,7 @@ static struct group_info *unix_gid_find(kuid_t uid, struct svc_rqst *rqstp)
}
}
-int
+enum svc_auth_status
svcauth_unix_set_client(struct svc_rqst *rqstp)
{
struct sockaddr_in *sin;
@@ -736,7 +736,6 @@ out:
rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
-
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
/**
@@ -751,7 +750,7 @@ EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_null_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
@@ -828,7 +827,7 @@ struct auth_ops svcauth_null = {
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_tls_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
@@ -913,7 +912,7 @@ struct auth_ops svcauth_tls = {
*
* rqstp->rq_auth_stat is set as mandated by RFC 5531.
*/
-static int
+static enum svc_auth_status
svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 8c9a8ee76aa0..998687421fa6 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -36,6 +36,8 @@
#include <linux/skbuff.h>
#include <linux/file.h>
#include <linux/freezer.h>
+#include <linux/bvec.h>
+
#include <net/sock.h>
#include <net/checksum.h>
#include <net/ip.h>
@@ -695,9 +697,10 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
.msg_name = &rqstp->rq_addr,
.msg_namelen = rqstp->rq_addrlen,
.msg_control = cmh,
+ .msg_flags = MSG_SPLICE_PAGES,
.msg_controllen = sizeof(buffer),
};
- unsigned int sent;
+ unsigned int count;
int err;
svc_udp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
@@ -710,22 +713,23 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (err < 0)
- goto out_unlock;
+ count = xdr_buf_to_bvec(rqstp->rq_bvec,
+ ARRAY_SIZE(rqstp->rq_bvec), xdr);
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ count, 0);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ count, 0);
+ err = sock_sendmsg(svsk->sk_sock, &msg);
}
- xdr_free_bvec(xdr);
+
trace_svcsock_udp_send(xprt, err);
-out_unlock:
+
mutex_unlock(&xprt->xpt_mutex);
- if (err < 0)
- return err;
- return sent;
+ return err;
out_notconn:
mutex_unlock(&xprt->xpt_mutex);
@@ -1089,6 +1093,9 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk)
/* If we have more data, signal svc_xprt_enqueue() to try again */
svsk->sk_tcplen = 0;
svsk->sk_marker = xdr_zero;
+
+ smp_wmb();
+ tcp_set_rcvlowat(svsk->sk_sk, 1);
}
/**
@@ -1178,10 +1185,17 @@ err_incomplete:
goto err_delete;
if (len == want)
svc_tcp_fragment_received(svsk);
- else
+ else {
+ /* Avoid more ->sk_data_ready() calls until the rest
+ * of the message has arrived. This reduces service
+ * thread wake-ups on large incoming messages. */
+ tcp_set_rcvlowat(svsk->sk_sk,
+ svc_sock_reclen(svsk) - svsk->sk_tcplen);
+
trace_svcsock_tcp_recv_short(&svsk->sk_xprt,
svc_sock_reclen(svsk),
svsk->sk_tcplen - sizeof(rpc_fraghdr));
+ }
goto err_noclose;
error:
if (len != -EAGAIN)
@@ -1198,75 +1212,51 @@ err_noclose:
return 0; /* record not complete */
}
-static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
- int flags)
-{
- struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, };
-
- iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, vec->iov_len);
- return sock_sendmsg(sock, &msg);
-}
-
/*
* MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
*
- * In addition, the logic assumes that * .bv_len is never larger
- * than PAGE_SIZE.
+ * Note that the send is non-blocking. The caller has incremented
+ * the reference count on each page backing the RPC message, and
+ * the network layer will "put" these pages when transmission is
+ * complete.
+ *
+ * This is safe for our RPC services because the memory backing
+ * the head and tail components is never kmalloc'd. These always
+ * come from pages in the svc_rqst::rq_pages array.
*/
-static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
+static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp,
rpc_fraghdr marker, unsigned int *sentp)
{
- const struct kvec *head = xdr->head;
- const struct kvec *tail = xdr->tail;
- struct kvec rm = {
- .iov_base = &marker,
- .iov_len = sizeof(marker),
- };
struct msghdr msg = {
- .msg_flags = 0,
+ .msg_flags = MSG_SPLICE_PAGES,
};
+ unsigned int count;
+ void *buf;
int ret;
*sentp = 0;
- ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
- if (ret < 0)
- return ret;
-
- ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != rm.iov_len)
- return -EAGAIN;
-
- ret = svc_tcp_send_kvec(sock, head, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != head->iov_len)
- goto out;
- if (xdr_buf_pagecount(xdr))
- xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base);
-
- msg.msg_flags = MSG_SPLICE_PAGES;
- iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
- xdr_buf_pagecount(xdr), xdr->page_len);
- ret = sock_sendmsg(sock, &msg);
+ /* The stream record marker is copied into a temporary page
+ * fragment buffer so that it can be included in rq_bvec.
+ */
+ buf = page_frag_alloc(&svsk->sk_frag_cache, sizeof(marker),
+ GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ memcpy(buf, &marker, sizeof(marker));
+ bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker));
+
+ count = xdr_buf_to_bvec(rqstp->rq_bvec + 1,
+ ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res);
+
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
+ 1 + count, sizeof(marker) + rqstp->rq_res.len);
+ ret = sock_sendmsg(svsk->sk_sock, &msg);
if (ret < 0)
return ret;
*sentp += ret;
-
- if (tail->iov_len) {
- ret = svc_tcp_send_kvec(sock, tail, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- }
-
-out:
return 0;
}
@@ -1292,23 +1282,17 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
svc_tcp_release_ctxt(xprt, rqstp->rq_xprt_ctxt);
rqstp->rq_xprt_ctxt = NULL;
- atomic_inc(&svsk->sk_sendqlen);
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- tcp_sock_set_cork(svsk->sk_sk, true);
- err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
- xdr_free_bvec(xdr);
+ err = svc_tcp_sendmsg(svsk, rqstp, marker, &sent);
trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
- if (atomic_dec_and_test(&svsk->sk_sendqlen))
- tcp_sock_set_cork(svsk->sk_sk, false);
mutex_unlock(&xprt->xpt_mutex);
return sent;
out_notconn:
- atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -ENOTCONN;
out_close:
@@ -1317,7 +1301,6 @@ out_close:
(err < 0) ? "got error" : "sent",
(err < 0) ? err : sent, xdr->len);
svc_xprt_deferred_close(xprt);
- atomic_dec(&svsk->sk_sendqlen);
mutex_unlock(&xprt->xpt_mutex);
return -EAGAIN;
}
@@ -1644,6 +1627,7 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct page_frag_cache *pfc = &svsk->sk_frag_cache;
struct socket *sock = svsk->sk_sock;
trace_svcsock_free(svsk, sock);
@@ -1653,5 +1637,8 @@ static void svc_sock_free(struct svc_xprt *xprt)
sockfd_put(sock);
else
sock_release(sock);
+ if (pfc->va)
+ __page_frag_cache_drain(virt_to_head_page(pfc->va),
+ pfc->pagecnt_bias);
kfree(svsk);
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2a22e78af116..358e6de91775 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -165,6 +165,56 @@ xdr_free_bvec(struct xdr_buf *buf)
}
/**
+ * xdr_buf_to_bvec - Copy components of an xdr_buf into a bio_vec array
+ * @bvec: bio_vec array to populate
+ * @bvec_size: element count of @bio_vec
+ * @xdr: xdr_buf to be copied
+ *
+ * Returns the number of entries consumed in @bvec.
+ */
+unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size,
+ const struct xdr_buf *xdr)
+{
+ const struct kvec *head = xdr->head;
+ const struct kvec *tail = xdr->tail;
+ unsigned int count = 0;
+
+ if (head->iov_len) {
+ bvec_set_virt(bvec++, head->iov_base, head->iov_len);
+ ++count;
+ }
+
+ if (xdr->page_len) {
+ unsigned int offset, len, remaining;
+ struct page **pages = xdr->pages;
+
+ offset = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining > 0) {
+ len = min_t(unsigned int, remaining,
+ PAGE_SIZE - offset);
+ bvec_set_page(bvec++, *pages++, len, offset);
+ remaining -= len;
+ offset = 0;
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+ }
+
+ if (tail->iov_len) {
+ bvec_set_virt(bvec, tail->iov_base, tail->iov_len);
+ if (unlikely(++count > bvec_size))
+ goto bvec_overflow;
+ }
+
+ return count;
+
+bvec_overflow:
+ pr_warn_once("%s: bio_vec array overflow\n", __func__);
+ return count - 1;
+}
+
+/**
* xdr_inline_pages - Prepare receive buffer for a large reply
* @xdr: xdr_buf into which reply will be placed
* @offset: expected offset where data payload will start, in bytes