From 55b6dd54c3bcb6edf7ad630a4510759f4b0cf1cd Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 13 Jan 2026 13:37:39 -0500 Subject: nfsd/sunrpc: add svc_rqst->rq_private pointer and remove rq_lease_breaker rq_lease_breaker has always been a NFSv4 specific layering violation in svc_rqst. The reason it's there though is that we need a place that is thread-local, and accessible from the svc_rqst pointer. Add a new rq_private pointer to struct svc_rqst. This is intended for use by the threads that are handling the service. sunrpc code doesn't touch it. In nfsd, define a new struct nfsd_thread_local_info. nfsd declares one of these on the stack and puts a pointer to it in rq_private. Add a new ntli_lease_breaker field to the new struct and convert all of the places that access rq_lease_breaker to use the new field instead. Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 4dc14c7a711b..ab8237ba9596 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -175,6 +175,9 @@ static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv) /* * The context of a single thread, including the request currently being * processed. + * + * RPC programs are free to use rq_private to stash thread-local information. + * The sunrpc layer will not access it. 
*/ struct svc_rqst { struct list_head rq_all; /* all threads list */ @@ -251,7 +254,7 @@ struct svc_rqst { unsigned long bc_to_initval; unsigned int bc_to_retries; unsigned int rq_status_counter; /* RPC processing counter */ - void **rq_lease_breaker; /* The v4 client breaking a lease */ + void *rq_private; /* For use by the service thread */ }; /* bits for rq_flags */ -- cgit v1.2.3 From 322ecd01bf8ad7e0da21e174679aff1759e68b2c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 13 Jan 2026 13:37:40 -0500 Subject: nfsd/sunrpc: move rq_cachetype into struct nfsd_thread_local_info The svc_rqst->rq_cachetype field is only accessed by nfsd. Move it into the nfsd_thread_local_info instead. Signed-off-by: Jeff Layton Reviewed-by: Benjamin Coddington Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 3 ++- fs/nfsd/nfscache.c | 3 ++- fs/nfsd/nfsd.h | 1 + fs/nfsd/nfssvc.c | 5 +++-- include/linux/sunrpc/svc.h | 1 - 5 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 9d234913100b..690f7a3122ec 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2598,6 +2598,7 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op) static bool nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { + struct nfsd_thread_local_info *ntli = argp->rqstp->rq_private; struct nfsd4_op *op; bool cachethis = false; int auth_slack= argp->rqstp->rq_auth_slack; @@ -2690,7 +2691,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) if (argp->minorversion) cachethis = false; svc_reserve_auth(argp->rqstp, max_reply + readbytes); - argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; + ntli->ntli_cachetype = cachethis ? 
RC_REPLBUFF : RC_NOCACHE; argp->splice_ok = nfsd_read_splice_ok(argp->rqstp); if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index ab13ee9c7fd8..154468ceccdc 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -467,10 +467,11 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, unsigned int len, struct nfsd_cacherep **cacherep) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + struct nfsd_thread_local_info *ntli = rqstp->rq_private; struct nfsd_cacherep *rp, *found; __wsum csum; struct nfsd_drc_bucket *b; - int type = rqstp->rq_cachetype; + int type = ntli->ntli_cachetype; LIST_HEAD(dispose); int rtn = RC_DOIT; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 938906c6d10c..a2e35a4fa105 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -84,6 +84,7 @@ extern const struct seq_operations nfs_exports_op; struct nfsd_thread_local_info { struct nfs4_client **ntli_lease_breaker; + int ntli_cachetype; }; /* diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fd979e5392a1..4f1ab3222a4d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -972,6 +972,7 @@ nfsd(void *vrqstp) */ int nfsd_dispatch(struct svc_rqst *rqstp) { + struct nfsd_thread_local_info *ntli = rqstp->rq_private; const struct svc_procedure *proc = rqstp->rq_procinfo; __be32 *statp = rqstp->rq_accept_statp; struct nfsd_cacherep *rp; @@ -982,7 +983,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) */ - rqstp->rq_cachetype = proc->pc_cachetype; + ntli->ntli_cachetype = proc->pc_cachetype; /* * ->pc_decode advances the argument stream past the NFS @@ -1027,7 +1028,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp) */ smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1); - nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply); + nfsd_cache_update(rqstp, rp, ntli->ntli_cachetype, 
nfs_reply); out_cached_reply: return 1; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ab8237ba9596..62152e4f3bcc 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -218,7 +218,6 @@ struct svc_rqst { u32 rq_vers; /* program version */ u32 rq_proc; /* procedure number */ u32 rq_prot; /* IP protocol */ - int rq_cachetype; /* catering to nfsd */ unsigned long rq_flags; /* flags field */ ktime_t rq_qtime; /* enqueue time */ -- cgit v1.2.3 From 153b9e025308417d167332c93e1bcc11174178de Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:23 -0500 Subject: lockd: Relocate and rename nlm_drop_reply The nlm_drop_reply status code is internal to the kernel's lockd implementation and must never appear on the wire. Its previous location in xdr.h grouped it with legitimate NLM protocol status codes, obscuring this critical distinction. Relocate the definition to lockd.h with a comment block for internal status codes, and rename to nlm__int__drop_reply to make its internal-only nature explicit. This prepares for adding additional internal status codes in subsequent patches. Signed-off-by: Chuck Lever --- fs/lockd/svc4proc.c | 22 ++++++++++++++-------- fs/lockd/svclock.c | 4 ++-- fs/lockd/svcproc.c | 24 +++++++++++++++--------- fs/nfsd/lockd.c | 2 +- include/linux/lockd/lockd.h | 6 ++++++ include/linux/lockd/xdr.h | 2 -- 6 files changed, 38 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4b6f18d97734..9c756d07223a 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -104,12 +104,13 @@ __nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? 
+ rpc_drop_reply : rpc_success; /* Now check for conflicting locks */ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock); - if (resp->status == nlm_drop_reply) + if (resp->status == nlm__int__drop_reply) rc = rpc_drop_reply; else dprintk("lockd: TEST4 status %d\n", ntohl(resp->status)); @@ -140,13 +141,14 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Now try to lock the file */ resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie, argp->reclaim); - if (resp->status == nlm_drop_reply) + if (resp->status == nlm__int__drop_reply) rc = rpc_drop_reply; else dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); @@ -182,7 +184,8 @@ __nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp) /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Try to cancel request. */ resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock); @@ -222,7 +225,8 @@ __nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp) /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? 
+ rpc_drop_reply : rpc_success; /* Now try to remove the lock */ resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock); @@ -369,7 +373,8 @@ nlm4svc_proc_share(struct svc_rqst *rqstp) /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Now try to create the share */ resp->status = nlmsvc_share_file(host, file, argp); @@ -404,7 +409,8 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp) /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Now try to lock the file */ resp->status = nlmsvc_unshare_file(host, file, argp); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 255a847ca0b6..d86b02153c7c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -463,7 +463,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block) block->b_deferred_req = rqstp->rq_chandle.defer(block->b_cache_req); if (block->b_deferred_req != NULL) - status = nlm_drop_reply; + status = nlm__int__drop_reply; } dprintk("lockd: nlmsvc_defer_lock_rqst block %p flags %d status %d\n", block, block->b_flags, ntohl(status)); @@ -531,7 +531,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, ret = nlm_lck_denied; goto out; } - ret = nlm_drop_reply; + ret = nlm__int__drop_reply; goto out; } diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 95c6bf7ab757..2a2e48a9bd12 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -25,7 +25,7 @@ static inline __be32 cast_status(__be32 status) case nlm_lck_denied_nolocks: case nlm_lck_blocked: case nlm_lck_denied_grace_period: - case nlm_drop_reply: + case nlm__int__drop_reply: break; case nlm4_deadlock: 
status = nlm_lck_denied; @@ -122,12 +122,13 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp) /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Now check for conflicting locks */ resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock)); - if (resp->status == nlm_drop_reply) + if (resp->status == nlm__int__drop_reply) rc = rpc_drop_reply; else dprintk("lockd: TEST status %d vers %d\n", @@ -159,13 +160,14 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Now try to lock the file */ resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie, argp->reclaim)); - if (resp->status == nlm_drop_reply) + if (resp->status == nlm__int__drop_reply) rc = rpc_drop_reply; else dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); @@ -202,7 +204,8 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp) /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Try to cancel request. */ resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock)); @@ -243,7 +246,8 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp) /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? 
rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Now try to remove the lock */ resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock)); @@ -400,7 +404,8 @@ nlmsvc_proc_share(struct svc_rqst *rqstp) /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Now try to create the share */ resp->status = cast_status(nlmsvc_share_file(host, file, argp)); @@ -435,7 +440,8 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp) /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; + return resp->status == nlm__int__drop_reply ? + rpc_drop_reply : rpc_success; /* Now try to unshare the file */ resp->status = cast_status(nlmsvc_unshare_file(host, file, argp)); diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index c774ce9aa296..8c230ccd6645 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -71,7 +71,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, * to callback when the delegation is returned but might * not have a proper lock request to block on. */ - return nlm_drop_reply; + return nlm__int__drop_reply; case nfserr_stale: return nlm_stale_fh; default: diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 330e38776bb2..fdefec39553f 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -38,6 +38,12 @@ */ #define LOCKD_DFLT_TIMEO 10 +/* + * Internal-use status codes, not to be placed on the wire. + * Version handlers translate these to appropriate wire values. + */ +#define nlm__int__drop_reply cpu_to_be32(30000) + /* * Lockd host handle (used both by the client and server personality). 
*/ diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 17d53165d9f2..292e4e38d17d 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -33,8 +33,6 @@ struct svc_rqst; #define nlm_lck_blocked cpu_to_be32(NLM_LCK_BLOCKED) #define nlm_lck_denied_grace_period cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD) -#define nlm_drop_reply cpu_to_be32(30000) - /* Lock info passed via NLM */ struct nlm_lock { char * caller; -- cgit v1.2.3 From 9e0d0c61940796893e0c2200cdc7be0684218238 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:24 -0500 Subject: lockd: Introduce nlm__int__deadlock The use of CONFIG_LOCKD_V4 in combination with a later cast_status() in the NLMv3 code is difficult to reason about. Instead, replace the use of nlm_deadlock with an implementation-defined status value that version-specific code translates appropriately. The new approach establishes a translation boundary: generic lockd code returns nlm__int__deadlock when posix_lock_file() yields -EDEADLK. Version-specific handlers (svc4proc.c for NLMv4, svcproc.c for NLMv3) translate this internal status to the appropriate wire protocol value. NLMv4 maps to nlm4_deadlock; NLMv3 maps to nlm_lck_denied (since NLMv3 lacks a deadlock-specific status code). Later this modification will also remove the need to include NLMv4 headers in NLMv3 and generic code. 
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/svc4proc.c | 10 ++++++++-- fs/lockd/svclock.c | 8 +------- fs/lockd/svcproc.c | 4 +++- include/linux/lockd/lockd.h | 1 + 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 9c756d07223a..55b6dcc56db1 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -148,10 +148,16 @@ __nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp) resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, argp->block, &argp->cookie, argp->reclaim); - if (resp->status == nlm__int__drop_reply) + switch (resp->status) { + case nlm__int__drop_reply: rc = rpc_drop_reply; - else + break; + case nlm__int__deadlock: + resp->status = nlm4_deadlock; + fallthrough; + default: dprintk("lockd: LOCK status %d\n", ntohl(resp->status)); + } nlmsvc_release_lockowner(&argp->lock); nlmsvc_release_host(host); diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index d86b02153c7c..5edf00751a1e 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -33,12 +33,6 @@ #define NLMDBG_FACILITY NLMDBG_SVCLOCK -#ifdef CONFIG_LOCKD_V4 -#define nlm_deadlock nlm4_deadlock -#else -#define nlm_deadlock nlm_lck_denied -#endif - static void nlmsvc_release_block(struct nlm_block *block); static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); static void nlmsvc_remove_block(struct nlm_block *block); @@ -589,7 +583,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, goto out; case -EDEADLK: nlmsvc_remove_block(block); - ret = nlm_deadlock; + ret = nlm__int__deadlock; goto out; default: /* includes ENOLCK */ nlmsvc_remove_block(block); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 2a2e48a9bd12..27ed71935e45 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -27,7 +27,7 @@ static inline __be32 cast_status(__be32 status) case nlm_lck_denied_grace_period: case nlm__int__drop_reply: break; - case 
nlm4_deadlock: + case nlm__int__deadlock: status = nlm_lck_denied; break; default: @@ -39,6 +39,8 @@ static inline __be32 cast_status(__be32 status) #else static inline __be32 cast_status(__be32 status) { + if (status == nlm__int__deadlock) + status = nlm_lck_denied; return status; } #endif diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index fdefec39553f..793691912137 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -43,6 +43,7 @@ * Version handlers translate these to appropriate wire values. */ #define nlm__int__drop_reply cpu_to_be32(30000) +#define nlm__int__deadlock cpu_to_be32(30001) /* * Lockd host handle (used both by the client and server personality). -- cgit v1.2.3 From 7db001e03d7a668ca6c3789fee42a24236ca90f6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:25 -0500 Subject: lockd: Have nlm_fopen() return errno values The nlm_fopen() function is part of the API between nfsd and lockd. Currently its return value is an on-the-wire NLM status code. But that forces NFSD to include NLM wire protocol definitions despite having no other dependency on the NLM wire protocol. In addition, a CONFIG_LOCKD_V4 Kconfig symbol appears in the middle of NFSD source code. Refactor: Let's not use on-the-wire values as part of a high-level API between two Linux kernel modules. That's what we have errno for, right? And, instead of simply moving the CONFIG_LOCKD_V4 check, we can get rid of it entirely and let the decision of what actual NLM status code goes on the wire to be left up to NLM version-specific code. 
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/svc4proc.c | 18 +++++++++++++--- fs/lockd/svcproc.c | 14 ++++++++++++- fs/lockd/svcsubs.c | 27 ++++++++++++++++++------ fs/nfsd/lockd.c | 50 +++++++++++++++++++++++++-------------------- include/linux/lockd/bind.h | 8 +++----- include/linux/lockd/lockd.h | 2 ++ 6 files changed, 82 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 55b6dcc56db1..4ceb27cc72e4 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -73,9 +73,21 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, no_locks: nlmsvc_release_host(host); - if (error) - return error; - return nlm_lck_denied_nolocks; + switch (error) { + case nlm_granted: + return nlm_lck_denied_nolocks; + case nlm__int__stale_fh: + return nlm4_stale_fh; + case nlm__int__failed: + return nlm4_failed; + default: + if (be32_to_cpu(error) >= 30000) { + pr_warn_once("lockd: unhandled internal status %u\n", + be32_to_cpu(error)); + return nlm4_failed; + } + return error; + } } /* diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 27ed71935e45..272c8f36ed2a 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -39,8 +39,20 @@ static inline __be32 cast_status(__be32 status) #else static inline __be32 cast_status(__be32 status) { - if (status == nlm__int__deadlock) + switch (status) { + case nlm__int__deadlock: status = nlm_lck_denied; + break; + case nlm__int__stale_fh: + case nlm__int__failed: + status = nlm_lck_denied_nolocks; + break; + default: + if (be32_to_cpu(status) >= 30000) + pr_warn_once("lockd: unhandled internal status %u\n", + be32_to_cpu(status)); + break; + } return status; } #endif diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index dd0214dcb695..967739d2aa90 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -87,14 +87,29 @@ static __be32 nlm_do_fopen(struct svc_rqst *rqstp, struct nlm_file *file, int mode) { struct file **fp = 
&file->f_file[mode]; - __be32 nfserr; + __be32 nlmerr = nlm_granted; + int error; if (*fp) - return 0; - nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode); - if (nfserr) - dprintk("lockd: open failed (error %d)\n", nfserr); - return nfserr; + return nlmerr; + + error = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode); + if (error) { + dprintk("lockd: open failed (errno %d)\n", error); + switch (error) { + case -EWOULDBLOCK: + nlmerr = nlm__int__drop_reply; + break; + case -ESTALE: + nlmerr = nlm__int__stale_fh; + break; + default: + nlmerr = nlm__int__failed; + break; + } + } + + return nlmerr; } /* diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 8c230ccd6645..6fe1325815e0 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -14,19 +14,20 @@ #define NFSDDBG_FACILITY NFSDDBG_LOCKD -#ifdef CONFIG_LOCKD_V4 -#define nlm_stale_fh nlm4_stale_fh -#define nlm_failed nlm4_failed -#else -#define nlm_stale_fh nlm_lck_denied_nolocks -#define nlm_failed nlm_lck_denied_nolocks -#endif -/* - * Note: we hold the dentry use count while the file is open. +/** + * nlm_fopen - Open an NFSD file + * @rqstp: NLM RPC procedure execution context + * @f: NFS file handle to be opened + * @filp: OUT: an opened struct file + * @flags: the POSIX open flags to use + * + * nlm_fopen() holds the dentry reference until nlm_fclose() releases it. + * + * Returns zero on success or a negative errno value if the file + * cannot be opened. */ -static __be32 -nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, - int mode) +static int nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, + struct file **filp, int flags) { __be32 nfserr; int access; @@ -47,18 +48,17 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, * if NFSEXP_NOAUTHNLM is set. Some older clients use AUTH_NULL * for NLM requests. */ - access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ; + access = (flags == O_WRONLY) ? 
NFSD_MAY_WRITE : NFSD_MAY_READ; access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS; nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp); fh_put(&fh); - /* We return nlm error codes as nlm doesn't know - * about nfsd, but nfsd does know about nlm.. - */ + switch (nfserr) { case nfs_ok: - return 0; + break; case nfserr_jukebox: - /* this error can indicate a presence of a conflicting + /* + * This error can indicate a presence of a conflicting * delegation to an NLM lock request. Options are: * (1) For now, drop this request and make the client * retry. When delegation is returned, client's lock retry @@ -66,19 +66,25 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp, * (2) NLM4_DENIED as per "spec" signals to the client * that the lock is unavailable now but client can retry. * Linux client implementation does not. It treats - * NLM4_DENIED same as NLM4_FAILED and errors the request. + * NLM4_DENIED same as NLM4_FAILED and fails the request. * (3) For the future, treat this as blocked lock and try * to callback when the delegation is returned but might * not have a proper lock request to block on. 
*/ - return nlm__int__drop_reply; + return -EWOULDBLOCK; case nfserr_stale: - return nlm_stale_fh; + return -ESTALE; default: - return nlm_failed; + return -ENOLCK; } + + return 0; } +/** + * nlm_fclose - Close an NFSD file + * @filp: a struct file that was opened by nlm_fopen() + */ static void nlm_fclose(struct file *filp) { diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index c53c81242e72..2f5dd9e943ee 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -26,11 +26,9 @@ struct rpc_clnt; * This is the set of functions for lockd->nfsd communication */ struct nlmsvc_binding { - __be32 (*fopen)(struct svc_rqst *, - struct nfs_fh *, - struct file **, - int mode); - void (*fclose)(struct file *); + int (*fopen)(struct svc_rqst *rqstp, struct nfs_fh *f, + struct file **filp, int flags); + void (*fclose)(struct file *filp); }; extern const struct nlmsvc_binding *nlmsvc_ops; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 793691912137..195e6ce28f6e 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -44,6 +44,8 @@ */ #define nlm__int__drop_reply cpu_to_be32(30000) #define nlm__int__deadlock cpu_to_be32(30001) +#define nlm__int__stale_fh cpu_to_be32(30002) +#define nlm__int__failed cpu_to_be32(30003) /* * Lockd host handle (used both by the client and server personality). -- cgit v1.2.3 From efb5b15e3b78f5644dd2d4ddec8880e0c9aa5b5f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:26 -0500 Subject: lockd: Relocate nlmsvc_unlock API declarations The nlmsvc_unlock_all_by_sb() and nlmsvc_unlock_all_by_ip() functions are part of lockd's external API, consumed by other kernel subsystems. Their declarations currently reside in linux/lockd/lockd.h alongside internal implementation details, which blurs the boundary between lockd's public interface and its private internals. 
Moving these declarations to linux/lockd/bind.h groups them with other external API functions and makes the separation explicit. This clarifies which functions are intended for external use and reduces the risk of internal implementation details leaking into the public API surface. Build-tested with allyesconfig; no functional changes. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfsctl.c | 2 +- include/linux/lockd/bind.h | 7 +++++++ include/linux/lockd/lockd.h | 6 ------ 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 71aabdaa1d15..0bf01ae411c5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 2f5dd9e943ee..82eca0a13ccc 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -21,6 +21,7 @@ struct svc_rqst; struct rpc_task; struct rpc_clnt; +struct super_block; /* * This is the set of functions for lockd->nfsd communication @@ -80,4 +81,10 @@ extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, vo extern int lockd_up(struct net *net, const struct cred *cred); extern void lockd_down(struct net *net); +/* + * Cluster failover support + */ +int nlmsvc_unlock_all_by_sb(struct super_block *sb); +int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); + #endif /* LINUX_LOCKD_BIND_H */ diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 195e6ce28f6e..0d883f48ec21 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -311,12 +311,6 @@ void nlmsvc_mark_resources(struct net *); void nlmsvc_free_host_resources(struct nlm_host *); void nlmsvc_invalidate_all(void); -/* - * Cluster failover support - */ -int nlmsvc_unlock_all_by_sb(struct super_block *sb); -int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr); - static 
inline struct file *nlmsvc_file_file(const struct nlm_file *file) { return file->f_file[O_RDONLY] ? -- cgit v1.2.3 From 840621fd2ff23ada8b9262d90477e75232566e6b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:27 -0500 Subject: NFS: Use nlmclnt_shutdown_rpc_clnt() to safely shut down NLM A race condition exists in shutdown_store() when writing to the sysfs "shutdown" file concurrently with nlm_shutdown_hosts_net(). Without synchronization, the following sequence can occur: 1. shutdown_store() reads server->nlm_host (non-NULL) 2. nlm_shutdown_hosts_net() acquires nlm_host_mutex, calls rpc_shutdown_client(), sets h_rpcclnt to NULL, and potentially frees the host via nlm_gc_hosts() 3. shutdown_store() dereferences the now-stale or freed host Introduce nlmclnt_shutdown_rpc_clnt(), which acquires nlm_host_mutex before accessing h_rpcclnt. This synchronizes with nlm_shutdown_hosts_net() and ensures the rpc_clnt pointer remains valid during the shutdown operation. This change also improves API layering: NFS client code no longer needs to include the internal lockd header to access nlm_host fields. The new helper resides in bind.h alongside other public lockd interfaces. 
Reported-by: Jeff Layton Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/host.c | 29 +++++++++++++++++++++++++++++ fs/nfs/sysfs.c | 4 ++-- include/linux/lockd/bind.h | 1 + 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 1a9582a10a86..015900d2d4c2 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -306,6 +306,35 @@ void nlmclnt_release_host(struct nlm_host *host) } } +/* Callback for rpc_cancel_tasks() - matches all tasks for cancellation */ +static bool nlmclnt_match_all(const struct rpc_task *task, const void *data) +{ + return true; +} + +/** + * nlmclnt_shutdown_rpc_clnt - safely shut down NLM client RPC operations + * @host: nlm_host to shut down + * + * Cancels outstanding RPC tasks and marks the client as shut down. + * Synchronizes with nlmclnt_release_host() via nlm_host_mutex to prevent + * races between shutdown and host destruction. Safe to call if h_rpcclnt + * is NULL or already shut down. 
+ */ +void nlmclnt_shutdown_rpc_clnt(struct nlm_host *host) +{ + struct rpc_clnt *clnt; + + mutex_lock(&nlm_host_mutex); + clnt = host->h_rpcclnt; + if (clnt) { + clnt->cl_shutdown = 1; + rpc_cancel_tasks(clnt, -EIO, nlmclnt_match_all, NULL); + } + mutex_unlock(&nlm_host_mutex); +} +EXPORT_SYMBOL_GPL(nlmclnt_shutdown_rpc_clnt); + /** * nlmsvc_lookup_host - Find an NLM host handle matching a remote client * @rqstp: incoming NLM request diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 7d8921f524a6..051da37770d8 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "internal.h" #include "nfs4_fs.h" @@ -285,7 +285,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, shutdown_client(server->client_acl); if (server->nlm_host) - shutdown_client(server->nlm_host->h_rpcclnt); + nlmclnt_shutdown_rpc_clnt(server->nlm_host); out: shutdown_nfs_client(server->nfs_client); return count; diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 82eca0a13ccc..39c124dcb19c 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -57,6 +57,7 @@ struct nlmclnt_initdata { extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init); extern void nlmclnt_done(struct nlm_host *host); extern struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host); +extern void nlmclnt_shutdown_rpc_clnt(struct nlm_host *host); /* * NLM client operations provide a means to modify RPC processing of NLM -- cgit v1.2.3 From f4d5f8caadd858f11b21e8a9e5c85290fc21a568 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:28 -0500 Subject: lockd: Move xdr4.h from include/linux/lockd/ to fs/lockd/ The xdr4.h header declares NLMv4-specific XDR encoder/decoder functions and error codes that are used exclusively within the lockd subsystem. 
Moving it from include/linux/lockd/ to fs/lockd/ clarifies the intended scope of these declarations and prevents external code from depending on lockd-internal interfaces. This change reduces the public API surface of the lockd module and makes it easier to refactor NLMv4 internals without risk of breaking out-of-tree consumers. The header's contents are implementation details of the NLMv4 wire protocol handling, not a contract with other kernel subsystems. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/clnt4xdr.c | 2 ++ fs/lockd/svc4proc.c | 2 ++ fs/lockd/xdr4.c | 1 + fs/lockd/xdr4.h | 34 ++++++++++++++++++++++++++++++++++ include/linux/lockd/bind.h | 3 --- include/linux/lockd/lockd.h | 7 ++++--- include/linux/lockd/xdr4.h | 43 ------------------------------------------- 7 files changed, 43 insertions(+), 49 deletions(-) create mode 100644 fs/lockd/xdr4.h delete mode 100644 include/linux/lockd/xdr4.h (limited to 'include') diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 527458db4525..23896073c7e5 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -17,6 +17,8 @@ #include +#include "xdr4.h" + #define NLMDBG_FACILITY NLMDBG_XDR #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ) diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4ceb27cc72e4..51d072a83a49 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -14,6 +14,8 @@ #include #include +#include "xdr4.h" + #define NLMDBG_FACILITY NLMDBG_CLIENT /* diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index e343c820301f..5b1e15977697 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -19,6 +19,7 @@ #include #include "svcxdr.h" +#include "xdr4.h" static inline s64 loff_t_to_s64(loff_t offset) diff --git a/fs/lockd/xdr4.h b/fs/lockd/xdr4.h new file mode 100644 index 000000000000..7be318c0512b --- /dev/null +++ b/fs/lockd/xdr4.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * XDR types for the NLM protocol + * + * Copyright (C) 1996 Olaf Kirch + */ + +#ifndef 
_LOCKD_XDR4_H +#define _LOCKD_XDR4_H + +/* error codes new to NLMv4 */ +#define nlm4_deadlock cpu_to_be32(NLM_DEADLCK) +#define nlm4_rofs cpu_to_be32(NLM_ROFS) +#define nlm4_stale_fh cpu_to_be32(NLM_STALE_FH) +#define nlm4_fbig cpu_to_be32(NLM_FBIG) +#define nlm4_failed cpu_to_be32(NLM_FAILED) + +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len); +bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +#endif /* _LOCKD_XDR4_H */ diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 39c124dcb19c..077da0696f12 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -13,9 +13,6 @@ #include /* need xdr-encoded error codes too, so... 
*/ #include -#ifdef CONFIG_LOCKD_V4 -#include -#endif /* Dummy declarations */ struct svc_rqst; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0d883f48ec21..46f244141645 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -22,9 +22,6 @@ #include #include #include -#ifdef CONFIG_LOCKD_V4 -#include -#endif #include #include @@ -235,6 +232,10 @@ int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, struct nlm_rqst *); void nlmclnt_next_cookie(struct nlm_cookie *); +#ifdef CONFIG_LOCKD_V4 +extern const struct rpc_version nlm_version4; +#endif + /* * Host cache */ diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h deleted file mode 100644 index 72831e35dca3..000000000000 --- a/include/linux/lockd/xdr4.h +++ /dev/null @@ -1,43 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/include/linux/lockd/xdr4.h - * - * XDR types for the NLM protocol - * - * Copyright (C) 1996 Olaf Kirch - */ - -#ifndef LOCKD_XDR4_H -#define LOCKD_XDR4_H - -#include -#include -#include -#include - -/* error codes new to NLMv4 */ -#define nlm4_deadlock cpu_to_be32(NLM_DEADLCK) -#define nlm4_rofs cpu_to_be32(NLM_ROFS) -#define nlm4_stale_fh cpu_to_be32(NLM_STALE_FH) -#define nlm4_fbig cpu_to_be32(NLM_FBIG) -#define nlm4_failed cpu_to_be32(NLM_FAILED) - -void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len); -bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_shareargs(struct svc_rqst 
*rqstp, struct xdr_stream *xdr); -bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); - -bool nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); - -extern const struct rpc_version nlm_version4; - -#endif /* LOCKD_XDR4_H */ -- cgit v1.2.3 From 4db2f8a016dc9f9b357bfbf5c507c2582bb36730 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:29 -0500 Subject: lockd: Move share.h from include/linux/lockd/ to fs/lockd/ The share.h header defines struct nlm_share and declares the DOS share management functions used by the NLM server to implement NLM_SHARE and NLM_UNSHARE operations. These interfaces are used exclusively within the lockd subsystem. A git grep search confirms no external code references them. Relocating this header from include/linux/lockd/ to fs/lockd/ narrows the public API surface of the lockd module. Out-of-tree code cannot depend on these internal interfaces after this change. Future refactoring of the share management implementation thus requires no consideration of external consumers. 
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/share.h | 30 ++++++++++++++++++++++++++++++ fs/lockd/svc4proc.c | 2 +- fs/lockd/svcproc.c | 3 ++- fs/lockd/svcshare.c | 3 ++- fs/lockd/svcsubs.c | 3 ++- include/linux/lockd/lockd.h | 2 ++ include/linux/lockd/share.h | 32 -------------------------------- 7 files changed, 39 insertions(+), 36 deletions(-) create mode 100644 fs/lockd/share.h delete mode 100644 include/linux/lockd/share.h (limited to 'include') diff --git a/fs/lockd/share.h b/fs/lockd/share.h new file mode 100644 index 000000000000..d8f4ebd9c278 --- /dev/null +++ b/fs/lockd/share.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DOS share management for lockd. + * + * Copyright (C) 1996, Olaf Kirch + */ + +#ifndef _LOCKD_SHARE_H +#define _LOCKD_SHARE_H + +/* + * DOS share for a specific file + */ +struct nlm_share { + struct nlm_share * s_next; /* linked list */ + struct nlm_host * s_host; /* client host */ + struct nlm_file * s_file; /* shared file */ + struct xdr_netobj s_owner; /* owner handle */ + u32 s_access; /* access mode */ + u32 s_mode; /* deny mode */ +}; + +__be32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *, + struct nlm_args *); +__be32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *, + struct nlm_args *); +void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, + nlm_host_match_fn_t); + +#endif /* _LOCKD_SHARE_H */ diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 51d072a83a49..da88b638d90d 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -11,9 +11,9 @@ #include #include #include -#include #include +#include "share.h" #include "xdr4.h" #define NLMDBG_FACILITY NLMDBG_CLIENT diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 272c8f36ed2a..8441fabd019f 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -11,9 +11,10 @@ #include #include #include -#include #include +#include "share.h" + #define NLMDBG_FACILITY NLMDBG_CLIENT #ifdef 
CONFIG_LOCKD_V4 diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index 88c81ce1148d..8e06840834c6 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "share.h" static inline int nlm_cmp_owner(struct nlm_share *share, struct xdr_netobj *oh) diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 967739d2aa90..ce596a17112c 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -16,11 +16,12 @@ #include #include #include -#include #include #include #include +#include "share.h" + #define NLMDBG_FACILITY NLMDBG_SVCSUBS diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 46f244141645..eebcecd12fae 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -155,6 +155,8 @@ struct nlm_rqst { void * a_callback_data; /* sent to nlmclnt_operations callbacks */ }; +struct nlm_share; + /* * This struct describes a file held open by lockd on behalf of * an NFS client. diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h deleted file mode 100644 index 1f18a9faf645..000000000000 --- a/include/linux/lockd/share.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/include/linux/lockd/share.h - * - * DOS share management for lockd. 
- * - * Copyright (C) 1996, Olaf Kirch - */ - -#ifndef LINUX_LOCKD_SHARE_H -#define LINUX_LOCKD_SHARE_H - -/* - * DOS share for a specific file - */ -struct nlm_share { - struct nlm_share * s_next; /* linked list */ - struct nlm_host * s_host; /* client host */ - struct nlm_file * s_file; /* shared file */ - struct xdr_netobj s_owner; /* owner handle */ - u32 s_access; /* access mode */ - u32 s_mode; /* deny mode */ -}; - -__be32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *, - struct nlm_args *); -__be32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *, - struct nlm_args *); -void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, - nlm_host_match_fn_t); - -#endif /* LINUX_LOCKD_SHARE_H */ -- cgit v1.2.3 From 2c562c6e6715619ce34bb37d8a0a5e40fdcc7a44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:30 -0500 Subject: lockd: Relocate include/linux/lockd/lockd.h Headers placed in include/linux/ form part of the kernel's internal API and signal to subsystem maintainers that other parts of the kernel may depend on them. By moving lockd.h into fs/lockd/, lockd becomes a more self-contained module whose internal interfaces are clearly distinguished from its public contract with the rest of the kernel. This relocation addresses a long-standing XXX comment in the header itself that acknowledged the file's misplacement. Future changes to lockd internals can now proceed with confidence that external consumers are not inadvertently coupled to implementation details. 
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/clnt4xdr.c | 3 +- fs/lockd/clntlock.c | 2 +- fs/lockd/clntproc.c | 2 +- fs/lockd/clntxdr.c | 3 +- fs/lockd/host.c | 2 +- fs/lockd/lockd.h | 395 +++++++++++++++++++++++++++++++++++++++++++ fs/lockd/mon.c | 2 +- fs/lockd/svc.c | 2 +- fs/lockd/svc4proc.c | 2 +- fs/lockd/svclock.c | 3 +- fs/lockd/svcproc.c | 2 +- fs/lockd/svcshare.c | 2 +- fs/lockd/svcsubs.c | 2 +- fs/lockd/trace.h | 3 +- fs/lockd/xdr.c | 3 +- fs/lockd/xdr4.c | 2 +- include/linux/lockd/lockd.h | 401 -------------------------------------------- 17 files changed, 414 insertions(+), 417 deletions(-) create mode 100644 fs/lockd/lockd.h delete mode 100644 include/linux/lockd/lockd.h (limited to 'include') diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 23896073c7e5..61ee5fa6dfa4 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -13,7 +13,8 @@ #include #include #include -#include + +#include "lockd.h" #include diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 85bc0f3e91df..8fa30c42c92a 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -15,9 +15,9 @@ #include #include #include -#include #include +#include "lockd.h" #include "trace.h" #define NLMDBG_FACILITY NLMDBG_CLIENT diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index fb4d0752c9bb..7f211008a5d2 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -18,8 +18,8 @@ #include #include #include -#include +#include "lockd.h" #include "trace.h" #define NLMDBG_FACILITY NLMDBG_CLIENT diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 6ea3448d2d31..65555f5224b1 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -15,7 +15,8 @@ #include #include #include -#include + +#include "lockd.h" #include diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 015900d2d4c2..ea8a8e166f7e 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -16,13 +16,13 @@ #include #include #include -#include #include #include #include +#include 
"lockd.h" #include "netns.h" #define NLMDBG_FACILITY NLMDBG_HOSTCACHE diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h new file mode 100644 index 000000000000..9bcf89765a69 --- /dev/null +++ b/fs/lockd/lockd.h @@ -0,0 +1,395 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 1996 Olaf Kirch + */ + +#ifndef _LOCKD_LOCKD_H +#define _LOCKD_LOCKD_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Version string + */ +#define LOCKD_VERSION "0.5" + +/* + * Default timeout for RPC calls (seconds) + */ +#define LOCKD_DFLT_TIMEO 10 + +/* + * Internal-use status codes, not to be placed on the wire. + * Version handlers translate these to appropriate wire values. + */ +#define nlm__int__drop_reply cpu_to_be32(30000) +#define nlm__int__deadlock cpu_to_be32(30001) +#define nlm__int__stale_fh cpu_to_be32(30002) +#define nlm__int__failed cpu_to_be32(30003) + +/* + * Lockd host handle (used both by the client and server personality). 
+ */ +struct nlm_host { + struct hlist_node h_hash; /* doubly linked list */ + struct sockaddr_storage h_addr; /* peer address */ + size_t h_addrlen; + struct sockaddr_storage h_srcaddr; /* our address (optional) */ + size_t h_srcaddrlen; + struct rpc_clnt *h_rpcclnt; /* RPC client to talk to peer */ + char *h_name; /* remote hostname */ + u32 h_version; /* interface version */ + unsigned short h_proto; /* transport proto */ + unsigned short h_reclaiming : 1, + h_server : 1, /* server side, not client side */ + h_noresvport : 1, + h_inuse : 1; + wait_queue_head_t h_gracewait; /* wait while reclaiming */ + struct rw_semaphore h_rwsem; /* Reboot recovery lock */ + u32 h_state; /* pseudo-state counter */ + u32 h_nsmstate; /* true remote NSM state */ + u32 h_pidcount; /* Pseudopids */ + refcount_t h_count; /* reference count */ + struct mutex h_mutex; /* mutex for pmap binding */ + unsigned long h_nextrebind; /* next portmap call */ + unsigned long h_expires; /* eligible for GC */ + struct list_head h_lockowners; /* Lockowners for the client */ + spinlock_t h_lock; + struct list_head h_granted; /* Locks in GRANTED state */ + struct list_head h_reclaim; /* Locks in RECLAIM state */ + struct nsm_handle *h_nsmhandle; /* NSM status handle */ + char *h_addrbuf; /* address eyecatcher */ + struct net *net; /* host net */ + const struct cred *h_cred; + char nodename[UNX_MAXNODENAME + 1]; + const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */ +}; + +/* + * The largest string sm_addrbuf should hold is a full-size IPv6 address + * (no "::" anywhere) with a scope ID. The buffer size is computed to + * hold eight groups of colon-separated four-hex-digit numbers, a + * percent sign, a scope id (at most 32 bits, in decimal), and NUL. 
+ */ +#define NSM_ADDRBUF ((8 * 4 + 7) + (1 + 10) + 1) + +struct nsm_handle { + struct list_head sm_link; + refcount_t sm_count; + char *sm_mon_name; + char *sm_name; + struct sockaddr_storage sm_addr; + size_t sm_addrlen; + unsigned int sm_monitored : 1, + sm_sticky : 1; /* don't unmonitor */ + struct nsm_private sm_priv; + char sm_addrbuf[NSM_ADDRBUF]; +}; + +/* + * Rigorous type checking on sockaddr type conversions + */ +static inline struct sockaddr *nlm_addr(const struct nlm_host *host) +{ + return (struct sockaddr *)&host->h_addr; +} + +static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) +{ + return (struct sockaddr *)&host->h_srcaddr; +} + +/* + * Map an fl_owner_t into a unique 32-bit "pid" + */ +struct nlm_lockowner { + struct list_head list; + refcount_t count; + + struct nlm_host *host; + fl_owner_t owner; + uint32_t pid; +}; + +/* + * This is the representation of a blocked client lock. + */ +struct nlm_wait { + struct list_head b_list; /* linked list */ + wait_queue_head_t b_wait; /* where to wait on */ + struct nlm_host *b_host; + struct file_lock *b_lock; /* local file lock */ + __be32 b_status; /* grant callback status */ +}; + +/* + * Memory chunk for NLM client RPC request. + */ +#define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u) +struct nlm_rqst { + refcount_t a_count; + unsigned int a_flags; /* initial RPC task flags */ + struct nlm_host * a_host; /* host handle */ + struct nlm_args a_args; /* arguments */ + struct nlm_res a_res; /* result */ + struct nlm_block * a_block; + unsigned int a_retries; /* Retry count */ + u8 a_owner[NLMCLNT_OHSIZE]; + void * a_callback_data; /* sent to nlmclnt_operations callbacks */ +}; + +struct nlm_share; + +/* + * This struct describes a file held open by lockd on behalf of + * an NFS client. 
+ */ +struct nlm_file { + struct hlist_node f_list; /* linked list */ + struct nfs_fh f_handle; /* NFS file handle */ + struct file * f_file[2]; /* VFS file pointers, + indexed by O_ flags */ + struct nlm_share * f_shares; /* DOS shares */ + struct list_head f_blocks; /* blocked locks */ + unsigned int f_locks; /* guesstimate # of locks */ + unsigned int f_count; /* reference count */ + struct mutex f_mutex; /* avoid concurrent access */ +}; + +/* + * This is a server block (i.e. a lock requested by some client which + * couldn't be granted because of a conflicting lock). + */ +#define NLM_NEVER (~(unsigned long) 0) +/* timeout on non-blocking call: */ +#define NLM_TIMEOUT (7 * HZ) + +struct nlm_block { + struct kref b_count; /* Reference count */ + struct list_head b_list; /* linked list of all blocks */ + struct list_head b_flist; /* linked list (per file) */ + struct nlm_rqst * b_call; /* RPC args & callback info */ + struct svc_serv * b_daemon; /* NLM service */ + struct nlm_host * b_host; /* host handle for RPC clnt */ + unsigned long b_when; /* next re-xmit */ + unsigned int b_id; /* block id */ + unsigned char b_granted; /* VFS granted lock */ + struct nlm_file * b_file; /* file in question */ + struct cache_req * b_cache_req; /* deferred request handling */ + struct cache_deferred_req * b_deferred_req; + unsigned int b_flags; /* block flags */ +#define B_QUEUED 1 /* lock queued */ +#define B_GOT_CALLBACK 2 /* got lock or conflicting lock */ +#define B_TIMED_OUT 4 /* filesystem too slow to respond */ +}; + +/* + * Global variables + */ +extern const struct rpc_program nlm_program; +extern const struct svc_procedure nlmsvc_procedures[24]; +#ifdef CONFIG_LOCKD_V4 +extern const struct svc_procedure nlmsvc_procedures4[24]; +#endif +extern int nlmsvc_grace_period; +extern unsigned long nlm_timeout; +extern bool nsm_use_hostnames; +extern u32 nsm_local_state; + +extern struct timer_list nlmsvc_retry; + +/* + * Lockd client functions + */ +struct nlm_rqst * 
nlm_alloc_call(struct nlm_host *host); +int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); +int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); +void nlmclnt_release_call(struct nlm_rqst *); +void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, + struct file_lock *fl); +void nlmclnt_queue_block(struct nlm_wait *block); +__be32 nlmclnt_dequeue_block(struct nlm_wait *block); +int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout); +__be32 nlmclnt_grant(const struct sockaddr *addr, + const struct nlm_lock *lock); +void nlmclnt_recovery(struct nlm_host *); +int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, + struct nlm_rqst *); +void nlmclnt_next_cookie(struct nlm_cookie *); + +#ifdef CONFIG_LOCKD_V4 +extern const struct rpc_version nlm_version4; +#endif + +/* + * Host cache + */ +struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, + const size_t salen, + const unsigned short protocol, + const u32 version, + const char *hostname, + int noresvport, + struct net *net, + const struct cred *cred); +void nlmclnt_release_host(struct nlm_host *); +struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, + const char *hostname, + const size_t hostname_len); +void nlmsvc_release_host(struct nlm_host *); +struct rpc_clnt * nlm_bind_host(struct nlm_host *); +void nlm_rebind_host(struct nlm_host *); +struct nlm_host * nlm_get_host(struct nlm_host *); +void nlm_shutdown_hosts(void); +void nlm_shutdown_hosts_net(struct net *net); +void nlm_host_rebooted(const struct net *net, + const struct nlm_reboot *); + +/* + * Host monitoring + */ +int nsm_monitor(const struct nlm_host *host); +void nsm_unmonitor(const struct nlm_host *host); + +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, + const size_t salen, + const char *hostname, + const size_t hostname_len); +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const 
struct nlm_reboot *info); +void nsm_release(struct nsm_handle *nsm); + +/* + * This is used in garbage collection and resource reclaim + * A return value != 0 means destroy the lock/block/share + */ +typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref); + +/* + * Server-side lock handling + */ +int lock_to_openmode(struct file_lock *); +__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, + struct nlm_host *, struct nlm_lock *, int, + struct nlm_cookie *, int); +__be32 nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *); +__be32 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, + struct nlm_host *host, struct nlm_lock *lock, + struct nlm_lock *conflock); +__be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); +void nlmsvc_retry_blocked(struct svc_rqst *rqstp); +void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, + nlm_host_match_fn_t match); +void nlmsvc_grant_reply(struct nlm_cookie *, __be32); +void nlmsvc_release_call(struct nlm_rqst *); +void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); + +/* + * File handling for the server personality + */ +__be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, + struct nlm_lock *); +void nlm_release_file(struct nlm_file *); +void nlmsvc_put_lockowner(struct nlm_lockowner *); +void nlmsvc_release_lockowner(struct nlm_lock *); +void nlmsvc_mark_resources(struct net *); +void nlmsvc_free_host_resources(struct nlm_host *); +void nlmsvc_invalidate_all(void); + +static inline struct file *nlmsvc_file_file(const struct nlm_file *file) +{ + return file->f_file[O_RDONLY] ? 
+ file->f_file[O_RDONLY] : file->f_file[O_WRONLY]; +} + +static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) +{ + return file_inode(nlmsvc_file_file(file)); +} + +static inline bool +nlmsvc_file_cannot_lock(const struct nlm_file *file) +{ + return exportfs_cannot_lock(nlmsvc_file_file(file)->f_path.dentry->d_sb->s_export_op); +} + +static inline int __nlm_privileged_request4(const struct sockaddr *sap) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + if (ntohs(sin->sin_port) > 1023) + return 0; + + return ipv4_is_loopback(sin->sin_addr.s_addr); +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline int __nlm_privileged_request6(const struct sockaddr *sap) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + + if (ntohs(sin6->sin6_port) > 1023) + return 0; + + if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) + return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]); + + return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK; +} +#else /* IS_ENABLED(CONFIG_IPV6) */ +static inline int __nlm_privileged_request6(const struct sockaddr *sap) +{ + return 0; +} +#endif /* IS_ENABLED(CONFIG_IPV6) */ + +/* + * Ensure incoming requests are from local privileged callers. + * + * Return TRUE if sender is local and is connecting via a privileged port; + * otherwise return FALSE. + */ +static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) +{ + const struct sockaddr *sap = svc_addr(rqstp); + + switch (sap->sa_family) { + case AF_INET: + return __nlm_privileged_request4(sap); + case AF_INET6: + return __nlm_privileged_request6(sap); + default: + return 0; + } +} + +/* + * Compare two NLM locks. + * When the second lock is of type F_UNLCK, this acts like a wildcard. 
+ */ +static inline int nlm_compare_locks(const struct file_lock *fl1, + const struct file_lock *fl2) +{ + return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file) + && fl1->c.flc_pid == fl2->c.flc_pid + && fl1->c.flc_owner == fl2->c.flc_owner + && fl1->fl_start == fl2->fl_start + && fl1->fl_end == fl2->fl_end + &&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK); +} + +extern const struct lock_manager_operations nlmsvc_lock_operations; + +#endif /* _LOCKD_LOCKD_H */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index b8fc732e1c67..3d3ee88ca4dc 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -16,10 +16,10 @@ #include #include #include -#include #include +#include "lockd.h" #include "netns.h" #define NLMDBG_FACILITY NLMDBG_MONITOR diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index dcd80c4e74c9..9dd7f8e11544 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -36,9 +36,9 @@ #include #include #include -#include #include +#include "lockd.h" #include "netns.h" #include "procfs.h" #include "netlink.h" diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index da88b638d90d..86dfeb6ce68d 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -10,9 +10,9 @@ #include #include -#include #include +#include "lockd.h" #include "share.h" #include "xdr4.h" diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 5edf00751a1e..1c800fffe69c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -29,7 +29,8 @@ #include #include #include -#include + +#include "lockd.h" #define NLMDBG_FACILITY NLMDBG_SVCLOCK diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 8441fabd019f..e9a6bcc3bf2e 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -10,9 +10,9 @@ #include #include -#include #include +#include "lockd.h" #include "share.h" #define NLMDBG_FACILITY NLMDBG_CLIENT diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index 8e06840834c6..8675ac80ab16 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c @@ -14,8 +14,8 @@ 
#include #include -#include +#include "lockd.h" #include "share.h" static inline int diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index ce596a17112c..71eaec5ed8d7 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -15,11 +15,11 @@ #include #include #include -#include #include #include #include +#include "lockd.h" #include "share.h" #define NLMDBG_FACILITY NLMDBG_SVCSUBS diff --git a/fs/lockd/trace.h b/fs/lockd/trace.h index 7461b13b6e74..7214d7e96a42 100644 --- a/fs/lockd/trace.h +++ b/fs/lockd/trace.h @@ -8,7 +8,8 @@ #include #include #include -#include + +#include "lockd.h" #ifdef CONFIG_LOCKD_V4 #define NLM_STATUS_LIST \ diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index adfcce2bf11b..5aac49d1875a 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -15,13 +15,12 @@ #include #include #include -#include #include +#include "lockd.h" #include "svcxdr.h" - static inline loff_t s32_to_loff_t(__s32 offset) { diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 5b1e15977697..f57d4881d5f1 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -16,8 +16,8 @@ #include #include #include -#include +#include "lockd.h" #include "svcxdr.h" #include "xdr4.h" diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h deleted file mode 100644 index eebcecd12fae..000000000000 --- a/include/linux/lockd/lockd.h +++ /dev/null @@ -1,401 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/include/linux/lockd/lockd.h - * - * General-purpose lockd include file. - * - * Copyright (C) 1996 Olaf Kirch - */ - -#ifndef LINUX_LOCKD_LOCKD_H -#define LINUX_LOCKD_LOCKD_H - -/* XXX: a lot of this should really be under fs/lockd. */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Version string - */ -#define LOCKD_VERSION "0.5" - -/* - * Default timeout for RPC calls (seconds) - */ -#define LOCKD_DFLT_TIMEO 10 - -/* - * Internal-use status codes, not to be placed on the wire. 
- * Version handlers translate these to appropriate wire values. - */ -#define nlm__int__drop_reply cpu_to_be32(30000) -#define nlm__int__deadlock cpu_to_be32(30001) -#define nlm__int__stale_fh cpu_to_be32(30002) -#define nlm__int__failed cpu_to_be32(30003) - -/* - * Lockd host handle (used both by the client and server personality). - */ -struct nlm_host { - struct hlist_node h_hash; /* doubly linked list */ - struct sockaddr_storage h_addr; /* peer address */ - size_t h_addrlen; - struct sockaddr_storage h_srcaddr; /* our address (optional) */ - size_t h_srcaddrlen; - struct rpc_clnt *h_rpcclnt; /* RPC client to talk to peer */ - char *h_name; /* remote hostname */ - u32 h_version; /* interface version */ - unsigned short h_proto; /* transport proto */ - unsigned short h_reclaiming : 1, - h_server : 1, /* server side, not client side */ - h_noresvport : 1, - h_inuse : 1; - wait_queue_head_t h_gracewait; /* wait while reclaiming */ - struct rw_semaphore h_rwsem; /* Reboot recovery lock */ - u32 h_state; /* pseudo-state counter */ - u32 h_nsmstate; /* true remote NSM state */ - u32 h_pidcount; /* Pseudopids */ - refcount_t h_count; /* reference count */ - struct mutex h_mutex; /* mutex for pmap binding */ - unsigned long h_nextrebind; /* next portmap call */ - unsigned long h_expires; /* eligible for GC */ - struct list_head h_lockowners; /* Lockowners for the client */ - spinlock_t h_lock; - struct list_head h_granted; /* Locks in GRANTED state */ - struct list_head h_reclaim; /* Locks in RECLAIM state */ - struct nsm_handle *h_nsmhandle; /* NSM status handle */ - char *h_addrbuf; /* address eyecatcher */ - struct net *net; /* host net */ - const struct cred *h_cred; - char nodename[UNX_MAXNODENAME + 1]; - const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */ -}; - -/* - * The largest string sm_addrbuf should hold is a full-size IPv6 address - * (no "::" anywhere) with a scope ID. 
The buffer size is computed to - * hold eight groups of colon-separated four-hex-digit numbers, a - * percent sign, a scope id (at most 32 bits, in decimal), and NUL. - */ -#define NSM_ADDRBUF ((8 * 4 + 7) + (1 + 10) + 1) - -struct nsm_handle { - struct list_head sm_link; - refcount_t sm_count; - char *sm_mon_name; - char *sm_name; - struct sockaddr_storage sm_addr; - size_t sm_addrlen; - unsigned int sm_monitored : 1, - sm_sticky : 1; /* don't unmonitor */ - struct nsm_private sm_priv; - char sm_addrbuf[NSM_ADDRBUF]; -}; - -/* - * Rigorous type checking on sockaddr type conversions - */ -static inline struct sockaddr *nlm_addr(const struct nlm_host *host) -{ - return (struct sockaddr *)&host->h_addr; -} - -static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host) -{ - return (struct sockaddr *)&host->h_srcaddr; -} - -/* - * Map an fl_owner_t into a unique 32-bit "pid" - */ -struct nlm_lockowner { - struct list_head list; - refcount_t count; - - struct nlm_host *host; - fl_owner_t owner; - uint32_t pid; -}; - -/* - * This is the representation of a blocked client lock. - */ -struct nlm_wait { - struct list_head b_list; /* linked list */ - wait_queue_head_t b_wait; /* where to wait on */ - struct nlm_host *b_host; - struct file_lock *b_lock; /* local file lock */ - __be32 b_status; /* grant callback status */ -}; - -/* - * Memory chunk for NLM client RPC request. - */ -#define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u) -struct nlm_rqst { - refcount_t a_count; - unsigned int a_flags; /* initial RPC task flags */ - struct nlm_host * a_host; /* host handle */ - struct nlm_args a_args; /* arguments */ - struct nlm_res a_res; /* result */ - struct nlm_block * a_block; - unsigned int a_retries; /* Retry count */ - u8 a_owner[NLMCLNT_OHSIZE]; - void * a_callback_data; /* sent to nlmclnt_operations callbacks */ -}; - -struct nlm_share; - -/* - * This struct describes a file held open by lockd on behalf of - * an NFS client. 
- */ -struct nlm_file { - struct hlist_node f_list; /* linked list */ - struct nfs_fh f_handle; /* NFS file handle */ - struct file * f_file[2]; /* VFS file pointers, - indexed by O_ flags */ - struct nlm_share * f_shares; /* DOS shares */ - struct list_head f_blocks; /* blocked locks */ - unsigned int f_locks; /* guesstimate # of locks */ - unsigned int f_count; /* reference count */ - struct mutex f_mutex; /* avoid concurrent access */ -}; - -/* - * This is a server block (i.e. a lock requested by some client which - * couldn't be granted because of a conflicting lock). - */ -#define NLM_NEVER (~(unsigned long) 0) -/* timeout on non-blocking call: */ -#define NLM_TIMEOUT (7 * HZ) - -struct nlm_block { - struct kref b_count; /* Reference count */ - struct list_head b_list; /* linked list of all blocks */ - struct list_head b_flist; /* linked list (per file) */ - struct nlm_rqst * b_call; /* RPC args & callback info */ - struct svc_serv * b_daemon; /* NLM service */ - struct nlm_host * b_host; /* host handle for RPC clnt */ - unsigned long b_when; /* next re-xmit */ - unsigned int b_id; /* block id */ - unsigned char b_granted; /* VFS granted lock */ - struct nlm_file * b_file; /* file in question */ - struct cache_req * b_cache_req; /* deferred request handling */ - struct cache_deferred_req * b_deferred_req; - unsigned int b_flags; /* block flags */ -#define B_QUEUED 1 /* lock queued */ -#define B_GOT_CALLBACK 2 /* got lock or conflicting lock */ -#define B_TIMED_OUT 4 /* filesystem too slow to respond */ -}; - -/* - * Global variables - */ -extern const struct rpc_program nlm_program; -extern const struct svc_procedure nlmsvc_procedures[24]; -#ifdef CONFIG_LOCKD_V4 -extern const struct svc_procedure nlmsvc_procedures4[24]; -#endif -extern int nlmsvc_grace_period; -extern unsigned long nlm_timeout; -extern bool nsm_use_hostnames; -extern u32 nsm_local_state; - -extern struct timer_list nlmsvc_retry; - -/* - * Lockd client functions - */ -struct nlm_rqst * 
nlm_alloc_call(struct nlm_host *host); -int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *); -int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); -void nlmclnt_release_call(struct nlm_rqst *); -void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, - struct file_lock *fl); -void nlmclnt_queue_block(struct nlm_wait *block); -__be32 nlmclnt_dequeue_block(struct nlm_wait *block); -int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout); -__be32 nlmclnt_grant(const struct sockaddr *addr, - const struct nlm_lock *lock); -void nlmclnt_recovery(struct nlm_host *); -int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, - struct nlm_rqst *); -void nlmclnt_next_cookie(struct nlm_cookie *); - -#ifdef CONFIG_LOCKD_V4 -extern const struct rpc_version nlm_version4; -#endif - -/* - * Host cache - */ -struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, - const size_t salen, - const unsigned short protocol, - const u32 version, - const char *hostname, - int noresvport, - struct net *net, - const struct cred *cred); -void nlmclnt_release_host(struct nlm_host *); -struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, - const char *hostname, - const size_t hostname_len); -void nlmsvc_release_host(struct nlm_host *); -struct rpc_clnt * nlm_bind_host(struct nlm_host *); -void nlm_rebind_host(struct nlm_host *); -struct nlm_host * nlm_get_host(struct nlm_host *); -void nlm_shutdown_hosts(void); -void nlm_shutdown_hosts_net(struct net *net); -void nlm_host_rebooted(const struct net *net, - const struct nlm_reboot *); - -/* - * Host monitoring - */ -int nsm_monitor(const struct nlm_host *host); -void nsm_unmonitor(const struct nlm_host *host); - -struct nsm_handle *nsm_get_handle(const struct net *net, - const struct sockaddr *sap, - const size_t salen, - const char *hostname, - const size_t hostname_len); -struct nsm_handle *nsm_reboot_lookup(const struct net *net, - const 
struct nlm_reboot *info); -void nsm_release(struct nsm_handle *nsm); - -/* - * This is used in garbage collection and resource reclaim - * A return value != 0 means destroy the lock/block/share - */ -typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref); - -/* - * Server-side lock handling - */ -int lock_to_openmode(struct file_lock *); -__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, - struct nlm_host *, struct nlm_lock *, int, - struct nlm_cookie *, int); -__be32 nlmsvc_unlock(struct net *net, struct nlm_file *, struct nlm_lock *); -__be32 nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, - struct nlm_host *host, struct nlm_lock *lock, - struct nlm_lock *conflock); -__be32 nlmsvc_cancel_blocked(struct net *net, struct nlm_file *, struct nlm_lock *); -void nlmsvc_retry_blocked(struct svc_rqst *rqstp); -void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, - nlm_host_match_fn_t match); -void nlmsvc_grant_reply(struct nlm_cookie *, __be32); -void nlmsvc_release_call(struct nlm_rqst *); -void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t); - -/* - * File handling for the server personality - */ -__be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, - struct nlm_lock *); -void nlm_release_file(struct nlm_file *); -void nlmsvc_put_lockowner(struct nlm_lockowner *); -void nlmsvc_release_lockowner(struct nlm_lock *); -void nlmsvc_mark_resources(struct net *); -void nlmsvc_free_host_resources(struct nlm_host *); -void nlmsvc_invalidate_all(void); - -static inline struct file *nlmsvc_file_file(const struct nlm_file *file) -{ - return file->f_file[O_RDONLY] ? 
- file->f_file[O_RDONLY] : file->f_file[O_WRONLY]; -} - -static inline struct inode *nlmsvc_file_inode(struct nlm_file *file) -{ - return file_inode(nlmsvc_file_file(file)); -} - -static inline bool -nlmsvc_file_cannot_lock(const struct nlm_file *file) -{ - return exportfs_cannot_lock(nlmsvc_file_file(file)->f_path.dentry->d_sb->s_export_op); -} - -static inline int __nlm_privileged_request4(const struct sockaddr *sap) -{ - const struct sockaddr_in *sin = (struct sockaddr_in *)sap; - - if (ntohs(sin->sin_port) > 1023) - return 0; - - return ipv4_is_loopback(sin->sin_addr.s_addr); -} - -#if IS_ENABLED(CONFIG_IPV6) -static inline int __nlm_privileged_request6(const struct sockaddr *sap) -{ - const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - - if (ntohs(sin6->sin6_port) > 1023) - return 0; - - if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_MAPPED) - return ipv4_is_loopback(sin6->sin6_addr.s6_addr32[3]); - - return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK; -} -#else /* IS_ENABLED(CONFIG_IPV6) */ -static inline int __nlm_privileged_request6(const struct sockaddr *sap) -{ - return 0; -} -#endif /* IS_ENABLED(CONFIG_IPV6) */ - -/* - * Ensure incoming requests are from local privileged callers. - * - * Return TRUE if sender is local and is connecting via a privileged port; - * otherwise return FALSE. - */ -static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) -{ - const struct sockaddr *sap = svc_addr(rqstp); - - switch (sap->sa_family) { - case AF_INET: - return __nlm_privileged_request4(sap); - case AF_INET6: - return __nlm_privileged_request6(sap); - default: - return 0; - } -} - -/* - * Compare two NLM locks. - * When the second lock is of type F_UNLCK, this acts like a wildcard. 
- */ -static inline int nlm_compare_locks(const struct file_lock *fl1, - const struct file_lock *fl2) -{ - return file_inode(fl1->c.flc_file) == file_inode(fl2->c.flc_file) - && fl1->c.flc_pid == fl2->c.flc_pid - && fl1->c.flc_owner == fl2->c.flc_owner - && fl1->fl_start == fl2->fl_start - && fl1->fl_end == fl2->fl_end - &&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK); -} - -extern const struct lock_manager_operations nlmsvc_lock_operations; - -#endif /* LINUX_LOCKD_LOCKD_H */ -- cgit v1.2.3 From 236f3171ac690f632e13d391f47c68c3a8519bd2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:31 -0500 Subject: lockd: Remove lockd/debug.h The lockd include structure has unnecessary indirection. The header include/linux/lockd/debug.h is consumed only by fs/lockd/lockd.h, creating an extra compilation dependency and making the code harder to navigate. Fold the debug.h definitions directly into lockd.h and remove the now-redundant header. This reduces the include tree depth and makes the debug-related definitions easier to find when working on lockd internals. Build-tested with lockd built as module and built-in. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/lockd.h | 24 +++++++++++++++++++++++- include/linux/lockd/debug.h | 40 ---------------------------------------- 2 files changed, 23 insertions(+), 41 deletions(-) delete mode 100644 include/linux/lockd/debug.h (limited to 'include') diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h index 9bcf89765a69..460ccb701749 100644 --- a/fs/lockd/lockd.h +++ b/fs/lockd/lockd.h @@ -16,9 +16,31 @@ #include #include #include -#include +#include #include +/* + * Enable lockd debugging. + * Requires CONFIG_SUNRPC_DEBUG. 
+ */ +#undef ifdebug +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +# define ifdebug(flag) if (unlikely(nlm_debug & NLMDBG_##flag)) +#else +# define ifdebug(flag) if (0) +#endif + +#define NLMDBG_SVC 0x0001 +#define NLMDBG_CLIENT 0x0002 +#define NLMDBG_CLNTLOCK 0x0004 +#define NLMDBG_SVCLOCK 0x0008 +#define NLMDBG_MONITOR 0x0010 +#define NLMDBG_CLNTSUBS 0x0020 +#define NLMDBG_SVCSUBS 0x0040 +#define NLMDBG_HOSTCACHE 0x0080 +#define NLMDBG_XDR 0x0100 +#define NLMDBG_ALL 0x7fff + /* * Version string */ diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h deleted file mode 100644 index eede2ab5246f..000000000000 --- a/include/linux/lockd/debug.h +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/include/linux/lockd/debug.h - * - * Debugging stuff. - * - * Copyright (C) 1996 Olaf Kirch - */ - -#ifndef LINUX_LOCKD_DEBUG_H -#define LINUX_LOCKD_DEBUG_H - -#include - -/* - * Enable lockd debugging. - * Requires RPC_DEBUG. - */ -#undef ifdebug -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define ifdebug(flag) if (unlikely(nlm_debug & NLMDBG_##flag)) -#else -# define ifdebug(flag) if (0) -#endif - -/* - * Debug flags - */ -#define NLMDBG_SVC 0x0001 -#define NLMDBG_CLIENT 0x0002 -#define NLMDBG_CLNTLOCK 0x0004 -#define NLMDBG_SVCLOCK 0x0008 -#define NLMDBG_MONITOR 0x0010 -#define NLMDBG_CLNTSUBS 0x0020 -#define NLMDBG_SVCSUBS 0x0040 -#define NLMDBG_HOSTCACHE 0x0080 -#define NLMDBG_XDR 0x0100 -#define NLMDBG_ALL 0x7fff - -#endif /* LINUX_LOCKD_DEBUG_H */ -- cgit v1.2.3 From 615384a24b1e6b0f091ebc1dfbf7ec8b4c27fa81 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:32 -0500 Subject: lockd: Move xdr.h from include/linux/lockd/ to fs/lockd/ The lockd subsystem unnecessarily exposes internal NLM XDR type definitions through the global include path. These definitions are not used by any code outside fs/lockd/, making them inappropriate for include/linux/lockd/. 
Moving xdr.h to fs/lockd/ narrows the API surface and clarifies that these types are internal implementation details. The comment in linux/lockd/bind.h stating xdr.h was needed for "xdr-encoded error codes" is stale: no lockd API consumers use those codes. Forward declarations for struct nfs_fh and struct file_lock are added to bind.h because their definitions were previously pulled in transitively through xdr.h. Additionally, nfs3proc.c and proc.c need explicit includes of filelock.h for FL_CLOSE and for accessing struct file_lock members, respectively. Built and tested with lockd client/server operations. No functional change. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/lockd.h | 2 +- fs/lockd/xdr.h | 111 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs3proc.c | 1 + fs/nfs/proc.c | 1 + include/linux/lockd/bind.h | 5 +- include/linux/lockd/xdr.h | 113 --------------------------------------------- 6 files changed, 116 insertions(+), 117 deletions(-) create mode 100644 fs/lockd/xdr.h delete mode 100644 include/linux/lockd/xdr.h (limited to 'include') diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h index 460ccb701749..6f83b9a7257f 100644 --- a/fs/lockd/lockd.h +++ b/fs/lockd/lockd.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include "xdr.h" #include #include diff --git a/fs/lockd/xdr.h b/fs/lockd/xdr.h new file mode 100644 index 000000000000..af821ecf2a4e --- /dev/null +++ b/fs/lockd/xdr.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * XDR types for the NLM protocol + * + * Copyright (C) 1996 Olaf Kirch + */ + +#ifndef _LOCKD_XDR_H +#define _LOCKD_XDR_H + +#include +#include +#include +#include + +#define SM_MAXSTRLEN 1024 +#define SM_PRIV_SIZE 16 + +struct nsm_private { + unsigned char data[SM_PRIV_SIZE]; +}; + +struct svc_rqst; + +#define NLM_MAXCOOKIELEN 32 +#define NLM_MAXSTRLEN 1024 + +#define nlm_granted cpu_to_be32(NLM_LCK_GRANTED) +#define nlm_lck_denied cpu_to_be32(NLM_LCK_DENIED) +#define 
nlm_lck_denied_nolocks cpu_to_be32(NLM_LCK_DENIED_NOLOCKS) +#define nlm_lck_blocked cpu_to_be32(NLM_LCK_BLOCKED) +#define nlm_lck_denied_grace_period cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD) + +/* Lock info passed via NLM */ +struct nlm_lock { + char * caller; + unsigned int len; /* length of "caller" */ + struct nfs_fh fh; + struct xdr_netobj oh; + u32 svid; + u64 lock_start; + u64 lock_len; + struct file_lock fl; +}; + +/* + * NLM cookies. Technically they can be 1K, but Linux only uses 8 bytes. + * FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to + * 32 bytes. + */ + +struct nlm_cookie +{ + unsigned char data[NLM_MAXCOOKIELEN]; + unsigned int len; +}; + +/* + * Generic lockd arguments for all but sm_notify + */ +struct nlm_args { + struct nlm_cookie cookie; + struct nlm_lock lock; + u32 block; + u32 reclaim; + u32 state; + u32 monitor; + u32 fsm_access; + u32 fsm_mode; +}; + +/* + * Generic lockd result + */ +struct nlm_res { + struct nlm_cookie cookie; + __be32 status; + struct nlm_lock lock; +}; + +/* + * statd callback when client has rebooted + */ +struct nlm_reboot { + char *mon; + unsigned int len; + u32 state; + struct nsm_private priv; +}; + +/* + * Contents of statd callback when monitored host rebooted + */ +#define NLMSVC_XDRSIZE sizeof(struct nlm_args) + +bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream 
*xdr); + +bool nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +#endif /* _LOCKD_XDR_H */ diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index be2aebf62056..95d7cd564b74 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 8c3d2efa2636..70795684b8e8 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include "internal.h" diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 077da0696f12..ba9258c96bfd 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -11,10 +11,9 @@ #define LINUX_LOCKD_BIND_H #include -/* need xdr-encoded error codes too, so... 
*/ -#include -/* Dummy declarations */ +struct file_lock; +struct nfs_fh; struct svc_rqst; struct rpc_task; struct rpc_clnt; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h deleted file mode 100644 index 292e4e38d17d..000000000000 --- a/include/linux/lockd/xdr.h +++ /dev/null @@ -1,113 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * linux/include/linux/lockd/xdr.h - * - * XDR types for the NLM protocol - * - * Copyright (C) 1996 Olaf Kirch - */ - -#ifndef LOCKD_XDR_H -#define LOCKD_XDR_H - -#include -#include -#include -#include - -#define SM_MAXSTRLEN 1024 -#define SM_PRIV_SIZE 16 - -struct nsm_private { - unsigned char data[SM_PRIV_SIZE]; -}; - -struct svc_rqst; - -#define NLM_MAXCOOKIELEN 32 -#define NLM_MAXSTRLEN 1024 - -#define nlm_granted cpu_to_be32(NLM_LCK_GRANTED) -#define nlm_lck_denied cpu_to_be32(NLM_LCK_DENIED) -#define nlm_lck_denied_nolocks cpu_to_be32(NLM_LCK_DENIED_NOLOCKS) -#define nlm_lck_blocked cpu_to_be32(NLM_LCK_BLOCKED) -#define nlm_lck_denied_grace_period cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD) - -/* Lock info passed via NLM */ -struct nlm_lock { - char * caller; - unsigned int len; /* length of "caller" */ - struct nfs_fh fh; - struct xdr_netobj oh; - u32 svid; - u64 lock_start; - u64 lock_len; - struct file_lock fl; -}; - -/* - * NLM cookies. Technically they can be 1K, but Linux only uses 8 bytes. - * FreeBSD uses 16, Apple Mac OS X 10.3 uses 20. Therefore we set it to - * 32 bytes. 
- */ - -struct nlm_cookie -{ - unsigned char data[NLM_MAXCOOKIELEN]; - unsigned int len; -}; - -/* - * Generic lockd arguments for all but sm_notify - */ -struct nlm_args { - struct nlm_cookie cookie; - struct nlm_lock lock; - u32 block; - u32 reclaim; - u32 state; - u32 monitor; - u32 fsm_access; - u32 fsm_mode; -}; - -/* - * Generic lockd result - */ -struct nlm_res { - struct nlm_cookie cookie; - __be32 status; - struct nlm_lock lock; -}; - -/* - * statd callback when client has rebooted - */ -struct nlm_reboot { - char *mon; - unsigned int len; - u32 state; - struct nsm_private priv; -}; - -/* - * Contents of statd callback when monitored host rebooted - */ -#define NLMSVC_XDRSIZE sizeof(struct nlm_args) - -bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); - -bool nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); -bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); - -#endif /* LOCKD_XDR_H */ -- cgit v1.2.3 From 5829352e568d24dd04ae112128a4f44748d073bc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 28 Jan 2026 10:19:33 -0500 Subject: lockd: Make linux/lockd/nlm.h an internal header The NLM protocol constants and status codes in nlm.h 
are needed only by lockd's internal implementation. NFS client code and NFSD interact with lockd through the stable API in bind.h and have no direct use for protocol-level definitions. Exposing these definitions globally via bind.h creates unnecessary coupling between lockd internals and its consumers. Moving nlm.h from include/linux/lockd/ to fs/lockd/ clarifies the API boundary: bind.h provides the lockd service interface, while nlm.h remains available only to code within fs/lockd/ that implements the protocol. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/lockd/lockd.h | 1 + fs/lockd/nlm.h | 56 ++++++++++++++++++++++++++++++++++++++++++++ fs/lockd/svclock.c | 1 - include/linux/lockd/bind.h | 2 -- include/linux/lockd/nlm.h | 58 ---------------------------------------------- 5 files changed, 57 insertions(+), 61 deletions(-) create mode 100644 fs/lockd/nlm.h delete mode 100644 include/linux/lockd/nlm.h (limited to 'include') diff --git a/fs/lockd/lockd.h b/fs/lockd/lockd.h index 6f83b9a7257f..e73c6b348154 100644 --- a/fs/lockd/lockd.h +++ b/fs/lockd/lockd.h @@ -14,6 +14,7 @@ #include #include #include +#include "nlm.h" #include #include "xdr.h" #include diff --git a/fs/lockd/nlm.h b/fs/lockd/nlm.h new file mode 100644 index 000000000000..47be65d0111f --- /dev/null +++ b/fs/lockd/nlm.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Declarations for the Network Lock Manager protocol. 
+ * + * Copyright (C) 1996, Olaf Kirch + */ + +#ifndef _LOCKD_NLM_H +#define _LOCKD_NLM_H + + +/* Maximum file offset in file_lock.fl_end */ +# define NLM_OFFSET_MAX ((s32) 0x7fffffff) +# define NLM4_OFFSET_MAX ((s64) ((~(u64)0) >> 1)) + +/* Return states for NLM */ +enum { + NLM_LCK_GRANTED = 0, + NLM_LCK_DENIED = 1, + NLM_LCK_DENIED_NOLOCKS = 2, + NLM_LCK_BLOCKED = 3, + NLM_LCK_DENIED_GRACE_PERIOD = 4, +#ifdef CONFIG_LOCKD_V4 + NLM_DEADLCK = 5, + NLM_ROFS = 6, + NLM_STALE_FH = 7, + NLM_FBIG = 8, + NLM_FAILED = 9, +#endif +}; + +#define NLM_PROGRAM 100021 + +#define NLMPROC_NULL 0 +#define NLMPROC_TEST 1 +#define NLMPROC_LOCK 2 +#define NLMPROC_CANCEL 3 +#define NLMPROC_UNLOCK 4 +#define NLMPROC_GRANTED 5 +#define NLMPROC_TEST_MSG 6 +#define NLMPROC_LOCK_MSG 7 +#define NLMPROC_CANCEL_MSG 8 +#define NLMPROC_UNLOCK_MSG 9 +#define NLMPROC_GRANTED_MSG 10 +#define NLMPROC_TEST_RES 11 +#define NLMPROC_LOCK_RES 12 +#define NLMPROC_CANCEL_RES 13 +#define NLMPROC_UNLOCK_RES 14 +#define NLMPROC_GRANTED_RES 15 +#define NLMPROC_NSM_NOTIFY 16 /* statd callback */ +#define NLMPROC_SHARE 20 +#define NLMPROC_UNSHARE 21 +#define NLMPROC_NM_LOCK 22 +#define NLMPROC_FREE_ALL 23 + +#endif /* _LOCKD_NLM_H */ diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 1c800fffe69c..e687103e42d1 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -28,7 +28,6 @@ #include #include #include -#include #include "lockd.h" diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index ba9258c96bfd..b614e0deea72 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -10,8 +10,6 @@ #ifndef LINUX_LOCKD_BIND_H #define LINUX_LOCKD_BIND_H -#include - struct file_lock; struct nfs_fh; struct svc_rqst; diff --git a/include/linux/lockd/nlm.h b/include/linux/lockd/nlm.h deleted file mode 100644 index 6e343ef760dc..000000000000 --- a/include/linux/lockd/nlm.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * 
linux/include/linux/lockd/nlm.h - * - * Declarations for the Network Lock Manager protocol. - * - * Copyright (C) 1996, Olaf Kirch - */ - -#ifndef LINUX_LOCKD_NLM_H -#define LINUX_LOCKD_NLM_H - - -/* Maximum file offset in file_lock.fl_end */ -# define NLM_OFFSET_MAX ((s32) 0x7fffffff) -# define NLM4_OFFSET_MAX ((s64) ((~(u64)0) >> 1)) - -/* Return states for NLM */ -enum { - NLM_LCK_GRANTED = 0, - NLM_LCK_DENIED = 1, - NLM_LCK_DENIED_NOLOCKS = 2, - NLM_LCK_BLOCKED = 3, - NLM_LCK_DENIED_GRACE_PERIOD = 4, -#ifdef CONFIG_LOCKD_V4 - NLM_DEADLCK = 5, - NLM_ROFS = 6, - NLM_STALE_FH = 7, - NLM_FBIG = 8, - NLM_FAILED = 9, -#endif -}; - -#define NLM_PROGRAM 100021 - -#define NLMPROC_NULL 0 -#define NLMPROC_TEST 1 -#define NLMPROC_LOCK 2 -#define NLMPROC_CANCEL 3 -#define NLMPROC_UNLOCK 4 -#define NLMPROC_GRANTED 5 -#define NLMPROC_TEST_MSG 6 -#define NLMPROC_LOCK_MSG 7 -#define NLMPROC_CANCEL_MSG 8 -#define NLMPROC_UNLOCK_MSG 9 -#define NLMPROC_GRANTED_MSG 10 -#define NLMPROC_TEST_RES 11 -#define NLMPROC_LOCK_RES 12 -#define NLMPROC_CANCEL_RES 13 -#define NLMPROC_UNLOCK_RES 14 -#define NLMPROC_GRANTED_RES 15 -#define NLMPROC_NSM_NOTIFY 16 /* statd callback */ -#define NLMPROC_SHARE 20 -#define NLMPROC_UNSHARE 21 -#define NLMPROC_NM_LOCK 22 -#define NLMPROC_FREE_ALL 23 - -#endif /* LINUX_LOCKD_NLM_H */ -- cgit v1.2.3 From adcc59114ccd402259c089b0fea24da5e4974563 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Feb 2026 21:21:50 +0100 Subject: sunrpc: Kill RPC_IFDEBUG() RPC_IFDEBUG() is used in only two places. In one the user of the definition is guarded by ifdeffery, in the second one it's implied due to dprintk() usage. Kill the macro and move the ifdeffery to the regular condition with the variable defined inside, while in the second case add the same conditional and move the respective code there. 
Reviewed-by: Jeff Layton Signed-off-by: Andy Shevchenko Signed-off-by: Chuck Lever --- fs/nfsd/nfsfh.c | 9 ++++++--- include/linux/sunrpc/debug.h | 2 -- net/sunrpc/xprtrdma/svc_rdma_transport.c | 27 ++++++++++++++------------- 3 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ed85dd43da18..68b629fbaaeb 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -105,9 +105,12 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp, { /* Check if the request originated from a secure port. */ if (rqstp && !nfsd_originating_port_ok(rqstp, cred, exp)) { - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk("nfsd: request from insecure port %s!\n", - svc_print_addr(rqstp, buf, sizeof(buf))); + if (IS_ENABLED(CONFIG_SUNRPC_DEBUG)) { + char buf[RPC_MAX_ADDRBUFLEN]; + + dprintk("nfsd: request from insecure port %s!\n", + svc_print_addr(rqstp, buf, sizeof(buf))); + } return nfserr_perm; } diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index eb4bd62df319..93d1a11ffbfb 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -49,12 +49,10 @@ do { \ } \ } while (0) -# define RPC_IFDEBUG(x) x #else # define ifdebug(fac) if (0) # define dfprintk(fac, fmt, ...) do {} while (0) # define dfprintk_rcu(fac, fmt, ...) 
do {} while (0) -# define RPC_IFDEBUG(x) #endif /* diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 9b623849723e..f2d72181a6fe 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -414,7 +414,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct ib_qp_init_attr qp_attr; struct ib_device *dev; int ret = 0; - RPC_IFDEBUG(struct sockaddr *sap); listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); clear_bit(XPT_CONN, &xprt->xpt_flags); @@ -560,18 +559,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) goto errout; } -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - dprintk("svcrdma: new connection accepted on device %s:\n", dev->name); - sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; - dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); - sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; - dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); - dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges); - dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); - dprintk(" rdma_rw_ctxs : %d\n", ctxts); - dprintk(" max_requests : %d\n", newxprt->sc_max_requests); - dprintk(" ord : %d\n", conn_param.initiator_depth); -#endif + if (IS_ENABLED(CONFIG_SUNRPC_DEBUG)) { + struct sockaddr *sap; + + dprintk("svcrdma: new connection accepted on device %s:\n", dev->name); + sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr; + dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap)); + sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; + dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); + dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges); + dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); + dprintk(" rdma_rw_ctxs : %d\n", ctxts); + dprintk(" max_requests : %d\n", newxprt->sc_max_requests); + dprintk(" ord : %d\n", 
conn_param.initiator_depth); + } return &newxprt->sc_xprt; -- cgit v1.2.3 From 6f57293abb8d087de830dd3f02e66d94b3e59973 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Feb 2026 21:21:51 +0100 Subject: sunrpc: Fix compilation error (`make W=1`) when dprintk() is no-op Clang compiler is not happy about set but unused variables: .../flexfilelayout/flexfilelayoutdev.c:56:9: error: variable 'ret' set but not used [-Werror,-Wunused-but-set-variable] .../flexfilelayout/flexfilelayout.c:1505:6: error: variable 'err' set but not used [-Werror,-Wunused-but-set-variable] .../nfs4proc.c:9244:12: error: variable 'ptr' set but not used [-Werror,-Wunused-but-set-variable] Fix these by forwarding parameters of dprintk() to no_printk(). The positive side-effect is a format-string checker enabled even for the cases when dprintk() is no-op. Fixes: d67ae825a59d ("pnfs/flexfiles: Add the FlexFile Layout Driver") Fixes: fc931582c260 ("nfs41: create_session operation") Acked-by: Geert Uytterhoeven Reviewed-by: Jeff Layton Signed-off-by: Andy Shevchenko Signed-off-by: Chuck Lever --- fs/lockd/svclock.c | 5 +++++ include/linux/sunrpc/debug.h | 8 ++++++-- include/linux/sunrpc/sched.h | 3 --- 3 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e687103e42d1..ee23f5802af1 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -74,6 +74,11 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) return buf; } +#else +static inline const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie) +{ + return "???"; +} #endif /* diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h index 93d1a11ffbfb..ab61bed2f7af 100644 --- a/include/linux/sunrpc/debug.h +++ b/include/linux/sunrpc/debug.h @@ -38,6 +38,8 @@ extern unsigned int nlm_debug; do { \ ifdebug(fac) \ __sunrpc_printk(fmt, ##__VA_ARGS__); \ + else \ + no_printk(fmt, ##__VA_ARGS__); \ } while (0) # define 
dfprintk_rcu(fac, fmt, ...) \ @@ -46,13 +48,15 @@ do { \ rcu_read_lock(); \ __sunrpc_printk(fmt, ##__VA_ARGS__); \ rcu_read_unlock(); \ + } else { \ + no_printk(fmt, ##__VA_ARGS__); \ } \ } while (0) #else # define ifdebug(fac) if (0) -# define dfprintk(fac, fmt, ...) do {} while (0) -# define dfprintk_rcu(fac, fmt, ...) do {} while (0) +# define dfprintk(fac, fmt, ...) no_printk(fmt, ##__VA_ARGS__) +# define dfprintk_rcu(fac, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index ccba79ebf893..0dbdf3722537 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -95,10 +95,7 @@ struct rpc_task { int tk_rpc_status; /* Result of last RPC operation */ unsigned short tk_flags; /* misc flags */ unsigned short tk_timeouts; /* maj timeouts */ - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS) unsigned short tk_pid; /* debugging aid */ -#endif unsigned char tk_priority : 2,/* Task priority */ tk_garb_retry : 2, tk_cred_retry : 2; -- cgit v1.2.3 From f52792f484ba2316853736856dde19b7e7458861 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Fri, 13 Feb 2026 10:36:30 -0800 Subject: NFSD: Enforce timeout on layout recall and integrate lease manager fencing When a layout conflict triggers a recall, enforcing a timeout is necessary to prevent excessive nfsd threads from being blocked in __break_lease ensuring the server continues servicing incoming requests efficiently. This patch introduces a new function to lease_manager_operations: lm_breaker_timedout: Invoked when a lease recall times out and is about to be disposed of. This function enables the lease manager to inform the caller whether the file_lease should remain on the flc_list or be disposed of. For the NFSD lease manager, this function now handles layout recall timeouts. 
If the layout type supports fencing and the client has not been fenced, a fence operation is triggered to prevent the client from accessing the block device. While the fencing operation is in progress, the conflicting file_lease remains on the flc_list until fencing is complete. This guarantees that no other clients can access the file, and the client with exclusive access is properly blocked before disposal. Signed-off-by: Dai Ngo Signed-off-by: Chuck Lever --- .../admin-guide/nfs/pnfs-block-server.rst | 30 ++++ Documentation/admin-guide/nfs/pnfs-scsi-server.rst | 31 +++++ Documentation/filesystems/locking.rst | 2 + fs/locks.c | 26 +++- fs/nfsd/blocklayout.c | 42 +++++- fs/nfsd/nfs4layouts.c | 152 ++++++++++++++++++++- fs/nfsd/nfs4state.c | 1 + fs/nfsd/pnfs.h | 5 +- fs/nfsd/state.h | 6 + include/linux/filelock.h | 1 + 10 files changed, 279 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/nfs/pnfs-block-server.rst b/Documentation/admin-guide/nfs/pnfs-block-server.rst index 20fe9f5117fe..b4f5997009af 100644 --- a/Documentation/admin-guide/nfs/pnfs-block-server.rst +++ b/Documentation/admin-guide/nfs/pnfs-block-server.rst @@ -40,3 +40,33 @@ how to translate the device into a serial number from SCSI EVPD 0x80:: echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log EOF + +If the nfsd server needs to fence a non-responding client and the +fencing operation fails, the server logs a warning message in the +system log with the following format: + + FENCE failed client[IP_address] clid[#n] device[dev_name] + + Where: + + IP_address: refers to the IP address of the affected client. + #n: indicates the unique client identifier. + dev_name: specifies the name of the block device related + to the fencing attempt. + +The server will repeatedly retry the operation indefinitely. During +this time, access to the affected file is restricted for all other +clients. 
This is to prevent potential data corruption if multiple +clients access the same file simultaneously. + +To restore access to the affected file for other clients, the admin +needs to take the following actions: + + . shutdown or power off the client being fenced. + . manually expire the client to release all its state on the server: + + echo 'expire' > /proc/fs/nfsd/clients/clid/ctl + + Where: + + clid: is the unique client identifier displayed in the system log. diff --git a/Documentation/admin-guide/nfs/pnfs-scsi-server.rst b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst index b2eec2288329..db34afbf67a9 100644 --- a/Documentation/admin-guide/nfs/pnfs-scsi-server.rst +++ b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst @@ -22,3 +22,34 @@ option and the underlying SCSI device support persistent reservations. On the client make sure the kernel has the CONFIG_PNFS_BLOCK option enabled, and the file system is mounted using the NFSv4.1 protocol version (mount -o vers=4.1). + +If the nfsd server needs to fence a non-responding client and the +fencing operation fails, the server logs a warning message in the +system log with the following format: + + FENCE failed client[IP_address] clid[#n] device[dev_name] + + Where: + + IP_address: refers to the IP address of the affected client. + #n: indicates the unique client identifier. + dev_name: specifies the name of the block device related + to the fencing attempt. + +The server will repeatedly retry the operation indefinitely. During +this time, access to the affected file is restricted for all other +clients. This is to prevent potential data corruption if multiple +clients access the same file simultaneously. + +To restore access to the affected file for other clients, the admin +needs to take the following actions: + + . shutdown or power off the client being fenced. + . manually expire the client to release all its state on the server: + + echo 'expire' > /proc/fs/nfsd/clients/clid/ctl
+ + Where: + + clid: is the unique client identifier displayed in the system log. + diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 8025df6e6499..8421ea21bd35 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -398,6 +398,7 @@ prototypes:: bool (*lm_breaker_owns_lease)(struct file_lock *); bool (*lm_lock_expirable)(struct file_lock *); void (*lm_expire_lock)(void); + bool (*lm_breaker_timedout)(struct file_lease *); locking rules: @@ -412,6 +413,7 @@ lm_breaker_owns_lease: yes no no lm_lock_expirable yes no no lm_expire_lock no no yes lm_open_conflict yes no no +lm_breaker_timedout yes no no ====================== ============= ================= ========= buffer_head diff --git a/fs/locks.c b/fs/locks.c index d13ec930b7bb..8e44b1f6c15a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1534,6 +1534,7 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) { struct file_lock_context *ctx = inode->i_flctx; struct file_lease *fl, *tmp; + bool remove; lockdep_assert_held(&ctx->flc_lock); @@ -1541,8 +1542,19 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) lease_modify(fl, F_RDLCK, dispose); - if (past_time(fl->fl_break_time)) - lease_modify(fl, F_UNLCK, dispose); + + remove = true; + if (past_time(fl->fl_break_time)) { + /* + * Consult the lease manager when a lease break times + * out to determine whether the lease should be disposed + * of. 
+ */ + if (fl->fl_lmops && fl->fl_lmops->lm_breaker_timedout) + remove = fl->fl_lmops->lm_breaker_timedout(fl); + if (remove) + lease_modify(fl, F_UNLCK, dispose); + } } } @@ -1670,9 +1682,13 @@ int __break_lease(struct inode *inode, unsigned int flags) restart: fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list); break_time = fl->fl_break_time; - if (break_time != 0) - break_time -= jiffies; - if (break_time == 0) + if (break_time != 0) { + if (time_after(jiffies, break_time)) { + fl->fl_break_time = jiffies + lease_break_time * HZ; + break_time = lease_break_time * HZ; + } else + break_time -= jiffies; + } else break_time++; locks_insert_block(&fl->c, &new_fl->c, leases_conflict); trace_break_lease_block(inode, new_fl); diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 8b987fca1e60..9d829c84f374 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -297,6 +297,7 @@ static inline int nfsd4_scsi_fence_insert(struct nfs4_client *clp, ret = 0; } xa_unlock(xa); + clp->cl_fence_retry_warn = false; return ret; } @@ -443,15 +444,33 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp, return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); } -static void +/* + * Perform the fence operation to prevent the client from accessing the + * block device. If a fence operation is already in progress, wait for + * it to complete before checking the NFSD_MDS_PR_FENCED flag. Once the + * operation is complete, check the flag. If NFSD_MDS_PR_FENCED is set, + * update the layout stateid by setting the ls_fenced flag to indicate + * that the client has been fenced. + * + * The cl_fence_mutex ensures that the fence operation has been fully + * completed, rather than just in progress, when returning from this + * function. + * + * Return true if client was fenced otherwise return false. 
+ */ +static bool nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file) { struct nfs4_client *clp = ls->ls_stid.sc_client; struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev; int status; + bool ret; - if (nfsd4_scsi_fence_set(clp, bdev->bd_dev)) - return; + mutex_lock(&clp->cl_fence_mutex); + if (nfsd4_scsi_fence_set(clp, bdev->bd_dev)) { + mutex_unlock(&clp->cl_fence_mutex); + return true; + } status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, nfsd4_scsi_pr_key(clp), @@ -470,13 +489,22 @@ nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file) * PR_STS_RESERVATION_CONFLICT, which would cause an infinite * retry loop. */ - if (status < 0 || - status == PR_STS_PATH_FAILED || - status == PR_STS_PATH_FAST_FAILED || - status == PR_STS_RETRY_PATH_FAILURE) + switch (status) { + case 0: + case PR_STS_IOERR: + case PR_STS_RESERVATION_CONFLICT: + ret = true; + break; + default: + /* retry-able and other errors */ + ret = false; nfsd4_scsi_fence_clear(clp, bdev->bd_dev); + break; + } + mutex_unlock(&clp->cl_fence_mutex); trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status); + return ret; } const struct nfsd4_layout_ops scsi_layout_ops = { diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ad7af8cfcf1f..69e41105efdd 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -27,6 +27,8 @@ static struct kmem_cache *nfs4_layout_stateid_cache; static const struct nfsd4_callback_ops nfsd4_cb_layout_ops; static const struct lease_manager_operations nfsd4_layouts_lm_ops; +static void nfsd4_layout_fence_worker(struct work_struct *work); + const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { #ifdef CONFIG_NFSD_FLEXFILELAYOUT [LAYOUT_FLEX_FILES] = &ff_layout_ops, @@ -177,6 +179,13 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid) trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid); + spin_lock(&ls->ls_lock); + if 
(delayed_work_pending(&ls->ls_fence_work)) { + spin_unlock(&ls->ls_lock); + cancel_delayed_work_sync(&ls->ls_fence_work); + } else + spin_unlock(&ls->ls_lock); + spin_lock(&clp->cl_lock); list_del_init(&ls->ls_perclnt); spin_unlock(&clp->cl_lock); @@ -271,6 +280,10 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, list_add(&ls->ls_perfile, &fp->fi_lo_states); spin_unlock(&fp->fi_lock); + ls->ls_fenced = false; + ls->ls_fence_delay = 0; + INIT_DELAYED_WORK(&ls->ls_fence_work, nfsd4_layout_fence_worker); + trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid); return ls; } @@ -747,11 +760,9 @@ static bool nfsd4_layout_lm_break(struct file_lease *fl) { /* - * We don't want the locks code to timeout the lease for us; - * we'll remove it ourself if a layout isn't returned - * in time: + * Enforce break lease timeout to prevent NFSD + * thread from hanging in __break_lease. */ - fl->fl_break_time = 0; nfsd4_recall_file_layout(fl->c.flc_owner); return false; } @@ -782,10 +793,143 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg) return 0; } +static void +nfsd4_layout_fence_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct nfs4_layout_stateid *ls = container_of(dwork, + struct nfs4_layout_stateid, ls_fence_work); + struct nfsd_file *nf; + struct block_device *bdev; + struct nfs4_client *clp; + struct nfsd_net *nn; + + /* + * The workqueue clears WORK_STRUCT_PENDING before invoking + * this callback. Re-arm immediately so that + * delayed_work_pending() returns true while the fence + * operation is in progress, preventing + * lm_breaker_timedout() from taking a duplicate reference. 
+ */ + mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0); + + spin_lock(&ls->ls_lock); + if (list_empty(&ls->ls_layouts)) { + spin_unlock(&ls->ls_lock); +dispose: + cancel_delayed_work(&ls->ls_fence_work); + /* unlock the lease so that tasks waiting on it can proceed */ + nfsd4_close_layout(ls); + + ls->ls_fenced = true; + nfs4_put_stid(&ls->ls_stid); + return; + } + spin_unlock(&ls->ls_lock); + + rcu_read_lock(); + nf = nfsd_file_get(ls->ls_file); + rcu_read_unlock(); + if (!nf) + goto dispose; + + clp = ls->ls_stid.sc_client; + nn = net_generic(clp->net, nfsd_net_id); + bdev = nf->nf_file->f_path.mnt->mnt_sb->s_bdev; + if (nfsd4_layout_ops[ls->ls_layout_type]->fence_client(ls, nf)) { + /* fenced ok */ + nfsd_file_put(nf); + pr_warn("%s: FENCED client[%pISpc] clid[%d] to device[%s]\n", + __func__, (struct sockaddr *)&clp->cl_addr, + clp->cl_clientid.cl_id - nn->clientid_base, + bdev->bd_disk->disk_name); + goto dispose; + } + /* fence failed */ + nfsd_file_put(nf); + + if (!clp->cl_fence_retry_warn) { + pr_warn("%s: FENCE failed client[%pISpc] clid[%d] device[%s]\n", + __func__, (struct sockaddr *)&clp->cl_addr, + clp->cl_clientid.cl_id - nn->clientid_base, + bdev->bd_disk->disk_name); + clp->cl_fence_retry_warn = true; + } + /* + * The fence worker retries the fencing operation indefinitely to + * prevent data corruption. The admin needs to take the following + * actions to restore access to the file for other clients: + * + * . shutdown or power off the client being fenced. + * . manually expire the client to release all its state on the server; + * echo 'expire' > /proc/fs/nfsd/clients/clid/ctl'. + * + * Where: + * + * clid: is the unique client identifier displayed in + * the warning message above. 
+ */ + if (!ls->ls_fence_delay) + ls->ls_fence_delay = HZ; + else + ls->ls_fence_delay = min(ls->ls_fence_delay << 1, + MAX_FENCE_DELAY); + mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, ls->ls_fence_delay); +} + +/** + * nfsd4_layout_lm_breaker_timedout - The layout recall has timed out. + * @fl: file to check + * + * If the layout type supports a fence operation, schedule a worker to + * fence the client from accessing the block device. + * + * This function runs under the protection of the spin_lock flc_lock. + * At this time, the file_lease associated with the layout stateid is + * on the flc_list. A reference count is incremented on the layout + * stateid to prevent it from being freed while the fence worker is + * executing. Once the fence worker finishes its operation, it releases + * this reference. + * + * The fence worker continues to run until either the client has been + * fenced or the layout becomes invalid. The layout can become invalid + * as a result of a LAYOUTRETURN or when the CB_LAYOUT recall callback + * has completed. + * + * Return true if the file_lease should be disposed of by the caller; + * otherwise, return false. + */ +static bool +nfsd4_layout_lm_breaker_timedout(struct file_lease *fl) +{ + struct nfs4_layout_stateid *ls = fl->c.flc_owner; + + if ((!nfsd4_layout_ops[ls->ls_layout_type]->fence_client) || + ls->ls_fenced) + return true; + if (delayed_work_pending(&ls->ls_fence_work)) + return false; + /* + * Make sure layout has not been returned yet before + * taking a reference count on the layout stateid. 
+ */ + spin_lock(&ls->ls_lock); + if (list_empty(&ls->ls_layouts) || + !refcount_inc_not_zero(&ls->ls_stid.sc_count)) { + spin_unlock(&ls->ls_lock); + return true; + } + spin_unlock(&ls->ls_lock); + + mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0); + return false; +} + static const struct lease_manager_operations nfsd4_layouts_lm_ops = { .lm_break = nfsd4_layout_lm_break, .lm_change = nfsd4_layout_lm_change, .lm_open_conflict = nfsd4_layout_lm_open_conflict, + .lm_breaker_timedout = nfsd4_layout_lm_breaker_timedout, }; int diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1b4c101ff04b..1d31f2bb2162 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2386,6 +2386,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name, #endif #ifdef CONFIG_NFSD_SCSILAYOUT xa_init(&clp->cl_dev_fences); + mutex_init(&clp->cl_fence_mutex); #endif INIT_LIST_HEAD(&clp->async_copies); spin_lock_init(&clp->async_lock); diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h index db9af780438b..f7bee4dc5d3d 100644 --- a/fs/nfsd/pnfs.h +++ b/fs/nfsd/pnfs.h @@ -11,6 +11,9 @@ struct xdr_stream; +/* Cap exponential backoff between fence retries at 3 minutes */ +#define MAX_FENCE_DELAY ((unsigned int)(3 * 60 * HZ)) + struct nfsd4_deviceid_map { struct list_head hash; u64 idx; @@ -38,7 +41,7 @@ struct nfsd4_layout_ops { struct svc_rqst *rqstp, struct nfsd4_layoutcommit *lcp); - void (*fence_client)(struct nfs4_layout_stateid *ls, + bool (*fence_client)(struct nfs4_layout_stateid *ls, struct nfsd_file *file); }; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 99aeaab9cf2b..ec1c5467012e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -456,6 +456,7 @@ struct nfs4_client { struct list_head cl_lru; /* tail queue */ #ifdef CONFIG_NFSD_PNFS struct list_head cl_lo_states; /* outstanding layout states */ + bool cl_fence_retry_warn; #endif struct xdr_netobj cl_name; /* id generated by client */ nfs4_verifier cl_verifier; /* generated by client */ @@ -529,6 +530,7 
@@ struct nfs4_client { time64_t cl_ra_time; #ifdef CONFIG_NFSD_SCSILAYOUT struct xarray cl_dev_fences; + struct mutex cl_fence_mutex; #endif }; @@ -745,6 +747,10 @@ struct nfs4_layout_stateid { stateid_t ls_recall_sid; bool ls_recalled; struct mutex ls_mutex; + + struct delayed_work ls_fence_work; + unsigned int ls_fence_delay; + bool ls_fenced; }; static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s) diff --git a/include/linux/filelock.h b/include/linux/filelock.h index d2c9740e26a8..5f0a2fb31450 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -50,6 +50,7 @@ struct lease_manager_operations { void (*lm_setup)(struct file_lease *, void **); bool (*lm_breaker_owns_lease)(struct file_lease *); int (*lm_open_conflict)(struct file *, int); + bool (*lm_breaker_timedout)(struct file_lease *fl); }; struct lock_manager { -- cgit v1.2.3 From 5bc37b759ec0cdde2c652a2637d704f2d6306617 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Feb 2026 17:06:53 -0500 Subject: Documentation: Add the RPC language description of NLM version 4 In order to generate source code to encode and decode NLMv4 protocol elements, include a copy of the RPC language description of NLMv4 for xdrgen to process. The language description is an amalgam of RFC 1813 and the Open Group's XNFS specification: https://pubs.opengroup.org/onlinepubs/9629799/chap10.htm The C code committed here was generated from the new nlm4.x file using tools/net/sunrpc/xdrgen/xdrgen. The goals of replacing hand-written XDR functions with ones that are tool-generated are to improve memory safety and make XDR encoding and decoding less brittle to maintain. The xdrgen utility derives both the type definitions and the encode/decode functions directly from protocol specifications, using names and symbols familiar to anyone who knows those specs. 
Unlike hand-written code that can inadvertently diverge from the specification, xdrgen guarantees that the generated code matches the specification exactly. We would eventually like xdrgen to generate Rust code as well, making the conversion of the kernel's NFS stacks to use Rust just a little easier for us. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- Documentation/sunrpc/xdr/nlm4.x | 211 +++++++++++ fs/lockd/Makefile | 30 +- fs/lockd/nlm4xdr_gen.c | 724 +++++++++++++++++++++++++++++++++++++ fs/lockd/nlm4xdr_gen.h | 32 ++ include/linux/sunrpc/xdrgen/nlm4.h | 233 ++++++++++++ 5 files changed, 1229 insertions(+), 1 deletion(-) create mode 100644 Documentation/sunrpc/xdr/nlm4.x create mode 100644 fs/lockd/nlm4xdr_gen.c create mode 100644 fs/lockd/nlm4xdr_gen.h create mode 100644 include/linux/sunrpc/xdrgen/nlm4.h (limited to 'include') diff --git a/Documentation/sunrpc/xdr/nlm4.x b/Documentation/sunrpc/xdr/nlm4.x new file mode 100644 index 000000000000..0c44a80ef674 --- /dev/null +++ b/Documentation/sunrpc/xdr/nlm4.x @@ -0,0 +1,211 @@ +/* + * This file was extracted by hand from + * https://www.rfc-editor.org/rfc/rfc1813.html . + * + * Note that RFC 1813 is Informational. Its official date of + * publication (June 1995) is before the IETF required its RFCs to + * carry an explicit copyright or other IP ownership notices. + * + * Note also that RFC 1813 does not specify the whole NLM4 protocol. + * In particular, the argument and result types are not present in + * that document, and had to be reverse-engineered. 
+ */ + +/* + * The NLMv4 protocol + */ + +pragma header nlm4; + +/* + * The following definitions are missing in RFC 1813, + * but can be found in the OpenNetworking Network Lock + * Manager protocol: + * + * https://pubs.opengroup.org/onlinepubs/9629799/chap10.htm + */ + +const LM_MAXSTRLEN = 1024; + +const LM_MAXNAMELEN = 1025; + +const MAXNETOBJ_SZ = 1024; + +typedef opaque netobj; + +enum fsh4_mode { + fsm_DN = 0, /* deny none */ + fsm_DR = 1, /* deny read */ + fsm_DW = 2, /* deny write */ + fsm_DRW = 3 /* deny read/write */ +}; + +enum fsh4_access { + fsa_NONE = 0, /* for completeness */ + fsa_R = 1, /* read-only */ + fsa_W = 2, /* write-only */ + fsa_RW = 3 /* read/write */ +}; + +/* + * The following definitions come from the OpenNetworking + * Network Status Monitor protocol: + * + * https://pubs.opengroup.org/onlinepubs/9629799/chap11.htm + */ + +const SM_MAXSTRLEN = 1024; + +/* + * The NLM protocol as extracted from: + * https://tools.ietf.org/html/rfc1813 Appendix II + */ + +typedef unsigned hyper uint64; + +typedef hyper int64; + +typedef unsigned long uint32; + +typedef long int32; + +enum nlm4_stats { + NLM4_GRANTED = 0, + NLM4_DENIED = 1, + NLM4_DENIED_NOLOCKS = 2, + NLM4_BLOCKED = 3, + NLM4_DENIED_GRACE_PERIOD = 4, + NLM4_DEADLCK = 5, + NLM4_ROFS = 6, + NLM4_STALE_FH = 7, + NLM4_FBIG = 8, + NLM4_FAILED = 9 +}; + +pragma big_endian nlm4_stats; + +struct nlm4_holder { + bool exclusive; + int32 svid; + netobj oh; + uint64 l_offset; + uint64 l_len; +}; + +union nlm4_testrply switch (nlm4_stats stat) { + case NLM4_DENIED: + nlm4_holder holder; + default: + void; +}; + +struct nlm4_stat { + nlm4_stats stat; +}; + +struct nlm4_res { + netobj cookie; + nlm4_stat stat; +}; + +struct nlm4_testres { + netobj cookie; + nlm4_testrply stat; +}; + +struct nlm4_lock { + string caller_name; + netobj fh; + netobj oh; + int32 svid; + uint64 l_offset; + uint64 l_len; +}; + +struct nlm4_lockargs { + netobj cookie; + bool block; + bool exclusive; + nlm4_lock alock; + 
bool reclaim; + int32 state; +}; + +struct nlm4_cancargs { + netobj cookie; + bool block; + bool exclusive; + nlm4_lock alock; +}; + +struct nlm4_testargs { + netobj cookie; + bool exclusive; + nlm4_lock alock; +}; + +struct nlm4_unlockargs { + netobj cookie; + nlm4_lock alock; +}; + +struct nlm4_share { + string caller_name; + netobj fh; + netobj oh; + fsh4_mode mode; + fsh4_access access; +}; + +struct nlm4_shareargs { + netobj cookie; + nlm4_share share; + bool reclaim; +}; + +struct nlm4_shareres { + netobj cookie; + nlm4_stats stat; + int32 sequence; +}; + +struct nlm4_notify { + string name; + int32 state; +}; + +/* + * Argument for the Linux-private SM_NOTIFY procedure + */ +const SM_PRIV_SIZE = 16; + +struct nlm4_notifyargs { + nlm4_notify notify; + opaque private[SM_PRIV_SIZE]; +}; + +program NLM4_PROG { + version NLM4_VERS { + void NLMPROC4_NULL(void) = 0; + nlm4_testres NLMPROC4_TEST(nlm4_testargs) = 1; + nlm4_res NLMPROC4_LOCK(nlm4_lockargs) = 2; + nlm4_res NLMPROC4_CANCEL(nlm4_cancargs) = 3; + nlm4_res NLMPROC4_UNLOCK(nlm4_unlockargs) = 4; + nlm4_res NLMPROC4_GRANTED(nlm4_testargs) = 5; + void NLMPROC4_TEST_MSG(nlm4_testargs) = 6; + void NLMPROC4_LOCK_MSG(nlm4_lockargs) = 7; + void NLMPROC4_CANCEL_MSG(nlm4_cancargs) = 8; + void NLMPROC4_UNLOCK_MSG(nlm4_unlockargs) = 9; + void NLMPROC4_GRANTED_MSG(nlm4_testargs) = 10; + void NLMPROC4_TEST_RES(nlm4_testres) = 11; + void NLMPROC4_LOCK_RES(nlm4_res) = 12; + void NLMPROC4_CANCEL_RES(nlm4_res) = 13; + void NLMPROC4_UNLOCK_RES(nlm4_res) = 14; + void NLMPROC4_GRANTED_RES(nlm4_res) = 15; + void NLMPROC4_SM_NOTIFY(nlm4_notifyargs) = 16; + nlm4_shareres NLMPROC4_SHARE(nlm4_shareargs) = 20; + nlm4_shareres NLMPROC4_UNSHARE(nlm4_shareargs) = 21; + nlm4_res NLMPROC4_NM_LOCK(nlm4_lockargs) = 22; + void NLMPROC4_FREE_ALL(nlm4_notify) = 23; + } = 4; +} = 100021; diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index 51bbe22d21e3..8e9d18a4348c 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile @@ -9,5 +9,33 @@ 
obj-$(CONFIG_LOCKD) += lockd.o lockd-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ svcshare.o svcproc.o svcsubs.o mon.o trace.o xdr.o netlink.o -lockd-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o +lockd-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o nlm4xdr_gen.o lockd-$(CONFIG_PROC_FS) += procfs.o + +# +# XDR code generation (requires Python and additional packages) +# +# The generated *xdr_gen.{h,c} files are checked into git. Normal kernel +# builds do not require the xdrgen tool or its Python dependencies. +# +# Developers modifying .x files in Documentation/sunrpc/xdr/ should run +# "make xdrgen" to regenerate the affected files. +# +.PHONY: xdrgen + +XDRGEN = ../../tools/net/sunrpc/xdrgen/xdrgen + +XDRGEN_DEFINITIONS = ../../include/linux/sunrpc/xdrgen/nlm4.h +XDRGEN_DECLARATIONS = nlm4xdr_gen.h +XDRGEN_SOURCE = nlm4xdr_gen.c + +xdrgen: $(XDRGEN_DEFINITIONS) $(XDRGEN_DECLARATIONS) $(XDRGEN_SOURCE) + +../../include/linux/sunrpc/xdrgen/nlm4.h: ../../Documentation/sunrpc/xdr/nlm4.x + $(XDRGEN) definitions $< > $@ + +nlm4xdr_gen.h: ../../Documentation/sunrpc/xdr/nlm4.x + $(XDRGEN) declarations $< > $@ + +nlm4xdr_gen.c: ../../Documentation/sunrpc/xdr/nlm4.x + $(XDRGEN) source --peer server $< > $@ diff --git a/fs/lockd/nlm4xdr_gen.c b/fs/lockd/nlm4xdr_gen.c new file mode 100644 index 000000000000..1c8c221db456 --- /dev/null +++ b/fs/lockd/nlm4xdr_gen.c @@ -0,0 +1,724 @@ +// SPDX-License-Identifier: GPL-2.0 +// Generated by xdrgen. Manual edits will be lost. 
+// XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x +// XDR specification modification time: Thu Dec 25 13:10:19 2025 + +#include + +#include "nlm4xdr_gen.h" + +static bool __maybe_unused +xdrgen_decode_netobj(struct xdr_stream *xdr, netobj *ptr) +{ + return xdrgen_decode_opaque(xdr, ptr, MAXNETOBJ_SZ); +} + +static bool __maybe_unused +xdrgen_decode_fsh4_mode(struct xdr_stream *xdr, fsh4_mode *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_fsh4_access(struct xdr_stream *xdr, fsh4_access *ptr) +{ + u32 val; + + if (xdr_stream_decode_u32(xdr, &val) < 0) + return false; + *ptr = val; + return true; +} + +static bool __maybe_unused +xdrgen_decode_uint64(struct xdr_stream *xdr, uint64 *ptr) +{ + return xdrgen_decode_unsigned_hyper(xdr, ptr); +} + +static bool __maybe_unused +xdrgen_decode_int64(struct xdr_stream *xdr, int64 *ptr) +{ + return xdrgen_decode_hyper(xdr, ptr); +} + +static bool __maybe_unused +xdrgen_decode_uint32(struct xdr_stream *xdr, uint32 *ptr) +{ + return xdrgen_decode_unsigned_long(xdr, ptr); +} + +static bool __maybe_unused +xdrgen_decode_int32(struct xdr_stream *xdr, int32 *ptr) +{ + return xdrgen_decode_long(xdr, ptr); +} + +static bool __maybe_unused +xdrgen_decode_nlm4_stats(struct xdr_stream *xdr, nlm4_stats *ptr) +{ + return xdr_stream_decode_be32(xdr, ptr) == 0; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_holder(struct xdr_stream *xdr, struct nlm4_holder *ptr) +{ + if (!xdrgen_decode_bool(xdr, &ptr->exclusive)) + return false; + if (!xdrgen_decode_int32(xdr, &ptr->svid)) + return false; + if (!xdrgen_decode_netobj(xdr, &ptr->oh)) + return false; + if (!xdrgen_decode_uint64(xdr, &ptr->l_offset)) + return false; + if (!xdrgen_decode_uint64(xdr, &ptr->l_len)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_testrply(struct xdr_stream *xdr, struct nlm4_testrply *ptr) +{ + if 
(!xdrgen_decode_nlm4_stats(xdr, &ptr->stat)) + return false; + switch (ptr->stat) { + case __constant_cpu_to_be32(NLM4_DENIED): + if (!xdrgen_decode_nlm4_holder(xdr, &ptr->u.holder)) + return false; + break; + default: + break; + } + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_stat(struct xdr_stream *xdr, struct nlm4_stat *ptr) +{ + if (!xdrgen_decode_nlm4_stats(xdr, &ptr->stat)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_res(struct xdr_stream *xdr, struct nlm4_res *ptr) +{ + if (!xdrgen_decode_netobj(xdr, &ptr->cookie)) + return false; + if (!xdrgen_decode_nlm4_stat(xdr, &ptr->stat)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_testres(struct xdr_stream *xdr, struct nlm4_testres *ptr) +{ + if (!xdrgen_decode_netobj(xdr, &ptr->cookie)) + return false; + if (!xdrgen_decode_nlm4_testrply(xdr, &ptr->stat)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_lock(struct xdr_stream *xdr, struct nlm4_lock *ptr) +{ + if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXSTRLEN)) + return false; + if (!xdrgen_decode_netobj(xdr, &ptr->fh)) + return false; + if (!xdrgen_decode_netobj(xdr, &ptr->oh)) + return false; + if (!xdrgen_decode_int32(xdr, &ptr->svid)) + return false; + if (!xdrgen_decode_uint64(xdr, &ptr->l_offset)) + return false; + if (!xdrgen_decode_uint64(xdr, &ptr->l_len)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_lockargs(struct xdr_stream *xdr, struct nlm4_lockargs *ptr) +{ + if (!xdrgen_decode_netobj(xdr, &ptr->cookie)) + return false; + if (!xdrgen_decode_bool(xdr, &ptr->block)) + return false; + if (!xdrgen_decode_bool(xdr, &ptr->exclusive)) + return false; + if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock)) + return false; + if (!xdrgen_decode_bool(xdr, &ptr->reclaim)) + return false; + if (!xdrgen_decode_int32(xdr, &ptr->state)) + return false; + return true; +} + +static bool 
__maybe_unused +xdrgen_decode_nlm4_cancargs(struct xdr_stream *xdr, struct nlm4_cancargs *ptr) +{ + if (!xdrgen_decode_netobj(xdr, &ptr->cookie)) + return false; + if (!xdrgen_decode_bool(xdr, &ptr->block)) + return false; + if (!xdrgen_decode_bool(xdr, &ptr->exclusive)) + return false; + if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_testargs(struct xdr_stream *xdr, struct nlm4_testargs *ptr) +{ + if (!xdrgen_decode_netobj(xdr, &ptr->cookie)) + return false; + if (!xdrgen_decode_bool(xdr, &ptr->exclusive)) + return false; + if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_unlockargs(struct xdr_stream *xdr, struct nlm4_unlockargs *ptr) +{ + if (!xdrgen_decode_netobj(xdr, &ptr->cookie)) + return false; + if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_share(struct xdr_stream *xdr, struct nlm4_share *ptr) +{ + if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXSTRLEN)) + return false; + if (!xdrgen_decode_netobj(xdr, &ptr->fh)) + return false; + if (!xdrgen_decode_netobj(xdr, &ptr->oh)) + return false; + if (!xdrgen_decode_fsh4_mode(xdr, &ptr->mode)) + return false; + if (!xdrgen_decode_fsh4_access(xdr, &ptr->access)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_shareargs(struct xdr_stream *xdr, struct nlm4_shareargs *ptr) +{ + if (!xdrgen_decode_netobj(xdr, &ptr->cookie)) + return false; + if (!xdrgen_decode_nlm4_share(xdr, &ptr->share)) + return false; + if (!xdrgen_decode_bool(xdr, &ptr->reclaim)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_shareres(struct xdr_stream *xdr, struct nlm4_shareres *ptr) +{ + if (!xdrgen_decode_netobj(xdr, &ptr->cookie)) + return false; + if (!xdrgen_decode_nlm4_stats(xdr, &ptr->stat)) + return false; + if 
(!xdrgen_decode_int32(xdr, &ptr->sequence)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_notify(struct xdr_stream *xdr, struct nlm4_notify *ptr) +{ + if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXNAMELEN)) + return false; + if (!xdrgen_decode_int32(xdr, &ptr->state)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_decode_nlm4_notifyargs(struct xdr_stream *xdr, struct nlm4_notifyargs *ptr) +{ + if (!xdrgen_decode_nlm4_notify(xdr, &ptr->notify)) + return false; + if (xdr_stream_decode_opaque_fixed(xdr, ptr->private, SM_PRIV_SIZE) < 0) + return false; + return true; +} + +/** + * nlm4_svc_decode_void - Decode a void argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + return xdrgen_decode_void(xdr); +} + +/** + * nlm4_svc_decode_nlm4_testargs - Decode a nlm4_testargs argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_testargs *argp = rqstp->rq_argp; + + return xdrgen_decode_nlm4_testargs(xdr, argp); +} + +/** + * nlm4_svc_decode_nlm4_lockargs - Decode a nlm4_lockargs argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_lockargs *argp = rqstp->rq_argp; + + return xdrgen_decode_nlm4_lockargs(xdr, argp); +} + +/** + * nlm4_svc_decode_nlm4_cancargs - Decode a nlm4_cancargs argument + * @rqstp: RPC transaction context + * @xdr: source XDR 
data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_cancargs *argp = rqstp->rq_argp; + + return xdrgen_decode_nlm4_cancargs(xdr, argp); +} + +/** + * nlm4_svc_decode_nlm4_unlockargs - Decode a nlm4_unlockargs argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_unlockargs *argp = rqstp->rq_argp; + + return xdrgen_decode_nlm4_unlockargs(xdr, argp); +} + +/** + * nlm4_svc_decode_nlm4_testres - Decode a nlm4_testres argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_testres *argp = rqstp->rq_argp; + + return xdrgen_decode_nlm4_testres(xdr, argp); +} + +/** + * nlm4_svc_decode_nlm4_res - Decode a nlm4_res argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_res *argp = rqstp->rq_argp; + + return xdrgen_decode_nlm4_res(xdr, argp); +} + +/** + * nlm4_svc_decode_nlm4_notifyargs - Decode a nlm4_notifyargs argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_notifyargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_notifyargs *argp = rqstp->rq_argp; + + 
return xdrgen_decode_nlm4_notifyargs(xdr, argp); +} + +/** + * nlm4_svc_decode_nlm4_shareargs - Decode a nlm4_shareargs argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_shareargs *argp = rqstp->rq_argp; + + return xdrgen_decode_nlm4_shareargs(xdr, argp); +} + +/** + * nlm4_svc_decode_nlm4_notify - Decode a nlm4_notify argument + * @rqstp: RPC transaction context + * @xdr: source XDR data stream + * + * Return values: + * %true: procedure arguments decoded successfully + * %false: decode failed + */ +bool nlm4_svc_decode_nlm4_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_notify *argp = rqstp->rq_argp; + + return xdrgen_decode_nlm4_notify(xdr, argp); +} + +static bool __maybe_unused +xdrgen_encode_netobj(struct xdr_stream *xdr, const netobj value) +{ + return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0; +} + +static bool __maybe_unused +xdrgen_encode_fsh4_mode(struct xdr_stream *xdr, fsh4_mode value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_fsh4_access(struct xdr_stream *xdr, fsh4_access value) +{ + return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_uint64(struct xdr_stream *xdr, const uint64 value) +{ + return xdrgen_encode_unsigned_hyper(xdr, value); +} + +static bool __maybe_unused +xdrgen_encode_int64(struct xdr_stream *xdr, const int64 value) +{ + return xdrgen_encode_hyper(xdr, value); +} + +static bool __maybe_unused +xdrgen_encode_uint32(struct xdr_stream *xdr, const uint32 value) +{ + return xdrgen_encode_unsigned_long(xdr, value); +} + +static bool __maybe_unused +xdrgen_encode_int32(struct xdr_stream *xdr, const int32 value) +{ + return xdrgen_encode_long(xdr, value); 
+} + +static bool __maybe_unused +xdrgen_encode_nlm4_stats(struct xdr_stream *xdr, nlm4_stats value) +{ + return xdr_stream_encode_be32(xdr, value) == XDR_UNIT; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_holder(struct xdr_stream *xdr, const struct nlm4_holder *value) +{ + if (!xdrgen_encode_bool(xdr, value->exclusive)) + return false; + if (!xdrgen_encode_int32(xdr, value->svid)) + return false; + if (!xdrgen_encode_netobj(xdr, value->oh)) + return false; + if (!xdrgen_encode_uint64(xdr, value->l_offset)) + return false; + if (!xdrgen_encode_uint64(xdr, value->l_len)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_testrply(struct xdr_stream *xdr, const struct nlm4_testrply *ptr) +{ + if (!xdrgen_encode_nlm4_stats(xdr, ptr->stat)) + return false; + switch (ptr->stat) { + case __constant_cpu_to_be32(NLM4_DENIED): + if (!xdrgen_encode_nlm4_holder(xdr, &ptr->u.holder)) + return false; + break; + default: + break; + } + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_stat(struct xdr_stream *xdr, const struct nlm4_stat *value) +{ + if (!xdrgen_encode_nlm4_stats(xdr, value->stat)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_res(struct xdr_stream *xdr, const struct nlm4_res *value) +{ + if (!xdrgen_encode_netobj(xdr, value->cookie)) + return false; + if (!xdrgen_encode_nlm4_stat(xdr, &value->stat)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_testres(struct xdr_stream *xdr, const struct nlm4_testres *value) +{ + if (!xdrgen_encode_netobj(xdr, value->cookie)) + return false; + if (!xdrgen_encode_nlm4_testrply(xdr, &value->stat)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_lock(struct xdr_stream *xdr, const struct nlm4_lock *value) +{ + if (value->caller_name.len > LM_MAXSTRLEN) + return false; + if (xdr_stream_encode_opaque(xdr, value->caller_name.data, value->caller_name.len) < 0) + return 
false; + if (!xdrgen_encode_netobj(xdr, value->fh)) + return false; + if (!xdrgen_encode_netobj(xdr, value->oh)) + return false; + if (!xdrgen_encode_int32(xdr, value->svid)) + return false; + if (!xdrgen_encode_uint64(xdr, value->l_offset)) + return false; + if (!xdrgen_encode_uint64(xdr, value->l_len)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_lockargs(struct xdr_stream *xdr, const struct nlm4_lockargs *value) +{ + if (!xdrgen_encode_netobj(xdr, value->cookie)) + return false; + if (!xdrgen_encode_bool(xdr, value->block)) + return false; + if (!xdrgen_encode_bool(xdr, value->exclusive)) + return false; + if (!xdrgen_encode_nlm4_lock(xdr, &value->alock)) + return false; + if (!xdrgen_encode_bool(xdr, value->reclaim)) + return false; + if (!xdrgen_encode_int32(xdr, value->state)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_cancargs(struct xdr_stream *xdr, const struct nlm4_cancargs *value) +{ + if (!xdrgen_encode_netobj(xdr, value->cookie)) + return false; + if (!xdrgen_encode_bool(xdr, value->block)) + return false; + if (!xdrgen_encode_bool(xdr, value->exclusive)) + return false; + if (!xdrgen_encode_nlm4_lock(xdr, &value->alock)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_testargs(struct xdr_stream *xdr, const struct nlm4_testargs *value) +{ + if (!xdrgen_encode_netobj(xdr, value->cookie)) + return false; + if (!xdrgen_encode_bool(xdr, value->exclusive)) + return false; + if (!xdrgen_encode_nlm4_lock(xdr, &value->alock)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_unlockargs(struct xdr_stream *xdr, const struct nlm4_unlockargs *value) +{ + if (!xdrgen_encode_netobj(xdr, value->cookie)) + return false; + if (!xdrgen_encode_nlm4_lock(xdr, &value->alock)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_share(struct xdr_stream *xdr, const struct nlm4_share *value) +{ + 
if (value->caller_name.len > LM_MAXSTRLEN) + return false; + if (xdr_stream_encode_opaque(xdr, value->caller_name.data, value->caller_name.len) < 0) + return false; + if (!xdrgen_encode_netobj(xdr, value->fh)) + return false; + if (!xdrgen_encode_netobj(xdr, value->oh)) + return false; + if (!xdrgen_encode_fsh4_mode(xdr, value->mode)) + return false; + if (!xdrgen_encode_fsh4_access(xdr, value->access)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_shareargs(struct xdr_stream *xdr, const struct nlm4_shareargs *value) +{ + if (!xdrgen_encode_netobj(xdr, value->cookie)) + return false; + if (!xdrgen_encode_nlm4_share(xdr, &value->share)) + return false; + if (!xdrgen_encode_bool(xdr, value->reclaim)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_shareres(struct xdr_stream *xdr, const struct nlm4_shareres *value) +{ + if (!xdrgen_encode_netobj(xdr, value->cookie)) + return false; + if (!xdrgen_encode_nlm4_stats(xdr, value->stat)) + return false; + if (!xdrgen_encode_int32(xdr, value->sequence)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_notify(struct xdr_stream *xdr, const struct nlm4_notify *value) +{ + if (value->name.len > LM_MAXNAMELEN) + return false; + if (xdr_stream_encode_opaque(xdr, value->name.data, value->name.len) < 0) + return false; + if (!xdrgen_encode_int32(xdr, value->state)) + return false; + return true; +} + +static bool __maybe_unused +xdrgen_encode_nlm4_notifyargs(struct xdr_stream *xdr, const struct nlm4_notifyargs *value) +{ + if (!xdrgen_encode_nlm4_notify(xdr, &value->notify)) + return false; + if (xdr_stream_encode_opaque_fixed(xdr, value->private, SM_PRIV_SIZE) < 0) + return false; + return true; +} + +/** + * nlm4_svc_encode_void - Encode a void result + * @rqstp: RPC transaction context + * @xdr: target XDR data stream + * + * Return values: + * %true: procedure results encoded successfully + * %false: encode failed + */ 
+bool nlm4_svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + return xdrgen_encode_void(xdr); +} + +/** + * nlm4_svc_encode_nlm4_testres - Encode a nlm4_testres result + * @rqstp: RPC transaction context + * @xdr: target XDR data stream + * + * Return values: + * %true: procedure results encoded successfully + * %false: encode failed + */ +bool nlm4_svc_encode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_testres *resp = rqstp->rq_resp; + + return xdrgen_encode_nlm4_testres(xdr, resp); +} + +/** + * nlm4_svc_encode_nlm4_res - Encode a nlm4_res result + * @rqstp: RPC transaction context + * @xdr: target XDR data stream + * + * Return values: + * %true: procedure results encoded successfully + * %false: encode failed + */ +bool nlm4_svc_encode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_res *resp = rqstp->rq_resp; + + return xdrgen_encode_nlm4_res(xdr, resp); +} + +/** + * nlm4_svc_encode_nlm4_shareres - Encode a nlm4_shareres result + * @rqstp: RPC transaction context + * @xdr: target XDR data stream + * + * Return values: + * %true: procedure results encoded successfully + * %false: encode failed + */ +bool nlm4_svc_encode_nlm4_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr) +{ + struct nlm4_shareres *resp = rqstp->rq_resp; + + return xdrgen_encode_nlm4_shareres(xdr, resp); +} diff --git a/fs/lockd/nlm4xdr_gen.h b/fs/lockd/nlm4xdr_gen.h new file mode 100644 index 000000000000..b6008b296a3e --- /dev/null +++ b/fs/lockd/nlm4xdr_gen.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Generated by xdrgen. Manual edits will be lost. 
*/ +/* XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x */ +/* XDR specification modification time: Thu Dec 25 13:10:19 2025 */ + +#ifndef _LINUX_XDRGEN_NLM4_DECL_H +#define _LINUX_XDRGEN_NLM4_DECL_H + +#include + +#include +#include +#include +#include + +bool nlm4_svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_notifyargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_decode_nlm4_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +bool nlm4_svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_encode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_encode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr); +bool nlm4_svc_encode_nlm4_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr); + +#endif /* _LINUX_XDRGEN_NLM4_DECL_H */ diff --git a/include/linux/sunrpc/xdrgen/nlm4.h b/include/linux/sunrpc/xdrgen/nlm4.h new file mode 100644 index 000000000000..e95e8f105624 --- /dev/null +++ b/include/linux/sunrpc/xdrgen/nlm4.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Generated by xdrgen. Manual edits will be lost. 
*/ +/* XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x */ +/* XDR specification modification time: Thu Dec 25 13:10:19 2025 */ + +#ifndef _LINUX_XDRGEN_NLM4_DEF_H +#define _LINUX_XDRGEN_NLM4_DEF_H + +#include +#include + +enum { LM_MAXSTRLEN = 1024 }; + +enum { LM_MAXNAMELEN = 1025 }; + +enum { MAXNETOBJ_SZ = 1024 }; + +typedef opaque netobj; + +enum fsh4_mode { + fsm_DN = 0, + fsm_DR = 1, + fsm_DW = 2, + fsm_DRW = 3, +}; + +typedef enum fsh4_mode fsh4_mode; + +enum fsh4_access { + fsa_NONE = 0, + fsa_R = 1, + fsa_W = 2, + fsa_RW = 3, +}; + +typedef enum fsh4_access fsh4_access; + +enum { SM_MAXSTRLEN = 1024 }; + +typedef u64 uint64; + +typedef s64 int64; + +typedef u32 uint32; + +typedef s32 int32; + +enum nlm4_stats { + NLM4_GRANTED = 0, + NLM4_DENIED = 1, + NLM4_DENIED_NOLOCKS = 2, + NLM4_BLOCKED = 3, + NLM4_DENIED_GRACE_PERIOD = 4, + NLM4_DEADLCK = 5, + NLM4_ROFS = 6, + NLM4_STALE_FH = 7, + NLM4_FBIG = 8, + NLM4_FAILED = 9, +}; + +typedef __be32 nlm4_stats; + +struct nlm4_holder { + bool exclusive; + int32 svid; + netobj oh; + uint64 l_offset; + uint64 l_len; +}; + +struct nlm4_testrply { + nlm4_stats stat; + union { + struct nlm4_holder holder; + } u; +}; + +struct nlm4_stat { + nlm4_stats stat; +}; + +struct nlm4_res { + netobj cookie; + struct nlm4_stat stat; +}; + +struct nlm4_testres { + netobj cookie; + struct nlm4_testrply stat; +}; + +struct nlm4_lock { + string caller_name; + netobj fh; + netobj oh; + int32 svid; + uint64 l_offset; + uint64 l_len; +}; + +struct nlm4_lockargs { + netobj cookie; + bool block; + bool exclusive; + struct nlm4_lock alock; + bool reclaim; + int32 state; +}; + +struct nlm4_cancargs { + netobj cookie; + bool block; + bool exclusive; + struct nlm4_lock alock; +}; + +struct nlm4_testargs { + netobj cookie; + bool exclusive; + struct nlm4_lock alock; +}; + +struct nlm4_unlockargs { + netobj cookie; + struct nlm4_lock alock; +}; + +struct nlm4_share { + string caller_name; + netobj fh; + netobj oh; + fsh4_mode mode; 
+ fsh4_access access; +}; + +struct nlm4_shareargs { + netobj cookie; + struct nlm4_share share; + bool reclaim; +}; + +struct nlm4_shareres { + netobj cookie; + nlm4_stats stat; + int32 sequence; +}; + +struct nlm4_notify { + string name; + int32 state; +}; + +enum { SM_PRIV_SIZE = 16 }; + +struct nlm4_notifyargs { + struct nlm4_notify notify; + u8 private[SM_PRIV_SIZE]; +}; + +enum { + NLMPROC4_NULL = 0, + NLMPROC4_TEST = 1, + NLMPROC4_LOCK = 2, + NLMPROC4_CANCEL = 3, + NLMPROC4_UNLOCK = 4, + NLMPROC4_GRANTED = 5, + NLMPROC4_TEST_MSG = 6, + NLMPROC4_LOCK_MSG = 7, + NLMPROC4_CANCEL_MSG = 8, + NLMPROC4_UNLOCK_MSG = 9, + NLMPROC4_GRANTED_MSG = 10, + NLMPROC4_TEST_RES = 11, + NLMPROC4_LOCK_RES = 12, + NLMPROC4_CANCEL_RES = 13, + NLMPROC4_UNLOCK_RES = 14, + NLMPROC4_GRANTED_RES = 15, + NLMPROC4_SM_NOTIFY = 16, + NLMPROC4_SHARE = 20, + NLMPROC4_UNSHARE = 21, + NLMPROC4_NM_LOCK = 22, + NLMPROC4_FREE_ALL = 23, +}; + +#ifndef NLM4_PROG +#define NLM4_PROG (100021) +#endif + +#define NLM4_netobj_sz (XDR_unsigned_int + XDR_QUADLEN(MAXNETOBJ_SZ)) +#define NLM4_fsh4_mode_sz (XDR_int) +#define NLM4_fsh4_access_sz (XDR_int) +#define NLM4_uint64_sz \ + (XDR_unsigned_hyper) +#define NLM4_int64_sz \ + (XDR_hyper) +#define NLM4_uint32_sz \ + (XDR_unsigned_long) +#define NLM4_int32_sz \ + (XDR_long) +#define NLM4_nlm4_stats_sz (XDR_int) +#define NLM4_nlm4_holder_sz \ + (XDR_bool + NLM4_int32_sz + NLM4_netobj_sz + NLM4_uint64_sz + NLM4_uint64_sz) +#define NLM4_nlm4_testrply_sz \ + (NLM4_nlm4_stats_sz + NLM4_nlm4_holder_sz) +#define NLM4_nlm4_stat_sz \ + (NLM4_nlm4_stats_sz) +#define NLM4_nlm4_res_sz \ + (NLM4_netobj_sz + NLM4_nlm4_stat_sz) +#define NLM4_nlm4_testres_sz \ + (NLM4_netobj_sz + NLM4_nlm4_testrply_sz) +#define NLM4_nlm4_lock_sz \ + (XDR_unsigned_int + XDR_QUADLEN(LM_MAXSTRLEN) + NLM4_netobj_sz + NLM4_netobj_sz + NLM4_int32_sz + NLM4_uint64_sz + NLM4_uint64_sz) +#define NLM4_nlm4_lockargs_sz \ + (NLM4_netobj_sz + XDR_bool + XDR_bool + NLM4_nlm4_lock_sz + XDR_bool + 
NLM4_int32_sz) +#define NLM4_nlm4_cancargs_sz \ + (NLM4_netobj_sz + XDR_bool + XDR_bool + NLM4_nlm4_lock_sz) +#define NLM4_nlm4_testargs_sz \ + (NLM4_netobj_sz + XDR_bool + NLM4_nlm4_lock_sz) +#define NLM4_nlm4_unlockargs_sz \ + (NLM4_netobj_sz + NLM4_nlm4_lock_sz) +#define NLM4_nlm4_share_sz \ + (XDR_unsigned_int + XDR_QUADLEN(LM_MAXSTRLEN) + NLM4_netobj_sz + NLM4_netobj_sz + NLM4_fsh4_mode_sz + NLM4_fsh4_access_sz) +#define NLM4_nlm4_shareargs_sz \ + (NLM4_netobj_sz + NLM4_nlm4_share_sz + XDR_bool) +#define NLM4_nlm4_shareres_sz \ + (NLM4_netobj_sz + NLM4_nlm4_stats_sz + NLM4_int32_sz) +#define NLM4_nlm4_notify_sz \ + (XDR_unsigned_int + XDR_QUADLEN(LM_MAXNAMELEN) + NLM4_int32_sz) +#define NLM4_nlm4_notifyargs_sz \ + (NLM4_nlm4_notify_sz + XDR_QUADLEN(SM_PRIV_SIZE)) +#define NLM4_MAX_ARGS_SZ \ + (NLM4_nlm4_lockargs_sz) + +#endif /* _LINUX_XDRGEN_NLM4_DEF_H */ -- cgit v1.2.3 From 6b4f16a532e794e0df90baf15173e2166f863864 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 21 Feb 2026 13:39:59 -0500 Subject: sunrpc: Add XPT flags missing from SVC_XPRT_FLAG_LIST Commit eccbbc7c00a5 ("nfsd: don't use sv_nrthreads in connection limiting calculations.") and commit 898374fdd7f0 ("nfsd: unregister with rpcbind when deleting a transport") added new XPT flags but neglected to update the show_svc_xprt_flags() macro. 
Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 750ecce56930..ff855197880d 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1933,7 +1933,9 @@ TRACE_EVENT(svc_stats_latency, svc_xprt_flag(CONG_CTRL) \ svc_xprt_flag(HANDSHAKE) \ svc_xprt_flag(TLS_SESSION) \ - svc_xprt_flag_end(PEER_AUTH) + svc_xprt_flag(PEER_AUTH) \ + svc_xprt_flag(PEER_VALID) \ + svc_xprt_flag_end(RPCB_UNREG) #undef svc_xprt_flag #undef svc_xprt_flag_end -- cgit v1.2.3 From 17c1d66579ff27a7a8f2f407d1425272ff6fdd8c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Feb 2026 12:09:59 -0500 Subject: sunrpc: convert queue_lock from global spinlock to per-cache-detail lock The global queue_lock serializes all upcall queue operations across every cache_detail instance. Convert it to a per-cache-detail spinlock so that different caches (e.g. auth.unix.ip vs nfsd.fh) no longer contend with each other on queue operations. 
Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/cache.h | 1 + net/sunrpc/cache.c | 47 ++++++++++++++++++++++---------------------- 2 files changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index e783132e481f..3d32dd1f7b05 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -113,6 +113,7 @@ struct cache_detail { /* fields for communication over channel */ struct list_head queue; + spinlock_t queue_lock; atomic_t writers; /* how many time is /channel open */ time64_t last_close; /* if no writers, when did last close */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 86b3fd5a429d..1cfaae488c6c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -400,6 +400,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd) { spin_lock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); + spin_lock_init(&cd->queue_lock); spin_lock(&cache_list_lock); cd->nextcheck = 0; cd->entries = 0; @@ -803,8 +804,6 @@ void cache_clean_deferred(void *owner) * */ -static DEFINE_SPINLOCK(queue_lock); - struct cache_queue { struct list_head list; int reader; /* if 0, then request */ @@ -847,7 +846,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, inode_lock(inode); /* protect against multiple concurrent * readers on this file */ again: - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); /* need to find next request */ while (rp->q.list.next != &cd->queue && list_entry(rp->q.list.next, struct cache_queue, list) @@ -856,7 +855,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, list_move(&rp->q.list, next); } if (rp->q.list.next == &cd->queue) { - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); inode_unlock(inode); WARN_ON_ONCE(rp->offset); return 0; @@ -865,7 +864,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, 
WARN_ON_ONCE(rq->q.reader); if (rp->offset == 0) rq->readers++; - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); if (rq->len == 0) { err = cache_request(cd, rq); @@ -876,9 +875,9 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); list_move(&rp->q.list, &rq->q.list); - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); } else { if (rp->offset + count > rq->len) count = rq->len - rp->offset; @@ -888,26 +887,26 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rp->offset += count; if (rp->offset >= rq->len) { rp->offset = 0; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); list_move(&rp->q.list, &rq->q.list); - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); } err = 0; } out: if (rp->offset == 0) { /* need to release rq */ - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); rq->readers--; if (rq->readers == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { list_del(&rq->q.list); - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); cache_put(rq->item, cd); kfree(rq->buf); kfree(rq); } else - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); } if (err == -EAGAIN) goto again; @@ -988,7 +987,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait, if (!rp) return mask; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); for (cq= &rp->q; &cq->list != &cd->queue; cq = list_entry(cq->list.next, struct cache_queue, list)) @@ -996,7 +995,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait, mask |= EPOLLIN | EPOLLRDNORM; break; } - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); return mask; } @@ -1011,7 +1010,7 @@ static int cache_ioctl(struct inode *ino, struct file *filp, if (cmd != FIONREAD || !rp) return -EINVAL; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); /* only find 
the length remaining in current request, * or the length of the next request @@ -1024,7 +1023,7 @@ static int cache_ioctl(struct inode *ino, struct file *filp, len = cr->len - rp->offset; break; } - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); return put_user(len, (int __user *)arg); } @@ -1046,9 +1045,9 @@ static int cache_open(struct inode *inode, struct file *filp, rp->offset = 0; rp->q.reader = 1; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); list_add(&rp->q.list, &cd->queue); - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); } if (filp->f_mode & FMODE_WRITE) atomic_inc(&cd->writers); @@ -1064,7 +1063,7 @@ static int cache_release(struct inode *inode, struct file *filp, if (rp) { struct cache_request *rq = NULL; - spin_lock(&queue_lock); + spin_lock(&cd->queue_lock); if (rp->offset) { struct cache_queue *cq; for (cq = &rp->q; &cq->list != &cd->queue; @@ -1086,7 +1085,7 @@ static int cache_release(struct inode *inode, struct file *filp, rp->offset = 0; } list_del(&rp->q.list); - spin_unlock(&queue_lock); + spin_unlock(&cd->queue_lock); if (rq) { cache_put(rq->item, cd); @@ -1113,7 +1112,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) struct cache_request *cr; LIST_HEAD(dequeued); - spin_lock(&queue_lock); + spin_lock(&detail->queue_lock); list_for_each_entry_safe(cq, tmp, &detail->queue, list) if (!cq->reader) { cr = container_of(cq, struct cache_request, q); @@ -1126,7 +1125,7 @@ static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) continue; list_move(&cr->q.list, &dequeued); } - spin_unlock(&queue_lock); + spin_unlock(&detail->queue_lock); while (!list_empty(&dequeued)) { cr = list_entry(dequeued.next, struct cache_request, q.list); list_del(&cr->q.list); @@ -1251,7 +1250,7 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) crq->buf = buf; crq->len = 0; crq->readers = 0; - spin_lock(&queue_lock); + spin_lock(&detail->queue_lock); if 
(test_bit(CACHE_PENDING, &h->flags)) { crq->item = cache_get(h); list_add_tail(&crq->q.list, &detail->queue); @@ -1259,7 +1258,7 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) } else /* Lost a race, no longer PENDING, so don't enqueue */ ret = -EAGAIN; - spin_unlock(&queue_lock); + spin_unlock(&detail->queue_lock); wake_up(&queue_wait); if (ret == -EAGAIN) { kfree(buf); -- cgit v1.2.3 From 552d0e17ea042fc4f959c4543cbbd0e54de7a8e9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Feb 2026 12:10:00 -0500 Subject: sunrpc: convert queue_wait from global to per-cache-detail waitqueue The queue_wait waitqueue is currently a file-scoped global, so a wake_up for one cache_detail wakes pollers on all caches. Convert it to a per-cache-detail field so that only pollers on the relevant cache are woken. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/cache.h | 2 ++ net/sunrpc/cache.c | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 3d32dd1f7b05..031379efba24 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -16,6 +16,7 @@ #include #include #include +#include /* * Each cache requires: @@ -114,6 +115,7 @@ struct cache_detail { /* fields for communication over channel */ struct list_head queue; spinlock_t queue_lock; + wait_queue_head_t queue_wait; atomic_t writers; /* how many time is /channel open */ time64_t last_close; /* if no writers, when did last close */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 1cfaae488c6c..fd02dca1f07a 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -401,6 +401,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd) spin_lock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock_init(&cd->queue_lock); + init_waitqueue_head(&cd->queue_wait); spin_lock(&cache_list_lock); cd->nextcheck = 0; cd->entries 
= 0; @@ -970,8 +971,6 @@ out: return ret; } -static DECLARE_WAIT_QUEUE_HEAD(queue_wait); - static __poll_t cache_poll(struct file *filp, poll_table *wait, struct cache_detail *cd) { @@ -979,7 +978,7 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait, struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - poll_wait(filp, &queue_wait, wait); + poll_wait(filp, &cd->queue_wait, wait); /* alway allow write */ mask = EPOLLOUT | EPOLLWRNORM; @@ -1259,7 +1258,7 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) /* Lost a race, no longer PENDING, so don't enqueue */ ret = -EAGAIN; spin_unlock(&detail->queue_lock); - wake_up(&queue_wait); + wake_up(&detail->queue_wait); if (ret == -EAGAIN) { kfree(buf); kfree(crq); -- cgit v1.2.3 From facc4e3c80420e3466003ce09b576e005b56a015 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 23 Feb 2026 12:10:01 -0500 Subject: sunrpc: split cache_detail queue into request and reader lists Replace the single interleaved queue (which mixed cache_request and cache_reader entries distinguished by a ->reader flag) with two dedicated lists: cd->requests for upcall requests and cd->readers for open file handles. Readers now track their position via a monotonically increasing sequence number (next_seqno) rather than by their position in the shared list. Each cache_request is assigned a seqno when enqueued, and a new cache_next_request() helper finds the next request at or after a given seqno. This eliminates the cache_queue wrapper struct entirely, simplifies the reader-skipping loops in cache_read/cache_poll/cache_ioctl/ cache_release, and makes the data flow easier to reason about. 
Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/cache.h | 4 +- net/sunrpc/cache.c | 143 ++++++++++++++++++------------------------- 2 files changed, 62 insertions(+), 85 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 031379efba24..b1e595c2615b 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -113,9 +113,11 @@ struct cache_detail { int entries; /* fields for communication over channel */ - struct list_head queue; + struct list_head requests; + struct list_head readers; spinlock_t queue_lock; wait_queue_head_t queue_wait; + u64 next_seqno; atomic_t writers; /* how many time is /channel open */ time64_t last_close; /* if no writers, when did last close */ diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index fd02dca1f07a..7081c1214e6c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -399,9 +399,11 @@ static struct delayed_work cache_cleaner; void sunrpc_init_cache_detail(struct cache_detail *cd) { spin_lock_init(&cd->hash_lock); - INIT_LIST_HEAD(&cd->queue); + INIT_LIST_HEAD(&cd->requests); + INIT_LIST_HEAD(&cd->readers); spin_lock_init(&cd->queue_lock); init_waitqueue_head(&cd->queue_wait); + cd->next_seqno = 0; spin_lock(&cache_list_lock); cd->nextcheck = 0; cd->entries = 0; @@ -796,29 +798,20 @@ void cache_clean_deferred(void *owner) * On read, you get a full request, or block. * On write, an update request is processed. * Poll works if anything to read, and always allows write. - * - * Implemented by linked list of requests. Each open file has - * a ->private that also exists in this list. New requests are added - * to the end and may wakeup and preceding readers. - * New readers are added to the head. If, on read, an item is found with - * CACHE_UPCALLING clear, we free it from the list. 
- * */ -struct cache_queue { - struct list_head list; - int reader; /* if 0, then request */ -}; struct cache_request { - struct cache_queue q; + struct list_head list; struct cache_head *item; - char * buf; + char *buf; int len; int readers; + u64 seqno; }; struct cache_reader { - struct cache_queue q; + struct list_head list; int offset; /* if non-0, we have a refcnt on next request */ + u64 next_seqno; }; static int cache_request(struct cache_detail *detail, @@ -833,6 +826,17 @@ static int cache_request(struct cache_detail *detail, return PAGE_SIZE - len; } +static struct cache_request * +cache_next_request(struct cache_detail *cd, u64 seqno) +{ + struct cache_request *rq; + + list_for_each_entry(rq, &cd->requests, list) + if (rq->seqno >= seqno) + return rq; + return NULL; +} + static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { @@ -849,20 +853,13 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, again: spin_lock(&cd->queue_lock); /* need to find next request */ - while (rp->q.list.next != &cd->queue && - list_entry(rp->q.list.next, struct cache_queue, list) - ->reader) { - struct list_head *next = rp->q.list.next; - list_move(&rp->q.list, next); - } - if (rp->q.list.next == &cd->queue) { + rq = cache_next_request(cd, rp->next_seqno); + if (!rq) { spin_unlock(&cd->queue_lock); inode_unlock(inode); WARN_ON_ONCE(rp->offset); return 0; } - rq = container_of(rp->q.list.next, struct cache_request, q.list); - WARN_ON_ONCE(rq->q.reader); if (rp->offset == 0) rq->readers++; spin_unlock(&cd->queue_lock); @@ -876,9 +873,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; - spin_lock(&cd->queue_lock); - list_move(&rp->q.list, &rq->q.list); - spin_unlock(&cd->queue_lock); + rp->next_seqno = rq->seqno + 1; } else { if (rp->offset + count > rq->len) count = rq->len - 
rp->offset; @@ -888,9 +883,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rp->offset += count; if (rp->offset >= rq->len) { rp->offset = 0; - spin_lock(&cd->queue_lock); - list_move(&rp->q.list, &rq->q.list); - spin_unlock(&cd->queue_lock); + rp->next_seqno = rq->seqno + 1; } err = 0; } @@ -901,7 +894,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rq->readers--; if (rq->readers == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { - list_del(&rq->q.list); + list_del(&rq->list); spin_unlock(&cd->queue_lock); cache_put(rq->item, cd); kfree(rq->buf); @@ -976,7 +969,6 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait, { __poll_t mask; struct cache_reader *rp = filp->private_data; - struct cache_queue *cq; poll_wait(filp, &cd->queue_wait, wait); @@ -988,12 +980,8 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait, spin_lock(&cd->queue_lock); - for (cq= &rp->q; &cq->list != &cd->queue; - cq = list_entry(cq->list.next, struct cache_queue, list)) - if (!cq->reader) { - mask |= EPOLLIN | EPOLLRDNORM; - break; - } + if (cache_next_request(cd, rp->next_seqno)) + mask |= EPOLLIN | EPOLLRDNORM; spin_unlock(&cd->queue_lock); return mask; } @@ -1004,7 +992,7 @@ static int cache_ioctl(struct inode *ino, struct file *filp, { int len = 0; struct cache_reader *rp = filp->private_data; - struct cache_queue *cq; + struct cache_request *rq; if (cmd != FIONREAD || !rp) return -EINVAL; @@ -1014,14 +1002,9 @@ static int cache_ioctl(struct inode *ino, struct file *filp, /* only find the length remaining in current request, * or the length of the next request */ - for (cq= &rp->q; &cq->list != &cd->queue; - cq = list_entry(cq->list.next, struct cache_queue, list)) - if (!cq->reader) { - struct cache_request *cr = - container_of(cq, struct cache_request, q); - len = cr->len - rp->offset; - break; - } + rq = cache_next_request(cd, rp->next_seqno); + if (rq) + len = rq->len - rp->offset; 
spin_unlock(&cd->queue_lock); return put_user(len, (int __user *)arg); @@ -1042,10 +1025,10 @@ static int cache_open(struct inode *inode, struct file *filp, return -ENOMEM; } rp->offset = 0; - rp->q.reader = 1; + rp->next_seqno = 0; spin_lock(&cd->queue_lock); - list_add(&rp->q.list, &cd->queue); + list_add(&rp->list, &cd->readers); spin_unlock(&cd->queue_lock); } if (filp->f_mode & FMODE_WRITE) @@ -1064,26 +1047,21 @@ static int cache_release(struct inode *inode, struct file *filp, spin_lock(&cd->queue_lock); if (rp->offset) { - struct cache_queue *cq; - for (cq = &rp->q; &cq->list != &cd->queue; - cq = list_entry(cq->list.next, - struct cache_queue, list)) - if (!cq->reader) { - struct cache_request *cr = - container_of(cq, - struct cache_request, q); - cr->readers--; - if (cr->readers == 0 && - !test_bit(CACHE_PENDING, - &cr->item->flags)) { - list_del(&cr->q.list); - rq = cr; - } - break; + struct cache_request *cr; + + cr = cache_next_request(cd, rp->next_seqno); + if (cr) { + cr->readers--; + if (cr->readers == 0 && + !test_bit(CACHE_PENDING, + &cr->item->flags)) { + list_del(&cr->list); + rq = cr; } + } rp->offset = 0; } - list_del(&rp->q.list); + list_del(&rp->list); spin_unlock(&cd->queue_lock); if (rq) { @@ -1107,27 +1085,24 @@ static int cache_release(struct inode *inode, struct file *filp, static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { - struct cache_queue *cq, *tmp; - struct cache_request *cr; + struct cache_request *cr, *tmp; LIST_HEAD(dequeued); spin_lock(&detail->queue_lock); - list_for_each_entry_safe(cq, tmp, &detail->queue, list) - if (!cq->reader) { - cr = container_of(cq, struct cache_request, q); - if (cr->item != ch) - continue; - if (test_bit(CACHE_PENDING, &ch->flags)) - /* Lost a race and it is pending again */ - break; - if (cr->readers != 0) - continue; - list_move(&cr->q.list, &dequeued); - } + list_for_each_entry_safe(cr, tmp, &detail->requests, list) { + if (cr->item != ch) + continue; + if 
(test_bit(CACHE_PENDING, &ch->flags)) + /* Lost a race and it is pending again */ + break; + if (cr->readers != 0) + continue; + list_move(&cr->list, &dequeued); + } spin_unlock(&detail->queue_lock); while (!list_empty(&dequeued)) { - cr = list_entry(dequeued.next, struct cache_request, q.list); - list_del(&cr->q.list); + cr = list_entry(dequeued.next, struct cache_request, list); + list_del(&cr->list); cache_put(cr->item, detail); kfree(cr->buf); kfree(cr); @@ -1245,14 +1220,14 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) return -EAGAIN; } - crq->q.reader = 0; crq->buf = buf; crq->len = 0; crq->readers = 0; spin_lock(&detail->queue_lock); if (test_bit(CACHE_PENDING, &h->flags)) { crq->item = cache_get(h); - list_add_tail(&crq->q.list, &detail->queue); + crq->seqno = detail->next_seqno++; + list_add_tail(&crq->list, &detail->requests); trace_cache_entry_upcall(detail, h); } else /* Lost a race, no longer PENDING, so don't enqueue */ -- cgit v1.2.3 From 62346217fd722510c3551858ad7d0fcfab8cce7e Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 25 Feb 2026 07:51:36 -0500 Subject: NFSD: Add a key for signing filehandles A future patch will enable NFSD to sign filehandles by appending a Message Authentication Code (MAC). To do this, NFSD requires a secret 128-bit key that can persist across reboots. A persisted key allows the server to accept filehandles after a restart. Enable NFSD to be configured with this key via the netlink interface. 
Link: https://lore.kernel.org/linux-nfs/cover.1772022373.git.bcodding@hammerspace.com Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- Documentation/netlink/specs/nfsd.yaml | 6 ++++++ fs/nfsd/netlink.c | 5 +++-- fs/nfsd/netns.h | 1 + fs/nfsd/nfsctl.c | 38 ++++++++++++++++++++++++++++++++++- fs/nfsd/trace.h | 22 ++++++++++++++++++++ include/uapi/linux/nfsd_netlink.h | 1 + 6 files changed, 70 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml index f87b5a05e5e9..8ab43c8253b2 100644 --- a/Documentation/netlink/specs/nfsd.yaml +++ b/Documentation/netlink/specs/nfsd.yaml @@ -81,6 +81,11 @@ attribute-sets: - name: min-threads type: u32 + - + name: fh-key + type: binary + checks: + exact-len: 16 - name: version attributes: @@ -163,6 +168,7 @@ operations: - leasetime - scope - min-threads + - fh-key - name: threads-get doc: get the maximum number of running threads diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c index 887525964451..81c943345d13 100644 --- a/fs/nfsd/netlink.c +++ b/fs/nfsd/netlink.c @@ -24,12 +24,13 @@ const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1] = { }; /* NFSD_CMD_THREADS_SET - do */ -static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_MIN_THREADS + 1] = { +static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_FH_KEY + 1] = { [NFSD_A_SERVER_THREADS] = { .type = NLA_U32, }, [NFSD_A_SERVER_GRACETIME] = { .type = NLA_U32, }, [NFSD_A_SERVER_LEASETIME] = { .type = NLA_U32, }, [NFSD_A_SERVER_SCOPE] = { .type = NLA_NUL_STRING, }, [NFSD_A_SERVER_MIN_THREADS] = { .type = NLA_U32, }, + [NFSD_A_SERVER_FH_KEY] = NLA_POLICY_EXACT_LEN(16), }; /* NFSD_CMD_VERSION_SET - do */ @@ -58,7 +59,7 @@ static const struct genl_split_ops nfsd_nl_ops[] = { .cmd = NFSD_CMD_THREADS_SET, .doit = nfsd_nl_threads_set_doit, .policy = nfsd_threads_set_nl_policy, - .maxattr = 
NFSD_A_SERVER_MIN_THREADS, + .maxattr = NFSD_A_SERVER_FH_KEY, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 3a89d4708e8a..6ad3fe5d7e12 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -227,6 +227,7 @@ struct nfsd_net { spinlock_t local_clients_lock; struct list_head local_clients; #endif + siphash_key_t *fh_key; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 0bf01ae411c5..20ec00f323b4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1581,6 +1581,32 @@ out_unlock: return ret; } +/** + * nfsd_nl_fh_key_set - helper to copy fh_key from userspace + * @attr: nlattr NFSD_A_SERVER_FH_KEY + * @nn: nfsd_net + * + * Callers should hold nfsd_mutex, returns 0 on success or negative errno. + * Callers must ensure the server is shut down (sv_nrthreads == 0), + * userspace documentation asserts the key may only be set when the server + * is not running. 
+ */ +static int nfsd_nl_fh_key_set(const struct nlattr *attr, struct nfsd_net *nn) +{ + siphash_key_t *fh_key = nn->fh_key; + + if (!fh_key) { + fh_key = kmalloc(sizeof(siphash_key_t), GFP_KERNEL); + if (!fh_key) + return -ENOMEM; + nn->fh_key = fh_key; + } + + fh_key->key[0] = get_unaligned_le64(nla_data(attr)); + fh_key->key[1] = get_unaligned_le64(nla_data(attr) + 8); + return 0; +} + /** * nfsd_nl_threads_set_doit - set the number of running threads * @skb: reply buffer @@ -1622,7 +1648,8 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NFSD_A_SERVER_GRACETIME] || info->attrs[NFSD_A_SERVER_LEASETIME] || - info->attrs[NFSD_A_SERVER_SCOPE]) { + info->attrs[NFSD_A_SERVER_SCOPE] || + info->attrs[NFSD_A_SERVER_FH_KEY]) { ret = -EBUSY; if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads) goto out_unlock; @@ -1651,6 +1678,14 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info) attr = info->attrs[NFSD_A_SERVER_SCOPE]; if (attr) scope = nla_data(attr); + + attr = info->attrs[NFSD_A_SERVER_FH_KEY]; + if (attr) { + ret = nfsd_nl_fh_key_set(attr, nn); + trace_nfsd_ctl_fh_key_set((const char *)nn->fh_key, ret); + if (ret) + goto out_unlock; + } } attr = info->attrs[NFSD_A_SERVER_MIN_THREADS]; @@ -2237,6 +2272,7 @@ static __net_exit void nfsd_net_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); + kfree_sensitive(nn->fh_key); nfsd_proc_stat_shutdown(net); percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM); nfsd_idmap_shutdown(net); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d1d0b0dd0545..185a998996a0 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2240,6 +2240,28 @@ TRACE_EVENT(nfsd_end_grace, ) ); +TRACE_EVENT(nfsd_ctl_fh_key_set, + TP_PROTO( + const char *key, + int result + ), + TP_ARGS(key, result), + TP_STRUCT__entry( + __field(u32, key_hash) + __field(int, result) + ), + TP_fast_assign( + if (key) + __entry->key_hash = ~crc32_le(0xFFFFFFFF, key, 
16); + else + __entry->key_hash = 0; + __entry->result = result; + ), + TP_printk("key=0x%08x result=%d", + __entry->key_hash, __entry->result + ) +); + DECLARE_EVENT_CLASS(nfsd_copy_class, TP_PROTO( const struct nfsd4_copy *copy diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index e9efbc9e63d8..97c7447f4d14 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -36,6 +36,7 @@ enum { NFSD_A_SERVER_LEASETIME, NFSD_A_SERVER_SCOPE, NFSD_A_SERVER_MIN_THREADS, + NFSD_A_SERVER_FH_KEY, __NFSD_A_SERVER_MAX, NFSD_A_SERVER_MAX = (__NFSD_A_SERVER_MAX - 1) -- cgit v1.2.3 From a002ad8a9bc89c084bc40933065c88336700837e Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 25 Feb 2026 07:51:37 -0500 Subject: NFSD/export: Add sign_fh export option In order to signal that filehandles on this export should be signed, add a "sign_fh" export option. Filehandle signing can help the server defend against certain filehandle guessing attacks. Setting the "sign_fh" export option sets NFSEXP_SIGN_FH. In a future patch NFSD uses this signal to append a MAC onto filehandles for that export. While we're in here, tidy a few stray expflags to more closely align to the export flag order. 
Link: https://lore.kernel.org/linux-nfs/cover.1772022373.git.bcodding@hammerspace.com Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/export.c | 5 +++-- include/uapi/linux/nfsd/export.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 8e8a76a44ff0..7f4a51b832ef 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1362,13 +1362,14 @@ static struct flags { { NFSEXP_ASYNC, {"async", "sync"}}, { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, { NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}}, + { NFSEXP_SECURITY_LABEL, {"security_label", ""}}, + { NFSEXP_SIGN_FH, {"sign_fh", ""}}, { NFSEXP_NOHIDE, {"nohide", ""}}, - { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}}, + { NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, { NFSEXP_V4ROOT, {"v4root", ""}}, { NFSEXP_PNFS, {"pnfs", ""}}, - { NFSEXP_SECURITY_LABEL, {"security_label", ""}}, { 0, {"", ""}} }; diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h index a73ca3703abb..de647cf166c3 100644 --- a/include/uapi/linux/nfsd/export.h +++ b/include/uapi/linux/nfsd/export.h @@ -34,7 +34,7 @@ #define NFSEXP_GATHERED_WRITES 0x0020 #define NFSEXP_NOREADDIRPLUS 0x0040 #define NFSEXP_SECURITY_LABEL 0x0080 -/* 0x100 currently unused */ +#define NFSEXP_SIGN_FH 0x0100 #define NFSEXP_NOHIDE 0x0200 #define NFSEXP_NOSUBTREECHECK 0x0400 #define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */ @@ -55,7 +55,7 @@ #define NFSEXP_PNFS 0x20000 /* All flags that we claim to support. (Note we don't support NOACL.) 
*/ -#define NFSEXP_ALLFLAGS 0x3FEFF +#define NFSEXP_ALLFLAGS 0x3FFFF /* The flags that may vary depending on security flavor: */ #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ -- cgit v1.2.3 From ee66b9e3e1c69efc986f3932555f07121c3460a7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 26 Feb 2026 09:47:35 -0500 Subject: SUNRPC: Allocate a separate Reply page array struct svc_rqst uses a single dynamically-allocated page array (rq_pages) for both the incoming RPC Call message and the outgoing RPC Reply message. rq_respages is a sliding pointer into rq_pages that each transport receive path must compute based on how many pages the Call consumed. This boundary tracking is a source of confusion and bugs, and prevents an RPC transaction from having both a large Call and a large Reply simultaneously. Allocate rq_respages as its own page array, eliminating the boundary arithmetic. This decouples Call and Reply buffer lifetimes, following the precedent set by rq_bvec (a separate dynamically- allocated array for I/O vectors). Each svc_rqst now pins twice as many pages as before. For a server running 16 threads with a 1MB maximum payload, the additional cost is roughly 16MB of pinned memory. The new dynamic svc thread count facility keeps this overhead minimal on an idle server. A subsequent patch in this series limits per-request repopulation to only the pages released during the previous RPC, avoiding a full-array scan on each call to svc_alloc_arg(). Note: We've considered several alternatives to maintaining a full second array. Each alternative reintroduces either boundary logic complexity or I/O-path allocation pressure. rq_next_page is initialized in svc_alloc_arg() and svc_process() during Reply construction, and in svc_rdma_recvfrom() as a precaution on error paths. Transport receive paths no longer compute it from the Call size. 
Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 47 ++++++++++++++++----------------- net/sunrpc/svc.c | 29 ++++++++++++++++---- net/sunrpc/svc_xprt.c | 36 ++++++++++++++++++------- net/sunrpc/svcsock.c | 6 ----- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 15 +++-------- 5 files changed, 77 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 62152e4f3bcc..3b1a98ab5cba 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -134,25 +134,24 @@ enum { extern u32 svc_max_payload(const struct svc_rqst *rqstp); /* - * RPC Requests and replies are stored in one or more pages. - * We maintain an array of pages for each server thread. - * Requests are copied into these pages as they arrive. Remaining - * pages are available to write the reply into. + * RPC Call and Reply messages each have their own page array. + * rq_pages holds the incoming Call message; rq_respages holds + * the outgoing Reply message. Both arrays are sized to + * svc_serv_maxpages() entries and are allocated dynamically. * - * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread - * needs to allocate more to replace those used in sending. To help keep track - * of these pages we have a receive list where all pages initialy live, and a - * send list where pages are moved to when there are to be part of a reply. + * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each + * server thread needs to allocate more to replace those used in + * sending. * - * We use xdr_buf for holding responses as it fits well with NFS - * read responses (that have a header, and some data pages, and possibly - * a tail) and means we can share some client side routines. + * xdr_buf holds responses; the structure fits NFS read responses + * (header, data pages, optional tail) and enables sharing of + * client-side routines. 
* - * The xdr_buf.head kvec always points to the first page in the rq_*pages - * list. The xdr_buf.pages pointer points to the second page on that - * list. xdr_buf.tail points to the end of the first page. - * This assumes that the non-page part of an rpc reply will fit - * in a page - NFSd ensures this. lockd also has no trouble. + * The xdr_buf.head kvec always points to the first page in the + * rq_*pages list. The xdr_buf.pages pointer points to the second + * page on that list. xdr_buf.tail points to the end of the first + * page. This assumes that the non-page part of an rpc reply will + * fit in a page - NFSd ensures this. lockd also has no trouble. */ /** @@ -162,10 +161,10 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * Returns a count of pages or vectors that can hold the maximum * size RPC message for @serv. * - * Each request/reply pair can have at most one "payload", plus two - * pages, one for the request, and one for the reply. - * nfsd_splice_actor() might need an extra page when a READ payload - * is not page-aligned. + * Each page array can hold at most one payload plus two + * overhead pages (one for the RPC header, one for tail data). + * nfsd_splice_actor() might need an extra page when a READ + * payload is not page-aligned. 
*/ static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv) { @@ -204,11 +203,11 @@ struct svc_rqst { struct xdr_stream rq_res_stream; struct folio *rq_scratch_folio; struct xdr_buf rq_res; - unsigned long rq_maxpages; /* num of entries in rq_pages */ - struct page * *rq_pages; - struct page * *rq_respages; /* points into rq_pages */ + unsigned long rq_maxpages; /* entries per page array */ + struct page * *rq_pages; /* Call buffer pages */ + struct page * *rq_respages; /* Reply buffer pages */ struct page * *rq_next_page; /* next reply page to use */ - struct page * *rq_page_end; /* one past the last page */ + struct page * *rq_page_end; /* one past the last reply page */ struct folio_batch rq_fbatch; struct bio_vec *rq_bvec; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index f7ec02457328..9abef638b1e0 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -638,13 +638,23 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) { rqstp->rq_maxpages = svc_serv_maxpages(serv); - /* rq_pages' last entry is NULL for historical reasons. 
*/ + /* +1 for a NULL sentinel readable by nfsd_splice_actor() */ rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages + 1, sizeof(struct page *), GFP_KERNEL, node); if (!rqstp->rq_pages) return false; + /* +1 for a NULL sentinel at rq_page_end (see svc_rqst_replace_page) */ + rqstp->rq_respages = kcalloc_node(rqstp->rq_maxpages + 1, + sizeof(struct page *), + GFP_KERNEL, node); + if (!rqstp->rq_respages) { + kfree(rqstp->rq_pages); + rqstp->rq_pages = NULL; + return false; + } + return true; } @@ -656,10 +666,19 @@ svc_release_buffer(struct svc_rqst *rqstp) { unsigned long i; - for (i = 0; i < rqstp->rq_maxpages; i++) - if (rqstp->rq_pages[i]) - put_page(rqstp->rq_pages[i]); - kfree(rqstp->rq_pages); + if (rqstp->rq_pages) { + for (i = 0; i < rqstp->rq_maxpages; i++) + if (rqstp->rq_pages[i]) + put_page(rqstp->rq_pages[i]); + kfree(rqstp->rq_pages); + } + + if (rqstp->rq_respages) { + for (i = 0; i < rqstp->rq_maxpages; i++) + if (rqstp->rq_respages[i]) + put_page(rqstp->rq_respages[i]); + kfree(rqstp->rq_respages); + } } static void diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 56a663b8939f..e027765f4307 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -650,14 +650,13 @@ static void svc_check_conn_limits(struct svc_serv *serv) } } -static bool svc_alloc_arg(struct svc_rqst *rqstp) +static bool svc_fill_pages(struct svc_rqst *rqstp, struct page **pages, + unsigned long npages) { - struct xdr_buf *arg = &rqstp->rq_arg; - unsigned long pages, filled, ret; + unsigned long filled, ret; - pages = rqstp->rq_maxpages; - for (filled = 0; filled < pages; filled = ret) { - ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages); + for (filled = 0; filled < npages; filled = ret) { + ret = alloc_pages_bulk(GFP_KERNEL, npages, pages); if (ret > filled) /* Made progress, don't sleep yet */ continue; @@ -667,11 +666,29 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) set_current_state(TASK_RUNNING); return false; } - 
trace_svc_alloc_arg_err(pages, ret); + trace_svc_alloc_arg_err(npages, ret); memalloc_retry_wait(GFP_KERNEL); } - rqstp->rq_page_end = &rqstp->rq_pages[pages]; - rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */ + return true; +} + +static bool svc_alloc_arg(struct svc_rqst *rqstp) +{ + struct xdr_buf *arg = &rqstp->rq_arg; + unsigned long pages; + + pages = rqstp->rq_maxpages; + + if (!svc_fill_pages(rqstp, rqstp->rq_pages, pages)) + return false; + if (!svc_fill_pages(rqstp, rqstp->rq_respages, pages)) + return false; + rqstp->rq_next_page = rqstp->rq_respages; + rqstp->rq_page_end = &rqstp->rq_respages[pages]; + /* svc_rqst_replace_page() dereferences *rq_next_page even + * at rq_page_end; NULL prevents releasing a garbage page. + */ + rqstp->rq_page_end[0] = NULL; /* Make arg->head point to first page and arg->pages point to rest */ arg->head[0].iov_base = page_address(rqstp->rq_pages[0]); @@ -1277,7 +1294,6 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_addrlen = dr->addrlen; /* Save off transport header len in case we get deferred again */ rqstp->rq_daddr = dr->daddr; - rqstp->rq_respages = rqstp->rq_pages; rqstp->rq_xprt_ctxt = dr->xprt_ctxt; dr->xprt_ctxt = NULL; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f28c6076f7e8..c86f28f720f7 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -351,8 +351,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) bvec_set_page(&bvec[i], rqstp->rq_pages[i], PAGE_SIZE, 0); - rqstp->rq_respages = &rqstp->rq_pages[i]; - rqstp->rq_next_page = rqstp->rq_respages + 1; iov_iter_bvec(&msg.msg_iter, ITER_DEST, bvec, i, buflen); if (seek) { @@ -677,13 +675,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) if (len <= rqstp->rq_arg.head[0].iov_len) { rqstp->rq_arg.head[0].iov_len = len; rqstp->rq_arg.page_len = 0; - rqstp->rq_respages = rqstp->rq_pages+1; } else { 
rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - rqstp->rq_respages = rqstp->rq_pages + 1 + - DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); } - rqstp->rq_next_page = rqstp->rq_respages+1; if (serv->sv_stats) serv->sv_stats->netudpcnt++; diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index e7e4a39ca6c6..3081a37a5896 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -861,18 +861,12 @@ static noinline void svc_rdma_read_complete(struct svc_rqst *rqstp, unsigned int i; /* Transfer the Read chunk pages into @rqstp.rq_pages, replacing - * the rq_pages that were already allocated for this rqstp. + * the receive buffer pages already allocated for this rqstp. */ - release_pages(rqstp->rq_respages, ctxt->rc_page_count); + release_pages(rqstp->rq_pages, ctxt->rc_page_count); for (i = 0; i < ctxt->rc_page_count; i++) rqstp->rq_pages[i] = ctxt->rc_pages[i]; - /* Update @rqstp's result send buffer to start after the - * last page in the RDMA Read payload. - */ - rqstp->rq_respages = &rqstp->rq_pages[ctxt->rc_page_count]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - /* Prevent svc_rdma_recv_ctxt_put() from releasing the * pages in ctxt::rc_pages a second time. */ @@ -931,10 +925,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) struct svc_rdma_recv_ctxt *ctxt; int ret; - /* Prevent svc_xprt_release() from releasing pages in rq_pages - * when returning 0 or an error. + /* Precaution: a zero page count on error return causes + * svc_rqst_release_pages() to release nothing. */ - rqstp->rq_respages = rqstp->rq_pages; rqstp->rq_next_page = rqstp->rq_respages; rqstp->rq_xprt_ctxt = NULL; -- cgit v1.2.3 From 7ed7504287a627834f2a35ef04e5dfd26d1c8986 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 26 Feb 2026 09:47:38 -0500 Subject: SUNRPC: Track consumed rq_pages entries The rq_pages array holds pages allocated for incoming RPC requests. 
Two transport receive paths NULL entries in rq_pages to prevent svc_rqst_release_pages() from freeing pages that the transport has taken ownership of: - svc_tcp_save_pages() moves partial request data pages to svsk->sk_pages during multi-fragment TCP reassembly. - svc_rdma_clear_rqst_pages() moves request data pages to head->rc_pages because they are targets of active RDMA Read WRs. A new rq_pages_nfree field in struct svc_rqst records how many entries were NULLed. svc_alloc_arg() uses it to refill only those entries rather than scanning the full rq_pages array. In steady state, the transport NULLs a handful of entries per RPC, so the allocator visits only those entries instead of the full ~259 slots (for 1MB messages). Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 10 ++++++++++ net/sunrpc/svc.c | 1 + net/sunrpc/svc_xprt.c | 11 ++++++++--- net/sunrpc/svcsock.c | 1 + net/sunrpc/xprtrdma/svc_rdma_rw.c | 1 + 5 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 3b1a98ab5cba..c3399cf64524 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -143,6 +143,15 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * server thread needs to allocate more to replace those used in * sending. * + * rq_pages request page contract: + * + * Transport receive paths that move request data pages out of + * rq_pages -- TCP multi-fragment reassembly (svc_tcp_save_pages) + * and RDMA Read I/O (svc_rdma_clear_rqst_pages) -- NULL those + * entries to prevent svc_rqst_release_pages() from freeing pages + * still in transport use, and set rq_pages_nfree to the count. + * svc_alloc_arg() refills only that many rq_pages entries. + * * xdr_buf holds responses; the structure fits NFS read responses * (header, data pages, optional tail) and enables sharing of * client-side routines. 
@@ -204,6 +213,7 @@ struct svc_rqst { struct folio *rq_scratch_folio; struct xdr_buf rq_res; unsigned long rq_maxpages; /* entries per page array */ + unsigned long rq_pages_nfree; /* rq_pages entries NULLed by transport */ struct page * *rq_pages; /* Call buffer pages */ struct page * *rq_respages; /* Reply buffer pages */ struct page * *rq_next_page; /* next reply page to use */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 0ce16e9abdf6..6e57e35fa6d6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -655,6 +655,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) return false; } + rqstp->rq_pages_nfree = rqstp->rq_maxpages; return true; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e027765f4307..795b5729525f 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -675,12 +675,17 @@ static bool svc_fill_pages(struct svc_rqst *rqstp, struct page **pages, static bool svc_alloc_arg(struct svc_rqst *rqstp) { struct xdr_buf *arg = &rqstp->rq_arg; - unsigned long pages; + unsigned long pages, nfree; pages = rqstp->rq_maxpages; - if (!svc_fill_pages(rqstp, rqstp->rq_pages, pages)) - return false; + nfree = rqstp->rq_pages_nfree; + if (nfree) { + if (!svc_fill_pages(rqstp, rqstp->rq_pages, nfree)) + return false; + rqstp->rq_pages_nfree = 0; + } + if (!svc_fill_pages(rqstp, rqstp->rq_respages, pages)) return false; rqstp->rq_next_page = rqstp->rq_respages; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index c86f28f720f7..2ce43f9995f1 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1009,6 +1009,7 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) svsk->sk_pages[i] = rqstp->rq_pages[i]; rqstp->rq_pages[i] = NULL; } + rqstp->rq_pages_nfree = npages; } static void svc_tcp_clear_pages(struct svc_sock *svsk) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 4ec2f9ae06aa..cf4a1762b629 100644 --- 
a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -1107,6 +1107,7 @@ static void svc_rdma_clear_rqst_pages(struct svc_rqst *rqstp, head->rc_pages[i] = rqstp->rq_pages[i]; rqstp->rq_pages[i] = NULL; } + rqstp->rq_pages_nfree = head->rc_page_count; } /** -- cgit v1.2.3 From d7f3efd9ff474867b04e1ea784690f02450a245b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 26 Feb 2026 09:47:39 -0500 Subject: SUNRPC: Optimize rq_respages allocation in svc_alloc_arg svc_alloc_arg() invokes alloc_pages_bulk() with the full rq_maxpages count (~259 for 1MB messages) for the rq_respages array, causing a full-array scan despite most slots holding valid pages. svc_rqst_release_pages() NULLs only the range [rq_respages, rq_next_page) after each RPC, so only that range contains NULL entries. Limit the rq_respages fill in svc_alloc_arg() to that range instead of scanning the full array. svc_init_buffer() initializes rq_next_page to span the entire rq_respages array, so the first svc_alloc_arg() call fills all slots. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 4 ++++ net/sunrpc/svc.c | 1 + net/sunrpc/svc_xprt.c | 8 +++++++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index c3399cf64524..669c944eaf7f 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -152,6 +152,10 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); * still in transport use, and set rq_pages_nfree to the count. * svc_alloc_arg() refills only that many rq_pages entries. * + * For rq_respages, svc_rqst_release_pages() NULLs entries in + * [rq_respages, rq_next_page) after each RPC. svc_alloc_arg() + * refills only that range. + * * xdr_buf holds responses; the structure fits NFS read responses * (header, data pages, optional tail) and enables sharing of * client-side routines. 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 6e57e35fa6d6..5e0b5ec2fd52 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -656,6 +656,7 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) } rqstp->rq_pages_nfree = rqstp->rq_maxpages; + rqstp->rq_next_page = rqstp->rq_respages + rqstp->rq_maxpages; return true; } diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 795b5729525f..b16e710926c1 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -686,8 +686,14 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp) rqstp->rq_pages_nfree = 0; } - if (!svc_fill_pages(rqstp, rqstp->rq_respages, pages)) + if (WARN_ON_ONCE(rqstp->rq_next_page < rqstp->rq_respages)) return false; + nfree = rqstp->rq_next_page - rqstp->rq_respages; + if (nfree) { + if (!svc_fill_pages(rqstp, rqstp->rq_respages, nfree)) + return false; + } + rqstp->rq_next_page = rqstp->rq_respages; rqstp->rq_page_end = &rqstp->rq_respages[pages]; /* svc_rqst_replace_page() dereferences *rq_next_page even -- cgit v1.2.3 From ccc89b9d1ed233349cfe8d87b842e7351b74d8de Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 27 Feb 2026 09:03:28 -0500 Subject: svcrdma: Add fair queuing for Send Queue access When the Send Queue fills, multiple threads may wait for SQ slots. The previous implementation had no ordering guarantee, allowing starvation when one thread repeatedly acquires slots while others wait indefinitely. Introduce a ticket-based fair queuing system. Each waiter takes a ticket number and is served in FIFO order. This ensures forward progress for all waiters when SQ capacity is constrained. The implementation has two phases: 1. Fast path: attempt to reserve SQ slots without waiting 2. 
Slow path: take a ticket, wait for turn, then wait for slots The ticket system adds two atomic counters to the transport: - sc_sq_ticket_head: next ticket to issue - sc_sq_ticket_tail: ticket currently being served A dedicated wait queue (sc_sq_ticket_wait) handles ticket ordering, separate from sc_send_wait which handles SQ capacity. This separation ensures that send completions (the high-frequency wake source) wake only the current ticket holder rather than all queued waiters. Ticket handoff wakes only the ticket wait queue, and each ticket holder that exits via connection close propagates the wake to the next waiter in line. When a waiter successfully reserves slots, it advances the tail counter and wakes the next waiter. This creates an orderly handoff that prevents starvation while maintaining good throughput on the fast path when contention is low. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 10 ++ net/sunrpc/xprtrdma/svc_rdma_rw.c | 37 ++----- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 162 +++++++++++++++++++++++-------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 6 +- 4 files changed, 146 insertions(+), 69 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 57f4fd94166a..658b8498177e 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -84,6 +84,9 @@ struct svcxprt_rdma { atomic_t sc_sq_avail; /* SQEs ready to be consumed */ unsigned int sc_sq_depth; /* Depth of SQ */ + atomic_t sc_sq_ticket_head; /* Next ticket to issue */ + atomic_t sc_sq_ticket_tail; /* Ticket currently serving */ + wait_queue_head_t sc_sq_ticket_wait; /* Ticket ordering waitlist */ __be32 sc_fc_credits; /* Forward credits */ u32 sc_max_requests; /* Max requests */ u32 sc_max_bc_requests;/* Backward credits */ @@ -306,6 +309,13 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, struct svc_rdma_recv_ctxt *rctxt, int status); extern void 
svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail); +extern int svc_rdma_sq_wait(struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, int sqecount); +extern int svc_rdma_post_send_err(struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, + const struct ib_send_wr *bad_wr, + const struct ib_send_wr *first_wr, + int sqecount, int ret); extern int svc_rdma_sendto(struct svc_rqst *); extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index cf4a1762b629..97bce806974b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -405,34 +405,17 @@ static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma, cqe = NULL; } - do { - if (atomic_sub_return(cc->cc_sqecount, - &rdma->sc_sq_avail) > 0) { - cc->cc_posttime = ktime_get(); - ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); - if (ret) - break; - return 0; - } - - percpu_counter_inc(&svcrdma_stat_sq_starve); - trace_svcrdma_sq_full(rdma, &cc->cc_cid); - atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); - wait_event(rdma->sc_send_wait, - atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount); - trace_svcrdma_sq_retry(rdma, &cc->cc_cid); - } while (1); - - trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret); - svc_xprt_deferred_close(&rdma->sc_xprt); - - /* If even one was posted, there will be a completion. */ - if (bad_wr != first_wr) - return 0; + ret = svc_rdma_sq_wait(rdma, &cc->cc_cid, cc->cc_sqecount); + if (ret < 0) + return ret; - atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail); - wake_up(&rdma->sc_send_wait); - return -ENOTCONN; + cc->cc_posttime = ktime_get(); + ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); + if (ret) + return svc_rdma_post_send_err(rdma, &cc->cc_cid, bad_wr, + first_wr, cc->cc_sqecount, + ret); + return 0; } /* Build a bvec that covers one kvec in an xdr_buf. 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 17c8429da9d5..02559947272a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -294,6 +294,117 @@ void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail) wake_up(&rdma->sc_send_wait); } +/** + * svc_rdma_sq_wait - Wait for SQ slots using fair queuing + * @rdma: controlling transport + * @cid: completion ID for tracing + * @sqecount: number of SQ entries needed + * + * A ticket-based system ensures fair ordering when multiple threads + * wait for Send Queue capacity. Each waiter takes a ticket and is + * served in order, preventing starvation. + * + * Protocol invariant: every ticket holder must increment + * sc_sq_ticket_tail exactly once, whether the reservation + * succeeds or the connection closes. Failing to advance the + * tail stalls all subsequent waiters. + * + * The ticket counters are signed 32-bit atomics. After + * wrapping through INT_MAX, the equality check + * (tail == ticket) remains correct because both counters + * advance monotonically and the comparison uses exact + * equality rather than relational operators. + * + * Return values: + * %0: SQ slots were reserved successfully + * %-ENOTCONN: The connection was lost + */ +int svc_rdma_sq_wait(struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, int sqecount) +{ + int ticket; + + /* Fast path: try to reserve SQ slots without waiting. + * + * A failed reservation temporarily understates sc_sq_avail + * until the compensating atomic_add restores it. A Send + * completion arriving in that window sees a lower count + * than reality, but the value self-corrects once the add + * completes. No ordering guarantee is needed here because + * the slow path serializes all contended waiters. 
+ */ + if (likely(atomic_sub_return(sqecount, &rdma->sc_sq_avail) >= 0)) + return 0; + atomic_add(sqecount, &rdma->sc_sq_avail); + + /* Slow path: take a ticket and wait in line */ + ticket = atomic_fetch_inc(&rdma->sc_sq_ticket_head); + + percpu_counter_inc(&svcrdma_stat_sq_starve); + trace_svcrdma_sq_full(rdma, cid); + + /* Wait until all earlier tickets have been served */ + wait_event(rdma->sc_sq_ticket_wait, + test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) || + atomic_read(&rdma->sc_sq_ticket_tail) == ticket); + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) + goto out_close; + + /* It's our turn. Wait for enough SQ slots to be available. */ + while (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) { + atomic_add(sqecount, &rdma->sc_sq_avail); + + wait_event(rdma->sc_send_wait, + test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) || + atomic_read(&rdma->sc_sq_avail) >= sqecount); + if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) + goto out_close; + } + + /* Slots reserved successfully. Let the next waiter proceed. 
*/ + atomic_inc(&rdma->sc_sq_ticket_tail); + wake_up(&rdma->sc_sq_ticket_wait); + trace_svcrdma_sq_retry(rdma, cid); + return 0; + +out_close: + atomic_inc(&rdma->sc_sq_ticket_tail); + wake_up(&rdma->sc_sq_ticket_wait); + return -ENOTCONN; +} + +/** + * svc_rdma_post_send_err - Handle ib_post_send failure + * @rdma: controlling transport + * @cid: completion ID for tracing + * @bad_wr: first WR that was not posted + * @first_wr: first WR in the chain + * @sqecount: number of SQ entries that were reserved + * @ret: error code from ib_post_send + * + * Return values: + * %0: At least one WR was posted; a completion handles cleanup + * %-ENOTCONN: No WRs were posted; SQ slots are released + */ +int svc_rdma_post_send_err(struct svcxprt_rdma *rdma, + const struct rpc_rdma_cid *cid, + const struct ib_send_wr *bad_wr, + const struct ib_send_wr *first_wr, + int sqecount, int ret) +{ + trace_svcrdma_sq_post_err(rdma, cid, ret); + svc_xprt_deferred_close(&rdma->sc_xprt); + + /* If even one WR was posted, a Send completion will + * return the reserved SQ slots. + */ + if (bad_wr != first_wr) + return 0; + + svc_rdma_wake_send_waiters(rdma, sqecount); + return -ENOTCONN; +} + /** * svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC * @cq: Completion Queue context @@ -336,11 +447,6 @@ flushed: * that these values remain available after the ib_post_send() call. * In some error flow cases, svc_rdma_wc_send() releases @ctxt. * - * Note there is potential for starvation when the Send Queue is - * full because there is no order to when waiting threads are - * awoken. The transport is typically provisioned with a deep - * enough Send Queue that SQ exhaustion should be a rare event. 
- * * Return values: * %0: @ctxt's WR chain was posted successfully * %-ENOTCONN: The connection was lost @@ -362,42 +468,16 @@ int svc_rdma_post_send(struct svcxprt_rdma *rdma, send_wr->sg_list[0].length, DMA_TO_DEVICE); - /* If the SQ is full, wait until an SQ entry is available */ - while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) { - if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) { - svc_rdma_wake_send_waiters(rdma, sqecount); - - /* When the transport is torn down, assume - * ib_drain_sq() will trigger enough Send - * completions to wake us. The XPT_CLOSE test - * above should then cause the while loop to - * exit. - */ - percpu_counter_inc(&svcrdma_stat_sq_starve); - trace_svcrdma_sq_full(rdma, &cid); - wait_event(rdma->sc_send_wait, - atomic_read(&rdma->sc_sq_avail) > 0); - trace_svcrdma_sq_retry(rdma, &cid); - continue; - } - - trace_svcrdma_post_send(ctxt); - ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); - if (ret) { - trace_svcrdma_sq_post_err(rdma, &cid, ret); - svc_xprt_deferred_close(&rdma->sc_xprt); - - /* If even one WR was posted, there will be a - * Send completion that bumps sc_sq_avail. 
- */ - if (bad_wr == first_wr) { - svc_rdma_wake_send_waiters(rdma, sqecount); - break; - } - } - return 0; - } - return -ENOTCONN; + ret = svc_rdma_sq_wait(rdma, &cid, sqecount); + if (ret < 0) + return ret; + + trace_svcrdma_post_send(ctxt); + ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr); + if (ret) + return svc_rdma_post_send_err(rdma, &cid, bad_wr, + first_wr, sqecount, ret); + return 0; } /** diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f2d72181a6fe..f18bc60d9f4f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -179,6 +179,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv, init_llist_head(&cma_xprt->sc_recv_ctxts); init_llist_head(&cma_xprt->sc_rw_ctxts); init_waitqueue_head(&cma_xprt->sc_send_wait); + init_waitqueue_head(&cma_xprt->sc_sq_ticket_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); @@ -477,6 +478,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) newxprt->sc_sq_depth = dev->attrs.max_qp_wr; atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); + atomic_set(&newxprt->sc_sq_ticket_head, 0); + atomic_set(&newxprt->sc_sq_ticket_tail, 0); newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { @@ -649,7 +652,8 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) * If there are already waiters on the SQ, * return false. */ - if (waitqueue_active(&rdma->sc_send_wait)) + if (waitqueue_active(&rdma->sc_send_wait) || + waitqueue_active(&rdma->sc_sq_ticket_wait)) return 0; /* Otherwise return true. 
*/ -- cgit v1.2.3 From d16f060f3ee297424c0aba047b1d49208adb9318 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 27 Feb 2026 09:03:31 -0500 Subject: svcrdma: Add Write chunk WRs to the RPC's Send WR chain Previously, Write chunk RDMA Writes were posted via a separate ib_post_send() call with their own completion handler. Each Write chunk incurred a doorbell and generated a completion event. Link Write chunk WRs onto the RPC Reply's Send WR chain so that a single ib_post_send() call posts both the RDMA Writes and the Send WR. A single completion event signals that all operations have finished. This reduces both doorbell rate and completion rate, as well as eliminating the latency of a round-trip between the Write chunk completion and the subsequent Send WR posting. The lifecycle of Write chunk resources changes: previously, the svc_rdma_write_done() completion handler released Write chunk resources when RDMA Writes completed. With WR chaining, resources remain live until the Send completion. A new sc_write_info_list tracks Write chunk metadata attached to each Send context, and svc_rdma_write_chunk_release() frees these resources when the Send context is released. The svc_rdma_write_done() handler now handles only error cases. On success it returns immediately since the Send completion handles resource release. On failure (WR flush), it closes the connection to signal to the client that the RPC Reply is incomplete. 
Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 13 +++-- net/sunrpc/xprtrdma/svc_rdma_rw.c | 94 +++++++++++++++++++++++++++-------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 +++- 3 files changed, 91 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 658b8498177e..df6e08aaad57 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -216,6 +216,7 @@ struct svc_rdma_recv_ctxt { */ struct svc_rdma_write_info { struct svcxprt_rdma *wi_rdma; + struct list_head wi_list; const struct svc_rdma_chunk *wi_chunk; @@ -244,7 +245,10 @@ struct svc_rdma_send_ctxt { struct ib_cqe sc_cqe; struct xdr_buf sc_hdrbuf; struct xdr_stream sc_stream; + + struct list_head sc_write_info_list; struct svc_rdma_write_info sc_reply_info; + void *sc_xprt_buf; int sc_page_count; int sc_cur_sge_no; @@ -277,11 +281,14 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma, extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma, struct svc_rdma_chunk_ctxt *cc, enum dma_data_direction dir); +extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt); extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt); -extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_recv_ctxt *rctxt, - const struct xdr_buf *xdr); +extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr); extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma, const struct svc_rdma_pcl *write_pcl, const struct svc_rdma_pcl *reply_pcl, diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 97bce806974b..ebc90c12c835 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -251,6 +251,28 @@ 
static void svc_rdma_write_info_free(struct svc_rdma_write_info *info) queue_work(svcrdma_wq, &info->wi_work); } +/** + * svc_rdma_write_chunk_release - Release Write chunk I/O resources + * @rdma: controlling transport + * @ctxt: Send context that is being released + * + * Write chunk resources remain live until Send completion because + * Write WRs are chained to the Send WR. This function releases all + * write_info structures accumulated on @ctxt->sc_write_info_list. + */ +void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *ctxt) +{ + struct svc_rdma_write_info *info; + + while (!list_empty(&ctxt->sc_write_info_list)) { + info = list_first_entry(&ctxt->sc_write_info_list, + struct svc_rdma_write_info, wi_list); + list_del(&info->wi_list); + svc_rdma_write_info_free(info); + } +} + /** * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources * @rdma: controlling transport @@ -307,13 +329,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct svc_rdma_chunk_ctxt *cc = container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe); - struct svc_rdma_write_info *info = - container_of(cc, struct svc_rdma_write_info, wi_cc); switch (wc->status) { case IB_WC_SUCCESS: trace_svcrdma_wc_write(&cc->cc_cid); - break; + return; case IB_WC_WR_FLUSH_ERR: trace_svcrdma_wc_write_flush(wc, &cc->cc_cid); break; @@ -321,12 +341,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc) trace_svcrdma_wc_write_err(wc, &cc->cc_cid); } - svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount); - - if (unlikely(wc->status != IB_WC_SUCCESS)) - svc_xprt_deferred_close(&rdma->sc_xprt); - - svc_rdma_write_info_free(info); + /* The RDMA Write has flushed, so the client won't get + * some of the outgoing RPC message. Signal the loss + * to the client by closing the connection. 
+ */ + svc_xprt_deferred_close(&rdma->sc_xprt); } /** @@ -600,13 +619,27 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data) return xdr->len; } -static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, - const struct svc_rdma_chunk *chunk, - const struct xdr_buf *xdr) +/* + * svc_rdma_prepare_write_chunk - Link Write WRs for @chunk onto @sctxt's chain + * + * Write WRs are prepended to the Send WR chain so that a single + * ib_post_send() posts both RDMA Writes and the final Send. Only + * the first WR in each chunk gets a CQE for error detection; + * subsequent WRs complete without individual completion events. + * The Send WR's signaled completion indicates all chained + * operations have finished. + */ +static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma, + struct svc_rdma_send_ctxt *sctxt, + const struct svc_rdma_chunk *chunk, + const struct xdr_buf *xdr) { struct svc_rdma_write_info *info; struct svc_rdma_chunk_ctxt *cc; + struct ib_send_wr *first_wr; struct xdr_buf payload; + struct list_head *pos; + struct ib_cqe *cqe; int ret; if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position, @@ -622,10 +655,25 @@ static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, if (ret != payload.len) goto out_err; - trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); - ret = svc_rdma_post_chunk_ctxt(rdma, cc); - if (ret < 0) + ret = -EINVAL; + if (unlikely(sctxt->sc_sqecount + cc->cc_sqecount > rdma->sc_sq_depth)) goto out_err; + + first_wr = sctxt->sc_wr_chain; + cqe = &cc->cc_cqe; + list_for_each(pos, &cc->cc_rwctxts) { + struct svc_rdma_rw_ctxt *rwc; + + rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list); + first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp, + rdma->sc_port_num, cqe, first_wr); + cqe = NULL; + } + sctxt->sc_wr_chain = first_wr; + sctxt->sc_sqecount += cc->cc_sqecount; + list_add(&info->wi_list, &sctxt->sc_write_info_list); + + trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount); 
return 0; out_err: @@ -634,17 +682,19 @@ out_err: } /** - * svc_rdma_send_write_list - Send all chunks on the Write list + * svc_rdma_prepare_write_list - Construct WR chain for sending Write list * @rdma: controlling RDMA transport * @rctxt: Write list provisioned by the client + * @sctxt: Send WR resources * @xdr: xdr_buf containing an RPC Reply message * - * Returns zero on success, or a negative errno if one or more - * Write chunks could not be sent. + * Returns zero on success, or a negative errno if WR chain + * construction fails for one or more Write chunks. */ -int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, - const struct svc_rdma_recv_ctxt *rctxt, - const struct xdr_buf *xdr) +int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma, + const struct svc_rdma_recv_ctxt *rctxt, + struct svc_rdma_send_ctxt *sctxt, + const struct xdr_buf *xdr) { struct svc_rdma_chunk *chunk; int ret; @@ -652,7 +702,7 @@ int svc_rdma_send_write_list(struct svcxprt_rdma *rdma, pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) { if (!chunk->ch_payload_length) break; - ret = svc_rdma_send_write_chunk(rdma, chunk, xdr); + ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr); if (ret < 0) return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index bef68efa7034..8b3f0c8c14b2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -150,6 +150,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma) ctxt->sc_send_wr.sg_list = ctxt->sc_sges; ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED; ctxt->sc_cqe.done = svc_rdma_wc_send; + INIT_LIST_HEAD(&ctxt->sc_write_info_list); ctxt->sc_xprt_buf = buffer; xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf, rdma->sc_max_req_size); @@ -237,6 +238,7 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma, struct ib_device *device = rdma->sc_cm_id->device; unsigned int i; + svc_rdma_write_chunk_release(rdma, ctxt); 
svc_rdma_reply_chunk_release(rdma, ctxt); if (ctxt->sc_page_count) @@ -1054,6 +1056,12 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma, sctxt->sc_send_wr.num_sge = 1; sctxt->sc_send_wr.opcode = IB_WR_SEND; sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len; + + /* Ensure only the error message is posted, not any previously + * prepared Write chunk WRs. + */ + sctxt->sc_wr_chain = &sctxt->sc_send_wr; + sctxt->sc_sqecount = 1; if (svc_rdma_post_send(rdma, sctxt)) goto put_ctxt; return; @@ -1101,7 +1109,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (!p) goto put_ctxt; - ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res); + ret = svc_rdma_prepare_write_list(rdma, rctxt, sctxt, &rqstp->rq_res); if (ret < 0) goto put_ctxt; -- cgit v1.2.3 From 3603bf99062c6d563df4fba3848f829d5401d959 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 28 Feb 2026 14:09:22 -0800 Subject: SUNRPC: xdr.h: fix all kernel-doc warnings Correct a function parameter name (s/page/folio/) and add function return value sections for multiple functions to eliminate kernel-doc warnings: Warning: include/linux/sunrpc/xdr.h:298 function parameter 'folio' not described in 'xdr_set_scratch_folio' Warning: include/linux/sunrpc/xdr.h:337 No description found for return value of 'xdr_stream_remaining' Warning: include/linux/sunrpc/xdr.h:357 No description found for return value of 'xdr_align_size' Warning: include/linux/sunrpc/xdr.h:374 No description found for return value of 'xdr_pad_size' Warning: include/linux/sunrpc/xdr.h:387 No description found for return value of 'xdr_stream_encode_item_present' Signed-off-by: Randy Dunlap Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 48 +++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 152597750f55..b639a6fafcbc 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ 
-290,7 +290,7 @@ xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen) /** * xdr_set_scratch_folio - Attach a scratch buffer for decoding data * @xdr: pointer to xdr_stream struct - * @page: an anonymous folio + * @folio: an anonymous folio * * See xdr_set_scratch_buffer(). */ @@ -330,7 +330,7 @@ static inline void xdr_commit_encode(struct xdr_stream *xdr) * xdr_stream_remaining - Return the number of bytes remaining in the stream * @xdr: pointer to struct xdr_stream * - * Return value: + * Returns: * Number of bytes remaining in @xdr before xdr->end */ static inline size_t @@ -350,7 +350,7 @@ ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor, * xdr_align_size - Calculate padded size of an object * @n: Size of an object being XDR encoded (in bytes) * - * Return value: + * Returns: * Size (in bytes) of the object including xdr padding */ static inline size_t @@ -368,7 +368,7 @@ xdr_align_size(size_t n) * This implementation avoids the need for conditional * branches or modulo division. 
* - * Return value: + * Returns: * Size (in bytes) of the needed XDR pad */ static inline size_t xdr_pad_size(size_t n) @@ -380,7 +380,7 @@ static inline size_t xdr_pad_size(size_t n) * xdr_stream_encode_item_present - Encode a "present" list item * @xdr: pointer to xdr_stream * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -399,7 +399,7 @@ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr) * xdr_stream_encode_item_absent - Encode a "not present" list item * @xdr: pointer to xdr_stream * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -419,7 +419,7 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr) * @p: address in a buffer into which to encode * @n: boolean value to encode * - * Return value: + * Returns: * Address of item following the encoded boolean */ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) @@ -433,7 +433,7 @@ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n) * @xdr: pointer to xdr_stream * @n: boolean value to encode * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -453,7 +453,7 @@ static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n) * @xdr: pointer to xdr_stream * @n: integer to encode * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -474,7 +474,7 @@ xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n) * @xdr: pointer to xdr_stream * @n: integer to encode * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -495,7 +495,7 @@ xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n) * @xdr: pointer to xdr_stream * @n: 64-bit integer to 
encode * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -517,7 +517,7 @@ xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n) * @ptr: pointer to void pointer * @len: size of object * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -542,7 +542,7 @@ xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len) * @ptr: pointer to opaque data object * @len: size of object pointed to by @ptr * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -563,7 +563,7 @@ xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t l * @ptr: pointer to opaque data object * @len: size of object pointed to by @ptr * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -585,7 +585,7 @@ xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len) * @array: array of integers * @array_size: number of elements in @array * - * Return values: + * Returns: * On success, returns length in bytes of XDR buffer consumed * %-EMSGSIZE on XDR buffer overflow */ @@ -608,7 +608,7 @@ xdr_stream_encode_uint32_array(struct xdr_stream *xdr, * xdr_item_is_absent - symbolically handle XDR discriminators * @p: pointer to undecoded discriminator * - * Return values: + * Returns: * %true if the following XDR item is absent * %false if the following XDR item is present */ @@ -621,7 +621,7 @@ static inline bool xdr_item_is_absent(const __be32 *p) * xdr_item_is_present - symbolically handle XDR discriminators * @p: pointer to undecoded discriminator * - * Return values: + * Returns: * %true if the following XDR item is present * %false if the following XDR item is absent */ @@ -635,7 +635,7 @@ static inline bool 
xdr_item_is_present(const __be32 *p) * @xdr: pointer to xdr_stream * @ptr: pointer to a u32 in which to store the result * - * Return values: + * Returns: * %0 on success * %-EBADMSG on XDR buffer overflow */ @@ -656,7 +656,7 @@ xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr) * @xdr: pointer to xdr_stream * @ptr: location to store integer * - * Return values: + * Returns: * %0 on success * %-EBADMSG on XDR buffer overflow */ @@ -677,7 +677,7 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr) * @xdr: pointer to xdr_stream * @ptr: location to store integer * - * Return values: + * Returns: * %0 on success * %-EBADMSG on XDR buffer overflow */ @@ -698,7 +698,7 @@ xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr) * @xdr: pointer to xdr_stream * @ptr: location to store 64-bit integer * - * Return values: + * Returns: * %0 on success * %-EBADMSG on XDR buffer overflow */ @@ -720,7 +720,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr) * @ptr: location to store data * @len: size of buffer pointed to by @ptr * - * Return values: + * Returns: * %0 on success * %-EBADMSG on XDR buffer overflow */ @@ -746,7 +746,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len) * on @xdr. It is therefore expected that the object it points to should * be processed immediately. 
* - * Return values: + * Returns: * On success, returns size of object stored in *@ptr * %-EBADMSG on XDR buffer overflow * %-EMSGSIZE if the size of the object would exceed @maxlen @@ -777,7 +777,7 @@ xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxle * @array: location to store the integer array or NULL * @array_size: number of elements to store * - * Return values: + * Returns: * On success, returns number of elements stored in @array * %-EBADMSG on XDR buffer overflow * %-EMSGSIZE if the size of the array exceeds @array_size -- cgit v1.2.3 From 4e2866b2baaddfff6069a2f18fc134c1d5a08f2b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Mar 2026 12:18:54 -0400 Subject: SUNRPC: Add svc_rqst_page_release() helper svc_rqst_replace_page() releases displaced pages through a per-rqst folio batch, but exposes the add-or-flush sequence directly. svc_tcp_restore_pages() releases displaced pages individually with put_page(). Introduce svc_rqst_page_release() to encapsulate the batched release mechanism. Convert svc_rqst_replace_page() and svc_tcp_restore_pages() to use it. The latter now benefits from the same batched release that svc_rqst_replace_page() already uses. Reviewed-by: Christoph Hellwig Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 15 +++++++++++++++ net/sunrpc/svc.c | 7 ++----- net/sunrpc/svcsock.c | 2 +- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 669c944eaf7f..1ebd9c7efa70 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -498,6 +498,21 @@ int svc_generic_rpcbind_set(struct net *net, #define RPC_MAX_ADDRBUFLEN (63U) +/** + * svc_rqst_page_release - release a page associated with an RPC transaction + * @rqstp: RPC transaction context + * @page: page to release + * + * Released pages are batched and freed together, reducing + * allocator pressure under heavy RPC workloads. 
+ */ +static inline void svc_rqst_page_release(struct svc_rqst *rqstp, + struct page *page) +{ + if (!folio_batch_add(&rqstp->rq_fbatch, page_folio(page))) + __folio_batch_release(&rqstp->rq_fbatch); +} + /* * When we want to reduce the size of the reserved space in the response * buffer, we need to take into account the size of any checksum data that diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5e0b5ec2fd52..576fa42e7abf 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -976,11 +976,8 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) return false; } - if (*rqstp->rq_next_page) { - if (!folio_batch_add(&rqstp->rq_fbatch, - page_folio(*rqstp->rq_next_page))) - __folio_batch_release(&rqstp->rq_fbatch); - } + if (*rqstp->rq_next_page) + svc_rqst_page_release(rqstp, *rqstp->rq_next_page); get_page(page); *(rqstp->rq_next_page++) = page; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2ce43f9995f1..7be3de1a1aed 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -988,7 +988,7 @@ static size_t svc_tcp_restore_pages(struct svc_sock *svsk, npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) - put_page(rqstp->rq_pages[i]); + svc_rqst_page_release(rqstp, rqstp->rq_pages[i]); BUG_ON(svsk->sk_pages[i] == NULL); rqstp->rq_pages[i] = svsk->sk_pages[i]; svsk->sk_pages[i] = NULL; -- cgit v1.2.3