-rw-r--r--  Documentation/admin-guide/nfs/pnfs-block-server.rst  30
-rw-r--r--  Documentation/admin-guide/nfs/pnfs-scsi-server.rst  31
-rw-r--r--  Documentation/filesystems/locking.rst  2
-rw-r--r--  Documentation/filesystems/nfs/exporting.rst  85
-rw-r--r--  Documentation/netlink/specs/nfsd.yaml  6
-rw-r--r--  Documentation/sunrpc/xdr/nlm4.x  211
-rw-r--r--  fs/lockd/Makefile  30
-rw-r--r--  fs/lockd/clnt4xdr.c  5
-rw-r--r--  fs/lockd/clntlock.c  2
-rw-r--r--  fs/lockd/clntproc.c  2
-rw-r--r--  fs/lockd/clntxdr.c  3
-rw-r--r--  fs/lockd/host.c  31
-rw-r--r--  fs/lockd/lockd.h (renamed from include/linux/lockd/lockd.h)  101
-rw-r--r--  fs/lockd/mon.c  2
-rw-r--r--  fs/lockd/nlm.h (renamed from include/linux/lockd/nlm.h)  8
-rw-r--r--  fs/lockd/nlm4xdr_gen.c  724
-rw-r--r--  fs/lockd/nlm4xdr_gen.h  32
-rw-r--r--  fs/lockd/share.h (renamed from include/linux/lockd/share.h)  19
-rw-r--r--  fs/lockd/svc.c  50
-rw-r--r--  fs/lockd/svc4proc.c  1718
-rw-r--r--  fs/lockd/svclock.c  21
-rw-r--r--  fs/lockd/svcproc.c  126
-rw-r--r--  fs/lockd/svcshare.c  40
-rw-r--r--  fs/lockd/svcsubs.c  32
-rw-r--r--  fs/lockd/trace.h  3
-rw-r--r--  fs/lockd/xdr.c  6
-rw-r--r--  fs/lockd/xdr.h (renamed from include/linux/lockd/xdr.h)  15
-rw-r--r--  fs/lockd/xdr4.c  347
-rw-r--r--  fs/locks.c  26
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c  4
-rw-r--r--  fs/nfs/nfs3proc.c  1
-rw-r--r--  fs/nfs/proc.c  1
-rw-r--r--  fs/nfs/sysfs.c  4
-rw-r--r--  fs/nfsd/Kconfig  12
-rw-r--r--  fs/nfsd/blocklayout.c  102
-rw-r--r--  fs/nfsd/debugfs.c  4
-rw-r--r--  fs/nfsd/export.c  5
-rw-r--r--  fs/nfsd/lockd.c  50
-rw-r--r--  fs/nfsd/netlink.c  5
-rw-r--r--  fs/nfsd/netns.h  7
-rw-r--r--  fs/nfsd/nfs3xdr.c  4
-rw-r--r--  fs/nfsd/nfs4callback.c  113
-rw-r--r--  fs/nfsd/nfs4layouts.c  152
-rw-r--r--  fs/nfsd/nfs4proc.c  3
-rw-r--r--  fs/nfsd/nfs4state.c  113
-rw-r--r--  fs/nfsd/nfs4xdr.c  26
-rw-r--r--  fs/nfsd/nfscache.c  3
-rw-r--r--  fs/nfsd/nfsctl.c  45
-rw-r--r--  fs/nfsd/nfsd.h  6
-rw-r--r--  fs/nfsd/nfsfh.c  83
-rw-r--r--  fs/nfsd/nfssvc.c  10
-rw-r--r--  fs/nfsd/nfsxdr.c  2
-rw-r--r--  fs/nfsd/pnfs.h  5
-rw-r--r--  fs/nfsd/state.h  32
-rw-r--r--  fs/nfsd/trace.h  23
-rw-r--r--  include/linux/filelock.h  1
-rw-r--r--  include/linux/lockd/bind.h  26
-rw-r--r--  include/linux/lockd/debug.h  40
-rw-r--r--  include/linux/lockd/xdr4.h  43
-rw-r--r--  include/linux/sunrpc/cache.h  7
-rw-r--r--  include/linux/sunrpc/debug.h  10
-rw-r--r--  include/linux/sunrpc/sched.h  3
-rw-r--r--  include/linux/sunrpc/svc.h  82
-rw-r--r--  include/linux/sunrpc/svc_rdma.h  23
-rw-r--r--  include/linux/sunrpc/xdr.h  48
-rw-r--r--  include/linux/sunrpc/xdrgen/nlm4.h  233
-rw-r--r--  include/trace/events/sunrpc.h  4
-rw-r--r--  include/uapi/linux/nfsd/export.h  4
-rw-r--r--  include/uapi/linux/nfsd_netlink.h  1
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_test.c  93
-rw-r--r--  net/sunrpc/cache.c  249
-rw-r--r--  net/sunrpc/svc.c  68
-rw-r--r--  net/sunrpc/svc_xprt.c  47
-rw-r--r--  net/sunrpc/svcsock.c  9
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  28
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_rw.c  374
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c  196
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c  33
78 files changed, 4421 insertions, 1724 deletions
diff --git a/Documentation/admin-guide/nfs/pnfs-block-server.rst b/Documentation/admin-guide/nfs/pnfs-block-server.rst
index 20fe9f5117fe..7667dd2e17f1 100644
--- a/Documentation/admin-guide/nfs/pnfs-block-server.rst
+++ b/Documentation/admin-guide/nfs/pnfs-block-server.rst
@@ -40,3 +40,33 @@ how to translate the device into a serial number from SCSI EVPD 0x80::
echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log
EOF
+
+If the nfsd server needs to fence a non-responding client and the
+fencing operation fails, the server logs a warning message in the
+system log with the following format:
+
+ FENCE failed client[IP_address] clid[#n] device[dev_name]
+
+ where:
+
+ - IP_address: refers to the IP address of the affected client.
+ - #n: indicates the unique client identifier.
+ - dev_name: specifies the name of the block device related
+ to the fencing attempt.
+
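+For example, a failed attempt to fence the client at 192.0.2.10, which
+holds client ID 47 and state on block device dm-3, would be logged as
+(all values are illustrative)::
+
+    FENCE failed client[192.0.2.10] clid[#47] device[dm-3]
+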
+The server retries the fencing operation indefinitely. While it does,
+access to the affected file is restricted for all other clients, to
+prevent potential data corruption if multiple clients access the same
+file simultaneously.
+
+To restore access to the affected file for other clients, the admin
+needs to take the following actions:
+
+  - shut down or power off the client being fenced.
+ - manually expire the client to release all its state on the server::
+
+ echo 'expire' > /proc/fs/nfsd/clients/clid/ctl
+
+ where:
+
+ - clid: is the unique client identifier displayed in the system log.
diff --git a/Documentation/admin-guide/nfs/pnfs-scsi-server.rst b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
index b2eec2288329..b202508d281d 100644
--- a/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
+++ b/Documentation/admin-guide/nfs/pnfs-scsi-server.rst
@@ -22,3 +22,34 @@ option and the underlying SCSI device support persistent reservations.
On the client make sure the kernel has the CONFIG_PNFS_BLOCK option
enabled, and the file system is mounted using the NFSv4.1 protocol
version (mount -o vers=4.1).
+
+If the nfsd server needs to fence a non-responding client and the
+fencing operation fails, the server logs a warning message in the
+system log with the following format:
+
+ FENCE failed client[IP_address] clid[#n] device[dev_name]
+
+ where:
+
+ - IP_address: refers to the IP address of the affected client.
+ - #n: indicates the unique client identifier.
+ - dev_name: specifies the name of the block device related
+ to the fencing attempt.
+
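+For example, a failed attempt to fence the client at 192.0.2.10, which
+holds client ID 47 and state on block device dm-3, would be logged as
+(all values are illustrative)::
+
+    FENCE failed client[192.0.2.10] clid[#47] device[dm-3]
+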
+The server retries the fencing operation indefinitely. While it does,
+access to the affected file is restricted for all other clients, to
+prevent potential data corruption if multiple clients access the same
+file simultaneously.
+
+To restore access to the affected file for other clients, the admin
+needs to take the following actions:
+
+  - shut down or power off the client being fenced.
+ - manually expire the client to release all its state on the server::
+
+ echo 'expire' > /proc/fs/nfsd/clients/clid/ctl
+
+ where:
+
+ - clid: is the unique client identifier displayed in the system log.
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 8025df6e6499..8421ea21bd35 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -398,6 +398,7 @@ prototypes::
bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *);
void (*lm_expire_lock)(void);
+ bool (*lm_breaker_timedout)(struct file_lease *);
locking rules:
@@ -412,6 +413,7 @@ lm_breaker_owns_lease: yes no no
lm_lock_expirable yes no no
lm_expire_lock no no yes
lm_open_conflict yes no no
+lm_breaker_timedout yes no no
====================== ============= ================= =========
buffer_head
diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst
index a01d9b9b5bc3..4aa59b0bf253 100644
--- a/Documentation/filesystems/nfs/exporting.rst
+++ b/Documentation/filesystems/nfs/exporting.rst
@@ -206,3 +206,88 @@ following flags are defined:
all of an inode's dirty data on last close. Exports that behave this
way should set EXPORT_OP_FLUSH_ON_CLOSE so that NFSD knows to skip
waiting for writeback when closing such files.
+
+Signed Filehandles
+------------------
+
+To protect against filehandle guessing attacks, the Linux NFS server can be
+configured to sign filehandles with a Message Authentication Code (MAC).
+
+Standard NFS filehandles are often predictable. An attacker who can
+guess a valid filehandle for a file that path-based (directory
+traversal) permissions would not let them reach may use that handle to
+bypass those checks (though they remain subject to inode-level
+permissions).
+
+Signed filehandles prevent this by appending a MAC to the filehandle
+before it is sent to the client. Upon receiving a filehandle back from a
+client, the server recalculates the MAC using its internal key and
+verifies it against the one provided. If the signatures do not match,
+the server treats the filehandle as invalid (returning NFS[34]ERR_STALE).
+
+Note that signing filehandles provides integrity and authenticity but
+not confidentiality. The contents of the filehandle remain visible to
+the client; they simply cannot be forged or modified.
+
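+Conceptually, verification strips the trailing MAC from the incoming
+filehandle, recomputes it over the remaining bytes with the server's
+key, and compares the two in constant time. A minimal sketch follows;
+fh_compute_mac() and MAC_LEN are illustrative placeholders, not the
+actual nfsd symbols::
+
+    /* Illustrative sketch only; not the in-tree implementation. */
+    static bool fh_signature_valid(const u8 *fh, size_t fh_len,
+                                   const u8 *key, size_t key_len)
+    {
+        u8 mac[MAC_LEN];
+
+        /* Too short to carry a trailing MAC at all. */
+        if (fh_len <= MAC_LEN)
+            return false;
+
+        /* The MAC covers every byte before the signature itself. */
+        fh_compute_mac(mac, fh, fh_len - MAC_LEN, key, key_len);
+
+        /* Constant-time comparison avoids creating a timing oracle. */
+        return crypto_memneq(mac, fh + fh_len - MAC_LEN, MAC_LEN) == 0;
+    }
+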
+Configuration
+~~~~~~~~~~~~~
+
+To enable signed filehandles, the administrator must provide a signing
+key to the kernel and enable the "sign_fh" export option.
+
+1. Providing a Key
+ The signing key is managed via the nfsd netlink interface. This key
+ is per-network-namespace and must be set before any exports using
+ "sign_fh" become active.
+
+2. Export Options
+ The feature is controlled on a per-export basis in /etc/exports:
+
+ sign_fh
+ Enables signing for all filehandles generated under this export.
+
+ no_sign_fh
+ (Default) Disables signing.
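+
+   For example, an /etc/exports entry that enables filehandle signing
+   for one subnet (the path and address are illustrative) might look
+   like::
+
+      /srv/nfs  192.0.2.0/24(rw,sign_fh)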
+
+Key Management and Rotation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The security of this mechanism relies entirely on the secrecy of the
+signing key.
+
+Initial Setup:
+ The key should be generated using a high-quality random source and
+ loaded early in the boot process or during the nfs-server startup
+ sequence.
+
+Changing Keys:
+ If a key is changed while clients have active mounts, existing
+ filehandles held by those clients will become invalid, resulting in
+ "Stale file handle" errors on the client side.
+
+Safe Rotation:
+ Currently, there is no mechanism for "graceful" key rotation
+ (maintaining multiple valid keys). Changing the key is an atomic
+ operation that immediately invalidates all previous signatures.
+
+Transitioning Exports
+~~~~~~~~~~~~~~~~~~~~~
+
+When adding or removing the "sign_fh" flag from an active export, the
+following behaviors should be expected:
+
++-------------------+---------------------------------------------------+
+| Change | Result for Existing Clients |
++===================+===================================================+
+| Adding sign_fh | Clients holding unsigned filehandles will find |
+| | them rejected, as the server now expects a |
+| | signature. |
++-------------------+---------------------------------------------------+
+| Removing sign_fh | Clients holding signed filehandles will find them |
+| | rejected, as the server now expects the |
+| | filehandle to end at its traditional boundary |
+| | without a MAC. |
++-------------------+---------------------------------------------------+
+
+Because clients often cache filehandles persistently, adding or
+removing this option should generally be done during a scheduled
+maintenance window in which the affected clients unmount and remount
+the export.
diff --git a/Documentation/netlink/specs/nfsd.yaml b/Documentation/netlink/specs/nfsd.yaml
index f87b5a05e5e9..8ab43c8253b2 100644
--- a/Documentation/netlink/specs/nfsd.yaml
+++ b/Documentation/netlink/specs/nfsd.yaml
@@ -81,6 +81,11 @@ attribute-sets:
-
name: min-threads
type: u32
+ -
+ name: fh-key
+ type: binary
+ checks:
+ exact-len: 16
-
name: version
attributes:
@@ -163,6 +168,7 @@ operations:
- leasetime
- scope
- min-threads
+ - fh-key
-
name: threads-get
doc: get the maximum number of running threads
diff --git a/Documentation/sunrpc/xdr/nlm4.x b/Documentation/sunrpc/xdr/nlm4.x
new file mode 100644
index 000000000000..0c44a80ef674
--- /dev/null
+++ b/Documentation/sunrpc/xdr/nlm4.x
@@ -0,0 +1,211 @@
+/*
+ * This file was extracted by hand from
+ * https://www.rfc-editor.org/rfc/rfc1813.html .
+ *
+ * Note that RFC 1813 is Informational. Its official date of
+ * publication (June 1995) is before the IETF required its RFCs to
+ * carry an explicit copyright or other IP ownership notices.
+ *
+ * Note also that RFC 1813 does not specify the whole NLM4 protocol.
+ * In particular, the argument and result types are not present in
+ * that document, and had to be reverse-engineered.
+ */
+
+/*
+ * The NLMv4 protocol
+ */
+
+pragma header nlm4;
+
+/*
+ * The following definitions are missing in RFC 1813,
+ * but can be found in the OpenNetworking Network Lock
+ * Manager protocol:
+ *
+ * https://pubs.opengroup.org/onlinepubs/9629799/chap10.htm
+ */
+
+const LM_MAXSTRLEN = 1024;
+
+const LM_MAXNAMELEN = 1025;
+
+const MAXNETOBJ_SZ = 1024;
+
+typedef opaque netobj<MAXNETOBJ_SZ>;
+
+enum fsh4_mode {
+ fsm_DN = 0, /* deny none */
+ fsm_DR = 1, /* deny read */
+ fsm_DW = 2, /* deny write */
+ fsm_DRW = 3 /* deny read/write */
+};
+
+enum fsh4_access {
+ fsa_NONE = 0, /* for completeness */
+ fsa_R = 1, /* read-only */
+ fsa_W = 2, /* write-only */
+ fsa_RW = 3 /* read/write */
+};
+
+/*
+ * The following definitions come from the OpenNetworking
+ * Network Status Monitor protocol:
+ *
+ * https://pubs.opengroup.org/onlinepubs/9629799/chap11.htm
+ */
+
+const SM_MAXSTRLEN = 1024;
+
+/*
+ * The NLM protocol as extracted from:
+ * https://tools.ietf.org/html/rfc1813 Appendix II
+ */
+
+typedef unsigned hyper uint64;
+
+typedef hyper int64;
+
+typedef unsigned long uint32;
+
+typedef long int32;
+
+enum nlm4_stats {
+ NLM4_GRANTED = 0,
+ NLM4_DENIED = 1,
+ NLM4_DENIED_NOLOCKS = 2,
+ NLM4_BLOCKED = 3,
+ NLM4_DENIED_GRACE_PERIOD = 4,
+ NLM4_DEADLCK = 5,
+ NLM4_ROFS = 6,
+ NLM4_STALE_FH = 7,
+ NLM4_FBIG = 8,
+ NLM4_FAILED = 9
+};
+
+pragma big_endian nlm4_stats;
+
+struct nlm4_holder {
+ bool exclusive;
+ int32 svid;
+ netobj oh;
+ uint64 l_offset;
+ uint64 l_len;
+};
+
+union nlm4_testrply switch (nlm4_stats stat) {
+ case NLM4_DENIED:
+ nlm4_holder holder;
+ default:
+ void;
+};
+
+struct nlm4_stat {
+ nlm4_stats stat;
+};
+
+struct nlm4_res {
+ netobj cookie;
+ nlm4_stat stat;
+};
+
+struct nlm4_testres {
+ netobj cookie;
+ nlm4_testrply stat;
+};
+
+struct nlm4_lock {
+ string caller_name<LM_MAXSTRLEN>;
+ netobj fh;
+ netobj oh;
+ int32 svid;
+ uint64 l_offset;
+ uint64 l_len;
+};
+
+struct nlm4_lockargs {
+ netobj cookie;
+ bool block;
+ bool exclusive;
+ nlm4_lock alock;
+ bool reclaim;
+ int32 state;
+};
+
+struct nlm4_cancargs {
+ netobj cookie;
+ bool block;
+ bool exclusive;
+ nlm4_lock alock;
+};
+
+struct nlm4_testargs {
+ netobj cookie;
+ bool exclusive;
+ nlm4_lock alock;
+};
+
+struct nlm4_unlockargs {
+ netobj cookie;
+ nlm4_lock alock;
+};
+
+struct nlm4_share {
+ string caller_name<LM_MAXSTRLEN>;
+ netobj fh;
+ netobj oh;
+ fsh4_mode mode;
+ fsh4_access access;
+};
+
+struct nlm4_shareargs {
+ netobj cookie;
+ nlm4_share share;
+ bool reclaim;
+};
+
+struct nlm4_shareres {
+ netobj cookie;
+ nlm4_stats stat;
+ int32 sequence;
+};
+
+struct nlm4_notify {
+ string name<LM_MAXNAMELEN>;
+ int32 state;
+};
+
+/*
+ * Argument for the Linux-private SM_NOTIFY procedure
+ */
+const SM_PRIV_SIZE = 16;
+
+struct nlm4_notifyargs {
+ nlm4_notify notify;
+ opaque private[SM_PRIV_SIZE];
+};
+
+program NLM4_PROG {
+ version NLM4_VERS {
+ void NLMPROC4_NULL(void) = 0;
+ nlm4_testres NLMPROC4_TEST(nlm4_testargs) = 1;
+ nlm4_res NLMPROC4_LOCK(nlm4_lockargs) = 2;
+ nlm4_res NLMPROC4_CANCEL(nlm4_cancargs) = 3;
+ nlm4_res NLMPROC4_UNLOCK(nlm4_unlockargs) = 4;
+ nlm4_res NLMPROC4_GRANTED(nlm4_testargs) = 5;
+ void NLMPROC4_TEST_MSG(nlm4_testargs) = 6;
+ void NLMPROC4_LOCK_MSG(nlm4_lockargs) = 7;
+ void NLMPROC4_CANCEL_MSG(nlm4_cancargs) = 8;
+ void NLMPROC4_UNLOCK_MSG(nlm4_unlockargs) = 9;
+ void NLMPROC4_GRANTED_MSG(nlm4_testargs) = 10;
+ void NLMPROC4_TEST_RES(nlm4_testres) = 11;
+ void NLMPROC4_LOCK_RES(nlm4_res) = 12;
+ void NLMPROC4_CANCEL_RES(nlm4_res) = 13;
+ void NLMPROC4_UNLOCK_RES(nlm4_res) = 14;
+ void NLMPROC4_GRANTED_RES(nlm4_res) = 15;
+ void NLMPROC4_SM_NOTIFY(nlm4_notifyargs) = 16;
+ nlm4_shareres NLMPROC4_SHARE(nlm4_shareargs) = 20;
+ nlm4_shareres NLMPROC4_UNSHARE(nlm4_shareargs) = 21;
+ nlm4_res NLMPROC4_NM_LOCK(nlm4_lockargs) = 22;
+ void NLMPROC4_FREE_ALL(nlm4_notify) = 23;
+ } = 4;
+} = 100021;
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 51bbe22d21e3..808f0f2a7be1 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -9,5 +9,33 @@ obj-$(CONFIG_LOCKD) += lockd.o
lockd-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
svcshare.o svcproc.o svcsubs.o mon.o trace.o xdr.o netlink.o
-lockd-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
+lockd-$(CONFIG_LOCKD_V4) += clnt4xdr.o svc4proc.o nlm4xdr_gen.o
lockd-$(CONFIG_PROC_FS) += procfs.o
+
+#
+# XDR code generation (requires Python and additional packages)
+#
+# The generated *xdr_gen.{h,c} files are checked into git. Normal kernel
+# builds do not require the xdrgen tool or its Python dependencies.
+#
+# Developers modifying .x files in Documentation/sunrpc/xdr/ should run
+# "make xdrgen" to regenerate the affected files.
+#
+.PHONY: xdrgen
+
+XDRGEN = ../../tools/net/sunrpc/xdrgen/xdrgen
+
+XDRGEN_DEFINITIONS = ../../include/linux/sunrpc/xdrgen/nlm4.h
+XDRGEN_DECLARATIONS = nlm4xdr_gen.h
+XDRGEN_SOURCE = nlm4xdr_gen.c
+
+xdrgen: $(XDRGEN_DEFINITIONS) $(XDRGEN_DECLARATIONS) $(XDRGEN_SOURCE)
+
+../../include/linux/sunrpc/xdrgen/nlm4.h: ../../Documentation/sunrpc/xdr/nlm4.x
+ $(XDRGEN) definitions $< > $@
+
+nlm4xdr_gen.h: ../../Documentation/sunrpc/xdr/nlm4.x
+ $(XDRGEN) declarations $< > $@
+
+nlm4xdr_gen.c: ../../Documentation/sunrpc/xdr/nlm4.x
+ $(XDRGEN) source --peer server $< > $@
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 527458db4525..2058733eacf8 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -13,7 +13,8 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
-#include <linux/lockd/lockd.h>
+
+#include "lockd.h"
#include <uapi/linux/nfs3.h>
@@ -284,7 +285,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
fl->c.flc_type = exclusive != 0 ? F_WRLCK : F_RDLCK;
p = xdr_decode_hyper(p, &l_offset);
xdr_decode_hyper(p, &l_len);
- nlm4svc_set_file_lock_range(fl, l_offset, l_len);
+ lockd_set_file_lock_range4(fl, l_offset, l_len);
error = 0;
out:
return error;
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 85bc0f3e91df..8fa30c42c92a 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -15,9 +15,9 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svc_xprt.h>
-#include <linux/lockd/lockd.h>
#include <linux/kthread.h>
+#include "lockd.h"
#include "trace.h"
#define NLMDBG_FACILITY NLMDBG_CLIENT
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index fb4d0752c9bb..7f211008a5d2 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -18,8 +18,8 @@
#include <linux/freezer.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
-#include <linux/lockd/lockd.h>
+#include "lockd.h"
#include "trace.h"
#define NLMDBG_FACILITY NLMDBG_CLIENT
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 6ea3448d2d31..65555f5224b1 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -15,7 +15,8 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/stats.h>
-#include <linux/lockd/lockd.h>
+
+#include "lockd.h"
#include <uapi/linux/nfs2.h>
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 1a9582a10a86..ea8a8e166f7e 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -16,13 +16,13 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc.h>
-#include <linux/lockd/lockd.h>
#include <linux/mutex.h>
#include <linux/sunrpc/svc_xprt.h>
#include <net/ipv6.h>
+#include "lockd.h"
#include "netns.h"
#define NLMDBG_FACILITY NLMDBG_HOSTCACHE
@@ -306,6 +306,35 @@ void nlmclnt_release_host(struct nlm_host *host)
}
}
+/* Callback for rpc_cancel_tasks() - matches all tasks for cancellation */
+static bool nlmclnt_match_all(const struct rpc_task *task, const void *data)
+{
+ return true;
+}
+
+/**
+ * nlmclnt_shutdown_rpc_clnt - safely shut down NLM client RPC operations
+ * @host: nlm_host to shut down
+ *
+ * Cancels outstanding RPC tasks and marks the client as shut down.
+ * Synchronizes with nlmclnt_release_host() via nlm_host_mutex to prevent
+ * races between shutdown and host destruction. Safe to call if h_rpcclnt
+ * is NULL or already shut down.
+ */
+void nlmclnt_shutdown_rpc_clnt(struct nlm_host *host)
+{
+ struct rpc_clnt *clnt;
+
+ mutex_lock(&nlm_host_mutex);
+ clnt = host->h_rpcclnt;
+ if (clnt) {
+ clnt->cl_shutdown = 1;
+ rpc_cancel_tasks(clnt, -EIO, nlmclnt_match_all, NULL);
+ }
+ mutex_unlock(&nlm_host_mutex);
+}
+EXPORT_SYMBOL_GPL(nlmclnt_shutdown_rpc_clnt);
+
/**
* nlmsvc_lookup_host - Find an NLM host handle matching a remote client
* @rqstp: incoming NLM request
diff --git a/include/linux/lockd/lockd.h b/fs/lockd/lockd.h
index 330e38776bb2..a7c85ab6d4b5 100644
--- a/include/linux/lockd/lockd.h
+++ b/fs/lockd/lockd.h
@@ -1,16 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * linux/include/linux/lockd/lockd.h
- *
- * General-purpose lockd include file.
- *
* Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
*/
-#ifndef LINUX_LOCKD_LOCKD_H
-#define LINUX_LOCKD_LOCKD_H
-
-/* XXX: a lot of this should really be under fs/lockd. */
+#ifndef _LOCKD_LOCKD_H
+#define _LOCKD_LOCKD_H
#include <linux/exportfs.h>
#include <linux/in.h>
@@ -20,15 +14,35 @@
#include <linux/kref.h>
#include <linux/refcount.h>
#include <linux/utsname.h>
+#include "nlm.h"
#include <linux/lockd/bind.h>
-#include <linux/lockd/xdr.h>
-#ifdef CONFIG_LOCKD_V4
-#include <linux/lockd/xdr4.h>
-#endif
-#include <linux/lockd/debug.h>
+#include "xdr.h"
+#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/svc.h>
/*
+ * Enable lockd debugging.
+ * Requires CONFIG_SUNRPC_DEBUG.
+ */
+#undef ifdebug
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+# define ifdebug(flag) if (unlikely(nlm_debug & NLMDBG_##flag))
+#else
+# define ifdebug(flag) if (0)
+#endif
+
+#define NLMDBG_SVC 0x0001
+#define NLMDBG_CLIENT 0x0002
+#define NLMDBG_CLNTLOCK 0x0004
+#define NLMDBG_SVCLOCK 0x0008
+#define NLMDBG_MONITOR 0x0010
+#define NLMDBG_CLNTSUBS 0x0020
+#define NLMDBG_SVCSUBS 0x0040
+#define NLMDBG_HOSTCACHE 0x0080
+#define NLMDBG_XDR 0x0100
+#define NLMDBG_ALL 0x7fff
+
+/*
* Version string
*/
#define LOCKD_VERSION "0.5"
@@ -38,6 +52,22 @@
*/
#define LOCKD_DFLT_TIMEO 10
+/* error codes new to NLMv4 */
+#define nlm4_deadlock cpu_to_be32(NLM_DEADLCK)
+#define nlm4_rofs cpu_to_be32(NLM_ROFS)
+#define nlm4_stale_fh cpu_to_be32(NLM_STALE_FH)
+#define nlm4_fbig cpu_to_be32(NLM_FBIG)
+#define nlm4_failed cpu_to_be32(NLM_FAILED)
+
+/*
+ * Internal-use status codes, not to be placed on the wire.
+ * Version handlers translate these to appropriate wire values.
+ */
+#define nlm__int__drop_reply cpu_to_be32(30000)
+#define nlm__int__deadlock cpu_to_be32(30001)
+#define nlm__int__stale_fh cpu_to_be32(30002)
+#define nlm__int__failed cpu_to_be32(30003)
+
/*
* Lockd host handle (used both by the client and server personality).
*/
@@ -149,6 +179,8 @@ struct nlm_rqst {
void * a_callback_data; /* sent to nlmclnt_operations callbacks */
};
+struct nlm_share;
+
/*
* This struct describes a file held open by lockd on behalf of
* an NFS client.
@@ -196,9 +228,10 @@ struct nlm_block {
* Global variables
*/
extern const struct rpc_program nlm_program;
-extern const struct svc_procedure nlmsvc_procedures[24];
+extern const struct svc_version nlmsvc_version1;
+extern const struct svc_version nlmsvc_version3;
#ifdef CONFIG_LOCKD_V4
-extern const struct svc_procedure nlmsvc_procedures4[24];
+extern const struct svc_version nlmsvc_version4;
#endif
extern int nlmsvc_grace_period;
extern unsigned long nlm_timeout;
@@ -226,6 +259,10 @@ int nlmclnt_reclaim(struct nlm_host *, struct file_lock *,
struct nlm_rqst *);
void nlmclnt_next_cookie(struct nlm_cookie *);
+#ifdef CONFIG_LOCKD_V4
+extern const struct rpc_version nlm_version4;
+#endif
+
/*
* Host cache
*/
@@ -289,6 +326,7 @@ void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
void nlmsvc_grant_reply(struct nlm_cookie *, __be32);
void nlmsvc_release_call(struct nlm_rqst *);
void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
+int nlmsvc_dispatch(struct svc_rqst *rqstp);
/*
* File handling for the server personality
@@ -302,12 +340,6 @@ void nlmsvc_mark_resources(struct net *);
void nlmsvc_free_host_resources(struct nlm_host *);
void nlmsvc_invalidate_all(void);
-/*
- * Cluster failover support
- */
-int nlmsvc_unlock_all_by_sb(struct super_block *sb);
-int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
-
static inline struct file *nlmsvc_file_file(const struct nlm_file *file)
{
return file->f_file[O_RDONLY] ?
@@ -390,6 +422,31 @@ static inline int nlm_compare_locks(const struct file_lock *fl1,
&&(fl1->c.flc_type == fl2->c.flc_type || fl2->c.flc_type == F_UNLCK);
}
+/**
+ * lockd_set_file_lock_range4 - set the byte range of a file_lock
+ * @fl: file_lock whose byte-range fields are to be initialized
+ * @off: starting offset of the lock, in bytes
+ * @len: length of the byte range, in bytes, or zero
+ *
+ * The NLMv4 protocol represents lock byte ranges as (start, length),
+ * where length zero means "lock to end of file." The kernel's file_lock
+ * structure uses (start, end) representation. Convert from NLMv4 format
+ * to file_lock format, clamping the starting offset and treating
+ * arithmetic overflow as "lock to EOF."
+ */
+static inline void
+lockd_set_file_lock_range4(struct file_lock *fl, u64 off, u64 len)
+{
+ u64 clamped_off = (off > OFFSET_MAX) ? OFFSET_MAX : off;
+ s64 end = clamped_off + len - 1;
+
+ fl->fl_start = clamped_off;
+ if (len == 0 || end < 0)
+ fl->fl_end = OFFSET_MAX;
+ else
+ fl->fl_end = end;
+}
+
extern const struct lock_manager_operations nlmsvc_lock_operations;
-#endif /* LINUX_LOCKD_LOCKD_H */
+#endif /* _LOCKD_LOCKD_H */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index b8fc732e1c67..3d3ee88ca4dc 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -16,10 +16,10 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/xprtsock.h>
#include <linux/sunrpc/svc.h>
-#include <linux/lockd/lockd.h>
#include <linux/unaligned.h>
+#include "lockd.h"
#include "netns.h"
#define NLMDBG_FACILITY NLMDBG_MONITOR
diff --git a/include/linux/lockd/nlm.h b/fs/lockd/nlm.h
index 6e343ef760dc..47be65d0111f 100644
--- a/include/linux/lockd/nlm.h
+++ b/fs/lockd/nlm.h
@@ -1,14 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * linux/include/linux/lockd/nlm.h
- *
* Declarations for the Network Lock Manager protocol.
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
-#ifndef LINUX_LOCKD_NLM_H
-#define LINUX_LOCKD_NLM_H
+#ifndef _LOCKD_NLM_H
+#define _LOCKD_NLM_H
/* Maximum file offset in file_lock.fl_end */
@@ -55,4 +53,4 @@ enum {
#define NLMPROC_NM_LOCK 22
#define NLMPROC_FREE_ALL 23
-#endif /* LINUX_LOCKD_NLM_H */
+#endif /* _LOCKD_NLM_H */
diff --git a/fs/lockd/nlm4xdr_gen.c b/fs/lockd/nlm4xdr_gen.c
new file mode 100644
index 000000000000..1c8c221db456
--- /dev/null
+++ b/fs/lockd/nlm4xdr_gen.c
@@ -0,0 +1,724 @@
+// SPDX-License-Identifier: GPL-2.0
+// Generated by xdrgen. Manual edits will be lost.
+// XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x
+// XDR specification modification time: Thu Dec 25 13:10:19 2025
+
+#include <linux/sunrpc/svc.h>
+
+#include "nlm4xdr_gen.h"
+
+static bool __maybe_unused
+xdrgen_decode_netobj(struct xdr_stream *xdr, netobj *ptr)
+{
+ return xdrgen_decode_opaque(xdr, ptr, MAXNETOBJ_SZ);
+}
+
+static bool __maybe_unused
+xdrgen_decode_fsh4_mode(struct xdr_stream *xdr, fsh4_mode *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_fsh4_access(struct xdr_stream *xdr, fsh4_access *ptr)
+{
+ u32 val;
+
+ if (xdr_stream_decode_u32(xdr, &val) < 0)
+ return false;
+ *ptr = val;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_uint64(struct xdr_stream *xdr, uint64 *ptr)
+{
+ return xdrgen_decode_unsigned_hyper(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_int64(struct xdr_stream *xdr, int64 *ptr)
+{
+ return xdrgen_decode_hyper(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_uint32(struct xdr_stream *xdr, uint32 *ptr)
+{
+ return xdrgen_decode_unsigned_long(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_int32(struct xdr_stream *xdr, int32 *ptr)
+{
+ return xdrgen_decode_long(xdr, ptr);
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_stats(struct xdr_stream *xdr, nlm4_stats *ptr)
+{
+ return xdr_stream_decode_be32(xdr, ptr) == 0;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_holder(struct xdr_stream *xdr, struct nlm4_holder *ptr)
+{
+ if (!xdrgen_decode_bool(xdr, &ptr->exclusive))
+ return false;
+ if (!xdrgen_decode_int32(xdr, &ptr->svid))
+ return false;
+ if (!xdrgen_decode_netobj(xdr, &ptr->oh))
+ return false;
+ if (!xdrgen_decode_uint64(xdr, &ptr->l_offset))
+ return false;
+ if (!xdrgen_decode_uint64(xdr, &ptr->l_len))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_testrply(struct xdr_stream *xdr, struct nlm4_testrply *ptr)
+{
+ if (!xdrgen_decode_nlm4_stats(xdr, &ptr->stat))
+ return false;
+ switch (ptr->stat) {
+ case __constant_cpu_to_be32(NLM4_DENIED):
+ if (!xdrgen_decode_nlm4_holder(xdr, &ptr->u.holder))
+ return false;
+ break;
+ default:
+ break;
+ }
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_stat(struct xdr_stream *xdr, struct nlm4_stat *ptr)
+{
+ if (!xdrgen_decode_nlm4_stats(xdr, &ptr->stat))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_res(struct xdr_stream *xdr, struct nlm4_res *ptr)
+{
+ if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+ return false;
+ if (!xdrgen_decode_nlm4_stat(xdr, &ptr->stat))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_testres(struct xdr_stream *xdr, struct nlm4_testres *ptr)
+{
+ if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+ return false;
+ if (!xdrgen_decode_nlm4_testrply(xdr, &ptr->stat))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_lock(struct xdr_stream *xdr, struct nlm4_lock *ptr)
+{
+ if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXSTRLEN))
+ return false;
+ if (!xdrgen_decode_netobj(xdr, &ptr->fh))
+ return false;
+ if (!xdrgen_decode_netobj(xdr, &ptr->oh))
+ return false;
+ if (!xdrgen_decode_int32(xdr, &ptr->svid))
+ return false;
+ if (!xdrgen_decode_uint64(xdr, &ptr->l_offset))
+ return false;
+ if (!xdrgen_decode_uint64(xdr, &ptr->l_len))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_lockargs(struct xdr_stream *xdr, struct nlm4_lockargs *ptr)
+{
+ if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+ return false;
+ if (!xdrgen_decode_bool(xdr, &ptr->block))
+ return false;
+ if (!xdrgen_decode_bool(xdr, &ptr->exclusive))
+ return false;
+ if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock))
+ return false;
+ if (!xdrgen_decode_bool(xdr, &ptr->reclaim))
+ return false;
+ if (!xdrgen_decode_int32(xdr, &ptr->state))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_cancargs(struct xdr_stream *xdr, struct nlm4_cancargs *ptr)
+{
+ if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+ return false;
+ if (!xdrgen_decode_bool(xdr, &ptr->block))
+ return false;
+ if (!xdrgen_decode_bool(xdr, &ptr->exclusive))
+ return false;
+ if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_testargs(struct xdr_stream *xdr, struct nlm4_testargs *ptr)
+{
+ if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+ return false;
+ if (!xdrgen_decode_bool(xdr, &ptr->exclusive))
+ return false;
+ if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_unlockargs(struct xdr_stream *xdr, struct nlm4_unlockargs *ptr)
+{
+ if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+ return false;
+ if (!xdrgen_decode_nlm4_lock(xdr, &ptr->alock))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_share(struct xdr_stream *xdr, struct nlm4_share *ptr)
+{
+ if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXSTRLEN))
+ return false;
+ if (!xdrgen_decode_netobj(xdr, &ptr->fh))
+ return false;
+ if (!xdrgen_decode_netobj(xdr, &ptr->oh))
+ return false;
+ if (!xdrgen_decode_fsh4_mode(xdr, &ptr->mode))
+ return false;
+ if (!xdrgen_decode_fsh4_access(xdr, &ptr->access))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_shareargs(struct xdr_stream *xdr, struct nlm4_shareargs *ptr)
+{
+ if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+ return false;
+ if (!xdrgen_decode_nlm4_share(xdr, &ptr->share))
+ return false;
+ if (!xdrgen_decode_bool(xdr, &ptr->reclaim))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_shareres(struct xdr_stream *xdr, struct nlm4_shareres *ptr)
+{
+ if (!xdrgen_decode_netobj(xdr, &ptr->cookie))
+ return false;
+ if (!xdrgen_decode_nlm4_stats(xdr, &ptr->stat))
+ return false;
+ if (!xdrgen_decode_int32(xdr, &ptr->sequence))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_notify(struct xdr_stream *xdr, struct nlm4_notify *ptr)
+{
+ if (!xdrgen_decode_string(xdr, (string *)ptr, LM_MAXNAMELEN))
+ return false;
+ if (!xdrgen_decode_int32(xdr, &ptr->state))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_decode_nlm4_notifyargs(struct xdr_stream *xdr, struct nlm4_notifyargs *ptr)
+{
+ if (!xdrgen_decode_nlm4_notify(xdr, &ptr->notify))
+ return false;
+ if (xdr_stream_decode_opaque_fixed(xdr, ptr->private, SM_PRIV_SIZE) < 0)
+ return false;
+ return true;
+}
+
+/**
+ * nlm4_svc_decode_void - Decode a void argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ return xdrgen_decode_void(xdr);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_testargs - Decode a nlm4_testargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_testargs *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_testargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_lockargs - Decode a nlm4_lockargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_lockargs *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_lockargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_cancargs - Decode a nlm4_cancargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_cancargs *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_cancargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_unlockargs - Decode a nlm4_unlockargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_unlockargs *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_unlockargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_testres - Decode a nlm4_testres argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_testres *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_testres(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_res - Decode a nlm4_res argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_res *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_res(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_notifyargs - Decode a nlm4_notifyargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_notifyargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_notifyargs *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_notifyargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_shareargs - Decode a nlm4_shareargs argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_shareargs *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_shareargs(xdr, argp);
+}
+
+/**
+ * nlm4_svc_decode_nlm4_notify - Decode a nlm4_notify argument
+ * @rqstp: RPC transaction context
+ * @xdr: source XDR data stream
+ *
+ * Return values:
+ * %true: procedure arguments decoded successfully
+ * %false: decode failed
+ */
+bool nlm4_svc_decode_nlm4_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_notify *argp = rqstp->rq_argp;
+
+ return xdrgen_decode_nlm4_notify(xdr, argp);
+}
+
+static bool __maybe_unused
+xdrgen_encode_netobj(struct xdr_stream *xdr, const netobj value)
+{
+ return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
+}
+
+static bool __maybe_unused
+xdrgen_encode_fsh4_mode(struct xdr_stream *xdr, fsh4_mode value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_fsh4_access(struct xdr_stream *xdr, fsh4_access value)
+{
+ return xdr_stream_encode_u32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_uint64(struct xdr_stream *xdr, const uint64 value)
+{
+ return xdrgen_encode_unsigned_hyper(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_int64(struct xdr_stream *xdr, const int64 value)
+{
+ return xdrgen_encode_hyper(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_uint32(struct xdr_stream *xdr, const uint32 value)
+{
+ return xdrgen_encode_unsigned_long(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_int32(struct xdr_stream *xdr, const int32 value)
+{
+ return xdrgen_encode_long(xdr, value);
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_stats(struct xdr_stream *xdr, nlm4_stats value)
+{
+ return xdr_stream_encode_be32(xdr, value) == XDR_UNIT;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_holder(struct xdr_stream *xdr, const struct nlm4_holder *value)
+{
+ if (!xdrgen_encode_bool(xdr, value->exclusive))
+ return false;
+ if (!xdrgen_encode_int32(xdr, value->svid))
+ return false;
+ if (!xdrgen_encode_netobj(xdr, value->oh))
+ return false;
+ if (!xdrgen_encode_uint64(xdr, value->l_offset))
+ return false;
+ if (!xdrgen_encode_uint64(xdr, value->l_len))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_testrply(struct xdr_stream *xdr, const struct nlm4_testrply *ptr)
+{
+ if (!xdrgen_encode_nlm4_stats(xdr, ptr->stat))
+ return false;
+ switch (ptr->stat) {
+ case __constant_cpu_to_be32(NLM4_DENIED):
+ if (!xdrgen_encode_nlm4_holder(xdr, &ptr->u.holder))
+ return false;
+ break;
+ default:
+ break;
+ }
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_stat(struct xdr_stream *xdr, const struct nlm4_stat *value)
+{
+ if (!xdrgen_encode_nlm4_stats(xdr, value->stat))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_res(struct xdr_stream *xdr, const struct nlm4_res *value)
+{
+ if (!xdrgen_encode_netobj(xdr, value->cookie))
+ return false;
+ if (!xdrgen_encode_nlm4_stat(xdr, &value->stat))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_testres(struct xdr_stream *xdr, const struct nlm4_testres *value)
+{
+ if (!xdrgen_encode_netobj(xdr, value->cookie))
+ return false;
+ if (!xdrgen_encode_nlm4_testrply(xdr, &value->stat))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_lock(struct xdr_stream *xdr, const struct nlm4_lock *value)
+{
+ if (value->caller_name.len > LM_MAXSTRLEN)
+ return false;
+ if (xdr_stream_encode_opaque(xdr, value->caller_name.data, value->caller_name.len) < 0)
+ return false;
+ if (!xdrgen_encode_netobj(xdr, value->fh))
+ return false;
+ if (!xdrgen_encode_netobj(xdr, value->oh))
+ return false;
+ if (!xdrgen_encode_int32(xdr, value->svid))
+ return false;
+ if (!xdrgen_encode_uint64(xdr, value->l_offset))
+ return false;
+ if (!xdrgen_encode_uint64(xdr, value->l_len))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_lockargs(struct xdr_stream *xdr, const struct nlm4_lockargs *value)
+{
+ if (!xdrgen_encode_netobj(xdr, value->cookie))
+ return false;
+ if (!xdrgen_encode_bool(xdr, value->block))
+ return false;
+ if (!xdrgen_encode_bool(xdr, value->exclusive))
+ return false;
+ if (!xdrgen_encode_nlm4_lock(xdr, &value->alock))
+ return false;
+ if (!xdrgen_encode_bool(xdr, value->reclaim))
+ return false;
+ if (!xdrgen_encode_int32(xdr, value->state))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_cancargs(struct xdr_stream *xdr, const struct nlm4_cancargs *value)
+{
+ if (!xdrgen_encode_netobj(xdr, value->cookie))
+ return false;
+ if (!xdrgen_encode_bool(xdr, value->block))
+ return false;
+ if (!xdrgen_encode_bool(xdr, value->exclusive))
+ return false;
+ if (!xdrgen_encode_nlm4_lock(xdr, &value->alock))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_testargs(struct xdr_stream *xdr, const struct nlm4_testargs *value)
+{
+ if (!xdrgen_encode_netobj(xdr, value->cookie))
+ return false;
+ if (!xdrgen_encode_bool(xdr, value->exclusive))
+ return false;
+ if (!xdrgen_encode_nlm4_lock(xdr, &value->alock))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_unlockargs(struct xdr_stream *xdr, const struct nlm4_unlockargs *value)
+{
+ if (!xdrgen_encode_netobj(xdr, value->cookie))
+ return false;
+ if (!xdrgen_encode_nlm4_lock(xdr, &value->alock))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_share(struct xdr_stream *xdr, const struct nlm4_share *value)
+{
+ if (value->caller_name.len > LM_MAXSTRLEN)
+ return false;
+ if (xdr_stream_encode_opaque(xdr, value->caller_name.data, value->caller_name.len) < 0)
+ return false;
+ if (!xdrgen_encode_netobj(xdr, value->fh))
+ return false;
+ if (!xdrgen_encode_netobj(xdr, value->oh))
+ return false;
+ if (!xdrgen_encode_fsh4_mode(xdr, value->mode))
+ return false;
+ if (!xdrgen_encode_fsh4_access(xdr, value->access))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_shareargs(struct xdr_stream *xdr, const struct nlm4_shareargs *value)
+{
+ if (!xdrgen_encode_netobj(xdr, value->cookie))
+ return false;
+ if (!xdrgen_encode_nlm4_share(xdr, &value->share))
+ return false;
+ if (!xdrgen_encode_bool(xdr, value->reclaim))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_shareres(struct xdr_stream *xdr, const struct nlm4_shareres *value)
+{
+ if (!xdrgen_encode_netobj(xdr, value->cookie))
+ return false;
+ if (!xdrgen_encode_nlm4_stats(xdr, value->stat))
+ return false;
+ if (!xdrgen_encode_int32(xdr, value->sequence))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_notify(struct xdr_stream *xdr, const struct nlm4_notify *value)
+{
+ if (value->name.len > LM_MAXNAMELEN)
+ return false;
+ if (xdr_stream_encode_opaque(xdr, value->name.data, value->name.len) < 0)
+ return false;
+ if (!xdrgen_encode_int32(xdr, value->state))
+ return false;
+ return true;
+}
+
+static bool __maybe_unused
+xdrgen_encode_nlm4_notifyargs(struct xdr_stream *xdr, const struct nlm4_notifyargs *value)
+{
+ if (!xdrgen_encode_nlm4_notify(xdr, &value->notify))
+ return false;
+ if (xdr_stream_encode_opaque_fixed(xdr, value->private, SM_PRIV_SIZE) < 0)
+ return false;
+ return true;
+}
+
+/**
+ * nlm4_svc_encode_void - Encode a void result
+ * @rqstp: RPC transaction context
+ * @xdr: target XDR data stream
+ *
+ * Return values:
+ * %true: procedure results encoded successfully
+ * %false: encode failed
+ */
+bool nlm4_svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ return xdrgen_encode_void(xdr);
+}
+
+/**
+ * nlm4_svc_encode_nlm4_testres - Encode a nlm4_testres result
+ * @rqstp: RPC transaction context
+ * @xdr: target XDR data stream
+ *
+ * Return values:
+ * %true: procedure results encoded successfully
+ * %false: encode failed
+ */
+bool nlm4_svc_encode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_testres *resp = rqstp->rq_resp;
+
+ return xdrgen_encode_nlm4_testres(xdr, resp);
+}
+
+/**
+ * nlm4_svc_encode_nlm4_res - Encode a nlm4_res result
+ * @rqstp: RPC transaction context
+ * @xdr: target XDR data stream
+ *
+ * Return values:
+ * %true: procedure results encoded successfully
+ * %false: encode failed
+ */
+bool nlm4_svc_encode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_res *resp = rqstp->rq_resp;
+
+ return xdrgen_encode_nlm4_res(xdr, resp);
+}
+
+/**
+ * nlm4_svc_encode_nlm4_shareres - Encode a nlm4_shareres result
+ * @rqstp: RPC transaction context
+ * @xdr: target XDR data stream
+ *
+ * Return values:
+ * %true: procedure results encoded successfully
+ * %false: encode failed
+ */
+bool nlm4_svc_encode_nlm4_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct nlm4_shareres *resp = rqstp->rq_resp;
+
+ return xdrgen_encode_nlm4_shareres(xdr, resp);
+}
diff --git a/fs/lockd/nlm4xdr_gen.h b/fs/lockd/nlm4xdr_gen.h
new file mode 100644
index 000000000000..b6008b296a3e
--- /dev/null
+++ b/fs/lockd/nlm4xdr_gen.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x */
+/* XDR specification modification time: Thu Dec 25 13:10:19 2025 */
+
+#ifndef _LINUX_XDRGEN_NLM4_DECL_H
+#define _LINUX_XDRGEN_NLM4_DECL_H
+
+#include <linux/types.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+#include <linux/sunrpc/xdrgen/_builtins.h>
+#include <linux/sunrpc/xdrgen/nlm4.h>
+
+bool nlm4_svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_notifyargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_decode_nlm4_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+bool nlm4_svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_encode_nlm4_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_encode_nlm4_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nlm4_svc_encode_nlm4_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
+#endif /* _LINUX_XDRGEN_NLM4_DECL_H */
diff --git a/include/linux/lockd/share.h b/fs/lockd/share.h
index 1f18a9faf645..20ea8ee49168 100644
--- a/include/linux/lockd/share.h
+++ b/fs/lockd/share.h
@@ -1,14 +1,15 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * linux/include/linux/lockd/share.h
- *
* DOS share management for lockd.
*
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
-#ifndef LINUX_LOCKD_SHARE_H
-#define LINUX_LOCKD_SHARE_H
+#ifndef _LOCKD_SHARE_H
+#define _LOCKD_SHARE_H
+
+/* Synthetic svid for lockowner lookup during share operations */
+#define LOCKD_SHARE_SVID (~(u32)0)
/*
* DOS share for a specific file
@@ -22,11 +23,11 @@ struct nlm_share {
u32 s_mode; /* deny mode */
};
-__be32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *,
- struct nlm_args *);
-__be32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *,
- struct nlm_args *);
+__be32 nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
+ struct xdr_netobj *oh, u32 access, u32 mode);
+__be32 nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
+ struct xdr_netobj *oh);
void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *,
nlm_host_match_fn_t);
-#endif /* LINUX_LOCKD_SHARE_H */
+#endif /* _LOCKD_SHARE_H */
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index dcd80c4e74c9..490551369ef2 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -36,15 +36,14 @@
#include <net/ip.h>
#include <net/addrconf.h>
#include <net/ipv6.h>
-#include <linux/lockd/lockd.h>
#include <linux/nfs.h>
+#include "lockd.h"
#include "netns.h"
#include "procfs.h"
#include "netlink.h"
#define NLMDBG_FACILITY NLMDBG_SVC
-#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
static struct svc_program nlmsvc_program;
@@ -319,6 +318,7 @@ static struct notifier_block lockd_inet6addr_notifier = {
static int lockd_get(void)
{
struct svc_serv *serv;
+ unsigned int bufsize;
int error;
if (nlmsvc_serv) {
@@ -334,7 +334,15 @@ static int lockd_get(void)
printk(KERN_WARNING
"lockd_up: no pid, %d users??\n", nlmsvc_users);
- serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, lockd);
+#ifdef CONFIG_LOCKD_V4
+ bufsize = 1024 + max3(nlmsvc_version1.vs_xdrsize,
+ nlmsvc_version3.vs_xdrsize,
+ nlmsvc_version4.vs_xdrsize);
+#else
+ bufsize = 1024 + max(nlmsvc_version1.vs_xdrsize,
+ nlmsvc_version3.vs_xdrsize);
+#endif
+ serv = svc_create(&nlmsvc_program, bufsize, lockd);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
return -ENOMEM;
@@ -640,7 +648,7 @@ module_exit(exit_nlm);
* %0: Processing complete; do not send a Reply
* %1: Processing complete; send Reply in rqstp->rq_res
*/
-static int nlmsvc_dispatch(struct svc_rqst *rqstp)
+int nlmsvc_dispatch(struct svc_rqst *rqstp)
{
const struct svc_procedure *procp = rqstp->rq_procinfo;
__be32 *statp = rqstp->rq_accept_statp;
@@ -671,40 +679,6 @@ out_encode_err:
/*
* Define NLM program and procedures
*/
-static DEFINE_PER_CPU_ALIGNED(unsigned long, nlmsvc_version1_count[17]);
-static const struct svc_version nlmsvc_version1 = {
- .vs_vers = 1,
- .vs_nproc = 17,
- .vs_proc = nlmsvc_procedures,
- .vs_count = nlmsvc_version1_count,
- .vs_dispatch = nlmsvc_dispatch,
- .vs_xdrsize = NLMSVC_XDRSIZE,
-};
-
-static DEFINE_PER_CPU_ALIGNED(unsigned long,
- nlmsvc_version3_count[ARRAY_SIZE(nlmsvc_procedures)]);
-static const struct svc_version nlmsvc_version3 = {
- .vs_vers = 3,
- .vs_nproc = ARRAY_SIZE(nlmsvc_procedures),
- .vs_proc = nlmsvc_procedures,
- .vs_count = nlmsvc_version3_count,
- .vs_dispatch = nlmsvc_dispatch,
- .vs_xdrsize = NLMSVC_XDRSIZE,
-};
-
-#ifdef CONFIG_LOCKD_V4
-static DEFINE_PER_CPU_ALIGNED(unsigned long,
- nlmsvc_version4_count[ARRAY_SIZE(nlmsvc_procedures4)]);
-static const struct svc_version nlmsvc_version4 = {
- .vs_vers = 4,
- .vs_nproc = ARRAY_SIZE(nlmsvc_procedures4),
- .vs_proc = nlmsvc_procedures4,
- .vs_count = nlmsvc_version4_count,
- .vs_dispatch = nlmsvc_dispatch,
- .vs_xdrsize = NLMSVC_XDRSIZE,
-};
-#endif
-
static const struct svc_version *nlmsvc_version[] = {
[1] = &nlmsvc_version1,
[3] = &nlmsvc_version3,
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 4b6f18d97734..5de41e249534 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -10,257 +10,528 @@
#include <linux/types.h>
#include <linux/time.h>
-#include <linux/lockd/lockd.h>
-#include <linux/lockd/share.h>
#include <linux/sunrpc/svc_xprt.h>
-#define NLMDBG_FACILITY NLMDBG_CLIENT
+#include "lockd.h"
/*
- * Obtain client and file from arguments
+ * xdr.h defines SM_MAXSTRLEN and SM_PRIV_SIZE as macros.
+ * nlm4xdr_gen.h defines them as enum constants. Undefine the
+ * macros to allow the xdrgen enum definitions to be used.
*/
-static __be32
-nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
- struct nlm_host **hostp, struct nlm_file **filp)
-{
- struct nlm_host *host = NULL;
- struct nlm_file *file = NULL;
- struct nlm_lock *lock = &argp->lock;
- __be32 error = 0;
+#undef SM_MAXSTRLEN
+#undef SM_PRIV_SIZE
- /* nfsd callbacks must have been installed for this procedure */
- if (!nlmsvc_ops)
- return nlm_lck_denied_nolocks;
+#include "share.h"
+#include "nlm4xdr_gen.h"
- if (lock->lock_start > OFFSET_MAX ||
- (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start))))
- return nlm4_fbig;
+/*
+ * Wrapper structures combine xdrgen types with legacy nlm_lock.
+ * The xdrgen field must be first so the structure can be cast
+ * to its XDR type for the RPC dispatch layer.
+ */
+struct nlm4_testargs_wrapper {
+ struct nlm4_testargs xdrgen;
+ struct nlm_lock lock;
+};
- /* Obtain host handle */
- if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len))
- || (argp->monitor && nsm_monitor(host) < 0))
- goto no_locks;
- *hostp = host;
-
- /* Obtain file pointer. Not used by FREE_ALL call. */
- if (filp != NULL) {
- int mode = lock_to_openmode(&lock->fl);
-
- lock->fl.c.flc_flags = FL_POSIX;
-
- error = nlm_lookup_file(rqstp, &file, lock);
- if (error)
- goto no_locks;
- *filp = file;
-
- /* Set up the missing parts of the file_lock structure */
- lock->fl.c.flc_file = file->f_file[mode];
- lock->fl.c.flc_pid = current->tgid;
- lock->fl.fl_start = (loff_t)lock->lock_start;
- lock->fl.fl_end = lock->lock_len ?
- (loff_t)(lock->lock_start + lock->lock_len - 1) :
- OFFSET_MAX;
- lock->fl.fl_lmops = &nlmsvc_lock_operations;
- nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
- if (!lock->fl.c.flc_owner) {
- /* lockowner allocation has failed */
- nlmsvc_release_host(host);
- return nlm_lck_denied_nolocks;
- }
- }
+static_assert(offsetof(struct nlm4_testargs_wrapper, xdrgen) == 0);
- return 0;
+struct nlm4_lockargs_wrapper {
+ struct nlm4_lockargs xdrgen;
+ struct nlm_cookie cookie;
+ struct nlm_lock lock;
+};
-no_locks:
- nlmsvc_release_host(host);
- if (error)
- return error;
- return nlm_lck_denied_nolocks;
-}
+static_assert(offsetof(struct nlm4_lockargs_wrapper, xdrgen) == 0);
-/*
- * NULL: Test for presence of service
- */
-static __be32
-nlm4svc_proc_null(struct svc_rqst *rqstp)
-{
- dprintk("lockd: NULL called\n");
- return rpc_success;
-}
+struct nlm4_cancargs_wrapper {
+ struct nlm4_cancargs xdrgen;
+ struct nlm_lock lock;
+};
-/*
- * TEST: Check for conflicting lock
- */
-static __be32
-__nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
-{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_host *host;
- struct nlm_file *file;
- __be32 rc = rpc_success;
+static_assert(offsetof(struct nlm4_cancargs_wrapper, xdrgen) == 0);
- dprintk("lockd: TEST4 called\n");
- resp->cookie = argp->cookie;
+struct nlm4_unlockargs_wrapper {
+ struct nlm4_unlockargs xdrgen;
+ struct nlm_lock lock;
+};
- /* Obtain client and file */
- if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+static_assert(offsetof(struct nlm4_unlockargs_wrapper, xdrgen) == 0);
- /* Now check for conflicting locks */
- resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock,
- &resp->lock);
- if (resp->status == nlm_drop_reply)
- rc = rpc_drop_reply;
- else
- dprintk("lockd: TEST4 status %d\n", ntohl(resp->status));
+struct nlm4_notifyargs_wrapper {
+ struct nlm4_notifyargs xdrgen;
+ struct nlm_reboot reboot;
+};
- nlmsvc_release_lockowner(&argp->lock);
- nlmsvc_release_host(host);
- nlm_release_file(file);
- return rc;
-}
+static_assert(offsetof(struct nlm4_notifyargs_wrapper, xdrgen) == 0);
-static __be32
-nlm4svc_proc_test(struct svc_rqst *rqstp)
-{
- return __nlm4svc_proc_test(rqstp, rqstp->rq_resp);
-}
+struct nlm4_notify_wrapper {
+ struct nlm4_notify xdrgen;
+};
-static __be32
-__nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
-{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_host *host;
- struct nlm_file *file;
- __be32 rc = rpc_success;
+static_assert(offsetof(struct nlm4_notify_wrapper, xdrgen) == 0);
- dprintk("lockd: LOCK called\n");
+struct nlm4_testres_wrapper {
+ struct nlm4_testres xdrgen;
+ struct nlm_lock lock;
+};
- resp->cookie = argp->cookie;
+struct nlm4_shareargs_wrapper {
+ struct nlm4_shareargs xdrgen;
+ struct nlm_lock lock;
+};
- /* Obtain client and file */
- if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+static_assert(offsetof(struct nlm4_shareargs_wrapper, xdrgen) == 0);
- /* Now try to lock the file */
- resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
- argp->block, &argp->cookie,
- argp->reclaim);
- if (resp->status == nlm_drop_reply)
- rc = rpc_drop_reply;
- else
- dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
+static_assert(offsetof(struct nlm4_testres_wrapper, xdrgen) == 0);
- nlmsvc_release_lockowner(&argp->lock);
- nlmsvc_release_host(host);
- nlm_release_file(file);
- return rc;
+struct nlm4_res_wrapper {
+ struct nlm4_res xdrgen;
+ struct nlm_cookie cookie;
+};
+
+static_assert(offsetof(struct nlm4_res_wrapper, xdrgen) == 0);
+
+struct nlm4_shareres_wrapper {
+ struct nlm4_shareres xdrgen;
+};
+
+static_assert(offsetof(struct nlm4_shareres_wrapper, xdrgen) == 0);
+
+static __be32
+nlm4_netobj_to_cookie(struct nlm_cookie *cookie, netobj *object)
+{
+ if (object->len > NLM_MAXCOOKIELEN)
+ return nlm_lck_denied_nolocks;
+ cookie->len = object->len;
+ memcpy(cookie->data, object->data, object->len);
+ return nlm_granted;
}
static __be32
-nlm4svc_proc_lock(struct svc_rqst *rqstp)
+nlm4_lock_to_nlm_lock(struct nlm_lock *lock, struct nlm4_lock *alock)
{
- return __nlm4svc_proc_lock(rqstp, rqstp->rq_resp);
+ if (alock->fh.len > NFS_MAXFHSIZE)
+ return nlm_lck_denied;
+ lock->fh.size = alock->fh.len;
+ memcpy(lock->fh.data, alock->fh.data, alock->fh.len);
+ lock->oh.len = alock->oh.len;
+ lock->oh.data = alock->oh.data;
+ lock->svid = alock->svid;
+ locks_init_lock(&lock->fl);
+ lockd_set_file_lock_range4(&lock->fl, alock->l_offset, alock->l_len);
+ return nlm_granted;
+}
+
+static struct nlm_host *
+nlm4svc_lookup_host(struct svc_rqst *rqstp, string caller, bool monitored)
+{
+ struct nlm_host *host;
+
+ if (!nlmsvc_ops)
+ return NULL;
+ host = nlmsvc_lookup_host(rqstp, caller.data, caller.len);
+ if (!host)
+ return NULL;
+ if (monitored && nsm_monitor(host) < 0) {
+ nlmsvc_release_host(host);
+ return NULL;
+ }
+ return host;
}
static __be32
-__nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
+nlm4svc_lookup_file(struct svc_rqst *rqstp, struct nlm_host *host,
+ struct nlm_lock *lock, struct nlm_file **filp,
+ struct nlm4_lock *xdr_lock, unsigned char type)
{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_host *host;
- struct nlm_file *file;
+ struct file_lock *fl = &lock->fl;
+ struct nlm_file *file = NULL;
+ __be32 error;
- dprintk("lockd: CANCEL called\n");
+ if (xdr_lock->fh.len > NFS_MAXFHSIZE)
+ return nlm_lck_denied_nolocks;
+ lock->fh.size = xdr_lock->fh.len;
+ memcpy(lock->fh.data, xdr_lock->fh.data, xdr_lock->fh.len);
- resp->cookie = argp->cookie;
+ lock->oh.len = xdr_lock->oh.len;
+ lock->oh.data = xdr_lock->oh.data;
- /* Don't accept requests during grace period */
- if (locks_in_grace(SVC_NET(rqstp))) {
- resp->status = nlm_lck_denied_grace_period;
- return rpc_success;
- }
+ lock->svid = xdr_lock->svid;
+ lock->lock_start = xdr_lock->l_offset;
+ lock->lock_len = xdr_lock->l_len;
- /* Obtain client and file */
- if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ if (lock->lock_start > OFFSET_MAX ||
+ (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start))))
+ return nlm4_fbig;
- /* Try to cancel request. */
- resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock);
+ locks_init_lock(fl);
+ fl->c.flc_type = type;
+ lockd_set_file_lock_range4(fl, lock->lock_start, lock->lock_len);
+
+ error = nlm_lookup_file(rqstp, &file, lock);
+ switch (error) {
+ case nlm_granted:
+ break;
+ case nlm__int__stale_fh:
+ return nlm4_stale_fh;
+ case nlm__int__failed:
+ return nlm4_failed;
+ default:
+ return error;
+ }
+ *filp = file;
+
+ fl->c.flc_flags = FL_POSIX;
+ fl->c.flc_file = file->f_file[lock_to_openmode(fl)];
+ fl->c.flc_pid = current->tgid;
+ fl->fl_lmops = &nlmsvc_lock_operations;
+ nlmsvc_locks_init_private(fl, host, (pid_t)lock->svid);
+ if (!fl->c.flc_owner)
+ return nlm_lck_denied_nolocks;
- dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
- nlmsvc_release_lockowner(&argp->lock);
- nlmsvc_release_host(host);
- nlm_release_file(file);
- return rpc_success;
+ return nlm_granted;
}
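
The overflow test above implements the NLM4_FBIG rule: a byte range
covers [l_offset, l_offset + l_len - 1], with l_len == 0 meaning "to
end of file", and its last byte must fit in loff_t. A worked sketch of
the boundary cases (values illustrative):

	/* With OFFSET_MAX == 2^63 - 1:
	 *   start = OFFSET_MAX - 3, len = 4: last byte == OFFSET_MAX,
	 *     (len - 1) == (OFFSET_MAX - start)    -> accepted
	 *   start = OFFSET_MAX - 3, len = 5: last byte overflows,
	 *     (len - 1) >  (OFFSET_MAX - start)    -> nlm4_fbig
	 *   len = 0: whole-file lock               -> accepted
	 */
	bool fbig = start > OFFSET_MAX ||
		    (len && (len - 1) > (OFFSET_MAX - start));
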
+/**
+ * nlm4svc_proc_null - NULL: Test for presence of service
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully
+ *
+ * RPC synopsis:
+ * void NLMPROC4_NULL(void) = 0;
+ */
static __be32
-nlm4svc_proc_cancel(struct svc_rqst *rqstp)
+nlm4svc_proc_null(struct svc_rqst *rqstp)
{
- return __nlm4svc_proc_cancel(rqstp, rqstp->rq_resp);
+ return rpc_success;
}
-/*
- * UNLOCK: release a lock
+/**
+ * nlm4svc_proc_test - TEST: Check for conflicting lock
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_drop_reply: Do not send an RPC reply.
+ *
+ * RPC synopsis:
+ * nlm4_testres NLMPROC4_TEST(nlm4_testargs) = 1;
+ *
+ * Permissible procedure status codes:
+ * %NLM4_GRANTED: The server would be able to grant the
+ * requested lock.
+ * %NLM4_DENIED: The requested lock conflicted with existing
+ * lock reservations for the file.
+ * %NLM4_DENIED_NOLOCKS: The server could not allocate the resources
+ * needed to process the request.
+ * %NLM4_DENIED_GRACE_PERIOD: The server has recently restarted and is
+ * re-establishing existing locks, and is not
+ * yet ready to accept normal service requests.
+ *
+ * The Linux NLM server implementation also returns:
+ * %NLM4_STALE_FH: The request specified an invalid file handle.
+ * %NLM4_FBIG: The request specified a length or offset
+ * that exceeds the range supported by the
+ * server.
+ * %NLM4_FAILED: The request failed for an unspecified reason.
*/
-static __be32
-__nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
+static __be32 nlm4svc_proc_test(struct svc_rqst *rqstp)
{
- struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm4_testargs_wrapper *argp = rqstp->rq_argp;
+ unsigned char type = argp->xdrgen.exclusive ? F_WRLCK : F_RDLCK;
+ struct nlm4_testres_wrapper *resp = rqstp->rq_resp;
+ struct nlm_file *file = NULL;
struct nlm_host *host;
- struct nlm_file *file;
- dprintk("lockd: UNLOCK called\n");
+ resp->xdrgen.cookie = argp->xdrgen.cookie;
- resp->cookie = argp->cookie;
+ resp->xdrgen.stat.stat = nlm_lck_denied_nolocks;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ goto out;
- /* Don't accept new lock requests during grace period */
- if (locks_in_grace(SVC_NET(rqstp))) {
- resp->status = nlm_lck_denied_grace_period;
- return rpc_success;
+ resp->xdrgen.stat.stat = nlm4svc_lookup_file(rqstp, host, &argp->lock,
+ &file, &argp->xdrgen.alock,
+ type);
+ if (resp->xdrgen.stat.stat)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlmsvc_testlock(rqstp, file, host,
+ &argp->lock, &resp->lock);
+ nlmsvc_release_lockowner(&argp->lock);
+
+ if (resp->xdrgen.stat.stat == nlm_lck_denied) {
+ struct nlm_lock *conf = &resp->lock;
+ struct nlm4_holder *holder = &resp->xdrgen.stat.u.holder;
+
+ holder->exclusive = (conf->fl.c.flc_type != F_RDLCK);
+ holder->svid = conf->svid;
+ holder->oh.len = conf->oh.len;
+ holder->oh.data = conf->oh.data;
+ holder->l_offset = conf->fl.fl_start;
+ if (conf->fl.fl_end == OFFSET_MAX)
+ holder->l_len = 0;
+ else
+ holder->l_len = conf->fl.fl_end - conf->fl.fl_start + 1;
}
- /* Obtain client and file */
- if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+out:
+ if (file)
+ nlm_release_file(file);
+ nlmsvc_release_host(host);
+ return resp->xdrgen.stat.stat == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
+}
- /* Now try to remove the lock */
- resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock);
+static __be32
+nlm4svc_do_lock(struct svc_rqst *rqstp, bool monitored)
+{
+ struct nlm4_lockargs_wrapper *argp = rqstp->rq_argp;
+ unsigned char type = argp->xdrgen.exclusive ? F_WRLCK : F_RDLCK;
+ struct nlm4_res_wrapper *resp = rqstp->rq_resp;
+ struct nlm_file *file = NULL;
+ struct nlm_host *host = NULL;
+
+ resp->xdrgen.cookie = argp->xdrgen.cookie;
+
+ resp->xdrgen.stat.stat = nlm4_netobj_to_cookie(&argp->cookie,
+ &argp->xdrgen.cookie);
+ if (resp->xdrgen.stat.stat)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlm_lck_denied_nolocks;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name,
+ monitored);
+ if (!host)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlm4svc_lookup_file(rqstp, host, &argp->lock,
+ &file, &argp->xdrgen.alock,
+ type);
+ if (resp->xdrgen.stat.stat)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlmsvc_lock(rqstp, file, host, &argp->lock,
+ argp->xdrgen.block, &argp->cookie,
+ argp->xdrgen.reclaim);
+ if (resp->xdrgen.stat.stat == nlm__int__deadlock)
+ resp->xdrgen.stat.stat = nlm4_deadlock;
- dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
+
+out:
+ if (file)
+ nlm_release_file(file);
nlmsvc_release_host(host);
- nlm_release_file(file);
- return rpc_success;
+ return resp->xdrgen.stat.stat == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
}
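
Note that nlm4svc_do_lock() backs both LOCK (monitored == true) and
NM_LOCK (monitored == false); the only behavioral difference is whether
nlm4svc_lookup_host() asks statd, via nsm_monitor(), to watch the
caller for reboots.
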
+/**
+ * nlm4svc_proc_lock - LOCK: Establish a monitored lock
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_drop_reply: Do not send an RPC reply.
+ *
+ * RPC synopsis:
+ * nlm4_res NLMPROC4_LOCK(nlm4_lockargs) = 2;
+ *
+ * Permissible procedure status codes:
+ * %NLM4_GRANTED: The requested lock was granted.
+ * %NLM4_DENIED: The requested lock conflicted with existing
+ * lock reservations for the file.
+ * %NLM4_DENIED_NOLOCKS: The server could not allocate the resources
+ * needed to process the request.
+ * %NLM4_BLOCKED: The blocking request cannot be granted
+ * immediately. The server will send an
+ * NLMPROC4_GRANTED callback to the client when
+ * the lock can be granted.
+ * %NLM4_DENIED_GRACE_PERIOD: The server has recently restarted and is
+ * re-establishing existing locks, and is not
+ * yet ready to accept normal service requests.
+ *
+ * The Linux NLM server implementation also returns:
+ * %NLM4_DEADLCK: The request could not be granted and
+ * blocking would cause a deadlock.
+ * %NLM4_STALE_FH: The request specified an invalid file handle.
+ * %NLM4_FBIG: The request specified a length or offset
+ * that exceeds the range supported by the
+ * server.
+ * %NLM4_FAILED: The request failed for an unspecified reason.
+ */
static __be32
-nlm4svc_proc_unlock(struct svc_rqst *rqstp)
+nlm4svc_proc_lock(struct svc_rqst *rqstp)
{
- return __nlm4svc_proc_unlock(rqstp, rqstp->rq_resp);
+ return nlm4svc_do_lock(rqstp, true);
}
-/*
- * GRANTED: A server calls us to tell that a process' lock request
- * was granted
+/**
+ * nlm4svc_proc_cancel - CANCEL: Cancel an outstanding blocked lock request
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_drop_reply: Do not send an RPC reply.
+ *
+ * RPC synopsis:
+ * nlm4_res NLMPROC4_CANCEL(nlm4_cancargs) = 3;
+ *
+ * Permissible procedure status codes:
+ * %NLM4_GRANTED: The requested lock was canceled.
+ * %NLM4_DENIED: There was no lock to cancel.
+ * %NLM4_DENIED_GRACE_PERIOD: The server has recently restarted and is
+ * re-establishing existing locks, and is not
+ * yet ready to accept normal service requests.
+ *
+ * The Linux NLM server implementation also returns:
+ * %NLM4_DENIED_NOLOCKS: A needed resource could not be allocated.
+ * %NLM4_STALE_FH: The request specified an invalid file handle.
+ * %NLM4_FBIG: The request specified a length or offset
+ * that exceeds the range supported by the
+ * server.
+ * %NLM4_FAILED: The request failed for an unspecified reason.
*/
static __be32
-__nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_res *resp)
+nlm4svc_proc_cancel(struct svc_rqst *rqstp)
{
- struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm4_cancargs_wrapper *argp = rqstp->rq_argp;
+ unsigned char type = argp->xdrgen.exclusive ? F_WRLCK : F_RDLCK;
+ struct nlm4_res_wrapper *resp = rqstp->rq_resp;
+ struct net *net = SVC_NET(rqstp);
+ struct nlm_host *host = NULL;
+ struct nlm_file *file = NULL;
+
+ resp->xdrgen.cookie = argp->xdrgen.cookie;
+
+ resp->xdrgen.stat.stat = nlm_lck_denied_grace_period;
+ if (locks_in_grace(net))
+ goto out;
+
+ resp->xdrgen.stat.stat = nlm_lck_denied_nolocks;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlm4svc_lookup_file(rqstp, host, &argp->lock,
+ &file, &argp->xdrgen.alock,
+ type);
+ if (resp->xdrgen.stat.stat)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlmsvc_cancel_blocked(net, file, &argp->lock);
+ nlmsvc_release_lockowner(&argp->lock);
- resp->cookie = argp->cookie;
+out:
+ if (file)
+ nlm_release_file(file);
+ nlmsvc_release_host(host);
+ return resp->xdrgen.stat.stat == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
+}
- dprintk("lockd: GRANTED called\n");
- resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock);
- dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
- return rpc_success;
+/**
+ * nlm4svc_proc_unlock - UNLOCK: Remove a lock
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_drop_reply: Do not send an RPC reply.
+ *
+ * RPC synopsis:
+ * nlm4_res NLMPROC4_UNLOCK(nlm4_unlockargs) = 4;
+ *
+ * Permissible procedure status codes:
+ * %NLM4_GRANTED: The requested lock was released.
+ * %NLM4_DENIED_GRACE_PERIOD: The server has recently restarted and is
+ * re-establishing existing locks, and is not
+ * yet ready to accept normal service requests.
+ *
+ * The Linux NLM server implementation also returns:
+ * %NLM4_DENIED_NOLOCKS: A needed resource could not be allocated.
+ * %NLM4_STALE_FH: The request specified an invalid file handle.
+ * %NLM4_FBIG: The request specified a length or offset
+ * that exceeds the range supported by the
+ * server.
+ * %NLM4_FAILED: The request failed for an unspecified reason.
+ */
+static __be32
+nlm4svc_proc_unlock(struct svc_rqst *rqstp)
+{
+ struct nlm4_unlockargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm4_res_wrapper *resp = rqstp->rq_resp;
+ struct net *net = SVC_NET(rqstp);
+ struct nlm_host *host = NULL;
+ struct nlm_file *file = NULL;
+
+ resp->xdrgen.cookie = argp->xdrgen.cookie;
+
+ resp->xdrgen.stat.stat = nlm_lck_denied_grace_period;
+ if (locks_in_grace(net))
+ goto out;
+
+ resp->xdrgen.stat.stat = nlm_lck_denied_nolocks;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlm4svc_lookup_file(rqstp, host, &argp->lock,
+ &file, &argp->xdrgen.alock,
+ F_UNLCK);
+ if (resp->xdrgen.stat.stat)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlmsvc_unlock(net, file, &argp->lock);
+ nlmsvc_release_lockowner(&argp->lock);
+
+out:
+ if (file)
+ nlm_release_file(file);
+ nlmsvc_release_host(host);
+ return resp->xdrgen.stat.stat == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
}
+/**
+ * nlm4svc_proc_granted - GRANTED: Server grants a previously blocked lock
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ *
+ * RPC synopsis:
+ * nlm4_res NLMPROC4_GRANTED(nlm4_testargs) = 5;
+ *
+ * Permissible procedure status codes:
+ * %NLM4_GRANTED: The requested lock was granted.
+ * %NLM4_DENIED: The client found no blocked lock request
+ * matching this GRANTED callback.
+ * %NLM4_DENIED_GRACE_PERIOD: The server has recently restarted and is
+ * re-establishing existing locks, and is not
+ * yet ready to accept normal service requests.
+ */
static __be32
nlm4svc_proc_granted(struct svc_rqst *rqstp)
{
- return __nlm4svc_proc_granted(rqstp, rqstp->rq_resp);
+ struct nlm4_testargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm4_res_wrapper *resp = rqstp->rq_resp;
+
+ resp->xdrgen.cookie = argp->xdrgen.cookie;
+
+ resp->xdrgen.stat.stat = nlm4_lock_to_nlm_lock(&argp->lock,
+ &argp->xdrgen.alock);
+ if (resp->xdrgen.stat.stat)
+ goto out;
+
+ resp->xdrgen.stat.stat = nlmclnt_grant(svc_addr(rqstp), &argp->lock);
+
+out:
+ return rpc_success;
}
/*
@@ -281,24 +552,17 @@ static const struct rpc_call_ops nlm4svc_callback_ops = {
};
/*
- * `Async' versions of the above service routines. They aren't really,
- * because we send the callback before the reply proper. I hope this
- * doesn't break any clients.
+ * Dispatch an async callback RPC to a client with a pre-resolved host.
+ * Caller provides a reference to @host; this function takes ownership
+ * and releases it via nlmsvc_release_host() before returning.
*/
-static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc,
- __be32 (*func)(struct svc_rqst *, struct nlm_res *))
+static __be32
+nlm4svc_callback(struct svc_rqst *rqstp, struct nlm_host *host, u32 proc,
+ __be32 (*func)(struct svc_rqst *, struct nlm_res *))
{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_host *host;
struct nlm_rqst *call;
__be32 stat;
- host = nlmsvc_lookup_host(rqstp,
- argp->lock.caller,
- argp->lock.len);
- if (host == NULL)
- return rpc_system_err;
-
call = nlm_alloc_call(host);
nlmsvc_release_host(host);
if (call == NULL)
@@ -316,433 +580,845 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc,
return rpc_success;
}
-static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp)
+static __be32
+__nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_res *resp)
{
- dprintk("lockd: TEST_MSG called\n");
- return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, __nlm4svc_proc_test);
-}
+ struct nlm4_testargs_wrapper *argp = rqstp->rq_argp;
+ unsigned char type = argp->xdrgen.exclusive ? F_WRLCK : F_RDLCK;
+ struct nlm_lockowner *owner;
+ struct nlm_file *file = NULL;
+ struct nlm_host *host = NULL;
+
+ resp->status = nlm_lck_denied_nolocks;
+ if (nlm4_netobj_to_cookie(&resp->cookie, &argp->xdrgen.cookie))
+ goto out;
+
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ goto out;
+
+ resp->status = nlm4svc_lookup_file(rqstp, host, &argp->lock,
+ &file, &argp->xdrgen.alock, type);
+ if (resp->status)
+ goto out;
+
+ owner = argp->lock.fl.c.flc_owner;
+ resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock,
+ &resp->lock);
+ nlmsvc_put_lockowner(owner);
-static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp)
-{
- dprintk("lockd: LOCK_MSG called\n");
- return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, __nlm4svc_proc_lock);
+out:
+ if (file)
+ nlm_release_file(file);
+ nlmsvc_release_host(host);
+ return resp->status == nlm__int__drop_reply ? rpc_drop_reply : rpc_success;
}
-static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp)
+/**
+ * nlm4svc_proc_test_msg - TEST_MSG: Check for conflicting lock
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_system_err: RPC execution failed.
+ *
+ * RPC synopsis:
+ * void NLMPROC4_TEST_MSG(nlm4_testargs) = 6;
+ *
+ * The response to this request is delivered via the TEST_RES procedure.
+ */
+static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp)
{
- dprintk("lockd: CANCEL_MSG called\n");
- return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, __nlm4svc_proc_cancel);
+ struct nlm4_testargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm_host *host;
+
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ return rpc_system_err;
+
+ return nlm4svc_callback(rqstp, host, NLMPROC4_TEST_RES,
+ __nlm4svc_proc_test_msg);
}
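
The message-class procedures split one logical round trip into two
one-way RPCs: the *_MSG request carries the arguments, and the server
later sends the status back in a *_RES callback. Roughly (an
illustrative flow, not a wire trace):

	client                                server
	  |--- NLMPROC4_TEST_MSG(testargs) --->|  __nlm4svc_proc_test_msg()
	  |                                     |  computes the result
	  |<-- NLMPROC4_TEST_RES(testres) -----|  sent via nlm4svc_callback()
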
-static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp)
+static __be32
+__nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_res *resp)
{
- dprintk("lockd: UNLOCK_MSG called\n");
- return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, __nlm4svc_proc_unlock);
+ struct nlm4_lockargs_wrapper *argp = rqstp->rq_argp;
+ unsigned char type = argp->xdrgen.exclusive ? F_WRLCK : F_RDLCK;
+ struct nlm_file *file = NULL;
+ struct nlm_host *host = NULL;
+
+ resp->status = nlm_lck_denied_nolocks;
+ if (nlm4_netobj_to_cookie(&resp->cookie, &argp->xdrgen.cookie))
+ goto out;
+
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, true);
+ if (!host)
+ goto out;
+
+ resp->status = nlm4svc_lookup_file(rqstp, host, &argp->lock,
+ &file, &argp->xdrgen.alock, type);
+ if (resp->status)
+ goto out;
+
+ resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
+ argp->xdrgen.block, &resp->cookie,
+ argp->xdrgen.reclaim);
+ nlmsvc_release_lockowner(&argp->lock);
+
+out:
+ if (file)
+ nlm_release_file(file);
+ nlmsvc_release_host(host);
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
}
-static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp)
+/**
+ * nlm4svc_proc_lock_msg - LOCK_MSG: Establish a monitored lock
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_system_err: RPC execution failed.
+ *
+ * RPC synopsis:
+ * void NLMPROC4_LOCK_MSG(nlm4_lockargs) = 7;
+ *
+ * The response to this request is delivered via the LOCK_RES procedure.
+ */
+static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp)
{
- dprintk("lockd: GRANTED_MSG called\n");
- return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, __nlm4svc_proc_granted);
+ struct nlm4_lockargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm_host *host;
+
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, true);
+ if (!host)
+ return rpc_system_err;
+
+ return nlm4svc_callback(rqstp, host, NLMPROC4_LOCK_RES,
+ __nlm4svc_proc_lock_msg);
}
-/*
- * SHARE: create a DOS share or alter existing share.
- */
static __be32
-nlm4svc_proc_share(struct svc_rqst *rqstp)
+__nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_res *resp)
{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_res *resp = rqstp->rq_resp;
- struct nlm_host *host;
- struct nlm_file *file;
-
- dprintk("lockd: SHARE called\n");
+ struct nlm4_cancargs_wrapper *argp = rqstp->rq_argp;
+ unsigned char type = argp->xdrgen.exclusive ? F_WRLCK : F_RDLCK;
+ struct net *net = SVC_NET(rqstp);
+ struct nlm_file *file = NULL;
+ struct nlm_host *host = NULL;
+
+ resp->status = nlm_lck_denied_nolocks;
+ if (nlm4_netobj_to_cookie(&resp->cookie, &argp->xdrgen.cookie))
+ goto out;
+
+ resp->status = nlm_lck_denied_grace_period;
+ if (locks_in_grace(net))
+ goto out;
+
+ resp->status = nlm_lck_denied_nolocks;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ goto out;
+
+ resp->status = nlm4svc_lookup_file(rqstp, host, &argp->lock,
+ &file, &argp->xdrgen.alock, type);
+ if (resp->status)
+ goto out;
+
+ resp->status = nlmsvc_cancel_blocked(net, file, &argp->lock);
+ nlmsvc_release_lockowner(&argp->lock);
- resp->cookie = argp->cookie;
+out:
+ if (file)
+ nlm_release_file(file);
+ nlmsvc_release_host(host);
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
+}
- /* Don't accept new lock requests during grace period */
- if (locks_in_grace(SVC_NET(rqstp)) && !argp->reclaim) {
- resp->status = nlm_lck_denied_grace_period;
- return rpc_success;
- }
+/**
+ * nlm4svc_proc_cancel_msg - CANCEL_MSG: Cancel an outstanding lock request
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_system_err: RPC execution failed.
+ *
+ * RPC synopsis:
+ * void NLMPROC4_CANCEL_MSG(nlm4_cancargs) = 8;
+ *
+ * The response to this request is delivered via the CANCEL_RES procedure.
+ */
+static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp)
+{
+ struct nlm4_cancargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm_host *host;
- /* Obtain client and file */
- if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ return rpc_system_err;
- /* Now try to create the share */
- resp->status = nlmsvc_share_file(host, file, argp);
+ return nlm4svc_callback(rqstp, host, NLMPROC4_CANCEL_RES,
+ __nlm4svc_proc_cancel_msg);
+}
- dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
+static __be32
+__nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+ struct nlm4_unlockargs_wrapper *argp = rqstp->rq_argp;
+ struct net *net = SVC_NET(rqstp);
+ struct nlm_file *file = NULL;
+ struct nlm_host *host = NULL;
+
+ resp->status = nlm_lck_denied_nolocks;
+ if (nlm4_netobj_to_cookie(&resp->cookie, &argp->xdrgen.cookie))
+ goto out;
+
+ resp->status = nlm_lck_denied_grace_period;
+ if (locks_in_grace(net))
+ goto out;
+
+ resp->status = nlm_lck_denied_nolocks;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ goto out;
+
+ resp->status = nlm4svc_lookup_file(rqstp, host, &argp->lock,
+ &file, &argp->xdrgen.alock, F_UNLCK);
+ if (resp->status)
+ goto out;
+
+ resp->status = nlmsvc_unlock(net, file, &argp->lock);
nlmsvc_release_lockowner(&argp->lock);
+
+out:
+ if (file)
+ nlm_release_file(file);
nlmsvc_release_host(host);
- nlm_release_file(file);
- return rpc_success;
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
}
-/*
- * UNSHARE: Release a DOS share.
+/**
+ * nlm4svc_proc_unlock_msg - UNLOCK_MSG: Remove an existing lock
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_system_err: RPC execution failed.
+ *
+ * RPC synopsis:
+ * void NLMPROC4_UNLOCK_MSG(nlm4_unlockargs) = 9;
+ *
+ * The response to this request is delivered via the UNLOCK_RES procedure.
*/
-static __be32
-nlm4svc_proc_unshare(struct svc_rqst *rqstp)
+static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp)
{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_res *resp = rqstp->rq_resp;
- struct nlm_host *host;
- struct nlm_file *file;
+ struct nlm4_unlockargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm_host *host;
- dprintk("lockd: UNSHARE called\n");
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ return rpc_system_err;
- resp->cookie = argp->cookie;
+ return nlm4svc_callback(rqstp, host, NLMPROC4_UNLOCK_RES,
+ __nlm4svc_proc_unlock_msg);
+}
- /* Don't accept requests during grace period */
- if (locks_in_grace(SVC_NET(rqstp))) {
- resp->status = nlm_lck_denied_grace_period;
- return rpc_success;
- }
+static __be32
+__nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_res *resp)
+{
+ struct nlm4_testargs_wrapper *argp = rqstp->rq_argp;
- /* Obtain client and file */
- if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ resp->status = nlm_lck_denied;
+ if (nlm4_netobj_to_cookie(&resp->cookie, &argp->xdrgen.cookie))
+ goto out;
- /* Now try to lock the file */
- resp->status = nlmsvc_unshare_file(host, file, argp);
+ if (nlm4_lock_to_nlm_lock(&argp->lock, &argp->xdrgen.alock))
+ goto out;
- dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
- nlmsvc_release_lockowner(&argp->lock);
- nlmsvc_release_host(host);
- nlm_release_file(file);
+ resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock);
+
+out:
return rpc_success;
}
-/*
- * NM_LOCK: Create an unmonitored lock
+/**
+ * nlm4svc_proc_granted_msg - GRANTED_MSG: Blocked lock has been granted
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_system_err: RPC execution failed.
+ *
+ * RPC synopsis:
+ * void NLMPROC4_GRANTED_MSG(nlm4_testargs) = 10;
+ *
+ * The response to this request is delivered via the GRANTED_RES procedure.
*/
-static __be32
-nlm4svc_proc_nm_lock(struct svc_rqst *rqstp)
+static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp)
{
- struct nlm_args *argp = rqstp->rq_argp;
+ struct nlm4_testargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm_host *host;
- dprintk("lockd: NM_LOCK called\n");
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.alock.caller_name, false);
+ if (!host)
+ return rpc_system_err;
- argp->monitor = 0; /* just clean the monitor flag */
- return nlm4svc_proc_lock(rqstp);
+ return nlm4svc_callback(rqstp, host, NLMPROC4_GRANTED_RES,
+ __nlm4svc_proc_granted_msg);
}
-/*
- * FREE_ALL: Release all locks and shares held by client
+/**
+ * nlm4svc_proc_granted_res - GRANTED_RES: Result of a GRANTED callback
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ *
+ * RPC synopsis:
+ * void NLMPROC4_GRANTED_RES(nlm4_res) = 15;
*/
-static __be32
-nlm4svc_proc_free_all(struct svc_rqst *rqstp)
+static __be32 nlm4svc_proc_granted_res(struct svc_rqst *rqstp)
{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_host *host;
+ struct nlm4_res_wrapper *argp = rqstp->rq_argp;
- /* Obtain client */
- if (nlm4svc_retrieve_args(rqstp, argp, &host, NULL))
+ if (!nlmsvc_ops)
return rpc_success;
- nlmsvc_free_host_resources(host);
- nlmsvc_release_host(host);
+ if (nlm4_netobj_to_cookie(&argp->cookie, &argp->xdrgen.cookie))
+ return rpc_success;
+ nlmsvc_grant_reply(&argp->cookie, argp->xdrgen.stat.stat);
+
return rpc_success;
}
-/*
- * SM_NOTIFY: private callback from statd (not part of official NLM proto)
+/**
+ * nlm4svc_proc_sm_notify - SM_NOTIFY: Peer has rebooted
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_system_err: RPC execution failed.
+ *
+ * The SM_NOTIFY procedure is a private callback from Linux statd and is
+ * not part of the official NLM protocol.
+ *
+ * RPC synopsis:
+ * void NLMPROC4_SM_NOTIFY(nlm4_notifyargs) = 16;
*/
-static __be32
-nlm4svc_proc_sm_notify(struct svc_rqst *rqstp)
+static __be32 nlm4svc_proc_sm_notify(struct svc_rqst *rqstp)
{
- struct nlm_reboot *argp = rqstp->rq_argp;
-
- dprintk("lockd: SM_NOTIFY called\n");
+ struct nlm4_notifyargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm_reboot *reboot = &argp->reboot;
if (!nlm_privileged_requester(rqstp)) {
char buf[RPC_MAX_ADDRBUFLEN];
- printk(KERN_WARNING "lockd: rejected NSM callback from %s\n",
- svc_print_addr(rqstp, buf, sizeof(buf)));
+
+ pr_warn("lockd: rejected NSM callback from %s\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
return rpc_system_err;
}
- nlm_host_rebooted(SVC_NET(rqstp), argp);
+ reboot->len = argp->xdrgen.notify.name.len;
+ reboot->mon = (char *)argp->xdrgen.notify.name.data;
+ reboot->state = argp->xdrgen.notify.state;
+ memcpy(&reboot->priv.data, argp->xdrgen.private,
+ sizeof(reboot->priv.data));
+
+ nlm_host_rebooted(SVC_NET(rqstp), reboot);
+
return rpc_success;
}
-/*
- * client sent a GRANTED_RES, let's remove the associated block
+/**
+ * nlm4svc_proc_unused - stub for unused procedures
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_proc_unavail: Program can't support procedure.
*/
-static __be32
-nlm4svc_proc_granted_res(struct svc_rqst *rqstp)
+static __be32 nlm4svc_proc_unused(struct svc_rqst *rqstp)
{
- struct nlm_res *argp = rqstp->rq_argp;
+ return rpc_proc_unavail;
+}
- if (!nlmsvc_ops)
- return rpc_success;
+/**
+ * nlm4svc_proc_share - SHARE: Open a file using DOS file-sharing modes
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_drop_reply: Do not send an RPC reply.
+ *
+ * RPC synopsis:
+ * nlm4_shareres NLMPROC4_SHARE(nlm4_shareargs) = 20;
+ *
+ * Permissible procedure status codes:
+ * %NLM4_GRANTED: The requested share lock was granted.
+ * %NLM4_DENIED: The requested lock conflicted with existing
+ * lock reservations for the file.
+ * %NLM4_DENIED_GRACE_PERIOD: The server has recently restarted and is
+ * re-establishing existing locks, and is not
+ * yet ready to accept normal service requests.
+ *
+ * The Linux NLM server implementation also returns:
+ * %NLM4_DENIED_NOLOCKS: A needed resource could not be allocated.
+ * %NLM4_STALE_FH: The request specified an invalid file handle.
+ * %NLM4_FBIG: The request specified a length or offset
+ * that exceeds the range supported by the
+ * server.
+ * %NLM4_FAILED: The request failed for an unspecified reason.
+ */
+static __be32 nlm4svc_proc_share(struct svc_rqst *rqstp)
+{
+ struct nlm4_shareargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm4_shareres_wrapper *resp = rqstp->rq_resp;
+ struct nlm_lock *lock = &argp->lock;
+ struct nlm_host *host = NULL;
+ struct nlm_file *file = NULL;
+ struct nlm4_lock xdr_lock = {
+ .fh = argp->xdrgen.share.fh,
+ .oh = argp->xdrgen.share.oh,
+ .svid = LOCKD_SHARE_SVID,
+ };
+
+ resp->xdrgen.cookie = argp->xdrgen.cookie;
+
+ resp->xdrgen.stat = nlm_lck_denied_grace_period;
+ if (locks_in_grace(SVC_NET(rqstp)) && !argp->xdrgen.reclaim)
+ goto out;
+
+ resp->xdrgen.stat = nlm_lck_denied_nolocks;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.share.caller_name, true);
+ if (!host)
+ goto out;
+
+ resp->xdrgen.stat = nlm4svc_lookup_file(rqstp, host, lock, &file,
+ &xdr_lock, F_RDLCK);
+ if (resp->xdrgen.stat)
+ goto out;
+
+ resp->xdrgen.stat = nlmsvc_share_file(host, file, &lock->oh,
+ argp->xdrgen.share.access,
+ argp->xdrgen.share.mode);
+
+ nlmsvc_release_lockowner(lock);
+
+out:
+ if (file)
+ nlm_release_file(file);
+ nlmsvc_release_host(host);
+ return resp->xdrgen.stat == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
+}
- dprintk("lockd: GRANTED_RES called\n");
+/**
+ * nlm4svc_proc_unshare - UNSHARE: Release a share reservation
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_drop_reply: Do not send an RPC reply.
+ *
+ * RPC synopsis:
+ * nlm4_shareres NLMPROC4_UNSHARE(nlm4_shareargs) = 21;
+ *
+ * Permissible procedure status codes:
+ * %NLM4_GRANTED: The share reservation was released.
+ * %NLM4_DENIED_GRACE_PERIOD: The server has recently restarted and is
+ * re-establishing existing locks, and is not
+ * yet ready to accept normal service requests.
+ *
+ * The Linux NLM server implementation also returns:
+ * %NLM4_DENIED_NOLOCKS: A needed resource could not be allocated.
+ * %NLM4_STALE_FH: The request specified an invalid file handle.
+ * %NLM4_FBIG: The request specified a length or offset
+ * that exceeds the range supported by the
+ * server.
+ * %NLM4_FAILED: The request failed for an unspecified reason.
+ */
+static __be32 nlm4svc_proc_unshare(struct svc_rqst *rqstp)
+{
+ struct nlm4_shareargs_wrapper *argp = rqstp->rq_argp;
+ struct nlm4_shareres_wrapper *resp = rqstp->rq_resp;
+ struct nlm_lock *lock = &argp->lock;
+ struct nlm4_lock xdr_lock = {
+ .fh = argp->xdrgen.share.fh,
+ .oh = argp->xdrgen.share.oh,
+ .svid = LOCKD_SHARE_SVID,
+ };
+ struct nlm_host *host = NULL;
+ struct nlm_file *file = NULL;
+
+ resp->xdrgen.cookie = argp->xdrgen.cookie;
+
+ resp->xdrgen.stat = nlm_lck_denied_grace_period;
+ if (locks_in_grace(SVC_NET(rqstp)))
+ goto out;
+
+ resp->xdrgen.stat = nlm_lck_denied_nolocks;
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.share.caller_name, true);
+ if (!host)
+ goto out;
+
+ resp->xdrgen.stat = nlm4svc_lookup_file(rqstp, host, lock, &file,
+ &xdr_lock, F_RDLCK);
+ if (resp->xdrgen.stat)
+ goto out;
+
+ resp->xdrgen.stat = nlmsvc_unshare_file(host, file, &lock->oh);
+
+ nlmsvc_release_lockowner(lock);
+
+out:
+ if (file)
+ nlm_release_file(file);
+ nlmsvc_release_host(host);
+ return resp->xdrgen.stat == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
+}
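
Both SHARE and UNSHARE above synthesize a struct nlm4_lock from the
share arguments, with the fixed LOCKD_SHARE_SVID standing in for a
lock identifier, so that DOS share reservations can reuse
nlm4svc_lookup_host() and nlm4svc_lookup_file() rather than carrying a
separate file-lookup path.
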
- nlmsvc_grant_reply(&argp->cookie, argp->status);
- return rpc_success;
+/**
+ * nlm4svc_proc_nm_lock - NM_LOCK: Establish a non-monitored lock
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ * %rpc_drop_reply: Do not send an RPC reply.
+ *
+ * RPC synopsis:
+ * nlm4_res NLMPROC4_NM_LOCK(nlm4_lockargs) = 22;
+ *
+ * Permissible procedure status codes:
+ * %NLM4_GRANTED: The requested lock was granted.
+ * %NLM4_DENIED: The requested lock conflicted with existing
+ * lock reservations for the file.
+ * %NLM4_DENIED_NOLOCKS: The server could not allocate the resources
+ * needed to process the request.
+ * %NLM4_BLOCKED: The blocking request cannot be granted
+ * immediately. The server will send an
+ * NLMPROC4_GRANTED callback to the client when
+ * the lock can be granted.
+ * %NLM4_DENIED_GRACE_PERIOD: The server has recently restarted and is
+ * re-establishing existing locks, and is not
+ * yet ready to accept normal service requests.
+ *
+ * The Linux NLM server implementation also returns:
+ * %NLM4_DEADLCK: The request could not be granted and
+ * blocking would cause a deadlock.
+ * %NLM4_STALE_FH: The request specified an invalid file handle.
+ * %NLM4_FBIG: The request specified a length or offset
+ * that exceeds the range supported by the
+ * server.
+ * %NLM4_FAILED: The request failed for an unspecified reason.
+ */
+static __be32 nlm4svc_proc_nm_lock(struct svc_rqst *rqstp)
+{
+ return nlm4svc_do_lock(rqstp, false);
}
-static __be32
-nlm4svc_proc_unused(struct svc_rqst *rqstp)
+/**
+ * nlm4svc_proc_free_all - FREE_ALL: Discard client's lock and share state
+ * @rqstp: RPC transaction context
+ *
+ * Returns:
+ * %rpc_success: RPC executed successfully.
+ *
+ * RPC synopsis:
+ * void NLMPROC4_FREE_ALL(nlm4_notify) = 23;
+ */
+static __be32 nlm4svc_proc_free_all(struct svc_rqst *rqstp)
{
- return rpc_proc_unavail;
+ struct nlm4_notify_wrapper *argp = rqstp->rq_argp;
+ struct nlm_host *host;
+
+ host = nlm4svc_lookup_host(rqstp, argp->xdrgen.name, false);
+ if (!host)
+ goto out;
+
+ nlmsvc_free_host_resources(host);
+
+ nlmsvc_release_host(host);
+
+out:
+ return rpc_success;
}
/*
- * NLM Server procedures.
+ * NLMv4 Server procedures.
*/
-struct nlm_void { int dummy; };
-
-#define Ck (1+XDR_QUADLEN(NLM_MAXCOOKIELEN)) /* cookie */
-#define No (1+1024/4) /* netobj */
-#define St 1 /* status */
-#define Rg 4 /* range (offset + length) */
-
-const struct svc_procedure nlmsvc_procedures4[24] = {
- [NLMPROC_NULL] = {
- .pc_func = nlm4svc_proc_null,
- .pc_decode = nlm4svc_decode_void,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_void),
- .pc_argzero = sizeof(struct nlm_void),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "NULL",
+static const struct svc_procedure nlm4svc_procedures[24] = {
+ [NLMPROC4_NULL] = {
+ .pc_func = nlm4svc_proc_null,
+ .pc_decode = nlm4_svc_decode_void,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = XDR_void,
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "NULL",
},
- [NLMPROC_TEST] = {
- .pc_func = nlm4svc_proc_test,
- .pc_decode = nlm4svc_decode_testargs,
- .pc_encode = nlm4svc_encode_testres,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_res),
- .pc_xdrressize = Ck+St+2+No+Rg,
- .pc_name = "TEST",
+ [NLMPROC4_TEST] = {
+ .pc_func = nlm4svc_proc_test,
+ .pc_decode = nlm4_svc_decode_nlm4_testargs,
+ .pc_encode = nlm4_svc_encode_nlm4_testres,
+ .pc_argsize = sizeof(struct nlm4_testargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = sizeof(struct nlm4_testres_wrapper),
+ .pc_xdrressize = NLM4_nlm4_testres_sz,
+ .pc_name = "TEST",
},
- [NLMPROC_LOCK] = {
- .pc_func = nlm4svc_proc_lock,
- .pc_decode = nlm4svc_decode_lockargs,
- .pc_encode = nlm4svc_encode_res,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_res),
- .pc_xdrressize = Ck+St,
- .pc_name = "LOCK",
+ [NLMPROC4_LOCK] = {
+ .pc_func = nlm4svc_proc_lock,
+ .pc_decode = nlm4_svc_decode_nlm4_lockargs,
+ .pc_encode = nlm4_svc_encode_nlm4_res,
+ .pc_argsize = sizeof(struct nlm4_lockargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = sizeof(struct nlm4_res_wrapper),
+ .pc_xdrressize = NLM4_nlm4_res_sz,
+ .pc_name = "LOCK",
},
- [NLMPROC_CANCEL] = {
- .pc_func = nlm4svc_proc_cancel,
- .pc_decode = nlm4svc_decode_cancargs,
- .pc_encode = nlm4svc_encode_res,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_res),
- .pc_xdrressize = Ck+St,
- .pc_name = "CANCEL",
+ [NLMPROC4_CANCEL] = {
+ .pc_func = nlm4svc_proc_cancel,
+ .pc_decode = nlm4_svc_decode_nlm4_cancargs,
+ .pc_encode = nlm4_svc_encode_nlm4_res,
+ .pc_argsize = sizeof(struct nlm4_cancargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = sizeof(struct nlm4_res_wrapper),
+ .pc_xdrressize = NLM4_nlm4_res_sz,
+ .pc_name = "CANCEL",
},
- [NLMPROC_UNLOCK] = {
- .pc_func = nlm4svc_proc_unlock,
- .pc_decode = nlm4svc_decode_unlockargs,
- .pc_encode = nlm4svc_encode_res,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_res),
- .pc_xdrressize = Ck+St,
- .pc_name = "UNLOCK",
+ [NLMPROC4_UNLOCK] = {
+ .pc_func = nlm4svc_proc_unlock,
+ .pc_decode = nlm4_svc_decode_nlm4_unlockargs,
+ .pc_encode = nlm4_svc_encode_nlm4_res,
+ .pc_argsize = sizeof(struct nlm4_unlockargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = sizeof(struct nlm4_res_wrapper),
+ .pc_xdrressize = NLM4_nlm4_res_sz,
+ .pc_name = "UNLOCK",
},
- [NLMPROC_GRANTED] = {
- .pc_func = nlm4svc_proc_granted,
- .pc_decode = nlm4svc_decode_testargs,
- .pc_encode = nlm4svc_encode_res,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_res),
- .pc_xdrressize = Ck+St,
- .pc_name = "GRANTED",
+ [NLMPROC4_GRANTED] = {
+ .pc_func = nlm4svc_proc_granted,
+ .pc_decode = nlm4_svc_decode_nlm4_testargs,
+ .pc_encode = nlm4_svc_encode_nlm4_res,
+ .pc_argsize = sizeof(struct nlm4_testargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = sizeof(struct nlm4_res_wrapper),
+ .pc_xdrressize = NLM4_nlm4_res_sz,
+ .pc_name = "GRANTED",
},
- [NLMPROC_TEST_MSG] = {
- .pc_func = nlm4svc_proc_test_msg,
- .pc_decode = nlm4svc_decode_testargs,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "TEST_MSG",
+ [NLMPROC4_TEST_MSG] = {
+ .pc_func = nlm4svc_proc_test_msg,
+ .pc_decode = nlm4_svc_decode_nlm4_testargs,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_testargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "TEST_MSG",
},
- [NLMPROC_LOCK_MSG] = {
- .pc_func = nlm4svc_proc_lock_msg,
- .pc_decode = nlm4svc_decode_lockargs,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "LOCK_MSG",
+ [NLMPROC4_LOCK_MSG] = {
+ .pc_func = nlm4svc_proc_lock_msg,
+ .pc_decode = nlm4_svc_decode_nlm4_lockargs,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_lockargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "LOCK_MSG",
},
- [NLMPROC_CANCEL_MSG] = {
- .pc_func = nlm4svc_proc_cancel_msg,
- .pc_decode = nlm4svc_decode_cancargs,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "CANCEL_MSG",
+ [NLMPROC4_CANCEL_MSG] = {
+ .pc_func = nlm4svc_proc_cancel_msg,
+ .pc_decode = nlm4_svc_decode_nlm4_cancargs,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_cancargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "CANCEL_MSG",
},
- [NLMPROC_UNLOCK_MSG] = {
- .pc_func = nlm4svc_proc_unlock_msg,
- .pc_decode = nlm4svc_decode_unlockargs,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "UNLOCK_MSG",
+ [NLMPROC4_UNLOCK_MSG] = {
+ .pc_func = nlm4svc_proc_unlock_msg,
+ .pc_decode = nlm4_svc_decode_nlm4_unlockargs,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_unlockargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "UNLOCK_MSG",
},
- [NLMPROC_GRANTED_MSG] = {
- .pc_func = nlm4svc_proc_granted_msg,
- .pc_decode = nlm4svc_decode_testargs,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "GRANTED_MSG",
+ [NLMPROC4_GRANTED_MSG] = {
+ .pc_func = nlm4svc_proc_granted_msg,
+ .pc_decode = nlm4_svc_decode_nlm4_testargs,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_testargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "GRANTED_MSG",
},
- [NLMPROC_TEST_RES] = {
- .pc_func = nlm4svc_proc_null,
- .pc_decode = nlm4svc_decode_void,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_res),
- .pc_argzero = sizeof(struct nlm_res),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "TEST_RES",
+ [NLMPROC4_TEST_RES] = {
+ .pc_func = nlm4svc_proc_null,
+ .pc_decode = nlm4_svc_decode_nlm4_testres,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_testres),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "TEST_RES",
},
- [NLMPROC_LOCK_RES] = {
- .pc_func = nlm4svc_proc_null,
- .pc_decode = nlm4svc_decode_void,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_res),
- .pc_argzero = sizeof(struct nlm_res),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "LOCK_RES",
+ [NLMPROC4_LOCK_RES] = {
+ .pc_func = nlm4svc_proc_null,
+ .pc_decode = nlm4_svc_decode_nlm4_res,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_res),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "LOCK_RES",
},
- [NLMPROC_CANCEL_RES] = {
- .pc_func = nlm4svc_proc_null,
- .pc_decode = nlm4svc_decode_void,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_res),
- .pc_argzero = sizeof(struct nlm_res),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "CANCEL_RES",
+ [NLMPROC4_CANCEL_RES] = {
+ .pc_func = nlm4svc_proc_null,
+ .pc_decode = nlm4_svc_decode_nlm4_res,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_res),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "CANCEL_RES",
},
- [NLMPROC_UNLOCK_RES] = {
- .pc_func = nlm4svc_proc_null,
- .pc_decode = nlm4svc_decode_void,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_res),
- .pc_argzero = sizeof(struct nlm_res),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "UNLOCK_RES",
+ [NLMPROC4_UNLOCK_RES] = {
+ .pc_func = nlm4svc_proc_null,
+ .pc_decode = nlm4_svc_decode_nlm4_res,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_res),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "UNLOCK_RES",
},
- [NLMPROC_GRANTED_RES] = {
- .pc_func = nlm4svc_proc_granted_res,
- .pc_decode = nlm4svc_decode_res,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_res),
- .pc_argzero = sizeof(struct nlm_res),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "GRANTED_RES",
+ [NLMPROC4_GRANTED_RES] = {
+ .pc_func = nlm4svc_proc_granted_res,
+ .pc_decode = nlm4_svc_decode_nlm4_res,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_res_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "GRANTED_RES",
},
- [NLMPROC_NSM_NOTIFY] = {
- .pc_func = nlm4svc_proc_sm_notify,
- .pc_decode = nlm4svc_decode_reboot,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_reboot),
- .pc_argzero = sizeof(struct nlm_reboot),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "SM_NOTIFY",
+ [NLMPROC4_SM_NOTIFY] = {
+ .pc_func = nlm4svc_proc_sm_notify,
+ .pc_decode = nlm4_svc_decode_nlm4_notifyargs,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_notifyargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "SM_NOTIFY",
},
[17] = {
- .pc_func = nlm4svc_proc_unused,
- .pc_decode = nlm4svc_decode_void,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_void),
- .pc_argzero = sizeof(struct nlm_void),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = 0,
- .pc_name = "UNUSED",
+ .pc_func = nlm4svc_proc_unused,
+ .pc_decode = nlm4_svc_decode_void,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = 0,
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "UNUSED",
},
[18] = {
- .pc_func = nlm4svc_proc_unused,
- .pc_decode = nlm4svc_decode_void,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_void),
- .pc_argzero = sizeof(struct nlm_void),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = 0,
- .pc_name = "UNUSED",
+ .pc_func = nlm4svc_proc_unused,
+ .pc_decode = nlm4_svc_decode_void,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = 0,
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "UNUSED",
},
[19] = {
- .pc_func = nlm4svc_proc_unused,
- .pc_decode = nlm4svc_decode_void,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_void),
- .pc_argzero = sizeof(struct nlm_void),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = 0,
- .pc_name = "UNUSED",
+ .pc_func = nlm4svc_proc_unused,
+ .pc_decode = nlm4_svc_decode_void,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = 0,
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "UNUSED",
},
- [NLMPROC_SHARE] = {
- .pc_func = nlm4svc_proc_share,
- .pc_decode = nlm4svc_decode_shareargs,
- .pc_encode = nlm4svc_encode_shareres,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_res),
- .pc_xdrressize = Ck+St+1,
- .pc_name = "SHARE",
+ [NLMPROC4_SHARE] = {
+ .pc_func = nlm4svc_proc_share,
+ .pc_decode = nlm4_svc_decode_nlm4_shareargs,
+ .pc_encode = nlm4_svc_encode_nlm4_shareres,
+ .pc_argsize = sizeof(struct nlm4_shareargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = sizeof(struct nlm4_shareres_wrapper),
+ .pc_xdrressize = NLM4_nlm4_shareres_sz,
+ .pc_name = "SHARE",
},
- [NLMPROC_UNSHARE] = {
- .pc_func = nlm4svc_proc_unshare,
- .pc_decode = nlm4svc_decode_shareargs,
- .pc_encode = nlm4svc_encode_shareres,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_res),
- .pc_xdrressize = Ck+St+1,
- .pc_name = "UNSHARE",
+ [NLMPROC4_UNSHARE] = {
+ .pc_func = nlm4svc_proc_unshare,
+ .pc_decode = nlm4_svc_decode_nlm4_shareargs,
+ .pc_encode = nlm4_svc_encode_nlm4_shareres,
+ .pc_argsize = sizeof(struct nlm4_shareargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = sizeof(struct nlm4_shareres_wrapper),
+ .pc_xdrressize = NLM4_nlm4_shareres_sz,
+ .pc_name = "UNSHARE",
},
- [NLMPROC_NM_LOCK] = {
- .pc_func = nlm4svc_proc_nm_lock,
- .pc_decode = nlm4svc_decode_lockargs,
- .pc_encode = nlm4svc_encode_res,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_res),
- .pc_xdrressize = Ck+St,
- .pc_name = "NM_LOCK",
+ [NLMPROC4_NM_LOCK] = {
+ .pc_func = nlm4svc_proc_nm_lock,
+ .pc_decode = nlm4_svc_decode_nlm4_lockargs,
+ .pc_encode = nlm4_svc_encode_nlm4_res,
+ .pc_argsize = sizeof(struct nlm4_lockargs_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = sizeof(struct nlm4_res_wrapper),
+ .pc_xdrressize = NLM4_nlm4_res_sz,
+ .pc_name = "NM_LOCK",
},
- [NLMPROC_FREE_ALL] = {
- .pc_func = nlm4svc_proc_free_all,
- .pc_decode = nlm4svc_decode_notify,
- .pc_encode = nlm4svc_encode_void,
- .pc_argsize = sizeof(struct nlm_args),
- .pc_argzero = sizeof(struct nlm_args),
- .pc_ressize = sizeof(struct nlm_void),
- .pc_xdrressize = St,
- .pc_name = "FREE_ALL",
+ [NLMPROC4_FREE_ALL] = {
+ .pc_func = nlm4svc_proc_free_all,
+ .pc_decode = nlm4_svc_decode_nlm4_notify,
+ .pc_encode = nlm4_svc_encode_void,
+ .pc_argsize = sizeof(struct nlm4_notify_wrapper),
+ .pc_argzero = 0,
+ .pc_ressize = 0,
+ .pc_xdrressize = XDR_void,
+ .pc_name = "FREE_ALL",
},
};
+
+/*
+ * Storage requirements for XDR arguments and results
+ */
+union nlm4svc_xdrstore {
+ struct nlm4_testargs_wrapper testargs;
+ struct nlm4_lockargs_wrapper lockargs;
+ struct nlm4_cancargs_wrapper cancargs;
+ struct nlm4_unlockargs_wrapper unlockargs;
+ struct nlm4_notifyargs_wrapper notifyargs;
+ struct nlm4_shareargs_wrapper shareargs;
+ struct nlm4_notify_wrapper notify;
+ struct nlm4_testres_wrapper testres;
+ struct nlm4_res_wrapper res;
+ struct nlm4_shareres_wrapper shareres;
+};
+
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nlm4svc_call_counters[ARRAY_SIZE(nlm4svc_procedures)]);
+
+const struct svc_version nlmsvc_version4 = {
+ .vs_vers = 4,
+ .vs_nproc = ARRAY_SIZE(nlm4svc_procedures),
+ .vs_proc = nlm4svc_procedures,
+ .vs_count = nlm4svc_call_counters,
+ .vs_dispatch = nlmsvc_dispatch,
+ .vs_xdrsize = sizeof(union nlm4svc_xdrstore),
+};
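
The union lets vs_xdrsize name one allocation large enough for any
procedure's decoded arguments or encoded results. Roughly what the
sunrpc core does with it (a simplified sketch, not the exact upstream
code):

	/* At service creation, the largest vs_xdrsize across the
	 * registered versions becomes serv->sv_xdrsize ...
	 */
	serv->sv_xdrsize = max(serv->sv_xdrsize, vers->vs_xdrsize);

	/* ... and each worker thread's argument and result buffers
	 * are sized from it, so rq_argp and rq_resp can hold any
	 * of the wrappers above.
	 */
	rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
	rqstp->rq_resp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
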
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 0b6be8b8aeb1..b98b1d0ada35 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -28,16 +28,10 @@
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc_xprt.h>
-#include <linux/lockd/nlm.h>
-#include <linux/lockd/lockd.h>
-#define NLMDBG_FACILITY NLMDBG_SVCLOCK
+#include "lockd.h"
-#ifdef CONFIG_LOCKD_V4
-#define nlm_deadlock nlm4_deadlock
-#else
-#define nlm_deadlock nlm_lck_denied
-#endif
+#define NLMDBG_FACILITY NLMDBG_SVCLOCK
static void nlmsvc_release_block(struct nlm_block *block);
static void nlmsvc_insert_block(struct nlm_block *block, unsigned long);
@@ -80,6 +74,11 @@ static const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
return buf;
}
+#else
+static inline const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
+{
+ return "???";
+}
#endif
/*
@@ -463,7 +462,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
block->b_deferred_req =
rqstp->rq_chandle.defer(block->b_cache_req);
if (block->b_deferred_req != NULL)
- status = nlm_drop_reply;
+ status = nlm__int__drop_reply;
}
dprintk("lockd: nlmsvc_defer_lock_rqst block %p flags %d status %d\n",
block, block->b_flags, ntohl(status));
@@ -531,7 +530,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
ret = nlm_lck_denied;
goto out;
}
- ret = nlm_drop_reply;
+ ret = nlm__int__drop_reply;
goto out;
}
@@ -589,7 +588,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
case -EDEADLK:
nlmsvc_remove_block(block);
- ret = nlm_deadlock;
+ ret = nlm__int__deadlock;
goto out;
default: /* includes ENOLCK */
nlmsvc_remove_block(block);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 5817ef272332..749abf8886ba 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -10,39 +10,52 @@
#include <linux/types.h>
#include <linux/time.h>
-#include <linux/lockd/lockd.h>
-#include <linux/lockd/share.h>
#include <linux/sunrpc/svc_xprt.h>
+#include "lockd.h"
+#include "share.h"
+
#define NLMDBG_FACILITY NLMDBG_CLIENT
#ifdef CONFIG_LOCKD_V4
-static __be32
-cast_to_nlm(__be32 status, u32 vers)
+static inline __be32 cast_status(__be32 status)
{
- /* Note: status is assumed to be in network byte order !!! */
- if (vers != 4){
- switch (status) {
- case nlm_granted:
- case nlm_lck_denied:
- case nlm_lck_denied_nolocks:
- case nlm_lck_blocked:
- case nlm_lck_denied_grace_period:
- case nlm_drop_reply:
- break;
- case nlm4_deadlock:
- status = nlm_lck_denied;
- break;
- default:
- status = nlm_lck_denied_nolocks;
- }
+ switch (status) {
+ case nlm_granted:
+ case nlm_lck_denied:
+ case nlm_lck_denied_nolocks:
+ case nlm_lck_blocked:
+ case nlm_lck_denied_grace_period:
+ case nlm__int__drop_reply:
+ break;
+ case nlm__int__deadlock:
+ status = nlm_lck_denied;
+ break;
+ default:
+ status = nlm_lck_denied_nolocks;
}
- return (status);
+ return status;
}
-#define cast_status(status) (cast_to_nlm(status, rqstp->rq_vers))
#else
-#define cast_status(status) (status)
+static inline __be32 cast_status(__be32 status)
+{
+ switch (status) {
+ case nlm__int__deadlock:
+ status = nlm_lck_denied;
+ break;
+ case nlm__int__stale_fh:
+ case nlm__int__failed:
+ status = nlm_lck_denied_nolocks;
+ break;
+ default:
+ if (be32_to_cpu(status) >= 30000)
+ pr_warn_once("lockd: unhandled internal status %u\n",
+ be32_to_cpu(status));
+ break;
+ }
+ return status;
+}
#endif
/*
@@ -124,12 +137,13 @@ __nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_res *resp)
/* Obtain client and file */
if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
/* Now check for conflicting locks */
resp->status = cast_status(nlmsvc_testlock(rqstp, file, host,
&argp->lock, &resp->lock));
- if (resp->status == nlm_drop_reply)
+ if (resp->status == nlm__int__drop_reply)
rc = rpc_drop_reply;
else
dprintk("lockd: TEST status %d vers %d\n",
@@ -161,13 +175,14 @@ __nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_res *resp)
/* Obtain client and file */
if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
/* Now try to lock the file */
resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
argp->block, &argp->cookie,
argp->reclaim));
- if (resp->status == nlm_drop_reply)
+ if (resp->status == nlm__int__drop_reply)
rc = rpc_drop_reply;
else
dprintk("lockd: LOCK status %d\n", ntohl(resp->status));
@@ -204,7 +219,8 @@ __nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_res *resp)
/* Obtain client and file */
if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
/* Try to cancel request. */
resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock));
@@ -245,7 +261,8 @@ __nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_res *resp)
/* Obtain client and file */
if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
/* Now try to remove the lock */
resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock));
@@ -402,10 +419,13 @@ nlmsvc_proc_share(struct svc_rqst *rqstp)
/* Obtain client and file */
if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
/* Now try to create the share */
- resp->status = cast_status(nlmsvc_share_file(host, file, argp));
+ resp->status = cast_status(nlmsvc_share_file(host, file, &argp->lock.oh,
+ argp->fsm_access,
+ argp->fsm_mode));
dprintk("lockd: SHARE status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
@@ -437,10 +457,12 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp)
/* Obtain client and file */
if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file)))
- return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
+ return resp->status == nlm__int__drop_reply ?
+ rpc_drop_reply : rpc_success;
/* Now try to unshare the file */
- resp->status = cast_status(nlmsvc_unshare_file(host, file, argp));
+ resp->status = cast_status(nlmsvc_unshare_file(host, file,
+ &argp->lock.oh));
dprintk("lockd: UNSHARE status %d\n", ntohl(resp->status));
nlmsvc_release_lockowner(&argp->lock);
@@ -536,7 +558,7 @@ struct nlm_void { int dummy; };
#define No (1+1024/4) /* Net Obj */
#define Rg 2 /* range - offset + size */
-const struct svc_procedure nlmsvc_procedures[24] = {
+static const struct svc_procedure nlmsvc_procedures[24] = {
[NLMPROC_NULL] = {
.pc_func = nlmsvc_proc_null,
.pc_decode = nlmsvc_decode_void,
@@ -778,3 +800,39 @@ const struct svc_procedure nlmsvc_procedures[24] = {
.pc_name = "FREE_ALL",
},
};
+
+/*
+ * Storage requirements for XDR arguments and results
+ */
+union nlmsvc_xdrstore {
+ struct nlm_args args;
+ struct nlm_res res;
+ struct nlm_reboot reboot;
+};
+
+/*
+ * NLMv1 defines only procedures 1 - 15. Linux lockd also implements
+ * procedures 0 (NULL) and 16 (SM_NOTIFY).
+ */
+static DEFINE_PER_CPU_ALIGNED(unsigned long, nlm1svc_call_counters[17]);
+
+const struct svc_version nlmsvc_version1 = {
+ .vs_vers = 1,
+ .vs_nproc = 17,
+ .vs_proc = nlmsvc_procedures,
+ .vs_count = nlm1svc_call_counters,
+ .vs_dispatch = nlmsvc_dispatch,
+ .vs_xdrsize = sizeof(union nlmsvc_xdrstore),
+};
+
+static DEFINE_PER_CPU_ALIGNED(unsigned long,
+ nlm3svc_call_counters[ARRAY_SIZE(nlmsvc_procedures)]);
+
+const struct svc_version nlmsvc_version3 = {
+ .vs_vers = 3,
+ .vs_nproc = ARRAY_SIZE(nlmsvc_procedures),
+ .vs_proc = nlmsvc_procedures,
+ .vs_count = nlm3svc_call_counters,
+ .vs_dispatch = nlmsvc_dispatch,
+ .vs_xdrsize = sizeof(union nlmsvc_xdrstore),
+};
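Because the new vs_count tables are per-CPU, each lockd thread bumps its own CPU's slot without atomics, and anything reporting totals has to sum across CPUs. A minimal sketch of that read side, assuming the nlm1svc_call_counters array declared above (the helper name is hypothetical):

#include <linux/percpu.h>

/* Hypothetical helper: total calls to procedure @proc summed over
 * all possible CPUs. Readers tolerate a slightly stale result, so
 * no locking is required. */
static unsigned long nlm1svc_total_calls(unsigned int proc)
{
	unsigned long sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu(nlm1svc_call_counters[proc], cpu);
	return sum;
}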
diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c
index 88c81ce1148d..53f5655c128c 100644
--- a/fs/lockd/svcshare.c
+++ b/fs/lockd/svcshare.c
@@ -14,8 +14,9 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
-#include <linux/lockd/lockd.h>
-#include <linux/lockd/share.h>
+
+#include "lockd.h"
+#include "share.h"
static inline int
nlm_cmp_owner(struct nlm_share *share, struct xdr_netobj *oh)
@@ -24,12 +25,21 @@ nlm_cmp_owner(struct nlm_share *share, struct xdr_netobj *oh)
&& !memcmp(share->s_owner.data, oh->data, oh->len);
}
+/**
+ * nlmsvc_share_file - create a share
+ * @host: Network client peer
+ * @file: File to be shared
+ * @oh: Share owner handle
+ * @access: Requested access mode
+ * @mode: Requested file sharing mode
+ *
+ * Returns an NLM status code.
+ */
__be32
nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
- struct nlm_args *argp)
+ struct xdr_netobj *oh, u32 access, u32 mode)
{
struct nlm_share *share;
- struct xdr_netobj *oh = &argp->lock.oh;
u8 *ohdata;
if (nlmsvc_file_cannot_lock(file))
@@ -38,13 +48,11 @@ nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
for (share = file->f_shares; share; share = share->s_next) {
if (share->s_host == host && nlm_cmp_owner(share, oh))
goto update;
- if ((argp->fsm_access & share->s_mode)
- || (argp->fsm_mode & share->s_access ))
+ if ((access & share->s_mode) || (mode & share->s_access))
return nlm_lck_denied;
}
- share = kmalloc(sizeof(*share) + oh->len,
- GFP_KERNEL);
+ share = kmalloc(sizeof(*share) + oh->len, GFP_KERNEL);
if (share == NULL)
return nlm_lck_denied_nolocks;
@@ -60,20 +68,24 @@ nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file,
file->f_shares = share;
update:
- share->s_access = argp->fsm_access;
- share->s_mode = argp->fsm_mode;
+ share->s_access = access;
+ share->s_mode = mode;
return nlm_granted;
}
-/*
- * Delete a share.
+/**
+ * nlmsvc_unshare_file - delete a share
+ * @host: Network client peer
+ * @file: File to be unshared
+ * @oh: Share owner handle
+ *
+ * Returns an NLM status code.
*/
__be32
nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
- struct nlm_args *argp)
+ struct xdr_netobj *oh)
{
struct nlm_share *share, **shpp;
- struct xdr_netobj *oh = &argp->lock.oh;
if (nlmsvc_file_cannot_lock(file))
return nlm_lck_denied_nolocks;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 79f3dd2fd366..344e6c187cde 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -15,12 +15,13 @@
#include <linux/mutex.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/addr.h>
-#include <linux/lockd/lockd.h>
-#include <linux/lockd/share.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <uapi/linux/nfs2.h>
+#include "lockd.h"
+#include "share.h"
+
#define NLMDBG_FACILITY NLMDBG_SVCSUBS
@@ -87,14 +88,29 @@ static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
struct nlm_file *file, int mode)
{
struct file **fp = &file->f_file[mode];
- __be32 nfserr;
+ __be32 nlmerr = nlm_granted;
+ int error;
if (*fp)
- return 0;
- nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
- if (nfserr)
- dprintk("lockd: open failed (error %d)\n", nfserr);
- return nfserr;
+ return nlmerr;
+
+ error = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
+ if (error) {
+ dprintk("lockd: open failed (errno %d)\n", error);
+ switch (error) {
+ case -EWOULDBLOCK:
+ nlmerr = nlm__int__drop_reply;
+ break;
+ case -ESTALE:
+ nlmerr = nlm__int__stale_fh;
+ break;
+ default:
+ nlmerr = nlm__int__failed;
+ break;
+ }
+ }
+
+ return nlmerr;
}
/*
diff --git a/fs/lockd/trace.h b/fs/lockd/trace.h
index 7461b13b6e74..7214d7e96a42 100644
--- a/fs/lockd/trace.h
+++ b/fs/lockd/trace.h
@@ -8,7 +8,8 @@
#include <linux/tracepoint.h>
#include <linux/crc32.h>
#include <linux/nfs.h>
-#include <linux/lockd/lockd.h>
+
+#include "lockd.h"
#ifdef CONFIG_LOCKD_V4
#define NLM_STATUS_LIST \
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index adfcce2bf11b..dfca8b8dab73 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -15,13 +15,13 @@
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/stats.h>
-#include <linux/lockd/lockd.h>
#include <uapi/linux/nfs2.h>
+#include "lockd.h"
+#include "share.h"
#include "svcxdr.h"
-
static inline loff_t
s32_to_loff_t(__s32 offset)
{
@@ -275,7 +275,7 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
- lock->svid = ~(u32)0;
+ lock->svid = LOCKD_SHARE_SVID;
if (!svcxdr_decode_cookie(xdr, &argp->cookie))
return false;
diff --git a/include/linux/lockd/xdr.h b/fs/lockd/xdr.h
index 17d53165d9f2..3c60817c4349 100644
--- a/include/linux/lockd/xdr.h
+++ b/fs/lockd/xdr.h
@@ -1,14 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * linux/include/linux/lockd/xdr.h
- *
* XDR types for the NLM protocol
*
* Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
*/
-#ifndef LOCKD_XDR_H
-#define LOCKD_XDR_H
+#ifndef _LOCKD_XDR_H
+#define _LOCKD_XDR_H
#include <linux/fs.h>
#include <linux/filelock.h>
@@ -33,8 +31,6 @@ struct svc_rqst;
#define nlm_lck_blocked cpu_to_be32(NLM_LCK_BLOCKED)
#define nlm_lck_denied_grace_period cpu_to_be32(NLM_LCK_DENIED_GRACE_PERIOD)
-#define nlm_drop_reply cpu_to_be32(30000)
-
/* Lock info passed via NLM */
struct nlm_lock {
char * caller;
@@ -92,11 +88,6 @@ struct nlm_reboot {
struct nsm_private priv;
};
-/*
- * Contents of statd callback when monitored host rebooted
- */
-#define NLMSVC_XDRSIZE sizeof(struct nlm_args)
-
bool nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
bool nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
bool nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
@@ -112,4 +103,4 @@ bool nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
bool nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
bool nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-#endif /* LOCKD_XDR_H */
+#endif /* _LOCKD_XDR_H */
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
deleted file mode 100644
index e343c820301f..000000000000
--- a/fs/lockd/xdr4.c
+++ /dev/null
@@ -1,347 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/fs/lockd/xdr4.c
- *
- * XDR support for lockd and the lock client.
- *
- * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
- * Copyright (C) 1999, Trond Myklebust <trond.myklebust@fys.uio.no>
- */
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/nfs.h>
-
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/stats.h>
-#include <linux/lockd/lockd.h>
-
-#include "svcxdr.h"
-
-static inline s64
-loff_t_to_s64(loff_t offset)
-{
- s64 res;
- if (offset > NLM4_OFFSET_MAX)
- res = NLM4_OFFSET_MAX;
- else if (offset < -NLM4_OFFSET_MAX)
- res = -NLM4_OFFSET_MAX;
- else
- res = offset;
- return res;
-}
-
-void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len)
-{
- s64 end = off + len - 1;
-
- fl->fl_start = off;
- if (len == 0 || end < 0)
- fl->fl_end = OFFSET_MAX;
- else
- fl->fl_end = end;
-}
-
-/*
- * NLM file handles are defined by specification to be a variable-length
- * XDR opaque no longer than 1024 bytes. However, this implementation
- * limits their length to the size of an NFSv3 file handle.
- */
-static bool
-svcxdr_decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
-{
- __be32 *p;
- u32 len;
-
- if (xdr_stream_decode_u32(xdr, &len) < 0)
- return false;
- if (len > NFS_MAXFHSIZE)
- return false;
-
- p = xdr_inline_decode(xdr, len);
- if (!p)
- return false;
- fh->size = len;
- memcpy(fh->data, p, len);
- memset(fh->data + len, 0, sizeof(fh->data) - len);
-
- return true;
-}
-
-static bool
-svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock)
-{
- struct file_lock *fl = &lock->fl;
-
- if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
- return false;
- if (!svcxdr_decode_fhandle(xdr, &lock->fh))
- return false;
- if (!svcxdr_decode_owner(xdr, &lock->oh))
- return false;
- if (xdr_stream_decode_u32(xdr, &lock->svid) < 0)
- return false;
- if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0)
- return false;
- if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0)
- return false;
-
- locks_init_lock(fl);
- fl->c.flc_type = F_RDLCK;
- nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len);
- return true;
-}
-
-static bool
-svcxdr_encode_holder(struct xdr_stream *xdr, const struct nlm_lock *lock)
-{
- const struct file_lock *fl = &lock->fl;
- s64 start, len;
-
- /* exclusive */
- if (xdr_stream_encode_bool(xdr, fl->c.flc_type != F_RDLCK) < 0)
- return false;
- if (xdr_stream_encode_u32(xdr, lock->svid) < 0)
- return false;
- if (!svcxdr_encode_owner(xdr, &lock->oh))
- return false;
- start = loff_t_to_s64(fl->fl_start);
- if (fl->fl_end == OFFSET_MAX)
- len = 0;
- else
- len = loff_t_to_s64(fl->fl_end - fl->fl_start + 1);
- if (xdr_stream_encode_u64(xdr, start) < 0)
- return false;
- if (xdr_stream_encode_u64(xdr, len) < 0)
- return false;
-
- return true;
-}
-
-static bool
-svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
-{
- if (!svcxdr_encode_stats(xdr, resp->status))
- return false;
- switch (resp->status) {
- case nlm_lck_denied:
- if (!svcxdr_encode_holder(xdr, &resp->lock))
- return false;
- }
-
- return true;
-}
-
-
-/*
- * Decode Call arguments
- */
-
-bool
-nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- return true;
-}
-
-bool
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_args *argp = rqstp->rq_argp;
- u32 exclusive;
-
- if (!svcxdr_decode_cookie(xdr, &argp->cookie))
- return false;
- if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
- return false;
- if (!svcxdr_decode_lock(xdr, &argp->lock))
- return false;
- if (exclusive)
- argp->lock.fl.c.flc_type = F_WRLCK;
-
- return true;
-}
-
-bool
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_args *argp = rqstp->rq_argp;
- u32 exclusive;
-
- if (!svcxdr_decode_cookie(xdr, &argp->cookie))
- return false;
- if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
- return false;
- if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
- return false;
- if (!svcxdr_decode_lock(xdr, &argp->lock))
- return false;
- if (exclusive)
- argp->lock.fl.c.flc_type = F_WRLCK;
- if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
- return false;
- if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
- return false;
- argp->monitor = 1; /* monitor client by default */
-
- return true;
-}
-
-bool
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_args *argp = rqstp->rq_argp;
- u32 exclusive;
-
- if (!svcxdr_decode_cookie(xdr, &argp->cookie))
- return false;
- if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
- return false;
- if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
- return false;
- if (!svcxdr_decode_lock(xdr, &argp->lock))
- return false;
- if (exclusive)
- argp->lock.fl.c.flc_type = F_WRLCK;
-
- return true;
-}
-
-bool
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_args *argp = rqstp->rq_argp;
-
- if (!svcxdr_decode_cookie(xdr, &argp->cookie))
- return false;
- if (!svcxdr_decode_lock(xdr, &argp->lock))
- return false;
- argp->lock.fl.c.flc_type = F_UNLCK;
-
- return true;
-}
-
-bool
-nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_res *resp = rqstp->rq_argp;
-
- if (!svcxdr_decode_cookie(xdr, &resp->cookie))
- return false;
- if (!svcxdr_decode_stats(xdr, &resp->status))
- return false;
-
- return true;
-}
-
-bool
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_reboot *argp = rqstp->rq_argp;
- __be32 *p;
- u32 len;
-
- if (xdr_stream_decode_u32(xdr, &len) < 0)
- return false;
- if (len > SM_MAXSTRLEN)
- return false;
- p = xdr_inline_decode(xdr, len);
- if (!p)
- return false;
- argp->len = len;
- argp->mon = (char *)p;
- if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
- return false;
- p = xdr_inline_decode(xdr, SM_PRIV_SIZE);
- if (!p)
- return false;
- memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
-
- return true;
-}
-
-bool
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_lock *lock = &argp->lock;
-
- locks_init_lock(&lock->fl);
- lock->svid = ~(u32)0;
-
- if (!svcxdr_decode_cookie(xdr, &argp->cookie))
- return false;
- if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
- return false;
- if (!svcxdr_decode_fhandle(xdr, &lock->fh))
- return false;
- if (!svcxdr_decode_owner(xdr, &lock->oh))
- return false;
- /* XXX: Range checks are missing in the original code */
- if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0)
- return false;
- if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0)
- return false;
-
- return true;
-}
-
-bool
-nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_args *argp = rqstp->rq_argp;
- struct nlm_lock *lock = &argp->lock;
-
- if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
- return false;
- if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
- return false;
-
- return true;
-}
-
-
-/*
- * Encode Reply results
- */
-
-bool
-nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- return true;
-}
-
-bool
-nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_res *resp = rqstp->rq_resp;
-
- return svcxdr_encode_cookie(xdr, &resp->cookie) &&
- svcxdr_encode_testrply(xdr, resp);
-}
-
-bool
-nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_res *resp = rqstp->rq_resp;
-
- return svcxdr_encode_cookie(xdr, &resp->cookie) &&
- svcxdr_encode_stats(xdr, resp->status);
-}
-
-bool
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
-{
- struct nlm_res *resp = rqstp->rq_resp;
-
- if (!svcxdr_encode_cookie(xdr, &resp->cookie))
- return false;
- if (!svcxdr_encode_stats(xdr, resp->status))
- return false;
- /* sequence */
- if (xdr_stream_encode_u32(xdr, 0) < 0)
- return false;
-
- return true;
-}
diff --git a/fs/locks.c b/fs/locks.c
index d8b066fb4210..fead53474c30 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1534,6 +1534,7 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
{
struct file_lock_context *ctx = inode->i_flctx;
struct file_lease *fl, *tmp;
+ bool remove;
lockdep_assert_held(&ctx->flc_lock);
@@ -1541,8 +1542,19 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose)
trace_time_out_leases(inode, fl);
if (past_time(fl->fl_downgrade_time))
lease_modify(fl, F_RDLCK, dispose);
- if (past_time(fl->fl_break_time))
- lease_modify(fl, F_UNLCK, dispose);
+
+ remove = true;
+ if (past_time(fl->fl_break_time)) {
+ /*
+ * Consult the lease manager when a lease break times
+ * out to determine whether the lease should be disposed
+ * of.
+ */
+ if (fl->fl_lmops && fl->fl_lmops->lm_breaker_timedout)
+ remove = fl->fl_lmops->lm_breaker_timedout(fl);
+ if (remove)
+ lease_modify(fl, F_UNLCK, dispose);
+ }
}
}
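The hunk above adds an lm_breaker_timedout hook: when a lease break times out, the lease manager is consulted, and only a true return lets time_out_leases() dispose of the lease. A minimal sketch of a manager using the hook; struct example_owner and the ops wiring are hypothetical, only the hook's contract comes from this patch:

#include <linux/filelock.h>
#include <linux/workqueue.h>

/* Hypothetical lease owner with some in-flight recovery work. */
struct example_owner {
	struct work_struct recovery_work;
};

/* Runs under flc_lock from time_out_leases(). Returning false keeps
 * the timed-out lease alive; the manager must then remove it itself. */
static bool example_lm_breaker_timedout(struct file_lease *fl)
{
	struct example_owner *owner = fl->c.flc_owner;

	return !work_pending(&owner->recovery_work);
}

static const struct lease_manager_operations example_lm_ops = {
	.lm_breaker_timedout	= example_lm_breaker_timedout,
};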
@@ -1670,9 +1682,13 @@ int __break_lease(struct inode *inode, unsigned int flags)
restart:
fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list);
break_time = fl->fl_break_time;
- if (break_time != 0)
- break_time -= jiffies;
- if (break_time == 0)
+ if (break_time != 0) {
+ if (time_after(jiffies, break_time)) {
+ fl->fl_break_time = jiffies + lease_break_time * HZ;
+ break_time = lease_break_time * HZ;
+ } else
+ break_time -= jiffies;
+ } else
break_time++;
locks_insert_block(&fl->c, &new_fl->c, leases_conflict);
trace_break_lease_block(inode, new_fl);
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 11f9f69cde61..d54a141a89b3 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -380,14 +380,13 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
sector_t isect, extent_length = 0;
struct parallel_io *par = NULL;
loff_t offset = header->args.offset;
- size_t count = header->args.count;
struct page **pages = header->args.pages;
int pg_index = header->args.pgbase >> PAGE_SHIFT;
unsigned int pg_len;
struct blk_plug plug;
int i;
- dprintk("%s enter, %zu@%lld\n", __func__, count, offset);
+ dprintk("%s enter, %u@%lld\n", __func__, header->args.count, offset);
/* At this point, header->page_aray is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
@@ -428,7 +427,6 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
}
offset += pg_len;
- count -= pg_len;
isect += (pg_len >> SECTOR_SHIFT);
extent_length -= (pg_len >> SECTOR_SHIFT);
}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index be2aebf62056..95d7cd564b74 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -16,6 +16,7 @@
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
+#include <linux/filelock.h>
#include <linux/lockd/bind.h>
#include <linux/nfs_mount.h>
#include <linux/freezer.h>
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 8c3d2efa2636..70795684b8e8 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -41,6 +41,7 @@
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
+#include <linux/filelock.h>
#include <linux/lockd/bind.h>
#include <linux/freezer.h>
#include "internal.h"
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index 1da4f707f9ef..3a197252a132 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -13,7 +13,7 @@
#include <linux/nfs_fs.h>
#include <net/net_namespace.h>
#include <linux/rcupdate.h>
-#include <linux/lockd/lockd.h>
+#include <linux/lockd/bind.h>
#include "internal.h"
#include "nfs4_fs.h"
@@ -288,7 +288,7 @@ shutdown_store(struct kobject *kobj, struct kobj_attribute *attr,
shutdown_client(server->client_acl);
if (server->nlm_host)
- shutdown_client(server->nlm_host->h_rpcclnt);
+ nlmclnt_shutdown_rpc_clnt(server->nlm_host);
out:
shutdown_nfs_client(server->nfs_client);
return count;
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 4fd6e818565e..ffb76761d6a8 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -7,6 +7,7 @@ config NFSD
select CRC32
select CRYPTO_LIB_MD5 if NFSD_LEGACY_CLIENT_TRACKING
select CRYPTO_LIB_SHA256 if NFSD_V4
+ select CRYPTO # required by RPCSEC_GSS_KRB5 and signed filehandles
select LOCKD
select SUNRPC
select EXPORTFS
@@ -78,7 +79,6 @@ config NFSD_V4
depends on NFSD && PROC_FS
select FS_POSIX_ACL
select RPCSEC_GSS_KRB5
- select CRYPTO # required by RPCSEC_GSS_KRB5
select GRACE_PERIOD
select NFS_V4_2_SSC_HELPER if NFS_V4_2
help
@@ -177,16 +177,6 @@ config NFSD_LEGACY_CLIENT_TRACKING
and will be removed in the future. Say Y here if you need support
for them in the interim.
-config NFSD_V4_DELEG_TIMESTAMPS
- bool "Support delegated timestamps"
- depends on NFSD_V4
- default n
- help
- NFSD implements delegated timestamps according to
- draft-ietf-nfsv4-delstid-08 "Extending the Opening of Files". This
- is currently an experimental feature and is therefore left disabled
- by default.
-
config NFSD_V4_POSIX_ACLS
bool "Support NFSv4 POSIX draft ACLs"
depends on NFSD_V4
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index a7cfba29990e..9d829c84f374 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -273,6 +273,52 @@ const struct nfsd4_layout_ops bl_layout_ops = {
#endif /* CONFIG_NFSD_BLOCKLAYOUT */
#ifdef CONFIG_NFSD_SCSILAYOUT
+
+#define NFSD_MDS_PR_FENCED XA_MARK_0
+
+/*
+ * Clear the fence flag if the device already has an entry. This occurs
+ * when a client re-registers after a previous fence, allowing new
+ * layouts for this device.
+ *
+ * Insert only on first registration. This bounds cl_dev_fences to the
+ * count of devices this client has accessed, preventing unbounded growth.
+ */
+static inline int nfsd4_scsi_fence_insert(struct nfs4_client *clp,
+ dev_t device)
+{
+ struct xarray *xa = &clp->cl_dev_fences;
+ int ret;
+
+ xa_lock(xa);
+ ret = __xa_insert(xa, device, XA_ZERO_ENTRY, GFP_KERNEL);
+ if (ret == -EBUSY) {
+ __xa_clear_mark(xa, device, NFSD_MDS_PR_FENCED);
+ ret = 0;
+ }
+ xa_unlock(xa);
+ clp->cl_fence_retry_warn = false;
+ return ret;
+}
+
+static inline bool nfsd4_scsi_fence_set(struct nfs4_client *clp, dev_t device)
+{
+ struct xarray *xa = &clp->cl_dev_fences;
+ bool skip;
+
+ xa_lock(xa);
+ skip = xa_get_mark(xa, device, NFSD_MDS_PR_FENCED);
+ if (!skip)
+ __xa_set_mark(xa, device, NFSD_MDS_PR_FENCED);
+ xa_unlock(xa);
+ return skip;
+}
+
+static inline void nfsd4_scsi_fence_clear(struct nfs4_client *clp, dev_t device)
+{
+ xa_clear_mark(&clp->cl_dev_fences, device, NFSD_MDS_PR_FENCED);
+}
+
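These helpers lean on two xarray properties: inserting NULL reserves a slot (so there is an entry for a mark to attach to), and marks survive until explicitly cleared. A minimal, unlocked sketch of the same reserve-then-mark pattern (names are hypothetical):

#include <linux/xarray.h>

static DEFINE_XARRAY(example_fence_map);

/* Returns true if this caller is the first to fence @dev. */
static bool example_fence_once(unsigned long dev)
{
	/* Reserve the index; -EBUSY just means it already exists. */
	int err = xa_insert(&example_fence_map, dev, NULL, GFP_KERNEL);

	if (err && err != -EBUSY)
		return false;		/* allocation failed; caller retries */

	if (xa_get_mark(&example_fence_map, dev, XA_MARK_0))
		return false;		/* already fenced */
	xa_set_mark(&example_fence_map, dev, XA_MARK_0);
	return true;
}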
#define NFSD_MDS_PR_KEY 0x0100000000000000ULL
/*
@@ -342,6 +388,10 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
goto out_free_dev;
}
+ ret = nfsd4_scsi_fence_insert(clp, sb->s_bdev->bd_dev);
+ if (ret < 0)
+ goto out_free_dev;
+
ret = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
if (ret) {
pr_err("pNFS: failed to register key for device %s.\n",
@@ -394,17 +444,67 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
}
-static void
+/*
+ * Perform the fence operation to prevent the client from accessing the
+ * block device. If a fence operation is already in progress, wait for
+ * it to complete before checking the NFSD_MDS_PR_FENCED flag. Once the
+ * operation is complete, check the flag. If NFSD_MDS_PR_FENCED is set,
+ * update the layout stateid by setting the ls_fenced flag to indicate
+ * that the client has been fenced.
+ *
+ * The cl_fence_mutex ensures that the fence operation has been fully
+ * completed, rather than just in progress, when returning from this
+ * function.
+ *
+ * Return true if the client was fenced; otherwise, return false.
+ */
+static bool
nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
int status;
+ bool ret;
+
+ mutex_lock(&clp->cl_fence_mutex);
+ if (nfsd4_scsi_fence_set(clp, bdev->bd_dev)) {
+ mutex_unlock(&clp->cl_fence_mutex);
+ return true;
+ }
status = bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp),
PR_EXCLUSIVE_ACCESS_REG_ONLY, true);
+ /*
+ * Reset to allow retry only when the command could not have
+ * reached the device. Negative status means a local error
+ * (e.g., -ENOMEM) prevented the command from being sent.
+ * PR_STS_PATH_FAILED, PR_STS_PATH_FAST_FAILED, and
+ * PR_STS_RETRY_PATH_FAILURE indicate transport path failures
+ * before device delivery.
+ *
+ * For all other errors, the command may have reached the device
+ * and the preempt may have succeeded. Avoid resetting, since
+ * retrying a successful preempt returns PR_STS_IOERR or
+ * PR_STS_RESERVATION_CONFLICT, which would cause an infinite
+ * retry loop.
+ */
+ switch (status) {
+ case 0:
+ case PR_STS_IOERR:
+ case PR_STS_RESERVATION_CONFLICT:
+ ret = true;
+ break;
+ default:
+ /* retry-able and other errors */
+ ret = false;
+ nfsd4_scsi_fence_clear(clp, bdev->bd_dev);
+ break;
+ }
+ mutex_unlock(&clp->cl_fence_mutex);
+
trace_nfsd_pnfs_fence(clp, bdev->bd_disk->disk_name, status);
+ return ret;
}
const struct nfsd4_layout_ops scsi_layout_ops = {
diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c
index 7f44689e0a53..386fd1c54f52 100644
--- a/fs/nfsd/debugfs.c
+++ b/fs/nfsd/debugfs.c
@@ -140,4 +140,8 @@ void nfsd_debugfs_init(void)
debugfs_create_file("io_cache_write", 0644, nfsd_top_dir, NULL,
&nfsd_io_cache_write_fops);
+#ifdef CONFIG_NFSD_V4
+ debugfs_create_bool("delegated_timestamps", 0644, nfsd_top_dir,
+ &nfsd_delegts_enabled);
+#endif
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 1aadfa8e0406..665153f1720e 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1362,13 +1362,14 @@ static struct flags {
{ NFSEXP_ASYNC, {"async", "sync"}},
{ NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
{ NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}},
+ { NFSEXP_SECURITY_LABEL, {"security_label", ""}},
+ { NFSEXP_SIGN_FH, {"sign_fh", ""}},
{ NFSEXP_NOHIDE, {"nohide", ""}},
- { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
+ { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
{ NFSEXP_V4ROOT, {"v4root", ""}},
{ NFSEXP_PNFS, {"pnfs", ""}},
- { NFSEXP_SECURITY_LABEL, {"security_label", ""}},
{ 0, {"", ""}}
};
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index c774ce9aa296..6fe1325815e0 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -14,19 +14,20 @@
#define NFSDDBG_FACILITY NFSDDBG_LOCKD
-#ifdef CONFIG_LOCKD_V4
-#define nlm_stale_fh nlm4_stale_fh
-#define nlm_failed nlm4_failed
-#else
-#define nlm_stale_fh nlm_lck_denied_nolocks
-#define nlm_failed nlm_lck_denied_nolocks
-#endif
-/*
- * Note: we hold the dentry use count while the file is open.
+/**
+ * nlm_fopen - Open an NFSD file
+ * @rqstp: NLM RPC procedure execution context
+ * @f: NFS file handle to be opened
+ * @filp: OUT: an opened struct file
+ * @flags: the POSIX open flags to use
+ *
+ * nlm_fopen() holds the dentry reference until nlm_fclose() releases it.
+ *
+ * Returns zero on success or a negative errno value if the file
+ * cannot be opened.
*/
-static __be32
-nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
- int mode)
+static int nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f,
+ struct file **filp, int flags)
{
__be32 nfserr;
int access;
@@ -47,18 +48,17 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
* if NFSEXP_NOAUTHNLM is set. Some older clients use AUTH_NULL
* for NLM requests.
*/
- access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
+ access = (flags == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
access |= NFSD_MAY_NLM | NFSD_MAY_OWNER_OVERRIDE | NFSD_MAY_BYPASS_GSS;
nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
- /* We return nlm error codes as nlm doesn't know
- * about nfsd, but nfsd does know about nlm..
- */
+
switch (nfserr) {
case nfs_ok:
- return 0;
+ break;
case nfserr_jukebox:
- /* this error can indicate a presence of a conflicting
+ /*
+ * This error can indicate the presence of a conflicting
* delegation to an NLM lock request. Options are:
* (1) For now, drop this request and make the client
* retry. When delegation is returned, client's lock retry
@@ -66,19 +66,25 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
* (2) NLM4_DENIED as per "spec" signals to the client
* that the lock is unavailable now but client can retry.
* Linux client implementation does not. It treats
- * NLM4_DENIED same as NLM4_FAILED and errors the request.
+ * NLM4_DENIED the same as NLM4_FAILED and fails the request.
* (3) For the future, treat this as blocked lock and try
* to callback when the delegation is returned but might
* not have a proper lock request to block on.
*/
- return nlm_drop_reply;
+ return -EWOULDBLOCK;
case nfserr_stale:
- return nlm_stale_fh;
+ return -ESTALE;
default:
- return nlm_failed;
+ return -ENOLCK;
}
+
+ return 0;
}
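With nlm_fopen() now returning a negative errno instead of an NLM status, the nlmsvc_ops contract changes shape as well; lockd's nlm_do_fopen() (see the svcsubs.c hunk above) is the consumer that translates these errnos into internal status codes. A sketch of the updated binding, assuming the header change matches this call site:

/* Sketch of the errno-based fopen contract (see fs/lockd/svcsubs.c
 * above for the lockd-side translation to nlm__int__* codes). */
struct nlmsvc_binding {
	int	(*fopen)(struct svc_rqst *rqstp, struct nfs_fh *fh,
			 struct file **filp, int flags);
	void	(*fclose)(struct file *filp);
};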
+/**
+ * nlm_fclose - Close an NFSD file
+ * @filp: a struct file that was opened by nlm_fopen()
+ */
static void
nlm_fclose(struct file *filp)
{
diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c
index 887525964451..81c943345d13 100644
--- a/fs/nfsd/netlink.c
+++ b/fs/nfsd/netlink.c
@@ -24,12 +24,13 @@ const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1] = {
};
/* NFSD_CMD_THREADS_SET - do */
-static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_MIN_THREADS + 1] = {
+static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_FH_KEY + 1] = {
[NFSD_A_SERVER_THREADS] = { .type = NLA_U32, },
[NFSD_A_SERVER_GRACETIME] = { .type = NLA_U32, },
[NFSD_A_SERVER_LEASETIME] = { .type = NLA_U32, },
[NFSD_A_SERVER_SCOPE] = { .type = NLA_NUL_STRING, },
[NFSD_A_SERVER_MIN_THREADS] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_FH_KEY] = NLA_POLICY_EXACT_LEN(16),
};
/* NFSD_CMD_VERSION_SET - do */
@@ -58,7 +59,7 @@ static const struct genl_split_ops nfsd_nl_ops[] = {
.cmd = NFSD_CMD_THREADS_SET,
.doit = nfsd_nl_threads_set_doit,
.policy = nfsd_threads_set_nl_policy,
- .maxattr = NFSD_A_SERVER_MIN_THREADS,
+ .maxattr = NFSD_A_SERVER_FH_KEY,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
{
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 9fa600602658..27da1a3edacb 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -25,6 +25,7 @@
#define SESSION_HASH_SIZE 512
struct cld_net;
+struct nfsd_net_cb;
struct nfsd4_client_tracking_ops;
enum {
@@ -99,6 +100,9 @@ struct nfsd_net {
*/
struct list_head client_lru;
struct list_head close_lru;
+
+ /* protects del_recall_lru and delegation hash/unhash */
+ spinlock_t deleg_lock ____cacheline_aligned;
struct list_head del_recall_lru;
/* protected by blocked_locks_lock */
@@ -224,6 +228,9 @@ struct nfsd_net {
spinlock_t local_clients_lock;
struct list_head local_clients;
#endif
+ siphash_key_t *fh_key;
+
+ struct nfsd_net_cb *nfsd_cb;
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index ef4971d71ac4..2ff9a991a8fb 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1069,7 +1069,7 @@ svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name,
*
* Return values:
* %0: Entry was successfully encoded.
- * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
+ * %-EINVAL: An encoding problem occurred, secondary status code in resp->common.err
*
* On exit, the following fields are updated:
* - resp->xdr
@@ -1144,7 +1144,7 @@ out_noattrs:
*
* Return values:
* %0: Entry was successfully encoded.
- * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
+ * %-EINVAL: An encoding problem occurred, secondary status code in resp->common.err
*
* On exit, the following fields are updated:
* - resp->xdr
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index aea8bdd2fdc4..50827405468d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1016,7 +1016,7 @@ static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp,
.p_decode = nfs4_xdr_dec_##restype, \
.p_arglen = NFS4_enc_##argtype##_sz, \
.p_replen = NFS4_dec_##restype##_sz, \
- .p_statidx = NFSPROC4_CB_##call, \
+ .p_statidx = NFSPROC4_CLNT_##proc, \
.p_name = #proc, \
}
@@ -1032,39 +1032,14 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
};
-static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
-static const struct rpc_version nfs_cb_version4 = {
-/*
- * Note on the callback rpc program version number: despite language in rfc
- * 5661 section 18.36.3 requiring servers to use 4 in this field, the
- * official xdr descriptions for both 4.0 and 4.1 specify version 1, and
- * in practice that appears to be what implementations use. The section
- * 18.36.3 language is expected to be fixed in an erratum.
- */
- .number = 1,
- .nrprocs = ARRAY_SIZE(nfs4_cb_procedures),
- .procs = nfs4_cb_procedures,
- .counts = nfs4_cb_counts,
-};
+#define NFS4_CB_PROGRAM 0x40000000
+#define NFS4_CB_VERSION 1
-static const struct rpc_version *nfs_cb_version[2] = {
- [1] = &nfs_cb_version4,
-};
-
-static const struct rpc_program cb_program;
-
-static struct rpc_stat cb_stats = {
- .program = &cb_program
-};
-
-#define NFS4_CALLBACK 0x40000000
-static const struct rpc_program cb_program = {
- .name = "nfs4_cb",
- .number = NFS4_CALLBACK,
- .nrvers = ARRAY_SIZE(nfs_cb_version),
- .version = nfs_cb_version,
- .stats = &cb_stats,
- .pipe_dir_name = "nfsd4_cb",
+struct nfsd_net_cb {
+ struct rpc_version version4;
+ const struct rpc_version *versions[NFS4_CB_VERSION + 1];
+ struct rpc_program program;
+ struct rpc_stat stat;
};
static int max_cb_time(struct net *net)
@@ -1140,6 +1115,7 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r
static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
int maxtime = max_cb_time(clp->net);
struct rpc_timeout timeparms = {
.to_initval = maxtime,
@@ -1152,14 +1128,14 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
.addrsize = conn->cb_addrlen,
.saddress = (struct sockaddr *) &conn->cb_saddr,
.timeout = &timeparms,
- .program = &cb_program,
- .version = 1,
+ .version = NFS4_CB_VERSION,
.flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
.cred = current_cred(),
};
struct rpc_clnt *client;
const struct cred *cred;
+ args.program = &nn->nfsd_cb->program;
if (clp->cl_minorversion == 0) {
if (!clp->cl_cred.cr_principal &&
(clp->cl_cred.cr_flavor >= RPC_AUTH_GSS_KRB5)) {
@@ -1786,3 +1762,70 @@ bool nfsd4_run_cb(struct nfsd4_callback *cb)
nfsd41_cb_inflight_end(clp);
return queued;
}
+
+/**
+ * nfsd_net_cb_shutdown - release per-netns callback RPC program resources
+ * @nn: NFS server network namespace
+ *
+ * Frees resources allocated by nfsd_net_cb_init().
+ */
+void nfsd_net_cb_shutdown(struct nfsd_net *nn)
+{
+ struct nfsd_net_cb *cb = nn->nfsd_cb;
+
+ if (cb) {
+ kfree(cb->version4.counts);
+ kfree(cb);
+ nn->nfsd_cb = NULL;
+ }
+}
+
+/**
+ * nfsd_net_cb_init - initialize per-netns callback RPC program
+ * @nn: NFS server network namespace
+ *
+ * Sets up the callback RPC program, version table, procedure
+ * counts, and statistics structure for @nn. Caller must release
+ * these resources using nfsd_net_cb_shutdown().
+ *
+ * Return: 0 on success, or -ENOMEM if allocation fails.
+ */
+int nfsd_net_cb_init(struct nfsd_net *nn)
+{
+ struct nfsd_net_cb *cb;
+
+ cb = kzalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb)
+ return -ENOMEM;
+
+ cb->version4.counts = kzalloc_objs(unsigned int,
+ ARRAY_SIZE(nfs4_cb_procedures), GFP_KERNEL);
+ if (!cb->version4.counts) {
+ kfree(cb);
+ return -ENOMEM;
+ }
+ /*
+ * Note on the callback rpc program version number: despite language
+ * in rfc 5661 section 18.36.3 requiring servers to use 4 in this
+ * field, the official xdr descriptions for both 4.0 and 4.1 specify
+ * version 1, and in practice that appears to be what implementations
+ * use. The section 18.36.3 language is expected to be fixed in an
+ * erratum.
+ */
+ cb->version4.number = NFS4_CB_VERSION;
+ cb->version4.nrprocs = ARRAY_SIZE(nfs4_cb_procedures);
+ cb->version4.procs = nfs4_cb_procedures;
+ cb->versions[NFS4_CB_VERSION] = &cb->version4;
+
+ cb->program.name = "nfs4_cb";
+ cb->program.number = NFS4_CB_PROGRAM;
+ cb->program.nrvers = ARRAY_SIZE(cb->versions);
+ cb->program.version = &cb->versions[0];
+ cb->program.pipe_dir_name = "nfsd4_cb";
+ cb->program.stats = &cb->stat;
+ cb->stat.program = &cb->program;
+
+ nn->nfsd_cb = cb;
+
+ return 0;
+}
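A minimal sketch of the expected pairing, with hypothetical per-netns init/exit callers; the real call sites are outside this section:

#include <net/net_namespace.h>
#include <net/netns/generic.h>

static __net_init int example_nfsd_init_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	int ret;

	ret = nfsd_net_cb_init(nn);
	if (ret)
		return ret;
	/* ... remaining per-netns setup ... */
	return 0;
}

static __net_exit void example_nfsd_exit_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	nfsd_net_cb_shutdown(nn);
}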
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ad7af8cfcf1f..69e41105efdd 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -27,6 +27,8 @@ static struct kmem_cache *nfs4_layout_stateid_cache;
static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
static const struct lease_manager_operations nfsd4_layouts_lm_ops;
+static void nfsd4_layout_fence_worker(struct work_struct *work);
+
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
#ifdef CONFIG_NFSD_FLEXFILELAYOUT
[LAYOUT_FLEX_FILES] = &ff_layout_ops,
@@ -177,6 +179,13 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
trace_nfsd_layoutstate_free(&ls->ls_stid.sc_stateid);
+ spin_lock(&ls->ls_lock);
+ if (delayed_work_pending(&ls->ls_fence_work)) {
+ spin_unlock(&ls->ls_lock);
+ cancel_delayed_work_sync(&ls->ls_fence_work);
+ } else
+ spin_unlock(&ls->ls_lock);
+
spin_lock(&clp->cl_lock);
list_del_init(&ls->ls_perclnt);
spin_unlock(&clp->cl_lock);
@@ -271,6 +280,10 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
list_add(&ls->ls_perfile, &fp->fi_lo_states);
spin_unlock(&fp->fi_lock);
+ ls->ls_fenced = false;
+ ls->ls_fence_delay = 0;
+ INIT_DELAYED_WORK(&ls->ls_fence_work, nfsd4_layout_fence_worker);
+
trace_nfsd_layoutstate_alloc(&ls->ls_stid.sc_stateid);
return ls;
}
@@ -747,11 +760,9 @@ static bool
nfsd4_layout_lm_break(struct file_lease *fl)
{
/*
- * We don't want the locks code to timeout the lease for us;
- * we'll remove it ourself if a layout isn't returned
- * in time:
+	 * Enforce the lease break timeout to prevent an NFSD
+	 * thread from hanging in __break_lease().
*/
- fl->fl_break_time = 0;
nfsd4_recall_file_layout(fl->c.flc_owner);
return false;
}
@@ -782,10 +793,143 @@ nfsd4_layout_lm_open_conflict(struct file *filp, int arg)
return 0;
}
+static void
+nfsd4_layout_fence_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct nfs4_layout_stateid *ls = container_of(dwork,
+ struct nfs4_layout_stateid, ls_fence_work);
+ struct nfsd_file *nf;
+ struct block_device *bdev;
+ struct nfs4_client *clp;
+ struct nfsd_net *nn;
+
+ /*
+ * The workqueue clears WORK_STRUCT_PENDING before invoking
+ * this callback. Re-arm immediately so that
+ * delayed_work_pending() returns true while the fence
+ * operation is in progress, preventing
+ * lm_breaker_timedout() from taking a duplicate reference.
+ */
+ mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
+
+ spin_lock(&ls->ls_lock);
+ if (list_empty(&ls->ls_layouts)) {
+ spin_unlock(&ls->ls_lock);
+dispose:
+ cancel_delayed_work(&ls->ls_fence_work);
+ /* unlock the lease so that tasks waiting on it can proceed */
+ nfsd4_close_layout(ls);
+
+ ls->ls_fenced = true;
+ nfs4_put_stid(&ls->ls_stid);
+ return;
+ }
+ spin_unlock(&ls->ls_lock);
+
+ rcu_read_lock();
+ nf = nfsd_file_get(ls->ls_file);
+ rcu_read_unlock();
+ if (!nf)
+ goto dispose;
+
+ clp = ls->ls_stid.sc_client;
+ nn = net_generic(clp->net, nfsd_net_id);
+ bdev = nf->nf_file->f_path.mnt->mnt_sb->s_bdev;
+ if (nfsd4_layout_ops[ls->ls_layout_type]->fence_client(ls, nf)) {
+ /* fenced ok */
+ nfsd_file_put(nf);
+ pr_warn("%s: FENCED client[%pISpc] clid[%d] to device[%s]\n",
+ __func__, (struct sockaddr *)&clp->cl_addr,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ bdev->bd_disk->disk_name);
+ goto dispose;
+ }
+ /* fence failed */
+ nfsd_file_put(nf);
+
+ if (!clp->cl_fence_retry_warn) {
+ pr_warn("%s: FENCE failed client[%pISpc] clid[%d] device[%s]\n",
+ __func__, (struct sockaddr *)&clp->cl_addr,
+ clp->cl_clientid.cl_id - nn->clientid_base,
+ bdev->bd_disk->disk_name);
+ clp->cl_fence_retry_warn = true;
+ }
+ /*
+ * The fence worker retries the fencing operation indefinitely to
+ * prevent data corruption. The admin needs to take the following
+ * actions to restore access to the file for other clients:
+ *
+ * . shutdown or power off the client being fenced.
+	 * . manually expire the client to release all its state on the
+	 *   server: echo 'expire' > /proc/fs/nfsd/clients/clid/ctl,
+	 *   where clid is the unique client identifier displayed in
+	 *   the warning message above.
+ */
+ if (!ls->ls_fence_delay)
+ ls->ls_fence_delay = HZ;
+ else
+ ls->ls_fence_delay = min(ls->ls_fence_delay << 1,
+ MAX_FENCE_DELAY);
+ mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, ls->ls_fence_delay);
+}
+
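The re-arm-first trick at the top of the worker is worth calling out on its own: the workqueue clears the pending bit before invoking the callback, so a worker that wants delayed_work_pending() to stay true must immediately re-queue itself. A minimal sketch of the pattern (the operation is hypothetical; the patch itself queues on system_dfl_wq):

#include <linux/workqueue.h>

static bool example_try_operation(void);	/* hypothetical */

static void example_retry_worker(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);

	/* Re-arm with no delay so delayed_work_pending() reads true
	 * for concurrent observers while this callback runs. */
	mod_delayed_work(system_wq, dwork, 0);

	if (example_try_operation()) {
		/* Done: drop the pending state re-armed above. */
		cancel_delayed_work(dwork);
		return;
	}
	/* Failed: back off and retry later. */
	mod_delayed_work(system_wq, dwork, HZ);
}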
+/**
+ * nfsd4_layout_lm_breaker_timedout - The layout recall has timed out.
+ * @fl: file to check
+ *
+ * If the layout type supports a fence operation, schedule a worker to
+ * fence the client, blocking its access to the block device.
+ *
+ * This function runs under the protection of the spin_lock flc_lock.
+ * At this time, the file_lease associated with the layout stateid is
+ * on the flc_list. A reference count is incremented on the layout
+ * stateid to prevent it from being freed while the fence worker is
+ * executing. Once the fence worker finishes its operation, it releases
+ * this reference.
+ *
+ * The fence worker continues to run until either the client has been
+ * fenced or the layout becomes invalid. The layout can become invalid
+ * as a result of a LAYOUTRETURN or when the CB_LAYOUT recall callback
+ * has completed.
+ *
+ * Return true if the file_lease should be disposed of by the caller;
+ * otherwise, return false.
+ */
+static bool
+nfsd4_layout_lm_breaker_timedout(struct file_lease *fl)
+{
+ struct nfs4_layout_stateid *ls = fl->c.flc_owner;
+
+ if ((!nfsd4_layout_ops[ls->ls_layout_type]->fence_client) ||
+ ls->ls_fenced)
+ return true;
+ if (delayed_work_pending(&ls->ls_fence_work))
+ return false;
+ /*
+ * Make sure layout has not been returned yet before
+ * taking a reference count on the layout stateid.
+ */
+ spin_lock(&ls->ls_lock);
+ if (list_empty(&ls->ls_layouts) ||
+ !refcount_inc_not_zero(&ls->ls_stid.sc_count)) {
+ spin_unlock(&ls->ls_lock);
+ return true;
+ }
+ spin_unlock(&ls->ls_lock);
+
+ mod_delayed_work(system_dfl_wq, &ls->ls_fence_work, 0);
+ return false;
+}
+
static const struct lease_manager_operations nfsd4_layouts_lm_ops = {
.lm_break = nfsd4_layout_lm_break,
.lm_change = nfsd4_layout_lm_change,
.lm_open_conflict = nfsd4_layout_lm_open_conflict,
+ .lm_breaker_timedout = nfsd4_layout_lm_breaker_timedout,
};
int
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 6880c5c520e7..85e94c30285a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -3043,6 +3043,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd_thread_local_info *ntli = rqstp->rq_private;
__be32 status;
resp->xdr = &rqstp->rq_res_stream;
@@ -3081,7 +3082,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
}
check_if_stalefh_allowed(args);
- rqstp->rq_lease_breaker = (void **)&cstate->clp;
+ ntli->ntli_lease_breaker = &cstate->clp;
trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
while (!status && resp->opcnt < args->opcnt) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a569d89ac912..c2d13b26a687 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -76,6 +76,8 @@ static const stateid_t close_stateid = {
static u64 current_sessionid = 1;
+bool nfsd_delegts_enabled __read_mostly = true;
+
#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid) (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
@@ -91,13 +93,6 @@ static void deleg_reaper(struct nfsd_net *nn);
/* Locking: */
-/*
- * Currently used for the del_recall_lru and file hash table. In an
- * effort to decrease the scope of the client_mutex, this spinlock may
- * eventually cover more:
- */
-static DEFINE_SPINLOCK(state_lock);
-
enum nfsd4_st_mutex_lock_subclass {
OPEN_STATEID_MUTEX = 0,
LOCK_STATEID_MUTEX = 1,
@@ -1293,8 +1288,9 @@ nfs4_delegation_exists(struct nfs4_client *clp, struct nfs4_file *fp)
{
struct nfs4_delegation *searchdp = NULL;
struct nfs4_client *searchclp = NULL;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- lockdep_assert_held(&state_lock);
+ lockdep_assert_held(&nn->deleg_lock);
lockdep_assert_held(&fp->fi_lock);
list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
@@ -1323,8 +1319,9 @@ static int
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
struct nfs4_client *clp = dp->dl_stid.sc_client;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
- lockdep_assert_held(&state_lock);
+ lockdep_assert_held(&nn->deleg_lock);
lockdep_assert_held(&fp->fi_lock);
lockdep_assert_held(&clp->cl_lock);
@@ -1346,8 +1343,10 @@ static bool
unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
+ struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
+ nfsd_net_id);
- lockdep_assert_held(&state_lock);
+ lockdep_assert_held(&nn->deleg_lock);
if (!delegation_hashed(dp))
return false;
@@ -1372,10 +1371,12 @@ unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask)
static void destroy_delegation(struct nfs4_delegation *dp)
{
bool unhashed;
+ struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
+ nfsd_net_id);
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
unhashed = unhash_delegation_locked(dp, SC_STATUS_CLOSED);
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
if (unhashed)
destroy_unhashed_deleg(dp);
}
@@ -1495,8 +1496,24 @@ release_all_access(struct nfs4_ol_stateid *stp)
}
}
+/**
+ * nfs4_replay_free_cache - release dynamically allocated replay buffer
+ * @rp: replay cache to reset
+ *
+ * If @rp->rp_buf points to a kmalloc'd buffer, free it and reset
+ * rp_buf to the inline rp_ibuf. Always zeroes rp_buflen.
+ */
+void nfs4_replay_free_cache(struct nfs4_replay *rp)
+{
+ if (rp->rp_buf != rp->rp_ibuf)
+ kfree(rp->rp_buf);
+ rp->rp_buf = rp->rp_ibuf;
+ rp->rp_buflen = 0;
+}
+
static inline void nfs4_free_stateowner(struct nfs4_stateowner *sop)
{
+ nfs4_replay_free_cache(&sop->so_replay);
kfree(sop->so_owner.data);
sop->so_ops->so_free(sop);
}
@@ -1838,11 +1855,11 @@ void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb)
case SC_TYPE_DELEG:
refcount_inc(&stid->sc_count);
dp = delegstateid(stid);
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
if (!unhash_delegation_locked(
dp, SC_STATUS_ADMIN_REVOKED))
dp = NULL;
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
if (dp)
revoke_delegation(dp);
break;
@@ -2382,6 +2399,10 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
#ifdef CONFIG_NFSD_PNFS
INIT_LIST_HEAD(&clp->cl_lo_states);
#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ xa_init(&clp->cl_dev_fences);
+ mutex_init(&clp->cl_fence_mutex);
+#endif
INIT_LIST_HEAD(&clp->async_copies);
spin_lock_init(&clp->async_lock);
spin_lock_init(&clp->cl_lock);
@@ -2504,13 +2525,13 @@ __destroy_client(struct nfs4_client *clp)
struct nfs4_delegation *dp;
LIST_HEAD(reaplist);
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
@@ -2543,6 +2564,9 @@ __destroy_client(struct nfs4_client *clp)
svc_xprt_put(clp->cl_cb_conn.cb_xprt);
atomic_add_unless(&nn->nfs4_client_count, -1, 0);
nfsd4_dec_courtesy_client_count(nn, clp);
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ xa_destroy(&clp->cl_dev_fences);
+#endif
free_client(clp);
wake_up_all(&expiry_wq);
}
@@ -5418,12 +5442,12 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
* If the dl_time != 0, then we know that it has already been
* queued for a lease break. Don't queue it again.
*/
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
if (delegation_hashed(dp) && dp->dl_time == 0) {
dp->dl_time = ktime_get_boottime_seconds();
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
}
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
}
static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
@@ -5535,13 +5559,15 @@ nfsd_break_deleg_cb(struct file_lease *fl)
static bool nfsd_breaker_owns_lease(struct file_lease *fl)
{
struct nfs4_delegation *dl = fl->c.flc_owner;
+ struct nfsd_thread_local_info *ntli;
struct svc_rqst *rqst;
struct nfs4_client *clp;
rqst = nfsd_current_rqst();
if (!nfsd_v4client(rqst))
return false;
- clp = *(rqst->rq_lease_breaker);
+ ntli = rqst->rq_private;
+ clp = *ntli->ntli_lease_breaker;
return dl->dl_stid.sc_client == clp;
}
@@ -6036,17 +6062,16 @@ nfsd4_verify_setuid_write(struct nfsd4_open *open, struct nfsd_file *nf)
return 0;
}
-#ifdef CONFIG_NFSD_V4_DELEG_TIMESTAMPS
+/*
+ * Delegated timestamps are specified by draft-ietf-nfsv4-delstid
+ * ("Extending the Opening of Files"). The runtime switch for
+ * disabling this feature is /sys/kernel/debug/nfsd/delegated_timestamps.
+ */
static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
{
+ if (!nfsd_delegts_enabled)
+ return false;
return open->op_deleg_want & OPEN4_SHARE_ACCESS_WANT_DELEG_TIMESTAMPS;
}
-#else /* CONFIG_NFSD_V4_DELEG_TIMESTAMPS */
-static bool nfsd4_want_deleg_timestamps(const struct nfsd4_open *open)
-{
- return false;
-}
-#endif /* CONFIG NFSD_V4_DELEG_TIMESTAMPS */
static struct nfs4_delegation *
nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
@@ -6054,6 +6079,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
{
bool deleg_ts = nfsd4_want_deleg_timestamps(open);
struct nfs4_client *clp = stp->st_stid.sc_client;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
struct nfs4_file *fp = stp->st_stid.sc_file;
struct nfs4_clnt_odstate *odstate = stp->st_clnt_odstate;
struct nfs4_delegation *dp;
@@ -6113,7 +6139,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
return ERR_PTR(-EOPNOTSUPP);
}
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
spin_lock(&fp->fi_lock);
if (nfs4_delegation_exists(clp, fp))
status = -EAGAIN;
@@ -6128,7 +6154,7 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
} else
fp->fi_delegees++;
spin_unlock(&fp->fi_lock);
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
if (nf)
nfsd_file_put(nf);
if (status)
@@ -6172,13 +6198,13 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (fp->fi_had_conflict)
goto out_unlock;
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
spin_lock(&clp->cl_lock);
spin_lock(&fp->fi_lock);
status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&clp->cl_lock);
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
if (status)
goto out_unlock;
@@ -6257,12 +6283,12 @@ nfsd4_add_rdaccess_to_wrdeleg(struct svc_rqst *rqstp, struct nfsd4_open *open,
return (false);
fp = stp->st_stid.sc_file;
spin_lock(&fp->fi_lock);
- __nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ);
if (!fp->fi_fds[O_RDONLY]) {
+ __nfs4_file_get_access(fp, NFS4_SHARE_ACCESS_READ);
fp->fi_fds[O_RDONLY] = nf;
+ fp->fi_rdeleg_file = nfsd_file_get(fp->fi_fds[O_RDONLY]);
nf = NULL;
}
- fp->fi_rdeleg_file = nfsd_file_get(fp->fi_fds[O_RDONLY]);
spin_unlock(&fp->fi_lock);
if (nf)
nfsd_file_put(nf);
@@ -6954,7 +6980,7 @@ nfs4_laundromat(struct nfsd_net *nn)
nfs40_clean_admin_revoked(nn, &lt);
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (!state_expired(&lt, dp->dl_time))
@@ -6963,7 +6989,7 @@ nfs4_laundromat(struct nfsd_net *nn)
unhash_delegation_locked(dp, SC_STATUS_REVOKED);
list_add(&dp->dl_recall_lru, &reaplist);
}
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
while (!list_empty(&reaplist)) {
dp = list_first_entry(&reaplist, struct nfs4_delegation,
dl_recall_lru);
@@ -8986,6 +9012,7 @@ static int nfs4_state_create_net(struct net *net)
INIT_LIST_HEAD(&nn->client_lru);
INIT_LIST_HEAD(&nn->close_lru);
INIT_LIST_HEAD(&nn->del_recall_lru);
+ spin_lock_init(&nn->deleg_lock);
spin_lock_init(&nn->client_lock);
spin_lock_init(&nn->s2s_cp_lock);
idr_init(&nn->s2s_cp_stateids);
@@ -9117,13 +9144,13 @@ nfs4_state_shutdown_net(struct net *net)
locks_end_grace(&nn->nfsd4_manager);
INIT_LIST_HEAD(&reaplist);
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
@@ -9348,13 +9375,14 @@ __be32
nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
struct nfs4_delegation **pdp)
{
- __be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd_thread_local_info *ntli = rqstp->rq_private;
struct file_lock_context *ctx;
struct nfs4_delegation *dp = NULL;
struct file_lease *fl;
struct nfs4_cb_fattr *ncf;
struct inode *inode = d_inode(dentry);
+ __be32 status;
ctx = locks_inode_context(inode);
if (!ctx)
@@ -9375,7 +9403,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct dentry *dentry,
break;
}
if (dp == NULL || dp == NON_NFSD_LEASE ||
- dp->dl_recall.cb_clp == *(rqstp->rq_lease_breaker)) {
+ dp->dl_recall.cb_clp == *(ntli->ntli_lease_breaker)) {
spin_unlock(&ctx->flc_lock);
if (dp == NON_NFSD_LEASE) {
status = nfserrno(nfsd_open_break_lease(inode,
@@ -9445,6 +9473,7 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
struct nfsd_file *nf)
{
struct nfs4_client *clp = cstate->clp;
+ struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
struct nfs4_delegation *dp;
struct file_lease *fl;
struct nfs4_file *fp, *rfp;
@@ -9468,7 +9497,7 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
}
/* if this client already has one, return that it's unavailable */
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
spin_lock(&fp->fi_lock);
/* existing delegation? */
if (nfs4_delegation_exists(clp, fp)) {
@@ -9480,7 +9509,7 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
++fp->fi_delegees;
}
spin_unlock(&fp->fi_lock);
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
if (status) {
put_nfs4_file(fp);
@@ -9509,13 +9538,13 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate,
* trying to set a delegation on the same file. If that happens,
* then just say UNAVAIL.
*/
- spin_lock(&state_lock);
+ spin_lock(&nn->deleg_lock);
spin_lock(&clp->cl_lock);
spin_lock(&fp->fi_lock);
status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
spin_unlock(&clp->cl_lock);
- spin_unlock(&state_lock);
+ spin_unlock(&nn->deleg_lock);
if (!status) {
put_nfs4_file(fp);
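
Every multi-lock path in this patch nests the three locks in one fixed order (nn->deleg_lock, then clp->cl_lock, then fp->fi_lock), which is what keeps the switch from the global state_lock to a per-net lock deadlock-free. A minimal pthread model of that discipline; the mutexes are illustrative stand-ins for the kernel spinlocks:

#include <pthread.h>

static pthread_mutex_t deleg_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t cl_lock    = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fi_lock    = PTHREAD_MUTEX_INITIALIZER;

/* Any path that needs more than one of these locks takes them in the
 * same order, deleg -> client -> file, so no lock cycle can form. */
static void hash_delegation_model(void)
{
        pthread_mutex_lock(&deleg_lock);
        pthread_mutex_lock(&cl_lock);
        pthread_mutex_lock(&fi_lock);
        /* ... link the delegation into per-net and per-file lists ... */
        pthread_mutex_unlock(&fi_lock);
        pthread_mutex_unlock(&cl_lock);
        pthread_mutex_unlock(&deleg_lock);
}
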
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 9d234913100b..2a0946c630e1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2598,6 +2598,7 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
static bool
nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
{
+ struct nfsd_thread_local_info *ntli = argp->rqstp->rq_private;
struct nfsd4_op *op;
bool cachethis = false;
int auth_slack= argp->rqstp->rq_auth_slack;
@@ -2690,7 +2691,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
if (argp->minorversion)
cachethis = false;
svc_reserve_auth(argp->rqstp, max_reply + readbytes);
- argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
+ ntli->ntli_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
argp->splice_ok = nfsd_read_splice_ok(argp->rqstp);
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
@@ -6281,14 +6282,23 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
int len = xdr->buf->len - (op_status_offset + XDR_UNIT);
so->so_replay.rp_status = op->status;
- if (len <= NFSD4_REPLAY_ISIZE) {
- so->so_replay.rp_buflen = len;
- read_bytes_from_xdr_buf(xdr->buf,
- op_status_offset + XDR_UNIT,
- so->so_replay.rp_buf, len);
- } else {
- so->so_replay.rp_buflen = 0;
+ if (len > NFSD4_REPLAY_ISIZE) {
+ char *buf = kmalloc(len, GFP_KERNEL);
+
+ nfs4_replay_free_cache(&so->so_replay);
+ if (buf) {
+ so->so_replay.rp_buf = buf;
+ } else {
+ /* rp_buflen already zeroed; skip caching */
+ goto status;
+ }
+ } else if (so->so_replay.rp_buf != so->so_replay.rp_ibuf) {
+ nfs4_replay_free_cache(&so->so_replay);
}
+ so->so_replay.rp_buflen = len;
+ read_bytes_from_xdr_buf(xdr->buf,
+ op_status_offset + XDR_UNIT,
+ so->so_replay.rp_buf, len);
}
status:
op->status = nfsd4_map_status(op->status,
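
The encoding change above is an inline-buffer-with-heap-fallback: replies up to NFSD4_REPLAY_ISIZE stay in rp_ibuf, larger ones get a kmalloc'd buffer, and a failed allocation degrades to caching only rp_status. A self-contained userspace sketch of the same shape (struct and helper names are illustrative, not the kernel types):

#include <stdlib.h>
#include <string.h>

#define ISIZE 112                       /* like NFSD4_REPLAY_ISIZE */

struct replay {
        size_t  buflen;
        char   *buf;                    /* points at ibuf or a heap buffer */
        char    ibuf[ISIZE];
};

static void replay_free_cache(struct replay *rp)
{
        if (rp->buf != rp->ibuf)
                free(rp->buf);
        rp->buf = rp->ibuf;
        rp->buflen = 0;
}

static int replay_save(struct replay *rp, const char *data, size_t len)
{
        if (len > ISIZE) {
                char *buf = malloc(len);

                replay_free_cache(rp);
                if (!buf)
                        return -1;      /* cache only the status code */
                rp->buf = buf;
        } else {
                replay_free_cache(rp);  /* drop any stale heap buffer */
        }
        memcpy(rp->buf, data, len);
        rp->buflen = len;
        return 0;
}
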
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index ab13ee9c7fd8..154468ceccdc 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -467,10 +467,11 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
unsigned int len, struct nfsd_cacherep **cacherep)
{
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+ struct nfsd_thread_local_info *ntli = rqstp->rq_private;
struct nfsd_cacherep *rp, *found;
__wsum csum;
struct nfsd_drc_bucket *b;
- int type = rqstp->rq_cachetype;
+ int type = ntli->ntli_cachetype;
LIST_HEAD(dispose);
int rtn = RC_DOIT;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 71aabdaa1d15..39e7012a60d8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -11,7 +11,7 @@
#include <linux/fs_context.h>
#include <linux/sunrpc/svcsock.h>
-#include <linux/lockd/lockd.h>
+#include <linux/lockd/bind.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
@@ -1582,6 +1582,32 @@ out_unlock:
}
/**
+ * nfsd_nl_fh_key_set - helper to copy fh_key from userspace
+ * @attr: nlattr NFSD_A_SERVER_FH_KEY
+ * @nn: nfsd_net
+ *
+ * Callers must hold nfsd_mutex and must ensure the server is shut down
+ * (sv_nrthreads == 0); userspace documentation asserts the key may only
+ * be set while the server is not running.
+ * Returns 0 on success or a negative errno.
+ */
+static int nfsd_nl_fh_key_set(const struct nlattr *attr, struct nfsd_net *nn)
+{
+ siphash_key_t *fh_key = nn->fh_key;
+
+ if (!fh_key) {
+ fh_key = kmalloc(sizeof(siphash_key_t), GFP_KERNEL);
+ if (!fh_key)
+ return -ENOMEM;
+ nn->fh_key = fh_key;
+ }
+
+ fh_key->key[0] = get_unaligned_le64(nla_data(attr));
+ fh_key->key[1] = get_unaligned_le64(nla_data(attr) + 8);
+ return 0;
+}
+
+/**
* nfsd_nl_threads_set_doit - set the number of running threads
* @skb: reply buffer
* @info: netlink metadata and command arguments
@@ -1622,7 +1648,8 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NFSD_A_SERVER_GRACETIME] ||
info->attrs[NFSD_A_SERVER_LEASETIME] ||
- info->attrs[NFSD_A_SERVER_SCOPE]) {
+ info->attrs[NFSD_A_SERVER_SCOPE] ||
+ info->attrs[NFSD_A_SERVER_FH_KEY]) {
ret = -EBUSY;
if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads)
goto out_unlock;
@@ -1651,6 +1678,14 @@ int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
attr = info->attrs[NFSD_A_SERVER_SCOPE];
if (attr)
scope = nla_data(attr);
+
+ attr = info->attrs[NFSD_A_SERVER_FH_KEY];
+ if (attr) {
+ ret = nfsd_nl_fh_key_set(attr, nn);
+ trace_nfsd_ctl_fh_key_set((const char *)nn->fh_key, ret);
+ if (ret)
+ goto out_unlock;
+ }
}
attr = info->attrs[NFSD_A_SERVER_MIN_THREADS];
@@ -2168,6 +2203,9 @@ static __net_init int nfsd_net_init(struct net *net)
int retval;
int i;
+ retval = nfsd_net_cb_init(nn);
+ if (retval)
+ return retval;
retval = nfsd_export_init(net);
if (retval)
goto out_export_error;
@@ -2208,6 +2246,7 @@ out_repcache_error:
out_idmap_error:
nfsd_export_shutdown(net);
out_export_error:
+ nfsd_net_cb_shutdown(nn);
return retval;
}
@@ -2237,6 +2276,8 @@ static __net_exit void nfsd_net_exit(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ kfree_sensitive(nn->fh_key);
+ nfsd_net_cb_shutdown(nn);
nfsd_proc_stat_shutdown(net);
percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM);
nfsd_idmap_shutdown(net);
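
NFSD_A_SERVER_FH_KEY carries a raw 16-octet blob that is split into two little-endian 64-bit halves with unaligned-safe loads. A userspace equivalent of that parsing step, with memcpy standing in for get_unaligned_le64():

#include <stdint.h>
#include <string.h>

struct fh_siphash_key { uint64_t key[2]; };

static uint64_t load_le64(const unsigned char *p)
{
        uint64_t v;

        memcpy(&v, p, sizeof(v));       /* unaligned-safe load */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        v = __builtin_bswap64(v);       /* wire format is little-endian */
#endif
        return v;
}

/* @data must point at the 16-octet attribute payload. */
static void fh_key_parse(struct fh_siphash_key *k, const unsigned char *data)
{
        k->key[0] = load_le64(data);
        k->key[1] = load_le64(data + 8);
}
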
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index a01d70953358..7c009f07c90b 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -82,6 +82,11 @@ extern atomic_t nfsd_th_cnt; /* number of available threads */
extern const struct seq_operations nfs_exports_op;
+struct nfsd_thread_local_info {
+ struct nfs4_client **ntli_lease_breaker;
+ int ntli_cachetype;
+};
+
/*
* Common void argument and result helpers
*/
@@ -155,6 +160,7 @@ static inline void nfsd_debugfs_exit(void) {}
#endif
extern bool nfsd_disable_splice_read __read_mostly;
+extern bool nfsd_delegts_enabled __read_mostly;
enum {
/* Any new NFSD_IO enum value must be added at the end */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index ee72c9565e4f..429ca5c6ec08 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -11,6 +11,7 @@
#include <linux/exportfs.h>
#include <linux/sunrpc/svcauth_gss.h>
+#include <crypto/utils.h>
#include "nfsd.h"
#include "vfs.h"
#include "auth.h"
@@ -105,9 +106,12 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
{
/* Check if the request originated from a secure port. */
if (rqstp && !nfsd_originating_port_ok(rqstp, cred, exp)) {
- RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
- dprintk("nfsd: request from insecure port %s!\n",
- svc_print_addr(rqstp, buf, sizeof(buf)));
+ if (IS_ENABLED(CONFIG_SUNRPC_DEBUG)) {
+ char buf[RPC_MAX_ADDRBUFLEN];
+
+ dprintk("nfsd: request from insecure port %s!\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
+ }
return nfserr_perm;
}
@@ -137,6 +141,57 @@ static inline __be32 check_pseudo_root(struct dentry *dentry,
return nfs_ok;
}
+/* Size of a file handle MAC, in 4-octet words */
+#define FH_MAC_WORDS (sizeof(__le64) / 4)
+
+static bool fh_append_mac(struct svc_fh *fhp, struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct knfsd_fh *fh = &fhp->fh_handle;
+ siphash_key_t *fh_key = nn->fh_key;
+ __le64 hash;
+
+ if (!fh_key)
+ goto out_no_key;
+ if (fh->fh_size + sizeof(hash) > fhp->fh_maxsize)
+ goto out_no_space;
+
+ hash = cpu_to_le64(siphash(&fh->fh_raw, fh->fh_size, fh_key));
+ memcpy(&fh->fh_raw[fh->fh_size], &hash, sizeof(hash));
+ fh->fh_size += sizeof(hash);
+ return true;
+
+out_no_key:
+ pr_warn_ratelimited("NFSD: unable to sign filehandles, fh_key not set.\n");
+ return false;
+
+out_no_space:
+ pr_warn_ratelimited("NFSD: unable to sign filehandles, fh_size %zu would be greater than fh_maxsize %d.\n",
+ fh->fh_size + sizeof(hash), fhp->fh_maxsize);
+ return false;
+}
+
+/*
+ * Verify that the filehandle's MAC was hashed from this filehandle
+ * given the server's fh_key:
+ */
+static bool fh_verify_mac(struct svc_fh *fhp, struct net *net)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct knfsd_fh *fh = &fhp->fh_handle;
+ siphash_key_t *fh_key = nn->fh_key;
+ __le64 hash;
+
+ if (!fh_key) {
+ pr_warn_ratelimited("NFSD: unable to verify signed filehandles, fh_key not set.\n");
+ return false;
+ }
+
+ hash = cpu_to_le64(siphash(&fh->fh_raw, fh->fh_size - sizeof(hash), fh_key));
+ return crypto_memneq(&fh->fh_raw[fh->fh_size - sizeof(hash)],
+ &hash, sizeof(hash)) == 0;
+}
+
/*
* Use the given filehandle to look up the corresponding export and
* dentry. On success, the results are used to set fh_export and
@@ -233,13 +288,21 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net,
/*
* Look up the dentry using the NFS file handle.
*/
- error = nfserr_badhandle;
-
fileid_type = fh->fh_fileid_type;
+ error = nfserr_stale;
- if (fileid_type == FILEID_ROOT)
+ if (fileid_type == FILEID_ROOT) {
+ /* We don't sign or verify the root; it has no per-file identity */
dentry = dget(exp->ex_path.dentry);
- else {
+ } else {
+ if (exp->ex_flags & NFSEXP_SIGN_FH) {
+ if (!fh_verify_mac(fhp, net)) {
+ trace_nfsd_set_fh_dentry_badmac(rqstp, fhp, -ESTALE);
+ goto out;
+ }
+ data_left -= FH_MAC_WORDS;
+ }
+
dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
data_left, fileid_type, 0,
nfsd_acceptable, exp);
@@ -255,6 +318,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net,
}
}
}
+
+ error = nfserr_badhandle;
if (dentry == NULL)
goto out;
if (IS_ERR(dentry)) {
@@ -495,6 +560,10 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
fhp->fh_handle.fh_fileid_type =
fileid_type > 0 ? fileid_type : FILEID_INVALID;
fhp->fh_handle.fh_size += maxsize * 4;
+
+ if (exp->ex_flags & NFSEXP_SIGN_FH)
+ if (!fh_append_mac(fhp, exp->cd->net))
+ fhp->fh_handle.fh_fileid_type = FILEID_INVALID;
} else {
fhp->fh_handle.fh_fileid_type = FILEID_ROOT;
}
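
A compact userspace model of the append/verify pair above. The hash here is a deliberately weak FNV-1a stand-in for siphash(), and ct_memneq() mirrors the constant-time contract of crypto_memneq(); the layout (an 8-byte MAC appended after the handle body) follows the kernel code:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Weak stand-in for the keyed hash; the kernel uses siphash() with
 * the per-net fh_key. */
static uint64_t toy_mac(const void *data, size_t len, uint64_t key)
{
        const unsigned char *p = data;
        uint64_t h = key ^ 0xcbf29ce484222325ULL;       /* FNV-1a basis */

        while (len--)
                h = (h ^ *p++) * 0x100000001b3ULL;      /* FNV-1a prime */
        return h;
}

/* Constant-time inequality: runtime does not depend on where the
 * buffers first differ, like crypto_memneq(). */
static int ct_memneq(const void *a, const void *b, size_t n)
{
        const unsigned char *pa = a, *pb = b;
        unsigned char diff = 0;

        while (n--)
                diff |= *pa++ ^ *pb++;
        return diff != 0;
}

static int mac_append(unsigned char *fh, size_t *size, size_t max,
                      uint64_t key)
{
        uint64_t mac;

        if (*size + sizeof(mac) > max)
                return -1;              /* no room, as in fh_append_mac() */
        mac = toy_mac(fh, *size, key);  /* MAC covers the handle body */
        memcpy(fh + *size, &mac, sizeof(mac));
        *size += sizeof(mac);
        return 0;
}

static int mac_verify(const unsigned char *fh, size_t size, uint64_t key)
{
        uint64_t mac;
        size_t body = size - sizeof(mac);

        mac = toy_mac(fh, body, key);
        return ct_memneq(fh + body, &mac, sizeof(mac)) == 0;
}
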
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 4a04208393b8..4f1ab3222a4d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -887,6 +887,7 @@ nfsd(void *vrqstp)
struct svc_xprt *perm_sock = list_entry(rqstp->rq_server->sv_permsocks.next, typeof(struct svc_xprt), xpt_list);
struct net *net = perm_sock->xpt_net;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct nfsd_thread_local_info ntli = { };
bool have_mutex = false;
/* At this point, the thread shares current->fs
@@ -901,6 +902,10 @@ nfsd(void *vrqstp)
set_freezable();
+ /* use dynamic allocation if ntli should ever become large */
+ static_assert(sizeof(struct nfsd_thread_local_info) < 256);
+ rqstp->rq_private = &ntli;
+
/*
* The main request loop
*/
@@ -967,6 +972,7 @@ nfsd(void *vrqstp)
*/
int nfsd_dispatch(struct svc_rqst *rqstp)
{
+ struct nfsd_thread_local_info *ntli = rqstp->rq_private;
const struct svc_procedure *proc = rqstp->rq_procinfo;
__be32 *statp = rqstp->rq_accept_statp;
struct nfsd_cacherep *rp;
@@ -977,7 +983,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
* Give the xdr decoder a chance to change this if it wants
* (necessary in the NFSv4.0 compound case)
*/
- rqstp->rq_cachetype = proc->pc_cachetype;
+ ntli->ntli_cachetype = proc->pc_cachetype;
/*
* ->pc_decode advances the argument stream past the NFS
@@ -1022,7 +1028,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp)
*/
smp_store_release(&rqstp->rq_status_counter, rqstp->rq_status_counter + 1);
- nfsd_cache_update(rqstp, rp, rqstp->rq_cachetype, nfs_reply);
+ nfsd_cache_update(rqstp, rp, ntli->ntli_cachetype, nfs_reply);
out_cached_reply:
return 1;
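
The on-stack struct published through rq_private is a pattern worth seeing in isolation: the transport layer never dereferences the pointer, so the service owns both the layout and the lifetime. A sketch under illustrative names, keeping the same static_assert guard against the struct outgrowing its stack slot:

#include <assert.h>
#include <stddef.h>

struct request {
        void *priv;     /* like rqstp->rq_private: opaque to sunrpc */
};

struct thread_local_info {
        int cachetype;
        /* keep small: this lives on the service thread's stack */
};

static void service_thread(struct request *rq)
{
        struct thread_local_info tli = { 0 };

        /* switch to dynamic allocation if this ever grows large */
        static_assert(sizeof(struct thread_local_info) < 256,
                      "thread-local info too big for the stack");
        rq->priv = &tli;

        /* ... request loop: handlers reach tli through rq->priv ... */
}
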
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index fc262ceafca9..ae71e0621317 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -605,7 +605,7 @@ svcxdr_encode_entry_common(struct nfsd_readdirres *resp, const char *name,
*
* Return values:
* %0: Entry was successfully encoded.
- * %-EINVAL: An encoding problem occured, secondary status code in resp->common.err
+ * %-EINVAL: An encoding problem occurred, secondary status code in resp->common.err
*
* On exit, the following fields are updated:
* - resp->xdr
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index db9af780438b..f7bee4dc5d3d 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -11,6 +11,9 @@
struct xdr_stream;
+/* Cap exponential backoff between fence retries at 3 minutes */
+#define MAX_FENCE_DELAY ((unsigned int)(3 * 60 * HZ))
+
struct nfsd4_deviceid_map {
struct list_head hash;
u64 idx;
@@ -38,7 +41,7 @@ struct nfsd4_layout_ops {
struct svc_rqst *rqstp,
struct nfsd4_layoutcommit *lcp);
- void (*fence_client)(struct nfs4_layout_stateid *ls,
+ bool (*fence_client)(struct nfs4_layout_stateid *ls,
struct nfsd_file *file);
};
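
ls_fence_delay feeds a capped exponential backoff between fence retries; the delayed work re-queues itself with the grown delay. A sketch of the arithmetic, where the doubling policy and the one-second seed are assumptions (the diff confirms only the three-minute cap):

#define HZ 100U                         /* assumed tick rate for the model */
#define MAX_FENCE_DELAY ((unsigned int)(3 * 60 * HZ))

/* Grow the delay after each failed fence attempt, saturating at
 * MAX_FENCE_DELAY so retries never back off past three minutes. */
static unsigned int next_fence_delay(unsigned int delay)
{
        unsigned int next = delay ? 2 * delay : HZ;

        return next < MAX_FENCE_DELAY ? next : MAX_FENCE_DELAY;
}
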
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index c0ca115c3b74..953675eba5c3 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -123,7 +123,7 @@ struct nfs4_stid {
#define SC_TYPE_LAYOUT BIT(3)
unsigned short sc_type;
-/* state_lock protects sc_status for delegation stateids.
+/* nn->deleg_lock protects sc_status for delegation stateids.
* ->cl_lock protects sc_status for open and lock stateids.
* ->st_mutex also protect sc_status for open stateids.
* ->ls_lock protects sc_status for layout stateids.
@@ -456,6 +456,7 @@ struct nfs4_client {
struct list_head cl_lru; /* tail queue */
#ifdef CONFIG_NFSD_PNFS
struct list_head cl_lo_states; /* outstanding layout states */
+ bool cl_fence_retry_warn;
#endif
struct xdr_netobj cl_name; /* id generated by client */
nfs4_verifier cl_verifier; /* generated by client */
@@ -527,6 +528,10 @@ struct nfs4_client {
struct nfsd4_cb_recall_any *cl_ra;
time64_t cl_ra_time;
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ struct xarray cl_dev_fences;
+ struct mutex cl_fence_mutex;
+#endif
};
/* struct nfs4_client_reset
@@ -549,10 +554,10 @@ struct nfs4_client_reclaim {
* ~32(deleg. ace) = 112 bytes
*
* Some responses can exceed this. A LOCK denial includes the conflicting
- * lock owner, which can be up to 1024 bytes (NFS4_OPAQUE_LIMIT). Responses
- * larger than REPLAY_ISIZE are not cached in rp_ibuf; only rp_status is
- * saved. Enlarging this constant increases the size of every
- * nfs4_stateowner.
+ * lock owner, which can be up to 1024 bytes (NFS4_OPAQUE_LIMIT). When a
+ * response exceeds REPLAY_ISIZE, a buffer is dynamically allocated. If
+ * that allocation fails, only rp_status is saved. Enlarging this constant
+ * increases the size of every nfs4_stateowner.
*/
#define NFSD4_REPLAY_ISIZE 112
@@ -564,12 +569,14 @@ struct nfs4_client_reclaim {
struct nfs4_replay {
__be32 rp_status;
unsigned int rp_buflen;
- char *rp_buf;
+ char *rp_buf; /* rp_ibuf or kmalloc'd */
struct knfsd_fh rp_openfh;
int rp_locked;
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
+extern void nfs4_replay_free_cache(struct nfs4_replay *rp);
+
struct nfs4_stateowner;
struct nfs4_stateowner_operations {
@@ -742,6 +749,10 @@ struct nfs4_layout_stateid {
stateid_t ls_recall_sid;
bool ls_recalled;
struct mutex ls_mutex;
+
+ struct delayed_work ls_fence_work;
+ unsigned int ls_fence_delay;
+ bool ls_fenced;
};
static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
@@ -851,6 +862,8 @@ struct nfsd_file *find_any_file(struct nfs4_file *f);
#ifdef CONFIG_NFSD_V4
void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb);
void nfsd4_cancel_copy_by_sb(struct net *net, struct super_block *sb);
+int nfsd_net_cb_init(struct nfsd_net *nn);
+void nfsd_net_cb_shutdown(struct nfsd_net *nn);
#else
static inline void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb)
{
@@ -858,6 +871,13 @@ static inline void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *
static inline void nfsd4_cancel_copy_by_sb(struct net *net, struct super_block *sb)
{
}
+static inline int nfsd_net_cb_init(struct nfsd_net *nn)
+{
+ return 0;
+}
+static inline void nfsd_net_cb_shutdown(struct nfsd_net *nn)
+{
+}
#endif
/* grace period management */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index d1d0b0dd0545..5ad38f50836d 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -373,6 +373,7 @@ DEFINE_EVENT_CONDITION(nfsd_fh_err_class, nfsd_##name, \
DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badexport);
DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badhandle);
+DEFINE_NFSD_FH_ERR_EVENT(set_fh_dentry_badmac);
TRACE_EVENT(nfsd_exp_find_key,
TP_PROTO(const struct svc_expkey *key,
@@ -2240,6 +2241,28 @@ TRACE_EVENT(nfsd_end_grace,
)
);
+TRACE_EVENT(nfsd_ctl_fh_key_set,
+ TP_PROTO(
+ const char *key,
+ int result
+ ),
+ TP_ARGS(key, result),
+ TP_STRUCT__entry(
+ __field(u32, key_hash)
+ __field(int, result)
+ ),
+ TP_fast_assign(
+ if (key)
+ __entry->key_hash = ~crc32_le(0xFFFFFFFF, key, 16);
+ else
+ __entry->key_hash = 0;
+ __entry->result = result;
+ ),
+ TP_printk("key=0x%08x result=%d",
+ __entry->key_hash, __entry->result
+ )
+);
+
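
The tracepoint records a CRC-32 digest of the key rather than the key bytes themselves. ~crc32_le(0xFFFFFFFF, buf, len) should match the standard CRC-32 that zlib's crc32() computes, so userspace can reproduce the logged value when correlating traces; a sketch, assuming zlib and an example key:

#include <stdio.h>
#include <zlib.h>

int main(void)
{
        unsigned char key[16] = { 0x01, 0x02, 0x03, 0x04 }; /* example fh_key */
        unsigned long hash = crc32(0L, key, sizeof(key));

        /* should match the tracepoint's key=0x%08x for the same key */
        printf("key=0x%08lx\n", hash);
        return 0;
}
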
DECLARE_EVENT_CLASS(nfsd_copy_class,
TP_PROTO(
const struct nfsd4_copy *copy
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index d2c9740e26a8..5f0a2fb31450 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -50,6 +50,7 @@ struct lease_manager_operations {
void (*lm_setup)(struct file_lease *, void **);
bool (*lm_breaker_owns_lease)(struct file_lease *);
int (*lm_open_conflict)(struct file *, int);
+ bool (*lm_breaker_timedout)(struct file_lease *fl);
};
struct lock_manager {
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index c53c81242e72..b614e0deea72 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -10,27 +10,20 @@
#ifndef LINUX_LOCKD_BIND_H
#define LINUX_LOCKD_BIND_H
-#include <linux/lockd/nlm.h>
-/* need xdr-encoded error codes too, so... */
-#include <linux/lockd/xdr.h>
-#ifdef CONFIG_LOCKD_V4
-#include <linux/lockd/xdr4.h>
-#endif
-
-/* Dummy declarations */
+struct file_lock;
+struct nfs_fh;
struct svc_rqst;
struct rpc_task;
struct rpc_clnt;
+struct super_block;
/*
* This is the set of functions for lockd->nfsd communication
*/
struct nlmsvc_binding {
- __be32 (*fopen)(struct svc_rqst *,
- struct nfs_fh *,
- struct file **,
- int mode);
- void (*fclose)(struct file *);
+ int (*fopen)(struct svc_rqst *rqstp, struct nfs_fh *f,
+ struct file **filp, int flags);
+ void (*fclose)(struct file *filp);
};
extern const struct nlmsvc_binding *nlmsvc_ops;
@@ -58,6 +51,7 @@ struct nlmclnt_initdata {
extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init);
extern void nlmclnt_done(struct nlm_host *host);
extern struct rpc_clnt *nlmclnt_rpc_clnt(struct nlm_host *host);
+extern void nlmclnt_shutdown_rpc_clnt(struct nlm_host *host);
/*
* NLM client operations provide a means to modify RPC processing of NLM
@@ -82,4 +76,10 @@ extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, vo
extern int lockd_up(struct net *net, const struct cred *cred);
extern void lockd_down(struct net *net);
+/*
+ * Cluster failover support
+ */
+int nlmsvc_unlock_all_by_sb(struct super_block *sb);
+int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
+
#endif /* LINUX_LOCKD_BIND_H */
diff --git a/include/linux/lockd/debug.h b/include/linux/lockd/debug.h
deleted file mode 100644
index eede2ab5246f..000000000000
--- a/include/linux/lockd/debug.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/debug.h
- *
- * Debugging stuff.
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LINUX_LOCKD_DEBUG_H
-#define LINUX_LOCKD_DEBUG_H
-
-#include <linux/sunrpc/debug.h>
-
-/*
- * Enable lockd debugging.
- * Requires RPC_DEBUG.
- */
-#undef ifdebug
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define ifdebug(flag) if (unlikely(nlm_debug & NLMDBG_##flag))
-#else
-# define ifdebug(flag) if (0)
-#endif
-
-/*
- * Debug flags
- */
-#define NLMDBG_SVC 0x0001
-#define NLMDBG_CLIENT 0x0002
-#define NLMDBG_CLNTLOCK 0x0004
-#define NLMDBG_SVCLOCK 0x0008
-#define NLMDBG_MONITOR 0x0010
-#define NLMDBG_CLNTSUBS 0x0020
-#define NLMDBG_SVCSUBS 0x0040
-#define NLMDBG_HOSTCACHE 0x0080
-#define NLMDBG_XDR 0x0100
-#define NLMDBG_ALL 0x7fff
-
-#endif /* LINUX_LOCKD_DEBUG_H */
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
deleted file mode 100644
index 72831e35dca3..000000000000
--- a/include/linux/lockd/xdr4.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/linux/lockd/xdr4.h
- *
- * XDR types for the NLM protocol
- *
- * Copyright (C) 1996 Olaf Kirch <okir@monad.swb.de>
- */
-
-#ifndef LOCKD_XDR4_H
-#define LOCKD_XDR4_H
-
-#include <linux/fs.h>
-#include <linux/nfs.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/lockd/xdr.h>
-
-/* error codes new to NLMv4 */
-#define nlm4_deadlock cpu_to_be32(NLM_DEADLCK)
-#define nlm4_rofs cpu_to_be32(NLM_ROFS)
-#define nlm4_stale_fh cpu_to_be32(NLM_STALE_FH)
-#define nlm4_fbig cpu_to_be32(NLM_FBIG)
-#define nlm4_failed cpu_to_be32(NLM_FAILED)
-
-void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len);
-bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-bool nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-bool nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-
-extern const struct rpc_version nlm_version4;
-
-#endif /* LOCKD_XDR4_H */
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index e783132e481f..b1e595c2615b 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -16,6 +16,7 @@
#include <linux/atomic.h>
#include <linux/kstrtox.h>
#include <linux/proc_fs.h>
+#include <linux/wait.h>
/*
* Each cache requires:
@@ -112,7 +113,11 @@ struct cache_detail {
int entries;
/* fields for communication over channel */
- struct list_head queue;
+ struct list_head requests;
+ struct list_head readers;
+ spinlock_t queue_lock;
+ wait_queue_head_t queue_wait;
+ u64 next_seqno;
atomic_t writers; /* how many time is /channel open */
time64_t last_close; /* if no writers, when did last close */
diff --git a/include/linux/sunrpc/debug.h b/include/linux/sunrpc/debug.h
index eb4bd62df319..ab61bed2f7af 100644
--- a/include/linux/sunrpc/debug.h
+++ b/include/linux/sunrpc/debug.h
@@ -38,6 +38,8 @@ extern unsigned int nlm_debug;
do { \
ifdebug(fac) \
__sunrpc_printk(fmt, ##__VA_ARGS__); \
+ else \
+ no_printk(fmt, ##__VA_ARGS__); \
} while (0)
# define dfprintk_rcu(fac, fmt, ...) \
@@ -46,15 +48,15 @@ do { \
rcu_read_lock(); \
__sunrpc_printk(fmt, ##__VA_ARGS__); \
rcu_read_unlock(); \
+ } else { \
+ no_printk(fmt, ##__VA_ARGS__); \
} \
} while (0)
-# define RPC_IFDEBUG(x) x
#else
# define ifdebug(fac) if (0)
-# define dfprintk(fac, fmt, ...) do {} while (0)
-# define dfprintk_rcu(fac, fmt, ...) do {} while (0)
-# define RPC_IFDEBUG(x)
+# define dfprintk(fac, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
+# define dfprintk_rcu(fac, fmt, ...) no_printk(fmt, ##__VA_ARGS__)
#endif
/*
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index ccba79ebf893..0dbdf3722537 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -95,10 +95,7 @@ struct rpc_task {
int tk_rpc_status; /* Result of last RPC operation */
unsigned short tk_flags; /* misc flags */
unsigned short tk_timeouts; /* maj timeouts */
-
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
unsigned short tk_pid; /* debugging aid */
-#endif
unsigned char tk_priority : 2,/* Task priority */
tk_garb_retry : 2,
tk_cred_retry : 2;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index a11acf5cd63b..4be6204f6630 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -134,25 +134,37 @@ enum {
extern u32 svc_max_payload(const struct svc_rqst *rqstp);
/*
- * RPC Requests and replies are stored in one or more pages.
- * We maintain an array of pages for each server thread.
- * Requests are copied into these pages as they arrive. Remaining
- * pages are available to write the reply into.
+ * RPC Call and Reply messages each have their own page array.
+ * rq_pages holds the incoming Call message; rq_respages holds
+ * the outgoing Reply message. Both arrays are sized to
+ * svc_serv_maxpages() entries and are allocated dynamically.
*
- * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each server thread
- * needs to allocate more to replace those used in sending. To help keep track
- * of these pages we have a receive list where all pages initialy live, and a
- * send list where pages are moved to when there are to be part of a reply.
+ * Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each
+ * server thread needs to allocate more to replace those used in
+ * sending.
*
- * We use xdr_buf for holding responses as it fits well with NFS
- * read responses (that have a header, and some data pages, and possibly
- * a tail) and means we can share some client side routines.
+ * rq_pages request page contract:
*
- * The xdr_buf.head kvec always points to the first page in the rq_*pages
- * list. The xdr_buf.pages pointer points to the second page on that
- * list. xdr_buf.tail points to the end of the first page.
- * This assumes that the non-page part of an rpc reply will fit
- * in a page - NFSd ensures this. lockd also has no trouble.
+ * Transport receive paths that move request data pages out of
+ * rq_pages -- TCP multi-fragment reassembly (svc_tcp_save_pages)
+ * and RDMA Read I/O (svc_rdma_clear_rqst_pages) -- NULL those
+ * entries to prevent svc_rqst_release_pages() from freeing pages
+ * still in transport use, and set rq_pages_nfree to the count.
+ * svc_alloc_arg() refills only that many rq_pages entries.
+ *
+ * For rq_respages, svc_rqst_release_pages() NULLs entries in
+ * [rq_respages, rq_next_page) after each RPC. svc_alloc_arg()
+ * refills only that range.
+ *
+ * xdr_buf holds responses; the structure fits NFS read responses
+ * (header, data pages, optional tail) and enables sharing of
+ * client-side routines.
+ *
+ * The xdr_buf.head kvec always points to the first page in the
+ * rq_*pages list. The xdr_buf.pages pointer points to the second
+ * page on that list. xdr_buf.tail points to the end of the first
+ * page. This assumes that the non-page part of an rpc reply will
+ * fit in a page - NFSd ensures this. lockd also has no trouble.
*/
/**
@@ -162,10 +174,10 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp);
* Returns a count of pages or vectors that can hold the maximum
* size RPC message for @serv.
*
- * Each request/reply pair can have at most one "payload", plus two
- * pages, one for the request, and one for the reply.
- * nfsd_splice_actor() might need an extra page when a READ payload
- * is not page-aligned.
+ * Each page array can hold at most one payload plus two
+ * overhead pages (one for the RPC header, one for tail data).
+ * nfsd_splice_actor() might need an extra page when a READ
+ * payload is not page-aligned.
*/
static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
{
@@ -175,6 +187,9 @@ static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
/*
* The context of a single thread, including the request currently being
* processed.
+ *
+ * RPC programs are free to use rq_private to stash thread-local information.
+ * The sunrpc layer will not access it.
*/
struct svc_rqst {
struct list_head rq_all; /* all threads list */
@@ -201,11 +216,12 @@ struct svc_rqst {
struct xdr_stream rq_res_stream;
struct folio *rq_scratch_folio;
struct xdr_buf rq_res;
- unsigned long rq_maxpages; /* num of entries in rq_pages */
- struct page * *rq_pages;
- struct page * *rq_respages; /* points into rq_pages */
+ unsigned long rq_maxpages; /* entries per page array */
+ unsigned long rq_pages_nfree; /* rq_pages entries NULLed by transport */
+ struct page * *rq_pages; /* Call buffer pages */
+ struct page * *rq_respages; /* Reply buffer pages */
struct page * *rq_next_page; /* next reply page to use */
- struct page * *rq_page_end; /* one past the last page */
+ struct page * *rq_page_end; /* one past the last reply page */
struct folio_batch rq_fbatch;
struct bio_vec *rq_bvec;
@@ -215,7 +231,6 @@ struct svc_rqst {
u32 rq_vers; /* program version */
u32 rq_proc; /* procedure number */
u32 rq_prot; /* IP protocol */
- int rq_cachetype; /* catering to nfsd */
unsigned long rq_flags; /* flags field */
ktime_t rq_qtime; /* enqueue time */
@@ -251,7 +266,7 @@ struct svc_rqst {
unsigned long bc_to_initval;
unsigned int bc_to_retries;
unsigned int rq_status_counter; /* RPC processing counter */
- void **rq_lease_breaker; /* The v4 client breaking a lease */
+ void *rq_private; /* For use by the service thread */
};
/* bits for rq_flags */
@@ -483,6 +498,21 @@ int svc_generic_rpcbind_set(struct net *net,
#define RPC_MAX_ADDRBUFLEN (63U)
+/**
+ * svc_rqst_page_release - release a page associated with an RPC transaction
+ * @rqstp: RPC transaction context
+ * @page: page to release
+ *
+ * Released pages are batched and freed together, reducing
+ * allocator pressure under heavy RPC workloads.
+ */
+static inline void svc_rqst_page_release(struct svc_rqst *rqstp,
+ struct page *page)
+{
+ if (!folio_batch_add(&rqstp->rq_fbatch, page_folio(page)))
+ __folio_batch_release(&rqstp->rq_fbatch);
+}
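
svc_rqst_page_release() defers frees into a folio batch and flushes when the batch fills. The same add-until-full-then-flush shape in plain C, with free() standing in for folio release and the 31-entry capacity borrowed from the kernel's folio_batch:

#include <stdlib.h>

#define BATCH_SIZE 31

struct page_batch {
        unsigned int nr;
        void *pages[BATCH_SIZE];
};

static void batch_release(struct page_batch *b)
{
        /* free everything collected so far in one pass, then reset */
        for (unsigned int i = 0; i < b->nr; i++)
                free(b->pages[i]);
        b->nr = 0;
}

/* Mirrors folio_batch_add()'s contract: returns the space remaining
 * after the add; zero means full, so the caller must flush. */
static unsigned int batch_add(struct page_batch *b, void *page)
{
        b->pages[b->nr++] = page;
        return BATCH_SIZE - b->nr;
}

static void page_release(struct page_batch *b, void *page)
{
        if (!batch_add(b, page))
                batch_release(b);
}
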
+
/*
* When we want to reduce the size of the reserved space in the response
* buffer, we need to take into account the size of any checksum data that
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 57f4fd94166a..df6e08aaad57 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -84,6 +84,9 @@ struct svcxprt_rdma {
atomic_t sc_sq_avail; /* SQEs ready to be consumed */
unsigned int sc_sq_depth; /* Depth of SQ */
+ atomic_t sc_sq_ticket_head; /* Next ticket to issue */
+ atomic_t sc_sq_ticket_tail; /* Ticket currently serving */
+ wait_queue_head_t sc_sq_ticket_wait; /* Ticket ordering waitlist */
__be32 sc_fc_credits; /* Forward credits */
u32 sc_max_requests; /* Max requests */
u32 sc_max_bc_requests;/* Backward credits */
@@ -213,6 +216,7 @@ struct svc_rdma_recv_ctxt {
*/
struct svc_rdma_write_info {
struct svcxprt_rdma *wi_rdma;
+ struct list_head wi_list;
const struct svc_rdma_chunk *wi_chunk;
@@ -241,7 +245,10 @@ struct svc_rdma_send_ctxt {
struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
+
+ struct list_head sc_write_info_list;
struct svc_rdma_write_info sc_reply_info;
+
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
@@ -274,11 +281,14 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir);
+extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
-extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr);
+extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr);
extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_pcl *write_pcl,
const struct svc_rdma_pcl *reply_pcl,
@@ -306,6 +316,13 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *rctxt,
int status);
extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
+extern int svc_rdma_sq_wait(struct svcxprt_rdma *rdma,
+ const struct rpc_rdma_cid *cid, int sqecount);
+extern int svc_rdma_post_send_err(struct svcxprt_rdma *rdma,
+ const struct rpc_rdma_cid *cid,
+ const struct ib_send_wr *bad_wr,
+ const struct ib_send_wr *first_wr,
+ int sqecount, int ret);
extern int svc_rdma_sendto(struct svc_rqst *);
extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length);
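
The sc_sq_ticket_head/tail pair gives Send Queue submitters strict FIFO admission: take a ticket, wait until the tail reaches it, advance the tail when done. A userspace model in C11 atomics (the kernel version sleeps on sc_sq_ticket_wait rather than spinning):

#include <stdatomic.h>

struct sq_tickets {
        atomic_uint head;       /* next ticket to issue */
        atomic_uint tail;       /* ticket currently being served */
};

static unsigned int sq_ticket_take(struct sq_tickets *sq)
{
        return atomic_fetch_add(&sq->head, 1);
}

static void sq_ticket_wait(struct sq_tickets *sq, unsigned int ticket)
{
        /* spin for simplicity; the kernel waits on a waitqueue */
        while (atomic_load(&sq->tail) != ticket)
                ;
}

static void sq_ticket_put(struct sq_tickets *sq)
{
        /* serve the next ticket; the kernel also wakes any waiters */
        atomic_fetch_add(&sq->tail, 1);
}
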
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 152597750f55..b639a6fafcbc 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -290,7 +290,7 @@ xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
/**
* xdr_set_scratch_folio - Attach a scratch buffer for decoding data
* @xdr: pointer to xdr_stream struct
- * @page: an anonymous folio
+ * @folio: an anonymous folio
*
* See xdr_set_scratch_buffer().
*/
@@ -330,7 +330,7 @@ static inline void xdr_commit_encode(struct xdr_stream *xdr)
* xdr_stream_remaining - Return the number of bytes remaining in the stream
* @xdr: pointer to struct xdr_stream
*
- * Return value:
+ * Returns:
* Number of bytes remaining in @xdr before xdr->end
*/
static inline size_t
@@ -350,7 +350,7 @@ ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor,
* xdr_align_size - Calculate padded size of an object
* @n: Size of an object being XDR encoded (in bytes)
*
- * Return value:
+ * Returns:
* Size (in bytes) of the object including xdr padding
*/
static inline size_t
@@ -368,7 +368,7 @@ xdr_align_size(size_t n)
* This implementation avoids the need for conditional
* branches or modulo division.
*
- * Return value:
+ * Returns:
* Size (in bytes) of the needed XDR pad
*/
static inline size_t xdr_pad_size(size_t n)
@@ -380,7 +380,7 @@ static inline size_t xdr_pad_size(size_t n)
* xdr_stream_encode_item_present - Encode a "present" list item
* @xdr: pointer to xdr_stream
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -399,7 +399,7 @@ static inline ssize_t xdr_stream_encode_item_present(struct xdr_stream *xdr)
* xdr_stream_encode_item_absent - Encode a "not present" list item
* @xdr: pointer to xdr_stream
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -419,7 +419,7 @@ static inline int xdr_stream_encode_item_absent(struct xdr_stream *xdr)
* @p: address in a buffer into which to encode
* @n: boolean value to encode
*
- * Return value:
+ * Returns:
* Address of item following the encoded boolean
*/
static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
@@ -433,7 +433,7 @@ static inline __be32 *xdr_encode_bool(__be32 *p, u32 n)
* @xdr: pointer to xdr_stream
* @n: boolean value to encode
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -453,7 +453,7 @@ static inline int xdr_stream_encode_bool(struct xdr_stream *xdr, __u32 n)
* @xdr: pointer to xdr_stream
* @n: integer to encode
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -474,7 +474,7 @@ xdr_stream_encode_u32(struct xdr_stream *xdr, __u32 n)
* @xdr: pointer to xdr_stream
* @n: integer to encode
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -495,7 +495,7 @@ xdr_stream_encode_be32(struct xdr_stream *xdr, __be32 n)
* @xdr: pointer to xdr_stream
* @n: 64-bit integer to encode
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -517,7 +517,7 @@ xdr_stream_encode_u64(struct xdr_stream *xdr, __u64 n)
* @ptr: pointer to void pointer
* @len: size of object
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -542,7 +542,7 @@ xdr_stream_encode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t len)
* @ptr: pointer to opaque data object
* @len: size of object pointed to by @ptr
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -563,7 +563,7 @@ xdr_stream_encode_opaque_fixed(struct xdr_stream *xdr, const void *ptr, size_t l
* @ptr: pointer to opaque data object
* @len: size of object pointed to by @ptr
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -585,7 +585,7 @@ xdr_stream_encode_opaque(struct xdr_stream *xdr, const void *ptr, size_t len)
* @array: array of integers
* @array_size: number of elements in @array
*
- * Return values:
+ * Returns:
* On success, returns length in bytes of XDR buffer consumed
* %-EMSGSIZE on XDR buffer overflow
*/
@@ -608,7 +608,7 @@ xdr_stream_encode_uint32_array(struct xdr_stream *xdr,
* xdr_item_is_absent - symbolically handle XDR discriminators
* @p: pointer to undecoded discriminator
*
- * Return values:
+ * Returns:
* %true if the following XDR item is absent
* %false if the following XDR item is present
*/
@@ -621,7 +621,7 @@ static inline bool xdr_item_is_absent(const __be32 *p)
* xdr_item_is_present - symbolically handle XDR discriminators
* @p: pointer to undecoded discriminator
*
- * Return values:
+ * Returns:
* %true if the following XDR item is present
* %false if the following XDR item is absent
*/
@@ -635,7 +635,7 @@ static inline bool xdr_item_is_present(const __be32 *p)
* @xdr: pointer to xdr_stream
* @ptr: pointer to a u32 in which to store the result
*
- * Return values:
+ * Returns:
* %0 on success
* %-EBADMSG on XDR buffer overflow
*/
@@ -656,7 +656,7 @@ xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr)
* @xdr: pointer to xdr_stream
* @ptr: location to store integer
*
- * Return values:
+ * Returns:
* %0 on success
* %-EBADMSG on XDR buffer overflow
*/
@@ -677,7 +677,7 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr)
* @xdr: pointer to xdr_stream
* @ptr: location to store integer
*
- * Return values:
+ * Returns:
* %0 on success
* %-EBADMSG on XDR buffer overflow
*/
@@ -698,7 +698,7 @@ xdr_stream_decode_be32(struct xdr_stream *xdr, __be32 *ptr)
* @xdr: pointer to xdr_stream
* @ptr: location to store 64-bit integer
*
- * Return values:
+ * Returns:
* %0 on success
* %-EBADMSG on XDR buffer overflow
*/
@@ -720,7 +720,7 @@ xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr)
* @ptr: location to store data
* @len: size of buffer pointed to by @ptr
*
- * Return values:
+ * Returns:
* %0 on success
* %-EBADMSG on XDR buffer overflow
*/
@@ -746,7 +746,7 @@ xdr_stream_decode_opaque_fixed(struct xdr_stream *xdr, void *ptr, size_t len)
* on @xdr. It is therefore expected that the object it points to should
* be processed immediately.
*
- * Return values:
+ * Returns:
* On success, returns size of object stored in *@ptr
* %-EBADMSG on XDR buffer overflow
* %-EMSGSIZE if the size of the object would exceed @maxlen
@@ -777,7 +777,7 @@ xdr_stream_decode_opaque_inline(struct xdr_stream *xdr, void **ptr, size_t maxle
* @array: location to store the integer array or NULL
* @array_size: number of elements to store
*
- * Return values:
+ * Returns:
* On success, returns number of elements stored in @array
* %-EBADMSG on XDR buffer overflow
* %-EMSGSIZE if the size of the array exceeds @array_size
diff --git a/include/linux/sunrpc/xdrgen/nlm4.h b/include/linux/sunrpc/xdrgen/nlm4.h
new file mode 100644
index 000000000000..e95e8f105624
--- /dev/null
+++ b/include/linux/sunrpc/xdrgen/nlm4.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Generated by xdrgen. Manual edits will be lost. */
+/* XDR specification file: ../../Documentation/sunrpc/xdr/nlm4.x */
+/* XDR specification modification time: Thu Dec 25 13:10:19 2025 */
+
+#ifndef _LINUX_XDRGEN_NLM4_DEF_H
+#define _LINUX_XDRGEN_NLM4_DEF_H
+
+#include <linux/types.h>
+#include <linux/sunrpc/xdrgen/_defs.h>
+
+enum { LM_MAXSTRLEN = 1024 };
+
+enum { LM_MAXNAMELEN = 1025 };
+
+enum { MAXNETOBJ_SZ = 1024 };
+
+typedef opaque netobj;
+
+enum fsh4_mode {
+ fsm_DN = 0,
+ fsm_DR = 1,
+ fsm_DW = 2,
+ fsm_DRW = 3,
+};
+
+typedef enum fsh4_mode fsh4_mode;
+
+enum fsh4_access {
+ fsa_NONE = 0,
+ fsa_R = 1,
+ fsa_W = 2,
+ fsa_RW = 3,
+};
+
+typedef enum fsh4_access fsh4_access;
+
+enum { SM_MAXSTRLEN = 1024 };
+
+typedef u64 uint64;
+
+typedef s64 int64;
+
+typedef u32 uint32;
+
+typedef s32 int32;
+
+enum nlm4_stats {
+ NLM4_GRANTED = 0,
+ NLM4_DENIED = 1,
+ NLM4_DENIED_NOLOCKS = 2,
+ NLM4_BLOCKED = 3,
+ NLM4_DENIED_GRACE_PERIOD = 4,
+ NLM4_DEADLCK = 5,
+ NLM4_ROFS = 6,
+ NLM4_STALE_FH = 7,
+ NLM4_FBIG = 8,
+ NLM4_FAILED = 9,
+};
+
+typedef __be32 nlm4_stats;
+
+struct nlm4_holder {
+ bool exclusive;
+ int32 svid;
+ netobj oh;
+ uint64 l_offset;
+ uint64 l_len;
+};
+
+struct nlm4_testrply {
+ nlm4_stats stat;
+ union {
+ struct nlm4_holder holder;
+ } u;
+};
+
+struct nlm4_stat {
+ nlm4_stats stat;
+};
+
+struct nlm4_res {
+ netobj cookie;
+ struct nlm4_stat stat;
+};
+
+struct nlm4_testres {
+ netobj cookie;
+ struct nlm4_testrply stat;
+};
+
+struct nlm4_lock {
+ string caller_name;
+ netobj fh;
+ netobj oh;
+ int32 svid;
+ uint64 l_offset;
+ uint64 l_len;
+};
+
+struct nlm4_lockargs {
+ netobj cookie;
+ bool block;
+ bool exclusive;
+ struct nlm4_lock alock;
+ bool reclaim;
+ int32 state;
+};
+
+struct nlm4_cancargs {
+ netobj cookie;
+ bool block;
+ bool exclusive;
+ struct nlm4_lock alock;
+};
+
+struct nlm4_testargs {
+ netobj cookie;
+ bool exclusive;
+ struct nlm4_lock alock;
+};
+
+struct nlm4_unlockargs {
+ netobj cookie;
+ struct nlm4_lock alock;
+};
+
+struct nlm4_share {
+ string caller_name;
+ netobj fh;
+ netobj oh;
+ fsh4_mode mode;
+ fsh4_access access;
+};
+
+struct nlm4_shareargs {
+ netobj cookie;
+ struct nlm4_share share;
+ bool reclaim;
+};
+
+struct nlm4_shareres {
+ netobj cookie;
+ nlm4_stats stat;
+ int32 sequence;
+};
+
+struct nlm4_notify {
+ string name;
+ int32 state;
+};
+
+enum { SM_PRIV_SIZE = 16 };
+
+struct nlm4_notifyargs {
+ struct nlm4_notify notify;
+ u8 private[SM_PRIV_SIZE];
+};
+
+enum {
+ NLMPROC4_NULL = 0,
+ NLMPROC4_TEST = 1,
+ NLMPROC4_LOCK = 2,
+ NLMPROC4_CANCEL = 3,
+ NLMPROC4_UNLOCK = 4,
+ NLMPROC4_GRANTED = 5,
+ NLMPROC4_TEST_MSG = 6,
+ NLMPROC4_LOCK_MSG = 7,
+ NLMPROC4_CANCEL_MSG = 8,
+ NLMPROC4_UNLOCK_MSG = 9,
+ NLMPROC4_GRANTED_MSG = 10,
+ NLMPROC4_TEST_RES = 11,
+ NLMPROC4_LOCK_RES = 12,
+ NLMPROC4_CANCEL_RES = 13,
+ NLMPROC4_UNLOCK_RES = 14,
+ NLMPROC4_GRANTED_RES = 15,
+ NLMPROC4_SM_NOTIFY = 16,
+ NLMPROC4_SHARE = 20,
+ NLMPROC4_UNSHARE = 21,
+ NLMPROC4_NM_LOCK = 22,
+ NLMPROC4_FREE_ALL = 23,
+};
+
+#ifndef NLM4_PROG
+#define NLM4_PROG (100021)
+#endif
+
+#define NLM4_netobj_sz (XDR_unsigned_int + XDR_QUADLEN(MAXNETOBJ_SZ))
+#define NLM4_fsh4_mode_sz (XDR_int)
+#define NLM4_fsh4_access_sz (XDR_int)
+#define NLM4_uint64_sz \
+ (XDR_unsigned_hyper)
+#define NLM4_int64_sz \
+ (XDR_hyper)
+#define NLM4_uint32_sz \
+ (XDR_unsigned_long)
+#define NLM4_int32_sz \
+ (XDR_long)
+#define NLM4_nlm4_stats_sz (XDR_int)
+#define NLM4_nlm4_holder_sz \
+ (XDR_bool + NLM4_int32_sz + NLM4_netobj_sz + NLM4_uint64_sz + NLM4_uint64_sz)
+#define NLM4_nlm4_testrply_sz \
+ (NLM4_nlm4_stats_sz + NLM4_nlm4_holder_sz)
+#define NLM4_nlm4_stat_sz \
+ (NLM4_nlm4_stats_sz)
+#define NLM4_nlm4_res_sz \
+ (NLM4_netobj_sz + NLM4_nlm4_stat_sz)
+#define NLM4_nlm4_testres_sz \
+ (NLM4_netobj_sz + NLM4_nlm4_testrply_sz)
+#define NLM4_nlm4_lock_sz \
+ (XDR_unsigned_int + XDR_QUADLEN(LM_MAXSTRLEN) + NLM4_netobj_sz + NLM4_netobj_sz + NLM4_int32_sz + NLM4_uint64_sz + NLM4_uint64_sz)
+#define NLM4_nlm4_lockargs_sz \
+ (NLM4_netobj_sz + XDR_bool + XDR_bool + NLM4_nlm4_lock_sz + XDR_bool + NLM4_int32_sz)
+#define NLM4_nlm4_cancargs_sz \
+ (NLM4_netobj_sz + XDR_bool + XDR_bool + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_testargs_sz \
+ (NLM4_netobj_sz + XDR_bool + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_unlockargs_sz \
+ (NLM4_netobj_sz + NLM4_nlm4_lock_sz)
+#define NLM4_nlm4_share_sz \
+ (XDR_unsigned_int + XDR_QUADLEN(LM_MAXSTRLEN) + NLM4_netobj_sz + NLM4_netobj_sz + NLM4_fsh4_mode_sz + NLM4_fsh4_access_sz)
+#define NLM4_nlm4_shareargs_sz \
+ (NLM4_netobj_sz + NLM4_nlm4_share_sz + XDR_bool)
+#define NLM4_nlm4_shareres_sz \
+ (NLM4_netobj_sz + NLM4_nlm4_stats_sz + NLM4_int32_sz)
+#define NLM4_nlm4_notify_sz \
+ (XDR_unsigned_int + XDR_QUADLEN(LM_MAXNAMELEN) + NLM4_int32_sz)
+#define NLM4_nlm4_notifyargs_sz \
+ (NLM4_nlm4_notify_sz + XDR_QUADLEN(SM_PRIV_SIZE))
+#define NLM4_MAX_ARGS_SZ \
+ (NLM4_nlm4_lockargs_sz)
+
+#endif /* _LINUX_XDRGEN_NLM4_DEF_H */
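
The *_sz macros above count 4-octet XDR words. Assuming the kernel's usual XDR_QUADLEN(n) = ((n) + 3) >> 2 and one word for the opaque length discriminator, the worst-case netobj size works out as follows:

#include <stdio.h>

#define XDR_UNIT         4                      /* bytes per XDR word */
#define XDR_QUADLEN(n)   (((n) + 3) >> 2)       /* bytes -> padded words */
#define XDR_unsigned_int 1
#define MAXNETOBJ_SZ     1024

/* netobj = length word + padded opaque body */
#define NLM4_netobj_sz   (XDR_unsigned_int + XDR_QUADLEN(MAXNETOBJ_SZ))

int main(void)
{
        printf("netobj: %d words, %d bytes max on the wire\n",
               NLM4_netobj_sz, NLM4_netobj_sz * XDR_UNIT);
        /* prints: netobj: 257 words, 1028 bytes max on the wire */
        return 0;
}
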
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 750ecce56930..ff855197880d 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1933,7 +1933,9 @@ TRACE_EVENT(svc_stats_latency,
svc_xprt_flag(CONG_CTRL) \
svc_xprt_flag(HANDSHAKE) \
svc_xprt_flag(TLS_SESSION) \
- svc_xprt_flag_end(PEER_AUTH)
+ svc_xprt_flag(PEER_AUTH) \
+ svc_xprt_flag(PEER_VALID) \
+ svc_xprt_flag_end(RPCB_UNREG)
#undef svc_xprt_flag
#undef svc_xprt_flag_end
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index a73ca3703abb..de647cf166c3 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -34,7 +34,7 @@
#define NFSEXP_GATHERED_WRITES 0x0020
#define NFSEXP_NOREADDIRPLUS 0x0040
#define NFSEXP_SECURITY_LABEL 0x0080
-/* 0x100 currently unused */
+#define NFSEXP_SIGN_FH 0x0100
#define NFSEXP_NOHIDE 0x0200
#define NFSEXP_NOSUBTREECHECK 0x0400
#define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */
@@ -55,7 +55,7 @@
#define NFSEXP_PNFS 0x20000
/* All flags that we claim to support. (Note we don't support NOACL.) */
-#define NFSEXP_ALLFLAGS 0x3FEFF
+#define NFSEXP_ALLFLAGS 0x3FFFF
/* The flags that may vary depending on security flavor: */
#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h
index e9efbc9e63d8..97c7447f4d14 100644
--- a/include/uapi/linux/nfsd_netlink.h
+++ b/include/uapi/linux/nfsd_netlink.h
@@ -36,6 +36,7 @@ enum {
NFSD_A_SERVER_LEASETIME,
NFSD_A_SERVER_SCOPE,
NFSD_A_SERVER_MIN_THREADS,
+ NFSD_A_SERVER_FH_KEY,
__NFSD_A_SERVER_MAX,
NFSD_A_SERVER_MAX = (__NFSD_A_SERVER_MAX - 1)
diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c
index a5bff02cd7ba..dde1ee934d0d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_test.c
+++ b/net/sunrpc/auth_gss/gss_krb5_test.c
@@ -63,10 +63,11 @@ static void kdf_case(struct kunit *test)
KUNIT_ASSERT_EQ(test, err, 0);
/* Assert */
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->expected_result->data,
- derivedkey.data, derivedkey.len), 0,
- "key mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->expected_result->data,
+ derivedkey.data,
+ derivedkey.len,
+ "key mismatch");
}
static void checksum_case(struct kunit *test)
@@ -111,10 +112,11 @@ static void checksum_case(struct kunit *test)
KUNIT_ASSERT_EQ(test, err, 0);
/* Assert */
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->expected_result->data,
- checksum.data, checksum.len), 0,
- "checksum mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->expected_result->data,
+ checksum.data,
+ checksum.len,
+ "checksum mismatch");
crypto_free_ahash(tfm);
}
@@ -314,10 +316,11 @@ static void rfc3961_nfold_case(struct kunit *test)
param->expected_result->len * 8, result);
/* Assert */
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->expected_result->data,
- result, param->expected_result->len), 0,
- "result mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->expected_result->data,
+ result,
+ param->expected_result->len,
+ "result mismatch");
}
static struct kunit_case rfc3961_test_cases[] = {
@@ -569,14 +572,16 @@ static void rfc3962_encrypt_case(struct kunit *test)
KUNIT_EXPECT_EQ_MSG(test,
param->expected_result->len, buf.len,
"ciphertext length mismatch");
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->expected_result->data,
- text, param->expected_result->len), 0,
- "ciphertext mismatch");
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->next_iv->data, iv,
- param->next_iv->len), 0,
- "IV mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->expected_result->data,
+ text,
+ param->expected_result->len,
+ "ciphertext mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->next_iv->data,
+ iv,
+ param->next_iv->len,
+ "IV mismatch");
crypto_free_sync_skcipher(cts_tfm);
crypto_free_sync_skcipher(cbc_tfm);
@@ -1194,15 +1199,17 @@ static void rfc6803_encrypt_case(struct kunit *test)
KUNIT_EXPECT_EQ_MSG(test, param->expected_result->len,
buf.len + checksum.len,
"ciphertext length mismatch");
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->expected_result->data,
- buf.head[0].iov_base, buf.len), 0,
- "encrypted result mismatch");
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->expected_result->data +
- (param->expected_result->len - checksum.len),
- checksum.data, checksum.len), 0,
- "HMAC mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->expected_result->data,
+ buf.head[0].iov_base,
+ buf.len,
+ "encrypted result mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->expected_result->data +
+ (param->expected_result->len - checksum.len),
+ checksum.data,
+ checksum.len,
+ "HMAC mismatch");
crypto_free_ahash(ahash_tfm);
crypto_free_sync_skcipher(cts_tfm);
@@ -1687,15 +1694,16 @@ static void rfc8009_encrypt_case(struct kunit *test)
KUNIT_EXPECT_EQ_MSG(test,
param->expected_result->len, buf.len,
"ciphertext length mismatch");
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->expected_result->data,
- buf.head[0].iov_base,
- param->expected_result->len), 0,
- "ciphertext mismatch");
- KUNIT_EXPECT_EQ_MSG(test, memcmp(param->expected_hmac->data,
- checksum.data,
- checksum.len), 0,
- "HMAC mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->expected_result->data,
+ buf.head[0].iov_base,
+ param->expected_result->len,
+ "ciphertext mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->expected_hmac->data,
+ checksum.data,
+ checksum.len,
+ "HMAC mismatch");
crypto_free_ahash(ahash_tfm);
crypto_free_sync_skcipher(cts_tfm);
@@ -1826,10 +1834,11 @@ static void encrypt_selftest_case(struct kunit *test)
KUNIT_EXPECT_EQ_MSG(test,
param->plaintext->len, buf.len,
"length mismatch");
- KUNIT_EXPECT_EQ_MSG(test,
- memcmp(param->plaintext->data,
- buf.head[0].iov_base, buf.len), 0,
- "plaintext mismatch");
+ KUNIT_EXPECT_MEMEQ_MSG(test,
+ param->plaintext->data,
+ buf.head[0].iov_base,
+ buf.len,
+ "plaintext mismatch");
crypto_free_sync_skcipher(cts_tfm);
crypto_free_sync_skcipher(cbc_tfm);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ef8b7e8b1e9c..7081c1214e6c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -134,11 +134,11 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
return tmp;
}
+ cache_get(new);
hlist_add_head_rcu(&new->cache_list, head);
detail->entries++;
if (detail->nextcheck > new->expiry_time)
detail->nextcheck = new->expiry_time + 1;
- cache_get(new);
spin_unlock(&detail->hash_lock);
if (freeme)
@@ -233,9 +233,9 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
spin_lock(&detail->hash_lock);
cache_entry_update(detail, tmp, new);
- hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
- detail->entries++;
cache_get(tmp);
+ hlist_add_head_rcu(&tmp->cache_list, &detail->hash_table[hash]);
+ detail->entries++;
cache_fresh_locked(tmp, new->expiry_time, detail);
cache_fresh_locked(old, 0, detail);
spin_unlock(&detail->hash_lock);
@@ -399,7 +399,11 @@ static struct delayed_work cache_cleaner;
void sunrpc_init_cache_detail(struct cache_detail *cd)
{
spin_lock_init(&cd->hash_lock);
- INIT_LIST_HEAD(&cd->queue);
+ INIT_LIST_HEAD(&cd->requests);
+ INIT_LIST_HEAD(&cd->readers);
+ spin_lock_init(&cd->queue_lock);
+ init_waitqueue_head(&cd->queue_wait);
+ cd->next_seqno = 0;
spin_lock(&cache_list_lock);
cd->nextcheck = 0;
cd->entries = 0;
@@ -794,31 +798,20 @@ void cache_clean_deferred(void *owner)
* On read, you get a full request, or block.
* On write, an update request is processed.
* Poll works if anything to read, and always allows write.
- *
- * Implemented by linked list of requests. Each open file has
- * a ->private that also exists in this list. New requests are added
- * to the end and may wakeup and preceding readers.
- * New readers are added to the head. If, on read, an item is found with
- * CACHE_UPCALLING clear, we free it from the list.
- *
*/
-static DEFINE_SPINLOCK(queue_lock);
-
-struct cache_queue {
- struct list_head list;
- int reader; /* if 0, then request */
-};
struct cache_request {
- struct cache_queue q;
+ struct list_head list;
struct cache_head *item;
- char * buf;
+ char *buf;
int len;
int readers;
+ u64 seqno;
};
struct cache_reader {
- struct cache_queue q;
+ struct list_head list;
int offset; /* if non-0, we have a refcnt on next request */
+ u64 next_seqno;
};
static int cache_request(struct cache_detail *detail,
@@ -833,6 +826,17 @@ static int cache_request(struct cache_detail *detail,
return PAGE_SIZE - len;
}
+static struct cache_request *
+cache_next_request(struct cache_detail *cd, u64 seqno)
+{
+ struct cache_request *rq;
+
+ list_for_each_entry(rq, &cd->requests, list)
+ if (rq->seqno >= seqno)
+ return rq;
+ return NULL;
+}
+
static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
loff_t *ppos, struct cache_detail *cd)
{
@@ -847,25 +851,18 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
inode_lock(inode); /* protect against multiple concurrent
* readers on this file */
again:
- spin_lock(&queue_lock);
+ spin_lock(&cd->queue_lock);
/* need to find next request */
- while (rp->q.list.next != &cd->queue &&
- list_entry(rp->q.list.next, struct cache_queue, list)
- ->reader) {
- struct list_head *next = rp->q.list.next;
- list_move(&rp->q.list, next);
- }
- if (rp->q.list.next == &cd->queue) {
- spin_unlock(&queue_lock);
+ rq = cache_next_request(cd, rp->next_seqno);
+ if (!rq) {
+ spin_unlock(&cd->queue_lock);
inode_unlock(inode);
WARN_ON_ONCE(rp->offset);
return 0;
}
- rq = container_of(rp->q.list.next, struct cache_request, q.list);
- WARN_ON_ONCE(rq->q.reader);
if (rp->offset == 0)
rq->readers++;
- spin_unlock(&queue_lock);
+ spin_unlock(&cd->queue_lock);
if (rq->len == 0) {
err = cache_request(cd, rq);
@@ -876,9 +873,7 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
err = -EAGAIN;
- spin_lock(&queue_lock);
- list_move(&rp->q.list, &rq->q.list);
- spin_unlock(&queue_lock);
+ rp->next_seqno = rq->seqno + 1;
} else {
if (rp->offset + count > rq->len)
count = rq->len - rp->offset;
@@ -888,26 +883,24 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
rp->offset += count;
if (rp->offset >= rq->len) {
rp->offset = 0;
- spin_lock(&queue_lock);
- list_move(&rp->q.list, &rq->q.list);
- spin_unlock(&queue_lock);
+ rp->next_seqno = rq->seqno + 1;
}
err = 0;
}
out:
if (rp->offset == 0) {
/* need to release rq */
- spin_lock(&queue_lock);
+ spin_lock(&cd->queue_lock);
rq->readers--;
if (rq->readers == 0 &&
!test_bit(CACHE_PENDING, &rq->item->flags)) {
- list_del(&rq->q.list);
- spin_unlock(&queue_lock);
+ list_del(&rq->list);
+ spin_unlock(&cd->queue_lock);
cache_put(rq->item, cd);
kfree(rq->buf);
kfree(rq);
} else
- spin_unlock(&queue_lock);
+ spin_unlock(&cd->queue_lock);
}
if (err == -EAGAIN)
goto again;
@@ -971,16 +964,13 @@ out:
return ret;
}
-static DECLARE_WAIT_QUEUE_HEAD(queue_wait);
-
static __poll_t cache_poll(struct file *filp, poll_table *wait,
struct cache_detail *cd)
{
__poll_t mask;
struct cache_reader *rp = filp->private_data;
- struct cache_queue *cq;
- poll_wait(filp, &queue_wait, wait);
+ poll_wait(filp, &cd->queue_wait, wait);
	/* always allow write */
mask = EPOLLOUT | EPOLLWRNORM;
@@ -988,15 +978,11 @@ static __poll_t cache_poll(struct file *filp, poll_table *wait,
if (!rp)
return mask;
- spin_lock(&queue_lock);
+ spin_lock(&cd->queue_lock);
- for (cq= &rp->q; &cq->list != &cd->queue;
- cq = list_entry(cq->list.next, struct cache_queue, list))
- if (!cq->reader) {
- mask |= EPOLLIN | EPOLLRDNORM;
- break;
- }
- spin_unlock(&queue_lock);
+ if (cache_next_request(cd, rp->next_seqno))
+ mask |= EPOLLIN | EPOLLRDNORM;
+ spin_unlock(&cd->queue_lock);
return mask;
}
@@ -1006,25 +992,20 @@ static int cache_ioctl(struct inode *ino, struct file *filp,
{
int len = 0;
struct cache_reader *rp = filp->private_data;
- struct cache_queue *cq;
+ struct cache_request *rq;
if (cmd != FIONREAD || !rp)
return -EINVAL;
- spin_lock(&queue_lock);
+ spin_lock(&cd->queue_lock);
/* only find the length remaining in current request,
* or the length of the next request
*/
- for (cq= &rp->q; &cq->list != &cd->queue;
- cq = list_entry(cq->list.next, struct cache_queue, list))
- if (!cq->reader) {
- struct cache_request *cr =
- container_of(cq, struct cache_request, q);
- len = cr->len - rp->offset;
- break;
- }
- spin_unlock(&queue_lock);
+ rq = cache_next_request(cd, rp->next_seqno);
+ if (rq)
+ len = rq->len - rp->offset;
+ spin_unlock(&cd->queue_lock);
return put_user(len, (int __user *)arg);
}
@@ -1044,11 +1025,11 @@ static int cache_open(struct inode *inode, struct file *filp,
return -ENOMEM;
}
rp->offset = 0;
- rp->q.reader = 1;
+ rp->next_seqno = 0;
- spin_lock(&queue_lock);
- list_add(&rp->q.list, &cd->queue);
- spin_unlock(&queue_lock);
+ spin_lock(&cd->queue_lock);
+ list_add(&rp->list, &cd->readers);
+ spin_unlock(&cd->queue_lock);
}
if (filp->f_mode & FMODE_WRITE)
atomic_inc(&cd->writers);
@@ -1064,29 +1045,24 @@ static int cache_release(struct inode *inode, struct file *filp,
if (rp) {
struct cache_request *rq = NULL;
- spin_lock(&queue_lock);
+ spin_lock(&cd->queue_lock);
if (rp->offset) {
- struct cache_queue *cq;
- for (cq = &rp->q; &cq->list != &cd->queue;
- cq = list_entry(cq->list.next,
- struct cache_queue, list))
- if (!cq->reader) {
- struct cache_request *cr =
- container_of(cq,
- struct cache_request, q);
- cr->readers--;
- if (cr->readers == 0 &&
- !test_bit(CACHE_PENDING,
- &cr->item->flags)) {
- list_del(&cr->q.list);
- rq = cr;
- }
- break;
+ struct cache_request *cr;
+
+ cr = cache_next_request(cd, rp->next_seqno);
+ if (cr) {
+ cr->readers--;
+ if (cr->readers == 0 &&
+ !test_bit(CACHE_PENDING,
+ &cr->item->flags)) {
+ list_del(&cr->list);
+ rq = cr;
}
+ }
rp->offset = 0;
}
- list_del(&rp->q.list);
- spin_unlock(&queue_lock);
+ list_del(&rp->list);
+ spin_unlock(&cd->queue_lock);
if (rq) {
cache_put(rq->item, cd);
@@ -1109,27 +1085,24 @@ static int cache_release(struct inode *inode, struct file *filp,
static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
{
- struct cache_queue *cq, *tmp;
- struct cache_request *cr;
+ struct cache_request *cr, *tmp;
LIST_HEAD(dequeued);
- spin_lock(&queue_lock);
- list_for_each_entry_safe(cq, tmp, &detail->queue, list)
- if (!cq->reader) {
- cr = container_of(cq, struct cache_request, q);
- if (cr->item != ch)
- continue;
- if (test_bit(CACHE_PENDING, &ch->flags))
- /* Lost a race and it is pending again */
- break;
- if (cr->readers != 0)
- continue;
- list_move(&cr->q.list, &dequeued);
- }
- spin_unlock(&queue_lock);
+ spin_lock(&detail->queue_lock);
+ list_for_each_entry_safe(cr, tmp, &detail->requests, list) {
+ if (cr->item != ch)
+ continue;
+ if (test_bit(CACHE_PENDING, &ch->flags))
+ /* Lost a race and it is pending again */
+ break;
+ if (cr->readers != 0)
+ continue;
+ list_move(&cr->list, &dequeued);
+ }
+ spin_unlock(&detail->queue_lock);
while (!list_empty(&dequeued)) {
- cr = list_entry(dequeued.next, struct cache_request, q.list);
- list_del(&cr->q.list);
+ cr = list_entry(dequeued.next, struct cache_request, list);
+ list_del(&cr->list);
cache_put(cr->item, detail);
kfree(cr->buf);
kfree(cr);
@@ -1247,20 +1220,20 @@ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
return -EAGAIN;
}
- crq->q.reader = 0;
crq->buf = buf;
crq->len = 0;
crq->readers = 0;
- spin_lock(&queue_lock);
+ spin_lock(&detail->queue_lock);
if (test_bit(CACHE_PENDING, &h->flags)) {
crq->item = cache_get(h);
- list_add_tail(&crq->q.list, &detail->queue);
+ crq->seqno = detail->next_seqno++;
+ list_add_tail(&crq->list, &detail->requests);
trace_cache_entry_upcall(detail, h);
} else
/* Lost a race, no longer PENDING, so don't enqueue */
ret = -EAGAIN;
- spin_unlock(&queue_lock);
- wake_up(&queue_wait);
+ spin_unlock(&detail->queue_lock);
+ wake_up(&detail->queue_wait);
if (ret == -EAGAIN) {
kfree(buf);
kfree(crq);
@@ -1378,18 +1351,14 @@ static void *__cache_seq_start(struct seq_file *m, loff_t *pos)
hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list)
if (!entry--)
return ch;
- n &= ~((1LL<<32) - 1);
- do {
- hash++;
- n += 1LL<<32;
- } while(hash < cd->hash_size &&
- hlist_empty(&cd->hash_table[hash]));
- if (hash >= cd->hash_size)
- return NULL;
- *pos = n+1;
- return hlist_entry_safe(rcu_dereference_raw(
+ ch = NULL;
+ while (!ch && ++hash < cd->hash_size)
+ ch = hlist_entry_safe(rcu_dereference(
hlist_first_rcu(&cd->hash_table[hash])),
struct cache_head, cache_list);
+
+ *pos = ((long long)hash << 32) + 1;
+ return ch;
}
static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
@@ -1398,29 +1367,29 @@ static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
int hash = (*pos >> 32);
struct cache_detail *cd = m->private;
- if (p == SEQ_START_TOKEN)
+ if (p == SEQ_START_TOKEN) {
hash = 0;
- else if (ch->cache_list.next == NULL) {
- hash++;
- *pos += 1LL<<32;
- } else {
- ++*pos;
- return hlist_entry_safe(rcu_dereference_raw(
- hlist_next_rcu(&ch->cache_list)),
- struct cache_head, cache_list);
+ ch = NULL;
}
- *pos &= ~((1LL<<32) - 1);
- while (hash < cd->hash_size &&
- hlist_empty(&cd->hash_table[hash])) {
+ while (hash < cd->hash_size) {
+ if (ch)
+ ch = hlist_entry_safe(
+ rcu_dereference(
+ hlist_next_rcu(&ch->cache_list)),
+ struct cache_head, cache_list);
+ else
+ ch = hlist_entry_safe(
+ rcu_dereference(
+ hlist_first_rcu(&cd->hash_table[hash])),
+ struct cache_head, cache_list);
+ if (ch) {
+ ++*pos;
+ return ch;
+ }
hash++;
- *pos += 1LL<<32;
+ *pos = (long long)hash << 32;
}
- if (hash >= cd->hash_size)
- return NULL;
- ++*pos;
- return hlist_entry_safe(rcu_dereference_raw(
- hlist_first_rcu(&cd->hash_table[hash])),
- struct cache_head, cache_list);
+ return NULL;
}
void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
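
[Editor's note] The seqno-based queue above replaces pointer-chasing through a mixed reader/request list with a per-reader cursor: each reader remembers the sequence number of the next request it wants, and cache_next_request() returns the first request at or past that cursor. A minimal userspace sketch of the scheme follows; the struct and function names are illustrative, and the kernel's queue_lock is omitted.

/* Userspace sketch of the seqno cursor scheme; not kernel code. */
#include <stdint.h>
#include <stdio.h>

struct request {
	struct request *next;
	uint64_t seqno;
	const char *payload;
};

struct queue {
	struct request *head;
	uint64_t next_seqno;	/* seqno for the next enqueued request */
};

/* Enqueue at the tail, so the list stays sorted by seqno. */
static void enqueue(struct queue *q, struct request *rq, const char *payload)
{
	struct request **pp;

	rq->next = NULL;
	rq->seqno = q->next_seqno++;
	rq->payload = payload;
	for (pp = &q->head; *pp; pp = &(*pp)->next)
		;
	*pp = rq;
}

/* Analogue of cache_next_request(): first request at or past @cursor. */
static struct request *next_request(struct queue *q, uint64_t cursor)
{
	struct request *rq;

	for (rq = q->head; rq; rq = rq->next)
		if (rq->seqno >= cursor)
			return rq;
	return NULL;
}

int main(void)
{
	struct queue q = { 0 };
	struct request a, b, *rq;
	uint64_t cursor = 0;	/* per-reader, like cache_reader::next_seqno */

	enqueue(&q, &a, "first");
	enqueue(&q, &b, "second");
	while ((rq = next_request(&q, cursor)) != NULL) {
		printf("read %s (seqno %llu)\n", rq->payload,
		       (unsigned long long)rq->seqno);
		cursor = rq->seqno + 1;	/* advance past a fully read request */
	}
	return 0;
}

Because sequence numbers increase monotonically and requests stay sorted, a reader's cursor remains valid even after requests ahead of it are dequeued, which is what lets the rework drop the reader entries from the request list entirely.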
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d8ccb8e4b5c2..576fa42e7abf 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -638,13 +638,25 @@ svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node)
{
rqstp->rq_maxpages = svc_serv_maxpages(serv);
- /* rq_pages' last entry is NULL for historical reasons. */
+ /* +1 for a NULL sentinel readable by nfsd_splice_actor() */
rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages + 1,
sizeof(struct page *),
GFP_KERNEL, node);
if (!rqstp->rq_pages)
return false;
+ /* +1 for a NULL sentinel at rq_page_end (see svc_rqst_replace_page) */
+ rqstp->rq_respages = kcalloc_node(rqstp->rq_maxpages + 1,
+ sizeof(struct page *),
+ GFP_KERNEL, node);
+ if (!rqstp->rq_respages) {
+ kfree(rqstp->rq_pages);
+ rqstp->rq_pages = NULL;
+ return false;
+ }
+
+ rqstp->rq_pages_nfree = rqstp->rq_maxpages;
+ rqstp->rq_next_page = rqstp->rq_respages + rqstp->rq_maxpages;
return true;
}
@@ -656,10 +668,19 @@ svc_release_buffer(struct svc_rqst *rqstp)
{
unsigned long i;
- for (i = 0; i < rqstp->rq_maxpages; i++)
- if (rqstp->rq_pages[i])
- put_page(rqstp->rq_pages[i]);
- kfree(rqstp->rq_pages);
+ if (rqstp->rq_pages) {
+ for (i = 0; i < rqstp->rq_maxpages; i++)
+ if (rqstp->rq_pages[i])
+ put_page(rqstp->rq_pages[i]);
+ kfree(rqstp->rq_pages);
+ }
+
+ if (rqstp->rq_respages) {
+ for (i = 0; i < rqstp->rq_maxpages; i++)
+ if (rqstp->rq_respages[i])
+ put_page(rqstp->rq_respages[i]);
+ kfree(rqstp->rq_respages);
+ }
}
static void
@@ -934,11 +955,11 @@ svc_set_num_threads(struct svc_serv *serv, unsigned int min_threads,
EXPORT_SYMBOL_GPL(svc_set_num_threads);
/**
- * svc_rqst_replace_page - Replace one page in rq_pages[]
+ * svc_rqst_replace_page - Replace one page in rq_respages[]
* @rqstp: svc_rqst with pages to replace
* @page: replacement page
*
- * When replacing a page in rq_pages, batch the release of the
+ * When replacing a page in rq_respages, batch the release of the
* replaced pages to avoid hammering the page allocator.
*
* Return values:
@@ -947,19 +968,16 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
*/
bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
{
- struct page **begin = rqstp->rq_pages;
- struct page **end = &rqstp->rq_pages[rqstp->rq_maxpages];
+ struct page **begin = rqstp->rq_respages;
+ struct page **end = rqstp->rq_page_end;
if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) {
trace_svc_replace_page_err(rqstp);
return false;
}
- if (*rqstp->rq_next_page) {
- if (!folio_batch_add(&rqstp->rq_fbatch,
- page_folio(*rqstp->rq_next_page)))
- __folio_batch_release(&rqstp->rq_fbatch);
- }
+ if (*rqstp->rq_next_page)
+ svc_rqst_page_release(rqstp, *rqstp->rq_next_page);
get_page(page);
*(rqstp->rq_next_page++) = page;
@@ -971,18 +989,24 @@ EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
* svc_rqst_release_pages - Release Reply buffer pages
* @rqstp: RPC transaction context
*
- * Release response pages that might still be in flight after
- * svc_send, and any spliced filesystem-owned pages.
+ * Release response pages in the range [rq_respages, rq_next_page).
+ * NULL entries in this range are skipped, allowing transports to
+ * transfer pages to a send context before this function runs.
*/
void svc_rqst_release_pages(struct svc_rqst *rqstp)
{
- int i, count = rqstp->rq_next_page - rqstp->rq_respages;
-
- if (count) {
- release_pages(rqstp->rq_respages, count);
- for (i = 0; i < count; i++)
- rqstp->rq_respages[i] = NULL;
+ struct page **pp;
+
+ for (pp = rqstp->rq_respages; pp < rqstp->rq_next_page; pp++) {
+ if (*pp) {
+ if (!folio_batch_add(&rqstp->rq_fbatch,
+ page_folio(*pp)))
+ __folio_batch_release(&rqstp->rq_fbatch);
+ *pp = NULL;
+ }
}
+ if (rqstp->rq_fbatch.nr)
+ __folio_batch_release(&rqstp->rq_fbatch);
}
/**
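
[Editor's note] svc_rqst_release_pages() above relies on the folio_batch pattern to amortize the cost of releasing many pages. A self-contained sketch of that pattern, assuming kernel context (linux/mm.h, linux/pagevec.h); release_page_array() is a hypothetical helper, not part of this patch.

/* Batch page releases; NULL entries mean a transport took ownership. */
static void release_page_array(struct folio_batch *fbatch,
			       struct page **pages, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (!pages[i])
			continue;	/* transport already took ownership */
		/* folio_batch_add() returns the slots left; 0 means full */
		if (!folio_batch_add(fbatch, page_folio(pages[i])))
			__folio_batch_release(fbatch);
		pages[i] = NULL;
	}
	if (folio_batch_count(fbatch))
		__folio_batch_release(fbatch);	/* drain the remainder */
}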
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 56a663b8939f..b16e710926c1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -650,14 +650,13 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-static bool svc_alloc_arg(struct svc_rqst *rqstp)
+static bool svc_fill_pages(struct svc_rqst *rqstp, struct page **pages,
+ unsigned long npages)
{
- struct xdr_buf *arg = &rqstp->rq_arg;
- unsigned long pages, filled, ret;
+ unsigned long filled, ret;
- pages = rqstp->rq_maxpages;
- for (filled = 0; filled < pages; filled = ret) {
- ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages);
+ for (filled = 0; filled < npages; filled = ret) {
+ ret = alloc_pages_bulk(GFP_KERNEL, npages, pages);
if (ret > filled)
/* Made progress, don't sleep yet */
continue;
@@ -667,11 +666,40 @@ static bool svc_alloc_arg(struct svc_rqst *rqstp)
set_current_state(TASK_RUNNING);
return false;
}
- trace_svc_alloc_arg_err(pages, ret);
+ trace_svc_alloc_arg_err(npages, ret);
memalloc_retry_wait(GFP_KERNEL);
}
- rqstp->rq_page_end = &rqstp->rq_pages[pages];
- rqstp->rq_pages[pages] = NULL; /* this might be seen in nfsd_splice_actor() */
+ return true;
+}
+
+static bool svc_alloc_arg(struct svc_rqst *rqstp)
+{
+ struct xdr_buf *arg = &rqstp->rq_arg;
+ unsigned long pages, nfree;
+
+ pages = rqstp->rq_maxpages;
+
+ nfree = rqstp->rq_pages_nfree;
+ if (nfree) {
+ if (!svc_fill_pages(rqstp, rqstp->rq_pages, nfree))
+ return false;
+ rqstp->rq_pages_nfree = 0;
+ }
+
+ if (WARN_ON_ONCE(rqstp->rq_next_page < rqstp->rq_respages))
+ return false;
+ nfree = rqstp->rq_next_page - rqstp->rq_respages;
+ if (nfree) {
+ if (!svc_fill_pages(rqstp, rqstp->rq_respages, nfree))
+ return false;
+ }
+
+ rqstp->rq_next_page = rqstp->rq_respages;
+ rqstp->rq_page_end = &rqstp->rq_respages[pages];
+ /* svc_rqst_replace_page() dereferences *rq_next_page even
+ * at rq_page_end; NULL prevents releasing a garbage page.
+ */
+ rqstp->rq_page_end[0] = NULL;
/* Make arg->head point to first page and arg->pages point to rest */
arg->head[0].iov_base = page_address(rqstp->rq_pages[0]);
@@ -1277,7 +1305,6 @@ static noinline int svc_deferred_recv(struct svc_rqst *rqstp)
rqstp->rq_addrlen = dr->addrlen;
/* Save off transport header len in case we get deferred again */
rqstp->rq_daddr = dr->daddr;
- rqstp->rq_respages = rqstp->rq_pages;
rqstp->rq_xprt_ctxt = dr->xprt_ctxt;
dr->xprt_ctxt = NULL;
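
[Editor's note] svc_fill_pages() above depends on a property of alloc_pages_bulk(): it fills only NULL slots and returns the total number of populated entries, so the caller can simply retry until the array is full. A condensed sketch of that retry idiom; the real code also breaks out when the thread should exit, and that check is elided here.

/* Refill every NULL slot in @pages; sketch, kernel context assumed. */
static bool fill_page_array(struct page **pages, unsigned long npages)
{
	unsigned long filled = 0;

	while (filled < npages) {
		unsigned long ret = alloc_pages_bulk(GFP_KERNEL, npages, pages);

		if (ret == filled)
			/* No progress: wait for reclaim before retrying */
			memalloc_retry_wait(GFP_KERNEL);
		filled = ret;
	}
	return true;
}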
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f28c6076f7e8..7be3de1a1aed 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -351,8 +351,6 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE)
bvec_set_page(&bvec[i], rqstp->rq_pages[i], PAGE_SIZE, 0);
- rqstp->rq_respages = &rqstp->rq_pages[i];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
iov_iter_bvec(&msg.msg_iter, ITER_DEST, bvec, i, buflen);
if (seek) {
@@ -677,13 +675,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
if (len <= rqstp->rq_arg.head[0].iov_len) {
rqstp->rq_arg.head[0].iov_len = len;
rqstp->rq_arg.page_len = 0;
- rqstp->rq_respages = rqstp->rq_pages+1;
} else {
rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
- rqstp->rq_respages = rqstp->rq_pages + 1 +
- DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);
}
- rqstp->rq_next_page = rqstp->rq_respages+1;
if (serv->sv_stats)
serv->sv_stats->netudpcnt++;
@@ -994,7 +988,7 @@ static size_t svc_tcp_restore_pages(struct svc_sock *svsk,
npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
if (rqstp->rq_pages[i] != NULL)
- put_page(rqstp->rq_pages[i]);
+ svc_rqst_page_release(rqstp, rqstp->rq_pages[i]);
BUG_ON(svsk->sk_pages[i] == NULL);
rqstp->rq_pages[i] = svsk->sk_pages[i];
svsk->sk_pages[i] = NULL;
@@ -1015,6 +1009,7 @@ static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
svsk->sk_pages[i] = rqstp->rq_pages[i];
rqstp->rq_pages[i] = NULL;
}
+ rqstp->rq_pages_nfree = npages;
}
static void svc_tcp_clear_pages(struct svc_sock *svsk)
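
[Editor's note] The svc_tcp_save_pages() change above closes the loop with the new rq_pages_nfree accounting: pages holding a partly received record are parked with the socket, the vacated rq_pages[] slots become NULL, and the count tells svc_alloc_arg() how many slots to refill on the next pass. A simplified model of the handoff; the names are simplified and this is not the kernel API.

/* Park pages with the socket and record how many slots need refilling. */
static void save_partial(struct page **sk_pages, struct page **rq_pages,
			 unsigned int npages, unsigned int *nfree)
{
	unsigned int i;

	for (i = 0; i < npages; i++) {
		sk_pages[i] = rq_pages[i];	/* park with the socket */
		rq_pages[i] = NULL;		/* slot is now empty */
	}
	*nfree = npages;			/* refill hint for svc_alloc_arg() */
}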
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index e7e4a39ca6c6..f8a0638eb095 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -118,7 +118,8 @@ svc_rdma_next_recv_ctxt(struct list_head *list)
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
- int node = ibdev_to_node(rdma->sc_cm_id->device);
+ struct ib_device *device = rdma->sc_cm_id->device;
+ int node = ibdev_to_node(device);
struct svc_rdma_recv_ctxt *ctxt;
unsigned long pages;
dma_addr_t addr;
@@ -133,9 +134,9 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
goto fail1;
- addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
- rdma->sc_max_req_size, DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
+ addr = ib_dma_map_single(device, buffer, rdma->sc_max_req_size,
+ DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(device, addr))
goto fail2;
svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid);
@@ -167,7 +168,7 @@ fail0:
static void svc_rdma_recv_ctxt_destroy(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
- ib_dma_unmap_single(rdma->sc_pd->device, ctxt->rc_recv_sge.addr,
+ ib_dma_unmap_single(rdma->sc_cm_id->device, ctxt->rc_recv_sge.addr,
ctxt->rc_recv_sge.length, DMA_FROM_DEVICE);
kfree(ctxt->rc_recv_buf);
kfree(ctxt);
@@ -861,18 +862,12 @@ static noinline void svc_rdma_read_complete(struct svc_rqst *rqstp,
unsigned int i;
/* Transfer the Read chunk pages into @rqstp.rq_pages, replacing
- * the rq_pages that were already allocated for this rqstp.
+ * the receive buffer pages already allocated for this rqstp.
*/
- release_pages(rqstp->rq_respages, ctxt->rc_page_count);
+ release_pages(rqstp->rq_pages, ctxt->rc_page_count);
for (i = 0; i < ctxt->rc_page_count; i++)
rqstp->rq_pages[i] = ctxt->rc_pages[i];
- /* Update @rqstp's result send buffer to start after the
- * last page in the RDMA Read payload.
- */
- rqstp->rq_respages = &rqstp->rq_pages[ctxt->rc_page_count];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
-
/* Prevent svc_rdma_recv_ctxt_put() from releasing the
* pages in ctxt::rc_pages a second time.
*/
@@ -931,10 +926,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_rdma_recv_ctxt *ctxt;
int ret;
- /* Prevent svc_xprt_release() from releasing pages in rq_pages
- * when returning 0 or an error.
+ /* Precaution: a zero page count on error return causes
+ * svc_rqst_release_pages() to release nothing.
*/
- rqstp->rq_respages = rqstp->rq_pages;
rqstp->rq_next_page = rqstp->rq_respages;
rqstp->rq_xprt_ctxt = NULL;
@@ -962,7 +956,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
return 0;
percpu_counter_inc(&svcrdma_stat_recv);
- ib_dma_sync_single_for_cpu(rdma_xprt->sc_pd->device,
+ ib_dma_sync_single_for_cpu(rdma_xprt->sc_cm_id->device,
ctxt->rc_recv_sge.addr, ctxt->rc_byte_len,
DMA_FROM_DEVICE);
svc_rdma_build_arg_xdr(rqstp, ctxt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 4ec2f9ae06aa..402e2ceca4ff 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -252,6 +252,28 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
}
/**
+ * svc_rdma_write_chunk_release - Release Write chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ *
+ * Write chunk resources remain live until Send completion because
+ * Write WRs are chained to the Send WR. This function releases all
+ * write_info structures accumulated on @ctxt->sc_write_info_list.
+ */
+void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct svc_rdma_write_info *info;
+
+ while (!list_empty(&ctxt->sc_write_info_list)) {
+ info = list_first_entry(&ctxt->sc_write_info_list,
+ struct svc_rdma_write_info, wi_list);
+ list_del(&info->wi_list);
+ svc_rdma_write_info_free(info);
+ }
+}
+
+/**
* svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
* @rdma: controlling transport
* @ctxt: Send context that is being released
@@ -307,13 +329,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svc_rdma_write_info *info =
- container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_write(&cc->cc_cid);
- break;
+ return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
break;
@@ -321,12 +341,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
}
- svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
-
- if (unlikely(wc->status != IB_WC_SUCCESS))
- svc_xprt_deferred_close(&rdma->sc_xprt);
-
- svc_rdma_write_info_free(info);
+ /* The RDMA Write has flushed, so the client won't get
+ * some of the outgoing RPC message. Signal the loss
+ * to the client by closing the connection.
+ */
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
@@ -405,34 +424,17 @@ static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma,
cqe = NULL;
}
- do {
- if (atomic_sub_return(cc->cc_sqecount,
- &rdma->sc_sq_avail) > 0) {
- cc->cc_posttime = ktime_get();
- ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
- if (ret)
- break;
- return 0;
- }
-
- percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma, &cc->cc_cid);
- atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
- wait_event(rdma->sc_send_wait,
- atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
- trace_svcrdma_sq_retry(rdma, &cc->cc_cid);
- } while (1);
-
- trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret);
- svc_xprt_deferred_close(&rdma->sc_xprt);
-
- /* If even one was posted, there will be a completion. */
- if (bad_wr != first_wr)
- return 0;
+ ret = svc_rdma_sq_wait(rdma, &cc->cc_cid, cc->cc_sqecount);
+ if (ret < 0)
+ return ret;
- atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
- wake_up(&rdma->sc_send_wait);
- return -ENOTCONN;
+ cc->cc_posttime = ktime_get();
+ ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ if (ret)
+ return svc_rdma_post_send_err(rdma, &cc->cc_cid, bad_wr,
+ first_wr, cc->cc_sqecount,
+ ret);
+ return 0;
}
/* Build a bvec that covers one kvec in an xdr_buf.
@@ -617,9 +619,37 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len;
}
-static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr)
+/* Link chunk WRs onto @sctxt's WR chain. Completion is requested
+ * for the tail WR, which is posted first.
+ */
+static void svc_rdma_cc_link_wrs(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *sctxt,
+ struct svc_rdma_chunk_ctxt *cc)
+{
+ struct ib_send_wr *first_wr;
+ struct list_head *pos;
+ struct ib_cqe *cqe;
+
+ first_wr = sctxt->sc_wr_chain;
+ cqe = &cc->cc_cqe;
+ list_for_each(pos, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *rwc;
+
+ rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+ sctxt->sc_wr_chain = first_wr;
+ sctxt->sc_sqecount += cc->cc_sqecount;
+}
+
+/* Link Write WRs for @chunk onto @sctxt's WR chain.
+ */
+static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
@@ -639,10 +669,14 @@ static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
if (ret != payload.len)
goto out_err;
- trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(rdma, cc);
- if (ret < 0)
+ ret = -EINVAL;
+ if (unlikely(sctxt->sc_sqecount + cc->cc_sqecount > rdma->sc_sq_depth))
goto out_err;
+
+ svc_rdma_cc_link_wrs(rdma, sctxt, cc);
+ list_add(&info->wi_list, &sctxt->sc_write_info_list);
+
+ trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
return 0;
out_err:
@@ -651,17 +685,19 @@ out_err:
}
/**
- * svc_rdma_send_write_list - Send all chunks on the Write list
+ * svc_rdma_prepare_write_list - Construct WR chain for sending Write list
* @rdma: controlling RDMA transport
* @rctxt: Write list provisioned by the client
+ * @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply message
*
- * Returns zero on success, or a negative errno if one or more
- * Write chunks could not be sent.
+ * Returns zero on success, or a negative errno if WR chain
+ * construction fails for one or more Write chunks.
*/
-int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr)
+int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_recv_ctxt *rctxt,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_chunk *chunk;
int ret;
@@ -669,7 +705,7 @@ int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
if (!chunk->ch_payload_length)
break;
- ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
+ ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
if (ret < 0)
return ret;
}
@@ -699,9 +735,6 @@ int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
{
struct svc_rdma_write_info *info = &sctxt->sc_reply_info;
struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
- struct ib_send_wr *first_wr;
- struct list_head *pos;
- struct ib_cqe *cqe;
int ret;
info->wi_rdma = rdma;
@@ -715,23 +748,222 @@ int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
- first_wr = sctxt->sc_wr_chain;
- cqe = &cc->cc_cqe;
- list_for_each(pos, &cc->cc_rwctxts) {
- struct svc_rdma_rw_ctxt *rwc;
-
- rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
- first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
- rdma->sc_port_num, cqe, first_wr);
- cqe = NULL;
- }
- sctxt->sc_wr_chain = first_wr;
- sctxt->sc_sqecount += cc->cc_sqecount;
+ svc_rdma_cc_link_wrs(rdma, sctxt, cc);
trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
return xdr->len;
}
+/*
+ * Cap contiguous RDMA Read sink allocations at order-4.
+ * Higher orders risk allocation failure under
+ * __GFP_NORETRY, which would negate the benefit of the
+ * contiguous fast path.
+ */
+#define SVC_RDMA_CONTIG_MAX_ORDER 4
+
+/**
+ * svc_rdma_alloc_read_pages - Allocate physically contiguous pages
+ * @nr_pages: number of pages needed
+ * @order: on success, set to the allocation order
+ *
+ * Attempts a higher-order allocation, falling back to smaller orders.
+ * The returned pages are split immediately so each sub-page has its
+ * own refcount and can be freed independently.
+ *
+ * Returns a pointer to the first page on success, or NULL if even
+ * order-1 allocation fails.
+ */
+static struct page *
+svc_rdma_alloc_read_pages(unsigned int nr_pages, unsigned int *order)
+{
+ unsigned int o;
+ struct page *page;
+
+ o = min(get_order(nr_pages << PAGE_SHIFT),
+ SVC_RDMA_CONTIG_MAX_ORDER);
+
+ while (o >= 1) {
+ page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN,
+ o);
+ if (page) {
+ split_page(page, o);
+ *order = o;
+ return page;
+ }
+ o--;
+ }
+ return NULL;
+}
+
+/**
+ * svc_rdma_fill_contig_bvec - Replace rq_pages with a contiguous allocation
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
+ * @bv: bvec entry to fill
+ * @pages_left: number of data pages remaining in the segment
+ * @len_left: bytes remaining in the segment
+ *
+ * On success, fills @bv with a bvec spanning the contiguous range and
+ * advances rc_curpage/rc_page_count. Returns the byte length covered,
+ * or zero if the allocation failed or would overrun rq_maxpages.
+ */
+static unsigned int
+svc_rdma_fill_contig_bvec(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
+ struct bio_vec *bv, unsigned int pages_left,
+ unsigned int len_left)
+{
+ unsigned int order, npages, chunk_pages, chunk_len, i;
+ struct page *page;
+
+ page = svc_rdma_alloc_read_pages(pages_left, &order);
+ if (!page)
+ return 0;
+ npages = 1 << order;
+
+ if (head->rc_curpage + npages > rqstp->rq_maxpages) {
+ for (i = 0; i < npages; i++)
+ __free_page(page + i);
+ return 0;
+ }
+
+ /*
+ * Replace rq_pages[] entries with pages from the contiguous
+ * allocation. If npages exceeds chunk_pages, the extra pages
+ * stay in rq_pages[] for later reuse or normal rqst teardown.
+ */
+ for (i = 0; i < npages; i++) {
+ svc_rqst_page_release(rqstp,
+ rqstp->rq_pages[head->rc_curpage + i]);
+ rqstp->rq_pages[head->rc_curpage + i] = page + i;
+ }
+
+ chunk_pages = min(npages, pages_left);
+ chunk_len = min_t(unsigned int, chunk_pages << PAGE_SHIFT, len_left);
+ bvec_set_page(bv, page, chunk_len, 0);
+ head->rc_page_count += chunk_pages;
+ head->rc_curpage += chunk_pages;
+ return chunk_len;
+}
+
+/*
+ * svc_rdma_fill_page_bvec - Add a single rq_page to the bvec array
+ * @head: context for ongoing I/O
+ * @ctxt: R/W context whose bvec array is being filled
+ * @cur: page to add
+ * @bvec_idx: pointer to current bvec index, not advanced on merge
+ * @len_left: bytes remaining in the segment
+ *
+ * If @cur is physically contiguous with the preceding bvec, it is
+ * merged by extending that bvec's length. Otherwise a new bvec
+ * entry is created. Returns the byte length covered.
+ */
+static unsigned int
+svc_rdma_fill_page_bvec(struct svc_rdma_recv_ctxt *head,
+ struct svc_rdma_rw_ctxt *ctxt, struct page *cur,
+ unsigned int *bvec_idx, unsigned int len_left)
+{
+ unsigned int chunk_len = min_t(unsigned int, PAGE_SIZE, len_left);
+
+ head->rc_page_count++;
+ head->rc_curpage++;
+
+ if (*bvec_idx > 0) {
+ struct bio_vec *prev = &ctxt->rw_bvec[*bvec_idx - 1];
+
+ if (page_to_phys(prev->bv_page) + prev->bv_offset +
+ prev->bv_len == page_to_phys(cur)) {
+ prev->bv_len += chunk_len;
+ return chunk_len;
+ }
+ }
+
+ bvec_set_page(&ctxt->rw_bvec[*bvec_idx], cur, chunk_len, 0);
+ (*bvec_idx)++;
+ return chunk_len;
+}
+
+/**
+ * svc_rdma_build_read_segment_contig - Build RDMA Read WR with contiguous pages
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
+ * @segment: co-ordinates of remote memory to be read
+ *
+ * Greedily allocates higher-order pages to cover the segment,
+ * building one bvec per contiguous chunk. Each allocation is
+ * split so sub-pages have independent refcounts. When a
+ * higher-order allocation fails, remaining pages are covered
+ * individually, merging adjacent pages into the preceding bvec
+ * when they are physically contiguous. The split sub-pages
+ * replace entries in rq_pages[] so downstream cleanup is
+ * unchanged.
+ *
+ * Returns:
+ * %0: the Read WR was constructed successfully
+ * %-ENOMEM: allocation failed
+ * %-EIO: a DMA mapping error occurred
+ */
+static int svc_rdma_build_read_segment_contig(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
+ const struct svc_rdma_segment *segment)
+{
+ struct svcxprt_rdma *rdma = svc_rdma_rqst_rdma(rqstp);
+ struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
+ unsigned int nr_data_pages, bvec_idx;
+ struct svc_rdma_rw_ctxt *ctxt;
+ unsigned int len_left;
+ int ret;
+
+ nr_data_pages = PAGE_ALIGN(segment->rs_length) >> PAGE_SHIFT;
+ if (head->rc_curpage + nr_data_pages > rqstp->rq_maxpages)
+ return -ENOMEM;
+
+ ctxt = svc_rdma_get_rw_ctxt(rdma, nr_data_pages);
+ if (!ctxt)
+ return -ENOMEM;
+
+ bvec_idx = 0;
+ len_left = segment->rs_length;
+ while (len_left) {
+ unsigned int pages_left = PAGE_ALIGN(len_left) >> PAGE_SHIFT;
+ unsigned int chunk_len = 0;
+
+ if (pages_left >= 2)
+ chunk_len = svc_rdma_fill_contig_bvec(rqstp, head,
+ &ctxt->rw_bvec[bvec_idx],
+ pages_left, len_left);
+ if (chunk_len) {
+ bvec_idx++;
+ } else {
+ struct page *cur =
+ rqstp->rq_pages[head->rc_curpage];
+ chunk_len = svc_rdma_fill_page_bvec(head, ctxt, cur,
+ &bvec_idx,
+ len_left);
+ }
+
+ len_left -= chunk_len;
+ }
+
+ ctxt->rw_nents = bvec_idx;
+
+ head->rc_pageoff = offset_in_page(segment->rs_length);
+ if (head->rc_pageoff)
+ head->rc_curpage--;
+
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset,
+ segment->rs_handle, segment->rs_length,
+ DMA_FROM_DEVICE);
+ if (ret < 0)
+ return -EIO;
+ percpu_counter_inc(&svcrdma_stat_read);
+
+ list_add(&ctxt->rw_list, &cc->cc_rwctxts);
+ cc->cc_sqecount += ret;
+ return 0;
+}
+
/**
* svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment
* @rqstp: RPC transaction context
@@ -758,6 +990,14 @@ static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
if (check_add_overflow(head->rc_pageoff, len, &total))
return -EINVAL;
nr_bvec = PAGE_ALIGN(total) >> PAGE_SHIFT;
+
+ if (head->rc_pageoff == 0 && nr_bvec >= 2) {
+ ret = svc_rdma_build_read_segment_contig(rqstp, head,
+ segment);
+ if (ret != -ENOMEM)
+ return ret;
+ }
+
ctxt = svc_rdma_get_rw_ctxt(rdma, nr_bvec);
if (!ctxt)
return -ENOMEM;
@@ -1103,10 +1343,16 @@ static void svc_rdma_clear_rqst_pages(struct svc_rqst *rqstp,
{
unsigned int i;
+ /*
+ * Move only pages containing RPC data into rc_pages[]. Pages
+ * from a contiguous allocation that were not used for the
+ * payload remain in rq_pages[] for subsequent reuse.
+ */
for (i = 0; i < head->rc_page_count; i++) {
head->rc_pages[i] = rqstp->rq_pages[i];
rqstp->rq_pages[i] = NULL;
}
+ rqstp->rq_pages_nfree = head->rc_page_count;
}
/**
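
[Editor's note] The contiguous Read-sink path above hinges on the allocate-then-split idiom: split_page() converts one order-N allocation into 2^N independently refcounted pages, so the sub-pages can later be released one at a time through the normal rq_pages[] teardown. A sketch under kernel-context assumptions; MAX_TRY_ORDER stands in for SVC_RDMA_CONTIG_MAX_ORDER and the helper name is illustrative.

#define MAX_TRY_ORDER	4	/* stand-in for SVC_RDMA_CONTIG_MAX_ORDER */

/* Callers pass nr_pages >= 2, so the starting order is at least 1. */
static struct page *alloc_contig_run(unsigned int nr_pages,
				     unsigned int *order)
{
	unsigned int o = min_t(unsigned int,
			       get_order(nr_pages << PAGE_SHIFT),
			       MAX_TRY_ORDER);
	struct page *page;

	for (; o >= 1; o--) {
		/* __GFP_NORETRY keeps a failed high-order attempt cheap */
		page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN,
				   o);
		if (!page)
			continue;
		/* Give each of the 2^o sub-pages its own refcount */
		split_page(page, o);
		*order = o;
		return page;
	}
	return NULL;	/* caller falls back to one page per bvec */
}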
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 914cd263c2f1..8b3f0c8c14b2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -116,7 +116,8 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
- int node = ibdev_to_node(rdma->sc_cm_id->device);
+ struct ib_device *device = rdma->sc_cm_id->device;
+ int node = ibdev_to_node(device);
struct svc_rdma_send_ctxt *ctxt;
unsigned long pages;
dma_addr_t addr;
@@ -136,9 +137,9 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
if (!buffer)
goto fail2;
- addr = ib_dma_map_single(rdma->sc_pd->device, buffer,
- rdma->sc_max_req_size, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(rdma->sc_pd->device, addr))
+ addr = ib_dma_map_single(device, buffer, rdma->sc_max_req_size,
+ DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device, addr))
goto fail3;
svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);
@@ -149,6 +150,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send;
+ INIT_LIST_HEAD(&ctxt->sc_write_info_list);
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size);
@@ -175,15 +177,14 @@ fail0:
*/
void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
{
+ struct ib_device *device = rdma->sc_cm_id->device;
struct svc_rdma_send_ctxt *ctxt;
struct llist_node *node;
while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) {
ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
- ib_dma_unmap_single(rdma->sc_pd->device,
- ctxt->sc_sges[0].addr,
- rdma->sc_max_req_size,
- DMA_TO_DEVICE);
+ ib_dma_unmap_single(device, ctxt->sc_sges[0].addr,
+ rdma->sc_max_req_size, DMA_TO_DEVICE);
kfree(ctxt->sc_xprt_buf);
kfree(ctxt->sc_pages);
kfree(ctxt);
@@ -237,6 +238,7 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
+ svc_rdma_write_chunk_release(rdma, ctxt);
svc_rdma_reply_chunk_release(rdma, ctxt);
if (ctxt->sc_page_count)
@@ -295,6 +297,117 @@ void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail)
}
/**
+ * svc_rdma_sq_wait - Wait for SQ slots using fair queuing
+ * @rdma: controlling transport
+ * @cid: completion ID for tracing
+ * @sqecount: number of SQ entries needed
+ *
+ * A ticket-based system ensures fair ordering when multiple threads
+ * wait for Send Queue capacity. Each waiter takes a ticket and is
+ * served in order, preventing starvation.
+ *
+ * Protocol invariant: every ticket holder must increment
+ * sc_sq_ticket_tail exactly once, whether the reservation
+ * succeeds or the connection closes. Failing to advance the
+ * tail stalls all subsequent waiters.
+ *
+ * The ticket counters are signed 32-bit atomics. After
+ * wrapping through INT_MAX, the equality check
+ * (tail == ticket) remains correct because both counters
+ * advance monotonically and the comparison uses exact
+ * equality rather than relational operators.
+ *
+ * Return values:
+ * %0: SQ slots were reserved successfully
+ * %-ENOTCONN: The connection was lost
+ */
+int svc_rdma_sq_wait(struct svcxprt_rdma *rdma,
+ const struct rpc_rdma_cid *cid, int sqecount)
+{
+ int ticket;
+
+ /* Fast path: try to reserve SQ slots without waiting.
+ *
+ * A failed reservation temporarily understates sc_sq_avail
+ * until the compensating atomic_add restores it. A Send
+ * completion arriving in that window sees a lower count
+ * than reality, but the value self-corrects once the add
+ * completes. No ordering guarantee is needed here because
+ * the slow path serializes all contended waiters.
+ */
+ if (likely(atomic_sub_return(sqecount, &rdma->sc_sq_avail) >= 0))
+ return 0;
+ atomic_add(sqecount, &rdma->sc_sq_avail);
+
+ /* Slow path: take a ticket and wait in line */
+ ticket = atomic_fetch_inc(&rdma->sc_sq_ticket_head);
+
+ percpu_counter_inc(&svcrdma_stat_sq_starve);
+ trace_svcrdma_sq_full(rdma, cid);
+
+ /* Wait until all earlier tickets have been served */
+ wait_event(rdma->sc_sq_ticket_wait,
+ test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) ||
+ atomic_read(&rdma->sc_sq_ticket_tail) == ticket);
+ if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+ goto out_close;
+
+ /* It's our turn. Wait for enough SQ slots to be available. */
+ while (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
+ atomic_add(sqecount, &rdma->sc_sq_avail);
+
+ wait_event(rdma->sc_send_wait,
+ test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) ||
+ atomic_read(&rdma->sc_sq_avail) >= sqecount);
+ if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+ goto out_close;
+ }
+
+ /* Slots reserved successfully. Let the next waiter proceed. */
+ atomic_inc(&rdma->sc_sq_ticket_tail);
+ wake_up(&rdma->sc_sq_ticket_wait);
+ trace_svcrdma_sq_retry(rdma, cid);
+ return 0;
+
+out_close:
+ atomic_inc(&rdma->sc_sq_ticket_tail);
+ wake_up(&rdma->sc_sq_ticket_wait);
+ return -ENOTCONN;
+}
+
+/**
+ * svc_rdma_post_send_err - Handle ib_post_send failure
+ * @rdma: controlling transport
+ * @cid: completion ID for tracing
+ * @bad_wr: first WR that was not posted
+ * @first_wr: first WR in the chain
+ * @sqecount: number of SQ entries that were reserved
+ * @ret: error code from ib_post_send
+ *
+ * Return values:
+ * %0: At least one WR was posted; a completion handles cleanup
+ * %-ENOTCONN: No WRs were posted; SQ slots are released
+ */
+int svc_rdma_post_send_err(struct svcxprt_rdma *rdma,
+ const struct rpc_rdma_cid *cid,
+ const struct ib_send_wr *bad_wr,
+ const struct ib_send_wr *first_wr,
+ int sqecount, int ret)
+{
+ trace_svcrdma_sq_post_err(rdma, cid, ret);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+
+ /* If even one WR was posted, a Send completion will
+ * return the reserved SQ slots.
+ */
+ if (bad_wr != first_wr)
+ return 0;
+
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+ return -ENOTCONN;
+}
+
+/**
* svc_rdma_wc_send - Invoked by RDMA provider for each polled Send WC
* @cq: Completion Queue context
* @wc: Work Completion object
@@ -336,11 +449,6 @@ flushed:
* that these values remain available after the ib_post_send() call.
* In some error flow cases, svc_rdma_wc_send() releases @ctxt.
*
- * Note there is potential for starvation when the Send Queue is
- * full because there is no order to when waiting threads are
- * awoken. The transport is typically provisioned with a deep
- * enough Send Queue that SQ exhaustion should be a rare event.
- *
* Return values:
* %0: @ctxt's WR chain was posted successfully
* %-ENOTCONN: The connection was lost
@@ -357,47 +465,21 @@ int svc_rdma_post_send(struct svcxprt_rdma *rdma,
might_sleep();
/* Sync the transport header buffer */
- ib_dma_sync_single_for_device(rdma->sc_pd->device,
+ ib_dma_sync_single_for_device(rdma->sc_cm_id->device,
send_wr->sg_list[0].addr,
send_wr->sg_list[0].length,
DMA_TO_DEVICE);
- /* If the SQ is full, wait until an SQ entry is available */
- while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
- if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
- svc_rdma_wake_send_waiters(rdma, sqecount);
-
- /* When the transport is torn down, assume
- * ib_drain_sq() will trigger enough Send
- * completions to wake us. The XPT_CLOSE test
- * above should then cause the while loop to
- * exit.
- */
- percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma, &cid);
- wait_event(rdma->sc_send_wait,
- atomic_read(&rdma->sc_sq_avail) > 0);
- trace_svcrdma_sq_retry(rdma, &cid);
- continue;
- }
-
- trace_svcrdma_post_send(ctxt);
- ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
- if (ret) {
- trace_svcrdma_sq_post_err(rdma, &cid, ret);
- svc_xprt_deferred_close(&rdma->sc_xprt);
-
- /* If even one WR was posted, there will be a
- * Send completion that bumps sc_sq_avail.
- */
- if (bad_wr == first_wr) {
- svc_rdma_wake_send_waiters(rdma, sqecount);
- break;
- }
- }
- return 0;
- }
- return -ENOTCONN;
+ ret = svc_rdma_sq_wait(rdma, &cid, sqecount);
+ if (ret < 0)
+ return ret;
+
+ trace_svcrdma_post_send(ctxt);
+ ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ if (ret)
+ return svc_rdma_post_send_err(rdma, &cid, bad_wr,
+ first_wr, sqecount, ret);
+ return 0;
}
/**
@@ -858,7 +940,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
/* The svc_rqst and all resources it owns are released as soon as
* svc_rdma_sendto returns. Transfer pages under I/O to the ctxt
- * so they are released by the Send completion handler.
+ * so they are released only after Send completion, and not by
+ * svc_rqst_release_pages().
*/
static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
struct svc_rdma_send_ctxt *ctxt)
@@ -870,9 +953,6 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
ctxt->sc_pages[i] = rqstp->rq_respages[i];
rqstp->rq_respages[i] = NULL;
}
-
- /* Prevent svc_xprt_release from releasing pages in rq_pages */
- rqstp->rq_next_page = rqstp->rq_respages;
}
/* Prepare the portion of the RPC Reply that will be transmitted
@@ -976,6 +1056,12 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.num_sge = 1;
sctxt->sc_send_wr.opcode = IB_WR_SEND;
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
+
+ /* Ensure only the error message is posted, not any previously
+ * prepared Write chunk WRs.
+ */
+ sctxt->sc_wr_chain = &sctxt->sc_send_wr;
+ sctxt->sc_sqecount = 1;
if (svc_rdma_post_send(rdma, sctxt))
goto put_ctxt;
return;
@@ -1023,7 +1109,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p)
goto put_ctxt;
- ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_prepare_write_list(rdma, rctxt, sctxt, &rqstp->rq_res);
if (ret < 0)
goto put_ctxt;
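
[Editor's note] svc_rdma_sq_wait() above layers ticket-based fairness over the existing sc_sq_avail counter. The following userspace sketch expresses the same protocol using C11 atomics and a condition variable in place of kernel waitqueues; all names are illustrative, and the XPT_CLOSE handling is omitted.

#include <pthread.h>
#include <stdatomic.h>

struct sq {
	atomic_int avail;	/* free send-queue slots */
	atomic_int head;	/* next ticket to hand out */
	atomic_int tail;	/* ticket currently being served */
	pthread_mutex_t lock;
	pthread_cond_t cond;
};

static void sq_init(struct sq *sq, int depth)
{
	atomic_init(&sq->avail, depth);
	atomic_init(&sq->head, 0);
	atomic_init(&sq->tail, 0);
	pthread_mutex_init(&sq->lock, NULL);
	pthread_cond_init(&sq->cond, NULL);
}

/* Completion side: return @n slots and wake waiters. */
static void sq_release(struct sq *sq, int n)
{
	atomic_fetch_add(&sq->avail, n);
	pthread_mutex_lock(&sq->lock);
	pthread_cond_broadcast(&sq->cond);
	pthread_mutex_unlock(&sq->lock);
}

/* Reserve @n slots; contended waiters are served in ticket order. */
static void sq_reserve(struct sq *sq, int n)
{
	int ticket;

	/* Fast path: uncontended reservation */
	if (atomic_fetch_sub(&sq->avail, n) - n >= 0)
		return;
	atomic_fetch_add(&sq->avail, n);

	/* Slow path: take a ticket and wait until it is served */
	ticket = atomic_fetch_add(&sq->head, 1);
	pthread_mutex_lock(&sq->lock);
	while (atomic_load(&sq->tail) != ticket)
		pthread_cond_wait(&sq->cond, &sq->lock);

	/* Our turn: wait until @n slots are actually free */
	while (atomic_fetch_sub(&sq->avail, n) - n < 0) {
		atomic_fetch_add(&sq->avail, n);
		pthread_cond_wait(&sq->cond, &sq->lock);
	}

	/* Advance the tail exactly once so the next ticket proceeds */
	atomic_fetch_add(&sq->tail, 1);
	pthread_cond_broadcast(&sq->cond);
	pthread_mutex_unlock(&sq->lock);
}

The invariant the kernel-doc stresses carries over directly: the tail advances exactly once per ticket, so a waiter that bails out for any reason must still bump it, or every later ticket stalls.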
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 9b623849723e..f18bc60d9f4f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -179,6 +179,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
init_llist_head(&cma_xprt->sc_recv_ctxts);
init_llist_head(&cma_xprt->sc_rw_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait);
+ init_waitqueue_head(&cma_xprt->sc_sq_ticket_wait);
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
@@ -414,7 +415,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct ib_qp_init_attr qp_attr;
struct ib_device *dev;
int ret = 0;
- RPC_IFDEBUG(struct sockaddr *sap);
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
clear_bit(XPT_CONN, &xprt->xpt_flags);
@@ -478,6 +478,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
+ atomic_set(&newxprt->sc_sq_ticket_head, 0);
+ atomic_set(&newxprt->sc_sq_ticket_tail, 0);
newxprt->sc_pd = ib_alloc_pd(dev, 0);
if (IS_ERR(newxprt->sc_pd)) {
@@ -560,18 +562,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
goto errout;
}
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- dprintk("svcrdma: new connection accepted on device %s:\n", dev->name);
- sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
- dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
- sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
- dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
- dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges);
- dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
- dprintk(" rdma_rw_ctxs : %d\n", ctxts);
- dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
- dprintk(" ord : %d\n", conn_param.initiator_depth);
-#endif
+ if (IS_ENABLED(CONFIG_SUNRPC_DEBUG)) {
+ struct sockaddr *sap;
+
+ dprintk("svcrdma: new connection accepted on device %s:\n", dev->name);
+ sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
+ dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
+ sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
+ dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
+ dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges);
+ dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
+ dprintk(" rdma_rw_ctxs : %d\n", ctxts);
+ dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
+ dprintk(" ord : %d\n", conn_param.initiator_depth);
+ }
return &newxprt->sc_xprt;
@@ -648,7 +652,8 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
* If there are already waiters on the SQ,
* return false.
*/
- if (waitqueue_active(&rdma->sc_send_wait))
+ if (waitqueue_active(&rdma->sc_send_wait) ||
+ waitqueue_active(&rdma->sc_sq_ticket_wait))
return 0;
/* Otherwise return true. */
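
[Editor's note] The IS_ENABLED() conversion in svc_rdma_accept() above swaps preprocessor exclusion for an ordinary branch on a compile-time constant: the debug block is now parsed and type-checked in every configuration and folded away by dead-code elimination when the option is off. A minimal sketch, assuming a hypothetical CONFIG_FOO:

/* Sketch of the IS_ENABLED() idiom; CONFIG_FOO is hypothetical. */
static void debug_report(int value)
{
	/* The branch is compiled in every build, catching breakage in
	 * debug-only code, and is eliminated when CONFIG_FOO is unset.
	 */
	if (IS_ENABLED(CONFIG_FOO))
		pr_debug("value is %d\n", value);
}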