summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-03-19 00:27:11 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2026-03-19 00:27:11 +0300
commit8a30aeb0d1b4e4aaf7f7bae72f20f2ae75385ccb (patch)
tree172b3674cd1563edf6f437aca9b6d09007c04879
parent04a9f1766954687f0a1b7a0f7184dc4f86edcb30 (diff)
parent5133b61aaf437e5f25b1b396b14242a6bb0508e2 (diff)
downloadlinux-8a30aeb0d1b4e4aaf7f7bae72f20f2ae75385ccb.tar.xz
Merge tag 'nfsd-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd fixes from Chuck Lever: - Fix cache_request leak in cache_release() - Fix heap overflow in the NFSv4.0 LOCK replay cache - Hold net reference for the lifetime of /proc/fs/nfs/exports fd - Defer sub-object cleanup in export "put" callbacks * tag 'nfsd-7.0-2' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: nfsd: fix heap overflow in NFSv4.0 LOCK replay cache sunrpc: fix cache_request leak in cache_release NFSD: Hold net reference for the lifetime of /proc/fs/nfs/exports fd NFSD: Defer sub-object cleanup in export put callbacks
-rw-r--r--fs/nfsd/export.c63
-rw-r--r--fs/nfsd/export.h7
-rw-r--r--fs/nfsd/nfs4xdr.c9
-rw-r--r--fs/nfsd/nfsctl.c22
-rw-r--r--fs/nfsd/state.h17
-rw-r--r--net/sunrpc/cache.c26
6 files changed, 118 insertions, 26 deletions
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8fdbba7cad96..8e8a76a44ff0 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -36,19 +36,30 @@
* second map contains a reference to the entry in the first map.
*/
+static struct workqueue_struct *nfsd_export_wq;
+
#define EXPKEY_HASHBITS 8
#define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS)
#define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1)
-static void expkey_put(struct kref *ref)
+static void expkey_release(struct work_struct *work)
{
- struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+ struct svc_expkey *key = container_of(to_rcu_work(work),
+ struct svc_expkey, ek_rwork);
if (test_bit(CACHE_VALID, &key->h.flags) &&
!test_bit(CACHE_NEGATIVE, &key->h.flags))
path_put(&key->ek_path);
auth_domain_put(key->ek_client);
- kfree_rcu(key, ek_rcu);
+ kfree(key);
+}
+
+static void expkey_put(struct kref *ref)
+{
+ struct svc_expkey *key = container_of(ref, struct svc_expkey, h.ref);
+
+ INIT_RCU_WORK(&key->ek_rwork, expkey_release);
+ queue_rcu_work(nfsd_export_wq, &key->ek_rwork);
}
static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -353,11 +364,13 @@ static void export_stats_destroy(struct export_stats *stats)
EXP_STATS_COUNTERS_NUM);
}
-static void svc_export_release(struct rcu_head *rcu_head)
+static void svc_export_release(struct work_struct *work)
{
- struct svc_export *exp = container_of(rcu_head, struct svc_export,
- ex_rcu);
+ struct svc_export *exp = container_of(to_rcu_work(work),
+ struct svc_export, ex_rwork);
+ path_put(&exp->ex_path);
+ auth_domain_put(exp->ex_client);
nfsd4_fslocs_free(&exp->ex_fslocs);
export_stats_destroy(exp->ex_stats);
kfree(exp->ex_stats);
@@ -369,9 +382,8 @@ static void svc_export_put(struct kref *ref)
{
struct svc_export *exp = container_of(ref, struct svc_export, h.ref);
- path_put(&exp->ex_path);
- auth_domain_put(exp->ex_client);
- call_rcu(&exp->ex_rcu, svc_export_release);
+ INIT_RCU_WORK(&exp->ex_rwork, svc_export_release);
+ queue_rcu_work(nfsd_export_wq, &exp->ex_rwork);
}
static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
@@ -1479,6 +1491,36 @@ const struct seq_operations nfs_exports_op = {
.show = e_show,
};
+/**
+ * nfsd_export_wq_init - allocate the export release workqueue
+ *
+ * Called once at module load. The workqueue runs deferred svc_export and
+ * svc_expkey release work scheduled by queue_rcu_work() in the cache put
+ * callbacks.
+ *
+ * Return values:
+ * %0: workqueue allocated
+ * %-ENOMEM: allocation failed
+ */
+int nfsd_export_wq_init(void)
+{
+ nfsd_export_wq = alloc_workqueue("nfsd_export", WQ_UNBOUND, 0);
+ if (!nfsd_export_wq)
+ return -ENOMEM;
+ return 0;
+}
+
+/**
+ * nfsd_export_wq_shutdown - drain and free the export release workqueue
+ *
+ * Called once at module unload. Per-namespace teardown in
+ * nfsd_export_shutdown() has already drained all deferred work.
+ */
+void nfsd_export_wq_shutdown(void)
+{
+ destroy_workqueue(nfsd_export_wq);
+}
+
/*
* Initialize the exports module.
*/
@@ -1540,6 +1582,9 @@ nfsd_export_shutdown(struct net *net)
cache_unregister_net(nn->svc_expkey_cache, net);
cache_unregister_net(nn->svc_export_cache, net);
+ /* Drain deferred export and expkey release work. */
+ rcu_barrier();
+ flush_workqueue(nfsd_export_wq);
cache_destroy_net(nn->svc_expkey_cache, net);
cache_destroy_net(nn->svc_export_cache, net);
svcauth_unix_purge(net);
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index d2b09cd76145..b05399374574 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -7,6 +7,7 @@
#include <linux/sunrpc/cache.h>
#include <linux/percpu_counter.h>
+#include <linux/workqueue.h>
#include <uapi/linux/nfsd/export.h>
#include <linux/nfs4.h>
@@ -75,7 +76,7 @@ struct svc_export {
u32 ex_layout_types;
struct nfsd4_deviceid_map *ex_devid_map;
struct cache_detail *cd;
- struct rcu_head ex_rcu;
+ struct rcu_work ex_rwork;
unsigned long ex_xprtsec_modes;
struct export_stats *ex_stats;
};
@@ -92,7 +93,7 @@ struct svc_expkey {
u32 ek_fsid[6];
struct path ek_path;
- struct rcu_head ek_rcu;
+ struct rcu_work ek_rwork;
};
#define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC))
@@ -110,6 +111,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp,
/*
* Function declarations
*/
+int nfsd_export_wq_init(void);
+void nfsd_export_wq_shutdown(void);
int nfsd_export_init(struct net *);
void nfsd_export_shutdown(struct net *);
void nfsd_export_flush(struct net *);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 41dfba5ab8b8..9d234913100b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -6281,9 +6281,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
int len = xdr->buf->len - (op_status_offset + XDR_UNIT);
so->so_replay.rp_status = op->status;
- so->so_replay.rp_buflen = len;
- read_bytes_from_xdr_buf(xdr->buf, op_status_offset + XDR_UNIT,
+ if (len <= NFSD4_REPLAY_ISIZE) {
+ so->so_replay.rp_buflen = len;
+ read_bytes_from_xdr_buf(xdr->buf,
+ op_status_offset + XDR_UNIT,
so->so_replay.rp_buf, len);
+ } else {
+ so->so_replay.rp_buflen = 0;
+ }
}
status:
op->status = nfsd4_map_status(op->status,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 4cc8a58fa56a..71aabdaa1d15 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -149,9 +149,19 @@ static int exports_net_open(struct net *net, struct file *file)
seq = file->private_data;
seq->private = nn->svc_export_cache;
+ get_net(net);
return 0;
}
+static int exports_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct cache_detail *cd = seq->private;
+
+ put_net(cd->net);
+ return seq_release(inode, file);
+}
+
static int exports_nfsd_open(struct inode *inode, struct file *file)
{
return exports_net_open(inode->i_sb->s_fs_info, file);
@@ -161,7 +171,7 @@ static const struct file_operations exports_nfsd_operations = {
.open = exports_nfsd_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release,
+ .release = exports_release,
};
static int export_features_show(struct seq_file *m, void *v)
@@ -1376,7 +1386,7 @@ static const struct proc_ops exports_proc_ops = {
.proc_open = exports_proc_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
- .proc_release = seq_release,
+ .proc_release = exports_release,
};
static int create_proc_exports_entry(void)
@@ -2259,9 +2269,12 @@ static int __init init_nfsd(void)
if (retval)
goto out_free_pnfs;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
+ retval = nfsd_export_wq_init();
+ if (retval)
+ goto out_free_lockd;
retval = register_pernet_subsys(&nfsd_net_ops);
if (retval < 0)
- goto out_free_lockd;
+ goto out_free_export_wq;
retval = register_cld_notifier();
if (retval)
goto out_free_subsys;
@@ -2290,6 +2303,8 @@ out_free_cld:
unregister_cld_notifier();
out_free_subsys:
unregister_pernet_subsys(&nfsd_net_ops);
+out_free_export_wq:
+ nfsd_export_wq_shutdown();
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_drc_slab_free();
@@ -2310,6 +2325,7 @@ static void __exit exit_nfsd(void)
nfsd4_destroy_laundry_wq();
unregister_cld_notifier();
unregister_pernet_subsys(&nfsd_net_ops);
+ nfsd_export_wq_shutdown();
nfsd_drc_slab_free();
nfsd_lockd_shutdown();
nfsd4_free_slabs();
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 6fcbf1e427d4..c0ca115c3b74 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -541,11 +541,18 @@ struct nfs4_client_reclaim {
struct xdr_netobj cr_princhash;
};
-/* A reasonable value for REPLAY_ISIZE was estimated as follows:
- * The OPEN response, typically the largest, requires
- * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
- * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) +
- * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes
+/*
+ * REPLAY_ISIZE is sized for an OPEN response with delegation:
+ * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) +
+ * 8(verifier) + 4(deleg. type) + 8(deleg. stateid) +
+ * 4(deleg. recall flag) + 20(deleg. space limit) +
+ * ~32(deleg. ace) = 112 bytes
+ *
+ * Some responses can exceed this. A LOCK denial includes the conflicting
+ * lock owner, which can be up to 1024 bytes (NFS4_OPAQUE_LIMIT). Responses
+ * larger than REPLAY_ISIZE are not cached in rp_ibuf; only rp_status is
+ * saved. Enlarging this constant increases the size of every
+ * nfs4_stateowner.
*/
#define NFSD4_REPLAY_ISIZE 112
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 237f67a5d004..ef8b7e8b1e9c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1062,14 +1062,25 @@ static int cache_release(struct inode *inode, struct file *filp,
struct cache_reader *rp = filp->private_data;
if (rp) {
+ struct cache_request *rq = NULL;
+
spin_lock(&queue_lock);
if (rp->offset) {
struct cache_queue *cq;
- for (cq= &rp->q; &cq->list != &cd->queue;
- cq = list_entry(cq->list.next, struct cache_queue, list))
+ for (cq = &rp->q; &cq->list != &cd->queue;
+ cq = list_entry(cq->list.next,
+ struct cache_queue, list))
if (!cq->reader) {
- container_of(cq, struct cache_request, q)
- ->readers--;
+ struct cache_request *cr =
+ container_of(cq,
+ struct cache_request, q);
+ cr->readers--;
+ if (cr->readers == 0 &&
+ !test_bit(CACHE_PENDING,
+ &cr->item->flags)) {
+ list_del(&cr->q.list);
+ rq = cr;
+ }
break;
}
rp->offset = 0;
@@ -1077,9 +1088,14 @@ static int cache_release(struct inode *inode, struct file *filp,
list_del(&rp->q.list);
spin_unlock(&queue_lock);
+ if (rq) {
+ cache_put(rq->item, cd);
+ kfree(rq->buf);
+ kfree(rq);
+ }
+
filp->private_data = NULL;
kfree(rp);
-
}
if (filp->f_mode & FMODE_WRITE) {
atomic_dec(&cd->writers);