summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chuck Lever <chuck.lever@oracle.com> 2026-05-15 18:35:15 +0300
committer Christian Brauner <brauner@kernel.org> 2026-05-15 18:49:23 +0300
commit cfff672ffcf9a74b560c2002739729f91812e398 (patch)
tree 98200b2d40940058a6ef08e07c01a49ecc90cb93
parent fbaee2a5c406f274d72ed8f98bf9140ae43c5972 (diff)
download linux-cfff672ffcf9a74b560c2002739729f91812e398.tar.xz
nfsd: Cap case-folding probe cost across READDIR entries

NFSv4 READDIR carries a per-entry attrmask. When the attrmask includes FATTR4_CASE_INSENSITIVE or FATTR4_CASE_PRESERVING, nfsd4_encode_fattr4() resolves each non-directory child's case attributes by calling nfsd_get_case_info(), which dget_parent()s back to the directory being read and re-runs the cred swap and LSM probe per child. The encoder amplifies a single answer into one prepare_kernel_cred() allocation, two LSM hooks, and one put_cred() RCU callback for every non-directory entry.

No mainstream NFSv4 client has been observed to populate a READDIR attrmask with these attributes; the Linux client queries them only via SERVER_CAPS at mount time. The exposure is therefore to test clients exploring corner cases and to hostile clients that submit an attrmask designed to multiply server work by rd_dircount.

Probe the directory being read once and cache the result on struct nfsd4_readdir for use by every non-directory child. The probe targets the readdir filehandle's dentry, which is held for the duration of the request, rather than dget_parent() of a child's locklessly-acquired dentry; the latter could be moved out of the directory by a concurrent rename and report attributes from an unrelated parent.

Directory entries continue to be queried individually, because casefold-capable filesystems (ext4, f2fs) report case state per directory. The other callers of nfsd4_encode_fattr4() (single GETATTR, the buffer wrapper) pass NULL for the cache pointer and behave as before.

Reported-by: sashiko-bot <sashiko-bot@kernel.org>
Closes: https://sashiko.dev/#/patchset/20260507-case-sensitivity-v14-0-e62cc8200435@oracle.com?part=14
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Link: https://patch.msgid.link/20260515153515.362266-8-cel@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>

-rw-r--r-- fs/nfsd/nfs4xdr.c | 55
-rw-r--r-- fs/nfsd/xdr4.h | 14
2 files changed, 60 insertions, 9 deletions
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 319007b79d49..20355dc3f1d1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3883,13 +3883,16 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
/*
* Note: @fhp can be NULL; in this case, we might have to compose the filehandle
- * ourselves.
+ * ourselves. @case_cache is NULL for callers that encode a single dentry
+ * (GETATTR, the buffer wrapper); READDIR passes a per-request cache so
+ * non-directory children share the parent's case-folding probe result.
*/
static __be32
nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, const u32 *bmval,
- int ignore_crossmnt)
+ int ignore_crossmnt,
+ struct nfsd_case_attrs_cache *case_cache)
{
DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
struct nfs4_delegation *dp = NULL;
@@ -3999,9 +4002,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
args.fhp = fhp;
if (attrmask[0] & (FATTR4_WORD0_CASE_INSENSITIVE |
FATTR4_WORD0_CASE_PRESERVING)) {
- err = nfsd_get_case_info(dentry, &args.case_insensitive,
- &args.case_preserving);
/*
+ * In a batched encoder (READDIR) every non-directory
+ * child shares the same case-folding answer, so the
+ * directory being read is probed once and the result is
+ * cached. The probe targets case_cache->dir, the held
+ * readdir filehandle's dentry, instead of the child's
+ * locklessly-acquired dentry, which a concurrent rename
+ * could move under an unrelated parent. Directory
+ * entries are queried directly because casefold-capable
+ * filesystems answer per directory.
+ *
* Per RFC 8881 Section 18.7.3, an attribute advertised
* in SUPPORTED_ATTRS must come back with a value or the
* GETATTR must fail. nfsd_get_case_info() fills POSIX
@@ -4011,8 +4022,24 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
* advertises. Other errors fail the operation as the
* spec requires.
*/
- if (err && err != -EOPNOTSUPP)
- goto out_nfserr;
+ if (case_cache && !d_is_dir(dentry)) {
+ if (!case_cache->valid) {
+ err = nfsd_get_case_info(case_cache->dir,
+ &case_cache->insensitive,
+ &case_cache->preserving);
+ if (err && err != -EOPNOTSUPP)
+ goto out_nfserr;
+ case_cache->valid = true;
+ }
+ args.case_insensitive = case_cache->insensitive;
+ args.case_preserving = case_cache->preserving;
+ } else {
+ err = nfsd_get_case_info(dentry,
+ &args.case_insensitive,
+ &args.case_preserving);
+ if (err && err != -EOPNOTSUPP)
+ goto out_nfserr;
+ }
}
if (attrmask[0] & FATTR4_WORD0_ACL) {
@@ -4170,7 +4197,7 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2);
ret = nfsd4_encode_fattr4(rqstp, &xdr, fhp, exp, dentry, bmval,
- ignore_crossmnt);
+ ignore_crossmnt, NULL);
*p = xdr.p;
return ret;
}
@@ -4208,6 +4235,7 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
struct dentry *dentry;
__be32 nfserr;
int ignore_crossmnt = 0;
+ bool crossed = false;
dentry = lookup_one_positive_unlocked(&nop_mnt_idmap,
&QSTR_LEN(name, namlen),
@@ -4244,11 +4272,18 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name,
nfserr = check_nfsd_access(exp, cd->rd_rqstp, false);
if (nfserr)
goto out_put;
+ crossed = true;
}
out_encode:
+ /*
+ * A crossed entry no longer shares a parent with the directory
+ * being read, so it must neither consume nor populate the
+ * per-readdir case-folding cache.
+ */
nfserr = nfsd4_encode_fattr4(cd->rd_rqstp, cd->xdr, NULL, exp, dentry,
- cd->rd_bmval, ignore_crossmnt);
+ cd->rd_bmval, ignore_crossmnt,
+ crossed ? NULL : &cd->rd_case_cache);
out_put:
dput(dentry);
exp_put(exp);
@@ -4495,7 +4530,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr,
/* obj_attributes */
return nfsd4_encode_fattr4(resp->rqstp, xdr, fhp, fhp->fh_export,
- fhp->fh_dentry, getattr->ga_bmval, 0);
+ fhp->fh_dentry, getattr->ga_bmval, 0, NULL);
}
static __be32
@@ -5022,6 +5057,8 @@ static __be32 nfsd4_encode_dirlist4(struct xdr_stream *xdr,
readdir->rd_maxcount = maxcount;
readdir->common.err = 0;
readdir->cookie_offset = 0;
+ readdir->rd_case_cache.dir = readdir->rd_fhp->fh_dentry;
+ readdir->rd_case_cache.valid = false;
offset = readdir->rd_cookie;
status = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, &offset,
&readdir->common, nfsd4_encode_entry4);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 417e9ad9fbb3..615797df218f 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -432,6 +432,19 @@ struct nfsd4_read {
u32 rd_eof; /* response */
};
+/*
+ * Cache the case-folding properties of @dir so a batched encoder
+ * (e.g., READDIR) does not re-probe per child. @dir is the
+ * directory being read, held by the request, so it is stable
+ * against rename for the duration of the cache's lifetime.
+ */
+struct nfsd_case_attrs_cache {
+ struct dentry *dir;
+ bool valid;
+ bool insensitive;
+ bool preserving;
+};
+
struct nfsd4_readdir {
u64 rd_cookie; /* request */
nfs4_verifier rd_verf; /* request */
@@ -444,6 +457,7 @@ struct nfsd4_readdir {
struct readdir_cd common;
struct xdr_stream *xdr;
int cookie_offset;
+ struct nfsd_case_attrs_cache rd_case_cache;
};
struct nfsd4_release_lockowner {