summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- fs/exportfs/expfs.c 9
-rw-r--r-- fs/fhandle.c 178
-rw-r--r-- fs/mount.h 1
-rw-r--r-- fs/namespace.c 2
-rw-r--r-- fs/nfsd/nfsfh.c 2
-rw-r--r-- include/linux/exportfs.h 2
6 files changed, 152 insertions, 42 deletions
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 07ea3d62b298..4f2dd4ab4486 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -427,7 +427,7 @@ EXPORT_SYMBOL_GPL(exportfs_encode_fh);
struct dentry *
exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
- int fileid_type,
+ int fileid_type, unsigned int flags,
int (*acceptable)(void *, struct dentry *),
void *context)
{
@@ -445,6 +445,11 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
if (IS_ERR_OR_NULL(result))
return result;
+ if ((flags & EXPORT_FH_DIR_ONLY) && !d_is_dir(result)) {
+ err = -ENOTDIR;
+ goto err_result;
+ }
+
/*
* If no acceptance criteria was specified by caller, a disconnected
* dentry is also accepatable. Callers may use this mode to query if
@@ -581,7 +586,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
{
struct dentry *ret;
- ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
+ ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 0,
acceptable, context);
if (IS_ERR_OR_NULL(ret)) {
if (ret == ERR_PTR(-ENOMEM))
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 8a7f86c2139a..6e8cea16790e 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -115,88 +115,188 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
return err;
}
-static struct vfsmount *get_vfsmount_from_fd(int fd)
+static int get_path_from_fd(int fd, struct path *root)
{
- struct vfsmount *mnt;
-
if (fd == AT_FDCWD) {
struct fs_struct *fs = current->fs;
spin_lock(&fs->lock);
- mnt = mntget(fs->pwd.mnt);
+ *root = fs->pwd;
+ path_get(root);
spin_unlock(&fs->lock);
} else {
struct fd f = fdget(fd);
if (!f.file)
- return ERR_PTR(-EBADF);
- mnt = mntget(f.file->f_path.mnt);
+ return -EBADF;
+ *root = f.file->f_path;
+ path_get(root);
fdput(f);
}
- return mnt;
+
+ return 0;
}
+enum handle_to_path_flags {
+ HANDLE_CHECK_PERMS = (1 << 0),
+ HANDLE_CHECK_SUBTREE = (1 << 1),
+};
+
+struct handle_to_path_ctx {
+ struct path root;
+ enum handle_to_path_flags flags;
+ unsigned int fh_flags;
+};
+
static int vfs_dentry_acceptable(void *context, struct dentry *dentry)
{
- return 1;
+ struct handle_to_path_ctx *ctx = context;
+ struct user_namespace *user_ns = current_user_ns();
+ struct dentry *d, *root = ctx->root.dentry;
+ struct mnt_idmap *idmap = mnt_idmap(ctx->root.mnt);
+ int retval = 0;
+
+ if (!root)
+ return 1;
+
+ /* Old permission model with global CAP_DAC_READ_SEARCH. */
+ if (!ctx->flags)
+ return 1;
+
+ /*
+ * This walk is racy as we're not taking rename_lock, but we're able
+ * to ignore permissions and only need an approximation of whether
+ * we could have followed a path to the file.
+ *
+ * It's also potentially expensive on some filesystems, especially if
+ * there is a deep path.
+ */
+ d = dget(dentry);
+ while (d != root && !IS_ROOT(d)) {
+ struct dentry *parent = dget_parent(d);
+
+ /*
+ * We know that we have the ability to override DAC permissions
+ * as we've verified this earlier via CAP_DAC_READ_SEARCH. But
+ * we also need to make sure that there aren't any unmapped
+ * inodes in the path that would prevent us from reaching the
+ * file.
+ */
+ if (!privileged_wrt_inode_uidgid(user_ns, idmap,
+ d_inode(parent))) {
+ dput(d);
+ dput(parent);
+ return retval;
+ }
+
+ dput(d);
+ d = parent;
+ }
+
+ if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root)
+ retval = 1;
+ WARN_ON_ONCE(d != root && d != root->d_sb->s_root);
+ dput(d);
+ return retval;
}
-static int do_handle_to_path(int mountdirfd, struct file_handle *handle,
- struct path *path)
+static int do_handle_to_path(struct file_handle *handle, struct path *path,
+ struct handle_to_path_ctx *ctx)
{
- int retval = 0;
int handle_dwords;
+ struct vfsmount *mnt = ctx->root.mnt;
- path->mnt = get_vfsmount_from_fd(mountdirfd);
- if (IS_ERR(path->mnt)) {
- retval = PTR_ERR(path->mnt);
- goto out_err;
- }
/* change the handle size to multiple of sizeof(u32) */
handle_dwords = handle->handle_bytes >> 2;
- path->dentry = exportfs_decode_fh(path->mnt,
+ path->dentry = exportfs_decode_fh_raw(mnt,
(struct fid *)handle->f_handle,
handle_dwords, handle->handle_type,
- vfs_dentry_acceptable, NULL);
- if (IS_ERR(path->dentry)) {
- retval = PTR_ERR(path->dentry);
- goto out_mnt;
+ ctx->fh_flags,
+ vfs_dentry_acceptable, ctx);
+ if (IS_ERR_OR_NULL(path->dentry)) {
+ if (path->dentry == ERR_PTR(-ENOMEM))
+ return -ENOMEM;
+ return -ESTALE;
}
+ path->mnt = mntget(mnt);
return 0;
-out_mnt:
- mntput(path->mnt);
-out_err:
- return retval;
+}
+
+/*
+ * Allow relaxed permissions of file handles if the caller has the
+ * ability to mount the filesystem or create a bind-mount of the
+ * provided @mountdirfd.
+ *
+ * In both cases the caller may be able to get an unobstructed way to
+ * the encoded file handle. If the caller is only able to create a
+ * bind-mount we need to verify that there are no locked mounts on top
+ * of it that could prevent us from getting to the encoded file.
+ *
+ * In principle, locked mounts can prevent the caller from mounting the
+ * filesystem but that only applies to procfs and sysfs, neither of which
+ * supports decoding file handles.
+ */
+static inline bool may_decode_fh(struct handle_to_path_ctx *ctx,
+ unsigned int o_flags)
+{
+ struct path *root = &ctx->root;
+
+ /*
+ * Restrict to O_DIRECTORY to provide a deterministic API that avoids
+ * confusing behavior in the face of disconnected non-dir dentries.
+ *
+ * There's only one dentry for each directory inode (VFS rule)...
+ */
+ if (!(o_flags & O_DIRECTORY))
+ return false;
+
+ if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+ ctx->flags = HANDLE_CHECK_PERMS;
+ else if (is_mounted(root->mnt) &&
+ ns_capable(real_mount(root->mnt)->mnt_ns->user_ns,
+ CAP_SYS_ADMIN) &&
+ !has_locked_children(real_mount(root->mnt), root->dentry))
+ ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE;
+ else
+ return false;
+
+ /* Are we able to override DAC permissions? */
+ if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH))
+ return false;
+
+ ctx->fh_flags = EXPORT_FH_DIR_ONLY;
+ return true;
}
static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
- struct path *path)
+ struct path *path, unsigned int o_flags)
{
int retval = 0;
struct file_handle f_handle;
struct file_handle *handle = NULL;
+ struct handle_to_path_ctx ctx = {};
- /*
- * With handle we don't look at the execute bit on the
- * directory. Ideally we would like CAP_DAC_SEARCH.
- * But we don't have that
- */
- if (!capable(CAP_DAC_READ_SEARCH)) {
- retval = -EPERM;
+ retval = get_path_from_fd(mountdirfd, &ctx.root);
+ if (retval)
goto out_err;
+
+ if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) {
+ retval = -EPERM;
+ goto out_path;
}
+
if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) {
retval = -EFAULT;
- goto out_err;
+ goto out_path;
}
if ((f_handle.handle_bytes > MAX_HANDLE_SZ) ||
(f_handle.handle_bytes == 0)) {
retval = -EINVAL;
- goto out_err;
+ goto out_path;
}
handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes),
GFP_KERNEL);
if (!handle) {
retval = -ENOMEM;
- goto out_err;
+ goto out_path;
}
/* copy the full handle */
*handle = f_handle;
@@ -207,10 +307,12 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
goto out_handle;
}
- retval = do_handle_to_path(mountdirfd, handle, path);
+ retval = do_handle_to_path(handle, path, &ctx);
out_handle:
kfree(handle);
+out_path:
+ path_put(&ctx.root);
out_err:
return retval;
}
@@ -223,7 +325,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh,
struct file *file;
int fd;
- retval = handle_to_path(mountdirfd, ufh, &path);
+ retval = handle_to_path(mountdirfd, ufh, &path, open_flag);
if (retval)
return retval;
diff --git a/fs/mount.h b/fs/mount.h
index 4a42fc68f4cc..4adce73211ae 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -152,3 +152,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
}
extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
+bool has_locked_children(struct mount *mnt, struct dentry *dentry);
diff --git a/fs/namespace.c b/fs/namespace.c
index 5a51315c6678..4386787210c7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2078,7 +2078,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
-static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+bool has_locked_children(struct mount *mnt, struct dentry *dentry)
{
struct mount *child;
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 0b75305fb5f5..dd4e11a703aa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -247,7 +247,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
dentry = dget(exp->ex_path.dentry);
else {
dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
- data_left, fileid_type,
+ data_left, fileid_type, 0,
nfsd_acceptable, exp);
if (IS_ERR_OR_NULL(dentry)) {
trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index bb37ad5cc954..893a1d21dc1c 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -158,6 +158,7 @@ struct fid {
#define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */
#define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */
+#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */
/**
* struct export_operations - for nfsd to communicate with file systems
@@ -305,6 +306,7 @@ static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid,
extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt,
struct fid *fid, int fh_len,
int fileid_type,
+ unsigned int flags,
int (*acceptable)(void *, struct dentry *),
void *context);
extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,