summaryrefslogtreecommitdiff
path: root/fs/fuse
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fuse')
-rw-r--r--fs/fuse/Makefile2
-rw-r--r--fs/fuse/acl.c7
-rw-r--r--fs/fuse/cuse.c12
-rw-r--r--fs/fuse/dev.c27
-rw-r--r--fs/fuse/dir.c12
-rw-r--r--fs/fuse/file.c506
-rw-r--r--fs/fuse/fuse_i.h54
-rw-r--r--fs/fuse/inode.c12
-rw-r--r--fs/fuse/ioctl.c490
-rw-r--r--fs/fuse/readdir.c2
-rw-r--r--fs/fuse/virtio_fs.c37
-rw-r--r--fs/fuse/xattr.c9
12 files changed, 696 insertions, 474 deletions
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 8c7021fb2cd4..0c48b35c058d 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
obj-$(CONFIG_CUSE) += cuse.o
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
-fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
+fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
virtiofs-y := virtio_fs.o
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index e9c0f916349d..52b165319be1 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -71,6 +71,7 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
return -EINVAL;
if (acl) {
+ unsigned int extra_flags = 0;
/*
* Fuse userspace is responsible for updating access
* permissions in the inode, if needed. fuse_setxattr
@@ -94,7 +95,11 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
return ret;
}
- ret = fuse_setxattr(inode, name, value, size, 0);
+ if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) &&
+ !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
+ extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID;
+
+ ret = fuse_setxattr(inode, name, value, size, 0, extra_flags);
kfree(value);
} else {
ret = fuse_removexattr(inode, name);
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 45082269e698..c7d882a9fe33 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -511,20 +511,18 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns,
&fuse_dev_fiq_ops, NULL);
+ cc->fc.release = cuse_fc_release;
fud = fuse_dev_alloc_install(&cc->fc);
- if (!fud) {
- kfree(cc);
+ fuse_conn_put(&cc->fc);
+ if (!fud)
return -ENOMEM;
- }
INIT_LIST_HEAD(&cc->list);
- cc->fc.release = cuse_fc_release;
cc->fc.initialized = 1;
rc = cuse_send_init(cc);
if (rc) {
fuse_dev_free(fud);
- fuse_conn_put(&cc->fc);
return rc;
}
file->private_data = fud;
@@ -561,8 +559,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file)
unregister_chrdev_region(cc->cdev->dev, 1);
cdev_del(cc->cdev);
}
- /* Base reference is now owned by "fud" */
- fuse_conn_put(&cc->fc);
rc = fuse_dev_release(inode, file); /* puts the base reference */
@@ -627,6 +623,8 @@ static int __init cuse_init(void)
cuse_channel_fops.owner = THIS_MODULE;
cuse_channel_fops.open = cuse_channel_open;
cuse_channel_fops.release = cuse_channel_release;
+ /* CUSE is not prepared for FUSE_DEV_IOC_CLONE */
+ cuse_channel_fops.unlocked_ioctl = NULL;
cuse_class = class_create(THIS_MODULE, "cuse");
if (IS_ERR(cuse_class))
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index c6636b4c4ccf..a5ceccc5ef00 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2229,19 +2229,18 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- int err = -ENOTTY;
-
- if (cmd == FUSE_DEV_IOC_CLONE) {
- int oldfd;
-
- err = -EFAULT;
- if (!get_user(oldfd, (__u32 __user *) arg)) {
+ int res;
+ int oldfd;
+ struct fuse_dev *fud = NULL;
+
+ switch (cmd) {
+ case FUSE_DEV_IOC_CLONE:
+ res = -EFAULT;
+ if (!get_user(oldfd, (__u32 __user *)arg)) {
struct file *old = fget(oldfd);
- err = -EINVAL;
+ res = -EINVAL;
if (old) {
- struct fuse_dev *fud = NULL;
-
/*
* Check against file->f_op because CUSE
* uses the same ioctl handler.
@@ -2252,14 +2251,18 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
if (fud) {
mutex_lock(&fuse_mutex);
- err = fuse_device_clone(fud->fc, file);
+ res = fuse_device_clone(fud->fc, file);
mutex_unlock(&fuse_mutex);
}
fput(old);
}
}
+ break;
+ default:
+ res = -ENOTTY;
+ break;
}
- return err;
+ return res;
}
const struct file_operations fuse_dev_operations = {
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 06a18700a845..1b6c001a7dd1 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -252,7 +252,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
if (ret == -ENOMEM)
goto out;
if (ret || fuse_invalid_attr(&outarg.attr) ||
- (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ inode_wrong_type(inode, outarg.attr.mode))
goto invalid;
forget_all_cached_acls(inode);
@@ -508,7 +508,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
* 'mknod' + 'open' requests.
*/
static int fuse_create_open(struct inode *dir, struct dentry *entry,
- struct file *file, unsigned flags,
+ struct file *file, unsigned int flags,
umode_t mode)
{
int err;
@@ -1054,7 +1054,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
err = fuse_simple_request(fm, &args);
if (!err) {
if (fuse_invalid_attr(&outarg.attr) ||
- (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ inode_wrong_type(inode, outarg.attr.mode)) {
fuse_make_bad(inode);
err = -EIO;
} else {
@@ -1703,7 +1703,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
}
if (fuse_invalid_attr(&outarg.attr) ||
- (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
+ inode_wrong_type(inode, outarg.attr.mode)) {
fuse_make_bad(inode);
err = -EIO;
goto error;
@@ -1866,6 +1866,8 @@ static const struct inode_operations fuse_dir_inode_operations = {
.listxattr = fuse_listxattr,
.get_acl = fuse_get_acl,
.set_acl = fuse_set_acl,
+ .fileattr_get = fuse_fileattr_get,
+ .fileattr_set = fuse_fileattr_set,
};
static const struct file_operations fuse_dir_operations = {
@@ -1886,6 +1888,8 @@ static const struct inode_operations fuse_common_inode_operations = {
.listxattr = fuse_listxattr,
.get_acl = fuse_get_acl,
.set_acl = fuse_set_acl,
+ .fileattr_get = fuse_fileattr_get,
+ .fileattr_set = fuse_fileattr_set,
};
static const struct inode_operations fuse_symlink_inode_operations = {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8cccecb55fb8..09ef2a4d25ed 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -14,32 +14,20 @@
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/module.h>
-#include <linux/compat.h>
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/fs.h>
-static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
- struct fuse_page_desc **desc)
-{
- struct page **pages;
-
- pages = kzalloc(npages * (sizeof(struct page *) +
- sizeof(struct fuse_page_desc)), flags);
- *desc = (void *) (pages + npages);
-
- return pages;
-}
-
-static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
- int opcode, struct fuse_open_out *outargp)
+static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
+ unsigned int open_flags, int opcode,
+ struct fuse_open_out *outargp)
{
struct fuse_open_in inarg;
FUSE_ARGS(args);
memset(&inarg, 0, sizeof(inarg));
- inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
+ inarg.flags = open_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
if (!fm->fc->atomic_o_trunc)
inarg.flags &= ~O_TRUNC;
@@ -136,8 +124,8 @@ static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
}
}
-int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
- bool isdir)
+struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
+ unsigned int open_flags, bool isdir)
{
struct fuse_conn *fc = fm->fc;
struct fuse_file *ff;
@@ -145,7 +133,7 @@ int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
ff = fuse_file_alloc(fm);
if (!ff)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
ff->fh = 0;
/* Default for no-open */
@@ -154,14 +142,14 @@ int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
struct fuse_open_out outarg;
int err;
- err = fuse_send_open(fm, nodeid, file, opcode, &outarg);
+ err = fuse_send_open(fm, nodeid, open_flags, opcode, &outarg);
if (!err) {
ff->fh = outarg.fh;
ff->open_flags = outarg.open_flags;
} else if (err != -ENOSYS) {
fuse_file_free(ff);
- return err;
+ return ERR_PTR(err);
} else {
if (isdir)
fc->no_opendir = 1;
@@ -174,9 +162,19 @@ int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
ff->open_flags &= ~FOPEN_DIRECT_IO;
ff->nodeid = nodeid;
- file->private_data = ff;
- return 0;
+ return ff;
+}
+
+int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
+ bool isdir)
+{
+ struct fuse_file *ff = fuse_file_open(fm, nodeid, file->f_flags, isdir);
+
+ if (!IS_ERR(ff))
+ file->private_data = ff;
+
+ return PTR_ERR_OR_ZERO(ff);
}
EXPORT_SYMBOL_GPL(fuse_do_open);
@@ -268,7 +266,7 @@ out:
}
static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
- int flags, int opcode)
+ unsigned int flags, int opcode)
{
struct fuse_conn *fc = ff->fm->fc;
struct fuse_release_args *ra = ff->release_args;
@@ -297,22 +295,21 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
ra->args.nocreds = true;
}
-void fuse_release_common(struct file *file, bool isdir)
+void fuse_file_release(struct inode *inode, struct fuse_file *ff,
+ unsigned int open_flags, fl_owner_t id, bool isdir)
{
- struct fuse_inode *fi = get_fuse_inode(file_inode(file));
- struct fuse_file *ff = file->private_data;
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_release_args *ra = ff->release_args;
int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
- fuse_prepare_release(fi, ff, file->f_flags, opcode);
+ fuse_prepare_release(fi, ff, open_flags, opcode);
if (ff->flock) {
ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
- ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc,
- (fl_owner_t) file);
+ ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id);
}
/* Hold inode until release is finished */
- ra->inode = igrab(file_inode(file));
+ ra->inode = igrab(inode);
/*
* Normally this will send the RELEASE request, however if
@@ -326,6 +323,12 @@ void fuse_release_common(struct file *file, bool isdir)
fuse_file_put(ff, ff->fm->fc->destroy, isdir);
}
+void fuse_release_common(struct file *file, bool isdir)
+{
+ fuse_file_release(file_inode(file), file->private_data, file->f_flags,
+ (fl_owner_t) file, isdir);
+}
+
static int fuse_open(struct inode *inode, struct file *file)
{
return fuse_open_common(inode, file, false);
@@ -345,7 +348,8 @@ static int fuse_release(struct inode *inode, struct file *file)
return 0;
}
-void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags)
+void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
+ unsigned int flags)
{
WARN_ON(refcount_read(&ff->count) > 1);
fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
@@ -798,21 +802,12 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
{
struct fuse_conn *fc = get_fuse_conn(inode);
- if (fc->writeback_cache) {
- /*
- * A hole in a file. Some data after the hole are in page cache,
- * but have not reached the client fs yet. So, the hole is not
- * present there.
- */
- int i;
- int start_idx = num_read >> PAGE_SHIFT;
- size_t off = num_read & (PAGE_SIZE - 1);
-
- for (i = start_idx; i < ap->num_pages; i++) {
- zero_user_segment(ap->pages[i], off, PAGE_SIZE);
- off = 0;
- }
- } else {
+ /*
+ * If writeback_cache is enabled, a short read means there's a hole in
+ * the file. Some data after the hole is in page cache, but has not
+ * reached the client fs yet. So the hole is not present there.
+ */
+ if (!fc->writeback_cache) {
loff_t pos = page_offset(ap->pages[0]) + num_read;
fuse_read_update_size(inode, pos, attr_ver);
}
@@ -1099,6 +1094,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
unsigned int offset, i;
+ bool short_write;
int err;
for (i = 0; i < ap->num_pages; i++)
@@ -1113,32 +1109,38 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
if (!err && ia->write.out.size > count)
err = -EIO;
+ short_write = ia->write.out.size < count;
offset = ap->descs[0].offset;
count = ia->write.out.size;
for (i = 0; i < ap->num_pages; i++) {
struct page *page = ap->pages[i];
- if (!err && !offset && count >= PAGE_SIZE)
- SetPageUptodate(page);
-
- if (count > PAGE_SIZE - offset)
- count -= PAGE_SIZE - offset;
- else
- count = 0;
- offset = 0;
-
- unlock_page(page);
+ if (err) {
+ ClearPageUptodate(page);
+ } else {
+ if (count >= PAGE_SIZE - offset)
+ count -= PAGE_SIZE - offset;
+ else {
+ if (short_write)
+ ClearPageUptodate(page);
+ count = 0;
+ }
+ offset = 0;
+ }
+ if (ia->write.page_locked && (i == ap->num_pages - 1))
+ unlock_page(page);
put_page(page);
}
return err;
}
-static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
+static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos,
unsigned int max_pages)
{
+ struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
unsigned offset = pos & (PAGE_SIZE - 1);
size_t count = 0;
@@ -1191,6 +1193,16 @@ static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap,
if (offset == PAGE_SIZE)
offset = 0;
+ /* If we copied full page, mark it uptodate */
+ if (tmp == PAGE_SIZE)
+ SetPageUptodate(page);
+
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ } else {
+ ia->write.page_locked = true;
+ break;
+ }
if (!fc->big_writes)
break;
} while (iov_iter_count(ii) && count < fc->max_write &&
@@ -1234,7 +1246,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb,
break;
}
- count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages);
+ count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages);
if (count <= 0) {
err = count;
} else {
@@ -1346,16 +1358,6 @@ out:
return written ? written : err;
}
-static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
- unsigned int index,
- unsigned int nr_pages)
-{
- int i;
-
- for (i = index; i < index + nr_pages; i++)
- descs[i].length = PAGE_SIZE - descs[i].offset;
-}
-
static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii)
{
return (unsigned long)ii->iov->iov_base + ii->iov_offset;
@@ -1759,8 +1761,17 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
container_of(args, typeof(*wpa), ia.ap.args);
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
mapping_set_error(inode->i_mapping, error);
+ /*
+ * A writeback finished and this might have updated mtime/ctime on
+ * server making local mtime/ctime stale. Hence invalidate attrs.
+ * Do this only if writeback_cache is not enabled. If writeback_cache
+ * is enabled, we trust local ctime/mtime.
+ */
+ if (!fc->writeback_cache)
+ fuse_invalidate_attr(inode);
spin_lock(&fi->lock);
rb_erase(&wpa->writepages_entry, &fi->writepages);
while (wpa->next) {
@@ -2637,363 +2648,6 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence)
}
/*
- * CUSE servers compiled on 32bit broke on 64bit kernels because the
- * ABI was defined to be 'struct iovec' which is different on 32bit
- * and 64bit. Fortunately we can determine which structure the server
- * used from the size of the reply.
- */
-static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
- size_t transferred, unsigned count,
- bool is_compat)
-{
-#ifdef CONFIG_COMPAT
- if (count * sizeof(struct compat_iovec) == transferred) {
- struct compat_iovec *ciov = src;
- unsigned i;
-
- /*
- * With this interface a 32bit server cannot support
- * non-compat (i.e. ones coming from 64bit apps) ioctl
- * requests
- */
- if (!is_compat)
- return -EINVAL;
-
- for (i = 0; i < count; i++) {
- dst[i].iov_base = compat_ptr(ciov[i].iov_base);
- dst[i].iov_len = ciov[i].iov_len;
- }
- return 0;
- }
-#endif
-
- if (count * sizeof(struct iovec) != transferred)
- return -EIO;
-
- memcpy(dst, src, transferred);
- return 0;
-}
-
-/* Make sure iov_length() won't overflow */
-static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
- size_t count)
-{
- size_t n;
- u32 max = fc->max_pages << PAGE_SHIFT;
-
- for (n = 0; n < count; n++, iov++) {
- if (iov->iov_len > (size_t) max)
- return -ENOMEM;
- max -= iov->iov_len;
- }
- return 0;
-}
-
-static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
- void *src, size_t transferred, unsigned count,
- bool is_compat)
-{
- unsigned i;
- struct fuse_ioctl_iovec *fiov = src;
-
- if (fc->minor < 16) {
- return fuse_copy_ioctl_iovec_old(dst, src, transferred,
- count, is_compat);
- }
-
- if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
- return -EIO;
-
- for (i = 0; i < count; i++) {
- /* Did the server supply an inappropriate value? */
- if (fiov[i].base != (unsigned long) fiov[i].base ||
- fiov[i].len != (unsigned long) fiov[i].len)
- return -EIO;
-
- dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
- dst[i].iov_len = (size_t) fiov[i].len;
-
-#ifdef CONFIG_COMPAT
- if (is_compat &&
- (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
- (compat_size_t) dst[i].iov_len != fiov[i].len))
- return -EIO;
-#endif
- }
-
- return 0;
-}
-
-
-/*
- * For ioctls, there is no generic way to determine how much memory
- * needs to be read and/or written. Furthermore, ioctls are allowed
- * to dereference the passed pointer, so the parameter requires deep
- * copying but FUSE has no idea whatsoever about what to copy in or
- * out.
- *
- * This is solved by allowing FUSE server to retry ioctl with
- * necessary in/out iovecs. Let's assume the ioctl implementation
- * needs to read in the following structure.
- *
- * struct a {
- * char *buf;
- * size_t buflen;
- * }
- *
- * On the first callout to FUSE server, inarg->in_size and
- * inarg->out_size will be NULL; then, the server completes the ioctl
- * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
- * the actual iov array to
- *
- * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } }
- *
- * which tells FUSE to copy in the requested area and retry the ioctl.
- * On the second round, the server has access to the structure and
- * from that it can tell what to look for next, so on the invocation,
- * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to
- *
- * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) },
- * { .iov_base = a.buf, .iov_len = a.buflen } }
- *
- * FUSE will copy both struct a and the pointed buffer from the
- * process doing the ioctl and retry ioctl with both struct a and the
- * buffer.
- *
- * This time, FUSE server has everything it needs and completes ioctl
- * without FUSE_IOCTL_RETRY which finishes the ioctl call.
- *
- * Copying data out works the same way.
- *
- * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
- * automatically initializes in and out iovs by decoding @cmd with
- * _IOC_* macros and the server is not allowed to request RETRY. This
- * limits ioctl data transfers to well-formed ioctls and is the forced
- * behavior for all FUSE servers.
- */
-long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
- unsigned int flags)
-{
- struct fuse_file *ff = file->private_data;
- struct fuse_mount *fm = ff->fm;
- struct fuse_ioctl_in inarg = {
- .fh = ff->fh,
- .cmd = cmd,
- .arg = arg,
- .flags = flags
- };
- struct fuse_ioctl_out outarg;
- struct iovec *iov_page = NULL;
- struct iovec *in_iov = NULL, *out_iov = NULL;
- unsigned int in_iovs = 0, out_iovs = 0, max_pages;
- size_t in_size, out_size, c;
- ssize_t transferred;
- int err, i;
- struct iov_iter ii;
- struct fuse_args_pages ap = {};
-
-#if BITS_PER_LONG == 32
- inarg.flags |= FUSE_IOCTL_32BIT;
-#else
- if (flags & FUSE_IOCTL_COMPAT) {
- inarg.flags |= FUSE_IOCTL_32BIT;
-#ifdef CONFIG_X86_X32
- if (in_x32_syscall())
- inarg.flags |= FUSE_IOCTL_COMPAT_X32;
-#endif
- }
-#endif
-
- /* assume all the iovs returned by client always fits in a page */
- BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
-
- err = -ENOMEM;
- ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
- iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
- if (!ap.pages || !iov_page)
- goto out;
-
- fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
-
- /*
- * If restricted, initialize IO parameters as encoded in @cmd.
- * RETRY from server is not allowed.
- */
- if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
- struct iovec *iov = iov_page;
-
- iov->iov_base = (void __user *)arg;
-
- switch (cmd) {
- case FS_IOC_GETFLAGS:
- case FS_IOC_SETFLAGS:
- iov->iov_len = sizeof(int);
- break;
- default:
- iov->iov_len = _IOC_SIZE(cmd);
- break;
- }
-
- if (_IOC_DIR(cmd) & _IOC_WRITE) {
- in_iov = iov;
- in_iovs = 1;
- }
-
- if (_IOC_DIR(cmd) & _IOC_READ) {
- out_iov = iov;
- out_iovs = 1;
- }
- }
-
- retry:
- inarg.in_size = in_size = iov_length(in_iov, in_iovs);
- inarg.out_size = out_size = iov_length(out_iov, out_iovs);
-
- /*
- * Out data can be used either for actual out data or iovs,
- * make sure there always is at least one page.
- */
- out_size = max_t(size_t, out_size, PAGE_SIZE);
- max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);
-
- /* make sure there are enough buffer pages and init request with them */
- err = -ENOMEM;
- if (max_pages > fm->fc->max_pages)
- goto out;
- while (ap.num_pages < max_pages) {
- ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
- if (!ap.pages[ap.num_pages])
- goto out;
- ap.num_pages++;
- }
-
-
- /* okay, let's send it to the client */
- ap.args.opcode = FUSE_IOCTL;
- ap.args.nodeid = ff->nodeid;
- ap.args.in_numargs = 1;
- ap.args.in_args[0].size = sizeof(inarg);
- ap.args.in_args[0].value = &inarg;
- if (in_size) {
- ap.args.in_numargs++;
- ap.args.in_args[1].size = in_size;
- ap.args.in_pages = true;
-
- err = -EFAULT;
- iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
- for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
- c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
- if (c != PAGE_SIZE && iov_iter_count(&ii))
- goto out;
- }
- }
-
- ap.args.out_numargs = 2;
- ap.args.out_args[0].size = sizeof(outarg);
- ap.args.out_args[0].value = &outarg;
- ap.args.out_args[1].size = out_size;
- ap.args.out_pages = true;
- ap.args.out_argvar = true;
-
- transferred = fuse_simple_request(fm, &ap.args);
- err = transferred;
- if (transferred < 0)
- goto out;
-
- /* did it ask for retry? */
- if (outarg.flags & FUSE_IOCTL_RETRY) {
- void *vaddr;
-
- /* no retry if in restricted mode */
- err = -EIO;
- if (!(flags & FUSE_IOCTL_UNRESTRICTED))
- goto out;
-
- in_iovs = outarg.in_iovs;
- out_iovs = outarg.out_iovs;
-
- /*
- * Make sure things are in boundary, separate checks
- * are to protect against overflow.
- */
- err = -ENOMEM;
- if (in_iovs > FUSE_IOCTL_MAX_IOV ||
- out_iovs > FUSE_IOCTL_MAX_IOV ||
- in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
- goto out;
-
- vaddr = kmap_atomic(ap.pages[0]);
- err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
- transferred, in_iovs + out_iovs,
- (flags & FUSE_IOCTL_COMPAT) != 0);
- kunmap_atomic(vaddr);
- if (err)
- goto out;
-
- in_iov = iov_page;
- out_iov = in_iov + in_iovs;
-
- err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs);
- if (err)
- goto out;
-
- err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs);
- if (err)
- goto out;
-
- goto retry;
- }
-
- err = -EIO;
- if (transferred > inarg.out_size)
- goto out;
-
- err = -EFAULT;
- iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
- for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
- c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
- if (c != PAGE_SIZE && iov_iter_count(&ii))
- goto out;
- }
- err = 0;
- out:
- free_page((unsigned long) iov_page);
- while (ap.num_pages)
- __free_page(ap.pages[--ap.num_pages]);
- kfree(ap.pages);
-
- return err ? err : outarg.result;
-}
-EXPORT_SYMBOL_GPL(fuse_do_ioctl);
-
-long fuse_ioctl_common(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int flags)
-{
- struct inode *inode = file_inode(file);
- struct fuse_conn *fc = get_fuse_conn(inode);
-
- if (!fuse_allow_current_process(fc))
- return -EACCES;
-
- if (fuse_is_bad(inode))
- return -EIO;
-
- return fuse_do_ioctl(file, cmd, arg, flags);
-}
-
-static long fuse_file_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- return fuse_ioctl_common(file, cmd, arg, 0);
-}
-
-static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
-{
- return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
-}
-
-/*
* All files which have been polled are linked to RB tree
* fuse_conn->polled_files which is indexed by kh. Walk the tree and
* find the matching one.
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 68cca8d4db6e..7e463e220053 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -552,9 +552,12 @@ struct fuse_conn {
/** Maximum write size */
unsigned max_write;
- /** Maxmum number of pages that can be used in a single request */
+ /** Maximum number of pages that can be used in a single request */
unsigned int max_pages;
+ /** Constrain ->max_pages to this value during feature negotiation */
+ unsigned int max_pages_limit;
+
/** Input queue */
struct fuse_iqueue iq;
@@ -668,6 +671,9 @@ struct fuse_conn {
/** Is setxattr not implemented by fs? */
unsigned no_setxattr:1;
+ /** Does file server support extended setxattr */
+ unsigned setxattr_ext:1;
+
/** Is getxattr not implemented by fs? */
unsigned no_getxattr:1;
@@ -713,7 +719,7 @@ struct fuse_conn {
/** Use enhanced/automatic page cache invalidation. */
unsigned auto_inval_data:1;
- /** Filesystem is fully reponsible for page cache invalidation. */
+ /** Filesystem is fully responsible for page cache invalidation. */
unsigned explicit_inval_data:1;
/** Does the filesystem support readdirplus? */
@@ -863,6 +869,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
static inline void fuse_make_bad(struct inode *inode)
{
+ remove_inode_hash(inode);
set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
}
@@ -871,6 +878,28 @@ static inline bool fuse_is_bad(struct inode *inode)
return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
}
+static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
+ struct fuse_page_desc **desc)
+{
+ struct page **pages;
+
+ pages = kzalloc(npages * (sizeof(struct page *) +
+ sizeof(struct fuse_page_desc)), flags);
+ *desc = (void *) (pages + npages);
+
+ return pages;
+}
+
+static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
+ unsigned int index,
+ unsigned int nr_pages)
+{
+ int i;
+
+ for (i = index; i < index + nr_pages; i++)
+ descs[i].length = PAGE_SIZE - descs[i].offset;
+}
+
/** Device operations */
extern const struct file_operations fuse_dev_operations;
@@ -911,6 +940,7 @@ struct fuse_io_args {
struct {
struct fuse_write_in in;
struct fuse_write_out out;
+ bool page_locked;
} write;
};
struct fuse_args_pages ap;
@@ -931,7 +961,8 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm);
void fuse_file_free(struct fuse_file *ff);
void fuse_finish_open(struct inode *inode, struct file *file);
-void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags);
+void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
+ unsigned int flags);
/**
* Send RELEASE or RELEASEDIR request
@@ -1169,7 +1200,7 @@ void fuse_unlock_inode(struct inode *inode, bool locked);
bool fuse_lock_inode(struct inode *inode);
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
- size_t size, int flags);
+ size_t size, int flags, unsigned int extra_flags);
ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value,
size_t size);
ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size);
@@ -1213,4 +1244,19 @@ void fuse_dax_inode_cleanup(struct inode *inode);
bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment);
void fuse_dax_cancel_work(struct fuse_conn *fc);
+/* ioctl.c */
+long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg);
+int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+int fuse_fileattr_set(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct fileattr *fa);
+
+/* file.c */
+
+struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
+ unsigned int open_flags, bool isdir);
+void fuse_file_release(struct inode *inode, struct fuse_file *ff,
+ unsigned int open_flags, fl_owner_t id, bool isdir);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b0e18b470e91..393e36b74dc4 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -350,7 +350,7 @@ retry:
inode->i_generation = generation;
fuse_init_inode(inode, attr);
unlock_new_inode(inode);
- } else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
+ } else if (inode_wrong_type(inode, attr->mode)) {
/* Inode has changed type, any I/O on the old should fail */
fuse_make_bad(inode);
iput(inode);
@@ -712,6 +712,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
+ fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
INIT_LIST_HEAD(&fc->mounts);
list_add(&fm->fc_entry, &fc->mounts);
@@ -872,14 +873,13 @@ static struct dentry *fuse_get_parent(struct dentry *child)
struct inode *inode;
struct dentry *parent;
struct fuse_entry_out outarg;
- const struct qstr name = QSTR_INIT("..", 2);
int err;
if (!fc->export_support)
return ERR_PTR(-ESTALE);
err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
- &name, &outarg, &inode);
+ &dotdot_name, &outarg, &inode);
if (err) {
if (err == -ENOENT)
return ERR_PTR(-ESTALE);
@@ -1040,7 +1040,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
fc->abort_err = 1;
if (arg->flags & FUSE_MAX_PAGES) {
fc->max_pages =
- min_t(unsigned int, FUSE_MAX_MAX_PAGES,
+ min_t(unsigned int, fc->max_pages_limit,
max_t(unsigned int, arg->max_pages, 1));
}
if (IS_ENABLED(CONFIG_FUSE_DAX) &&
@@ -1052,6 +1052,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
fc->handle_killpriv_v2 = 1;
fm->sb->s_flags |= SB_NOSEC;
}
+ if (arg->flags & FUSE_SETXATTR_EXT)
+ fc->setxattr_ext = 1;
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
@@ -1095,7 +1097,7 @@ void fuse_send_init(struct fuse_mount *fm)
FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
- FUSE_HANDLE_KILLPRIV_V2;
+ FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT;
#ifdef CONFIG_FUSE_DAX
if (fm->fc->dax)
ia->in.flags |= FUSE_MAP_ALIGNMENT;
diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
new file mode 100644
index 000000000000..546ea3d58fb4
--- /dev/null
+++ b/fs/fuse/ioctl.c
@@ -0,0 +1,490 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ */
+
+#include "fuse_i.h"
+
+#include <linux/uio.h>
+#include <linux/compat.h>
+#include <linux/fileattr.h>
+
+/*
+ * CUSE servers compiled on 32bit broke on 64bit kernels because the
+ * ABI was defined to be 'struct iovec' which is different on 32bit
+ * and 64bit. Fortunately we can determine which structure the server
+ * used from the size of the reply.
+ */
+static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src,
+ size_t transferred, unsigned count,
+ bool is_compat)
+{
+#ifdef CONFIG_COMPAT
+ if (count * sizeof(struct compat_iovec) == transferred) {
+ struct compat_iovec *ciov = src;
+ unsigned i;
+
+ /*
+ * With this interface a 32bit server cannot support
+ * non-compat (i.e. ones coming from 64bit apps) ioctl
+ * requests
+ */
+ if (!is_compat)
+ return -EINVAL;
+
+ for (i = 0; i < count; i++) {
+ dst[i].iov_base = compat_ptr(ciov[i].iov_base);
+ dst[i].iov_len = ciov[i].iov_len;
+ }
+ return 0;
+ }
+#endif
+
+ if (count * sizeof(struct iovec) != transferred)
+ return -EIO;
+
+ memcpy(dst, src, transferred);
+ return 0;
+}
+
+/* Make sure iov_length() won't overflow */
+static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov,
+ size_t count)
+{
+ size_t n;
+ u32 max = fc->max_pages << PAGE_SHIFT;
+
+ for (n = 0; n < count; n++, iov++) {
+ if (iov->iov_len > (size_t) max)
+ return -ENOMEM;
+ max -= iov->iov_len;
+ }
+ return 0;
+}
+
+static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
+ void *src, size_t transferred, unsigned count,
+ bool is_compat)
+{
+ unsigned i;
+ struct fuse_ioctl_iovec *fiov = src;
+
+ if (fc->minor < 16) {
+ return fuse_copy_ioctl_iovec_old(dst, src, transferred,
+ count, is_compat);
+ }
+
+ if (count * sizeof(struct fuse_ioctl_iovec) != transferred)
+ return -EIO;
+
+ for (i = 0; i < count; i++) {
+ /* Did the server supply an inappropriate value? */
+ if (fiov[i].base != (unsigned long) fiov[i].base ||
+ fiov[i].len != (unsigned long) fiov[i].len)
+ return -EIO;
+
+ dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base;
+ dst[i].iov_len = (size_t) fiov[i].len;
+
+#ifdef CONFIG_COMPAT
+ if (is_compat &&
+ (ptr_to_compat(dst[i].iov_base) != fiov[i].base ||
+ (compat_size_t) dst[i].iov_len != fiov[i].len))
+ return -EIO;
+#endif
+ }
+
+ return 0;
+}
+
+
+/*
+ * For ioctls, there is no generic way to determine how much memory
+ * needs to be read and/or written. Furthermore, ioctls are allowed
+ * to dereference the passed pointer, so the parameter requires deep
+ * copying but FUSE has no idea whatsoever about what to copy in or
+ * out.
+ *
+ * This is solved by allowing FUSE server to retry ioctl with
+ * necessary in/out iovecs. Let's assume the ioctl implementation
+ * needs to read in the following structure.
+ *
+ * struct a {
+ * char *buf;
+ * size_t buflen;
+ * }
+ *
+ * On the first callout to FUSE server, inarg->in_size and
+ * inarg->out_size will be NULL; then, the server completes the ioctl
+ * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and
+ * the actual iov array to
+ *
+ * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } }
+ *
+ * which tells FUSE to copy in the requested area and retry the ioctl.
+ * On the second round, the server has access to the structure and
+ * from that it can tell what to look for next, so on the invocation,
+ * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to
+ *
+ * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) },
+ * { .iov_base = a.buf, .iov_len = a.buflen } }
+ *
+ * FUSE will copy both struct a and the pointed buffer from the
+ * process doing the ioctl and retry ioctl with both struct a and the
+ * buffer.
+ *
+ * This time, FUSE server has everything it needs and completes ioctl
+ * without FUSE_IOCTL_RETRY which finishes the ioctl call.
+ *
+ * Copying data out works the same way.
+ *
+ * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel
+ * automatically initializes in and out iovs by decoding @cmd with
+ * _IOC_* macros and the server is not allowed to request RETRY. This
+ * limits ioctl data transfers to well-formed ioctls and is the forced
+ * behavior for all FUSE servers.
+ */
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ unsigned int flags)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_mount *fm = ff->fm;
+ struct fuse_ioctl_in inarg = {
+ .fh = ff->fh,
+ .cmd = cmd,
+ .arg = arg,
+ .flags = flags
+ };
+ struct fuse_ioctl_out outarg;
+ struct iovec *iov_page = NULL;
+ struct iovec *in_iov = NULL, *out_iov = NULL;
+ unsigned int in_iovs = 0, out_iovs = 0, max_pages;
+ size_t in_size, out_size, c;
+ ssize_t transferred;
+ int err, i;
+ struct iov_iter ii;
+ struct fuse_args_pages ap = {};
+
+#if BITS_PER_LONG == 32
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#else
+ if (flags & FUSE_IOCTL_COMPAT) {
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#ifdef CONFIG_X86_X32
+ if (in_x32_syscall())
+ inarg.flags |= FUSE_IOCTL_COMPAT_X32;
+#endif
+ }
+#endif
+
+ /* assume all the iovs returned by client always fits in a page */
+ BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
+
+ err = -ENOMEM;
+ ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
+ iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
+ if (!ap.pages || !iov_page)
+ goto out;
+
+ fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
+
+ /*
+ * If restricted, initialize IO parameters as encoded in @cmd.
+ * RETRY from server is not allowed.
+ */
+ if (!(flags & FUSE_IOCTL_UNRESTRICTED)) {
+ struct iovec *iov = iov_page;
+
+ iov->iov_base = (void __user *)arg;
+ iov->iov_len = _IOC_SIZE(cmd);
+
+ if (_IOC_DIR(cmd) & _IOC_WRITE) {
+ in_iov = iov;
+ in_iovs = 1;
+ }
+
+ if (_IOC_DIR(cmd) & _IOC_READ) {
+ out_iov = iov;
+ out_iovs = 1;
+ }
+ }
+
+ retry:
+ inarg.in_size = in_size = iov_length(in_iov, in_iovs);
+ inarg.out_size = out_size = iov_length(out_iov, out_iovs);
+
+ /*
+ * Out data can be used either for actual out data or iovs,
+ * make sure there always is at least one page.
+ */
+ out_size = max_t(size_t, out_size, PAGE_SIZE);
+ max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE);
+
+ /* make sure there are enough buffer pages and init request with them */
+ err = -ENOMEM;
+ if (max_pages > fm->fc->max_pages)
+ goto out;
+ while (ap.num_pages < max_pages) {
+ ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+ if (!ap.pages[ap.num_pages])
+ goto out;
+ ap.num_pages++;
+ }
+
+
+ /* okay, let's send it to the client */
+ ap.args.opcode = FUSE_IOCTL;
+ ap.args.nodeid = ff->nodeid;
+ ap.args.in_numargs = 1;
+ ap.args.in_args[0].size = sizeof(inarg);
+ ap.args.in_args[0].value = &inarg;
+ if (in_size) {
+ ap.args.in_numargs++;
+ ap.args.in_args[1].size = in_size;
+ ap.args.in_pages = true;
+
+ err = -EFAULT;
+ iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size);
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
+ c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
+ if (c != PAGE_SIZE && iov_iter_count(&ii))
+ goto out;
+ }
+ }
+
+ ap.args.out_numargs = 2;
+ ap.args.out_args[0].size = sizeof(outarg);
+ ap.args.out_args[0].value = &outarg;
+ ap.args.out_args[1].size = out_size;
+ ap.args.out_pages = true;
+ ap.args.out_argvar = true;
+
+ transferred = fuse_simple_request(fm, &ap.args);
+ err = transferred;
+ if (transferred < 0)
+ goto out;
+
+ /* did it ask for retry? */
+ if (outarg.flags & FUSE_IOCTL_RETRY) {
+ void *vaddr;
+
+ /* no retry if in restricted mode */
+ err = -EIO;
+ if (!(flags & FUSE_IOCTL_UNRESTRICTED))
+ goto out;
+
+ in_iovs = outarg.in_iovs;
+ out_iovs = outarg.out_iovs;
+
+ /*
+ * Make sure things are in boundary, separate checks
+ * are to protect against overflow.
+ */
+ err = -ENOMEM;
+ if (in_iovs > FUSE_IOCTL_MAX_IOV ||
+ out_iovs > FUSE_IOCTL_MAX_IOV ||
+ in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
+ goto out;
+
+ vaddr = kmap_atomic(ap.pages[0]);
+ err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
+ transferred, in_iovs + out_iovs,
+ (flags & FUSE_IOCTL_COMPAT) != 0);
+ kunmap_atomic(vaddr);
+ if (err)
+ goto out;
+
+ in_iov = iov_page;
+ out_iov = in_iov + in_iovs;
+
+ err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs);
+ if (err)
+ goto out;
+
+ err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs);
+ if (err)
+ goto out;
+
+ goto retry;
+ }
+
+ err = -EIO;
+ if (transferred > inarg.out_size)
+ goto out;
+
+ err = -EFAULT;
+ iov_iter_init(&ii, READ, out_iov, out_iovs, transferred);
+ for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
+ c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
+ if (c != PAGE_SIZE && iov_iter_count(&ii))
+ goto out;
+ }
+ err = 0;
+ out:
+ free_page((unsigned long) iov_page);
+ while (ap.num_pages)
+ __free_page(ap.pages[--ap.num_pages]);
+ kfree(ap.pages);
+
+ return err ? err : outarg.result;
+}
+EXPORT_SYMBOL_GPL(fuse_do_ioctl);
+
+long fuse_ioctl_common(struct file *file, unsigned int cmd,
+ unsigned long arg, unsigned int flags)
+{
+ struct inode *inode = file_inode(file);
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fuse_allow_current_process(fc))
+ return -EACCES;
+
+ if (fuse_is_bad(inode))
+ return -EIO;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return fuse_ioctl_common(file, cmd, arg, 0);
+}
+
+long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
+}
+
+static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
+ unsigned int cmd, void *ptr, size_t size)
+{
+ struct fuse_mount *fm = ff->fm;
+ struct fuse_ioctl_in inarg;
+ struct fuse_ioctl_out outarg;
+ FUSE_ARGS(args);
+ int err;
+
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ inarg.cmd = cmd;
+
+#if BITS_PER_LONG == 32
+ inarg.flags |= FUSE_IOCTL_32BIT;
+#endif
+ if (S_ISDIR(inode->i_mode))
+ inarg.flags |= FUSE_IOCTL_DIR;
+
+ if (_IOC_DIR(cmd) & _IOC_READ)
+ inarg.out_size = size;
+ if (_IOC_DIR(cmd) & _IOC_WRITE)
+ inarg.in_size = size;
+
+ args.opcode = FUSE_IOCTL;
+ args.nodeid = ff->nodeid;
+ args.in_numargs = 2;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.in_args[1].size = inarg.in_size;
+ args.in_args[1].value = ptr;
+ args.out_numargs = 2;
+ args.out_args[0].size = sizeof(outarg);
+ args.out_args[0].value = &outarg;
+ args.out_args[1].size = inarg.out_size;
+ args.out_args[1].value = ptr;
+
+ err = fuse_simple_request(fm, &args);
+ if (!err && outarg.flags & FUSE_IOCTL_RETRY)
+ err = -EIO;
+
+ return err;
+}
+
+static struct fuse_file *fuse_priv_ioctl_prepare(struct inode *inode)
+{
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ bool isdir = S_ISDIR(inode->i_mode);
+
+ if (!S_ISREG(inode->i_mode) && !isdir)
+ return ERR_PTR(-ENOTTY);
+
+ return fuse_file_open(fm, get_node_id(inode), O_RDONLY, isdir);
+}
+
+static void fuse_priv_ioctl_cleanup(struct inode *inode, struct fuse_file *ff)
+{
+ fuse_file_release(inode, ff, O_RDONLY, NULL, S_ISDIR(inode->i_mode));
+}
+
+int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct fuse_file *ff;
+ unsigned int flags;
+ struct fsxattr xfa;
+ int err;
+
+ ff = fuse_priv_ioctl_prepare(inode);
+ if (IS_ERR(ff))
+ return PTR_ERR(ff);
+
+ if (fa->flags_valid) {
+ err = fuse_priv_ioctl(inode, ff, FS_IOC_GETFLAGS,
+ &flags, sizeof(flags));
+ if (err)
+ goto cleanup;
+
+ fileattr_fill_flags(fa, flags);
+ } else {
+ err = fuse_priv_ioctl(inode, ff, FS_IOC_FSGETXATTR,
+ &xfa, sizeof(xfa));
+ if (err)
+ goto cleanup;
+
+ fileattr_fill_xflags(fa, xfa.fsx_xflags);
+ fa->fsx_extsize = xfa.fsx_extsize;
+ fa->fsx_nextents = xfa.fsx_nextents;
+ fa->fsx_projid = xfa.fsx_projid;
+ fa->fsx_cowextsize = xfa.fsx_cowextsize;
+ }
+cleanup:
+ fuse_priv_ioctl_cleanup(inode, ff);
+
+ return err;
+}
+
+int fuse_fileattr_set(struct user_namespace *mnt_userns,
+ struct dentry *dentry, struct fileattr *fa)
+{
+ struct inode *inode = d_inode(dentry);
+ struct fuse_file *ff;
+ unsigned int flags = fa->flags;
+ struct fsxattr xfa;
+ int err;
+
+ ff = fuse_priv_ioctl_prepare(inode);
+ if (IS_ERR(ff))
+ return PTR_ERR(ff);
+
+ if (fa->flags_valid) {
+ err = fuse_priv_ioctl(inode, ff, FS_IOC_SETFLAGS,
+ &flags, sizeof(flags));
+ if (err)
+ goto cleanup;
+ } else {
+ memset(&xfa, 0, sizeof(xfa));
+ xfa.fsx_xflags = fa->fsx_xflags;
+ xfa.fsx_extsize = fa->fsx_extsize;
+ xfa.fsx_nextents = fa->fsx_nextents;
+ xfa.fsx_projid = fa->fsx_projid;
+ xfa.fsx_cowextsize = fa->fsx_cowextsize;
+
+ err = fuse_priv_ioctl(inode, ff, FS_IOC_FSSETXATTR,
+ &xfa, sizeof(xfa));
+ }
+
+cleanup:
+ fuse_priv_ioctl_cleanup(inode, ff);
+
+ return err;
+}
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 3441ffa740f3..277f7041d55a 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -202,7 +202,7 @@ retry:
inode = d_inode(dentry);
if (!inode ||
get_node_id(inode) != o->nodeid ||
- ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+ inode_wrong_type(inode, o->attr.mode)) {
d_invalidate(dentry);
dput(dentry);
goto retry;
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 8868ac31a3c0..bcb8a02e2d8b 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -18,6 +18,12 @@
#include <linux/uio.h>
#include "fuse_i.h"
+/* Used to help calculate the FUSE connection's max_pages limit for a request's
+ * size. Parts of the struct fuse_req are sliced into scattergather lists in
+ * addition to the pages used, so this can help account for that overhead.
+ */
+#define FUSE_HEADER_OVERHEAD 4
+
/* List of virtio-fs device instances and a lock for the list. Also provides
* mutual exclusion in device removal and mounting path
*/
@@ -127,11 +133,6 @@ static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq)
return &fs->vqs[vq->index];
}
-static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq)
-{
- return &vq_to_fsvq(vq)->fud->pq;
-}
-
/* Should be called with fsvq->lock held. */
static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq)
{
@@ -896,6 +897,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
out_vqs:
vdev->config->reset(vdev);
virtio_fs_cleanup_vqs(vdev, fs);
+ kfree(fs->vqs);
out:
vdev->priv = NULL;
@@ -1324,8 +1326,15 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
/* virtiofs allocates and installs its own fuse devices */
ctx->fudptr = NULL;
- if (ctx->dax)
+ if (ctx->dax) {
+ if (!fs->dax_dev) {
+ err = -EINVAL;
+ pr_err("virtio-fs: dax can't be enabled as filesystem"
+ " device does not support it.\n");
+ goto err_free_fuse_devs;
+ }
ctx->dax_dev = fs->dax_dev;
+ }
err = fuse_fill_super_common(sb, ctx);
if (err < 0)
goto err_free_fuse_devs;
@@ -1406,9 +1415,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
{
struct virtio_fs *fs;
struct super_block *sb;
- struct fuse_conn *fc;
+ struct fuse_conn *fc = NULL;
struct fuse_mount *fm;
- int err;
+ unsigned int virtqueue_size;
+ int err = -EIO;
/* This gets a reference on virtio_fs object. This ptr gets installed
* in fc->iq->priv. Once fuse_conn is going away, it calls ->put()
@@ -1420,6 +1430,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
return -EINVAL;
}
+ virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq);
+ if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD))
+ goto out_err;
+
err = -ENOMEM;
fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
if (!fc)
@@ -1429,12 +1443,15 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
if (!fm)
goto out_err;
- fuse_conn_init(fc, fm, get_user_ns(current_user_ns()),
- &virtio_fs_fiq_ops, fs);
+ fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs);
fc->release = fuse_free_conn;
fc->delete_stale = true;
fc->auto_submounts = true;
+ /* Tell FUSE to split requests that exceed the virtqueue's size */
+ fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,
+ virtqueue_size - FUSE_HEADER_OVERHEAD);
+
fsc->s_fs_info = fm;
sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
if (fsc->s_fs_info) {
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 1a7d7ace54e1..61dfaf7b7d20 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -12,7 +12,7 @@
#include <linux/posix_acl_xattr.h>
int fuse_setxattr(struct inode *inode, const char *name, const void *value,
- size_t size, int flags)
+ size_t size, int flags, unsigned int extra_flags)
{
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
@@ -25,10 +25,13 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value,
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
inarg.flags = flags;
+ inarg.setxattr_flags = extra_flags;
+
args.opcode = FUSE_SETXATTR;
args.nodeid = get_node_id(inode);
args.in_numargs = 3;
- args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].size = fm->fc->setxattr_ext ?
+ sizeof(inarg) : FUSE_COMPAT_SETXATTR_IN_SIZE;
args.in_args[0].value = &inarg;
args.in_args[1].size = strlen(name) + 1;
args.in_args[1].value = name;
@@ -199,7 +202,7 @@ static int fuse_xattr_set(const struct xattr_handler *handler,
if (!value)
return fuse_removexattr(inode, name);
- return fuse_setxattr(inode, name, value, size, flags);
+ return fuse_setxattr(inode, name, value, size, flags, 0);
}
static bool no_xattr_list(struct dentry *dentry)