Diffstat (limited to 'fs/fuse')
-rw-r--r--   fs/fuse/Makefile     |   2
-rw-r--r--   fs/fuse/acl.c        |   7
-rw-r--r--   fs/fuse/cuse.c       |  12
-rw-r--r--   fs/fuse/dev.c        |  27
-rw-r--r--   fs/fuse/dir.c        |  12
-rw-r--r--   fs/fuse/file.c       | 506
-rw-r--r--   fs/fuse/fuse_i.h     |  54
-rw-r--r--   fs/fuse/inode.c      |  12
-rw-r--r--   fs/fuse/ioctl.c      | 490
-rw-r--r--   fs/fuse/readdir.c    |   2
-rw-r--r--   fs/fuse/virtio_fs.c  |  37
-rw-r--r--   fs/fuse/xattr.c      |   9
12 files changed, 696 insertions, 474 deletions
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 8c7021fb2cd4..0c48b35c058d 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o obj-$(CONFIG_VIRTIO_FS) += virtiofs.o -fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o +fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o fuse-$(CONFIG_FUSE_DAX) += dax.o virtiofs-y := virtio_fs.o diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index e9c0f916349d..52b165319be1 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -71,6 +71,7 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode, return -EINVAL; if (acl) { + unsigned int extra_flags = 0; /* * Fuse userspace is responsible for updating access * permissions in the inode, if needed. fuse_setxattr @@ -94,7 +95,11 @@ int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode, return ret; } - ret = fuse_setxattr(inode, name, value, size, 0); + if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) && + !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) + extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID; + + ret = fuse_setxattr(inode, name, value, size, 0, extra_flags); kfree(value); } else { ret = fuse_removexattr(inode, name); diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 45082269e698..c7d882a9fe33 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -511,20 +511,18 @@ static int cuse_channel_open(struct inode *inode, struct file *file) fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL); + cc->fc.release = cuse_fc_release; fud = fuse_dev_alloc_install(&cc->fc); - if (!fud) { - kfree(cc); + fuse_conn_put(&cc->fc); + if (!fud) return -ENOMEM; - } INIT_LIST_HEAD(&cc->list); - cc->fc.release = cuse_fc_release; cc->fc.initialized = 1; rc = cuse_send_init(cc); if (rc) { fuse_dev_free(fud); - fuse_conn_put(&cc->fc); return rc; } file->private_data = fud; @@ -561,8 +559,6 @@ static int cuse_channel_release(struct inode *inode, struct file *file) unregister_chrdev_region(cc->cdev->dev, 1); cdev_del(cc->cdev); } - /* Base reference is now owned by "fud" */ - fuse_conn_put(&cc->fc); rc = fuse_dev_release(inode, file); /* puts the base reference */ @@ -627,6 +623,8 @@ static int __init cuse_init(void) cuse_channel_fops.owner = THIS_MODULE; cuse_channel_fops.open = cuse_channel_open; cuse_channel_fops.release = cuse_channel_release; + /* CUSE is not prepared for FUSE_DEV_IOC_CLONE */ + cuse_channel_fops.unlocked_ioctl = NULL; cuse_class = class_create(THIS_MODULE, "cuse"); if (IS_ERR(cuse_class)) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c6636b4c4ccf..a5ceccc5ef00 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -2229,19 +2229,18 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new) static long fuse_dev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int err = -ENOTTY; - - if (cmd == FUSE_DEV_IOC_CLONE) { - int oldfd; - - err = -EFAULT; - if (!get_user(oldfd, (__u32 __user *) arg)) { + int res; + int oldfd; + struct fuse_dev *fud = NULL; + + switch (cmd) { + case FUSE_DEV_IOC_CLONE: + res = -EFAULT; + if (!get_user(oldfd, (__u32 __user *)arg)) { struct file *old = fget(oldfd); - err = -EINVAL; + res = -EINVAL; if (old) { - struct fuse_dev *fud = NULL; - /* * Check against file->f_op because CUSE * uses the same ioctl handler. 
@@ -2252,14 +2251,18 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd, if (fud) { mutex_lock(&fuse_mutex); - err = fuse_device_clone(fud->fc, file); + res = fuse_device_clone(fud->fc, file); mutex_unlock(&fuse_mutex); } fput(old); } } + break; + default: + res = -ENOTTY; + break; } - return err; + return res; } const struct file_operations fuse_dev_operations = { diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 06a18700a845..1b6c001a7dd1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -252,7 +252,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) if (ret == -ENOMEM) goto out; if (ret || fuse_invalid_attr(&outarg.attr) || - (outarg.attr.mode ^ inode->i_mode) & S_IFMT) + inode_wrong_type(inode, outarg.attr.mode)) goto invalid; forget_all_cached_acls(inode); @@ -508,7 +508,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * 'mknod' + 'open' requests. */ static int fuse_create_open(struct inode *dir, struct dentry *entry, - struct file *file, unsigned flags, + struct file *file, unsigned int flags, umode_t mode) { int err; @@ -1054,7 +1054,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, err = fuse_simple_request(fm, &args); if (!err) { if (fuse_invalid_attr(&outarg.attr) || - (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + inode_wrong_type(inode, outarg.attr.mode)) { fuse_make_bad(inode); err = -EIO; } else { @@ -1703,7 +1703,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, } if (fuse_invalid_attr(&outarg.attr) || - (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + inode_wrong_type(inode, outarg.attr.mode)) { fuse_make_bad(inode); err = -EIO; goto error; @@ -1866,6 +1866,8 @@ static const struct inode_operations fuse_dir_inode_operations = { .listxattr = fuse_listxattr, .get_acl = fuse_get_acl, .set_acl = fuse_set_acl, + .fileattr_get = fuse_fileattr_get, + .fileattr_set = fuse_fileattr_set, }; static const struct file_operations fuse_dir_operations = { @@ -1886,6 +1888,8 @@ static const struct inode_operations fuse_common_inode_operations = { .listxattr = fuse_listxattr, .get_acl = fuse_get_acl, .set_acl = fuse_set_acl, + .fileattr_get = fuse_fileattr_get, + .fileattr_set = fuse_fileattr_set, }; static const struct inode_operations fuse_symlink_inode_operations = { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 8cccecb55fb8..09ef2a4d25ed 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -14,32 +14,20 @@ #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/module.h> -#include <linux/compat.h> #include <linux/swap.h> #include <linux/falloc.h> #include <linux/uio.h> #include <linux/fs.h> -static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags, - struct fuse_page_desc **desc) -{ - struct page **pages; - - pages = kzalloc(npages * (sizeof(struct page *) + - sizeof(struct fuse_page_desc)), flags); - *desc = (void *) (pages + npages); - - return pages; -} - -static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file, - int opcode, struct fuse_open_out *outargp) +static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, + unsigned int open_flags, int opcode, + struct fuse_open_out *outargp) { struct fuse_open_in inarg; FUSE_ARGS(args); memset(&inarg, 0, sizeof(inarg)); - inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); + inarg.flags = open_flags & ~(O_CREAT | O_EXCL | O_NOCTTY); if (!fm->fc->atomic_o_trunc) inarg.flags &= ~O_TRUNC; @@ -136,8 +124,8 @@ static void fuse_file_put(struct fuse_file *ff, bool 
sync, bool isdir) } } -int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, - bool isdir) +struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, + unsigned int open_flags, bool isdir) { struct fuse_conn *fc = fm->fc; struct fuse_file *ff; @@ -145,7 +133,7 @@ int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, ff = fuse_file_alloc(fm); if (!ff) - return -ENOMEM; + return ERR_PTR(-ENOMEM); ff->fh = 0; /* Default for no-open */ @@ -154,14 +142,14 @@ int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, struct fuse_open_out outarg; int err; - err = fuse_send_open(fm, nodeid, file, opcode, &outarg); + err = fuse_send_open(fm, nodeid, open_flags, opcode, &outarg); if (!err) { ff->fh = outarg.fh; ff->open_flags = outarg.open_flags; } else if (err != -ENOSYS) { fuse_file_free(ff); - return err; + return ERR_PTR(err); } else { if (isdir) fc->no_opendir = 1; @@ -174,9 +162,19 @@ int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, ff->open_flags &= ~FOPEN_DIRECT_IO; ff->nodeid = nodeid; - file->private_data = ff; - return 0; + return ff; +} + +int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, + bool isdir) +{ + struct fuse_file *ff = fuse_file_open(fm, nodeid, file->f_flags, isdir); + + if (!IS_ERR(ff)) + file->private_data = ff; + + return PTR_ERR_OR_ZERO(ff); } EXPORT_SYMBOL_GPL(fuse_do_open); @@ -268,7 +266,7 @@ out: } static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, - int flags, int opcode) + unsigned int flags, int opcode) { struct fuse_conn *fc = ff->fm->fc; struct fuse_release_args *ra = ff->release_args; @@ -297,22 +295,21 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff, ra->args.nocreds = true; } -void fuse_release_common(struct file *file, bool isdir) +void fuse_file_release(struct inode *inode, struct fuse_file *ff, + unsigned int open_flags, fl_owner_t id, bool isdir) { - struct fuse_inode *fi = get_fuse_inode(file_inode(file)); - struct fuse_file *ff = file->private_data; + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_release_args *ra = ff->release_args; int opcode = isdir ? 
FUSE_RELEASEDIR : FUSE_RELEASE; - fuse_prepare_release(fi, ff, file->f_flags, opcode); + fuse_prepare_release(fi, ff, open_flags, opcode); if (ff->flock) { ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; - ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, - (fl_owner_t) file); + ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id); } /* Hold inode until release is finished */ - ra->inode = igrab(file_inode(file)); + ra->inode = igrab(inode); /* * Normally this will send the RELEASE request, however if @@ -326,6 +323,12 @@ void fuse_release_common(struct file *file, bool isdir) fuse_file_put(ff, ff->fm->fc->destroy, isdir); } +void fuse_release_common(struct file *file, bool isdir) +{ + fuse_file_release(file_inode(file), file->private_data, file->f_flags, + (fl_owner_t) file, isdir); +} + static int fuse_open(struct inode *inode, struct file *file) { return fuse_open_common(inode, file, false); @@ -345,7 +348,8 @@ static int fuse_release(struct inode *inode, struct file *file) return 0; } -void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags) +void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, + unsigned int flags) { WARN_ON(refcount_read(&ff->count) > 1); fuse_prepare_release(fi, ff, flags, FUSE_RELEASE); @@ -798,21 +802,12 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read, { struct fuse_conn *fc = get_fuse_conn(inode); - if (fc->writeback_cache) { - /* - * A hole in a file. Some data after the hole are in page cache, - * but have not reached the client fs yet. So, the hole is not - * present there. - */ - int i; - int start_idx = num_read >> PAGE_SHIFT; - size_t off = num_read & (PAGE_SIZE - 1); - - for (i = start_idx; i < ap->num_pages; i++) { - zero_user_segment(ap->pages[i], off, PAGE_SIZE); - off = 0; - } - } else { + /* + * If writeback_cache is enabled, a short read means there's a hole in + * the file. Some data after the hole is in page cache, but has not + * reached the client fs yet. So the hole is not present there. 
+ */ + if (!fc->writeback_cache) { loff_t pos = page_offset(ap->pages[0]) + num_read; fuse_read_update_size(inode, pos, attr_ver); } @@ -1099,6 +1094,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, struct fuse_file *ff = file->private_data; struct fuse_mount *fm = ff->fm; unsigned int offset, i; + bool short_write; int err; for (i = 0; i < ap->num_pages; i++) @@ -1113,32 +1109,38 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, if (!err && ia->write.out.size > count) err = -EIO; + short_write = ia->write.out.size < count; offset = ap->descs[0].offset; count = ia->write.out.size; for (i = 0; i < ap->num_pages; i++) { struct page *page = ap->pages[i]; - if (!err && !offset && count >= PAGE_SIZE) - SetPageUptodate(page); - - if (count > PAGE_SIZE - offset) - count -= PAGE_SIZE - offset; - else - count = 0; - offset = 0; - - unlock_page(page); + if (err) { + ClearPageUptodate(page); + } else { + if (count >= PAGE_SIZE - offset) + count -= PAGE_SIZE - offset; + else { + if (short_write) + ClearPageUptodate(page); + count = 0; + } + offset = 0; + } + if (ia->write.page_locked && (i == ap->num_pages - 1)) + unlock_page(page); put_page(page); } return err; } -static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap, +static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, struct address_space *mapping, struct iov_iter *ii, loff_t pos, unsigned int max_pages) { + struct fuse_args_pages *ap = &ia->ap; struct fuse_conn *fc = get_fuse_conn(mapping->host); unsigned offset = pos & (PAGE_SIZE - 1); size_t count = 0; @@ -1191,6 +1193,16 @@ static ssize_t fuse_fill_write_pages(struct fuse_args_pages *ap, if (offset == PAGE_SIZE) offset = 0; + /* If we copied full page, mark it uptodate */ + if (tmp == PAGE_SIZE) + SetPageUptodate(page); + + if (PageUptodate(page)) { + unlock_page(page); + } else { + ia->write.page_locked = true; + break; + } if (!fc->big_writes) break; } while (iov_iter_count(ii) && count < fc->max_write && @@ -1234,7 +1246,7 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, break; } - count = fuse_fill_write_pages(ap, mapping, ii, pos, nr_pages); + count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages); if (count <= 0) { err = count; } else { @@ -1346,16 +1358,6 @@ out: return written ? written : err; } -static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs, - unsigned int index, - unsigned int nr_pages) -{ - int i; - - for (i = index; i < index + nr_pages; i++) - descs[i].length = PAGE_SIZE - descs[i].offset; -} - static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii) { return (unsigned long)ii->iov->iov_base + ii->iov_offset; @@ -1759,8 +1761,17 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, container_of(args, typeof(*wpa), ia.ap.args); struct inode *inode = wpa->inode; struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_conn *fc = get_fuse_conn(inode); mapping_set_error(inode->i_mapping, error); + /* + * A writeback finished and this might have updated mtime/ctime on + * server making local mtime/ctime stale. Hence invalidate attrs. + * Do this only if writeback_cache is not enabled. If writeback_cache + * is enabled, we trust local ctime/mtime. 
+ */ + if (!fc->writeback_cache) + fuse_invalidate_attr(inode); spin_lock(&fi->lock); rb_erase(&wpa->writepages_entry, &fi->writepages); while (wpa->next) { @@ -2637,363 +2648,6 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) } /* - * CUSE servers compiled on 32bit broke on 64bit kernels because the - * ABI was defined to be 'struct iovec' which is different on 32bit - * and 64bit. Fortunately we can determine which structure the server - * used from the size of the reply. - */ -static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, - size_t transferred, unsigned count, - bool is_compat) -{ -#ifdef CONFIG_COMPAT - if (count * sizeof(struct compat_iovec) == transferred) { - struct compat_iovec *ciov = src; - unsigned i; - - /* - * With this interface a 32bit server cannot support - * non-compat (i.e. ones coming from 64bit apps) ioctl - * requests - */ - if (!is_compat) - return -EINVAL; - - for (i = 0; i < count; i++) { - dst[i].iov_base = compat_ptr(ciov[i].iov_base); - dst[i].iov_len = ciov[i].iov_len; - } - return 0; - } -#endif - - if (count * sizeof(struct iovec) != transferred) - return -EIO; - - memcpy(dst, src, transferred); - return 0; -} - -/* Make sure iov_length() won't overflow */ -static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov, - size_t count) -{ - size_t n; - u32 max = fc->max_pages << PAGE_SHIFT; - - for (n = 0; n < count; n++, iov++) { - if (iov->iov_len > (size_t) max) - return -ENOMEM; - max -= iov->iov_len; - } - return 0; -} - -static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, - void *src, size_t transferred, unsigned count, - bool is_compat) -{ - unsigned i; - struct fuse_ioctl_iovec *fiov = src; - - if (fc->minor < 16) { - return fuse_copy_ioctl_iovec_old(dst, src, transferred, - count, is_compat); - } - - if (count * sizeof(struct fuse_ioctl_iovec) != transferred) - return -EIO; - - for (i = 0; i < count; i++) { - /* Did the server supply an inappropriate value? */ - if (fiov[i].base != (unsigned long) fiov[i].base || - fiov[i].len != (unsigned long) fiov[i].len) - return -EIO; - - dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; - dst[i].iov_len = (size_t) fiov[i].len; - -#ifdef CONFIG_COMPAT - if (is_compat && - (ptr_to_compat(dst[i].iov_base) != fiov[i].base || - (compat_size_t) dst[i].iov_len != fiov[i].len)) - return -EIO; -#endif - } - - return 0; -} - - -/* - * For ioctls, there is no generic way to determine how much memory - * needs to be read and/or written. Furthermore, ioctls are allowed - * to dereference the passed pointer, so the parameter requires deep - * copying but FUSE has no idea whatsoever about what to copy in or - * out. - * - * This is solved by allowing FUSE server to retry ioctl with - * necessary in/out iovecs. Let's assume the ioctl implementation - * needs to read in the following structure. - * - * struct a { - * char *buf; - * size_t buflen; - * } - * - * On the first callout to FUSE server, inarg->in_size and - * inarg->out_size will be NULL; then, the server completes the ioctl - * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and - * the actual iov array to - * - * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } } - * - * which tells FUSE to copy in the requested area and retry the ioctl. 
- * On the second round, the server has access to the structure and - * from that it can tell what to look for next, so on the invocation, - * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to - * - * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) }, - * { .iov_base = a.buf, .iov_len = a.buflen } } - * - * FUSE will copy both struct a and the pointed buffer from the - * process doing the ioctl and retry ioctl with both struct a and the - * buffer. - * - * This time, FUSE server has everything it needs and completes ioctl - * without FUSE_IOCTL_RETRY which finishes the ioctl call. - * - * Copying data out works the same way. - * - * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel - * automatically initializes in and out iovs by decoding @cmd with - * _IOC_* macros and the server is not allowed to request RETRY. This - * limits ioctl data transfers to well-formed ioctls and is the forced - * behavior for all FUSE servers. - */ -long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, - unsigned int flags) -{ - struct fuse_file *ff = file->private_data; - struct fuse_mount *fm = ff->fm; - struct fuse_ioctl_in inarg = { - .fh = ff->fh, - .cmd = cmd, - .arg = arg, - .flags = flags - }; - struct fuse_ioctl_out outarg; - struct iovec *iov_page = NULL; - struct iovec *in_iov = NULL, *out_iov = NULL; - unsigned int in_iovs = 0, out_iovs = 0, max_pages; - size_t in_size, out_size, c; - ssize_t transferred; - int err, i; - struct iov_iter ii; - struct fuse_args_pages ap = {}; - -#if BITS_PER_LONG == 32 - inarg.flags |= FUSE_IOCTL_32BIT; -#else - if (flags & FUSE_IOCTL_COMPAT) { - inarg.flags |= FUSE_IOCTL_32BIT; -#ifdef CONFIG_X86_X32 - if (in_x32_syscall()) - inarg.flags |= FUSE_IOCTL_COMPAT_X32; -#endif - } -#endif - - /* assume all the iovs returned by client always fits in a page */ - BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); - - err = -ENOMEM; - ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); - iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); - if (!ap.pages || !iov_page) - goto out; - - fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages); - - /* - * If restricted, initialize IO parameters as encoded in @cmd. - * RETRY from server is not allowed. - */ - if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { - struct iovec *iov = iov_page; - - iov->iov_base = (void __user *)arg; - - switch (cmd) { - case FS_IOC_GETFLAGS: - case FS_IOC_SETFLAGS: - iov->iov_len = sizeof(int); - break; - default: - iov->iov_len = _IOC_SIZE(cmd); - break; - } - - if (_IOC_DIR(cmd) & _IOC_WRITE) { - in_iov = iov; - in_iovs = 1; - } - - if (_IOC_DIR(cmd) & _IOC_READ) { - out_iov = iov; - out_iovs = 1; - } - } - - retry: - inarg.in_size = in_size = iov_length(in_iov, in_iovs); - inarg.out_size = out_size = iov_length(out_iov, out_iovs); - - /* - * Out data can be used either for actual out data or iovs, - * make sure there always is at least one page. 
- */ - out_size = max_t(size_t, out_size, PAGE_SIZE); - max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE); - - /* make sure there are enough buffer pages and init request with them */ - err = -ENOMEM; - if (max_pages > fm->fc->max_pages) - goto out; - while (ap.num_pages < max_pages) { - ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); - if (!ap.pages[ap.num_pages]) - goto out; - ap.num_pages++; - } - - - /* okay, let's send it to the client */ - ap.args.opcode = FUSE_IOCTL; - ap.args.nodeid = ff->nodeid; - ap.args.in_numargs = 1; - ap.args.in_args[0].size = sizeof(inarg); - ap.args.in_args[0].value = &inarg; - if (in_size) { - ap.args.in_numargs++; - ap.args.in_args[1].size = in_size; - ap.args.in_pages = true; - - err = -EFAULT; - iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size); - for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { - c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); - if (c != PAGE_SIZE && iov_iter_count(&ii)) - goto out; - } - } - - ap.args.out_numargs = 2; - ap.args.out_args[0].size = sizeof(outarg); - ap.args.out_args[0].value = &outarg; - ap.args.out_args[1].size = out_size; - ap.args.out_pages = true; - ap.args.out_argvar = true; - - transferred = fuse_simple_request(fm, &ap.args); - err = transferred; - if (transferred < 0) - goto out; - - /* did it ask for retry? */ - if (outarg.flags & FUSE_IOCTL_RETRY) { - void *vaddr; - - /* no retry if in restricted mode */ - err = -EIO; - if (!(flags & FUSE_IOCTL_UNRESTRICTED)) - goto out; - - in_iovs = outarg.in_iovs; - out_iovs = outarg.out_iovs; - - /* - * Make sure things are in boundary, separate checks - * are to protect against overflow. - */ - err = -ENOMEM; - if (in_iovs > FUSE_IOCTL_MAX_IOV || - out_iovs > FUSE_IOCTL_MAX_IOV || - in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) - goto out; - - vaddr = kmap_atomic(ap.pages[0]); - err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, - transferred, in_iovs + out_iovs, - (flags & FUSE_IOCTL_COMPAT) != 0); - kunmap_atomic(vaddr); - if (err) - goto out; - - in_iov = iov_page; - out_iov = in_iov + in_iovs; - - err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs); - if (err) - goto out; - - err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs); - if (err) - goto out; - - goto retry; - } - - err = -EIO; - if (transferred > inarg.out_size) - goto out; - - err = -EFAULT; - iov_iter_init(&ii, READ, out_iov, out_iovs, transferred); - for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { - c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); - if (c != PAGE_SIZE && iov_iter_count(&ii)) - goto out; - } - err = 0; - out: - free_page((unsigned long) iov_page); - while (ap.num_pages) - __free_page(ap.pages[--ap.num_pages]); - kfree(ap.pages); - - return err ? 
err : outarg.result; -} -EXPORT_SYMBOL_GPL(fuse_do_ioctl); - -long fuse_ioctl_common(struct file *file, unsigned int cmd, - unsigned long arg, unsigned int flags) -{ - struct inode *inode = file_inode(file); - struct fuse_conn *fc = get_fuse_conn(inode); - - if (!fuse_allow_current_process(fc)) - return -EACCES; - - if (fuse_is_bad(inode)) - return -EIO; - - return fuse_do_ioctl(file, cmd, arg, flags); -} - -static long fuse_file_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - return fuse_ioctl_common(file, cmd, arg, 0); -} - -static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); -} - -/* * All files which have been polled are linked to RB tree * fuse_conn->polled_files which is indexed by kh. Walk the tree and * find the matching one. diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 68cca8d4db6e..7e463e220053 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -552,9 +552,12 @@ struct fuse_conn { /** Maximum write size */ unsigned max_write; - /** Maxmum number of pages that can be used in a single request */ + /** Maximum number of pages that can be used in a single request */ unsigned int max_pages; + /** Constrain ->max_pages to this value during feature negotiation */ + unsigned int max_pages_limit; + /** Input queue */ struct fuse_iqueue iq; @@ -668,6 +671,9 @@ struct fuse_conn { /** Is setxattr not implemented by fs? */ unsigned no_setxattr:1; + /** Does file server support extended setxattr */ + unsigned setxattr_ext:1; + /** Is getxattr not implemented by fs? */ unsigned no_getxattr:1; @@ -713,7 +719,7 @@ struct fuse_conn { /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; - /** Filesystem is fully reponsible for page cache invalidation. */ + /** Filesystem is fully responsible for page cache invalidation. */ unsigned explicit_inval_data:1; /** Does the filesystem support readdirplus? 
*/ @@ -863,6 +869,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) static inline void fuse_make_bad(struct inode *inode) { + remove_inode_hash(inode); set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } @@ -871,6 +878,28 @@ static inline bool fuse_is_bad(struct inode *inode) return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state)); } +static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags, + struct fuse_page_desc **desc) +{ + struct page **pages; + + pages = kzalloc(npages * (sizeof(struct page *) + + sizeof(struct fuse_page_desc)), flags); + *desc = (void *) (pages + npages); + + return pages; +} + +static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs, + unsigned int index, + unsigned int nr_pages) +{ + int i; + + for (i = index; i < index + nr_pages; i++) + descs[i].length = PAGE_SIZE - descs[i].offset; +} + /** Device operations */ extern const struct file_operations fuse_dev_operations; @@ -911,6 +940,7 @@ struct fuse_io_args { struct { struct fuse_write_in in; struct fuse_write_out out; + bool page_locked; } write; }; struct fuse_args_pages ap; @@ -931,7 +961,8 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm); void fuse_file_free(struct fuse_file *ff); void fuse_finish_open(struct inode *inode, struct file *file); -void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags); +void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, + unsigned int flags); /** * Send RELEASE or RELEASEDIR request @@ -1169,7 +1200,7 @@ void fuse_unlock_inode(struct inode *inode, bool locked); bool fuse_lock_inode(struct inode *inode); int fuse_setxattr(struct inode *inode, const char *name, const void *value, - size_t size, int flags); + size_t size, int flags, unsigned int extra_flags); ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, size_t size); ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size); @@ -1213,4 +1244,19 @@ void fuse_dax_inode_cleanup(struct inode *inode); bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment); void fuse_dax_cancel_work(struct fuse_conn *fc); +/* ioctl.c */ +long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg); +int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa); +int fuse_fileattr_set(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa); + +/* file.c */ + +struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid, + unsigned int open_flags, bool isdir); +void fuse_file_release(struct inode *inode, struct fuse_file *ff, + unsigned int open_flags, fl_owner_t id, bool isdir); + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index b0e18b470e91..393e36b74dc4 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -350,7 +350,7 @@ retry: inode->i_generation = generation; fuse_init_inode(inode, attr); unlock_new_inode(inode); - } else if ((inode->i_mode ^ attr->mode) & S_IFMT) { + } else if (inode_wrong_type(inode, attr->mode)) { /* Inode has changed type, any I/O on the old should fail */ fuse_make_bad(inode); iput(inode); @@ -712,6 +712,7 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); fc->user_ns = get_user_ns(user_ns); fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; + fc->max_pages_limit = FUSE_MAX_MAX_PAGES; 
INIT_LIST_HEAD(&fc->mounts); list_add(&fm->fc_entry, &fc->mounts); @@ -872,14 +873,13 @@ static struct dentry *fuse_get_parent(struct dentry *child) struct inode *inode; struct dentry *parent; struct fuse_entry_out outarg; - const struct qstr name = QSTR_INIT("..", 2); int err; if (!fc->export_support) return ERR_PTR(-ESTALE); err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), - &name, &outarg, &inode); + &dotdot_name, &outarg, &inode); if (err) { if (err == -ENOENT) return ERR_PTR(-ESTALE); @@ -1040,7 +1040,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->abort_err = 1; if (arg->flags & FUSE_MAX_PAGES) { fc->max_pages = - min_t(unsigned int, FUSE_MAX_MAX_PAGES, + min_t(unsigned int, fc->max_pages_limit, max_t(unsigned int, arg->max_pages, 1)); } if (IS_ENABLED(CONFIG_FUSE_DAX) && @@ -1052,6 +1052,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->handle_killpriv_v2 = 1; fm->sb->s_flags |= SB_NOSEC; } + if (arg->flags & FUSE_SETXATTR_EXT) + fc->setxattr_ext = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1095,7 +1097,7 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA | - FUSE_HANDLE_KILLPRIV_V2; + FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) ia->in.flags |= FUSE_MAP_ALIGNMENT; diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c new file mode 100644 index 000000000000..546ea3d58fb4 --- /dev/null +++ b/fs/fuse/ioctl.c @@ -0,0 +1,490 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 Red Hat, Inc. + */ + +#include "fuse_i.h" + +#include <linux/uio.h> +#include <linux/compat.h> +#include <linux/fileattr.h> + +/* + * CUSE servers compiled on 32bit broke on 64bit kernels because the + * ABI was defined to be 'struct iovec' which is different on 32bit + * and 64bit. Fortunately we can determine which structure the server + * used from the size of the reply. + */ +static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, + size_t transferred, unsigned count, + bool is_compat) +{ +#ifdef CONFIG_COMPAT + if (count * sizeof(struct compat_iovec) == transferred) { + struct compat_iovec *ciov = src; + unsigned i; + + /* + * With this interface a 32bit server cannot support + * non-compat (i.e. 
ones coming from 64bit apps) ioctl + * requests + */ + if (!is_compat) + return -EINVAL; + + for (i = 0; i < count; i++) { + dst[i].iov_base = compat_ptr(ciov[i].iov_base); + dst[i].iov_len = ciov[i].iov_len; + } + return 0; + } +#endif + + if (count * sizeof(struct iovec) != transferred) + return -EIO; + + memcpy(dst, src, transferred); + return 0; +} + +/* Make sure iov_length() won't overflow */ +static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov, + size_t count) +{ + size_t n; + u32 max = fc->max_pages << PAGE_SHIFT; + + for (n = 0; n < count; n++, iov++) { + if (iov->iov_len > (size_t) max) + return -ENOMEM; + max -= iov->iov_len; + } + return 0; +} + +static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, + void *src, size_t transferred, unsigned count, + bool is_compat) +{ + unsigned i; + struct fuse_ioctl_iovec *fiov = src; + + if (fc->minor < 16) { + return fuse_copy_ioctl_iovec_old(dst, src, transferred, + count, is_compat); + } + + if (count * sizeof(struct fuse_ioctl_iovec) != transferred) + return -EIO; + + for (i = 0; i < count; i++) { + /* Did the server supply an inappropriate value? */ + if (fiov[i].base != (unsigned long) fiov[i].base || + fiov[i].len != (unsigned long) fiov[i].len) + return -EIO; + + dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; + dst[i].iov_len = (size_t) fiov[i].len; + +#ifdef CONFIG_COMPAT + if (is_compat && + (ptr_to_compat(dst[i].iov_base) != fiov[i].base || + (compat_size_t) dst[i].iov_len != fiov[i].len)) + return -EIO; +#endif + } + + return 0; +} + + +/* + * For ioctls, there is no generic way to determine how much memory + * needs to be read and/or written. Furthermore, ioctls are allowed + * to dereference the passed pointer, so the parameter requires deep + * copying but FUSE has no idea whatsoever about what to copy in or + * out. + * + * This is solved by allowing FUSE server to retry ioctl with + * necessary in/out iovecs. Let's assume the ioctl implementation + * needs to read in the following structure. + * + * struct a { + * char *buf; + * size_t buflen; + * } + * + * On the first callout to FUSE server, inarg->in_size and + * inarg->out_size will be NULL; then, the server completes the ioctl + * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and + * the actual iov array to + * + * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } } + * + * which tells FUSE to copy in the requested area and retry the ioctl. + * On the second round, the server has access to the structure and + * from that it can tell what to look for next, so on the invocation, + * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to + * + * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) }, + * { .iov_base = a.buf, .iov_len = a.buflen } } + * + * FUSE will copy both struct a and the pointed buffer from the + * process doing the ioctl and retry ioctl with both struct a and the + * buffer. + * + * This time, FUSE server has everything it needs and completes ioctl + * without FUSE_IOCTL_RETRY which finishes the ioctl call. + * + * Copying data out works the same way. + * + * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel + * automatically initializes in and out iovs by decoding @cmd with + * _IOC_* macros and the server is not allowed to request RETRY. This + * limits ioctl data transfers to well-formed ioctls and is the forced + * behavior for all FUSE servers. 
+ */ +long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, + unsigned int flags) +{ + struct fuse_file *ff = file->private_data; + struct fuse_mount *fm = ff->fm; + struct fuse_ioctl_in inarg = { + .fh = ff->fh, + .cmd = cmd, + .arg = arg, + .flags = flags + }; + struct fuse_ioctl_out outarg; + struct iovec *iov_page = NULL; + struct iovec *in_iov = NULL, *out_iov = NULL; + unsigned int in_iovs = 0, out_iovs = 0, max_pages; + size_t in_size, out_size, c; + ssize_t transferred; + int err, i; + struct iov_iter ii; + struct fuse_args_pages ap = {}; + +#if BITS_PER_LONG == 32 + inarg.flags |= FUSE_IOCTL_32BIT; +#else + if (flags & FUSE_IOCTL_COMPAT) { + inarg.flags |= FUSE_IOCTL_32BIT; +#ifdef CONFIG_X86_X32 + if (in_x32_syscall()) + inarg.flags |= FUSE_IOCTL_COMPAT_X32; +#endif + } +#endif + + /* assume all the iovs returned by client always fits in a page */ + BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); + + err = -ENOMEM; + ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); + iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); + if (!ap.pages || !iov_page) + goto out; + + fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages); + + /* + * If restricted, initialize IO parameters as encoded in @cmd. + * RETRY from server is not allowed. + */ + if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { + struct iovec *iov = iov_page; + + iov->iov_base = (void __user *)arg; + iov->iov_len = _IOC_SIZE(cmd); + + if (_IOC_DIR(cmd) & _IOC_WRITE) { + in_iov = iov; + in_iovs = 1; + } + + if (_IOC_DIR(cmd) & _IOC_READ) { + out_iov = iov; + out_iovs = 1; + } + } + + retry: + inarg.in_size = in_size = iov_length(in_iov, in_iovs); + inarg.out_size = out_size = iov_length(out_iov, out_iovs); + + /* + * Out data can be used either for actual out data or iovs, + * make sure there always is at least one page. + */ + out_size = max_t(size_t, out_size, PAGE_SIZE); + max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE); + + /* make sure there are enough buffer pages and init request with them */ + err = -ENOMEM; + if (max_pages > fm->fc->max_pages) + goto out; + while (ap.num_pages < max_pages) { + ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (!ap.pages[ap.num_pages]) + goto out; + ap.num_pages++; + } + + + /* okay, let's send it to the client */ + ap.args.opcode = FUSE_IOCTL; + ap.args.nodeid = ff->nodeid; + ap.args.in_numargs = 1; + ap.args.in_args[0].size = sizeof(inarg); + ap.args.in_args[0].value = &inarg; + if (in_size) { + ap.args.in_numargs++; + ap.args.in_args[1].size = in_size; + ap.args.in_pages = true; + + err = -EFAULT; + iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size); + for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { + c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); + if (c != PAGE_SIZE && iov_iter_count(&ii)) + goto out; + } + } + + ap.args.out_numargs = 2; + ap.args.out_args[0].size = sizeof(outarg); + ap.args.out_args[0].value = &outarg; + ap.args.out_args[1].size = out_size; + ap.args.out_pages = true; + ap.args.out_argvar = true; + + transferred = fuse_simple_request(fm, &ap.args); + err = transferred; + if (transferred < 0) + goto out; + + /* did it ask for retry? 
*/ + if (outarg.flags & FUSE_IOCTL_RETRY) { + void *vaddr; + + /* no retry if in restricted mode */ + err = -EIO; + if (!(flags & FUSE_IOCTL_UNRESTRICTED)) + goto out; + + in_iovs = outarg.in_iovs; + out_iovs = outarg.out_iovs; + + /* + * Make sure things are in boundary, separate checks + * are to protect against overflow. + */ + err = -ENOMEM; + if (in_iovs > FUSE_IOCTL_MAX_IOV || + out_iovs > FUSE_IOCTL_MAX_IOV || + in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) + goto out; + + vaddr = kmap_atomic(ap.pages[0]); + err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, + transferred, in_iovs + out_iovs, + (flags & FUSE_IOCTL_COMPAT) != 0); + kunmap_atomic(vaddr); + if (err) + goto out; + + in_iov = iov_page; + out_iov = in_iov + in_iovs; + + err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs); + if (err) + goto out; + + err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs); + if (err) + goto out; + + goto retry; + } + + err = -EIO; + if (transferred > inarg.out_size) + goto out; + + err = -EFAULT; + iov_iter_init(&ii, READ, out_iov, out_iovs, transferred); + for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { + c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); + if (c != PAGE_SIZE && iov_iter_count(&ii)) + goto out; + } + err = 0; + out: + free_page((unsigned long) iov_page); + while (ap.num_pages) + __free_page(ap.pages[--ap.num_pages]); + kfree(ap.pages); + + return err ? err : outarg.result; +} +EXPORT_SYMBOL_GPL(fuse_do_ioctl); + +long fuse_ioctl_common(struct file *file, unsigned int cmd, + unsigned long arg, unsigned int flags) +{ + struct inode *inode = file_inode(file); + struct fuse_conn *fc = get_fuse_conn(inode); + + if (!fuse_allow_current_process(fc)) + return -EACCES; + + if (fuse_is_bad(inode)) + return -EIO; + + return fuse_do_ioctl(file, cmd, arg, flags); +} + +long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return fuse_ioctl_common(file, cmd, arg, 0); +} + +long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); +} + +static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff, + unsigned int cmd, void *ptr, size_t size) +{ + struct fuse_mount *fm = ff->fm; + struct fuse_ioctl_in inarg; + struct fuse_ioctl_out outarg; + FUSE_ARGS(args); + int err; + + memset(&inarg, 0, sizeof(inarg)); + inarg.fh = ff->fh; + inarg.cmd = cmd; + +#if BITS_PER_LONG == 32 + inarg.flags |= FUSE_IOCTL_32BIT; +#endif + if (S_ISDIR(inode->i_mode)) + inarg.flags |= FUSE_IOCTL_DIR; + + if (_IOC_DIR(cmd) & _IOC_READ) + inarg.out_size = size; + if (_IOC_DIR(cmd) & _IOC_WRITE) + inarg.in_size = size; + + args.opcode = FUSE_IOCTL; + args.nodeid = ff->nodeid; + args.in_numargs = 2; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.in_args[1].size = inarg.in_size; + args.in_args[1].value = ptr; + args.out_numargs = 2; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; + args.out_args[1].size = inarg.out_size; + args.out_args[1].value = ptr; + + err = fuse_simple_request(fm, &args); + if (!err && outarg.flags & FUSE_IOCTL_RETRY) + err = -EIO; + + return err; +} + +static struct fuse_file *fuse_priv_ioctl_prepare(struct inode *inode) +{ + struct fuse_mount *fm = get_fuse_mount(inode); + bool isdir = S_ISDIR(inode->i_mode); + + if (!S_ISREG(inode->i_mode) && !isdir) + return ERR_PTR(-ENOTTY); + + return fuse_file_open(fm, get_node_id(inode), O_RDONLY, isdir); +} + +static void 
fuse_priv_ioctl_cleanup(struct inode *inode, struct fuse_file *ff) +{ + fuse_file_release(inode, ff, O_RDONLY, NULL, S_ISDIR(inode->i_mode)); +} + +int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa) +{ + struct inode *inode = d_inode(dentry); + struct fuse_file *ff; + unsigned int flags; + struct fsxattr xfa; + int err; + + ff = fuse_priv_ioctl_prepare(inode); + if (IS_ERR(ff)) + return PTR_ERR(ff); + + if (fa->flags_valid) { + err = fuse_priv_ioctl(inode, ff, FS_IOC_GETFLAGS, + &flags, sizeof(flags)); + if (err) + goto cleanup; + + fileattr_fill_flags(fa, flags); + } else { + err = fuse_priv_ioctl(inode, ff, FS_IOC_FSGETXATTR, + &xfa, sizeof(xfa)); + if (err) + goto cleanup; + + fileattr_fill_xflags(fa, xfa.fsx_xflags); + fa->fsx_extsize = xfa.fsx_extsize; + fa->fsx_nextents = xfa.fsx_nextents; + fa->fsx_projid = xfa.fsx_projid; + fa->fsx_cowextsize = xfa.fsx_cowextsize; + } +cleanup: + fuse_priv_ioctl_cleanup(inode, ff); + + return err; +} + +int fuse_fileattr_set(struct user_namespace *mnt_userns, + struct dentry *dentry, struct fileattr *fa) +{ + struct inode *inode = d_inode(dentry); + struct fuse_file *ff; + unsigned int flags = fa->flags; + struct fsxattr xfa; + int err; + + ff = fuse_priv_ioctl_prepare(inode); + if (IS_ERR(ff)) + return PTR_ERR(ff); + + if (fa->flags_valid) { + err = fuse_priv_ioctl(inode, ff, FS_IOC_SETFLAGS, + &flags, sizeof(flags)); + if (err) + goto cleanup; + } else { + memset(&xfa, 0, sizeof(xfa)); + xfa.fsx_xflags = fa->fsx_xflags; + xfa.fsx_extsize = fa->fsx_extsize; + xfa.fsx_nextents = fa->fsx_nextents; + xfa.fsx_projid = fa->fsx_projid; + xfa.fsx_cowextsize = fa->fsx_cowextsize; + + err = fuse_priv_ioctl(inode, ff, FS_IOC_FSSETXATTR, + &xfa, sizeof(xfa)); + } + +cleanup: + fuse_priv_ioctl_cleanup(inode, ff); + + return err; +} diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index 3441ffa740f3..277f7041d55a 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -202,7 +202,7 @@ retry: inode = d_inode(dentry); if (!inode || get_node_id(inode) != o->nodeid || - ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { + inode_wrong_type(inode, o->attr.mode)) { d_invalidate(dentry); dput(dentry); goto retry; diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 8868ac31a3c0..bcb8a02e2d8b 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -18,6 +18,12 @@ #include <linux/uio.h> #include "fuse_i.h" +/* Used to help calculate the FUSE connection's max_pages limit for a request's + * size. Parts of the struct fuse_req are sliced into scattergather lists in + * addition to the pages used, so this can help account for that overhead. + */ +#define FUSE_HEADER_OVERHEAD 4 + /* List of virtio-fs device instances and a lock for the list. Also provides * mutual exclusion in device removal and mounting path */ @@ -127,11 +133,6 @@ static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) return &fs->vqs[vq->index]; } -static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) -{ - return &vq_to_fsvq(vq)->fud->pq; -} - /* Should be called with fsvq->lock held. 
*/ static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) { @@ -896,6 +897,7 @@ static int virtio_fs_probe(struct virtio_device *vdev) out_vqs: vdev->config->reset(vdev); virtio_fs_cleanup_vqs(vdev, fs); + kfree(fs->vqs); out: vdev->priv = NULL; @@ -1324,8 +1326,15 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) /* virtiofs allocates and installs its own fuse devices */ ctx->fudptr = NULL; - if (ctx->dax) + if (ctx->dax) { + if (!fs->dax_dev) { + err = -EINVAL; + pr_err("virtio-fs: dax can't be enabled as filesystem" + " device does not support it.\n"); + goto err_free_fuse_devs; + } ctx->dax_dev = fs->dax_dev; + } err = fuse_fill_super_common(sb, ctx); if (err < 0) goto err_free_fuse_devs; @@ -1406,9 +1415,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc) { struct virtio_fs *fs; struct super_block *sb; - struct fuse_conn *fc; + struct fuse_conn *fc = NULL; struct fuse_mount *fm; - int err; + unsigned int virtqueue_size; + int err = -EIO; /* This gets a reference on virtio_fs object. This ptr gets installed * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() @@ -1420,6 +1430,10 @@ static int virtio_fs_get_tree(struct fs_context *fsc) return -EINVAL; } + virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); + if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) + goto out_err; + err = -ENOMEM; fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); if (!fc) @@ -1429,12 +1443,15 @@ static int virtio_fs_get_tree(struct fs_context *fsc) if (!fm) goto out_err; - fuse_conn_init(fc, fm, get_user_ns(current_user_ns()), - &virtio_fs_fiq_ops, fs); + fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); fc->release = fuse_free_conn; fc->delete_stale = true; fc->auto_submounts = true; + /* Tell FUSE to split requests that exceed the virtqueue's size */ + fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, + virtqueue_size - FUSE_HEADER_OVERHEAD); + fsc->s_fs_info = fm; sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); if (fsc->s_fs_info) { diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 1a7d7ace54e1..61dfaf7b7d20 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -12,7 +12,7 @@ #include <linux/posix_acl_xattr.h> int fuse_setxattr(struct inode *inode, const char *name, const void *value, - size_t size, int flags) + size_t size, int flags, unsigned int extra_flags) { struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); @@ -25,10 +25,13 @@ int fuse_setxattr(struct inode *inode, const char *name, const void *value, memset(&inarg, 0, sizeof(inarg)); inarg.size = size; inarg.flags = flags; + inarg.setxattr_flags = extra_flags; + args.opcode = FUSE_SETXATTR; args.nodeid = get_node_id(inode); args.in_numargs = 3; - args.in_args[0].size = sizeof(inarg); + args.in_args[0].size = fm->fc->setxattr_ext ? + sizeof(inarg) : FUSE_COMPAT_SETXATTR_IN_SIZE; args.in_args[0].value = &inarg; args.in_args[1].size = strlen(name) + 1; args.in_args[1].value = name; @@ -199,7 +202,7 @@ static int fuse_xattr_set(const struct xattr_handler *handler, if (!value) return fuse_removexattr(inode, name); - return fuse_setxattr(inode, name, value, size, flags); + return fuse_setxattr(inode, name, value, size, flags, 0); } static bool no_xattr_list(struct dentry *dentry) |