summaryrefslogtreecommitdiff
path: root/fs/fuse/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fuse/file.c')
-rw-r--r--fs/fuse/file.c737
1 files changed, 431 insertions, 306 deletions
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index ceb9f7d23038..f597f7e68e50 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -19,6 +19,8 @@
#include <linux/uio.h>
#include <linux/fs.h>
#include <linux/filelock.h>
+#include <linux/splice.h>
+#include <linux/task_io_accounting_ops.h>
static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
unsigned int open_flags, int opcode,
@@ -49,13 +51,7 @@ static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
return fuse_simple_request(fm, &args);
}
-struct fuse_release_args {
- struct fuse_args args;
- struct fuse_release_in inarg;
- struct inode *inode;
-};
-
-struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
+struct fuse_file *fuse_file_alloc(struct fuse_mount *fm, bool release)
{
struct fuse_file *ff;
@@ -64,15 +60,15 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
return NULL;
ff->fm = fm;
- ff->release_args = kzalloc(sizeof(*ff->release_args),
- GFP_KERNEL_ACCOUNT);
- if (!ff->release_args) {
- kfree(ff);
- return NULL;
+ if (release) {
+ ff->args = kzalloc(sizeof(*ff->args), GFP_KERNEL_ACCOUNT);
+ if (!ff->args) {
+ kfree(ff);
+ return NULL;
+ }
}
INIT_LIST_HEAD(&ff->write_entry);
- mutex_init(&ff->readdir.lock);
refcount_set(&ff->count, 1);
RB_CLEAR_NODE(&ff->polled_node);
init_waitqueue_head(&ff->poll_wait);
@@ -84,8 +80,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
void fuse_file_free(struct fuse_file *ff)
{
- kfree(ff->release_args);
- mutex_destroy(&ff->readdir.lock);
+ kfree(ff->args);
kfree(ff);
}
@@ -104,14 +99,17 @@ static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
kfree(ra);
}
-static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
+static void fuse_file_put(struct fuse_file *ff, bool sync)
{
if (refcount_dec_and_test(&ff->count)) {
- struct fuse_args *args = &ff->release_args->args;
+ struct fuse_release_args *ra = &ff->args->release_args;
+ struct fuse_args *args = (ra ? &ra->args : NULL);
- if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
- /* Do nothing when client does not implement 'open' */
- fuse_release_end(ff->fm, args, 0);
+ if (ra && ra->inode)
+ fuse_file_io_release(ff, ra->inode);
+
+ if (!args) {
+ /* Do nothing when server does not implement 'open' */
} else if (sync) {
fuse_simple_request(ff->fm, args);
fuse_release_end(ff->fm, args, 0);
@@ -131,27 +129,31 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
struct fuse_conn *fc = fm->fc;
struct fuse_file *ff;
int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+ bool open = isdir ? !fc->no_opendir : !fc->no_open;
- ff = fuse_file_alloc(fm);
+ ff = fuse_file_alloc(fm, open);
if (!ff)
return ERR_PTR(-ENOMEM);
ff->fh = 0;
/* Default for no-open */
ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
- if (isdir ? !fc->no_opendir : !fc->no_open) {
- struct fuse_open_out outarg;
+ if (open) {
+ /* Store outarg for fuse_finish_open() */
+ struct fuse_open_out *outargp = &ff->args->open_outarg;
int err;
- err = fuse_send_open(fm, nodeid, open_flags, opcode, &outarg);
+ err = fuse_send_open(fm, nodeid, open_flags, opcode, outargp);
if (!err) {
- ff->fh = outarg.fh;
- ff->open_flags = outarg.open_flags;
-
+ ff->fh = outargp->fh;
+ ff->open_flags = outargp->open_flags;
} else if (err != -ENOSYS) {
fuse_file_free(ff);
return ERR_PTR(err);
} else {
+ /* No release needed */
+ kfree(ff->args);
+ ff->args = NULL;
if (isdir)
fc->no_opendir = 1;
else
@@ -194,40 +196,50 @@ static void fuse_link_write_file(struct file *file)
spin_unlock(&fi->lock);
}
-void fuse_finish_open(struct inode *inode, struct file *file)
+int fuse_finish_open(struct inode *inode, struct file *file)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = get_fuse_conn(inode);
+ int err;
+
+ err = fuse_file_io_open(file, inode);
+ if (err)
+ return err;
if (ff->open_flags & FOPEN_STREAM)
stream_open(inode, file);
else if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
- if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
- struct fuse_inode *fi = get_fuse_inode(inode);
-
- spin_lock(&fi->lock);
- fi->attr_version = atomic64_inc_return(&fc->attr_version);
- i_size_write(inode, 0);
- spin_unlock(&fi->lock);
- file_update_time(file);
- fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
- }
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
+
+ return 0;
}
-int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+static void fuse_truncate_update_attr(struct inode *inode, struct file *file)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ fi->attr_version = atomic64_inc_return(&fc->attr_version);
+ i_size_write(inode, 0);
+ spin_unlock(&fi->lock);
+ file_update_time(file);
+ fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
+}
+
+static int fuse_open(struct inode *inode, struct file *file)
{
struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = fm->fc;
+ struct fuse_file *ff;
int err;
- bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
- fc->atomic_o_trunc &&
- fc->writeback_cache;
- bool dax_truncate = (file->f_flags & O_TRUNC) &&
- fc->atomic_o_trunc && FUSE_IS_DAX(inode);
+ bool is_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc;
+ bool is_wb_truncate = is_truncate && fc->writeback_cache;
+ bool dax_truncate = is_truncate && FUSE_IS_DAX(inode);
if (fuse_is_bad(inode))
return -EIO;
@@ -241,7 +253,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err)
goto out_inode_unlock;
}
@@ -249,16 +261,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
if (is_wb_truncate || dax_truncate)
fuse_set_nowrite(inode);
- err = fuse_do_open(fm, get_node_id(inode), file, isdir);
- if (!err)
- fuse_finish_open(inode, file);
+ err = fuse_do_open(fm, get_node_id(inode), file, false);
+ if (!err) {
+ ff = file->private_data;
+ err = fuse_finish_open(inode, file);
+ if (err)
+ fuse_sync_release(fi, ff, file->f_flags);
+ else if (is_truncate)
+ fuse_truncate_update_attr(inode, file);
+ }
if (is_wb_truncate || dax_truncate)
fuse_release_nowrite(inode);
if (!err) {
- struct fuse_file *ff = file->private_data;
-
- if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
+ if (is_truncate)
truncate_pagecache(inode, 0);
else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
invalidate_inode_pages2(inode->i_mapping);
@@ -273,10 +289,13 @@ out_inode_unlock:
}
static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
- unsigned int flags, int opcode)
+ unsigned int flags, int opcode, bool sync)
{
struct fuse_conn *fc = ff->fm->fc;
- struct fuse_release_args *ra = ff->release_args;
+ struct fuse_release_args *ra = &ff->args->release_args;
+
+ if (fuse_file_passthrough(ff))
+ fuse_passthrough_release(ff, fuse_inode_backing(fi));
/* Inode is NULL on error path of fuse_create_open() */
if (likely(fi)) {
@@ -291,6 +310,11 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
wake_up_interruptible_all(&ff->poll_wait);
+ if (!ra)
+ return;
+
+ /* ff->args was used for open outarg */
+ memset(ff->args, 0, sizeof(*ff->args));
ra->inarg.fh = ff->fh;
ra->inarg.flags = flags;
ra->args.in_numargs = 1;
@@ -300,23 +324,28 @@ static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
ra->args.nodeid = ff->nodeid;
ra->args.force = true;
ra->args.nocreds = true;
+
+ /*
+ * Hold inode until release is finished.
+ * From fuse_sync_release() the refcount is 1 and everything's
+ * synchronous, so we are fine with not doing igrab() here.
+ */
+ ra->inode = sync ? NULL : igrab(&fi->inode);
}
void fuse_file_release(struct inode *inode, struct fuse_file *ff,
unsigned int open_flags, fl_owner_t id, bool isdir)
{
struct fuse_inode *fi = get_fuse_inode(inode);
- struct fuse_release_args *ra = ff->release_args;
+ struct fuse_release_args *ra = &ff->args->release_args;
int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;
- fuse_prepare_release(fi, ff, open_flags, opcode);
+ fuse_prepare_release(fi, ff, open_flags, opcode, false);
- if (ff->flock) {
+ if (ra && ff->flock) {
ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc, id);
}
- /* Hold inode until release is finished */
- ra->inode = igrab(inode);
/*
* Normally this will send the RELEASE request, however if
@@ -327,7 +356,7 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff,
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
*/
- fuse_file_put(ff, ff->fm->fc->destroy, isdir);
+ fuse_file_put(ff, ff->fm->fc->destroy);
}
void fuse_release_common(struct file *file, bool isdir)
@@ -336,11 +365,6 @@ void fuse_release_common(struct file *file, bool isdir)
(fl_owner_t) file, isdir);
}
-static int fuse_open(struct inode *inode, struct file *file)
-{
- return fuse_open_common(inode, file, false);
-}
-
static int fuse_release(struct inode *inode, struct file *file)
{
struct fuse_conn *fc = get_fuse_conn(inode);
@@ -362,12 +386,8 @@ void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff,
unsigned int flags)
{
WARN_ON(refcount_read(&ff->count) > 1);
- fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
- /*
- * iput(NULL) is a no-op and since the refcount is 1 and everything's
- * synchronous, we are fine with not doing igrab() here"
- */
- fuse_file_put(ff, true, false);
+ fuse_prepare_release(fi, ff, flags, FUSE_RELEASE, true);
+ fuse_file_put(ff, true);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);
@@ -428,9 +448,6 @@ static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
/*
* Check if any page in a range is under writeback
- *
- * This is currently done by walking the list of writepage requests
- * for the inode, which can be pretty inefficient.
*/
static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
pgoff_t idx_to)
@@ -438,6 +455,9 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
struct fuse_inode *fi = get_fuse_inode(inode);
bool found;
+ if (RB_EMPTY_ROOT(&fi->writepages))
+ return false;
+
spin_lock(&fi->lock);
found = fuse_find_writeback(fi, idx_from, idx_to);
spin_unlock(&fi->lock);
@@ -625,7 +645,7 @@ void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
args->out_args[0].size = count;
}
-static void fuse_release_user_pages(struct fuse_args_pages *ap,
+static void fuse_release_user_pages(struct fuse_args_pages *ap, ssize_t nres,
bool should_dirty)
{
unsigned int i;
@@ -633,8 +653,12 @@ static void fuse_release_user_pages(struct fuse_args_pages *ap,
for (i = 0; i < ap->num_pages; i++) {
if (should_dirty)
set_page_dirty_lock(ap->pages[i]);
- put_page(ap->pages[i]);
+ if (ap->args.is_pinned)
+ unpin_user_page(ap->pages[i]);
}
+
+ if (nres > 0 && ap->args.invalidate_vmap)
+ invalidate_kernel_vmap_range(ap->args.vmap_base, nres);
}
static void fuse_io_release(struct kref *kref)
@@ -733,25 +757,29 @@ static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args);
struct fuse_io_priv *io = ia->io;
ssize_t pos = -1;
-
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ size_t nres;
if (err) {
/* Nothing */
} else if (io->write) {
if (ia->write.out.size > ia->write.in.size) {
err = -EIO;
- } else if (ia->write.in.size != ia->write.out.size) {
- pos = ia->write.in.offset - io->offset +
- ia->write.out.size;
+ } else {
+ nres = ia->write.out.size;
+ if (ia->write.in.size != ia->write.out.size)
+ pos = ia->write.in.offset - io->offset +
+ ia->write.out.size;
}
} else {
u32 outsize = args->out_args[0].size;
+ nres = outsize;
if (ia->read.in.size != outsize)
pos = ia->read.in.offset - io->offset + outsize;
}
+ fuse_release_user_pages(&ia->ap, err ?: nres, io->should_dirty);
+
fuse_aio_complete(io, err, pos);
fuse_io_free(ia);
}
@@ -914,17 +942,13 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
}
for (i = 0; i < ap->num_pages; i++) {
- struct page *page = ap->pages[i];
+ struct folio *folio = page_folio(ap->pages[i]);
- if (!err)
- SetPageUptodate(page);
- else
- SetPageError(page);
- unlock_page(page);
- put_page(page);
+ folio_end_read(folio, !err);
+ folio_put(folio);
}
if (ia->ff)
- fuse_file_put(ia->ff, false, false);
+ fuse_file_put(ia->ff, false);
fuse_io_free(ia);
}
@@ -1298,13 +1322,94 @@ static ssize_t fuse_perform_write(struct kiocb *iocb, struct iov_iter *ii)
return res;
}
+static bool fuse_io_past_eof(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
+}
+
+/*
+ * @return true if an exclusive lock for direct IO writes is needed
+ */
+static bool fuse_dio_wr_exclusive_lock(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct fuse_file *ff = file->private_data;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ /* Server side has to advise that it supports parallel dio writes. */
+ if (!(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES))
+ return true;
+
+ /*
+ * Append will need to know the eventual EOF - always needs an
+ * exclusive lock.
+ */
+ if (iocb->ki_flags & IOCB_APPEND)
+ return true;
+
+ /* shared locks are not allowed with parallel page cache IO */
+ if (test_bit(FUSE_I_CACHE_IO_MODE, &fi->state))
+ return true;
+
+ /* Parallel dio beyond EOF is not supported, at least for now. */
+ if (fuse_io_past_eof(iocb, from))
+ return true;
+
+ return false;
+}
+
+static void fuse_dio_lock(struct kiocb *iocb, struct iov_iter *from,
+ bool *exclusive)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ *exclusive = fuse_dio_wr_exclusive_lock(iocb, from);
+ if (*exclusive) {
+ inode_lock(inode);
+ } else {
+ inode_lock_shared(inode);
+ /*
+ * New parallal dio allowed only if inode is not in caching
+ * mode and denies new opens in caching mode. This check
+ * should be performed only after taking shared inode lock.
+ * Previous past eof check was without inode lock and might
+ * have raced, so check it again.
+ */
+ if (fuse_io_past_eof(iocb, from) ||
+ fuse_inode_uncached_io_start(fi, NULL) != 0) {
+ inode_unlock_shared(inode);
+ inode_lock(inode);
+ *exclusive = true;
+ }
+ }
+}
+
+static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ if (exclusive) {
+ inode_unlock(inode);
+ } else {
+ /* Allow opens in caching mode after last parallel dio end */
+ fuse_inode_uncached_io_end(fi);
+ inode_unlock_shared(inode);
+ }
+}
+
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct mnt_idmap *idmap = file_mnt_idmap(file);
struct address_space *mapping = file->f_mapping;
ssize_t written = 0;
struct inode *inode = mapping->host;
- ssize_t err;
+ ssize_t err, count;
struct fuse_conn *fc = get_fuse_conn(inode);
if (fc->writeback_cache) {
@@ -1315,7 +1420,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
return err;
if (fc->handle_killpriv_v2 &&
- setattr_should_drop_suidgid(&nop_mnt_idmap,
+ setattr_should_drop_suidgid(idmap,
file_inode(file))) {
goto writethrough;
}
@@ -1326,10 +1431,12 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
writethrough:
inode_lock(inode);
- err = generic_write_checks(iocb, from);
+ err = count = generic_write_checks(iocb, from);
if (err <= 0)
goto out;
+ task_io_account_write(count);
+
err = file_remove_privs(file);
if (err)
goto out;
@@ -1368,33 +1475,49 @@ static inline size_t fuse_get_frag_size(const struct iov_iter *ii,
static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
size_t *nbytesp, int write,
- unsigned int max_pages)
+ unsigned int max_pages,
+ bool use_pages_for_kvec_io)
{
+ bool flush_or_invalidate = false;
size_t nbytes = 0; /* # bytes already packed in req */
ssize_t ret = 0;
- /* Special case for kernel I/O: can copy directly into the buffer */
+ /* Special case for kernel I/O: can copy directly into the buffer.
+ * However if the implementation of fuse_conn requires pages instead of
+ * pointer (e.g., virtio-fs), use iov_iter_extract_pages() instead.
+ */
if (iov_iter_is_kvec(ii)) {
- unsigned long user_addr = fuse_get_user_addr(ii);
- size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
+ void *user_addr = (void *)fuse_get_user_addr(ii);
- if (write)
- ap->args.in_args[1].value = (void *) user_addr;
- else
- ap->args.out_args[0].value = (void *) user_addr;
+ if (!use_pages_for_kvec_io) {
+ size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
- iov_iter_advance(ii, frag_size);
- *nbytesp = frag_size;
- return 0;
+ if (write)
+ ap->args.in_args[1].value = user_addr;
+ else
+ ap->args.out_args[0].value = user_addr;
+
+ iov_iter_advance(ii, frag_size);
+ *nbytesp = frag_size;
+ return 0;
+ }
+
+ if (is_vmalloc_addr(user_addr)) {
+ ap->args.vmap_base = user_addr;
+ flush_or_invalidate = true;
+ }
}
while (nbytes < *nbytesp && ap->num_pages < max_pages) {
unsigned npages;
size_t start;
- ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages],
- *nbytesp - nbytes,
- max_pages - ap->num_pages,
- &start);
+ struct page **pt_pages;
+
+ pt_pages = &ap->pages[ap->num_pages];
+ ret = iov_iter_extract_pages(ii, &pt_pages,
+ *nbytesp - nbytes,
+ max_pages - ap->num_pages,
+ 0, &start);
if (ret < 0)
break;
@@ -1411,6 +1534,11 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
}
+ if (write && flush_or_invalidate)
+ flush_kernel_vmap_range(ap->args.vmap_base, nbytes);
+
+ ap->args.invalidate_vmap = !write && flush_or_invalidate;
+ ap->args.is_pinned = iov_iter_extract_will_pin(ii);
ap->args.user_pages = true;
if (write)
ap->args.in_pages = true;
@@ -1478,7 +1606,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
size_t nbytes = min(count, nmax);
err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write,
- max_pages);
+ max_pages, fc->use_pages_for_kvec_io);
if (err && !nbytes)
break;
@@ -1492,7 +1620,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
}
if (!io->async || nres < 0) {
- fuse_release_user_pages(&ia->ap, io->should_dirty);
+ fuse_release_user_pages(&ia->ap, nres, io->should_dirty);
fuse_io_free(ia);
}
ia = NULL;
@@ -1557,51 +1685,17 @@ static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
return res;
}
-static bool fuse_direct_write_extending_i_size(struct kiocb *iocb,
- struct iov_iter *iter)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
-
- return iocb->ki_pos + iov_iter_count(iter) > i_size_read(inode);
-}
-
static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct inode *inode = file_inode(iocb->ki_filp);
- struct file *file = iocb->ki_filp;
- struct fuse_file *ff = file->private_data;
struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb);
ssize_t res;
- bool exclusive_lock =
- !(ff->open_flags & FOPEN_PARALLEL_DIRECT_WRITES) ||
- get_fuse_conn(inode)->direct_io_allow_mmap ||
- iocb->ki_flags & IOCB_APPEND ||
- fuse_direct_write_extending_i_size(iocb, from);
-
- /*
- * Take exclusive lock if
- * - Parallel direct writes are disabled - a user space decision
- * - Parallel direct writes are enabled and i_size is being extended.
- * - Shared mmap on direct_io file is supported (FUSE_DIRECT_IO_ALLOW_MMAP).
- * This might not be needed at all, but needs further investigation.
- */
- if (exclusive_lock)
- inode_lock(inode);
- else {
- inode_lock_shared(inode);
-
- /* A race with truncate might have come up as the decision for
- * the lock type was done without holding the lock, check again.
- */
- if (fuse_direct_write_extending_i_size(iocb, from)) {
- inode_unlock_shared(inode);
- inode_lock(inode);
- exclusive_lock = true;
- }
- }
+ bool exclusive;
+ fuse_dio_lock(iocb, from, &exclusive);
res = generic_write_checks(iocb, from);
if (res > 0) {
+ task_io_account_write(res);
if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) {
res = fuse_direct_IO(iocb, from);
} else {
@@ -1610,10 +1704,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
fuse_write_update_attr(inode, iocb->ki_pos, res);
}
}
- if (exclusive_lock)
- inode_unlock(inode);
- else
- inode_unlock_shared(inode);
+ fuse_dio_unlock(iocb, exclusive);
return res;
}
@@ -1630,10 +1721,13 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (FUSE_IS_DAX(inode))
return fuse_dax_read_iter(iocb, to);
- if (!(ff->open_flags & FOPEN_DIRECT_IO))
- return fuse_cache_read_iter(iocb, to);
- else
+ /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+ if (ff->open_flags & FOPEN_DIRECT_IO)
return fuse_direct_read_iter(iocb, to);
+ else if (fuse_file_passthrough(ff))
+ return fuse_passthrough_read_iter(iocb, to);
+ else
+ return fuse_cache_read_iter(iocb, to);
}
static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
@@ -1648,10 +1742,38 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (FUSE_IS_DAX(inode))
return fuse_dax_write_iter(iocb, from);
- if (!(ff->open_flags & FOPEN_DIRECT_IO))
+ /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+ if (ff->open_flags & FOPEN_DIRECT_IO)
+ return fuse_direct_write_iter(iocb, from);
+ else if (fuse_file_passthrough(ff))
+ return fuse_passthrough_write_iter(iocb, from);
+ else
return fuse_cache_write_iter(iocb, from);
+}
+
+static ssize_t fuse_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct fuse_file *ff = in->private_data;
+
+ /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+ if (fuse_file_passthrough(ff) && !(ff->open_flags & FOPEN_DIRECT_IO))
+ return fuse_passthrough_splice_read(in, ppos, pipe, len, flags);
else
- return fuse_direct_write_iter(iocb, from);
+ return filemap_splice_read(in, ppos, pipe, len, flags);
+}
+
+static ssize_t fuse_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ loff_t *ppos, size_t len, unsigned int flags)
+{
+ struct fuse_file *ff = out->private_data;
+
+ /* FOPEN_DIRECT_IO overrides FOPEN_PASSTHROUGH */
+ if (fuse_file_passthrough(ff) && !(ff->open_flags & FOPEN_DIRECT_IO))
+ return fuse_passthrough_splice_write(pipe, out, ppos, len, flags);
+ else
+ return iter_file_splice_write(pipe, out, ppos, len, flags);
}
static void fuse_writepage_free(struct fuse_writepage_args *wpa)
@@ -1665,27 +1787,31 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
for (i = 0; i < ap->num_pages; i++)
__free_page(ap->pages[i]);
- if (wpa->ia.ff)
- fuse_file_put(wpa->ia.ff, false, false);
+ fuse_file_put(wpa->ia.ff, false);
kfree(ap->pages);
kfree(wpa);
}
-static void fuse_writepage_finish(struct fuse_mount *fm,
- struct fuse_writepage_args *wpa)
+static void fuse_writepage_finish_stat(struct inode *inode, struct page *page)
+{
+ struct backing_dev_info *bdi = inode_to_bdi(inode);
+
+ dec_wb_stat(&bdi->wb, WB_WRITEBACK);
+ dec_node_page_state(page, NR_WRITEBACK_TEMP);
+ wb_writeout_inc(&bdi->wb);
+}
+
+static void fuse_writepage_finish(struct fuse_writepage_args *wpa)
{
struct fuse_args_pages *ap = &wpa->ia.ap;
struct inode *inode = wpa->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
- struct backing_dev_info *bdi = inode_to_bdi(inode);
int i;
- for (i = 0; i < ap->num_pages; i++) {
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
- wb_writeout_inc(&bdi->wb);
- }
+ for (i = 0; i < ap->num_pages; i++)
+ fuse_writepage_finish_stat(inode, ap->pages[i]);
+
wake_up(&fi->page_waitq);
}
@@ -1732,19 +1858,14 @@ __acquires(fi->lock)
out_free:
fi->writectr--;
rb_erase(&wpa->writepages_entry, &fi->writepages);
- fuse_writepage_finish(fm, wpa);
+ fuse_writepage_finish(wpa);
spin_unlock(&fi->lock);
/* After rb_erase() aux request list is private */
for (aux = wpa->next; aux; aux = next) {
- struct backing_dev_info *bdi = inode_to_bdi(aux->inode);
-
next = aux->next;
aux->next = NULL;
-
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_node_page_state(aux->ia.ap.pages[0], NR_WRITEBACK_TEMP);
- wb_writeout_inc(&bdi->wb);
+ fuse_writepage_finish_stat(aux->inode, aux->ia.ap.pages[0]);
fuse_writepage_free(aux);
}
@@ -1839,7 +1960,6 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
wpa->next = next->next;
next->next = NULL;
- next->ia.ff = fuse_file_get(wpa->ia.ff);
tree_insert(&fi->writepages, next);
/*
@@ -1868,7 +1988,7 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
fuse_send_writepage(fm, next, inarg->offset + inarg->size);
}
fi->writectr--;
- fuse_writepage_finish(fm, wpa);
+ fuse_writepage_finish(wpa);
spin_unlock(&fi->lock);
fuse_writepage_free(wpa);
}
@@ -1914,7 +2034,7 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
ff = __fuse_write_file_get(fi);
err = fuse_flush_times(inode, ff);
if (ff)
- fuse_file_put(ff, false, false);
+ fuse_file_put(ff, false);
return err;
}
@@ -1952,49 +2072,77 @@ static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
rcu_read_unlock();
}
-static int fuse_writepage_locked(struct page *page)
+static void fuse_writepage_args_page_fill(struct fuse_writepage_args *wpa, struct folio *folio,
+ struct folio *tmp_folio, uint32_t page_index)
{
- struct address_space *mapping = page->mapping;
- struct inode *inode = mapping->host;
+ struct inode *inode = folio->mapping->host;
+ struct fuse_args_pages *ap = &wpa->ia.ap;
+
+ folio_copy(tmp_folio, folio);
+
+ ap->pages[page_index] = &tmp_folio->page;
+ ap->descs[page_index].offset = 0;
+ ap->descs[page_index].length = PAGE_SIZE;
+
+ inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
+ inc_node_page_state(&tmp_folio->page, NR_WRITEBACK_TEMP);
+}
+
+static struct fuse_writepage_args *fuse_writepage_args_setup(struct folio *folio,
+ struct fuse_file *ff)
+{
+ struct inode *inode = folio->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_writepage_args *wpa;
struct fuse_args_pages *ap;
- struct page *tmp_page;
- int error = -ENOMEM;
-
- set_page_writeback(page);
wpa = fuse_writepage_args_alloc();
if (!wpa)
- goto err;
+ return NULL;
+
+ fuse_writepage_add_to_bucket(fc, wpa);
+ fuse_write_args_fill(&wpa->ia, ff, folio_pos(folio), 0);
+ wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
+ wpa->inode = inode;
+ wpa->ia.ff = ff;
+
ap = &wpa->ia.ap;
+ ap->args.in_pages = true;
+ ap->args.end = fuse_writepage_end;
- tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (!tmp_page)
- goto err_free;
+ return wpa;
+}
+
+static int fuse_writepage_locked(struct folio *folio)
+{
+ struct address_space *mapping = folio->mapping;
+ struct inode *inode = mapping->host;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_writepage_args *wpa;
+ struct fuse_args_pages *ap;
+ struct folio *tmp_folio;
+ struct fuse_file *ff;
+ int error = -ENOMEM;
+
+ tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
+ if (!tmp_folio)
+ goto err;
error = -EIO;
- wpa->ia.ff = fuse_write_file_get(fi);
- if (!wpa->ia.ff)
+ ff = fuse_write_file_get(fi);
+ if (!ff)
goto err_nofile;
- fuse_writepage_add_to_bucket(fc, wpa);
- fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
+ wpa = fuse_writepage_args_setup(folio, ff);
+ error = -ENOMEM;
+ if (!wpa)
+ goto err_writepage_args;
- copy_highpage(tmp_page, page);
- wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
- wpa->next = NULL;
- ap->args.in_pages = true;
+ ap = &wpa->ia.ap;
ap->num_pages = 1;
- ap->pages[0] = tmp_page;
- ap->descs[0].offset = 0;
- ap->descs[0].length = PAGE_SIZE;
- ap->args.end = fuse_writepage_end;
- wpa->inode = inode;
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ folio_start_writeback(folio);
+ fuse_writepage_args_page_fill(wpa, folio, tmp_folio, 0);
spin_lock(&fi->lock);
tree_insert(&fi->writepages, wpa);
@@ -2002,48 +2150,19 @@ static int fuse_writepage_locked(struct page *page)
fuse_flush_writepages(inode);
spin_unlock(&fi->lock);
- end_page_writeback(page);
+ folio_end_writeback(folio);
return 0;
+err_writepage_args:
+ fuse_file_put(ff, false);
err_nofile:
- __free_page(tmp_page);
-err_free:
- kfree(wpa);
+ folio_put(tmp_folio);
err:
- mapping_set_error(page->mapping, error);
- end_page_writeback(page);
+ mapping_set_error(folio->mapping, error);
return error;
}
-static int fuse_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct fuse_conn *fc = get_fuse_conn(page->mapping->host);
- int err;
-
- if (fuse_page_is_writeback(page->mapping->host, page->index)) {
- /*
- * ->writepages() should be called for sync() and friends. We
- * should only get here on direct reclaim and then we are
- * allowed to skip a page which is already in flight
- */
- WARN_ON(wbc->sync_mode == WB_SYNC_ALL);
-
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
-
- if (wbc->sync_mode == WB_SYNC_NONE &&
- fc->num_background >= fc->congestion_threshold)
- return AOP_WRITEPAGE_ACTIVATE;
-
- err = fuse_writepage_locked(page);
- unlock_page(page);
-
- return err;
-}
-
struct fuse_fill_wb_data {
struct fuse_writepage_args *wpa;
struct fuse_file *ff;
@@ -2086,7 +2205,6 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)
int num_pages = wpa->ia.ap.num_pages;
int i;
- wpa->ia.ff = fuse_file_get(data->ff);
spin_lock(&fi->lock);
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
fuse_flush_writepages(inode);
@@ -2141,11 +2259,7 @@ static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa,
spin_unlock(&fi->lock);
if (tmp) {
- struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode);
-
- dec_wb_stat(&bdi->wb, WB_WRITEBACK);
- dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP);
- wb_writeout_inc(&bdi->wb);
+ fuse_writepage_finish_stat(new_wpa->inode, new_ap->pages[0]);
fuse_writepage_free(new_wpa);
}
@@ -2195,7 +2309,7 @@ static int fuse_writepages_fill(struct folio *folio,
struct inode *inode = data->inode;
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = get_fuse_conn(inode);
- struct page *tmp_page;
+ struct folio *tmp_folio;
int err;
if (!data->ff) {
@@ -2211,8 +2325,8 @@ static int fuse_writepages_fill(struct folio *folio,
}
err = -ENOMEM;
- tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
- if (!tmp_page)
+ tmp_folio = folio_alloc(GFP_NOFS | __GFP_HIGHMEM, 0);
+ if (!tmp_folio)
goto out_unlock;
/*
@@ -2230,35 +2344,20 @@ static int fuse_writepages_fill(struct folio *folio,
*/
if (data->wpa == NULL) {
err = -ENOMEM;
- wpa = fuse_writepage_args_alloc();
+ wpa = fuse_writepage_args_setup(folio, data->ff);
if (!wpa) {
- __free_page(tmp_page);
+ folio_put(tmp_folio);
goto out_unlock;
}
- fuse_writepage_add_to_bucket(fc, wpa);
-
+ fuse_file_get(wpa->ia.ff);
data->max_pages = 1;
-
ap = &wpa->ia.ap;
- fuse_write_args_fill(&wpa->ia, data->ff, folio_pos(folio), 0);
- wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE;
- wpa->next = NULL;
- ap->args.in_pages = true;
- ap->args.end = fuse_writepage_end;
- ap->num_pages = 0;
- wpa->inode = inode;
}
folio_start_writeback(folio);
- copy_highpage(tmp_page, &folio->page);
- ap->pages[ap->num_pages] = tmp_page;
- ap->descs[ap->num_pages].offset = 0;
- ap->descs[ap->num_pages].length = PAGE_SIZE;
+ fuse_writepage_args_page_fill(wpa, folio, tmp_folio, ap->num_pages);
data->orig_pages[ap->num_pages] = &folio->page;
- inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK);
- inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
-
err = 0;
if (data->wpa) {
/*
@@ -2312,7 +2411,7 @@ static int fuse_writepages(struct address_space *mapping,
fuse_writepages_send(&data);
}
if (data.ff)
- fuse_file_put(data.ff, false, false);
+ fuse_file_put(data.ff, false);
kfree(data.orig_pages);
out:
@@ -2324,76 +2423,77 @@ out:
* but how to implement it without killing performance need more thinking.
*/
static int fuse_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
pgoff_t index = pos >> PAGE_SHIFT;
struct fuse_conn *fc = get_fuse_conn(file_inode(file));
- struct page *page;
+ struct folio *folio;
loff_t fsize;
int err = -ENOMEM;
WARN_ON(!fc->writeback_cache);
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
+ folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
goto error;
- fuse_wait_on_page_writeback(mapping->host, page->index);
+ fuse_wait_on_page_writeback(mapping->host, folio->index);
- if (PageUptodate(page) || len == PAGE_SIZE)
+ if (folio_test_uptodate(folio) || len >= folio_size(folio))
goto success;
/*
- * Check if the start this page comes after the end of file, in which
- * case the readpage can be optimized away.
+ * Check if the start of this folio comes after the end of file,
+ * in which case the readpage can be optimized away.
*/
fsize = i_size_read(mapping->host);
- if (fsize <= (pos & PAGE_MASK)) {
- size_t off = pos & ~PAGE_MASK;
+ if (fsize <= folio_pos(folio)) {
+ size_t off = offset_in_folio(folio, pos);
if (off)
- zero_user_segment(page, 0, off);
+ folio_zero_segment(folio, 0, off);
goto success;
}
- err = fuse_do_readpage(file, page);
+ err = fuse_do_readpage(file, &folio->page);
if (err)
goto cleanup;
success:
- *pagep = page;
+ *foliop = folio;
return 0;
cleanup:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
error:
return err;
}
static int fuse_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
/* Haven't copied anything? Skip zeroing, size extending, dirtying. */
if (!copied)
goto unlock;
pos += copied;
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
/* Zero any unwritten bytes at the end of the page */
size_t endoff = pos & ~PAGE_MASK;
if (endoff)
- zero_user_segment(page, endoff, PAGE_SIZE);
- SetPageUptodate(page);
+ folio_zero_segment(folio, endoff, PAGE_SIZE);
+ folio_mark_uptodate(folio);
}
if (pos > inode->i_size)
i_size_write(inode, pos);
- set_page_dirty(page);
+ folio_mark_dirty(folio);
unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
return copied;
}
@@ -2406,7 +2506,7 @@ static int fuse_launder_folio(struct folio *folio)
/* Serialize with pending writeback for the same page */
fuse_wait_on_page_writeback(inode, folio->index);
- err = fuse_writepage_locked(&folio->page);
+ err = fuse_writepage_locked(folio);
if (!err)
fuse_wait_on_page_writeback(inode, folio->index);
}
@@ -2467,11 +2567,27 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fm->fc;
+ struct inode *inode = file_inode(file);
+ int rc;
/* DAX mmap is superior to direct_io mmap */
- if (FUSE_IS_DAX(file_inode(file)))
+ if (FUSE_IS_DAX(inode))
return fuse_dax_mmap(file, vma);
+ /*
+ * If inode is in passthrough io mode, because it has some file open
+ * in passthrough mode, either mmap to backing file or fail mmap,
+ * because mixing cached mmap and passthrough io mode is not allowed.
+ */
+ if (fuse_file_passthrough(ff))
+ return fuse_passthrough_mmap(file, vma);
+ else if (fuse_inode_backing(get_fuse_inode(inode)))
+ return -ENODEV;
+
+ /*
+ * FOPEN_DIRECT_IO handling is special compared to O_DIRECT,
+ * as does not allow MAP_SHARED mmap without FUSE_DIRECT_IO_ALLOW_MMAP.
+ */
if (ff->open_flags & FOPEN_DIRECT_IO) {
/*
* Can't provide the coherency needed for MAP_SHARED
@@ -2486,6 +2602,17 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
/* MAP_PRIVATE */
return generic_file_mmap(file, vma);
}
+
+ /*
+ * First mmap of direct_io file enters caching inode io mode.
+ * Also waits for parallel dio writers to go into serial mode
+ * (exclusive instead of shared lock).
+ * After first mmap, the inode stays in caching io mode until
+ * the direct_io file release.
+ */
+ rc = fuse_file_cached_io_open(inode, ff);
+ if (rc)
+ return rc;
}
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
@@ -2518,14 +2645,14 @@ static int convert_fuse_file_lock(struct fuse_conn *fc,
* translate it into the caller's pid namespace.
*/
rcu_read_lock();
- fl->fl_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
+ fl->c.flc_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns);
rcu_read_unlock();
break;
default:
return -EIO;
}
- fl->fl_type = ffl->type;
+ fl->c.flc_type = ffl->type;
return 0;
}
@@ -2539,10 +2666,10 @@ static void fuse_lk_fill(struct fuse_args *args, struct file *file,
memset(inarg, 0, sizeof(*inarg));
inarg->fh = ff->fh;
- inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
+ inarg->owner = fuse_lock_owner_id(fc, fl->c.flc_owner);
inarg->lk.start = fl->fl_start;
inarg->lk.end = fl->fl_end;
- inarg->lk.type = fl->fl_type;
+ inarg->lk.type = fl->c.flc_type;
inarg->lk.pid = pid;
if (flock)
inarg->lk_flags |= FUSE_LK_FLOCK;
@@ -2579,8 +2706,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
struct fuse_mount *fm = get_fuse_mount(inode);
FUSE_ARGS(args);
struct fuse_lk_in inarg;
- int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
- struct pid *pid = fl->fl_type != F_UNLCK ? task_tgid(current) : NULL;
+ int opcode = (fl->c.flc_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
+ struct pid *pid = fl->c.flc_type != F_UNLCK ? task_tgid(current) : NULL;
pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns);
int err;
@@ -2589,10 +2716,6 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
return -ENOLCK;
}
- /* Unlock on close is handled by the flush method */
- if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
- return 0;
-
fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg);
err = fuse_simple_request(fm, &args);
@@ -2880,7 +3003,7 @@ static void fuse_do_truncate(struct file *file)
attr.ia_file = file;
attr.ia_valid |= ATTR_FILE;
- fuse_do_setattr(file_dentry(file), &attr, file);
+ fuse_do_setattr(file_mnt_idmap(file), file_dentry(file), &attr, file);
}
static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off)
@@ -3023,7 +3146,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
inode_lock(inode);
if (block_faults) {
filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err)
goto out;
}
@@ -3205,8 +3328,8 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
len, flags);
if (ret == -EOPNOTSUPP || ret == -EXDEV)
- ret = generic_copy_file_range(src_file, src_off, dst_file,
- dst_off, len, flags);
+ ret = splice_copy_file_range(src_file, src_off, dst_file,
+ dst_off, len);
return ret;
}
@@ -3222,8 +3345,8 @@ static const struct file_operations fuse_file_operations = {
.lock = fuse_file_lock,
.get_unmapped_area = thp_get_unmapped_area,
.flock = fuse_file_flock,
- .splice_read = filemap_splice_read,
- .splice_write = iter_file_splice_write,
+ .splice_read = fuse_splice_read,
+ .splice_write = fuse_splice_write,
.unlocked_ioctl = fuse_file_ioctl,
.compat_ioctl = fuse_file_compat_ioctl,
.poll = fuse_file_poll,
@@ -3234,10 +3357,10 @@ static const struct file_operations fuse_file_operations = {
static const struct address_space_operations fuse_file_aops = {
.read_folio = fuse_read_folio,
.readahead = fuse_readahead,
- .writepage = fuse_writepage,
.writepages = fuse_writepages,
.launder_folio = fuse_launder_folio,
.dirty_folio = filemap_dirty_folio,
+ .migrate_folio = filemap_migrate_folio,
.bmap = fuse_bmap,
.direct_IO = fuse_direct_IO,
.write_begin = fuse_write_begin,
@@ -3254,7 +3377,9 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
fi->writectr = 0;
+ fi->iocachectr = 0;
init_waitqueue_head(&fi->page_waitq);
+ init_waitqueue_head(&fi->direct_io_waitq);
fi->writepages = RB_ROOT;
if (IS_ENABLED(CONFIG_FUSE_DAX))