summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2025-06-19 16:41:08 +0300
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2025-06-19 16:41:08 +0300
commit3394ade23d7e3934a166e973e664046ea6f559d4 (patch)
treef3078f5f2829da66e1e29e9d83917814af2f2356 /fs
parentb0d369dbad20a8c9702607e7294d63c4a0c8e0fe (diff)
parenta2b47f77e740a21dbdcb12e2f2ca3c840299545a (diff)
downloadlinux-3394ade23d7e3934a166e973e664046ea6f559d4.tar.xz
Merge v6.15.3
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c6
-rw-r--r--fs/afs/write.c9
-rw-r--r--fs/btrfs/extent-io-tree.c6
-rw-r--r--fs/btrfs/file.c2
-rw-r--r--fs/btrfs/inode.c7
-rw-r--r--fs/btrfs/scrub.c34
-rw-r--r--fs/btrfs/tree-log.c24
-rw-r--r--fs/cachefiles/io.c16
-rw-r--r--fs/ceph/addr.c6
-rw-r--r--fs/dax.c2
-rw-r--r--fs/erofs/fscache.c6
-rw-r--r--fs/erofs/super.c49
-rw-r--r--fs/f2fs/data.c6
-rw-r--r--fs/f2fs/f2fs.h46
-rw-r--r--fs/f2fs/gc.c3
-rw-r--r--fs/f2fs/namei.c10
-rw-r--r--fs/f2fs/segment.c12
-rw-r--r--fs/f2fs/segment.h45
-rw-r--r--fs/f2fs/super.c45
-rw-r--r--fs/filesystems.c14
-rw-r--r--fs/gfs2/aops.c54
-rw-r--r--fs/gfs2/aops.h3
-rw-r--r--fs/gfs2/bmap.c3
-rw-r--r--fs/gfs2/glock.c3
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/incore.h9
-rw-r--r--fs/gfs2/inode.c98
-rw-r--r--fs/gfs2/inode.h1
-rw-r--r--fs/gfs2/log.c7
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/meta_io.h4
-rw-r--r--fs/gfs2/ops_fstype.c35
-rw-r--r--fs/gfs2/super.c90
-rw-r--r--fs/gfs2/sys.c1
-rw-r--r--fs/gfs2/trans.c21
-rw-r--r--fs/gfs2/trans.h2
-rw-r--r--fs/gfs2/xattr.c11
-rw-r--r--fs/gfs2/xattr.h2
-rw-r--r--fs/iomap/buffered-io.c2
-rw-r--r--fs/kernfs/dir.c5
-rw-r--r--fs/kernfs/file.c3
-rw-r--r--fs/mount.h5
-rw-r--r--fs/namespace.c118
-rw-r--r--fs/netfs/buffered_read.c32
-rw-r--r--fs/netfs/buffered_write.c2
-rw-r--r--fs/netfs/direct_read.c13
-rw-r--r--fs/netfs/direct_write.c12
-rw-r--r--fs/netfs/fscache_io.c10
-rw-r--r--fs/netfs/internal.h42
-rw-r--r--fs/netfs/main.c1
-rw-r--r--fs/netfs/misc.c219
-rw-r--r--fs/netfs/objects.c48
-rw-r--r--fs/netfs/read_collect.c185
-rw-r--r--fs/netfs/read_pgpriv2.c4
-rw-r--r--fs/netfs/read_retry.c26
-rw-r--r--fs/netfs/read_single.c6
-rw-r--r--fs/netfs/write_collect.c81
-rw-r--r--fs/netfs/write_issue.c38
-rw-r--r--fs/netfs/write_retry.c19
-rw-r--r--fs/nfs/fscache.c1
-rw-r--r--fs/nfs/localio.c45
-rw-r--r--fs/nfs/nfs4proc.c32
-rw-r--r--fs/nfs/super.c19
-rw-r--r--fs/nfs_common/nfslocalio.c99
-rw-r--r--fs/nfsd/filecache.c32
-rw-r--r--fs/nfsd/filecache.h3
-rw-r--r--fs/nfsd/localio.c70
-rw-r--r--fs/nilfs2/btree.c4
-rw-r--r--fs/nilfs2/direct.c3
-rw-r--r--fs/ntfs3/index.c8
-rw-r--r--fs/ntfs3/inode.c5
-rw-r--r--fs/ocfs2/quota_local.c2
-rw-r--r--fs/pidfs.c2
-rw-r--r--fs/pnode.c4
-rw-r--r--fs/smb/client/cifsproto.h3
-rw-r--r--fs/smb/client/cifssmb.c24
-rw-r--r--fs/smb/client/file.c19
-rw-r--r--fs/smb/client/smb2pdu.c4
-rw-r--r--fs/squashfs/super.c5
-rw-r--r--fs/xfs/xfs_aops.c22
-rw-r--r--fs/xfs/xfs_discard.c17
81 files changed, 1229 insertions, 763 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 32619d146cbc..862164181bac 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -59,7 +59,7 @@ static void v9fs_issue_write(struct netfs_io_subrequest *subreq)
len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err);
if (len > 0)
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- netfs_write_subrequest_terminated(subreq, len ?: err, false);
+ netfs_write_subrequest_terminated(subreq, len ?: err);
}
/**
@@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
/* if we just extended the file size, any portion not in
* cache won't be on server and is zeroes */
- if (subreq->rreq->origin != NETFS_DIO_READ)
+ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (pos + total >= i_size_read(rreq->inode))
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
@@ -164,4 +165,5 @@ const struct address_space_operations v9fs_addr_operations = {
.invalidate_folio = netfs_invalidate_folio,
.direct_IO = noop_direct_IO,
.writepages = netfs_writepages,
+ .migrate_folio = filemap_migrate_folio,
};
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 18b0a9f1615e..2e7526ea883a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -120,17 +120,17 @@ static void afs_issue_write_worker(struct work_struct *work)
#if 0 // Error injection
if (subreq->debug_index == 3)
- return netfs_write_subrequest_terminated(subreq, -ENOANO, false);
+ return netfs_write_subrequest_terminated(subreq, -ENOANO);
if (!subreq->retry_count) {
set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN);
}
#endif
op = afs_alloc_operation(wreq->netfs_priv, vnode->volume);
if (IS_ERR(op))
- return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN);
afs_op_set_vnode(op, 0, vnode);
op->file[0].dv_delta = 1;
@@ -166,7 +166,7 @@ static void afs_issue_write_worker(struct work_struct *work)
break;
}
- netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false);
+ netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len);
}
void afs_issue_write(struct netfs_io_subrequest *subreq)
@@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st
case NETFS_READ_GAPS:
case NETFS_READ_SINGLE:
case NETFS_READ_FOR_WRITE:
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
return;
default:
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index 13de6af279e5..b5b44ea91f99 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -1252,8 +1252,11 @@ hit_next:
if (!prealloc)
goto search_again;
ret = split_state(tree, state, prealloc, end + 1);
- if (ret)
+ if (ret) {
extent_io_tree_panic(tree, state, "split", ret);
+ prealloc = NULL;
+ goto out;
+ }
set_state_bits(tree, prealloc, bits, changeset);
cache_state(prealloc, cached_state);
@@ -1456,6 +1459,7 @@ hit_next:
if (IS_ERR(inserted_state)) {
ret = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, prealloc, "insert", ret);
+ goto out;
}
cache_state(inserted_state, cached_state);
if (inserted_state == prealloc)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 71b8a825c447..22455fbcb29e 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1862,7 +1862,7 @@ again:
if (reserved_space < fsize) {
end = page_start + reserved_space - 1;
btrfs_delalloc_release_space(BTRFS_I(inode),
- data_reserved, page_start,
+ data_reserved, end + 1,
fsize - reserved_space, true);
}
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 90f5da3c520a..8a3f44302788 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4849,8 +4849,11 @@ again:
folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mask);
if (IS_ERR(folio)) {
- btrfs_delalloc_release_space(inode, data_reserved, block_start,
- blocksize, true);
+ if (only_release_metadata)
+ btrfs_delalloc_release_metadata(inode, blocksize, true);
+ else
+ btrfs_delalloc_release_space(inode, data_reserved,
+ block_start, blocksize, true);
btrfs_delalloc_release_extents(inode, blocksize);
ret = -ENOMEM;
goto out;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index c3b2e29e3e01..4c525a040812 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -153,12 +153,14 @@ struct scrub_stripe {
unsigned int init_nr_io_errors;
unsigned int init_nr_csum_errors;
unsigned int init_nr_meta_errors;
+ unsigned int init_nr_meta_gen_errors;
/*
* The following error bitmaps are all for the current status.
* Every time we submit a new read, these bitmaps may be updated.
*
- * error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap;
+ * error_bitmap = io_error_bitmap | csum_error_bitmap |
+ * meta_error_bitmap | meta_generation_bitmap;
*
* IO and csum errors can happen for both metadata and data.
*/
@@ -166,6 +168,7 @@ struct scrub_stripe {
unsigned long io_error_bitmap;
unsigned long csum_error_bitmap;
unsigned long meta_error_bitmap;
+ unsigned long meta_gen_error_bitmap;
/* For writeback (repair or replace) error reporting. */
unsigned long write_error_bitmap;
@@ -616,7 +619,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
memcpy(on_disk_csum, header->csum, fs_info->csum_size);
if (logical != btrfs_stack_header_bytenr(header)) {
- bitmap_set(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad bytenr, has %llu want %llu",
@@ -672,7 +675,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
}
if (stripe->sectors[sector_nr].generation !=
btrfs_stack_header_generation(header)) {
- bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_set(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
"tree block %llu mirror %u has bad generation, has %llu want %llu",
@@ -684,6 +687,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree);
bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
+ bitmap_clear(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
}
static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
@@ -972,8 +976,22 @@ skip:
if (__ratelimit(&rs) && dev)
scrub_print_common_warning("header error", dev, false,
stripe->logical, physical);
+ if (test_bit(sector_nr, &stripe->meta_gen_error_bitmap))
+ if (__ratelimit(&rs) && dev)
+ scrub_print_common_warning("generation error", dev, false,
+ stripe->logical, physical);
}
+ /* Update the device stats. */
+ for (int i = 0; i < stripe->init_nr_io_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS);
+ for (int i = 0; i < stripe->init_nr_csum_errors; i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ /* Generation mismatch error is based on each metadata, not each block. */
+ for (int i = 0; i < stripe->init_nr_meta_gen_errors;
+ i += (fs_info->nodesize >> fs_info->sectorsize_bits))
+ btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS);
+
spin_lock(&sctx->stat_lock);
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
@@ -982,7 +1000,8 @@ skip:
sctx->stat.no_csum += nr_nodatacsum_sectors;
sctx->stat.read_errors += stripe->init_nr_io_errors;
sctx->stat.csum_errors += stripe->init_nr_csum_errors;
- sctx->stat.verify_errors += stripe->init_nr_meta_errors;
+ sctx->stat.verify_errors += stripe->init_nr_meta_errors +
+ stripe->init_nr_meta_gen_errors;
sctx->stat.uncorrectable_errors +=
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
sctx->stat.corrected_errors += nr_repaired_sectors;
@@ -1028,6 +1047,8 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
stripe->nr_sectors);
stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
stripe->nr_sectors);
+ stripe->init_nr_meta_gen_errors = bitmap_weight(&stripe->meta_gen_error_bitmap,
+ stripe->nr_sectors);
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
goto out;
@@ -1142,6 +1163,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
bitmap_set(&stripe->write_error_bitmap, sector_nr,
bio_size >> fs_info->sectorsize_bits);
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
+ for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
+ btrfs_dev_stat_inc_and_print(stripe->dev,
+ BTRFS_DEV_STAT_WRITE_ERRS);
}
bio_put(&bbio->bio);
@@ -1508,10 +1532,12 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
stripe->init_nr_io_errors = 0;
stripe->init_nr_csum_errors = 0;
stripe->init_nr_meta_errors = 0;
+ stripe->init_nr_meta_gen_errors = 0;
stripe->error_bitmap = 0;
stripe->io_error_bitmap = 0;
stripe->csum_error_bitmap = 0;
stripe->meta_error_bitmap = 0;
+ stripe->meta_gen_error_bitmap = 0;
}
/*
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 90dc094cfa5e..f5af11565b87 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6583,6 +6583,19 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
btrfs_log_get_delayed_items(inode, &delayed_ins_list,
&delayed_del_list);
+ /*
+ * If we are fsyncing a file with 0 hard links, then commit the delayed
+ * inode because the last inode ref (or extref) item may still be in the
+ * subvolume tree and if we log it the file will still exist after a log
+ * replay. So commit the delayed inode to delete that last ref and we
+ * skip logging it.
+ */
+ if (inode->vfs_inode.i_nlink == 0) {
+ ret = btrfs_commit_inode_delayed_inode(inode);
+ if (ret)
+ goto out_unlock;
+ }
+
ret = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
path, dst_path, logged_isize,
inode_only, ctx,
@@ -7051,14 +7064,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (btrfs_root_generation(&root->root_item) == trans->transid)
return BTRFS_LOG_FORCE_COMMIT;
- /*
- * Skip already logged inodes or inodes corresponding to tmpfiles
- * (since logging them is pointless, a link count of 0 means they
- * will never be accessible).
- */
- if ((btrfs_inode_in_log(inode, trans->transid) &&
- list_empty(&ctx->ordered_extents)) ||
- inode->vfs_inode.i_nlink == 0)
+ /* Skip already logged inodes and without new extents. */
+ if (btrfs_inode_in_log(inode, trans->transid) &&
+ list_empty(&ctx->ordered_extents))
return BTRFS_NO_LOG_SYNC;
ret = start_log_trans(trans, root, ctx);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 92058ae43488..c08e4a66ac07 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -63,7 +63,7 @@ static void cachefiles_read_complete(struct kiocb *iocb, long ret)
ret = -ESTALE;
}
- ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ ki->term_func(ki->term_func_priv, ret);
}
cachefiles_put_kiocb(ki);
@@ -188,7 +188,7 @@ in_progress:
presubmission_error:
if (term_func)
- term_func(term_func_priv, ret < 0 ? ret : skipped, false);
+ term_func(term_func_priv, ret < 0 ? ret : skipped);
return ret;
}
@@ -271,7 +271,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
if (ki->term_func)
- ki->term_func(ki->term_func_priv, ret, ki->was_async);
+ ki->term_func(ki->term_func_priv, ret);
cachefiles_put_kiocb(ki);
}
@@ -301,7 +301,7 @@ int __cachefiles_write(struct cachefiles_object *object,
ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
if (!ki) {
if (term_func)
- term_func(term_func_priv, -ENOMEM, false);
+ term_func(term_func_priv, -ENOMEM);
return -ENOMEM;
}
@@ -366,7 +366,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
{
if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE)) {
if (term_func)
- term_func(term_func_priv, -ENOBUFS, false);
+ term_func(term_func_priv, -ENOBUFS);
trace_netfs_sreq(term_func_priv, netfs_sreq_trace_cache_nowrite);
return -ENOBUFS;
}
@@ -665,7 +665,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
pre = CACHEFILES_DIO_BLOCK_SIZE - off;
if (pre >= len) {
fscache_count_dio_misfit();
- netfs_write_subrequest_terminated(subreq, len, false);
+ netfs_write_subrequest_terminated(subreq, len);
return;
}
subreq->transferred += pre;
@@ -691,7 +691,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
len -= post;
if (len == 0) {
fscache_count_dio_misfit();
- netfs_write_subrequest_terminated(subreq, post, false);
+ netfs_write_subrequest_terminated(subreq, post);
return;
}
iov_iter_truncate(&subreq->io_iter, len);
@@ -703,7 +703,7 @@ static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
&start, &len, len, true);
cachefiles_end_secure(cache, saved_cred);
if (ret < 0) {
- netfs_write_subrequest_terminated(subreq, ret, false);
+ netfs_write_subrequest_terminated(subreq, ret);
return;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 29be367905a1..b95c4cb21c13 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -238,6 +238,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
if (sparse && err > 0)
err = ceph_sparse_ext_map_end(op);
if (err < subreq->len &&
+ subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (IS_ENCRYPTED(inode) && err > 0) {
@@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
size_t len;
int mode;
- if (rreq->origin != NETFS_DIO_READ)
+ if (rreq->origin != NETFS_UNBUFFERED_READ &&
+ rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
@@ -539,7 +541,7 @@ static void ceph_set_page_fscache(struct page *page)
folio_start_private_2(page_folio(page)); /* [DEPRECATED] */
}
-static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
+static void ceph_fscache_write_terminated(void *priv, ssize_t error)
{
struct inode *inode = priv;
diff --git a/fs/dax.c b/fs/dax.c
index 676303419e9e..f8d8b1afd232 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry)
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
- xas_pause(xas);
+ xas_reset(xas);
xas_unlock_irq(xas);
schedule();
finish_wait(wq, &ewait.wait);
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 9c9129bca346..34517ca9df91 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -102,8 +102,7 @@ static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
erofs_fscache_req_put(req);
}
-static void erofs_fscache_req_end_io(void *priv,
- ssize_t transferred_or_error, bool was_async)
+static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error)
{
struct erofs_fscache_io *io = priv;
struct erofs_fscache_rq *req = io->private;
@@ -180,8 +179,7 @@ struct erofs_fscache_bio {
struct bio_vec bvecs[BIO_MAX_VECS];
};
-static void erofs_fscache_bio_endio(void *priv,
- ssize_t transferred_or_error, bool was_async)
+static void erofs_fscache_bio_endio(void *priv, ssize_t transferred_or_error)
{
struct erofs_fscache_bio *io = priv;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index da6ee7c39290..6e57b9cc6ed2 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -165,8 +165,11 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
filp_open(dif->path, O_RDONLY | O_LARGEFILE, 0) :
bdev_file_open_by_path(dif->path,
BLK_OPEN_READ, sb->s_type, NULL);
- if (IS_ERR(file))
+ if (IS_ERR(file)) {
+ if (file == ERR_PTR(-ENOTBLK))
+ return -EINVAL;
return PTR_ERR(file);
+ }
if (!erofs_is_fileio_mode(sbi)) {
dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file),
@@ -510,24 +513,52 @@ static int erofs_fc_parse_param(struct fs_context *fc,
return 0;
}
-static struct inode *erofs_nfs_get_inode(struct super_block *sb,
- u64 ino, u32 generation)
+static int erofs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+ struct inode *parent)
{
- return erofs_iget(sb, ino);
+ erofs_nid_t nid = EROFS_I(inode)->nid;
+ int len = parent ? 6 : 3;
+
+ if (*max_len < len) {
+ *max_len = len;
+ return FILEID_INVALID;
+ }
+
+ fh[0] = (u32)(nid >> 32);
+ fh[1] = (u32)(nid & 0xffffffff);
+ fh[2] = inode->i_generation;
+
+ if (parent) {
+ nid = EROFS_I(parent)->nid;
+
+ fh[3] = (u32)(nid >> 32);
+ fh[4] = (u32)(nid & 0xffffffff);
+ fh[5] = parent->i_generation;
+ }
+
+ *max_len = len;
+ return parent ? FILEID_INO64_GEN_PARENT : FILEID_INO64_GEN;
}
static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type)
{
- return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
- erofs_nfs_get_inode);
+ if ((fh_type != FILEID_INO64_GEN &&
+ fh_type != FILEID_INO64_GEN_PARENT) || fh_len < 3)
+ return NULL;
+
+ return d_obtain_alias(erofs_iget(sb,
+ ((u64)fid->raw[0] << 32) | fid->raw[1]));
}
static struct dentry *erofs_fh_to_parent(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type)
{
- return generic_fh_to_parent(sb, fid, fh_len, fh_type,
- erofs_nfs_get_inode);
+ if (fh_type != FILEID_INO64_GEN_PARENT || fh_len < 6)
+ return NULL;
+
+ return d_obtain_alias(erofs_iget(sb,
+ ((u64)fid->raw[3] << 32) | fid->raw[4]));
}
static struct dentry *erofs_get_parent(struct dentry *child)
@@ -543,7 +574,7 @@ static struct dentry *erofs_get_parent(struct dentry *child)
}
static const struct export_operations erofs_export_ops = {
- .encode_fh = generic_encode_ino32_fh,
+ .encode_fh = erofs_encode_fh,
.fh_to_dentry = erofs_fh_to_dentry,
.fh_to_parent = erofs_fh_to_parent,
.get_parent = erofs_get_parent,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 54f89f0ee69b..b0b8748ae287 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -53,8 +53,8 @@ bool f2fs_is_cp_guaranteed(struct page *page)
struct inode *inode;
struct f2fs_sb_info *sbi;
- if (!mapping)
- return false;
+ if (fscrypt_is_bounce_page(page))
+ return page_private_gcing(fscrypt_pagecache_page(page));
inode = mapping->host;
sbi = F2FS_I_SB(inode);
@@ -3966,7 +3966,7 @@ retry:
if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec ||
nr_pblocks % blks_per_sec ||
- !f2fs_valid_pinned_area(sbi, pblock)) {
+ f2fs_is_sequential_zone_area(sbi, pblock)) {
bool last_extent = false;
not_aligned++;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f1576dc6ec67..4f34a7d9760a 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1780,7 +1780,7 @@ struct f2fs_sb_info {
unsigned int dirty_device; /* for checkpoint data flush */
spinlock_t dev_lock; /* protect dirty_device */
bool aligned_blksize; /* all devices has the same logical blksize */
- unsigned int first_zoned_segno; /* first zoned segno */
+ unsigned int first_seq_zone_segno; /* first segno in sequential zone */
/* For write statistics */
u64 sectors_written_start;
@@ -2518,8 +2518,14 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK;
spin_lock(&sbi->stat_lock);
- f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- sbi->total_valid_block_count -= (block_t)count;
+ if (unlikely(sbi->total_valid_block_count < count)) {
+ f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u",
+ sbi->total_valid_block_count, inode->i_ino, count);
+ sbi->total_valid_block_count = 0;
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ } else {
+ sbi->total_valid_block_count -= count;
+ }
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
@@ -4622,12 +4628,16 @@ F2FS_FEATURE_FUNCS(readonly, RO);
F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS);
#ifdef CONFIG_BLK_DEV_ZONED
-static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
- block_t blkaddr)
+static inline bool f2fs_zone_is_seq(struct f2fs_sb_info *sbi, int devi,
+ unsigned int zone)
{
- unsigned int zno = blkaddr / sbi->blocks_per_blkz;
+ return test_bit(zone, FDEV(devi).blkz_seq);
+}
- return test_bit(zno, FDEV(devi).blkz_seq);
+static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
+ block_t blkaddr)
+{
+ return f2fs_zone_is_seq(sbi, devi, blkaddr / sbi->blocks_per_blkz);
}
#endif
@@ -4699,15 +4709,31 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
}
-static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
+static inline bool f2fs_is_sequential_zone_area(struct f2fs_sb_info *sbi,
block_t blkaddr)
{
if (f2fs_sb_has_blkzoned(sbi)) {
+#ifdef CONFIG_BLK_DEV_ZONED
int devi = f2fs_target_device_index(sbi, blkaddr);
- return !bdev_is_zoned(FDEV(devi).bdev);
+ if (!bdev_is_zoned(FDEV(devi).bdev))
+ return false;
+
+ if (f2fs_is_multi_device(sbi)) {
+ if (blkaddr < FDEV(devi).start_blk ||
+ blkaddr > FDEV(devi).end_blk) {
+ f2fs_err(sbi, "Invalid block %x", blkaddr);
+ return false;
+ }
+ blkaddr -= FDEV(devi).start_blk;
+ }
+
+ return f2fs_blkz_is_seq(sbi, devi, blkaddr);
+#else
+ return false;
+#endif
}
- return true;
+ return false;
}
static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 2b8f9239bede..8b5a55b72264 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -2066,6 +2066,9 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi,
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
+ if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno)))
+ continue;
+
do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false);
put_gc_inode(&gc_list);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 8f8b9b843bdf..28137d499f8f 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -414,7 +414,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
if (is_inode_flag_set(dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)))
+ F2FS_I(inode)->i_projid)))
return -EXDEV;
err = f2fs_dquot_initialize(dir);
@@ -914,7 +914,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(new_dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)))
+ F2FS_I(old_inode)->i_projid)))
return -EXDEV;
/*
@@ -1107,10 +1107,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)) ||
- (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
+ F2FS_I(old_inode)->i_projid)) ||
+ (is_inode_flag_set(old_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(old_dir)->i_projid,
- F2FS_I(new_dentry->d_inode)->i_projid)))
+ F2FS_I(new_inode)->i_projid)))
return -EXDEV;
err = f2fs_dquot_initialize(old_dir);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 396ef71f41e3..41ca73622c8d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -424,7 +424,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
if (need && excess_cached_nats(sbi))
f2fs_balance_fs_bg(sbi, false);
- if (!f2fs_is_checkpoint_ready(sbi))
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return;
/*
@@ -2777,7 +2777,7 @@ static int get_new_segment(struct f2fs_sb_info *sbi,
if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning)
segno = 0;
else
- segno = max(sbi->first_zoned_segno, *newseg);
+ segno = max(sbi->first_seq_zone_segno, *newseg);
hint = GET_SEC_FROM_SEG(sbi, segno);
}
#endif
@@ -2789,7 +2789,7 @@ find_other_zone:
if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) {
/* Write only to sequential zones */
if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) {
- hint = GET_SEC_FROM_SEG(sbi, sbi->first_zoned_segno);
+ hint = GET_SEC_FROM_SEG(sbi, sbi->first_seq_zone_segno);
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
} else
secno = find_first_zero_bit(free_i->free_secmap,
@@ -2838,9 +2838,9 @@ got_it:
/* set it as dirty segment in free segmap */
f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
- /* no free section in conventional zone */
+ /* no free section in conventional device or conventional zone */
if (new_sec && pinning &&
- !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
+ f2fs_is_sequential_zone_area(sbi, START_BLOCK(sbi, segno))) {
ret = -EAGAIN;
goto out_unlock;
}
@@ -3311,7 +3311,7 @@ retry:
if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
f2fs_down_write(&sbi->gc_lock);
- err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk),
+ err = f2fs_gc_range(sbi, 0, sbi->first_seq_zone_segno - 1,
true, ZONED_PIN_SEC_REQUIRED_COUNT);
f2fs_up_write(&sbi->gc_lock);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 0465dc00b349..503f6df690bf 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -429,7 +429,6 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
- unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi);
spin_lock(&free_i->segmap_lock);
clear_bit(segno, free_i->free_segmap);
@@ -437,7 +436,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
next = find_next_bit(free_i->free_segmap,
start_segno + SEGS_PER_SEC(sbi), start_segno);
- if (next >= start_segno + usable_segs) {
+ if (next >= start_segno + f2fs_usable_segs_in_sec(sbi)) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
}
@@ -463,22 +462,36 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
- unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi);
+ bool ret;
spin_lock(&free_i->segmap_lock);
- if (test_and_clear_bit(segno, free_i->free_segmap)) {
- free_i->free_segments++;
-
- if (!inmem && IS_CURSEC(sbi, secno))
- goto skip_free;
- next = find_next_bit(free_i->free_segmap,
- start_segno + SEGS_PER_SEC(sbi), start_segno);
- if (next >= start_segno + usable_segs) {
- if (test_and_clear_bit(secno, free_i->free_secmap))
- free_i->free_sections++;
- }
- }
-skip_free:
+ ret = test_and_clear_bit(segno, free_i->free_segmap);
+ if (!ret)
+ goto unlock_out;
+
+ free_i->free_segments++;
+
+ if (!inmem && IS_CURSEC(sbi, secno))
+ goto unlock_out;
+
+ /* check large section */
+ next = find_next_bit(free_i->free_segmap,
+ start_segno + SEGS_PER_SEC(sbi), start_segno);
+ if (next < start_segno + f2fs_usable_segs_in_sec(sbi))
+ goto unlock_out;
+
+ ret = test_and_clear_bit(secno, free_i->free_secmap);
+ if (!ret)
+ goto unlock_out;
+
+ free_i->free_sections++;
+
+ if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[BG_GC]) == secno)
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[FG_GC]) == secno)
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
+
+unlock_out:
spin_unlock(&free_i->segmap_lock);
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f087b2b71c89..386326f7a440 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1882,9 +1882,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid = u64_to_fsid(id);
#ifdef CONFIG_QUOTA
- if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) &&
+ if (is_inode_flag_set(d_inode(dentry), FI_PROJ_INHERIT) &&
sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
- f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf);
+ f2fs_statfs_project(sb, F2FS_I(d_inode(dentry))->i_projid, buf);
}
#endif
return 0;
@@ -4304,14 +4304,35 @@ static void f2fs_record_error_work(struct work_struct *work)
f2fs_record_stop_reason(sbi);
}
-static inline unsigned int get_first_zoned_segno(struct f2fs_sb_info *sbi)
+static inline unsigned int get_first_seq_zone_segno(struct f2fs_sb_info *sbi)
{
+#ifdef CONFIG_BLK_DEV_ZONED
+ unsigned int zoneno, total_zones;
int devi;
- for (devi = 0; devi < sbi->s_ndevs; devi++)
- if (bdev_is_zoned(FDEV(devi).bdev))
- return GET_SEGNO(sbi, FDEV(devi).start_blk);
- return 0;
+ if (!f2fs_sb_has_blkzoned(sbi))
+ return NULL_SEGNO;
+
+ for (devi = 0; devi < sbi->s_ndevs; devi++) {
+ if (!bdev_is_zoned(FDEV(devi).bdev))
+ continue;
+
+ total_zones = GET_ZONE_FROM_SEG(sbi, FDEV(devi).total_segments);
+
+ for (zoneno = 0; zoneno < total_zones; zoneno++) {
+ unsigned int segs, blks;
+
+ if (!f2fs_zone_is_seq(sbi, devi, zoneno))
+ continue;
+
+ segs = GET_SEG_FROM_SEC(sbi,
+ zoneno * sbi->secs_per_zone);
+ blks = SEGS_TO_BLKS(sbi, segs);
+ return GET_SEGNO(sbi, FDEV(devi).start_blk + blks);
+ }
+ }
+#endif
+ return NULL_SEGNO;
}
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
@@ -4348,6 +4369,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
#endif
for (i = 0; i < max_devices; i++) {
+ if (max_devices == 1) {
+ FDEV(i).total_segments =
+ le32_to_cpu(raw_super->segment_count_main);
+ FDEV(i).start_blk = 0;
+ FDEV(i).end_blk = FDEV(i).total_segments *
+ BLKS_PER_SEG(sbi);
+ }
+
if (i == 0)
FDEV(0).bdev_file = sbi->sb->s_bdev_file;
else if (!RDEV(i).path[0])
@@ -4718,7 +4747,7 @@ try_onemore:
sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
/* get segno of first zoned block device */
- sbi->first_zoned_segno = get_first_zoned_segno(sbi);
+ sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi);
/* Read accumulated write IO statistics if exists */
seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
diff --git a/fs/filesystems.c b/fs/filesystems.c
index 58b9067b2391..95e5256821a5 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -156,15 +156,19 @@ static int fs_index(const char __user * __name)
static int fs_name(unsigned int index, char __user * buf)
{
struct file_system_type * tmp;
- int len, res;
+ int len, res = -EINVAL;
read_lock(&file_systems_lock);
- for (tmp = file_systems; tmp; tmp = tmp->next, index--)
- if (index <= 0 && try_module_get(tmp->owner))
+ for (tmp = file_systems; tmp; tmp = tmp->next, index--) {
+ if (index == 0) {
+ if (try_module_get(tmp->owner))
+ res = 0;
break;
+ }
+ }
read_unlock(&file_systems_lock);
- if (!tmp)
- return -EINVAL;
+ if (res)
+ return res;
/* OK, we got the reference, so we can safely block */
len = strlen(tmp->name) + 1;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 68fc8af14700..eb4270e82ef8 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -37,27 +37,6 @@
#include "aops.h"
-void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
- size_t from, size_t len)
-{
- struct buffer_head *head = folio_buffers(folio);
- unsigned int bsize = head->b_size;
- struct buffer_head *bh;
- size_t to = from + len;
- size_t start, end;
-
- for (bh = head, start = 0; bh != head || !start;
- bh = bh->b_this_page, start = end) {
- end = start + bsize;
- if (end <= from)
- continue;
- if (start >= to)
- break;
- set_buffer_uptodate(bh);
- gfs2_trans_add_data(ip->i_gl, bh);
- }
-}
-
/**
* gfs2_get_block_noalloc - Fills in a buffer head with details about a block
* @inode: The inode
@@ -133,12 +112,43 @@ static int __gfs2_jdata_write_folio(struct folio *folio,
inode->i_sb->s_blocksize,
BIT(BH_Dirty)|BIT(BH_Uptodate));
}
- gfs2_trans_add_databufs(ip, folio, 0, folio_size(folio));
+ gfs2_trans_add_databufs(ip->i_gl, folio, 0, folio_size(folio));
}
return gfs2_write_jdata_folio(folio, wbc);
}
/**
+ * gfs2_jdata_writeback - Write jdata folios to the log
+ * @mapping: The mapping to write
+ * @wbc: The writeback control
+ *
+ * Returns: errno
+ */
+int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(mapping->host);
+ struct folio *folio = NULL;
+ int error;
+
+ BUG_ON(current->journal_info);
+ if (gfs2_assert_withdraw(sdp, ip->i_gl->gl_state == LM_ST_EXCLUSIVE))
+ return 0;
+
+ while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+ if (folio_test_checked(folio)) {
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
+ continue;
+ }
+ error = __gfs2_jdata_write_folio(folio, wbc);
+ }
+
+ return error;
+}
+
+/**
* gfs2_writepages - Write a bunch of dirty pages back to disk
* @mapping: The mapping to write
* @wbc: Write-back control
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
index a10c4334d248..bf002522a782 100644
--- a/fs/gfs2/aops.h
+++ b/fs/gfs2/aops.h
@@ -9,7 +9,6 @@
#include "incore.h"
void adjust_fs_space(struct inode *inode);
-void gfs2_trans_add_databufs(struct gfs2_inode *ip, struct folio *folio,
- size_t from, size_t len);
+int gfs2_jdata_writeback(struct address_space *mapping, struct writeback_control *wbc);
#endif /* __AOPS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 366516b98b3f..b81984def58e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -988,7 +988,8 @@ static void gfs2_iomap_put_folio(struct inode *inode, loff_t pos,
struct gfs2_sbd *sdp = GFS2_SB(inode);
if (!gfs2_is_stuffed(ip))
- gfs2_trans_add_databufs(ip, folio, offset_in_folio(folio, pos),
+ gfs2_trans_add_databufs(ip->i_gl, folio,
+ offset_in_folio(folio, pos),
copied);
folio_unlock(folio);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d7220a6fe8f5..ba25b884169e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1166,7 +1166,6 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
const struct gfs2_glock_operations *glops, int create,
struct gfs2_glock **glp)
{
- struct super_block *s = sdp->sd_vfs;
struct lm_lockname name = { .ln_number = number,
.ln_type = glops->go_type,
.ln_sbd = sdp };
@@ -1229,7 +1228,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
mapping = gfs2_glock2aspace(gl);
if (mapping) {
mapping->a_ops = &gfs2_meta_aops;
- mapping->host = s->s_bdev->bd_mapping->host;
+ mapping->host = sdp->sd_inode;
mapping->flags = 0;
mapping_set_gfp_mask(mapping, GFP_NOFS);
mapping->i_private_data = NULL;
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index eb4714f299ef..116efe335c32 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -168,7 +168,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
static int gfs2_rgrp_metasync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *metamapping = &sdp->sd_aspace;
+ struct address_space *metamapping = gfs2_aspace(sdp);
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
const unsigned bsize = sdp->sd_sb.sb_bsize;
loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
@@ -225,7 +225,7 @@ static int rgrp_go_sync(struct gfs2_glock *gl)
static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- struct address_space *mapping = &sdp->sd_aspace;
+ struct address_space *mapping = gfs2_aspace(sdp);
struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
const unsigned bsize = sdp->sd_sb.sb_bsize;
loff_t start, end;
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 74abbd4970f8..0a41c4e76b32 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -795,7 +795,7 @@ struct gfs2_sbd {
/* Log stuff */
- struct address_space sd_aspace;
+ struct inode *sd_inode;
spinlock_t sd_log_lock;
@@ -851,6 +851,13 @@ struct gfs2_sbd {
unsigned long sd_glock_dqs_held;
};
+#define GFS2_BAD_INO 1
+
+static inline struct address_space *gfs2_aspace(struct gfs2_sbd *sdp)
+{
+ return sdp->sd_inode->i_mapping;
+}
+
static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
{
gl->gl_stats.stats[which]++;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 198a8cbaf5e5..8fd81444ffea 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -439,6 +439,74 @@ out:
return error;
}
+static void gfs2_final_release_pages(struct gfs2_inode *ip)
+{
+ struct inode *inode = &ip->i_inode;
+ struct gfs2_glock *gl = ip->i_gl;
+
+ if (unlikely(!gl)) {
+ /* This can only happen during incomplete inode creation. */
+ BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+ return;
+ }
+
+ truncate_inode_pages(gfs2_glock2aspace(gl), 0);
+ truncate_inode_pages(&inode->i_data, 0);
+
+ if (atomic_read(&gl->gl_revokes) == 0) {
+ clear_bit(GLF_LFLUSH, &gl->gl_flags);
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
+ }
+}
+
+int gfs2_dinode_dealloc(struct gfs2_inode *ip)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder gh;
+ int error;
+
+ if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
+ gfs2_consist_inode(ip);
+ return -EIO;
+ }
+
+ gfs2_rindex_update(sdp);
+
+ error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
+ if (error)
+ return error;
+
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
+ if (!rgd) {
+ gfs2_consist_inode(ip);
+ error = -EIO;
+ goto out_qs;
+ }
+
+ error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+ LM_FLAG_NODE_SCOPE, &gh);
+ if (error)
+ goto out_qs;
+
+ error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
+ sdp->sd_jdesc->jd_blocks);
+ if (error)
+ goto out_rg_gunlock;
+
+ gfs2_free_di(rgd, ip);
+
+ gfs2_final_release_pages(ip);
+
+ gfs2_trans_end(sdp);
+
+out_rg_gunlock:
+ gfs2_glock_dq_uninit(&gh);
+out_qs:
+ gfs2_quota_unhold(ip);
+ return error;
+}
+
static void gfs2_init_dir(struct buffer_head *dibh,
const struct gfs2_inode *parent)
{
@@ -629,10 +697,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_glock *io_gl;
- int error;
+ int error, dealloc_error;
u32 aflags = 0;
unsigned blocks = 1;
struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
+ bool xattr_initialized = false;
if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG;
@@ -659,7 +728,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!IS_ERR(inode)) {
if (S_ISDIR(inode->i_mode)) {
iput(inode);
- inode = ERR_PTR(-EISDIR);
+ inode = NULL;
+ error = -EISDIR;
goto fail_gunlock;
}
d_instantiate(dentry, inode);
@@ -744,11 +814,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
if (error)
- goto fail_free_inode;
+ goto fail_dealloc_inode;
error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (error)
- goto fail_free_inode;
+ goto fail_dealloc_inode;
gfs2_cancel_delete_work(io_gl);
io_gl->gl_no_formal_ino = ip->i_no_formal_ino;
@@ -772,8 +842,10 @@ retry:
if (error)
goto fail_gunlock3;
- if (blocks > 1)
+ if (blocks > 1) {
gfs2_init_xattr(ip);
+ xattr_initialized = true;
+ }
init_dinode(dip, ip, symname);
gfs2_trans_end(sdp);
@@ -828,6 +900,18 @@ fail_gunlock3:
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_gunlock2:
gfs2_glock_put(io_gl);
+fail_dealloc_inode:
+ set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
+ dealloc_error = 0;
+ if (ip->i_eattr)
+ dealloc_error = gfs2_ea_dealloc(ip, xattr_initialized);
+ clear_nlink(inode);
+ mark_inode_dirty(inode);
+ if (!dealloc_error)
+ dealloc_error = gfs2_dinode_dealloc(ip);
+ if (dealloc_error)
+ fs_warn(sdp, "%s: %d\n", __func__, dealloc_error);
+ ip->i_no_addr = 0;
fail_free_inode:
if (ip->i_gl) {
gfs2_glock_put(ip->i_gl);
@@ -842,10 +926,6 @@ fail_gunlock:
gfs2_dir_no_add(&da);
gfs2_glock_dq_uninit(&d_gh);
if (!IS_ERR_OR_NULL(inode)) {
- set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
- clear_nlink(inode);
- if (ip->i_no_addr)
- mark_inode_dirty(inode);
if (inode->i_state & I_NEW)
iget_failed(inode);
else
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 9e5e1622d50a..eafe123617e6 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -92,6 +92,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 no_formal_ino,
unsigned int blktype);
+int gfs2_dinode_dealloc(struct gfs2_inode *ip);
struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index f9c5089783d2..115c4ac457e9 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -31,6 +31,7 @@
#include "dir.h"
#include "trace_gfs2.h"
#include "trans.h"
+#include "aops.h"
static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
@@ -131,7 +132,11 @@ __acquires(&sdp->sd_ail_lock)
if (!mapping)
continue;
spin_unlock(&sdp->sd_ail_lock);
- ret = mapping->a_ops->writepages(mapping, wbc);
+ BUG_ON(GFS2_SB(mapping->host) != sdp);
+ if (gfs2_is_jdata(GFS2_I(mapping->host)))
+ ret = gfs2_jdata_writeback(mapping, wbc);
+ else
+ ret = mapping->a_ops->writepages(mapping, wbc);
if (need_resched()) {
blk_finish_plug(plug);
cond_resched();
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 198cc7056637..9dc8885c95d0 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -132,7 +132,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
unsigned int bufnum;
if (mapping == NULL)
- mapping = &sdp->sd_aspace;
+ mapping = gfs2_aspace(sdp);
shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
index = blkno >> shift; /* convert block to page */
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h
index 831d988c2ceb..b7c8a6684d02 100644
--- a/fs/gfs2/meta_io.h
+++ b/fs/gfs2/meta_io.h
@@ -44,9 +44,7 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping)
struct gfs2_glock_aspace *gla =
container_of(mapping, struct gfs2_glock_aspace, mapping);
return gla->glock.gl_name.ln_sbd;
- } else if (mapping->a_ops == &gfs2_rgrp_aops)
- return container_of(mapping, struct gfs2_sbd, sd_aspace);
- else
+ } else
return inode->i_sb->s_fs_info;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index e83d293c3614..4a0f7de41b2b 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -64,15 +64,17 @@ static void gfs2_tune_init(struct gfs2_tune *gt)
void free_sbd(struct gfs2_sbd *sdp)
{
+ struct super_block *sb = sdp->sd_vfs;
+
if (sdp->sd_lkstats)
free_percpu(sdp->sd_lkstats);
+ sb->s_fs_info = NULL;
kfree(sdp);
}
static struct gfs2_sbd *init_sbd(struct super_block *sb)
{
struct gfs2_sbd *sdp;
- struct address_space *mapping;
sdp = kzalloc(sizeof(struct gfs2_sbd), GFP_KERNEL);
if (!sdp)
@@ -109,16 +111,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
INIT_LIST_HEAD(&sdp->sd_sc_inodes_list);
- mapping = &sdp->sd_aspace;
-
- address_space_init_once(mapping);
- mapping->a_ops = &gfs2_rgrp_aops;
- mapping->host = sb->s_bdev->bd_mapping->host;
- mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_NOFS);
- mapping->i_private_data = NULL;
- mapping->writeback_index = 0;
-
spin_lock_init(&sdp->sd_log_lock);
atomic_set(&sdp->sd_log_pinned, 0);
INIT_LIST_HEAD(&sdp->sd_log_revokes);
@@ -1135,6 +1127,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
int silent = fc->sb_flags & SB_SILENT;
struct gfs2_sbd *sdp;
struct gfs2_holder mount_gh;
+ struct address_space *mapping;
int error;
sdp = init_sbd(sb);
@@ -1156,6 +1149,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_NOSEC;
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
+
sb->s_d_op = &gfs2_dops;
sb->s_export_op = &gfs2_export_ops;
sb->s_qcop = &gfs2_quotactl_ops;
@@ -1181,9 +1175,21 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
sdp->sd_tune.gt_statfs_quantum = 30;
}
+ /* Set up an address space for metadata writes */
+ sdp->sd_inode = new_inode(sb);
+ error = -ENOMEM;
+ if (!sdp->sd_inode)
+ goto fail_free;
+ sdp->sd_inode->i_ino = GFS2_BAD_INO;
+ sdp->sd_inode->i_size = OFFSET_MAX;
+
+ mapping = gfs2_aspace(sdp);
+ mapping->a_ops = &gfs2_rgrp_aops;
+ mapping_set_gfp_mask(mapping, GFP_NOFS);
+
error = init_names(sdp, silent);
if (error)
- goto fail_free;
+ goto fail_iput;
snprintf(sdp->sd_fsname, sizeof(sdp->sd_fsname), "%s", sdp->sd_table_name);
@@ -1192,7 +1198,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_FREEZABLE, 0,
sdp->sd_fsname);
if (!sdp->sd_glock_wq)
- goto fail_free;
+ goto fail_iput;
sdp->sd_delete_wq = alloc_workqueue("gfs2-delete/%s",
WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, sdp->sd_fsname);
@@ -1309,9 +1315,10 @@ fail_delete_wq:
fail_glock_wq:
if (sdp->sd_glock_wq)
destroy_workqueue(sdp->sd_glock_wq);
+fail_iput:
+ iput(sdp->sd_inode);
fail_free:
free_sbd(sdp);
- sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 44e5658b896c..0bd7827e6371 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -648,7 +648,7 @@ restart:
gfs2_jindex_free(sdp);
/* Take apart glock structures and buffer lists */
gfs2_gl_hash_clear(sdp);
- truncate_inode_pages_final(&sdp->sd_aspace);
+ iput(sdp->sd_inode);
gfs2_delete_debugfs_file(sdp);
gfs2_sys_fs_del(sdp);
@@ -674,7 +674,7 @@ static int gfs2_sync_fs(struct super_block *sb, int wait)
return sdp->sd_log_error;
}
-static int gfs2_do_thaw(struct gfs2_sbd *sdp)
+static int gfs2_do_thaw(struct gfs2_sbd *sdp, enum freeze_holder who)
{
struct super_block *sb = sdp->sd_vfs;
int error;
@@ -682,7 +682,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp)
error = gfs2_freeze_lock_shared(sdp);
if (error)
goto fail;
- error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ error = thaw_super(sb, who);
if (!error)
return 0;
@@ -710,7 +710,7 @@ void gfs2_freeze_func(struct work_struct *work)
gfs2_freeze_unlock(sdp);
set_bit(SDF_FROZEN, &sdp->sd_flags);
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, FREEZE_HOLDER_USERSPACE);
if (error)
goto out;
@@ -728,6 +728,7 @@ out:
/**
* gfs2_freeze_super - prevent further writes to the filesystem
* @sb: the VFS structure for the filesystem
+ * @who: freeze flags
*
*/
@@ -744,7 +745,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
}
for (;;) {
- error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ error = freeze_super(sb, who);
if (error) {
fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
error);
@@ -758,7 +759,7 @@ static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
break;
}
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, who);
if (error)
goto out;
@@ -796,6 +797,7 @@ static int gfs2_freeze_fs(struct super_block *sb)
/**
* gfs2_thaw_super - reallow writes to the filesystem
* @sb: the VFS structure for the filesystem
+ * @who: freeze flags
*
*/
@@ -814,7 +816,7 @@ static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
atomic_inc(&sb->s_active);
gfs2_freeze_unlock(sdp);
- error = gfs2_do_thaw(sdp);
+ error = gfs2_do_thaw(sdp, who);
if (!error) {
clear_bit(SDF_FREEZE_INITIATOR, &sdp->sd_flags);
@@ -1173,74 +1175,6 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
return 0;
}
-static void gfs2_final_release_pages(struct gfs2_inode *ip)
-{
- struct inode *inode = &ip->i_inode;
- struct gfs2_glock *gl = ip->i_gl;
-
- if (unlikely(!gl)) {
- /* This can only happen during incomplete inode creation. */
- BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
- return;
- }
-
- truncate_inode_pages(gfs2_glock2aspace(gl), 0);
- truncate_inode_pages(&inode->i_data, 0);
-
- if (atomic_read(&gl->gl_revokes) == 0) {
- clear_bit(GLF_LFLUSH, &gl->gl_flags);
- clear_bit(GLF_DIRTY, &gl->gl_flags);
- }
-}
-
-static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_rgrpd *rgd;
- struct gfs2_holder gh;
- int error;
-
- if (gfs2_get_inode_blocks(&ip->i_inode) != 1) {
- gfs2_consist_inode(ip);
- return -EIO;
- }
-
- gfs2_rindex_update(sdp);
-
- error = gfs2_quota_hold(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE);
- if (error)
- return error;
-
- rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr, 1);
- if (!rgd) {
- gfs2_consist_inode(ip);
- error = -EIO;
- goto out_qs;
- }
-
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
- LM_FLAG_NODE_SCOPE, &gh);
- if (error)
- goto out_qs;
-
- error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
- sdp->sd_jdesc->jd_blocks);
- if (error)
- goto out_rg_gunlock;
-
- gfs2_free_di(rgd, ip);
-
- gfs2_final_release_pages(ip);
-
- gfs2_trans_end(sdp);
-
-out_rg_gunlock:
- gfs2_glock_dq_uninit(&gh);
-out_qs:
- gfs2_quota_unhold(ip);
- return error;
-}
-
/**
* gfs2_glock_put_eventually
* @gl: The glock to put
@@ -1326,9 +1260,6 @@ static enum evict_behavior evict_should_delete(struct inode *inode,
struct gfs2_sbd *sdp = sb->s_fs_info;
int ret;
- if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags)))
- goto should_delete;
-
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(GLF_DEFER_DELETE, &ip->i_iopen_gh.gh_gl->gl_flags))
return EVICT_SHOULD_DEFER_DELETE;
@@ -1358,7 +1289,6 @@ static enum evict_behavior evict_should_delete(struct inode *inode,
if (inode->i_nlink)
return EVICT_SHOULD_SKIP_DELETE;
-should_delete:
if (gfs2_holder_initialized(&ip->i_iopen_gh) &&
test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
return gfs2_upgrade_iopen_glock(inode);
@@ -1382,7 +1312,7 @@ static int evict_unlinked_inode(struct inode *inode)
}
if (ip->i_eattr) {
- ret = gfs2_ea_dealloc(ip);
+ ret = gfs2_ea_dealloc(ip, true);
if (ret)
goto out;
}
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index ecc699f8d9fc..628618302102 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -764,7 +764,6 @@ fail_reg:
fs_err(sdp, "error %d adding sysfs files\n", error);
kobject_put(&sdp->sd_kobj);
wait_for_completion(&sdp->sd_kobj_unregister);
- sb->s_fs_info = NULL;
return error;
}
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index f8ae2c666fd6..075f7e9abe47 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -226,6 +226,27 @@ out:
unlock_buffer(bh);
}
+void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio,
+ size_t from, size_t len)
+{
+ struct buffer_head *head = folio_buffers(folio);
+ unsigned int bsize = head->b_size;
+ struct buffer_head *bh;
+ size_t to = from + len;
+ size_t start, end;
+
+ for (bh = head, start = 0; bh != head || !start;
+ bh = bh->b_this_page, start = end) {
+ end = start + bsize;
+ if (end <= from)
+ continue;
+ if (start >= to)
+ break;
+ set_buffer_uptodate(bh);
+ gfs2_trans_add_data(gl, bh);
+ }
+}
+
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
{
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index f8ce5302280d..790c55f59e61 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -42,6 +42,8 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
void gfs2_trans_end(struct gfs2_sbd *sdp);
void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh);
+void gfs2_trans_add_databufs(struct gfs2_glock *gl, struct folio *folio,
+ size_t from, size_t len);
void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh);
void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 17ae5070a90e..df9c93de94c7 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1383,7 +1383,7 @@ out:
return error;
}
-static int ea_dealloc_block(struct gfs2_inode *ip)
+static int ea_dealloc_block(struct gfs2_inode *ip, bool initialized)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1416,7 +1416,7 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
ip->i_eattr = 0;
gfs2_add_inode_blocks(&ip->i_inode, -1);
- if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+ if (initialized) {
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -1435,11 +1435,12 @@ out_gunlock:
/**
* gfs2_ea_dealloc - deallocate the extended attribute fork
* @ip: the inode
+ * @initialized: xattrs have been initialized
*
* Returns: errno
*/
-int gfs2_ea_dealloc(struct gfs2_inode *ip)
+int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized)
{
int error;
@@ -1451,7 +1452,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
if (error)
return error;
- if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+ if (initialized) {
error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
if (error)
goto out_quota;
@@ -1463,7 +1464,7 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
}
}
- error = ea_dealloc_block(ip);
+ error = ea_dealloc_block(ip, initialized);
out_quota:
gfs2_quota_unhold(ip);
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index eb12eb7e37c1..3c9788e0e137 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -54,7 +54,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
const void *value, size_t size,
int flags, int type);
ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int gfs2_ea_dealloc(struct gfs2_inode *ip);
+int gfs2_ea_dealloc(struct gfs2_inode *ip, bool initialized);
/* Exported to acl.c */
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 5b08bd417b28..0ac474888a02 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1675,6 +1675,8 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
ioend_flags |= IOMAP_IOEND_UNWRITTEN;
if (wpc->iomap.flags & IOMAP_F_SHARED)
ioend_flags |= IOMAP_IOEND_SHARED;
+ if (folio_test_dropbehind(folio))
+ ioend_flags |= IOMAP_IOEND_DONTCACHE;
if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
ioend_flags |= IOMAP_IOEND_BOUNDARY;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index fc70d72c3fe8..43487fa83eae 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1580,8 +1580,9 @@ void kernfs_break_active_protection(struct kernfs_node *kn)
* invoked before finishing the kernfs operation. Note that while this
* function restores the active reference, it doesn't and can't actually
* restore the active protection - @kn may already or be in the process of
- * being removed. Once kernfs_break_active_protection() is invoked, that
- * protection is irreversibly gone for the kernfs operation instance.
+ * being drained and removed. Once kernfs_break_active_protection() is
+ * invoked, that protection is irreversibly gone for the kernfs operation
+ * instance.
*
* While this function may be called at any point after
* kernfs_break_active_protection() is invoked, its most useful location
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 66fe8fe41f06..a6c692cac616 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -778,8 +778,9 @@ bool kernfs_should_drain_open_files(struct kernfs_node *kn)
/*
* @kn being deactivated guarantees that @kn->attr.open can't change
* beneath us making the lockless test below safe.
+ * Callers post kernfs_unbreak_active_protection may be counted in
+ * kn->active by now, do not WARN_ON because of them.
*/
- WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
rcu_read_lock();
on = rcu_dereference(kn->attr.open);
diff --git a/fs/mount.h b/fs/mount.h
index 7aecf2a60472..ad7173037924 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -7,10 +7,6 @@
extern struct list_head notify_list;
-typedef __u32 __bitwise mntns_flags_t;
-
-#define MNTNS_PROPAGATING ((__force mntns_flags_t)(1 << 0))
-
struct mnt_namespace {
struct ns_common ns;
struct mount * root;
@@ -37,7 +33,6 @@ struct mnt_namespace {
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */
refcount_t passive; /* number references not pinning @mounts */
- mntns_flags_t mntns_flags;
} __randomize_layout;
struct mnt_pcp {
diff --git a/fs/namespace.c b/fs/namespace.c
index 1b466c54a357..d6ac7e533b02 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2424,7 +2424,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
namespace_unlock();
}
-bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+static bool __has_locked_children(struct mount *mnt, struct dentry *dentry)
{
struct mount *child;
@@ -2438,6 +2438,16 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry)
return false;
}
+bool has_locked_children(struct mount *mnt, struct dentry *dentry)
+{
+ bool res;
+
+ read_seqlock_excl(&mount_lock);
+ res = __has_locked_children(mnt, dentry);
+ read_sequnlock_excl(&mount_lock);
+ return res;
+}
+
/*
* Check that there aren't references to earlier/same mount namespaces in the
* specified subtree. Such references can act as pins for mount namespaces
@@ -2482,23 +2492,27 @@ struct vfsmount *clone_private_mount(const struct path *path)
if (IS_MNT_UNBINDABLE(old_mnt))
return ERR_PTR(-EINVAL);
- if (mnt_has_parent(old_mnt)) {
- if (!check_mnt(old_mnt))
- return ERR_PTR(-EINVAL);
- } else {
- if (!is_mounted(&old_mnt->mnt))
- return ERR_PTR(-EINVAL);
-
- /* Make sure this isn't something purely kernel internal. */
- if (!is_anon_ns(old_mnt->mnt_ns))
+ /*
+ * Make sure the source mount is acceptable.
+ * Anything mounted in our mount namespace is allowed.
+ * Otherwise, it must be the root of an anonymous mount
+ * namespace, and we need to make sure no namespace
+ * loops get created.
+ */
+ if (!check_mnt(old_mnt)) {
+ if (!is_mounted(&old_mnt->mnt) ||
+ !is_anon_ns(old_mnt->mnt_ns) ||
+ mnt_has_parent(old_mnt))
return ERR_PTR(-EINVAL);
- /* Make sure we don't create mount namespace loops. */
if (!check_for_nsfs_mounts(old_mnt))
return ERR_PTR(-EINVAL);
}
- if (has_locked_children(old_mnt, path->dentry))
+ if (!ns_capable(old_mnt->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ if (__has_locked_children(old_mnt, path->dentry))
return ERR_PTR(-EINVAL);
new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
@@ -2944,6 +2958,10 @@ static int do_change_type(struct path *path, int ms_flags)
return -EINVAL;
namespace_lock();
+ if (!check_mnt(mnt)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
if (type == MS_SHARED) {
err = invent_group_ids(mnt, recurse);
if (err)
@@ -3035,7 +3053,7 @@ static struct mount *__do_loopback(struct path *old_path, int recurse)
if (!may_copy_tree(old_path))
return mnt;
- if (!recurse && has_locked_children(old, old_path->dentry))
+ if (!recurse && __has_locked_children(old, old_path->dentry))
return mnt;
if (recurse)
@@ -3428,7 +3446,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
goto out;
/* From mount should not have locked children in place of To's root */
- if (has_locked_children(from, to->mnt.mnt_root))
+ if (__has_locked_children(from, to->mnt.mnt_root))
goto out;
/* Setting sharing groups is only allowed on private mounts */
@@ -3442,7 +3460,7 @@ static int do_set_group(struct path *from_path, struct path *to_path)
if (IS_MNT_SLAVE(from)) {
struct mount *m = from->mnt_master;
- list_add(&to->mnt_slave, &m->mnt_slave_list);
+ list_add(&to->mnt_slave, &from->mnt_slave);
to->mnt_master = m;
}
@@ -3467,18 +3485,25 @@ out:
* Check if path is overmounted, i.e., if there's a mount on top of
* @path->mnt with @path->dentry as mountpoint.
*
- * Context: This function expects namespace_lock() to be held.
+ * Context: namespace_sem must be held at least shared.
+ * MUST NOT be called under lock_mount_hash() (there one should just
+ * call __lookup_mnt() and check if it returns NULL).
* Return: If path is overmounted true is returned, false if not.
*/
static inline bool path_overmounted(const struct path *path)
{
+ unsigned seq = read_seqbegin(&mount_lock);
+ bool no_child;
+
rcu_read_lock();
- if (unlikely(__lookup_mnt(path->mnt, path->dentry))) {
- rcu_read_unlock();
- return true;
- }
+ no_child = !__lookup_mnt(path->mnt, path->dentry);
rcu_read_unlock();
- return false;
+ if (need_seqretry(&mount_lock, seq)) {
+ read_seqlock_excl(&mount_lock);
+ no_child = !__lookup_mnt(path->mnt, path->dentry);
+ read_sequnlock_excl(&mount_lock);
+ }
+ return unlikely(!no_child);
}
/**
@@ -3637,46 +3662,41 @@ static int do_move_mount(struct path *old_path,
ns = old->mnt_ns;
err = -EINVAL;
- if (!may_use_mount(p))
- goto out;
-
/* The thing moved must be mounted... */
if (!is_mounted(&old->mnt))
goto out;
- /* ... and either ours or the root of anon namespace */
- if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
- goto out;
-
- if (is_anon_ns(ns)) {
+ if (check_mnt(old)) {
+ /* if the source is in our namespace... */
+ /* ... it should be detachable from parent */
+ if (!mnt_has_parent(old) || IS_MNT_LOCKED(old))
+ goto out;
+ /* ... and the target should be in our namespace */
+ if (!check_mnt(p))
+ goto out;
+ } else {
/*
- * Ending up with two files referring to the root of the
- * same anonymous mount namespace would cause an error
- * as this would mean trying to move the same mount
- * twice into the mount tree which would be rejected
- * later. But be explicit about it right here.
+ * otherwise the source must be the root of some anon namespace.
+ * AV: check for mount being root of an anon namespace is worth
+ * an inlined predicate...
*/
- if ((is_anon_ns(p->mnt_ns) && ns == p->mnt_ns))
+ if (!is_anon_ns(ns) || mnt_has_parent(old))
goto out;
-
/*
- * If this is an anonymous mount tree ensure that mount
- * propagation can detect mounts that were just
- * propagated to the target mount tree so we don't
- * propagate onto them.
+ * Bail out early if the target is within the same namespace -
+ * subsequent checks would've rejected that, but they lose
+ * some corner cases if we check it early.
*/
- ns->mntns_flags |= MNTNS_PROPAGATING;
- } else if (is_anon_ns(p->mnt_ns)) {
+ if (ns == p->mnt_ns)
+ goto out;
/*
- * Don't allow moving an attached mount tree to an
- * anonymous mount tree.
+ * Target should be either in our namespace or in an acceptable
+ * anon namespace, sensu check_anonymous_mnt().
*/
- goto out;
+ if (!may_use_mount(p))
+ goto out;
}
- if (old->mnt.mnt_flags & MNT_LOCKED)
- goto out;
-
if (!path_mounted(old_path))
goto out;
@@ -3722,8 +3742,6 @@ static int do_move_mount(struct path *old_path,
if (attached)
put_mountpoint(old_mp);
out:
- if (is_anon_ns(ns))
- ns->mntns_flags &= ~MNTNS_PROPAGATING;
unlock_mount(mp);
if (!err) {
if (attached) {
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 0d1b6d35ff3b..fd4619275801 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -262,9 +262,9 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
if (ret < 0) {
subreq->error = ret;
/* Not queued - release both refs. */
- netfs_put_subrequest(subreq, false,
+ netfs_put_subrequest(subreq,
netfs_sreq_trace_put_cancel);
- netfs_put_subrequest(subreq, false,
+ netfs_put_subrequest(subreq,
netfs_sreq_trace_put_cancel);
break;
}
@@ -297,8 +297,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
subreq->error = ret;
trace_netfs_sreq(subreq, netfs_sreq_trace_cancel);
/* Not queued - release both refs. */
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
break;
}
size -= slice;
@@ -312,7 +312,7 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
if (unlikely(size > 0)) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
/* Defer error return as we may need to wait for outstanding I/O. */
@@ -365,12 +365,10 @@ void netfs_readahead(struct readahead_control *ractl)
goto cleanup_free;
netfs_read_to_pagecache(rreq);
- netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
- return;
+ return netfs_put_request(rreq, netfs_rreq_trace_put_return);
cleanup_free:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
- return;
+ return netfs_put_request(rreq, netfs_rreq_trace_put_failed);
}
EXPORT_SYMBOL(netfs_readahead);
@@ -470,11 +468,11 @@ static int netfs_read_gaps(struct file *file, struct folio *folio)
folio_mark_uptodate(folio);
}
folio_unlock(folio);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
discard:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
alloc_error:
folio_unlock(folio);
return ret;
@@ -530,11 +528,11 @@ int netfs_read_folio(struct file *file, struct folio *folio)
netfs_read_to_pagecache(rreq);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
discard:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
alloc_error:
folio_unlock(folio);
return ret;
@@ -689,7 +687,7 @@ retry:
ret = netfs_wait_for_read(rreq);
if (ret < 0)
goto error;
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
have_folio:
ret = folio_wait_private_2_killable(folio);
@@ -701,7 +699,7 @@ have_folio_no_wait:
return 0;
error_put:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(rreq, netfs_rreq_trace_put_failed);
error:
if (folio) {
folio_unlock(folio);
@@ -752,11 +750,11 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio,
netfs_read_to_pagecache(rreq);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret < 0 ? ret : 0;
error_put:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+ netfs_put_request(rreq, netfs_rreq_trace_put_discard);
error:
_leave(" = %d", ret);
return ret;
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index b4826360a411..dbb544e183d1 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -386,7 +386,7 @@ out:
wbc_detach_inode(&wbc);
if (ret2 == -EIOCBQUEUED)
return ret2;
- if (ret == 0)
+ if (ret == 0 && ret2 < 0)
ret = ret2;
}
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 5e3f0aeb51f3..9902766195d7 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -85,7 +85,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
if (ret < 0) {
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
break;
}
}
@@ -103,7 +103,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
rreq->netfs_ops->issue_read(subreq);
if (test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
- netfs_wait_for_pause(rreq);
+ netfs_wait_for_paused_read(rreq);
if (test_bit(NETFS_RREQ_FAILED, &rreq->flags))
break;
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
@@ -115,7 +115,7 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
if (unlikely(size > 0)) {
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
return ret;
@@ -144,7 +144,7 @@ static ssize_t netfs_unbuffered_read(struct netfs_io_request *rreq, bool sync)
ret = netfs_dispatch_unbuffered_reads(rreq);
if (!rreq->submitted) {
- netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+ netfs_put_request(rreq, netfs_rreq_trace_put_no_submit);
inode_dio_end(rreq->inode);
ret = 0;
goto out;
@@ -188,7 +188,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
iocb->ki_pos, orig_count,
- NETFS_DIO_READ);
+ iocb->ki_flags & IOCB_DIRECT ?
+ NETFS_DIO_READ : NETFS_UNBUFFERED_READ);
if (IS_ERR(rreq))
return PTR_ERR(rreq);
@@ -236,7 +237,7 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
}
out:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
if (ret > 0)
orig_count -= ret;
return ret;
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 42ce53cc216e..fa9a5bf3c6d5 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -87,6 +87,8 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
}
__set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
+ if (async)
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
/* Copy the data into the bounce buffer and encrypt it. */
// TODO
@@ -105,19 +107,15 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
if (!async) {
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
- wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- ret = wreq->error;
- if (ret == 0) {
- ret = wreq->transferred;
+ ret = netfs_wait_for_write(wreq);
+ if (ret > 0)
iocb->ki_pos += ret;
- }
} else {
ret = -EIOCBQUEUED;
}
out:
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_write_iter_locked);
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index b1722a82c03d..e4308457633c 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -192,8 +192,7 @@ EXPORT_SYMBOL(__fscache_clear_page_bits);
/*
* Deal with the completion of writing the data to the cache.
*/
-static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
- bool was_async)
+static void fscache_wreq_done(void *priv, ssize_t transferred_or_error)
{
struct fscache_write_request *wreq = priv;
@@ -202,8 +201,7 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
wreq->set_bits);
if (wreq->term_func)
- wreq->term_func(wreq->term_func_priv, transferred_or_error,
- was_async);
+ wreq->term_func(wreq->term_func_priv, transferred_or_error);
fscache_end_operation(&wreq->cache_resources);
kfree(wreq);
}
@@ -255,14 +253,14 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
return;
abandon_end:
- return fscache_wreq_done(wreq, ret, false);
+ return fscache_wreq_done(wreq, ret);
abandon_free:
kfree(wreq);
abandon:
if (using_pgpriv2)
fscache_clear_page_bits(mapping, start, len, cond);
if (term_func)
- term_func(term_func_priv, ret, false);
+ term_func(term_func_priv, ret);
}
EXPORT_SYMBOL(__fscache_write_to_cache);
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index 1c4f953c3d68..e2ee9183392b 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -23,7 +23,7 @@
/*
* buffered_read.c
*/
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
@@ -62,6 +62,14 @@ static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
struct folio_queue *netfs_buffer_make_space(struct netfs_io_request *rreq,
enum netfs_folioq_trace trace);
void netfs_reset_iter(struct netfs_io_subrequest *subreq);
+void netfs_wake_collector(struct netfs_io_request *rreq);
+void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq);
+void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
+ struct netfs_io_stream *stream);
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
+ssize_t netfs_wait_for_write(struct netfs_io_request *rreq);
+void netfs_wait_for_paused_read(struct netfs_io_request *rreq);
+void netfs_wait_for_paused_write(struct netfs_io_request *rreq);
/*
* objects.c
@@ -71,9 +79,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
loff_t start, size_t len,
enum netfs_io_origin origin);
void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
-void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async);
-void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
- enum netfs_rreq_ref_trace what);
+void netfs_clear_subrequests(struct netfs_io_request *rreq);
+void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what);
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq);
static inline void netfs_see_request(struct netfs_io_request *rreq,
@@ -92,11 +99,9 @@ static inline void netfs_see_subrequest(struct netfs_io_subrequest *subreq,
/*
* read_collect.c
*/
+bool netfs_read_collection(struct netfs_io_request *rreq);
void netfs_read_collection_worker(struct work_struct *work);
-void netfs_wake_read_collector(struct netfs_io_request *rreq);
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async);
-ssize_t netfs_wait_for_read(struct netfs_io_request *rreq);
-void netfs_wait_for_pause(struct netfs_io_request *rreq);
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
/*
* read_pgpriv2.c
@@ -176,8 +181,8 @@ static inline void netfs_stat_d(atomic_t *stat)
* write_collect.c
*/
int netfs_folio_written_back(struct folio *folio);
+bool netfs_write_collection(struct netfs_io_request *wreq);
void netfs_write_collection_worker(struct work_struct *work);
-void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async);
/*
* write_issue.c
@@ -198,8 +203,8 @@ struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len
int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
struct folio *folio, size_t copied, bool to_page_end,
struct folio **writethrough_cache);
-int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
- struct folio *writethrough_cache);
+ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *writethrough_cache);
int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
/*
@@ -255,6 +260,21 @@ static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
}
/*
+ * Clear and wake up a NETFS_RREQ_* flag bit on a request.
+ */
+static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq,
+ unsigned int rreq_flag,
+ enum netfs_rreq_trace trace)
+{
+ if (test_bit(rreq_flag, &rreq->flags)) {
+ trace_netfs_rreq(rreq, trace);
+ clear_bit_unlock(rreq_flag, &rreq->flags);
+ smp_mb__after_atomic(); /* Set flag before task state */
+ wake_up(&rreq->waitq);
+ }
+}
+
+/*
* fscache-cache.c
*/
#ifdef CONFIG_PROC_FS
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 70ecc8f5f210..3db401d269e7 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READ_GAPS] = "RG",
[NETFS_READ_SINGLE] = "R1",
[NETFS_READ_FOR_WRITE] = "RW",
+ [NETFS_UNBUFFERED_READ] = "UR",
[NETFS_DIO_READ] = "DR",
[NETFS_WRITEBACK] = "WB",
[NETFS_WRITEBACK_SINGLE] = "W1",
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 7099aa07737a..43b67a28a8fa 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -313,3 +313,222 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp)
return true;
}
EXPORT_SYMBOL(netfs_release_folio);
+
+/*
+ * Wake the collection work item.
+ */
+void netfs_wake_collector(struct netfs_io_request *rreq)
+{
+ if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
+ !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
+ queue_work(system_unbound_wq, &rreq->work);
+ } else {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
+ wake_up(&rreq->waitq);
+ }
+}
+
+/*
+ * Mark a subrequest as no longer being in progress and, if need be, wake the
+ * collector.
+ */
+void netfs_subreq_clear_in_progress(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *rreq = subreq->rreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[subreq->stream_nr];
+
+ clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
+
+ /* If we are at the head of the queue, wake up the collector. */
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
+ test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
+ netfs_wake_collector(rreq);
+}
+
+/*
+ * Wait for all outstanding I/O in a stream to quiesce.
+ */
+void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
+ struct netfs_io_stream *stream)
+{
+ struct netfs_io_subrequest *subreq;
+ DEFINE_WAIT(myself);
+
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ continue;
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ for (;;) {
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
+ break;
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+ }
+
+ finish_wait(&rreq->waitq, &myself);
+}
+
+/*
+ * Perform collection in app thread if not offloaded to workqueue.
+ */
+static int netfs_collect_in_app(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ bool need_collect = false, inactive = true;
+
+ for (int i = 0; i < NR_IO_STREAMS; i++) {
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream = &rreq->io_streams[i];
+
+ if (!stream->active)
+ continue;
+ inactive = false;
+ trace_netfs_collect_stream(rreq, stream);
+ subreq = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest,
+ rreq_link);
+ if (subreq &&
+ (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
+ need_collect = true;
+ break;
+ }
+ }
+
+ if (!need_collect && !inactive)
+ return 0; /* Sleep */
+
+ __set_current_state(TASK_RUNNING);
+ if (collector(rreq)) {
+ /* Drop the ref from the NETFS_RREQ_IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ return 1; /* Done */
+ }
+
+ if (inactive) {
+ WARN(true, "Failed to collect inactive req R=%08x\n",
+ rreq->debug_id);
+ cond_resched();
+ }
+ return 2; /* Again */
+}
+
+/*
+ * Wait for a request to complete, successfully or otherwise.
+ */
+static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ DEFINE_WAIT(myself);
+ ssize_t ret;
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ switch (netfs_collect_in_app(rreq, collector)) {
+ case 0:
+ break;
+ case 1:
+ goto all_collected;
+ case 2:
+ continue;
+ }
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+all_collected:
+ finish_wait(&rreq->waitq, &myself);
+
+ ret = rreq->error;
+ if (ret == 0) {
+ ret = rreq->transferred;
+ switch (rreq->origin) {
+ case NETFS_DIO_READ:
+ case NETFS_DIO_WRITE:
+ case NETFS_READ_SINGLE:
+ case NETFS_UNBUFFERED_READ:
+ case NETFS_UNBUFFERED_WRITE:
+ break;
+ default:
+ if (rreq->submitted < rreq->len) {
+ trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
+ ret = -EIO;
+ }
+ break;
+ }
+ }
+
+ return ret;
+}
+
+ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_request(rreq, netfs_read_collection);
+}
+
+ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_request(rreq, netfs_write_collection);
+}
+
+/*
+ * Wait for a paused operation to unpause or complete in some manner.
+ */
+static void netfs_wait_for_pause(struct netfs_io_request *rreq,
+ bool (*collector)(struct netfs_io_request *rreq))
+{
+ DEFINE_WAIT(myself);
+
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
+
+ for (;;) {
+ trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
+ prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
+
+ if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
+ switch (netfs_collect_in_app(rreq, collector)) {
+ case 0:
+ break;
+ case 1:
+ goto all_collected;
+ case 2:
+ continue;
+ }
+ }
+
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
+ !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
+ break;
+
+ schedule();
+ trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
+ }
+
+all_collected:
+ finish_wait(&rreq->waitq, &myself);
+}
+
+void netfs_wait_for_paused_read(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_pause(rreq, netfs_read_collection);
+}
+
+void netfs_wait_for_paused_write(struct netfs_io_request *rreq)
+{
+ return netfs_wait_for_pause(rreq, netfs_write_collection);
+}
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index dc6b41ef18b0..31fa0c81e2a4 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -10,6 +10,8 @@
#include <linux/delay.h>
#include "internal.h"
+static void netfs_free_request(struct work_struct *work);
+
/*
* Allocate an I/O request and initialise it.
*/
@@ -34,6 +36,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
memset(rreq, 0, kmem_cache_size(cache));
+ INIT_WORK(&rreq->cleanup_work, netfs_free_request);
rreq->start = start;
rreq->len = len;
rreq->origin = origin;
@@ -49,13 +52,14 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
init_waitqueue_head(&rreq->waitq);
- refcount_set(&rreq->ref, 1);
+ refcount_set(&rreq->ref, 2);
if (origin == NETFS_READAHEAD ||
origin == NETFS_READPAGE ||
origin == NETFS_READ_GAPS ||
origin == NETFS_READ_SINGLE ||
origin == NETFS_READ_FOR_WRITE ||
+ origin == NETFS_UNBUFFERED_READ ||
origin == NETFS_DIO_READ) {
INIT_WORK(&rreq->work, netfs_read_collection_worker);
rreq->io_streams[0].avail = true;
@@ -63,7 +67,9 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
INIT_WORK(&rreq->work, netfs_write_collection_worker);
}
+ /* The IN_PROGRESS flag comes with a ref. */
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+
if (file && file->f_flags & O_NONBLOCK)
__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
if (rreq->netfs_ops->init_request) {
@@ -75,7 +81,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
}
atomic_inc(&ctx->io_count);
- trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new);
+ trace_netfs_rreq_ref(rreq->debug_id, refcount_read(&rreq->ref), netfs_rreq_trace_new);
netfs_proc_add_rreq(rreq);
netfs_stat(&netfs_n_rh_rreq);
return rreq;
@@ -89,7 +95,7 @@ void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace
trace_netfs_rreq_ref(rreq->debug_id, r + 1, what);
}
-void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
+void netfs_clear_subrequests(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream;
@@ -101,8 +107,7 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
subreq = list_first_entry(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, was_async,
- netfs_sreq_trace_put_clear);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_clear);
}
}
}
@@ -118,13 +123,19 @@ static void netfs_free_request_rcu(struct rcu_head *rcu)
static void netfs_free_request(struct work_struct *work)
{
struct netfs_io_request *rreq =
- container_of(work, struct netfs_io_request, work);
+ container_of(work, struct netfs_io_request, cleanup_work);
struct netfs_inode *ictx = netfs_inode(rreq->inode);
unsigned int i;
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
+
+ /* Cancel/flush the result collection worker. That does not carry a
+ * ref of its own, so we must wait for it somewhere.
+ */
+ cancel_work_sync(&rreq->work);
+
netfs_proc_del_rreq(rreq);
- netfs_clear_subrequests(rreq, false);
+ netfs_clear_subrequests(rreq);
if (rreq->netfs_ops->free_request)
rreq->netfs_ops->free_request(rreq);
if (rreq->cache_resources.ops)
@@ -145,8 +156,7 @@ static void netfs_free_request(struct work_struct *work)
call_rcu(&rreq->rcu, netfs_free_request_rcu);
}
-void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
- enum netfs_rreq_ref_trace what)
+void netfs_put_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what)
{
unsigned int debug_id;
bool dead;
@@ -156,15 +166,8 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
debug_id = rreq->debug_id;
dead = __refcount_dec_and_test(&rreq->ref, &r);
trace_netfs_rreq_ref(debug_id, r - 1, what);
- if (dead) {
- if (was_async) {
- rreq->work.func = netfs_free_request;
- if (!queue_work(system_unbound_wq, &rreq->work))
- WARN_ON(1);
- } else {
- netfs_free_request(&rreq->work);
- }
- }
+ if (dead)
+ WARN_ON(!queue_work(system_unbound_wq, &rreq->cleanup_work));
}
}
@@ -206,8 +209,7 @@ void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
what);
}
-static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
- bool was_async)
+static void netfs_free_subrequest(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
@@ -216,10 +218,10 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
rreq->netfs_ops->free_subrequest(subreq);
mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool);
netfs_stat_d(&netfs_n_rh_sreq);
- netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
+ netfs_put_request(rreq, netfs_rreq_trace_put_subreq);
}
-void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async,
+void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
enum netfs_sreq_ref_trace what)
{
unsigned int debug_index = subreq->debug_index;
@@ -230,5 +232,5 @@ void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async,
dead = __refcount_dec_and_test(&subreq->ref, &r);
trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what);
if (dead)
- netfs_free_subrequest(subreq, was_async);
+ netfs_free_subrequest(subreq);
}
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 23c75755ad4e..bad677e58a42 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -280,9 +280,13 @@ reassess:
stream->need_retry = true;
notes |= NEED_RETRY | MADE_PROGRESS;
break;
+ } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) {
+ notes |= MADE_PROGRESS;
} else {
if (!stream->failed)
- stream->transferred = stream->collected_to - rreq->start;
+ stream->transferred += transferred;
+ if (front->transferred < front->len)
+ set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags);
notes |= MADE_PROGRESS;
}
@@ -297,7 +301,7 @@ reassess:
struct netfs_io_subrequest, rreq_link);
stream->front = front;
spin_unlock(&rreq->lock);
- netfs_put_subrequest(remove, false,
+ netfs_put_subrequest(remove,
notes & ABANDON_SREQ ?
netfs_sreq_trace_put_abandon :
netfs_sreq_trace_put_done);
@@ -311,14 +315,8 @@ reassess:
if (notes & NEED_RETRY)
goto need_retry;
- if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &rreq->flags)) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_unpause);
- clear_bit_unlock(NETFS_RREQ_PAUSE, &rreq->flags);
- smp_mb__after_atomic(); /* Set PAUSE before task state */
- wake_up(&rreq->waitq);
- }
-
if (notes & MADE_PROGRESS) {
+ netfs_wake_rreq_flag(rreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
//cond_resched();
goto reassess;
}
@@ -342,24 +340,10 @@ need_retry:
*/
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
unsigned int i;
- /* Collect unbuffered reads and direct reads, adding up the transfer
- * sizes until we find the first short or failed subrequest.
- */
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- rreq->transferred += subreq->transferred;
-
- if (subreq->transferred < subreq->len ||
- test_bit(NETFS_SREQ_FAILED, &subreq->flags)) {
- rreq->error = subreq->error;
- break;
- }
- }
-
- if (rreq->origin == NETFS_DIO_READ) {
+ if (rreq->origin == NETFS_UNBUFFERED_READ ||
+ rreq->origin == NETFS_DIO_READ) {
for (i = 0; i < rreq->direct_bv_count; i++) {
flush_dcache_page(rreq->direct_bv[i].bv_page);
// TODO: cifs marks pages in the destination buffer
@@ -377,7 +361,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
- if (rreq->origin == NETFS_DIO_READ)
+ if (rreq->origin == NETFS_UNBUFFERED_READ ||
+ rreq->origin == NETFS_DIO_READ)
inode_dio_end(rreq->inode);
}
@@ -410,7 +395,7 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
* Note that we're in normal kernel thread context at this point, possibly
* running on a workqueue.
*/
-static void netfs_read_collection(struct netfs_io_request *rreq)
+bool netfs_read_collection(struct netfs_io_request *rreq)
{
struct netfs_io_stream *stream = &rreq->io_streams[0];
@@ -420,11 +405,11 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
* queue is empty.
*/
if (!test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
- return;
+ return false;
smp_rmb(); /* Read ALL_QUEUED before subreq lists. */
if (!list_empty(&stream->subrequests))
- return;
+ return false;
/* Okay, declare that all I/O is complete. */
rreq->transferred = stream->transferred;
@@ -433,6 +418,7 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
//netfs_rreq_is_still_valid(rreq);
switch (rreq->origin) {
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
case NETFS_READ_GAPS:
netfs_rreq_assess_dio(rreq);
@@ -445,14 +431,15 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
}
task_io_account_read(rreq->transferred);
- trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
- clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
+ netfs_wake_rreq_flag(rreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
+ /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
trace_netfs_rreq(rreq, netfs_rreq_trace_done);
- netfs_clear_subrequests(rreq, false);
+ netfs_clear_subrequests(rreq);
netfs_unlock_abandoned_read_pages(rreq);
if (unlikely(rreq->copy_to_cache))
netfs_pgpriv2_end_copy_to_cache(rreq);
+ return true;
}
void netfs_read_collection_worker(struct work_struct *work)
@@ -460,26 +447,12 @@ void netfs_read_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
- if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
- netfs_read_collection(rreq);
- netfs_put_request(rreq, false, netfs_rreq_trace_put_work);
-}
-
-/*
- * Wake the collection work item.
- */
-void netfs_wake_read_collector(struct netfs_io_request *rreq)
-{
- if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) &&
- !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) {
- if (!work_pending(&rreq->work)) {
- netfs_get_request(rreq, netfs_rreq_trace_get_work);
- if (!queue_work(system_unbound_wq, &rreq->work))
- netfs_put_request(rreq, true, netfs_rreq_trace_put_work_nq);
- }
- } else {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wake_queue);
- wake_up(&rreq->waitq);
+ if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_read_collection(rreq))
+ /* Drop the ref from the IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ else
+ netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
}
}
@@ -511,7 +484,7 @@ void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq)
list_is_first(&subreq->rreq_link, &stream->subrequests)
) {
__set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
- netfs_wake_read_collector(rreq);
+ netfs_wake_collector(rreq);
}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
@@ -535,7 +508,6 @@ EXPORT_SYMBOL(netfs_read_subreq_progress);
void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
switch (subreq->source) {
case NETFS_READ_FROM_CACHE:
@@ -582,23 +554,15 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq)
}
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */
-
- /* If we are at the head of the queue, wake up the collector. */
- if (list_is_first(&subreq->rreq_link, &stream->subrequests) ||
- test_bit(NETFS_RREQ_RETRYING, &rreq->flags))
- netfs_wake_read_collector(rreq);
-
- netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated);
+ netfs_subreq_clear_in_progress(subreq);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_read_subreq_terminated);
/*
* Handle termination of a read from the cache.
*/
-void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool was_async)
+void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error)
{
struct netfs_io_subrequest *subreq = priv;
@@ -613,94 +577,3 @@ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, bool
}
netfs_read_subreq_terminated(subreq);
}
-
-/*
- * Wait for the read operation to complete, successfully or otherwise.
- */
-ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
- ssize_t ret;
-
- for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- subreq = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
- if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
- __set_current_state(TASK_RUNNING);
- netfs_read_collection(rreq);
- continue;
- }
-
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
- break;
-
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
-
- ret = rreq->error;
- if (ret == 0) {
- ret = rreq->transferred;
- switch (rreq->origin) {
- case NETFS_DIO_READ:
- case NETFS_READ_SINGLE:
- ret = rreq->transferred;
- break;
- default:
- if (rreq->submitted < rreq->len) {
- trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
- ret = -EIO;
- }
- break;
- }
- }
-
- return ret;
-}
-
-/*
- * Wait for a paused read operation to unpause or complete in some manner.
- */
-void netfs_wait_for_pause(struct netfs_io_request *rreq)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
-
- for (;;) {
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
- subreq = list_first_entry_or_null(&stream->subrequests,
- struct netfs_io_subrequest, rreq_link);
- if (subreq &&
- (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
- test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
- __set_current_state(TASK_RUNNING);
- netfs_read_collection(rreq);
- continue;
- }
- }
-
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
- !test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
- break;
-
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
-}
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index cf7727060215..5bbe906a551d 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -116,7 +116,7 @@ static struct netfs_io_request *netfs_pgpriv2_begin_copy_to_cache(
return creq;
cancel_put:
- netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(creq, netfs_rreq_trace_put_return);
cancel:
rreq->copy_to_cache = ERR_PTR(-ENOBUFS);
clear_bit(NETFS_RREQ_FOLIO_COPY_TO_CACHE, &rreq->flags);
@@ -155,7 +155,7 @@ void netfs_pgpriv2_end_copy_to_cache(struct netfs_io_request *rreq)
smp_wmb(); /* Write lists before ALL_QUEUED. */
set_bit(NETFS_RREQ_ALL_QUEUED, &creq->flags);
- netfs_put_request(creq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(creq, netfs_rreq_trace_put_return);
creq->copy_to_cache = NULL;
}
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 0f294b26e08c..b99e84a8170a 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -173,7 +173,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
}
@@ -257,35 +257,15 @@ abandon:
*/
void netfs_retry_reads(struct netfs_io_request *rreq)
{
- struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream = &rreq->io_streams[0];
- DEFINE_WAIT(myself);
netfs_stat(&netfs_n_rh_retry_read_req);
- set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
-
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
- continue;
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
- for (;;) {
- prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
-
- if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
- break;
-
- trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
- schedule();
- trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
- }
-
- finish_wait(&rreq->waitq, &myself);
- }
+ set_bit(NETFS_RREQ_RETRYING, &rreq->flags);
+ netfs_wait_for_in_progress_stream(rreq, stream);
clear_bit(NETFS_RREQ_RETRYING, &rreq->flags);
trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
index fea0ecdecc53..fa622a6cd56d 100644
--- a/fs/netfs/read_single.c
+++ b/fs/netfs/read_single.c
@@ -142,7 +142,7 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags);
return ret;
cancel:
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_cancel);
return ret;
}
@@ -185,11 +185,11 @@ ssize_t netfs_read_single(struct inode *inode, struct file *file, struct iov_ite
netfs_single_dispatch_read(rreq);
ret = netfs_wait_for_read(rreq);
- netfs_put_request(rreq, true, netfs_rreq_trace_put_return);
+ netfs_put_request(rreq, netfs_rreq_trace_put_return);
return ret;
cleanup_free:
- netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(rreq, netfs_rreq_trace_put_failed);
return ret;
}
EXPORT_SYMBOL(netfs_read_single);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 3fca59e6475d..0ce7b53e7fe8 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -280,7 +280,7 @@ reassess_streams:
struct netfs_io_subrequest, rreq_link);
stream->front = front;
spin_unlock(&wreq->lock);
- netfs_put_subrequest(remove, false,
+ netfs_put_subrequest(remove,
notes & SAW_FAILURE ?
netfs_sreq_trace_put_cancel :
netfs_sreq_trace_put_done);
@@ -321,18 +321,14 @@ reassess_streams:
if (notes & NEED_RETRY)
goto need_retry;
- if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
- clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
- smp_mb__after_atomic(); /* Set PAUSE before task state */
- wake_up(&wreq->waitq);
- }
- if (notes & NEED_REASSESS) {
+ if (notes & MADE_PROGRESS) {
+ netfs_wake_rreq_flag(wreq, NETFS_RREQ_PAUSE, netfs_rreq_trace_unpause);
//cond_resched();
goto reassess_streams;
}
- if (notes & MADE_PROGRESS) {
+
+ if (notes & NEED_REASSESS) {
//cond_resched();
goto reassess_streams;
}
@@ -356,30 +352,21 @@ need_retry:
/*
* Perform the collection of subrequests, folios and encryption buffers.
*/
-void netfs_write_collection_worker(struct work_struct *work)
+bool netfs_write_collection(struct netfs_io_request *wreq)
{
- struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
struct netfs_inode *ictx = netfs_inode(wreq->inode);
size_t transferred;
int s;
_enter("R=%x", wreq->debug_id);
- netfs_see_request(wreq, netfs_rreq_trace_see_work);
- if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
-
netfs_collect_write_results(wreq);
/* We're done when the app thread has finished posting subreqs and all
* the queues in all the streams are empty.
*/
- if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
+ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags))
+ return false;
smp_rmb(); /* Read ALL_QUEUED before lists. */
transferred = LONG_MAX;
@@ -387,10 +374,8 @@ void netfs_write_collection_worker(struct work_struct *work)
struct netfs_io_stream *stream = &wreq->io_streams[s];
if (!stream->active)
continue;
- if (!list_empty(&stream->subrequests)) {
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
- return;
- }
+ if (!list_empty(&stream->subrequests))
+ return false;
if (stream->transferred < transferred)
transferred = stream->transferred;
}
@@ -428,8 +413,8 @@ void netfs_write_collection_worker(struct work_struct *work)
inode_dio_end(wreq->inode);
_debug("finished");
- trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
- clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
+ netfs_wake_rreq_flag(wreq, NETFS_RREQ_IN_PROGRESS, netfs_rreq_trace_wake_ip);
+ /* As we cleared NETFS_RREQ_IN_PROGRESS, we acquired its ref. */
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
@@ -440,19 +425,21 @@ void netfs_write_collection_worker(struct work_struct *work)
wreq->iocb = VFS_PTR_POISON;
}
- netfs_clear_subrequests(wreq, false);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete);
+ netfs_clear_subrequests(wreq);
+ return true;
}
-/*
- * Wake the collection work item.
- */
-void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
+void netfs_write_collection_worker(struct work_struct *work)
{
- if (!work_pending(&wreq->work)) {
- netfs_get_request(wreq, netfs_rreq_trace_get_work);
- if (!queue_work(system_unbound_wq, &wreq->work))
- netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq);
+ struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
+
+ netfs_see_request(rreq, netfs_rreq_trace_see_work);
+ if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
+ if (netfs_write_collection(rreq))
+ /* Drop the ref from the IN_PROGRESS flag. */
+ netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);
+ else
+ netfs_see_request(rreq, netfs_rreq_trace_see_work_complete);
}
}
@@ -460,7 +447,6 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
* netfs_write_subrequest_terminated - Note the termination of a write operation.
* @_op: The I/O request that has terminated.
* @transferred_or_error: The amount of data transferred or an error code.
- * @was_async: The termination was asynchronous
*
* This tells the library that a contributory write I/O operation has
* terminated, one way or another, and that it should collect the results.
@@ -470,21 +456,16 @@ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
* negative error code. The library will look after reissuing I/O operations
* as appropriate and writing downloaded data to the cache.
*
- * If @was_async is true, the caller might be running in softirq or interrupt
- * context and we can't sleep.
- *
* When this is called, ownership of the subrequest is transferred back to the
* library, along with a ref.
*
* Note that %_op is a void* so that the function can be passed to
* kiocb::term_func without the need for a casting wrapper.
*/
-void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
- bool was_async)
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error)
{
struct netfs_io_subrequest *subreq = _op;
struct netfs_io_request *wreq = subreq->rreq;
- struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
@@ -536,15 +517,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
}
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- clear_and_wake_up_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
-
- /* If we are at the head of the queue, wake up the collector,
- * transferring a ref to it if we were the ones to do so.
- */
- if (list_is_first(&subreq->rreq_link, &stream->subrequests))
- netfs_wake_write_collector(wreq, was_async);
-
- netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+ netfs_subreq_clear_in_progress(subreq);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 77279fc5b5a7..50bee2c4130d 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -134,7 +134,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
return wreq;
nomem:
wreq->error = -ENOMEM;
- netfs_put_request(wreq, false, netfs_rreq_trace_put_failed);
+ netfs_put_request(wreq, netfs_rreq_trace_put_failed);
return ERR_PTR(-ENOMEM);
}
@@ -233,7 +233,7 @@ static void netfs_do_issue_write(struct netfs_io_stream *stream,
_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
- return netfs_write_subrequest_terminated(subreq, subreq->error, false);
+ return netfs_write_subrequest_terminated(subreq, subreq->error);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
stream->issue_write(subreq);
@@ -542,7 +542,7 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq)
}
if (needs_poke)
- netfs_wake_write_collector(wreq, false);
+ netfs_wake_collector(wreq);
}
/*
@@ -576,6 +576,7 @@ int netfs_writepages(struct address_space *mapping,
goto couldnt_start;
}
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
trace_netfs_write(wreq, netfs_write_trace_writeback);
netfs_stat(&netfs_n_wh_writepages);
@@ -599,8 +600,9 @@ int netfs_writepages(struct address_space *mapping,
netfs_end_issue_write(wreq);
mutex_unlock(&ictx->wb_lock);
+ netfs_wake_collector(wreq);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
_leave(" = %d", error);
return error;
@@ -673,11 +675,11 @@ int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_c
/*
* End a write operation used when writing through the pagecache.
*/
-int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
- struct folio *writethrough_cache)
+ssize_t netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *writethrough_cache)
{
struct netfs_inode *ictx = netfs_inode(wreq->inode);
- int ret;
+ ssize_t ret;
_enter("R=%x", wreq->debug_id);
@@ -688,13 +690,11 @@ int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_contr
mutex_unlock(&ictx->wb_lock);
- if (wreq->iocb) {
+ if (wreq->iocb)
ret = -EIOCBQUEUED;
- } else {
- wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
- ret = wreq->error;
- }
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ else
+ ret = netfs_wait_for_write(wreq);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
return ret;
}
@@ -722,10 +722,8 @@ int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t
start += part;
len -= part;
rolling_buffer_advance(&wreq->buffer, part);
- if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
- trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
- wait_event(wreq->waitq, !test_bit(NETFS_RREQ_PAUSE, &wreq->flags));
- }
+ if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags))
+ netfs_wait_for_paused_write(wreq);
if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
break;
}
@@ -885,7 +883,8 @@ int netfs_writeback_single(struct address_space *mapping,
goto couldnt_start;
}
- trace_netfs_write(wreq, netfs_write_trace_writeback);
+ __set_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &wreq->flags);
+ trace_netfs_write(wreq, netfs_write_trace_writeback_single);
netfs_stat(&netfs_n_wh_writepages);
if (__test_and_set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
@@ -914,8 +913,9 @@ stop:
set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
mutex_unlock(&ictx->wb_lock);
+ netfs_wake_collector(wreq);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ netfs_put_request(wreq, netfs_rreq_trace_put_return);
_leave(" = %d", ret);
return ret;
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index 545d33079a77..9d1d8a8bab72 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -39,9 +39,10 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
break;
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
- struct iov_iter source = subreq->io_iter;
+ struct iov_iter source;
- iov_iter_revert(&source, subreq->len - source.count);
+ netfs_reset_iter(subreq);
+ source = subreq->io_iter;
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_write(stream, subreq, &source);
}
@@ -131,7 +132,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
&stream->subrequests, rreq_link) {
trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
list_del(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ netfs_put_subrequest(subreq, netfs_sreq_trace_put_done);
if (subreq == to)
break;
}
@@ -199,7 +200,6 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
*/
void netfs_retry_writes(struct netfs_io_request *wreq)
{
- struct netfs_io_subrequest *subreq;
struct netfs_io_stream *stream;
int s;
@@ -208,16 +208,13 @@ void netfs_retry_writes(struct netfs_io_request *wreq)
/* Wait for all outstanding I/O to quiesce before performing retries as
* we may need to renegotiate the I/O sizes.
*/
+ set_bit(NETFS_RREQ_RETRYING, &wreq->flags);
for (s = 0; s < NR_IO_STREAMS; s++) {
stream = &wreq->io_streams[s];
- if (!stream->active)
- continue;
-
- list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
- wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- }
+ if (stream->active)
+ netfs_wait_for_in_progress_stream(wreq, stream);
}
+ clear_bit(NETFS_RREQ_RETRYING, &wreq->flags);
// TODO: Enc: Fetch changed partial pages
// TODO: Enc: Reencrypt content if needed.
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index e278a1ad1ca3..8b0785178731 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
sreq = netfs->sreq;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+ sreq->rreq->origin != NETFS_UNBUFFERED_READ &&
sreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 4ec952f9f47d..e6d36b3d3fc0 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -207,14 +207,16 @@ void nfs_local_probe_async(struct nfs_client *clp)
}
EXPORT_SYMBOL_GPL(nfs_local_probe_async);
-static inline struct nfsd_file *nfs_local_file_get(struct nfsd_file *nf)
+static inline void nfs_local_file_put(struct nfsd_file *localio)
{
- return nfs_to->nfsd_file_get(nf);
-}
+ /* nfs_to_nfsd_file_put_local() expects an __rcu pointer
+ * but we have a __kernel pointer. It is always safe
+ * to cast a __kernel pointer to an __rcu pointer
+ * because the cast only weakens what is known about the pointer.
+ */
+ struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;
-static inline void nfs_local_file_put(struct nfsd_file *nf)
-{
- nfs_to->nfsd_file_put(nf);
+ nfs_to_nfsd_file_put_local(&nf);
}
/*
@@ -226,12 +228,13 @@ static inline void nfs_local_file_put(struct nfsd_file *nf)
static struct nfsd_file *
__nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
struct nfs_fh *fh, struct nfs_file_localio *nfl,
+ struct nfsd_file __rcu **pnf,
const fmode_t mode)
{
struct nfsd_file *localio;
localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
- cred, fh, nfl, mode);
+ cred, fh, nfl, pnf, mode);
if (IS_ERR(localio)) {
int status = PTR_ERR(localio);
trace_nfs_local_open_fh(fh, mode, status);
@@ -258,7 +261,7 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
struct nfs_fh *fh, struct nfs_file_localio *nfl,
const fmode_t mode)
{
- struct nfsd_file *nf, *new, __rcu **pnf;
+ struct nfsd_file *nf, __rcu **pnf;
if (!nfs_server_is_local(clp))
return NULL;
@@ -270,29 +273,9 @@ nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
else
pnf = &nfl->ro_file;
- new = NULL;
- rcu_read_lock();
- nf = rcu_dereference(*pnf);
- if (!nf) {
- rcu_read_unlock();
- new = __nfs_local_open_fh(clp, cred, fh, nfl, mode);
- if (IS_ERR(new))
- return NULL;
- rcu_read_lock();
- /* try to swap in the pointer */
- spin_lock(&clp->cl_uuid.lock);
- nf = rcu_dereference_protected(*pnf, 1);
- if (!nf) {
- nf = new;
- new = NULL;
- rcu_assign_pointer(*pnf, nf);
- }
- spin_unlock(&clp->cl_uuid.lock);
- }
- nf = nfs_local_file_get(nf);
- rcu_read_unlock();
- if (new)
- nfs_to_nfsd_file_put_local(new);
+ nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
+ if (IS_ERR(nf))
+ return NULL;
return nf;
}
EXPORT_SYMBOL_GPL(nfs_local_open_fh);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b1d2122bd5a7..4b123bca65e1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5164,13 +5164,15 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
}
static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry,
- struct nfs4_createdata *data)
+ struct nfs4_createdata *data, int *statusp)
{
- int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
+ struct dentry *ret;
+
+ *statusp = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
&data->arg.seq_args, &data->res.seq_res, 1);
- if (status)
- return ERR_PTR(status);
+ if (*statusp)
+ return NULL;
spin_lock(&dir->i_lock);
/* Creating a directory bumps nlink in the parent */
@@ -5179,7 +5181,11 @@ static struct dentry *nfs4_do_mkdir(struct inode *dir, struct dentry *dentry,
data->res.fattr->time_start,
NFS_INO_INVALID_DATA);
spin_unlock(&dir->i_lock);
- return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
+ ret = nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
+ if (!IS_ERR(ret))
+ return ret;
+ *statusp = PTR_ERR(ret);
+ return NULL;
}
static void nfs4_free_createdata(struct nfs4_createdata *data)
@@ -5240,17 +5246,18 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
static struct dentry *_nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
struct iattr *sattr,
- struct nfs4_label *label)
+ struct nfs4_label *label, int *statusp)
{
struct nfs4_createdata *data;
- struct dentry *ret = ERR_PTR(-ENOMEM);
+ struct dentry *ret = NULL;
+ *statusp = -ENOMEM;
data = nfs4_alloc_createdata(dir, &dentry->d_name, sattr, NF4DIR);
if (data == NULL)
goto out;
data->arg.label = label;
- ret = nfs4_do_mkdir(dir, dentry, data);
+ ret = nfs4_do_mkdir(dir, dentry, data, statusp);
nfs4_free_createdata(data);
out:
@@ -5273,11 +5280,12 @@ static struct dentry *nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
sattr->ia_mode &= ~current_umask();
do {
- alias = _nfs4_proc_mkdir(dir, dentry, sattr, label);
- err = PTR_ERR_OR_ZERO(alias);
+ alias = _nfs4_proc_mkdir(dir, dentry, sattr, label, &err);
trace_nfs4_mkdir(dir, &dentry->d_name, err);
- err = nfs4_handle_exception(NFS_SERVER(dir), err,
- &exception);
+ if (err)
+ alias = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
+ err,
+ &exception));
} while (exception.retry);
nfs4_label_release_security(label);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9eea9e62afc9..91b5503b6f74 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1052,6 +1052,16 @@ int nfs_reconfigure(struct fs_context *fc)
sync_filesystem(sb);
/*
+ * The SB_RDONLY flag has been removed from the superblock during
+ * mounts to prevent interference between different filesystems.
+ * Similarly, it is also necessary to ignore the SB_RDONLY flag
+ * during reconfiguration; otherwise, it may also result in the
+ * creation of redundant superblocks when mounting a directory with
+ * different rw and ro flags multiple times.
+ */
+ fc->sb_flags_mask &= ~SB_RDONLY;
+
+ /*
* Userspace mount programs that send binary options generally send
* them populated with default values. We have no way to know which
* ones were explicitly specified. Fall back to legacy behavior and
@@ -1308,8 +1318,17 @@ int nfs_get_tree_common(struct fs_context *fc)
if (IS_ERR(server))
return PTR_ERR(server);
+ /*
+ * When NFS_MOUNT_UNSHARED is not set, NFS forces the sharing of a
+ * superblock among each filesystem that mounts sub-directories
+ * belonging to a single exported root path.
+ * To prevent interference between different filesystems, the
+ * SB_RDONLY flag should be removed from the superblock.
+ */
if (server->flags & NFS_MOUNT_UNSHARED)
compare_super = NULL;
+ else
+ fc->sb_flags &= ~SB_RDONLY;
/* -o noac implies -o sync */
if (server->flags & NFS_MOUNT_NOAC)
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 6a0bdea6d644..05c7c16e37ab 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -151,8 +151,7 @@ EXPORT_SYMBOL_GPL(nfs_localio_enable_client);
*/
static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
{
- LIST_HEAD(local_files);
- struct nfs_file_localio *nfl, *tmp;
+ struct nfs_file_localio *nfl;
spin_lock(&nfs_uuid->lock);
if (unlikely(!rcu_access_pointer(nfs_uuid->net))) {
@@ -166,17 +165,42 @@ static bool nfs_uuid_put(nfs_uuid_t *nfs_uuid)
nfs_uuid->dom = NULL;
}
- list_splice_init(&nfs_uuid->files, &local_files);
- spin_unlock(&nfs_uuid->lock);
-
/* Walk list of files and ensure their last references dropped */
- list_for_each_entry_safe(nfl, tmp, &local_files, list) {
- nfs_close_local_fh(nfl);
+
+ while ((nfl = list_first_entry_or_null(&nfs_uuid->files,
+ struct nfs_file_localio,
+ list)) != NULL) {
+ /* If nfs_uuid is already NULL, nfs_close_local_fh is
+ * closing and we must wait, else we unlink and close.
+ */
+ if (rcu_access_pointer(nfl->nfs_uuid) == NULL) {
+ /* nfs_close_local_fh() is doing the
+ * close and we must wait. until it unlinks
+ */
+ wait_var_event_spinlock(nfl,
+ list_first_entry_or_null(
+ &nfs_uuid->files,
+ struct nfs_file_localio,
+ list) != nfl,
+ &nfs_uuid->lock);
+ continue;
+ }
+
+ /* Remove nfl from nfs_uuid->files list */
+ list_del_init(&nfl->list);
+ spin_unlock(&nfs_uuid->lock);
+
+ nfs_to_nfsd_file_put_local(&nfl->ro_file);
+ nfs_to_nfsd_file_put_local(&nfl->rw_file);
cond_resched();
- }
- spin_lock(&nfs_uuid->lock);
- BUG_ON(!list_empty(&nfs_uuid->files));
+ spin_lock(&nfs_uuid->lock);
+ /* Now we can allow racing nfs_close_local_fh() to
+ * skip the locking.
+ */
+ RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
+ wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
+ }
/* Remove client from nn->local_clients */
if (nfs_uuid->list_lock) {
@@ -237,6 +261,7 @@ static void nfs_uuid_add_file(nfs_uuid_t *nfs_uuid, struct nfs_file_localio *nfl
struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
struct rpc_clnt *rpc_clnt, const struct cred *cred,
const struct nfs_fh *nfs_fh, struct nfs_file_localio *nfl,
+ struct nfsd_file __rcu **pnf,
const fmode_t fmode)
{
struct net *net;
@@ -261,10 +286,9 @@ struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *uuid,
rcu_read_unlock();
/* We have an implied reference to net thanks to nfsd_net_try_get */
localio = nfs_to->nfsd_open_local_fh(net, uuid->dom, rpc_clnt,
- cred, nfs_fh, fmode);
- if (IS_ERR(localio))
- nfs_to_nfsd_net_put(net);
- else
+ cred, nfs_fh, pnf, fmode);
+ nfs_to_nfsd_net_put(net);
+ if (!IS_ERR(localio))
nfs_uuid_add_file(uuid, nfl);
return localio;
@@ -273,8 +297,6 @@ EXPORT_SYMBOL_GPL(nfs_open_local_fh);
void nfs_close_local_fh(struct nfs_file_localio *nfl)
{
- struct nfsd_file *ro_nf = NULL;
- struct nfsd_file *rw_nf = NULL;
nfs_uuid_t *nfs_uuid;
rcu_read_lock();
@@ -285,28 +307,39 @@ void nfs_close_local_fh(struct nfs_file_localio *nfl)
return;
}
- ro_nf = rcu_access_pointer(nfl->ro_file);
- rw_nf = rcu_access_pointer(nfl->rw_file);
- if (ro_nf || rw_nf) {
- spin_lock(&nfs_uuid->lock);
- if (ro_nf)
- ro_nf = rcu_dereference_protected(xchg(&nfl->ro_file, NULL), 1);
- if (rw_nf)
- rw_nf = rcu_dereference_protected(xchg(&nfl->rw_file, NULL), 1);
-
- /* Remove nfl from nfs_uuid->files list */
- RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
- list_del_init(&nfl->list);
+ spin_lock(&nfs_uuid->lock);
+ if (!rcu_access_pointer(nfl->nfs_uuid)) {
+ /* nfs_uuid_put has finished here */
spin_unlock(&nfs_uuid->lock);
rcu_read_unlock();
-
- if (ro_nf)
- nfs_to_nfsd_file_put_local(ro_nf);
- if (rw_nf)
- nfs_to_nfsd_file_put_local(rw_nf);
return;
}
+ if (list_empty(&nfs_uuid->files)) {
+ /* nfs_uuid_put() has started closing files, wait for it
+ * to finished
+ */
+ spin_unlock(&nfs_uuid->lock);
+ rcu_read_unlock();
+ wait_var_event(&nfl->nfs_uuid,
+ rcu_access_pointer(nfl->nfs_uuid) == NULL);
+ return;
+ }
+ /* tell nfs_uuid_put() to wait for us */
+ RCU_INIT_POINTER(nfl->nfs_uuid, NULL);
+ spin_unlock(&nfs_uuid->lock);
rcu_read_unlock();
+
+ nfs_to_nfsd_file_put_local(&nfl->ro_file);
+ nfs_to_nfsd_file_put_local(&nfl->rw_file);
+
+ /* Remove nfl from nfs_uuid->files list and signal nfs_uuid_put()
+ * that we are done. The moment we drop the spinlock the
+ * nfs_uuid could be freed.
+ */
+ spin_lock(&nfs_uuid->lock);
+ list_del_init(&nfl->list);
+ wake_up_var_locked(&nfl->nfs_uuid, &nfs_uuid->lock);
+ spin_unlock(&nfs_uuid->lock);
}
EXPORT_SYMBOL_GPL(nfs_close_local_fh);
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index ab85e6a2454f..e108b6c705b4 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -378,15 +378,41 @@ nfsd_file_put(struct nfsd_file *nf)
* the reference of the nfsd_file.
*/
struct net *
-nfsd_file_put_local(struct nfsd_file *nf)
+nfsd_file_put_local(struct nfsd_file __rcu **pnf)
{
- struct net *net = nf->nf_net;
+ struct nfsd_file *nf;
+ struct net *net = NULL;
- nfsd_file_put(nf);
+ nf = unrcu_pointer(xchg(pnf, NULL));
+ if (nf) {
+ net = nf->nf_net;
+ nfsd_file_put(nf);
+ }
return net;
}
/**
+ * nfsd_file_get_local - get nfsd_file reference and reference to net
+ * @nf: nfsd_file of which to put the reference
+ *
+ * Get reference to both the nfsd_file and nf->nf_net.
+ */
+struct nfsd_file *
+nfsd_file_get_local(struct nfsd_file *nf)
+{
+ struct net *net = nf->nf_net;
+
+ if (nfsd_net_try_get(net)) {
+ nf = nfsd_file_get(nf);
+ if (!nf)
+ nfsd_net_put(net);
+ } else {
+ nf = NULL;
+ }
+ return nf;
+}
+
+/**
* nfsd_file_file - get the backing file of an nfsd_file
* @nf: nfsd_file of which to access the backing file.
*
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 5865f9c72712..722b26c71e45 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -62,7 +62,8 @@ void nfsd_file_cache_shutdown(void);
int nfsd_file_cache_start_net(struct net *net);
void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
-struct net *nfsd_file_put_local(struct nfsd_file *nf);
+struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf);
+struct nfsd_file *nfsd_file_get_local(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
struct file *nfsd_file_file(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
diff --git a/fs/nfsd/localio.c b/fs/nfsd/localio.c
index 238647fa379e..80d9ff6608a7 100644
--- a/fs/nfsd/localio.c
+++ b/fs/nfsd/localio.c
@@ -24,21 +24,6 @@
#include "filecache.h"
#include "cache.h"
-static const struct nfsd_localio_operations nfsd_localio_ops = {
- .nfsd_net_try_get = nfsd_net_try_get,
- .nfsd_net_put = nfsd_net_put,
- .nfsd_open_local_fh = nfsd_open_local_fh,
- .nfsd_file_put_local = nfsd_file_put_local,
- .nfsd_file_get = nfsd_file_get,
- .nfsd_file_put = nfsd_file_put,
- .nfsd_file_file = nfsd_file_file,
-};
-
-void nfsd_localio_ops_init(void)
-{
- nfs_to = &nfsd_localio_ops;
-}
-
/**
* nfsd_open_local_fh - lookup a local filehandle @nfs_fh and map to nfsd_file
*
@@ -47,6 +32,7 @@ void nfsd_localio_ops_init(void)
* @rpc_clnt: rpc_clnt that the client established
* @cred: cred that the client established
* @nfs_fh: filehandle to lookup
+ * @nfp: place to find the nfsd_file, or store it if it was non-NULL
* @fmode: fmode_t to use for open
*
* This function maps a local fh to a path on a local filesystem.
@@ -57,10 +43,11 @@ void nfsd_localio_ops_init(void)
* set. Caller (NFS client) is responsible for calling nfsd_net_put and
* nfsd_file_put (via nfs_to_nfsd_file_put_local).
*/
-struct nfsd_file *
+static struct nfsd_file *
nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
struct rpc_clnt *rpc_clnt, const struct cred *cred,
- const struct nfs_fh *nfs_fh, const fmode_t fmode)
+ const struct nfs_fh *nfs_fh, struct nfsd_file __rcu **pnf,
+ const fmode_t fmode)
{
int mayflags = NFSD_MAY_LOCALIO;
struct svc_cred rq_cred;
@@ -71,6 +58,15 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
if (nfs_fh->size > NFS4_FHSIZE)
return ERR_PTR(-EINVAL);
+ if (!nfsd_net_try_get(net))
+ return ERR_PTR(-ENXIO);
+
+ rcu_read_lock();
+ localio = nfsd_file_get(rcu_dereference(*pnf));
+ rcu_read_unlock();
+ if (localio)
+ return localio;
+
/* nfs_fh -> svc_fh */
fh_init(&fh, NFS4_FHSIZE);
fh.fh_handle.fh_size = nfs_fh->size;
@@ -92,9 +88,47 @@ nfsd_open_local_fh(struct net *net, struct auth_domain *dom,
if (rq_cred.cr_group_info)
put_group_info(rq_cred.cr_group_info);
+ if (!IS_ERR(localio)) {
+ struct nfsd_file *new;
+ if (!nfsd_net_try_get(net)) {
+ nfsd_file_put(localio);
+ nfsd_net_put(net);
+ return ERR_PTR(-ENXIO);
+ }
+ nfsd_file_get(localio);
+ again:
+ new = unrcu_pointer(cmpxchg(pnf, NULL, RCU_INITIALIZER(localio)));
+ if (new) {
+ /* Some other thread installed an nfsd_file */
+ if (nfsd_file_get(new) == NULL)
+ goto again;
+ /*
+ * Drop the ref we were going to install and the
+ * one we were going to return.
+ */
+ nfsd_file_put(localio);
+ nfsd_file_put(localio);
+ localio = new;
+ }
+ } else
+ nfsd_net_put(net);
+
return localio;
}
-EXPORT_SYMBOL_GPL(nfsd_open_local_fh);
+
+static const struct nfsd_localio_operations nfsd_localio_ops = {
+ .nfsd_net_try_get = nfsd_net_try_get,
+ .nfsd_net_put = nfsd_net_put,
+ .nfsd_open_local_fh = nfsd_open_local_fh,
+ .nfsd_file_put_local = nfsd_file_put_local,
+ .nfsd_file_get_local = nfsd_file_get_local,
+ .nfsd_file_file = nfsd_file_file,
+};
+
+void nfsd_localio_ops_init(void)
+{
+ nfs_to = &nfsd_localio_ops;
+}
/*
* UUID_IS_LOCAL XDR functions
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 0d8f7fb15c2e..dd0c8e560ef6 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -2102,11 +2102,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree,
ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
if (ret < 0) {
- if (unlikely(ret == -ENOENT))
+ if (unlikely(ret == -ENOENT)) {
nilfs_crit(btree->b_inode->i_sb,
"writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
btree->b_inode->i_ino,
(unsigned long long)key, level);
+ ret = -EINVAL;
+ }
goto out;
}
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 893ab36824cc..2d8dc6b35b54 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap,
dat = nilfs_bmap_get_dat(bmap);
key = nilfs_bmap_data_get_key(bmap, bh);
ptr = nilfs_direct_get_ptr(bmap, key);
+ if (ptr == NILFS_BMAP_INVALID_PTR)
+ return -EINVAL;
+
if (!buffer_nilfs_volatile(bh)) {
oldreq.pr_entry_nr = ptr;
newreq.pr_entry_nr = ptr;
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index 78d20e4baa2c..1bf2a6593dec 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -2182,6 +2182,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx,
e = hdr_first_de(&n->index->ihdr);
fnd_push(fnd, n, e);
+ if (!e) {
+ err = -EINVAL;
+ goto out;
+ }
if (!de_is_last(e)) {
/*
@@ -2203,6 +2207,10 @@ static int indx_get_entry_to_replace(struct ntfs_index *indx,
n = fnd->nodes[level];
te = hdr_first_de(&n->index->ihdr);
+ if (!te) {
+ err = -EINVAL;
+ goto out;
+ }
/* Copy the candidate entry into the replacement entry buffer. */
re = kmalloc(le16_to_cpu(te->size) + sizeof(u64), GFP_NOFS);
if (!re) {
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 3e2957a1e360..0f0d27d4644a 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -805,6 +805,10 @@ static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
ret = 0;
goto out;
}
+ if (is_compressed(ni)) {
+ ret = 0;
+ goto out;
+ }
ret = blockdev_direct_IO(iocb, inode, iter,
wr ? ntfs_get_block_direct_IO_W :
@@ -2068,5 +2072,6 @@ const struct address_space_operations ntfs_aops_cmpr = {
.read_folio = ntfs_read_folio,
.readahead = ntfs_readahead,
.dirty_folio = block_dirty_folio,
+ .direct_IO = ntfs_direct_IO,
};
// clang-format on
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index e272429da3db..de7f12858729 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -674,7 +674,7 @@ out_put:
break;
}
out:
- kfree(rec);
+ ocfs2_free_quota_recovery(rec);
return status;
}
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 50e69a9e104a..87a53d2ae4bb 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -336,7 +336,7 @@ static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg)
kinfo.pid = task_pid_vnr(task);
kinfo.mask |= PIDFD_INFO_PID;
- if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
+ if (kinfo.pid == 0 || kinfo.tgid == 0)
return -ESRCH;
copy_out:
diff --git a/fs/pnode.c b/fs/pnode.c
index fb77427df39e..ffd429b760d5 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -231,8 +231,8 @@ static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
/* skip if mountpoint isn't visible in m */
if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
return 0;
- /* skip if m is in the anon_ns we are emptying */
- if (m->mnt_ns->mntns_flags & MNTNS_PROPAGATING)
+ /* skip if m is in the anon_ns */
+ if (is_anon_ns(m->mnt_ns))
return 0;
if (peers(m, last_dest)) {
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index ecf774a8f1ca..66093fa78aed 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -151,8 +151,7 @@ extern bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 eof,
bool from_readdir);
extern void cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
unsigned int bytes_written);
-void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result,
- bool was_async);
+void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result);
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, int);
extern int cifs_get_writable_file(struct cifsInodeInfo *cifs_inode,
int flags,
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index f55457b4b82e..a3ba3346ed31 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1725,7 +1725,7 @@ cifs_writev_callback(struct mid_q_entry *mid)
server->credits, server->in_flight,
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
- cifs_write_subrequest_terminated(wdata, result, true);
+ cifs_write_subrequest_terminated(wdata, result);
release_mid(mid);
trace_smb3_rw_credits(credits.rreq_debug_id, credits.rreq_debug_index, 0,
server->credits, server->in_flight,
@@ -1813,7 +1813,7 @@ async_writev_out:
out:
if (rc) {
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, false);
+ cifs_write_subrequest_terminated(wdata, rc);
}
}
@@ -2753,10 +2753,10 @@ int cifs_query_reparse_point(const unsigned int xid,
io_req->TotalParameterCount = 0;
io_req->TotalDataCount = 0;
- io_req->MaxParameterCount = cpu_to_le32(2);
+ io_req->MaxParameterCount = cpu_to_le32(0);
/* BB find exact data count max from sess structure BB */
io_req->MaxDataCount = cpu_to_le32(CIFSMaxBufSize & 0xFFFFFF00);
- io_req->MaxSetupCount = 4;
+ io_req->MaxSetupCount = 1;
io_req->Reserved = 0;
io_req->ParameterOffset = 0;
io_req->DataCount = 0;
@@ -2783,6 +2783,22 @@ int cifs_query_reparse_point(const unsigned int xid,
goto error;
}
+ /* SetupCount must be 1, otherwise offset to ByteCount is incorrect. */
+ if (io_rsp->SetupCount != 1) {
+ rc = -EIO;
+ goto error;
+ }
+
+ /*
+ * ReturnedDataLen is output length of executed IOCTL.
+ * DataCount is output length transferred over network.
+ * Check that we have full FSCTL_GET_REPARSE_POINT buffer.
+ */
+ if (data_count != le16_to_cpu(io_rsp->ReturnedDataLen)) {
+ rc = -EIO;
+ goto error;
+ }
+
end = 2 + get_bcc(&io_rsp->hdr) + (__u8 *)&io_rsp->ByteCount;
start = (__u8 *)&io_rsp->hdr.Protocol + data_offset;
if (start >= end) {
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 950aa4f912f5..9835672267d2 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -130,7 +130,7 @@ fail:
else
trace_netfs_sreq(subreq, netfs_sreq_trace_fail);
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, false);
+ cifs_write_subrequest_terminated(wdata, rc);
goto out;
}
@@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
goto failed;
}
- if (subreq->rreq->origin != NETFS_DIO_READ)
+ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
@@ -998,15 +999,18 @@ int cifs_open(struct inode *inode, struct file *file)
rc = cifs_get_readable_path(tcon, full_path, &cfile);
}
if (rc == 0) {
- if (file->f_flags == cfile->f_flags) {
+ unsigned int oflags = file->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC);
+ unsigned int cflags = cfile->f_flags & ~(O_CREAT|O_EXCL|O_TRUNC);
+
+ if (cifs_convert_flags(oflags, 0) == cifs_convert_flags(cflags, 0) &&
+ (oflags & (O_SYNC|O_DIRECT)) == (cflags & (O_SYNC|O_DIRECT))) {
file->private_data = cfile;
spin_lock(&CIFS_I(inode)->deferred_lock);
cifs_del_deferred_close(cfile);
spin_unlock(&CIFS_I(inode)->deferred_lock);
goto use_cache;
- } else {
- _cifsFileInfo_put(cfile, true, false);
}
+ _cifsFileInfo_put(cfile, true, false);
} else {
/* hard link on the defeered close file */
rc = cifs_get_hardlink_path(tcon, inode, file);
@@ -2423,8 +2427,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
return rc;
}
-void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result,
- bool was_async)
+void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t result)
{
struct netfs_io_request *wreq = wdata->rreq;
struct netfs_inode *ictx = netfs_inode(wreq->inode);
@@ -2441,7 +2444,7 @@ void cifs_write_subrequest_terminated(struct cifs_io_subrequest *wdata, ssize_t
netfs_resize_file(ictx, wrend, true);
}
- netfs_write_subrequest_terminated(&wdata->subreq, result, was_async);
+ netfs_write_subrequest_terminated(&wdata->subreq, result);
}
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 4e28632b5fd6..399185ca7cac 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4888,7 +4888,7 @@ smb2_writev_callback(struct mid_q_entry *mid)
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
- cifs_write_subrequest_terminated(wdata, result ?: written, true);
+ cifs_write_subrequest_terminated(wdata, result ?: written);
release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
server->credits, server->in_flight,
@@ -5061,7 +5061,7 @@ out:
-(int)wdata->credits.value,
cifs_trace_rw_credits_write_response_clear);
add_credits_and_wake_if(wdata->server, &wdata->credits, 0);
- cifs_write_subrequest_terminated(wdata, rc, true);
+ cifs_write_subrequest_terminated(wdata, rc);
}
}
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 67c55fe32ce8..992ea0e37257 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -202,6 +202,11 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
+ if (!msblk->devblksize) {
+ errorf(fc, "squashfs: unable to set blocksize\n");
+ return -EINVAL;
+ }
+
msblk->devblksize_log2 = ffz(~msblk->devblksize);
mutex_init(&msblk->meta_index_mutex);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 26a04a783489..63151feb9c3f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -436,6 +436,25 @@ allocate_blocks:
return 0;
}
+static bool
+xfs_ioend_needs_wq_completion(
+ struct iomap_ioend *ioend)
+{
+ /* Changing inode size requires a transaction. */
+ if (xfs_ioend_is_append(ioend))
+ return true;
+
+ /* Extent manipulation requires a transaction. */
+ if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))
+ return true;
+
+ /* Page cache invalidation cannot be done in irq context. */
+ if (ioend->io_flags & IOMAP_IOEND_DONTCACHE)
+ return true;
+
+ return false;
+}
+
static int
xfs_submit_ioend(
struct iomap_writepage_ctx *wpc,
@@ -460,8 +479,7 @@ xfs_submit_ioend(
memalloc_nofs_restore(nofs_flag);
/* send ioends that might require a transaction to the completion wq */
- if (xfs_ioend_is_append(ioend) ||
- (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)))
+ if (xfs_ioend_needs_wq_completion(ioend))
ioend->io_bio.bi_end_io = xfs_end_bio;
if (status)
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index c1a306268ae4..94d0873bcd62 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -167,6 +167,14 @@ xfs_discard_extents(
return error;
}
+/*
+ * Care must be taken setting up the trim cursor as the perags may not have been
+ * initialised when the cursor is initialised. e.g. a clean mount which hasn't
+ * read in AGFs and the first operation run on the mounted fs is a trim. This
+ * can result in perag fields that aren't initialised until
+ * xfs_trim_gather_extents() calls xfs_alloc_read_agf() to lock down the AG for
+ * the free space search.
+ */
struct xfs_trim_cur {
xfs_agblock_t start;
xfs_extlen_t count;
@@ -204,6 +212,14 @@ xfs_trim_gather_extents(
if (error)
goto out_trans_cancel;
+ /*
+ * First time through tcur->count will not have been initialised as
+ * pag->pagf_longest is not guaranteed to be valid before we read
+ * the AGF buffer above.
+ */
+ if (!tcur->count)
+ tcur->count = pag->pagf_longest;
+
if (tcur->by_bno) {
/* sub-AG discard request always starts at tcur->start */
cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag);
@@ -350,7 +366,6 @@ xfs_trim_perag_extents(
{
struct xfs_trim_cur tcur = {
.start = start,
- .count = pag->pagf_longest,
.end = end,
.minlen = minlen,
};