summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/block-group.c2
-rw-r--r--fs/btrfs/disk-io.c4
-rw-r--r--fs/btrfs/tree-log.c98
-rw-r--r--fs/btrfs/volumes.c5
-rw-r--r--fs/btrfs/zlib.c4
-rw-r--r--fs/ext4/Makefile5
-rw-r--r--fs/ext4/crypto.c9
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/ext4_extents.h12
-rw-r--r--fs/ext4/extents-test.c12
-rw-r--r--fs/ext4/extents.c80
-rw-r--r--fs/ext4/fast_commit.c17
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inline.c10
-rw-r--r--fs/ext4/inode.c75
-rw-r--r--fs/ext4/mballoc-test.c81
-rw-r--r--fs/ext4/mballoc.c132
-rw-r--r--fs/ext4/mballoc.h30
-rw-r--r--fs/ext4/page-io.c10
-rw-r--r--fs/ext4/super.c37
-rw-r--r--fs/ext4/sysfs.c10
-rw-r--r--fs/fs-writeback.c36
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/iomap/bio.c51
-rw-r--r--fs/iomap/buffered-io.c15
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/mpage.c30
-rw-r--r--fs/namei.c10
-rw-r--r--fs/netfs/buffered_read.c3
-rw-r--r--fs/netfs/direct_read.c3
-rw-r--r--fs/netfs/direct_write.c15
-rw-r--r--fs/netfs/iterator.c43
-rw-r--r--fs/netfs/read_collect.c4
-rw-r--r--fs/netfs/read_retry.c5
-rw-r--r--fs/netfs/read_single.c1
-rw-r--r--fs/netfs/write_collect.c4
-rw-r--r--fs/netfs/write_issue.c3
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/overlayfs.h21
-rw-r--r--fs/overlayfs/ovl_entry.h7
-rw-r--r--fs/overlayfs/params.c33
-rw-r--r--fs/overlayfs/super.c2
-rw-r--r--fs/overlayfs/util.c5
-rw-r--r--fs/smb/client/Makefile7
-rw-r--r--fs/smb/server/oplock.c72
-rw-r--r--fs/smb/server/smb2pdu.c73
-rw-r--r--fs/udf/inode.c33
-rw-r--r--fs/xfs/libxfs/xfs_attr.h3
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c22
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h3
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c53
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h2
-rw-r--r--fs/xfs/scrub/quota.c4
-rw-r--r--fs/xfs/scrub/trace.h12
-rw-r--r--fs/xfs/xfs_attr_inactive.c95
-rw-r--r--fs/xfs/xfs_attr_item.c51
-rw-r--r--fs/xfs/xfs_dquot_item.c9
-rw-r--r--fs/xfs/xfs_handle.c2
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_inode_item.c9
-rw-r--r--fs/xfs/xfs_mount.c7
-rw-r--r--fs/xfs/xfs_trace.h47
-rw-r--r--fs/xfs/xfs_trans_ail.c127
-rw-r--r--fs/xfs/xfs_verify_media.c18
-rw-r--r--fs/xfs/xfs_xattr.c2
67 files changed, 1157 insertions, 475 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index ebf5079096af..c0d17a369bda 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -4583,7 +4583,7 @@ static void check_removing_space_info(struct btrfs_space_info *space_info)
for (int i = 0; i < BTRFS_SPACE_INFO_SUB_GROUP_MAX; i++) {
if (space_info->sub_group[i]) {
check_removing_space_info(space_info->sub_group[i]);
- kfree(space_info->sub_group[i]);
+ btrfs_sysfs_remove_space_info(space_info->sub_group[i]);
space_info->sub_group[i] = NULL;
}
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 01f2dbb69832..1b0eb246b714 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2531,8 +2531,8 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
if (mirror_num >= 0 &&
btrfs_super_bytenr(sb) != btrfs_sb_offset(mirror_num)) {
- btrfs_err(fs_info, "super offset mismatch %llu != %u",
- btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
+ btrfs_err(fs_info, "super offset mismatch %llu != %llu",
+ btrfs_super_bytenr(sb), btrfs_sb_offset(mirror_num));
ret = -EINVAL;
}
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ab0d460c7139..ac871efb9763 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4616,21 +4616,32 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct inode *inode, bool log_inode_only,
u64 logged_isize)
{
+ u64 gen = BTRFS_I(inode)->generation;
u64 flags;
if (log_inode_only) {
- /* set the generation to zero so the recover code
- * can tell the difference between an logging
- * just to say 'this inode exists' and a logging
- * to say 'update this inode with these values'
+ /*
+ * Set the generation to zero so the recover code can tell the
+ * difference between a logging just to say 'this inode exists'
+ * and a logging to say 'update this inode with these values'.
+ * But only if the inode was not already logged before.
+ * We access ->logged_trans directly since it was already set
+ * up in the call chain by btrfs_log_inode(), and data_race()
+ * to avoid false alerts from KCSAN and since it was set already
+ * and one can set it to 0 since that only happens on eviction
+ * and we are holding a ref on the inode.
*/
- btrfs_set_inode_generation(leaf, item, 0);
+ ASSERT(data_race(BTRFS_I(inode)->logged_trans) > 0);
+ if (data_race(BTRFS_I(inode)->logged_trans) < trans->transid)
+ gen = 0;
+
btrfs_set_inode_size(leaf, item, logged_isize);
} else {
- btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation);
btrfs_set_inode_size(leaf, item, inode->i_size);
}
+ btrfs_set_inode_generation(leaf, item, gen);
+
btrfs_set_inode_uid(leaf, item, i_uid_read(inode));
btrfs_set_inode_gid(leaf, item, i_gid_read(inode));
btrfs_set_inode_mode(leaf, item, inode->i_mode);
@@ -5448,42 +5459,63 @@ process:
return 0;
}
-static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode,
- struct btrfs_path *path, u64 *size_ret)
+static int get_inode_size_to_log(struct btrfs_trans_handle *trans,
+ struct btrfs_inode *inode,
+ struct btrfs_path *path, u64 *size_ret)
{
struct btrfs_key key;
+ struct btrfs_inode_item *item;
int ret;
key.objectid = btrfs_ino(inode);
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
- ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
- if (ret < 0) {
- return ret;
- } else if (ret > 0) {
- *size_ret = 0;
- } else {
- struct btrfs_inode_item *item;
+ /*
+ * Our caller called inode_logged(), so logged_trans is up to date.
+ * Use data_race() to silence any warning from KCSAN. Once logged_trans
+ * is set, it can only be reset to 0 after inode eviction.
+ */
+ if (data_race(inode->logged_trans) == trans->transid) {
+ ret = btrfs_search_slot(NULL, inode->root->log_root, &key, path, 0, 0);
+ } else if (inode->generation < trans->transid) {
+ path->search_commit_root = true;
+ path->skip_locking = true;
+ ret = btrfs_search_slot(NULL, inode->root, &key, path, 0, 0);
+ path->search_commit_root = false;
+ path->skip_locking = false;
- item = btrfs_item_ptr(path->nodes[0], path->slots[0],
- struct btrfs_inode_item);
- *size_ret = btrfs_inode_size(path->nodes[0], item);
- /*
- * If the in-memory inode's i_size is smaller then the inode
- * size stored in the btree, return the inode's i_size, so
- * that we get a correct inode size after replaying the log
- * when before a power failure we had a shrinking truncate
- * followed by addition of a new name (rename / new hard link).
- * Otherwise return the inode size from the btree, to avoid
- * data loss when replaying a log due to previously doing a
- * write that expands the inode's size and logging a new name
- * immediately after.
- */
- if (*size_ret > inode->vfs_inode.i_size)
- *size_ret = inode->vfs_inode.i_size;
+ } else {
+ *size_ret = 0;
+ return 0;
}
+ /*
+ * If the inode was logged before or is from a past transaction, then
+ * its inode item must exist in the log root or in the commit root.
+ */
+ ASSERT(ret <= 0);
+ if (WARN_ON_ONCE(ret > 0))
+ ret = -ENOENT;
+
+ if (ret < 0)
+ return ret;
+
+ item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_inode_item);
+ *size_ret = btrfs_inode_size(path->nodes[0], item);
+ /*
+ * If the in-memory inode's i_size is smaller then the inode size stored
+ * in the btree, return the inode's i_size, so that we get a correct
+ * inode size after replaying the log when before a power failure we had
+ * a shrinking truncate followed by addition of a new name (rename / new
+ * hard link). Otherwise return the inode size from the btree, to avoid
+ * data loss when replaying a log due to previously doing a write that
+ * expands the inode's size and logging a new name immediately after.
+ */
+ if (*size_ret > inode->vfs_inode.i_size)
+ *size_ret = inode->vfs_inode.i_size;
+
btrfs_release_path(path);
return 0;
}
@@ -6996,7 +7028,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
ret = drop_inode_items(trans, log, path, inode,
BTRFS_XATTR_ITEM_KEY);
} else {
- if (inode_only == LOG_INODE_EXISTS && ctx->logged_before) {
+ if (inode_only == LOG_INODE_EXISTS) {
/*
* Make sure the new inode item we write to the log has
* the same isize as the current one (if it exists).
@@ -7010,7 +7042,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
* (zeroes), as if an expanding truncate happened,
* instead of getting a file of 4Kb only.
*/
- ret = logged_inode_size(log, inode, path, &logged_isize);
+ ret = get_inode_size_to_log(trans, inode, path, &logged_isize);
if (ret)
goto out_unlock;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ab51e6ecfdef..6b8e810a35ce 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -8099,8 +8099,9 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans)
smp_rmb();
ret = update_dev_stat_item(trans, device);
- if (!ret)
- atomic_sub(stats_cnt, &device->dev_stats_ccnt);
+ if (ret)
+ break;
+ atomic_sub(stats_cnt, &device->dev_stats_ccnt);
}
mutex_unlock(&fs_devices->device_list_mutex);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 1b7544aa88a2..c676e715b4f8 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -308,7 +308,9 @@ int zlib_compress_bio(struct list_head *ws, struct compressed_bio *cb)
}
/* Queue the remaining part of the folio. */
if (workspace->strm.total_out > bio->bi_iter.bi_size) {
- u32 cur_len = offset_in_folio(out_folio, workspace->strm.total_out);
+ const u32 cur_len = workspace->strm.total_out - bio->bi_iter.bi_size;
+
+ ASSERT(cur_len <= folio_size(out_folio));
if (!bio_add_folio(bio, out_folio, cur_len, 0)) {
ret = -E2BIG;
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 72206a292676..3baee4e7c1cf 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -14,7 +14,8 @@ ext4-y := balloc.o bitmap.o block_validity.o dir.o ext4_jbd2.o extents.o \
ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o
ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o
-ext4-inode-test-objs += inode-test.o
-obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o
+ext4-test-objs += inode-test.o mballoc-test.o \
+ extents-test.o
+obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-test.o
ext4-$(CONFIG_FS_VERITY) += verity.o
ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index cf0a0970c095..f41f320f4437 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -163,10 +163,17 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
*/
if (handle) {
+ /*
+ * Since the inode is new it is ok to pass the
+ * XATTR_CREATE flag. This is necessary to match the
+ * remaining journal credits check in the set_handle
+ * function with the credits allocated for the new
+ * inode.
+ */
res = ext4_xattr_set_handle(handle, inode,
EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
- ctx, len, 0);
+ ctx, len, XATTR_CREATE);
if (!res) {
ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
ext4_clear_inode_state(inode,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 293f698b7042..7617e2d454ea 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1570,6 +1570,7 @@ struct ext4_sb_info {
struct proc_dir_entry *s_proc;
struct kobject s_kobj;
struct completion s_kobj_unregister;
+ struct mutex s_error_notify_mutex; /* protects sysfs_notify vs kobject_del */
struct super_block *s_sb;
struct buffer_head *s_mmp_bh;
@@ -3944,6 +3945,11 @@ static inline bool ext4_inode_can_atomic_write(struct inode *inode)
extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
loff_t pos, unsigned len,
get_block_t *get_block);
+
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+#define EXPORT_SYMBOL_FOR_EXT4_TEST(sym) \
+ EXPORT_SYMBOL_FOR_MODULES(sym, "ext4-test")
+#endif
#endif /* __KERNEL__ */
#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index c484125d963f..ebaf7cc42430 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -264,5 +264,17 @@ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
0xffff);
}
+extern int __ext4_ext_dirty(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path);
+extern int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex);
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+extern int ext4_ext_space_root_idx_test(struct inode *inode, int check);
+extern struct ext4_ext_path *ext4_split_convert_extents_test(
+ handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path,
+ int flags, unsigned int *allocated);
+#endif
#endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents-test.c b/fs/ext4/extents-test.c
index 7c4690eb7dad..5496b2c8e2cd 100644
--- a/fs/ext4/extents-test.c
+++ b/fs/ext4/extents-test.c
@@ -142,8 +142,10 @@ static struct file_system_type ext_fs_type = {
static void extents_kunit_exit(struct kunit *test)
{
- struct ext4_sb_info *sbi = k_ctx.k_ei->vfs_inode.i_sb->s_fs_info;
+ struct super_block *sb = k_ctx.k_ei->vfs_inode.i_sb;
+ struct ext4_sb_info *sbi = sb->s_fs_info;
+ ext4_es_unregister_shrinker(sbi);
kfree(sbi);
kfree(k_ctx.k_ei);
kfree(k_ctx.k_data);
@@ -280,8 +282,8 @@ static int extents_kunit_init(struct kunit *test)
eh->eh_depth = 0;
eh->eh_entries = cpu_to_le16(1);
eh->eh_magic = EXT4_EXT_MAGIC;
- eh->eh_max =
- cpu_to_le16(ext4_ext_space_root_idx(&k_ctx.k_ei->vfs_inode, 0));
+ eh->eh_max = cpu_to_le16(ext4_ext_space_root_idx_test(
+ &k_ctx.k_ei->vfs_inode, 0));
eh->eh_generation = 0;
/*
@@ -384,8 +386,8 @@ static void test_split_convert(struct kunit *test)
switch (param->type) {
case TEST_SPLIT_CONVERT:
- path = ext4_split_convert_extents(NULL, inode, &map, path,
- param->split_flags, NULL);
+ path = ext4_split_convert_extents_test(NULL, inode, &map,
+ path, param->split_flags, NULL);
break;
case TEST_CREATE_BLOCKS:
ext4_map_create_blocks_helper(test, inode, &map, param->split_flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index ae3804f36535..8cce1479be6d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -184,9 +184,9 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
* - ENOMEM
* - EIO
*/
-static int __ext4_ext_dirty(const char *where, unsigned int line,
- handle_t *handle, struct inode *inode,
- struct ext4_ext_path *path)
+int __ext4_ext_dirty(const char *where, unsigned int line,
+ handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *path)
{
int err;
@@ -1736,6 +1736,13 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path + k);
if (err)
return err;
+ if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "path[%d].p_idx %p > EXT_LAST_INDEX %p",
+ k, path[k].p_idx,
+ EXT_LAST_INDEX(path[k].p_hdr));
+ return -EFSCORRUPTED;
+ }
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k);
if (err)
@@ -1748,6 +1755,14 @@ static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path + k);
if (err)
goto clean;
+ if (unlikely(path[k].p_idx > EXT_LAST_INDEX(path[k].p_hdr))) {
+ EXT4_ERROR_INODE(inode,
+ "path[%d].p_idx %p > EXT_LAST_INDEX %p",
+ k, path[k].p_idx,
+ EXT_LAST_INDEX(path[k].p_hdr));
+ err = -EFSCORRUPTED;
+ goto clean;
+ }
path[k].p_idx->ei_block = border;
err = ext4_ext_dirty(handle, inode, path + k);
if (err)
@@ -3144,7 +3159,7 @@ static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
}
/* FIXME!! we need to try to merge to left or right after zero-out */
-static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
ext4_fsblk_t ee_pblock;
unsigned int ee_len;
@@ -3239,6 +3254,9 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
insert_err = PTR_ERR(path);
err = 0;
+ if (insert_err != -ENOSPC && insert_err != -EDQUOT &&
+ insert_err != -ENOMEM)
+ goto out_path;
/*
* Get a new path to try to zeroout or fix the extent length.
@@ -3255,13 +3273,20 @@ static struct ext4_ext_path *ext4_split_extent_at(handle_t *handle,
goto out_path;
}
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ if (!ex) {
+ EXT4_ERROR_INODE(inode,
+ "bad extent address lblock: %lu, depth: %d pblock %llu",
+ (unsigned long)ee_block, depth, path[depth].p_block);
+ err = -EFSCORRUPTED;
+ goto out;
+ }
+
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
-
fix_extent_len:
ex->ee_len = orig_ex.ee_len;
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
@@ -3363,7 +3388,7 @@ static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode,
ext4_ext_mark_initialized(ex);
- ext4_ext_dirty(handle, inode, path + depth);
+ err = ext4_ext_dirty(handle, inode, path + depth);
if (err)
return err;
@@ -4457,9 +4482,13 @@ got_allocated_blocks:
path = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (IS_ERR(path)) {
err = PTR_ERR(path);
- if (allocated_clusters) {
+ /*
+ * Gracefully handle out of space conditions. If the filesystem
+ * is inconsistent, we'll just leak allocated blocks to avoid
+ * causing even more damage.
+ */
+ if (allocated_clusters && (err == -EDQUOT || err == -ENOSPC)) {
int fb_flags = 0;
-
/*
* free data blocks we just allocated.
* not a good idea to call discard here directly,
@@ -6238,6 +6267,33 @@ out:
return 0;
}
-#ifdef CONFIG_EXT4_KUNIT_TESTS
-#include "extents-test.c"
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+int ext4_ext_space_root_idx_test(struct inode *inode, int check)
+{
+ return ext4_ext_space_root_idx(inode, check);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_space_root_idx_test);
+
+struct ext4_ext_path *ext4_split_convert_extents_test(handle_t *handle,
+ struct inode *inode, struct ext4_map_blocks *map,
+ struct ext4_ext_path *path, int flags,
+ unsigned int *allocated)
+{
+ return ext4_split_convert_extents(handle, inode, map, path,
+ flags, allocated);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_split_convert_extents_test);
+
+EXPORT_SYMBOL_FOR_EXT4_TEST(__ext4_ext_dirty);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_zeroout);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_register_shrinker);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_unregister_shrinker);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_create_blocks);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_init_tree);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_lookup_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_es_insert_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_ext_insert_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_find_extent);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_issue_zeroout);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_map_query_blocks);
#endif
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index f575751f1cae..2f0057e04934 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -975,13 +975,13 @@ static int ext4_fc_flush_data(journal_t *journal)
int ret = 0;
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
- ret = jbd2_submit_inode_data(journal, ei->jinode);
+ ret = jbd2_submit_inode_data(journal, READ_ONCE(ei->jinode));
if (ret)
return ret;
}
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
- ret = jbd2_wait_inode_data(journal, ei->jinode);
+ ret = jbd2_wait_inode_data(journal, READ_ONCE(ei->jinode));
if (ret)
return ret;
}
@@ -1613,19 +1613,21 @@ static int ext4_fc_replay_inode(struct super_block *sb,
/* Immediately update the inode on disk. */
ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
if (ret)
- goto out;
+ goto out_brelse;
ret = sync_dirty_buffer(iloc.bh);
if (ret)
- goto out;
+ goto out_brelse;
ret = ext4_mark_inode_used(sb, ino);
if (ret)
- goto out;
+ goto out_brelse;
/* Given that we just wrote the inode on disk, this SHOULD succeed. */
inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
ext4_debug("Inode not found.");
- return -EFSCORRUPTED;
+ inode = NULL;
+ ret = -EFSCORRUPTED;
+ goto out_brelse;
}
/*
@@ -1642,13 +1644,14 @@ static int ext4_fc_replay_inode(struct super_block *sb,
ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
sync_dirty_buffer(iloc.bh);
+out_brelse:
brelse(iloc.bh);
out:
iput(inode);
if (!ret)
blkdev_issue_flush(sb->s_bdev);
- return 0;
+ return ret;
}
/*
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index e476c6de3074..bd8f230fa507 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -83,11 +83,23 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
int datasync, bool *needs_barrier)
{
struct inode *inode = file->f_inode;
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0,
+ };
int ret;
ret = generic_buffers_fsync_noflush(file, start, end, datasync);
- if (!ret)
- ret = ext4_sync_parent(inode);
+ if (ret)
+ return ret;
+
+ /* Force writeout of inode table buffer to disk */
+ ret = ext4_write_inode(inode, &wbc);
+ if (ret)
+ return ret;
+
+ ret = ext4_sync_parent(inode);
+
if (test_opt(inode->i_sb, BARRIER))
*needs_barrier = true;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b20a1bf866ab..b1bc1950c9f0 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -686,6 +686,12 @@ static int recently_deleted(struct super_block *sb, ext4_group_t group, int ino)
if (unlikely(!gdp))
return 0;
+ /* Inode was never used in this filesystem? */
+ if (ext4_has_group_desc_csum(sb) &&
+ (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT) ||
+ ino >= EXT4_INODES_PER_GROUP(sb) - ext4_itable_unused_count(sb, gdp)))
+ return 0;
+
bh = sb_find_get_block(sb, ext4_inode_table(sb, gdp) +
(ino / inodes_per_block));
if (!bh || !buffer_uptodate(bh))
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 1f6bc05593df..408677fa8196 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -522,7 +522,15 @@ static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
goto out;
len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
- BUG_ON(len > PAGE_SIZE);
+
+ if (len > PAGE_SIZE) {
+ ext4_error_inode(inode, __func__, __LINE__, 0,
+ "inline size %zu exceeds PAGE_SIZE", len);
+ ret = -EFSCORRUPTED;
+ brelse(iloc.bh);
+ goto out;
+ }
+
kaddr = kmap_local_folio(folio, 0);
ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
kaddr = folio_zero_tail(folio, len, kaddr + len);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 396dc3a5d16b..1123d995494b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -128,6 +128,8 @@ void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
+ struct jbd2_inode *jinode = READ_ONCE(EXT4_I(inode)->jinode);
+
trace_ext4_begin_ordered_truncate(inode, new_size);
/*
* If jinode is zero, then we never opened the file for
@@ -135,10 +137,10 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
* jbd2_journal_begin_ordered_truncate() since there's no
* outstanding writes we need to flush.
*/
- if (!EXT4_I(inode)->jinode)
+ if (!jinode)
return 0;
return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
- EXT4_I(inode)->jinode,
+ jinode,
new_size);
}
@@ -184,6 +186,14 @@ void ext4_evict_inode(struct inode *inode)
if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
+ /*
+ * If there's dirty page will lead to data loss, user
+ * could see stale data.
+ */
+ if (unlikely(!ext4_emergency_state(inode->i_sb) &&
+ mapping_tagged(&inode->i_data, PAGECACHE_TAG_DIRTY)))
+ ext4_warning_inode(inode, "data will be lost");
+
truncate_inode_pages_final(&inode->i_data);
goto no_delete;
@@ -4451,8 +4461,13 @@ int ext4_inode_attach_jinode(struct inode *inode)
spin_unlock(&inode->i_lock);
return -ENOMEM;
}
- ei->jinode = jinode;
- jbd2_journal_init_jbd_inode(ei->jinode, inode);
+ jbd2_journal_init_jbd_inode(jinode, inode);
+ /*
+ * Publish ->jinode only after it is fully initialized so that
+ * readers never observe a partially initialized jbd2_inode.
+ */
+ smp_wmb();
+ WRITE_ONCE(ei->jinode, jinode);
jinode = NULL;
}
spin_unlock(&inode->i_lock);
@@ -5401,18 +5416,36 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
inode->i_op = &ext4_encrypted_symlink_inode_operations;
} else if (ext4_inode_is_fast_symlink(inode)) {
inode->i_op = &ext4_fast_symlink_inode_operations;
- if (inode->i_size == 0 ||
- inode->i_size >= sizeof(ei->i_data) ||
- strnlen((char *)ei->i_data, inode->i_size + 1) !=
- inode->i_size) {
- ext4_error_inode(inode, function, line, 0,
- "invalid fast symlink length %llu",
- (unsigned long long)inode->i_size);
- ret = -EFSCORRUPTED;
- goto bad_inode;
+
+ /*
+ * Orphan cleanup can see inodes with i_size == 0
+ * and i_data uninitialized. Skip size checks in
+ * that case. This is safe because the first thing
+ * ext4_evict_inode() does for fast symlinks is
+ * clearing of i_data and i_size.
+ */
+ if ((EXT4_SB(sb)->s_mount_state & EXT4_ORPHAN_FS)) {
+ if (inode->i_nlink != 0) {
+ ext4_error_inode(inode, function, line, 0,
+ "invalid orphan symlink nlink %d",
+ inode->i_nlink);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+ } else {
+ if (inode->i_size == 0 ||
+ inode->i_size >= sizeof(ei->i_data) ||
+ strnlen((char *)ei->i_data, inode->i_size + 1) !=
+ inode->i_size) {
+ ext4_error_inode(inode, function, line, 0,
+ "invalid fast symlink length %llu",
+ (unsigned long long)inode->i_size);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
+ inode_set_cached_link(inode, (char *)ei->i_data,
+ inode->i_size);
}
- inode_set_cached_link(inode, (char *)ei->i_data,
- inode->i_size);
} else {
inode->i_op = &ext4_symlink_inode_operations;
}
@@ -5849,6 +5882,18 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (attr->ia_size == inode->i_size)
inc_ivers = false;
+ /*
+ * If file has inline data but new size exceeds inline capacity,
+ * convert to extent-based storage first to prevent inconsistent
+ * state (inline flag set but size exceeds inline capacity).
+ */
+ if (ext4_has_inline_data(inode) &&
+ attr->ia_size > EXT4_I(inode)->i_inline_size) {
+ error = ext4_convert_inline_data(inode);
+ if (error)
+ goto err_out;
+ }
+
if (shrink) {
if (ext4_should_order_data(inode)) {
error = ext4_begin_ordered_truncate(inode,
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 9fbdf6a09489..6f5bfbb0e8a4 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -8,6 +8,7 @@
#include <linux/random.h>
#include "ext4.h"
+#include "mballoc.h"
struct mbt_grp_ctx {
struct buffer_head bitmap_bh;
@@ -336,7 +337,7 @@ ext4_mb_mark_context_stub(handle_t *handle, struct super_block *sb, bool state,
if (state)
mb_set_bits(bitmap_bh->b_data, blkoff, len);
else
- mb_clear_bits(bitmap_bh->b_data, blkoff, len);
+ mb_clear_bits_test(bitmap_bh->b_data, blkoff, len);
return 0;
}
@@ -413,14 +414,14 @@ static void test_new_blocks_simple(struct kunit *test)
/* get block at goal */
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test, ar.goal, found,
"failed to alloc block at goal, expected %llu found %llu",
ar.goal, found);
/* get block after goal in goal group */
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test, ar.goal + EXT4_C2B(sbi, 1), found,
"failed to alloc block after goal in goal group, expected %llu found %llu",
ar.goal + 1, found);
@@ -428,7 +429,7 @@ static void test_new_blocks_simple(struct kunit *test)
/* get block after goal group */
mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test,
ext4_group_first_block_no(sb, goal_group + 1), found,
"failed to alloc block after goal group, expected %llu found %llu",
@@ -438,7 +439,7 @@ static void test_new_blocks_simple(struct kunit *test)
for (i = goal_group; i < ext4_get_groups_count(sb); i++)
mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_EQ_MSG(test,
ext4_group_first_block_no(sb, 0) + EXT4_C2B(sbi, 1), found,
"failed to alloc block before goal group, expected %llu found %llu",
@@ -448,7 +449,7 @@ static void test_new_blocks_simple(struct kunit *test)
for (i = 0; i < ext4_get_groups_count(sb); i++)
mbt_ctx_mark_used(sb, i, 0, EXT4_CLUSTERS_PER_GROUP(sb));
ar.goal = ext4_group_first_block_no(sb, goal_group);
- found = ext4_mb_new_blocks_simple(&ar, &err);
+ found = ext4_mb_new_blocks_simple_test(&ar, &err);
KUNIT_ASSERT_NE_MSG(test, err, 0,
"unexpectedly get block when no block is available");
}
@@ -492,16 +493,16 @@ validate_free_blocks_simple(struct kunit *test, struct super_block *sb,
continue;
bitmap = mbt_ctx_bitmap(sb, i);
- bit = mb_find_next_zero_bit(bitmap, max, 0);
+ bit = mb_find_next_zero_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ_MSG(test, bit, max,
"free block on unexpected group %d", i);
}
bitmap = mbt_ctx_bitmap(sb, goal_group);
- bit = mb_find_next_zero_bit(bitmap, max, 0);
+ bit = mb_find_next_zero_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ(test, bit, start);
- bit = mb_find_next_bit(bitmap, max, bit + 1);
+ bit = mb_find_next_bit_test(bitmap, max, bit + 1);
KUNIT_ASSERT_EQ(test, bit, start + len);
}
@@ -524,7 +525,7 @@ test_free_blocks_simple_range(struct kunit *test, ext4_group_t goal_group,
block = ext4_group_first_block_no(sb, goal_group) +
EXT4_C2B(sbi, start);
- ext4_free_blocks_simple(inode, block, len);
+ ext4_free_blocks_simple_test(inode, block, len);
validate_free_blocks_simple(test, sb, goal_group, start, len);
mbt_ctx_mark_used(sb, goal_group, 0, EXT4_CLUSTERS_PER_GROUP(sb));
}
@@ -566,15 +567,15 @@ test_mark_diskspace_used_range(struct kunit *test,
bitmap = mbt_ctx_bitmap(sb, TEST_GOAL_GROUP);
memset(bitmap, 0, sb->s_blocksize);
- ret = ext4_mb_mark_diskspace_used(ac, NULL);
+ ret = ext4_mb_mark_diskspace_used_test(ac, NULL);
KUNIT_ASSERT_EQ(test, ret, 0);
max = EXT4_CLUSTERS_PER_GROUP(sb);
- i = mb_find_next_bit(bitmap, max, 0);
+ i = mb_find_next_bit_test(bitmap, max, 0);
KUNIT_ASSERT_EQ(test, i, start);
- i = mb_find_next_zero_bit(bitmap, max, i + 1);
+ i = mb_find_next_zero_bit_test(bitmap, max, i + 1);
KUNIT_ASSERT_EQ(test, i, start + len);
- i = mb_find_next_bit(bitmap, max, i + 1);
+ i = mb_find_next_bit_test(bitmap, max, i + 1);
KUNIT_ASSERT_EQ(test, max, i);
}
@@ -617,54 +618,54 @@ static void mbt_generate_buddy(struct super_block *sb, void *buddy,
max = EXT4_CLUSTERS_PER_GROUP(sb);
bb_h = buddy + sbi->s_mb_offsets[1];
- off = mb_find_next_zero_bit(bb, max, 0);
+ off = mb_find_next_zero_bit_test(bb, max, 0);
grp->bb_first_free = off;
while (off < max) {
grp->bb_counters[0]++;
grp->bb_free++;
- if (!(off & 1) && !mb_test_bit(off + 1, bb)) {
+ if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) {
grp->bb_free++;
grp->bb_counters[0]--;
- mb_clear_bit(off >> 1, bb_h);
+ mb_clear_bit_test(off >> 1, bb_h);
grp->bb_counters[1]++;
grp->bb_largest_free_order = 1;
off++;
}
- off = mb_find_next_zero_bit(bb, max, off + 1);
+ off = mb_find_next_zero_bit_test(bb, max, off + 1);
}
for (order = 1; order < MB_NUM_ORDERS(sb) - 1; order++) {
bb = buddy + sbi->s_mb_offsets[order];
bb_h = buddy + sbi->s_mb_offsets[order + 1];
max = max >> 1;
- off = mb_find_next_zero_bit(bb, max, 0);
+ off = mb_find_next_zero_bit_test(bb, max, 0);
while (off < max) {
- if (!(off & 1) && !mb_test_bit(off + 1, bb)) {
+ if (!(off & 1) && !mb_test_bit_test(off + 1, bb)) {
mb_set_bits(bb, off, 2);
grp->bb_counters[order] -= 2;
- mb_clear_bit(off >> 1, bb_h);
+ mb_clear_bit_test(off >> 1, bb_h);
grp->bb_counters[order + 1]++;
grp->bb_largest_free_order = order + 1;
off++;
}
- off = mb_find_next_zero_bit(bb, max, off + 1);
+ off = mb_find_next_zero_bit_test(bb, max, off + 1);
}
}
max = EXT4_CLUSTERS_PER_GROUP(sb);
- off = mb_find_next_zero_bit(bitmap, max, 0);
+ off = mb_find_next_zero_bit_test(bitmap, max, 0);
while (off < max) {
grp->bb_fragments++;
- off = mb_find_next_bit(bitmap, max, off + 1);
+ off = mb_find_next_bit_test(bitmap, max, off + 1);
if (off + 1 >= max)
break;
- off = mb_find_next_zero_bit(bitmap, max, off + 1);
+ off = mb_find_next_zero_bit_test(bitmap, max, off + 1);
}
}
@@ -706,7 +707,7 @@ do_test_generate_buddy(struct kunit *test, struct super_block *sb, void *bitmap,
/* needed by validation in ext4_mb_generate_buddy */
ext4_grp->bb_free = mbt_grp->bb_free;
memset(ext4_buddy, 0xff, sb->s_blocksize);
- ext4_mb_generate_buddy(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP,
+ ext4_mb_generate_buddy_test(sb, ext4_buddy, bitmap, TEST_GOAL_GROUP,
ext4_grp);
KUNIT_ASSERT_EQ(test, memcmp(mbt_buddy, ext4_buddy, sb->s_blocksize),
@@ -760,7 +761,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b,
ex.fe_group = TEST_GOAL_GROUP;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(e4b, &ex);
+ mb_mark_used_test(e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
mb_set_bits(bitmap, start, len);
@@ -769,7 +770,7 @@ test_mb_mark_used_range(struct kunit *test, struct ext4_buddy *e4b,
memset(buddy, 0xff, sb->s_blocksize);
for (i = 0; i < MB_NUM_ORDERS(sb); i++)
grp->bb_counters[i] = 0;
- ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp);
+ ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp);
KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize),
0);
@@ -798,7 +799,7 @@ static void test_mb_mark_used(struct kunit *test)
bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp);
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
grp->bb_free = EXT4_CLUSTERS_PER_GROUP(sb);
@@ -809,7 +810,7 @@ static void test_mb_mark_used(struct kunit *test)
test_mb_mark_used_range(test, &e4b, ranges[i].start,
ranges[i].len, bitmap, buddy, grp);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
static void
@@ -825,16 +826,16 @@ test_mb_free_blocks_range(struct kunit *test, struct ext4_buddy *e4b,
return;
ext4_lock_group(sb, e4b->bd_group);
- mb_free_blocks(NULL, e4b, start, len);
+ mb_free_blocks_test(NULL, e4b, start, len);
ext4_unlock_group(sb, e4b->bd_group);
- mb_clear_bits(bitmap, start, len);
+ mb_clear_bits_test(bitmap, start, len);
/* bypass bb_free validatoin in ext4_mb_generate_buddy */
grp->bb_free += len;
memset(buddy, 0xff, sb->s_blocksize);
for (i = 0; i < MB_NUM_ORDERS(sb); i++)
grp->bb_counters[i] = 0;
- ext4_mb_generate_buddy(sb, buddy, bitmap, 0, grp);
+ ext4_mb_generate_buddy_test(sb, buddy, bitmap, 0, grp);
KUNIT_ASSERT_EQ(test, memcmp(buddy, e4b->bd_buddy, sb->s_blocksize),
0);
@@ -865,7 +866,7 @@ static void test_mb_free_blocks(struct kunit *test)
bb_counters[MB_NUM_ORDERS(sb)]), GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, grp);
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
ex.fe_start = 0;
@@ -873,7 +874,7 @@ static void test_mb_free_blocks(struct kunit *test)
ex.fe_group = TEST_GOAL_GROUP;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(&e4b, &ex);
+ mb_mark_used_test(&e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
grp->bb_free = 0;
@@ -886,7 +887,7 @@ static void test_mb_free_blocks(struct kunit *test)
test_mb_free_blocks_range(test, &e4b, ranges[i].start,
ranges[i].len, bitmap, buddy, grp);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
#define COUNT_FOR_ESTIMATE 100000
@@ -904,7 +905,7 @@ static void test_mb_mark_used_cost(struct kunit *test)
if (sb->s_blocksize > PAGE_SIZE)
kunit_skip(test, "blocksize exceeds pagesize");
- ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
ex.fe_group = TEST_GOAL_GROUP;
@@ -918,7 +919,7 @@ static void test_mb_mark_used_cost(struct kunit *test)
ex.fe_start = ranges[i].start;
ex.fe_len = ranges[i].len;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_mark_used(&e4b, &ex);
+ mb_mark_used_test(&e4b, &ex);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
end = jiffies;
@@ -929,14 +930,14 @@ static void test_mb_mark_used_cost(struct kunit *test)
continue;
ext4_lock_group(sb, TEST_GOAL_GROUP);
- mb_free_blocks(NULL, &e4b, ranges[i].start,
+ mb_free_blocks_test(NULL, &e4b, ranges[i].start,
ranges[i].len);
ext4_unlock_group(sb, TEST_GOAL_GROUP);
}
}
kunit_info(test, "costed jiffies %lu\n", all);
- ext4_mb_unload_buddy(&e4b);
+ ext4_mb_unload_buddy_test(&e4b);
}
static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 20e9fdaf4301..bb58eafb87bc 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1199,6 +1199,8 @@ static int ext4_mb_scan_groups(struct ext4_allocation_context *ac)
/* searching for the right group start from the goal value specified */
start = ac->ac_g_ex.fe_group;
+ if (start >= ngroups)
+ start = 0;
ac->ac_prefetch_grp = start;
ac->ac_prefetch_nr = 0;
@@ -2443,8 +2445,12 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
return 0;
err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
- if (err)
+ if (err) {
+ if (EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info) &&
+ !(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
+ return 0;
return err;
+ }
ext4_lock_group(ac->ac_sb, group);
if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
@@ -3580,9 +3586,7 @@ err_freebuddy:
rcu_read_unlock();
iput(sbi->s_buddy_cache);
err_freesgi:
- rcu_read_lock();
- kvfree(rcu_dereference(sbi->s_group_info));
- rcu_read_unlock();
+ kvfree(rcu_access_pointer(sbi->s_group_info));
return -ENOMEM;
}
@@ -3889,15 +3893,14 @@ void ext4_mb_release(struct super_block *sb)
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
int count;
- if (test_opt(sb, DISCARD)) {
- /*
- * wait the discard work to drain all of ext4_free_data
- */
- flush_work(&sbi->s_discard_work);
- WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
- }
+ /*
+ * wait the discard work to drain all of ext4_free_data
+ */
+ flush_work(&sbi->s_discard_work);
+ WARN_ON_ONCE(!list_empty(&sbi->s_discard_list));
- if (sbi->s_group_info) {
+ group_info = rcu_access_pointer(sbi->s_group_info);
+ if (group_info) {
for (i = 0; i < ngroups; i++) {
cond_resched();
grinfo = ext4_get_group_info(sb, i);
@@ -3915,12 +3918,9 @@ void ext4_mb_release(struct super_block *sb)
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
EXT4_DESC_PER_BLOCK_BITS(sb);
- rcu_read_lock();
- group_info = rcu_dereference(sbi->s_group_info);
for (i = 0; i < num_meta_group_infos; i++)
kfree(group_info[i]);
kvfree(group_info);
- rcu_read_unlock();
}
ext4_mb_avg_fragment_size_destroy(sbi);
ext4_mb_largest_free_orders_destroy(sbi);
@@ -4084,7 +4084,7 @@ void ext4_exit_mballoc(void)
#define EXT4_MB_BITMAP_MARKED_CHECK 0x0001
#define EXT4_MB_SYNC_UPDATE 0x0002
-static int
+int
ext4_mb_mark_context(handle_t *handle, struct super_block *sb, bool state,
ext4_group_t group, ext4_grpblk_t blkoff,
ext4_grpblk_t len, int flags, ext4_grpblk_t *ret_changed)
@@ -7188,6 +7188,102 @@ out_unload:
return error;
}
-#ifdef CONFIG_EXT4_KUNIT_TESTS
-#include "mballoc-test.c"
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+void mb_clear_bits_test(void *bm, int cur, int len)
+{
+ mb_clear_bits(bm, cur, len);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bits_test);
+
+ext4_fsblk_t
+ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar,
+ int *errp)
+{
+ return ext4_mb_new_blocks_simple(ar, errp);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_new_blocks_simple_test);
+
+int mb_find_next_zero_bit_test(void *addr, int max, int start)
+{
+ return mb_find_next_zero_bit(addr, max, start);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_zero_bit_test);
+
+int mb_find_next_bit_test(void *addr, int max, int start)
+{
+ return mb_find_next_bit(addr, max, start);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_find_next_bit_test);
+
+void mb_clear_bit_test(int bit, void *addr)
+{
+ mb_clear_bit(bit, addr);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_clear_bit_test);
+
+int mb_test_bit_test(int bit, void *addr)
+{
+ return mb_test_bit(bit, addr);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_test_bit_test);
+
+int ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac,
+ handle_t *handle)
+{
+ return ext4_mb_mark_diskspace_used(ac, handle);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_diskspace_used_test);
+
+int mb_mark_used_test(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
+{
+ return mb_mark_used(e4b, ex);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_mark_used_test);
+
+void ext4_mb_generate_buddy_test(struct super_block *sb, void *buddy,
+ void *bitmap, ext4_group_t group,
+ struct ext4_group_info *grp)
+{
+ ext4_mb_generate_buddy(sb, buddy, bitmap, group, grp);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_generate_buddy_test);
+
+int ext4_mb_load_buddy_test(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b)
+{
+ return ext4_mb_load_buddy(sb, group, e4b);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_load_buddy_test);
+
+void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b)
+{
+ ext4_mb_unload_buddy(e4b);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_unload_buddy_test);
+
+void mb_free_blocks_test(struct inode *inode, struct ext4_buddy *e4b,
+ int first, int count)
+{
+ mb_free_blocks(inode, e4b, first, count);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_free_blocks_test);
+
+void ext4_free_blocks_simple_test(struct inode *inode, ext4_fsblk_t block,
+ unsigned long count)
+{
+ return ext4_free_blocks_simple(inode, block, count);
+}
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_blocks_simple_test);
+
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_wait_block_bitmap);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_init);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_desc);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_count_free_clusters);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_get_group_info);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_free_group_clusters_set);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_release);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_read_block_bitmap_nowait);
+EXPORT_SYMBOL_FOR_EXT4_TEST(mb_set_bits);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_fc_init_inode);
+EXPORT_SYMBOL_FOR_EXT4_TEST(ext4_mb_mark_context);
#endif
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 15a049f05d04..39333ce72cbd 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -270,4 +270,34 @@ ext4_mballoc_query_range(
ext4_mballoc_query_range_fn formatter,
void *priv);
+extern int ext4_mb_mark_context(handle_t *handle,
+ struct super_block *sb, bool state,
+ ext4_group_t group, ext4_grpblk_t blkoff,
+ ext4_grpblk_t len, int flags,
+ ext4_grpblk_t *ret_changed);
+#if IS_ENABLED(CONFIG_EXT4_KUNIT_TESTS)
+extern void mb_clear_bits_test(void *bm, int cur, int len);
+extern ext4_fsblk_t
+ext4_mb_new_blocks_simple_test(struct ext4_allocation_request *ar,
+ int *errp);
+extern int mb_find_next_zero_bit_test(void *addr, int max, int start);
+extern int mb_find_next_bit_test(void *addr, int max, int start);
+extern void mb_clear_bit_test(int bit, void *addr);
+extern int mb_test_bit_test(int bit, void *addr);
+extern int
+ext4_mb_mark_diskspace_used_test(struct ext4_allocation_context *ac,
+ handle_t *handle);
+extern int mb_mark_used_test(struct ext4_buddy *e4b,
+ struct ext4_free_extent *ex);
+extern void ext4_mb_generate_buddy_test(struct super_block *sb,
+ void *buddy, void *bitmap, ext4_group_t group,
+ struct ext4_group_info *grp);
+extern int ext4_mb_load_buddy_test(struct super_block *sb,
+ ext4_group_t group, struct ext4_buddy *e4b);
+extern void ext4_mb_unload_buddy_test(struct ext4_buddy *e4b);
+extern void mb_free_blocks_test(struct inode *inode,
+ struct ext4_buddy *e4b, int first, int count);
+extern void ext4_free_blocks_simple_test(struct inode *inode,
+ ext4_fsblk_t block, unsigned long count);
+#endif
#endif
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index a8c95eee91b7..39fe50b3c662 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -524,9 +524,15 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio,
nr_to_submit++;
} while ((bh = bh->b_this_page) != head);
- /* Nothing to submit? Just unlock the folio... */
- if (!nr_to_submit)
+ if (!nr_to_submit) {
+ /*
+ * We have nothing to submit. Just cycle the folio through
+ * writeback state to properly update xarray tags.
+ */
+ __folio_start_writeback(folio, keep_towrite);
+ folio_end_writeback(folio);
return 0;
+ }
bh = head = folio_buffers(folio);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 43f680c750ae..a34efb44e73d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1254,12 +1254,10 @@ static void ext4_group_desc_free(struct ext4_sb_info *sbi)
struct buffer_head **group_desc;
int i;
- rcu_read_lock();
- group_desc = rcu_dereference(sbi->s_group_desc);
+ group_desc = rcu_access_pointer(sbi->s_group_desc);
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(group_desc[i]);
kvfree(group_desc);
- rcu_read_unlock();
}
static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
@@ -1267,14 +1265,12 @@ static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
struct flex_groups **flex_groups;
int i;
- rcu_read_lock();
- flex_groups = rcu_dereference(sbi->s_flex_groups);
+ flex_groups = rcu_access_pointer(sbi->s_flex_groups);
if (flex_groups) {
for (i = 0; i < sbi->s_flex_groups_allocated; i++)
kvfree(flex_groups[i]);
kvfree(flex_groups);
}
- rcu_read_unlock();
}
static void ext4_put_super(struct super_block *sb)
@@ -1527,6 +1523,27 @@ void ext4_clear_inode(struct inode *inode)
invalidate_inode_buffers(inode);
clear_inode(inode);
ext4_discard_preallocations(inode);
+ /*
+ * We must remove the inode from the hash before ext4_free_inode()
+ * clears the bit in inode bitmap as otherwise another process reusing
+ * the inode will block in insert_inode_hash() waiting for inode
+ * eviction to complete while holding transaction handle open, but
+ * ext4_evict_inode() still running for that inode could block waiting
+ * for transaction commit if the inode is marked as IS_SYNC => deadlock.
+ *
+ * Removing the inode from the hash here is safe. There are two cases
+ * to consider:
+ * 1) The inode still has references to it (i_nlink > 0). In that case
+ * we are keeping the inode and once we remove the inode from the hash,
+ * iget() can create the new inode structure for the same inode number
+ * and we are fine with that as all IO on behalf of the inode is
+ * finished.
+ * 2) We are deleting the inode (i_nlink == 0). In that case inode
+ * number cannot be reused until ext4_free_inode() clears the bit in
+ * the inode bitmap, at which point all IO is done and reuse is fine
+ * again.
+ */
+ remove_inode_hash(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
@@ -3633,6 +3650,13 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly)
"extents feature\n");
return 0;
}
+ if (ext4_has_feature_bigalloc(sb) &&
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+ ext4_msg(sb, KERN_WARNING,
+ "bad geometry: bigalloc file system with non-zero "
+ "first_data_block\n");
+ return 0;
+ }
#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
if (!readonly && (ext4_has_feature_quota(sb) ||
@@ -5403,6 +5427,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
spin_lock_init(&sbi->s_error_lock);
+ mutex_init(&sbi->s_error_notify_mutex);
INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index b87d7bdab06a..923b375e017f 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -597,7 +597,10 @@ static const struct kobj_type ext4_feat_ktype = {
void ext4_notify_error_sysfs(struct ext4_sb_info *sbi)
{
- sysfs_notify(&sbi->s_kobj, NULL, "errors_count");
+ mutex_lock(&sbi->s_error_notify_mutex);
+ if (sbi->s_kobj.state_in_sysfs)
+ sysfs_notify(&sbi->s_kobj, NULL, "errors_count");
+ mutex_unlock(&sbi->s_error_notify_mutex);
}
static struct kobject *ext4_root;
@@ -610,8 +613,10 @@ int ext4_register_sysfs(struct super_block *sb)
int err;
init_completion(&sbi->s_kobj_unregister);
+ mutex_lock(&sbi->s_error_notify_mutex);
err = kobject_init_and_add(&sbi->s_kobj, &ext4_sb_ktype, ext4_root,
"%s", sb->s_id);
+ mutex_unlock(&sbi->s_error_notify_mutex);
if (err) {
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
@@ -644,7 +649,10 @@ void ext4_unregister_sysfs(struct super_block *sb)
if (sbi->s_proc)
remove_proc_subtree(sb->s_id, ext4_proc_root);
+
+ mutex_lock(&sbi->s_error_notify_mutex);
kobject_del(&sbi->s_kobj);
+ mutex_unlock(&sbi->s_error_notify_mutex);
}
int __init ext4_init_sysfs(void)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7c75ed7e8979..3c75ee025bda 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1711,6 +1711,19 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
}
}
+static bool __sync_lazytime(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ if (!(inode_state_read(inode) & I_DIRTY_TIME)) {
+ spin_unlock(&inode->i_lock);
+ return false;
+ }
+ inode_state_clear(inode, I_DIRTY_TIME);
+ spin_unlock(&inode->i_lock);
+ inode->i_op->sync_lazytime(inode);
+ return true;
+}
+
bool sync_lazytime(struct inode *inode)
{
if (!(inode_state_read_once(inode) & I_DIRTY_TIME))
@@ -1718,9 +1731,8 @@ bool sync_lazytime(struct inode *inode)
trace_writeback_lazytime(inode);
if (inode->i_op->sync_lazytime)
- inode->i_op->sync_lazytime(inode);
- else
- mark_inode_dirty_sync(inode);
+ return __sync_lazytime(inode);
+ mark_inode_dirty_sync(inode);
return true;
}
@@ -2775,13 +2787,8 @@ static void wait_sb_inodes(struct super_block *sb)
* The mapping can appear untagged while still on-list since we
* do not have the mapping lock. Skip it here, wb completion
* will remove it.
- *
- * If the mapping does not have data integrity semantics,
- * there's no need to wait for the writeout to complete, as the
- * mapping cannot guarantee that data is persistently stored.
*/
- if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK) ||
- mapping_no_data_integrity(mapping))
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
continue;
spin_unlock_irq(&sb->s_inode_wblist_lock);
@@ -2916,6 +2923,17 @@ void sync_inodes_sb(struct super_block *sb)
*/
if (bdi == &noop_backing_dev_info)
return;
+
+ /*
+ * If the superblock has SB_I_NO_DATA_INTEGRITY set, there's no need to
+ * wait for the writeout to complete, as the filesystem cannot guarantee
+ * data persistence on sync. Just kick off writeback and return.
+ */
+ if (sb->s_iflags & SB_I_NO_DATA_INTEGRITY) {
+ wakeup_flusher_threads_bdi(bdi, WB_REASON_SYNC);
+ return;
+ }
+
WARN_ON(!rwsem_is_locked(&sb->s_umount));
/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b1bb7153cb78..676fd9856bfb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -3201,10 +3201,8 @@ void fuse_init_file_inode(struct inode *inode, unsigned int flags)
inode->i_fop = &fuse_file_operations;
inode->i_data.a_ops = &fuse_file_aops;
- if (fc->writeback_cache) {
+ if (fc->writeback_cache)
mapping_set_writeback_may_deadlock_on_reclaim(&inode->i_data);
- mapping_set_no_data_integrity(&inode->i_data);
- }
INIT_LIST_HEAD(&fi->write_files);
INIT_LIST_HEAD(&fi->queued_writes);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e57b8af06be9..c795abe47a4f 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1709,6 +1709,7 @@ static void fuse_sb_defaults(struct super_block *sb)
sb->s_export_op = &fuse_export_operations;
sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
sb->s_iflags |= SB_I_NOIDMAP;
+ sb->s_iflags |= SB_I_NO_DATA_INTEGRITY;
if (sb->s_user_ns != &init_user_ns)
sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
diff --git a/fs/iomap/bio.c b/fs/iomap/bio.c
index fc045f2e4c45..edd908183058 100644
--- a/fs/iomap/bio.c
+++ b/fs/iomap/bio.c
@@ -8,7 +8,10 @@
#include "internal.h"
#include "trace.h"
-static void iomap_read_end_io(struct bio *bio)
+static DEFINE_SPINLOCK(failed_read_lock);
+static struct bio_list failed_read_list = BIO_EMPTY_LIST;
+
+static void __iomap_read_end_io(struct bio *bio)
{
int error = blk_status_to_errno(bio->bi_status);
struct folio_iter fi;
@@ -18,6 +21,52 @@ static void iomap_read_end_io(struct bio *bio)
bio_put(bio);
}
+static void
+iomap_fail_reads(
+ struct work_struct *work)
+{
+ struct bio *bio;
+ struct bio_list tmp = BIO_EMPTY_LIST;
+ unsigned long flags;
+
+ spin_lock_irqsave(&failed_read_lock, flags);
+ bio_list_merge_init(&tmp, &failed_read_list);
+ spin_unlock_irqrestore(&failed_read_lock, flags);
+
+ while ((bio = bio_list_pop(&tmp)) != NULL) {
+ __iomap_read_end_io(bio);
+ cond_resched();
+ }
+}
+
+static DECLARE_WORK(failed_read_work, iomap_fail_reads);
+
+static void iomap_fail_buffered_read(struct bio *bio)
+{
+ unsigned long flags;
+
+ /*
+ * Bounce I/O errors to a workqueue to avoid nested i_lock acquisitions
+ * in the fserror code. The caller no longer owns the bio reference
+ * after the spinlock drops.
+ */
+ spin_lock_irqsave(&failed_read_lock, flags);
+ if (bio_list_empty(&failed_read_list))
+ WARN_ON_ONCE(!schedule_work(&failed_read_work));
+ bio_list_add(&failed_read_list, bio);
+ spin_unlock_irqrestore(&failed_read_lock, flags);
+}
+
+static void iomap_read_end_io(struct bio *bio)
+{
+ if (bio->bi_status) {
+ iomap_fail_buffered_read(bio);
+ return;
+ }
+
+ __iomap_read_end_io(bio);
+}
+
static void iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx)
{
struct bio *bio = ctx->read_ctx;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 00f0efaf12b2..92a831cf4bf1 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -514,6 +514,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
loff_t length = iomap_length(iter);
struct folio *folio = ctx->cur_folio;
size_t folio_len = folio_size(folio);
+ struct iomap_folio_state *ifs;
size_t poff, plen;
loff_t pos_diff;
int ret;
@@ -525,7 +526,7 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
return iomap_iter_advance(iter, length);
}
- ifs_alloc(iter->inode, folio, iter->flags);
+ ifs = ifs_alloc(iter->inode, folio, iter->flags);
length = min_t(loff_t, length, folio_len - offset_in_folio(folio, pos));
while (length) {
@@ -560,11 +561,15 @@ static int iomap_read_folio_iter(struct iomap_iter *iter,
*bytes_submitted += plen;
/*
- * If the entire folio has been read in by the IO
- * helper, then the helper owns the folio and will end
- * the read on it.
+ * Hand off folio ownership to the IO helper when:
+ * 1) The entire folio has been submitted for IO, or
+ * 2) There is no ifs attached to the folio
+ *
+ * Case (2) occurs when 1 << i_blkbits matches the folio
+ * size but the underlying filesystem or block device
+ * uses a smaller granularity for IO.
*/
- if (*bytes_submitted == folio_len)
+ if (*bytes_submitted == folio_len || !ifs)
ctx->cur_folio = NULL;
}
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index de89c5bef607..1508e2f54462 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -267,7 +267,15 @@ restart:
*/
BUFFER_TRACE(bh, "queue");
get_bh(bh);
- J_ASSERT_BH(bh, !buffer_jwrite(bh));
+ if (WARN_ON_ONCE(buffer_jwrite(bh))) {
+ put_bh(bh); /* drop the ref we just took */
+ spin_unlock(&journal->j_list_lock);
+ /* Clean up any previously batched buffers */
+ if (batch_count)
+ __flush_batch(journal, &batch_count);
+ jbd2_journal_abort(journal, -EFSCORRUPTED);
+ return -EFSCORRUPTED;
+ }
journal->j_chkpt_bhs[batch_count++] = bh;
transaction->t_chp_stats.cs_written++;
transaction->t_checkpoint_list = jh->b_cpnext;
@@ -325,7 +333,10 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
return 1;
- J_ASSERT(blocknr != 0);
+ if (WARN_ON_ONCE(blocknr == 0)) {
+ jbd2_journal_abort(journal, -EFSCORRUPTED);
+ return -EFSCORRUPTED;
+ }
/*
* We need to make sure that any blocks that were recently written out
diff --git a/fs/mpage.c b/fs/mpage.c
index 7dae5afc2b9e..b3d9f231a04a 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -646,17 +646,24 @@ out:
}
/**
- * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
+ * __mpage_writepages - walk the list of dirty pages of the given address space
+ * & writepage() all of them
* @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @get_block: the filesystem's block mapper function.
+ * @write_folio: handler to call for each folio before calling
+ * mpage_write_folio()
*
* This is a library function, which implements the writepages()
- * address_space_operation.
+ * address_space_operation. It calls @write_folio handler for each folio. If
+ * the handler returns value > 0, it calls mpage_write_folio() to do the
+ * folio writeback.
*/
int
-mpage_writepages(struct address_space *mapping,
- struct writeback_control *wbc, get_block_t get_block)
+__mpage_writepages(struct address_space *mapping,
+ struct writeback_control *wbc, get_block_t get_block,
+ int (*write_folio)(struct folio *folio,
+ struct writeback_control *wbc))
{
struct mpage_data mpd = {
.get_block = get_block,
@@ -666,11 +673,22 @@ mpage_writepages(struct address_space *mapping,
int error;
blk_start_plug(&plug);
- while ((folio = writeback_iter(mapping, wbc, folio, &error)))
+ while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
+ if (write_folio) {
+ error = write_folio(folio, wbc);
+ /*
+ * == 0 means folio is handled, < 0 means error. In
+ * both cases hand back control to writeback_iter()
+ */
+ if (error <= 0)
+ continue;
+ /* Let mpage_write_folio() handle the folio. */
+ }
error = mpage_write_folio(wbc, folio, &mpd);
+ }
if (mpd.bio)
mpage_bio_submit_write(mpd.bio);
blk_finish_plug(&plug);
return error;
}
-EXPORT_SYMBOL(mpage_writepages);
+EXPORT_SYMBOL(__mpage_writepages);
diff --git a/fs/namei.c b/fs/namei.c
index 58f715f7657e..9e5500dad14f 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2437,8 +2437,14 @@ inside:
EXPORT_SYMBOL(hashlen_string);
/*
- * Calculate the length and hash of the path component, and
- * return the length as the result.
+ * hash_name - Calculate the length and hash of the path component
+ * @nd: the path resolution state
+ * @name: the pathname to read the component from
+ * @lastword: if the component fits in a single word, LAST_WORD_IS_DOT,
+ * LAST_WORD_IS_DOTDOT, or some other value depending on whether the
+ * component is '.', '..', or something else. Otherwise, @lastword is 0.
+ *
+ * Returns: a pointer to the terminating '/' or NUL character in @name.
*/
static inline const char *hash_name(struct nameidata *nd,
const char *name,
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 88a0d801525f..a8c0d86118c5 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -171,9 +171,8 @@ static void netfs_queue_read(struct netfs_io_request *rreq,
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
- stream->front = subreq;
if (!stream->active) {
- stream->collected_to = stream->front->start;
+ stream->collected_to = subreq->start;
/* Store list pointers before active flag */
smp_store_release(&stream->active, true);
}
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index a498ee8d6674..f72e6da88cca 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -71,9 +71,8 @@ static int netfs_dispatch_unbuffered_reads(struct netfs_io_request *rreq)
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
- stream->front = subreq;
if (!stream->active) {
- stream->collected_to = stream->front->start;
+ stream->collected_to = subreq->start;
/* Store list pointers before active flag */
smp_store_release(&stream->active, true);
}
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index dd1451bf7543..f9ab69de3e29 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -111,7 +111,6 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq)
netfs_prepare_write(wreq, stream, wreq->start + wreq->transferred);
subreq = stream->construct;
stream->construct = NULL;
- stream->front = NULL;
}
/* Check if (re-)preparation failed. */
@@ -186,10 +185,18 @@ static int netfs_unbuffered_write(struct netfs_io_request *wreq)
stream->sreq_max_segs = INT_MAX;
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
- stream->prepare_write(subreq);
- __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- netfs_stat(&netfs_n_wh_retry_write_subreq);
+ if (stream->prepare_write) {
+ stream->prepare_write(subreq);
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ netfs_stat(&netfs_n_wh_retry_write_subreq);
+ } else {
+ struct iov_iter source;
+
+ netfs_reset_iter(subreq);
+ source = subreq->io_iter;
+ netfs_reissue_write(stream, subreq, &source);
+ }
}
netfs_unbuffered_write_done(wreq);
diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c
index 72a435e5fc6d..154a14bb2d7f 100644
--- a/fs/netfs/iterator.c
+++ b/fs/netfs/iterator.c
@@ -143,6 +143,47 @@ static size_t netfs_limit_bvec(const struct iov_iter *iter, size_t start_offset,
}
/*
+ * Select the span of a kvec iterator we're going to use. Limit it by both
+ * maximum size and maximum number of segments. Returns the size of the span
+ * in bytes.
+ */
+static size_t netfs_limit_kvec(const struct iov_iter *iter, size_t start_offset,
+ size_t max_size, size_t max_segs)
+{
+ const struct kvec *kvecs = iter->kvec;
+ unsigned int nkv = iter->nr_segs, ix = 0, nsegs = 0;
+ size_t len, span = 0, n = iter->count;
+ size_t skip = iter->iov_offset + start_offset;
+
+ if (WARN_ON(!iov_iter_is_kvec(iter)) ||
+ WARN_ON(start_offset > n) ||
+ n == 0)
+ return 0;
+
+ while (n && ix < nkv && skip) {
+ len = kvecs[ix].iov_len;
+ if (skip < len)
+ break;
+ skip -= len;
+ n -= len;
+ ix++;
+ }
+
+ while (n && ix < nkv) {
+ len = min3(n, kvecs[ix].iov_len - skip, max_size);
+ span += len;
+ nsegs++;
+ ix++;
+ if (span >= max_size || nsegs >= max_segs)
+ break;
+ skip = 0;
+ n -= len;
+ }
+
+ return min(span, max_size);
+}
+
+/*
* Select the span of an xarray iterator we're going to use. Limit it by both
* maximum size and maximum number of segments. It is assumed that segments
* can be larger than a page in size, provided they're physically contiguous.
@@ -245,6 +286,8 @@ size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
return netfs_limit_bvec(iter, start_offset, max_size, max_segs);
if (iov_iter_is_xarray(iter))
return netfs_limit_xarray(iter, start_offset, max_size, max_segs);
+ if (iov_iter_is_kvec(iter))
+ return netfs_limit_kvec(iter, start_offset, max_size, max_segs);
BUG();
}
EXPORT_SYMBOL(netfs_limit_iter);
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 137f0e28a44c..e5f6665b3341 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -205,7 +205,8 @@ reassess:
* in progress. The issuer thread may be adding stuff to the tail
* whilst we're doing this.
*/
- front = READ_ONCE(stream->front);
+ front = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
while (front) {
size_t transferred;
@@ -301,7 +302,6 @@ reassess:
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
- stream->front = front;
spin_unlock(&rreq->lock);
netfs_put_subrequest(remove,
notes & ABANDON_SREQ ?
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 7793ba5e3e8f..cca9ac43c077 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -93,8 +93,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
from->start, from->transferred, from->len);
if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
- !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
+ !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) {
+ subreq = from;
goto abandon;
+ }
list_for_each_continue(next, &stream->subrequests) {
subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
@@ -178,6 +180,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
if (subreq == to)
break;
}
+ subreq = NULL;
continue;
}
diff --git a/fs/netfs/read_single.c b/fs/netfs/read_single.c
index 8e6264f62a8f..d0e23bc42445 100644
--- a/fs/netfs/read_single.c
+++ b/fs/netfs/read_single.c
@@ -107,7 +107,6 @@ static int netfs_single_dispatch_read(struct netfs_io_request *rreq)
spin_lock(&rreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
trace_netfs_sreq(subreq, netfs_sreq_trace_added);
- stream->front = subreq;
/* Store list pointers before active flag */
smp_store_release(&stream->active, true);
spin_unlock(&rreq->lock);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 83eb3dc1adf8..b194447f4b11 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -228,7 +228,8 @@ reassess_streams:
if (!smp_load_acquire(&stream->active))
continue;
- front = stream->front;
+ front = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
while (front) {
trace_netfs_collect_sreq(wreq, front);
//_debug("sreq [%x] %llx %zx/%zx",
@@ -279,7 +280,6 @@ reassess_streams:
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
- stream->front = front;
spin_unlock(&wreq->lock);
netfs_put_subrequest(remove,
notes & SAW_FAILURE ?
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 437268f65640..2db688f94125 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -206,9 +206,8 @@ void netfs_prepare_write(struct netfs_io_request *wreq,
spin_lock(&wreq->lock);
list_add_tail(&subreq->rreq_link, &stream->subrequests);
if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
- stream->front = subreq;
if (!stream->active) {
- stream->collected_to = stream->front->start;
+ stream->collected_to = subreq->start;
/* Write list pointers before active flag */
smp_store_release(&stream->active, true);
}
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 758611ee4475..13cb60b52bd6 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -1146,15 +1146,15 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
return -EOVERFLOW;
/*
- * With metacopy disabled, we fsync after final metadata copyup, for
+ * With "fsync=strict", we fsync after final metadata copyup, for
* both regular files and directories to get atomic copyup semantics
* on filesystems that do not use strict metadata ordering (e.g. ubifs).
*
- * With metacopy enabled we want to avoid fsync on all meta copyup
+ * By default, we want to avoid fsync on all meta copyup, because
* that will hurt performance of workloads such as chown -R, so we
* only fsync on data copyup as legacy behavior.
*/
- ctx.metadata_fsync = !OVL_FS(dentry->d_sb)->config.metacopy &&
+ ctx.metadata_fsync = ovl_should_sync_metadata(OVL_FS(dentry->d_sb)) &&
(S_ISREG(ctx.stat.mode) || S_ISDIR(ctx.stat.mode));
ctx.metacopy = ovl_need_meta_copy_up(dentry, ctx.stat.mode, flags);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index cad2055ebf18..63b299bf12f7 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -99,6 +99,12 @@ enum {
OVL_VERITY_REQUIRE,
};
+enum {
+ OVL_FSYNC_VOLATILE,
+ OVL_FSYNC_AUTO,
+ OVL_FSYNC_STRICT,
+};
+
/*
* The tuple (fh,uuid) is a universal unique identifier for a copy up origin,
* where:
@@ -656,6 +662,21 @@ static inline bool ovl_xino_warn(struct ovl_fs *ofs)
return ofs->config.xino == OVL_XINO_ON;
}
+static inline bool ovl_should_sync(struct ovl_fs *ofs)
+{
+ return ofs->config.fsync_mode != OVL_FSYNC_VOLATILE;
+}
+
+static inline bool ovl_should_sync_metadata(struct ovl_fs *ofs)
+{
+ return ofs->config.fsync_mode == OVL_FSYNC_STRICT;
+}
+
+static inline bool ovl_is_volatile(struct ovl_config *config)
+{
+ return config->fsync_mode == OVL_FSYNC_VOLATILE;
+}
+
/*
* To avoid regressions in existing setups with overlay lower offline changes,
* we allow lower changes only if none of the new features are used.
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 1d4828dbcf7a..80cad4ea96a3 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -18,7 +18,7 @@ struct ovl_config {
int xino;
bool metacopy;
bool userxattr;
- bool ovl_volatile;
+ int fsync_mode;
};
struct ovl_sb {
@@ -120,11 +120,6 @@ static inline struct ovl_fs *OVL_FS(struct super_block *sb)
return (struct ovl_fs *)sb->s_fs_info;
}
-static inline bool ovl_should_sync(struct ovl_fs *ofs)
-{
- return !ofs->config.ovl_volatile;
-}
-
static inline unsigned int ovl_numlower(struct ovl_entry *oe)
{
return oe ? oe->__numlower : 0;
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index 8111b437ae5d..c93fcaa45d4a 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -58,6 +58,7 @@ enum ovl_opt {
Opt_xino,
Opt_metacopy,
Opt_verity,
+ Opt_fsync,
Opt_volatile,
Opt_override_creds,
};
@@ -140,6 +141,23 @@ static int ovl_verity_mode_def(void)
return OVL_VERITY_OFF;
}
+static const struct constant_table ovl_parameter_fsync[] = {
+ { "volatile", OVL_FSYNC_VOLATILE },
+ { "auto", OVL_FSYNC_AUTO },
+ { "strict", OVL_FSYNC_STRICT },
+ {}
+};
+
+static const char *ovl_fsync_mode(struct ovl_config *config)
+{
+ return ovl_parameter_fsync[config->fsync_mode].name;
+}
+
+static int ovl_fsync_mode_def(void)
+{
+ return OVL_FSYNC_AUTO;
+}
+
const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_string_empty("lowerdir", Opt_lowerdir),
fsparam_file_or_string("lowerdir+", Opt_lowerdir_add),
@@ -155,6 +173,7 @@ const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_enum("xino", Opt_xino, ovl_parameter_xino),
fsparam_enum("metacopy", Opt_metacopy, ovl_parameter_bool),
fsparam_enum("verity", Opt_verity, ovl_parameter_verity),
+ fsparam_enum("fsync", Opt_fsync, ovl_parameter_fsync),
fsparam_flag("volatile", Opt_volatile),
fsparam_flag_no("override_creds", Opt_override_creds),
{}
@@ -665,8 +684,11 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_verity:
config->verity_mode = result.uint_32;
break;
+ case Opt_fsync:
+ config->fsync_mode = result.uint_32;
+ break;
case Opt_volatile:
- config->ovl_volatile = true;
+ config->fsync_mode = OVL_FSYNC_VOLATILE;
break;
case Opt_userxattr:
config->userxattr = true;
@@ -800,6 +822,7 @@ int ovl_init_fs_context(struct fs_context *fc)
ofs->config.nfs_export = ovl_nfs_export_def;
ofs->config.xino = ovl_xino_def();
ofs->config.metacopy = ovl_metacopy_def;
+ ofs->config.fsync_mode = ovl_fsync_mode_def();
fc->s_fs_info = ofs;
fc->fs_private = ctx;
@@ -870,9 +893,9 @@ int ovl_fs_params_verify(const struct ovl_fs_context *ctx,
config->index = false;
}
- if (!config->upperdir && config->ovl_volatile) {
+ if (!config->upperdir && ovl_is_volatile(config)) {
pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
- config->ovl_volatile = false;
+ config->fsync_mode = ovl_fsync_mode_def();
}
if (!config->upperdir && config->uuid == OVL_UUID_ON) {
@@ -1070,8 +1093,8 @@ int ovl_show_options(struct seq_file *m, struct dentry *dentry)
seq_printf(m, ",xino=%s", ovl_xino_mode(&ofs->config));
if (ofs->config.metacopy != ovl_metacopy_def)
seq_printf(m, ",metacopy=%s", str_on_off(ofs->config.metacopy));
- if (ofs->config.ovl_volatile)
- seq_puts(m, ",volatile");
+ if (ofs->config.fsync_mode != ovl_fsync_mode_def())
+ seq_printf(m, ",fsync=%s", ovl_fsync_mode(&ofs->config));
if (ofs->config.userxattr)
seq_puts(m, ",userxattr");
if (ofs->config.verity_mode != ovl_verity_mode_def())
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index d4c12feec039..0822987cfb51 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -776,7 +776,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
* For volatile mount, create a incompat/volatile/dirty file to keep
* track of it.
*/
- if (ofs->config.ovl_volatile) {
+ if (ovl_is_volatile(&ofs->config)) {
err = ovl_create_volatile_dirty(ofs);
if (err < 0) {
pr_err("Failed to create volatile/dirty file.\n");
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 3f1b763a8bb4..2ea769f311c3 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -85,7 +85,10 @@ int ovl_can_decode_fh(struct super_block *sb)
if (!exportfs_can_decode_fh(sb->s_export_op))
return 0;
- return sb->s_export_op->encode_fh ? -1 : FILEID_INO32_GEN;
+ if (sb->s_export_op->encode_fh == generic_encode_ino32_fh)
+ return FILEID_INO32_GEN;
+
+ return -1;
}
struct dentry *ovl_indexdir(struct super_block *sb)
diff --git a/fs/smb/client/Makefile b/fs/smb/client/Makefile
index 26b6105f04d1..1a6e1e1c9764 100644
--- a/fs/smb/client/Makefile
+++ b/fs/smb/client/Makefile
@@ -48,8 +48,8 @@ cifs-$(CONFIG_CIFS_COMPRESSION) += compress.o compress/lz77.o
# Build the SMB2 error mapping table from smb2status.h
#
$(obj)/smb2_mapping_table.c: $(src)/../common/smb2status.h \
- $(src)/gen_smb2_mapping
- $(call cmd,gen_smb2_mapping)
+ $(src)/gen_smb2_mapping FORCE
+ $(call if_changed,gen_smb2_mapping)
$(obj)/smb2maperror.o: $(obj)/smb2_mapping_table.c
@@ -58,4 +58,5 @@ quiet_cmd_gen_smb2_mapping = GEN $@
obj-$(CONFIG_SMB_KUNIT_TESTS) += smb2maperror_test.o
-clean-files += smb2_mapping_table.c
+# Let Kbuild handle tracking and cleaning
+targets += smb2_mapping_table.c
diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c
index 393a4ae47cc1..9b2bb8764a80 100644
--- a/fs/smb/server/oplock.c
+++ b/fs/smb/server/oplock.c
@@ -82,11 +82,19 @@ static void lease_del_list(struct oplock_info *opinfo)
spin_unlock(&lb->lb_lock);
}
-static void lb_add(struct lease_table *lb)
+static struct lease_table *alloc_lease_table(struct oplock_info *opinfo)
{
- write_lock(&lease_list_lock);
- list_add(&lb->l_entry, &lease_table_list);
- write_unlock(&lease_list_lock);
+ struct lease_table *lb;
+
+ lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP);
+ if (!lb)
+ return NULL;
+
+ memcpy(lb->client_guid, opinfo->conn->ClientGUID,
+ SMB2_CLIENT_GUID_SIZE);
+ INIT_LIST_HEAD(&lb->lease_list);
+ spin_lock_init(&lb->lb_lock);
+ return lb;
}
static int alloc_lease(struct oplock_info *opinfo, struct lease_ctx_info *lctx)
@@ -1042,34 +1050,27 @@ static void copy_lease(struct oplock_info *op1, struct oplock_info *op2)
lease2->version = lease1->version;
}
-static int add_lease_global_list(struct oplock_info *opinfo)
+static void add_lease_global_list(struct oplock_info *opinfo,
+ struct lease_table *new_lb)
{
struct lease_table *lb;
- read_lock(&lease_list_lock);
+ write_lock(&lease_list_lock);
list_for_each_entry(lb, &lease_table_list, l_entry) {
if (!memcmp(lb->client_guid, opinfo->conn->ClientGUID,
SMB2_CLIENT_GUID_SIZE)) {
opinfo->o_lease->l_lb = lb;
lease_add_list(opinfo);
- read_unlock(&lease_list_lock);
- return 0;
+ write_unlock(&lease_list_lock);
+ kfree(new_lb);
+ return;
}
}
- read_unlock(&lease_list_lock);
- lb = kmalloc_obj(struct lease_table, KSMBD_DEFAULT_GFP);
- if (!lb)
- return -ENOMEM;
-
- memcpy(lb->client_guid, opinfo->conn->ClientGUID,
- SMB2_CLIENT_GUID_SIZE);
- INIT_LIST_HEAD(&lb->lease_list);
- spin_lock_init(&lb->lb_lock);
- opinfo->o_lease->l_lb = lb;
+ opinfo->o_lease->l_lb = new_lb;
lease_add_list(opinfo);
- lb_add(lb);
- return 0;
+ list_add(&new_lb->l_entry, &lease_table_list);
+ write_unlock(&lease_list_lock);
}
static void set_oplock_level(struct oplock_info *opinfo, int level,
@@ -1189,6 +1190,7 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid,
int err = 0;
struct oplock_info *opinfo = NULL, *prev_opinfo = NULL;
struct ksmbd_inode *ci = fp->f_ci;
+ struct lease_table *new_lb = NULL;
bool prev_op_has_lease;
__le32 prev_op_state = 0;
@@ -1291,21 +1293,37 @@ set_lev:
set_oplock_level(opinfo, req_op_level, lctx);
out:
- opinfo_count_inc(fp);
- opinfo_add(opinfo, fp);
-
+ /*
+ * Set o_fp before any publication so that concurrent readers
+ * (e.g. find_same_lease_key() on the lease list) that
+ * dereference opinfo->o_fp don't hit a NULL pointer.
+ *
+ * Keep the original publication order so concurrent opens can
+ * still observe the in-flight grant via ci->m_op_list, but make
+ * everything after opinfo_add() no-fail by preallocating any new
+ * lease_table first.
+ */
+ opinfo->o_fp = fp;
if (opinfo->is_lease) {
- err = add_lease_global_list(opinfo);
- if (err)
+ new_lb = alloc_lease_table(opinfo);
+ if (!new_lb) {
+ err = -ENOMEM;
goto err_out;
+ }
}
+ opinfo_count_inc(fp);
+ opinfo_add(opinfo, fp);
+
+ if (opinfo->is_lease)
+ add_lease_global_list(opinfo, new_lb);
+
rcu_assign_pointer(fp->f_opinfo, opinfo);
- opinfo->o_fp = fp;
return 0;
err_out:
- __free_opinfo(opinfo);
+ kfree(new_lb);
+ opinfo_put(opinfo);
return err;
}
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 9c44e71e3c3b..6fb7a795ff5d 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1939,8 +1939,14 @@ out_err:
if (sess->user && sess->user->flags & KSMBD_USER_FLAG_DELAY_SESSION)
try_delay = true;
- sess->last_active = jiffies;
- sess->state = SMB2_SESSION_EXPIRED;
+ /*
+ * For binding requests, session belongs to another
+ * connection. Do not expire it.
+ */
+ if (!(req->Flags & SMB2_SESSION_REQ_FLAG_BINDING)) {
+ sess->last_active = jiffies;
+ sess->state = SMB2_SESSION_EXPIRED;
+ }
ksmbd_user_session_put(sess);
work->sess = NULL;
if (try_delay) {
@@ -4446,8 +4452,9 @@ int smb2_query_dir(struct ksmbd_work *work)
d_info.wptr = (char *)rsp->Buffer;
d_info.rptr = (char *)rsp->Buffer;
d_info.out_buf_len =
- smb2_calc_max_out_buf_len(work, 8,
- le32_to_cpu(req->OutputBufferLength));
+ smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_directory_rsp, Buffer),
+ le32_to_cpu(req->OutputBufferLength));
if (d_info.out_buf_len < 0) {
rc = -EINVAL;
goto err_out;
@@ -4714,8 +4721,9 @@ static int smb2_get_ea(struct ksmbd_work *work, struct ksmbd_file *fp,
}
buf_free_len =
- smb2_calc_max_out_buf_len(work, 8,
- le32_to_cpu(req->OutputBufferLength));
+ smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_info_rsp, Buffer),
+ le32_to_cpu(req->OutputBufferLength));
if (buf_free_len < 0)
return -EINVAL;
@@ -4932,7 +4940,8 @@ static int get_file_all_info(struct ksmbd_work *work,
int conv_len;
char *filename;
u64 time;
- int ret;
+ int ret, buf_free_len, filename_len;
+ struct smb2_query_info_req *req = ksmbd_req_buf_next(work);
if (!(fp->daccess & FILE_READ_ATTRIBUTES_LE)) {
ksmbd_debug(SMB, "no right to read the attributes : 0x%x\n",
@@ -4944,6 +4953,16 @@ static int get_file_all_info(struct ksmbd_work *work,
if (IS_ERR(filename))
return PTR_ERR(filename);
+ filename_len = strlen(filename);
+ buf_free_len = smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_info_rsp, Buffer) +
+ offsetof(struct smb2_file_all_info, FileName),
+ le32_to_cpu(req->OutputBufferLength));
+ if (buf_free_len < (filename_len + 1) * 2) {
+ kfree(filename);
+ return -EINVAL;
+ }
+
ret = vfs_getattr(&fp->filp->f_path, &stat, STATX_BASIC_STATS,
AT_STATX_SYNC_AS_STAT);
if (ret) {
@@ -4987,7 +5006,8 @@ static int get_file_all_info(struct ksmbd_work *work,
file_info->Mode = fp->coption;
file_info->AlignmentRequirement = 0;
conv_len = smbConvertToUTF16((__le16 *)file_info->FileName, filename,
- PATH_MAX, conn->local_nls, 0);
+ min(filename_len, PATH_MAX),
+ conn->local_nls, 0);
conv_len *= 2;
file_info->FileNameLength = cpu_to_le32(conv_len);
rsp->OutputBufferLength =
@@ -5041,8 +5061,9 @@ static int get_file_stream_info(struct ksmbd_work *work,
file_info = (struct smb2_file_stream_info *)rsp->Buffer;
buf_free_len =
- smb2_calc_max_out_buf_len(work, 8,
- le32_to_cpu(req->OutputBufferLength));
+ smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_query_info_rsp, Buffer),
+ le32_to_cpu(req->OutputBufferLength));
if (buf_free_len < 0)
goto out;
@@ -7586,14 +7607,15 @@ retry:
rc = vfs_lock_file(filp, smb_lock->cmd, flock, NULL);
skip:
if (smb_lock->flags & SMB2_LOCKFLAG_UNLOCK) {
+ locks_free_lock(flock);
+ kfree(smb_lock);
if (!rc) {
ksmbd_debug(SMB, "File unlocked\n");
} else if (rc == -ENOENT) {
rsp->hdr.Status = STATUS_NOT_LOCKED;
+ err = rc;
goto out;
}
- locks_free_lock(flock);
- kfree(smb_lock);
} else {
if (rc == FILE_LOCK_DEFERRED) {
void **argv;
@@ -7662,6 +7684,9 @@ skip:
spin_unlock(&work->conn->llist_lock);
ksmbd_debug(SMB, "successful in taking lock\n");
} else {
+ locks_free_lock(flock);
+ kfree(smb_lock);
+ err = rc;
goto out;
}
}
@@ -7692,13 +7717,17 @@ out:
struct file_lock *rlock = NULL;
rlock = smb_flock_init(filp);
- rlock->c.flc_type = F_UNLCK;
- rlock->fl_start = smb_lock->start;
- rlock->fl_end = smb_lock->end;
+ if (rlock) {
+ rlock->c.flc_type = F_UNLCK;
+ rlock->fl_start = smb_lock->start;
+ rlock->fl_end = smb_lock->end;
- rc = vfs_lock_file(filp, F_SETLK, rlock, NULL);
- if (rc)
- pr_err("rollback unlock fail : %d\n", rc);
+ rc = vfs_lock_file(filp, F_SETLK, rlock, NULL);
+ if (rc)
+ pr_err("rollback unlock fail : %d\n", rc);
+ } else {
+ pr_err("rollback unlock alloc failed\n");
+ }
list_del(&smb_lock->llist);
spin_lock(&work->conn->llist_lock);
@@ -7708,7 +7737,8 @@ out:
spin_unlock(&work->conn->llist_lock);
locks_free_lock(smb_lock->fl);
- locks_free_lock(rlock);
+ if (rlock)
+ locks_free_lock(rlock);
kfree(smb_lock);
}
out2:
@@ -8191,8 +8221,9 @@ int smb2_ioctl(struct ksmbd_work *work)
buffer = (char *)req + le32_to_cpu(req->InputOffset);
cnt_code = le32_to_cpu(req->CtlCode);
- ret = smb2_calc_max_out_buf_len(work, 48,
- le32_to_cpu(req->MaxOutputResponse));
+ ret = smb2_calc_max_out_buf_len(work,
+ offsetof(struct smb2_ioctl_rsp, Buffer),
+ le32_to_cpu(req->MaxOutputResponse));
if (ret < 0) {
rsp->hdr.Status = STATUS_INVALID_PARAMETER;
goto out;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7fae8002344a..23e894092dab 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -181,22 +181,23 @@ static void udf_write_failed(struct address_space *mapping, loff_t to)
}
}
-static int udf_adinicb_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+static int udf_handle_page_wb(struct folio *folio,
+ struct writeback_control *wbc)
{
- struct inode *inode = mapping->host;
+ struct inode *inode = folio->mapping->host;
struct udf_inode_info *iinfo = UDF_I(inode);
- struct folio *folio = NULL;
- int error = 0;
- while ((folio = writeback_iter(mapping, wbc, folio, &error))) {
- BUG_ON(!folio_test_locked(folio));
- BUG_ON(folio->index != 0);
- memcpy_from_file_folio(iinfo->i_data + iinfo->i_lenEAttr, folio,
- 0, i_size_read(inode));
- folio_unlock(folio);
- }
+ /*
+ * Inodes in the normal format are handled by the generic code. This
+ * check is race-free as the folio lock protects us from inode type
+ * conversion.
+ */
+ if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB)
+ return 1;
+ memcpy_from_file_folio(iinfo->i_data + iinfo->i_lenEAttr, folio,
+ 0, i_size_read(inode));
+ folio_unlock(folio);
mark_inode_dirty(inode);
return 0;
}
@@ -204,12 +205,8 @@ static int udf_adinicb_writepages(struct address_space *mapping,
static int udf_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct inode *inode = mapping->host;
- struct udf_inode_info *iinfo = UDF_I(inode);
-
- if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
- return udf_adinicb_writepages(mapping, wbc);
- return mpage_writepages(mapping, wbc, udf_get_block_wb);
+ return __mpage_writepages(mapping, wbc, udf_get_block_wb,
+ udf_handle_page_wb);
}
static void udf_adinicb_read_folio(struct folio *folio)
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 8244305949de..67fd9c75ac3f 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -55,7 +55,8 @@ struct xfs_attr_list_context {
struct xfs_trans *tp;
struct xfs_inode *dp; /* inode */
struct xfs_attrlist_cursor_kern cursor; /* position in list */
- void *buffer; /* output buffer */
+ /* output buffer */
+ void *buffer __counted_by_ptr(bufsize);
/*
* Abort attribute list iteration if non-zero. Can be used to pass
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 47f48ae555c0..2b78041e8672 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -1416,6 +1416,28 @@ xfs_attr3_leaf_create(
}
/*
+ * Reinitialize an existing attr fork block as an empty leaf, and attach
+ * the buffer to tp.
+ */
+int
+xfs_attr3_leaf_init(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ xfs_dablk_t blkno)
+{
+ struct xfs_buf *bp = NULL;
+ struct xfs_da_args args = {
+ .trans = tp,
+ .dp = dp,
+ .owner = dp->i_ino,
+ .geo = dp->i_mount->m_attr_geo,
+ };
+
+ ASSERT(tp != NULL);
+
+ return xfs_attr3_leaf_create(&args, blkno, &bp);
+}
+/*
* Split the leaf node, rebalance, then add the new entry.
*
* Returns 0 if the entry was added, 1 if a further split is needed or a
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index aca46da2bc50..72639efe6ac3 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -87,6 +87,9 @@ int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
/*
* Routines used for shrinking the Btree.
*/
+
+int xfs_attr3_leaf_init(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t blkno);
int xfs_attr3_leaf_toosmall(struct xfs_da_state *state, int *retval);
void xfs_attr3_leaf_unbalance(struct xfs_da_state *state,
struct xfs_da_state_blk *drop_blk,
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 09d4c17b3e7b..ad801b7bd2dd 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -1506,21 +1506,20 @@ xfs_da3_fixhashpath(
}
/*
- * Remove an entry from an intermediate node.
+ * Internal implementation to remove an entry from an intermediate node.
*/
STATIC void
-xfs_da3_node_remove(
- struct xfs_da_state *state,
- struct xfs_da_state_blk *drop_blk)
+__xfs_da3_node_remove(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_da_geometry *geo,
+ struct xfs_da_state_blk *drop_blk)
{
struct xfs_da_intnode *node;
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_da_node_entry *btree;
int index;
int tmp;
- struct xfs_inode *dp = state->args->dp;
-
- trace_xfs_da_node_remove(state->args);
node = drop_blk->bp->b_addr;
xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
@@ -1536,17 +1535,17 @@ xfs_da3_node_remove(
tmp = nodehdr.count - index - 1;
tmp *= (uint)sizeof(xfs_da_node_entry_t);
memmove(&btree[index], &btree[index + 1], tmp);
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(tp, drop_blk->bp,
XFS_DA_LOGRANGE(node, &btree[index], tmp));
index = nodehdr.count - 1;
}
memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(tp, drop_blk->bp,
XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
nodehdr.count -= 1;
xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
- xfs_trans_log_buf(state->args->trans, drop_blk->bp,
- XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));
+ xfs_trans_log_buf(tp, drop_blk->bp,
+ XFS_DA_LOGRANGE(node, &node->hdr, geo->node_hdr_size));
/*
* Copy the last hash value from the block to propagate upwards.
@@ -1555,6 +1554,38 @@ xfs_da3_node_remove(
}
/*
+ * Remove an entry from an intermediate node.
+ */
+STATIC void
+xfs_da3_node_remove(
+ struct xfs_da_state *state,
+ struct xfs_da_state_blk *drop_blk)
+{
+ trace_xfs_da_node_remove(state->args);
+
+ __xfs_da3_node_remove(state->args->trans, state->args->dp,
+ state->args->geo, drop_blk);
+}
+
+/*
+ * Remove an entry from an intermediate attr node at the specified index.
+ */
+void
+xfs_attr3_node_entry_remove(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_buf *bp,
+ int index)
+{
+ struct xfs_da_state_blk blk = {
+ .index = index,
+ .bp = bp,
+ };
+
+ __xfs_da3_node_remove(tp, dp, dp->i_mount->m_attr_geo, &blk);
+}
+
+/*
* Unbalance the elements between two intermediate nodes,
* move all Btree elements from one node into another.
*/
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 354d5d65043e..afcf2d3c7a21 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -184,6 +184,8 @@ int xfs_da3_split(xfs_da_state_t *state);
int xfs_da3_join(xfs_da_state_t *state);
void xfs_da3_fixhashpath(struct xfs_da_state *state,
struct xfs_da_state_path *path_to_to_fix);
+void xfs_attr3_node_entry_remove(struct xfs_trans *tp, struct xfs_inode *dp,
+ struct xfs_buf *bp, int index);
/*
* Routines used for finding things in the Btree.
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 1d25bd5b892e..222812fe202c 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -171,8 +171,10 @@ xchk_quota_item(
error = xchk_quota_item_bmap(sc, dq, offset);
xchk_iunlock(sc, XFS_ILOCK_SHARED);
- if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
+ if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error)) {
+ mutex_unlock(&dq->q_qlock);
return error;
+ }
/*
* Warn if the hard limits are larger than the fs.
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 39ea651cbb75..286c5f5e0544 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -972,20 +972,12 @@ TRACE_EVENT(xfile_create,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
- __array(char, pathname, MAXNAMELEN)
),
TP_fast_assign(
- char *path;
-
__entry->ino = file_inode(xf->file)->i_ino;
- path = file_path(xf->file, __entry->pathname, MAXNAMELEN);
- if (IS_ERR(path))
- strncpy(__entry->pathname, "(unknown)",
- sizeof(__entry->pathname));
),
- TP_printk("xfino 0x%lx path '%s'",
- __entry->ino,
- __entry->pathname)
+ TP_printk("xfino 0x%lx",
+ __entry->ino)
);
TRACE_EVENT(xfile_destroy,
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 92331991f9fd..a5b69c0fbfd0 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -140,7 +140,7 @@ xfs_attr3_node_inactive(
xfs_daddr_t parent_blkno, child_blkno;
struct xfs_buf *child_bp;
struct xfs_da3_icnode_hdr ichdr;
- int error, i;
+ int error;
/*
* Since this code is recursive (gasp!) we must protect ourselves.
@@ -152,7 +152,7 @@ xfs_attr3_node_inactive(
return -EFSCORRUPTED;
}
- xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr);
+ xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr);
parent_blkno = xfs_buf_daddr(bp);
if (!ichdr.count) {
xfs_trans_brelse(*trans, bp);
@@ -167,7 +167,7 @@ xfs_attr3_node_inactive(
* over the leaves removing all of them. If this is higher up
* in the tree, recurse downward.
*/
- for (i = 0; i < ichdr.count; i++) {
+ while (ichdr.count > 0) {
/*
* Read the subsidiary block to see what we have to work with.
* Don't do this in a transaction. This is a depth-first
@@ -218,29 +218,32 @@ xfs_attr3_node_inactive(
xfs_trans_binval(*trans, child_bp);
child_bp = NULL;
+ error = xfs_da3_node_read_mapped(*trans, dp,
+ parent_blkno, &bp, XFS_ATTR_FORK);
+ if (error)
+ return error;
+
/*
- * If we're not done, re-read the parent to get the next
- * child block number.
+ * Remove entry from parent node, prevents being indexed to.
*/
- if (i + 1 < ichdr.count) {
- struct xfs_da3_icnode_hdr phdr;
+ xfs_attr3_node_entry_remove(*trans, dp, bp, 0);
+
+ xfs_da3_node_hdr_from_disk(mp, &ichdr, bp->b_addr);
+ bp = NULL;
- error = xfs_da3_node_read_mapped(*trans, dp,
- parent_blkno, &bp, XFS_ATTR_FORK);
+ if (ichdr.count > 0) {
+ /*
+ * If we're not done, get the next child block number.
+ */
+ child_fsb = be32_to_cpu(ichdr.btree[0].before);
+
+ /*
+ * Atomically commit the whole invalidate stuff.
+ */
+ error = xfs_trans_roll_inode(trans, dp);
if (error)
return error;
- xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr,
- bp->b_addr);
- child_fsb = be32_to_cpu(phdr.btree[i + 1].before);
- xfs_trans_brelse(*trans, bp);
- bp = NULL;
}
- /*
- * Atomically commit the whole invalidate stuff.
- */
- error = xfs_trans_roll_inode(trans, dp);
- if (error)
- return error;
}
return 0;
@@ -257,10 +260,8 @@ xfs_attr3_root_inactive(
struct xfs_trans **trans,
struct xfs_inode *dp)
{
- struct xfs_mount *mp = dp->i_mount;
struct xfs_da_blkinfo *info;
struct xfs_buf *bp;
- xfs_daddr_t blkno;
int error;
/*
@@ -272,7 +273,6 @@ xfs_attr3_root_inactive(
error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK);
if (error)
return error;
- blkno = xfs_buf_daddr(bp);
/*
* Invalidate the tree, even if the "tree" is only a single leaf block.
@@ -283,10 +283,26 @@ xfs_attr3_root_inactive(
case cpu_to_be16(XFS_DA_NODE_MAGIC):
case cpu_to_be16(XFS_DA3_NODE_MAGIC):
error = xfs_attr3_node_inactive(trans, dp, bp, 1);
+ /*
+ * Empty root node block are not allowed, convert it to leaf.
+ */
+ if (!error)
+ error = xfs_attr3_leaf_init(*trans, dp, 0);
+ if (!error)
+ error = xfs_trans_roll_inode(trans, dp);
break;
case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
error = xfs_attr3_leaf_inactive(trans, dp, bp);
+ /*
+ * Reinit the leaf before truncating extents so that a crash
+ * mid-truncation leaves an empty leaf rather than one with
+ * entries that may reference freed remote value blocks.
+ */
+ if (!error)
+ error = xfs_attr3_leaf_init(*trans, dp, 0);
+ if (!error)
+ error = xfs_trans_roll_inode(trans, dp);
break;
default:
xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK);
@@ -295,21 +311,6 @@ xfs_attr3_root_inactive(
xfs_trans_brelse(*trans, bp);
break;
}
- if (error)
- return error;
-
- /*
- * Invalidate the incore copy of the root block.
- */
- error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno,
- XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp);
- if (error)
- return error;
- xfs_trans_binval(*trans, bp); /* remove from cache */
- /*
- * Commit the invalidate and start the next transaction.
- */
- error = xfs_trans_roll_inode(trans, dp);
return error;
}
@@ -328,6 +329,7 @@ xfs_attr_inactive(
{
struct xfs_trans *trans;
struct xfs_mount *mp;
+ struct xfs_buf *bp;
int lock_mode = XFS_ILOCK_SHARED;
int error = 0;
@@ -363,10 +365,27 @@ xfs_attr_inactive(
* removal below.
*/
if (dp->i_af.if_nextents > 0) {
+ /*
+ * Invalidate and truncate all blocks but leave the root block.
+ */
error = xfs_attr3_root_inactive(&trans, dp);
if (error)
goto out_cancel;
+ error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK,
+ XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount));
+ if (error)
+ goto out_cancel;
+
+ /*
+ * Invalidate and truncate the root block and ensure that the
+ * operation is completed within a single transaction.
+ */
+ error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK);
+ if (error)
+ goto out_cancel;
+
+ xfs_trans_binval(trans, bp);
error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
if (error)
goto out_cancel;
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 354472bf45f1..deab14f31b38 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -653,7 +653,6 @@ xfs_attri_recover_work(
break;
}
if (error) {
- xfs_irele(ip);
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attrp,
sizeof(*attrp));
return ERR_PTR(-EFSCORRUPTED);
@@ -1047,8 +1046,8 @@ xlog_recover_attri_commit_pass2(
break;
case XFS_ATTRI_OP_FLAGS_SET:
case XFS_ATTRI_OP_FLAGS_REPLACE:
- /* Log item, attr name, attr value */
- if (item->ri_total != 3) {
+ /* Log item, attr name, optional attr value */
+ if (item->ri_total != 2 + !!attri_formatp->alfi_value_len) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
attri_formatp, len);
return -EFSCORRUPTED;
@@ -1132,52 +1131,6 @@ xlog_recover_attri_commit_pass2(
return -EFSCORRUPTED;
}
- switch (op) {
- case XFS_ATTRI_OP_FLAGS_REMOVE:
- /* Regular remove operations operate only on names. */
- if (attr_value != NULL || value_len != 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- fallthrough;
- case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE:
- case XFS_ATTRI_OP_FLAGS_PPTR_SET:
- case XFS_ATTRI_OP_FLAGS_SET:
- case XFS_ATTRI_OP_FLAGS_REPLACE:
- /*
- * Regular xattr set/remove/replace operations require a name
- * and do not take a newname. Values are optional for set and
- * replace.
- *
- * Name-value set/remove operations must have a name, do not
- * take a newname, and can take a value.
- */
- if (attr_name == NULL || name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- break;
- case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE:
- /*
- * Name-value replace operations require the caller to
- * specify the old and new names and values explicitly.
- * Values are optional.
- */
- if (attr_name == NULL || name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- if (attr_new_name == NULL || new_name_len == 0) {
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
- attri_formatp, len);
- return -EFSCORRUPTED;
- }
- break;
- }
-
/*
* Memory alloc failure will cause replay to abort. We attach the
* name/value buffer to the recovered incore log item and drop our
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 491e2a7053a3..65a0e69c3d08 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -125,6 +125,7 @@ xfs_qm_dquot_logitem_push(
struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
struct xfs_dquot *dqp = qlip->qli_dquot;
struct xfs_buf *bp;
+ struct xfs_ail *ailp = lip->li_ailp;
uint rval = XFS_ITEM_SUCCESS;
int error;
@@ -153,7 +154,7 @@ xfs_qm_dquot_logitem_push(
goto out_unlock;
}
- spin_unlock(&lip->li_ailp->ail_lock);
+ spin_unlock(&ailp->ail_lock);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error == -EAGAIN) {
@@ -172,9 +173,13 @@ xfs_qm_dquot_logitem_push(
rval = XFS_ITEM_FLUSHING;
}
xfs_buf_relse(bp);
+ /*
+ * The buffer no longer protects the log item from reclaim, so
+ * do not reference lip after this point.
+ */
out_relock_ail:
- spin_lock(&lip->li_ailp->ail_lock);
+ spin_lock(&ailp->ail_lock);
out_unlock:
mutex_unlock(&dqp->q_qlock);
return rval;
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index d1291ca15239..2b8617ae7ec2 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -443,8 +443,8 @@ xfs_ioc_attr_list(
context.dp = dp;
context.resynch = 1;
context.attr_filter = xfs_attr_filter(flags);
- context.buffer = buffer;
context.bufsize = round_down(bufsize, sizeof(uint32_t));
+ context.buffer = buffer;
context.firstu = context.bufsize;
context.put_listent = xfs_ioc_attr_put_listent;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 50c0404f9064..beaa26ec62da 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1048,7 +1048,8 @@ xfs_itruncate_extents_flags(
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
if (icount_read(VFS_I(ip)))
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);
- ASSERT(new_size <= XFS_ISIZE(ip));
+ if (whichfork == XFS_DATA_FORK)
+ ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 8913036b8024..4ae81eed0442 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -746,6 +746,7 @@ xfs_inode_item_push(
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
struct xfs_buf *bp = lip->li_buf;
+ struct xfs_ail *ailp = lip->li_ailp;
uint rval = XFS_ITEM_SUCCESS;
int error;
@@ -771,7 +772,7 @@ xfs_inode_item_push(
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
- spin_unlock(&lip->li_ailp->ail_lock);
+ spin_unlock(&ailp->ail_lock);
/*
* We need to hold a reference for flushing the cluster buffer as it may
@@ -795,7 +796,11 @@ xfs_inode_item_push(
rval = XFS_ITEM_LOCKED;
}
- spin_lock(&lip->li_ailp->ail_lock);
+ /*
+ * The buffer no longer protects the log item from reclaim, so
+ * do not reference lip after this point.
+ */
+ spin_lock(&ailp->ail_lock);
return rval;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9c295abd0a0a..ef1ea8a1238c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -608,8 +608,9 @@ xfs_unmount_check(
* have been retrying in the background. This will prevent never-ending
* retries in AIL pushing from hanging the unmount.
*
- * Finally, we can push the AIL to clean all the remaining dirty objects, then
- * reclaim the remaining inodes that are still in memory at this point in time.
+ * Stop inodegc and background reclaim before pushing the AIL so that they
+ * are not running while the AIL is being flushed. Then push the AIL to
+ * clean all the remaining dirty objects and reclaim the remaining inodes.
*/
static void
xfs_unmount_flush_inodes(
@@ -621,9 +622,9 @@ xfs_unmount_flush_inodes(
xfs_set_unmounting(mp);
- xfs_ail_push_all_sync(mp->m_ail);
xfs_inodegc_stop(mp);
cancel_delayed_work_sync(&mp->m_reclaim_work);
+ xfs_ail_push_all_sync(mp->m_ail);
xfs_reclaim_inodes(mp);
xfs_health_unmount(mp);
xfs_healthmon_unmount(mp);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 813e5a9f57eb..5e8190fe2be9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -56,6 +56,7 @@
#include <linux/tracepoint.h>
struct xfs_agf;
+struct xfs_ail;
struct xfs_alloc_arg;
struct xfs_attr_list_context;
struct xfs_buf_log_item;
@@ -1650,16 +1651,43 @@ TRACE_EVENT(xfs_log_force,
DEFINE_EVENT(xfs_log_item_class, name, \
TP_PROTO(struct xfs_log_item *lip), \
TP_ARGS(lip))
-DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
-DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin);
DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort);
DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort);
+DECLARE_EVENT_CLASS(xfs_ail_push_class,
+ TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn),
+ TP_ARGS(ailp, type, flags, lsn),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(uint, type)
+ __field(unsigned long, flags)
+ __field(xfs_lsn_t, lsn)
+ ),
+ TP_fast_assign(
+ __entry->dev = ailp->ail_log->l_mp->m_super->s_dev;
+ __entry->type = type;
+ __entry->flags = flags;
+ __entry->lsn = lsn;
+ ),
+ TP_printk("dev %d:%d lsn %d/%d type %s flags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ CYCLE_LSN(__entry->lsn), BLOCK_LSN(__entry->lsn),
+ __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+ __print_flags(__entry->flags, "|", XFS_LI_FLAGS))
+)
+
+#define DEFINE_AIL_PUSH_EVENT(name) \
+DEFINE_EVENT(xfs_ail_push_class, name, \
+ TP_PROTO(struct xfs_ail *ailp, uint type, unsigned long flags, xfs_lsn_t lsn), \
+ TP_ARGS(ailp, type, flags, lsn))
+DEFINE_AIL_PUSH_EVENT(xfs_ail_push);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_pinned);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_locked);
+DEFINE_AIL_PUSH_EVENT(xfs_ail_flushing);
+
DECLARE_EVENT_CLASS(xfs_ail_class,
TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
TP_ARGS(lip, old_lsn, new_lsn),
@@ -5091,23 +5119,16 @@ TRACE_EVENT(xmbuf_create,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
- __array(char, pathname, MAXNAMELEN)
),
TP_fast_assign(
- char *path;
struct file *file = btp->bt_file;
__entry->dev = btp->bt_mount->m_super->s_dev;
__entry->ino = file_inode(file)->i_ino;
- path = file_path(file, __entry->pathname, MAXNAMELEN);
- if (IS_ERR(path))
- strncpy(__entry->pathname, "(unknown)",
- sizeof(__entry->pathname));
),
- TP_printk("dev %d:%d xmino 0x%lx path '%s'",
+ TP_printk("dev %d:%d xmino 0x%lx",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->pathname)
+ __entry->ino)
);
TRACE_EVENT(xmbuf_free,
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 923729af4206..99a9bf3762b7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -365,6 +365,12 @@ xfsaild_resubmit_item(
return XFS_ITEM_SUCCESS;
}
+/*
+ * Push a single log item from the AIL.
+ *
+ * @lip may have been released and freed by the time this function returns,
+ * so callers must not dereference the log item afterwards.
+ */
static inline uint
xfsaild_push_item(
struct xfs_ail *ailp,
@@ -458,6 +464,74 @@ xfs_ail_calc_push_target(
return target_lsn;
}
+static void
+xfsaild_process_logitem(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip,
+ int *stuck,
+ int *flushing)
+{
+ struct xfs_mount *mp = ailp->ail_log->l_mp;
+ uint type = lip->li_type;
+ unsigned long flags = lip->li_flags;
+ xfs_lsn_t item_lsn = lip->li_lsn;
+ int lock_result;
+
+ /*
+ * Note that iop_push may unlock and reacquire the AIL lock. We
+ * rely on the AIL cursor implementation to be able to deal with
+ * the dropped lock.
+ *
+ * The log item may have been freed by the push, so it must not
+ * be accessed or dereferenced below this line.
+ */
+ lock_result = xfsaild_push_item(ailp, lip);
+ switch (lock_result) {
+ case XFS_ITEM_SUCCESS:
+ XFS_STATS_INC(mp, xs_push_ail_success);
+ trace_xfs_ail_push(ailp, type, flags, item_lsn);
+
+ ailp->ail_last_pushed_lsn = item_lsn;
+ break;
+
+ case XFS_ITEM_FLUSHING:
+ /*
+ * The item or its backing buffer is already being
+ * flushed. The typical reason for that is that an
+ * inode buffer is locked because we already pushed the
+ * updates to it as part of inode clustering.
+ *
+ * We do not want to stop flushing just because lots
+ * of items are already being flushed, but we need to
+ * re-try the flushing relatively soon if most of the
+ * AIL is being flushed.
+ */
+ XFS_STATS_INC(mp, xs_push_ail_flushing);
+ trace_xfs_ail_flushing(ailp, type, flags, item_lsn);
+
+ (*flushing)++;
+ ailp->ail_last_pushed_lsn = item_lsn;
+ break;
+
+ case XFS_ITEM_PINNED:
+ XFS_STATS_INC(mp, xs_push_ail_pinned);
+ trace_xfs_ail_pinned(ailp, type, flags, item_lsn);
+
+ (*stuck)++;
+ ailp->ail_log_flush++;
+ break;
+ case XFS_ITEM_LOCKED:
+ XFS_STATS_INC(mp, xs_push_ail_locked);
+ trace_xfs_ail_locked(ailp, type, flags, item_lsn);
+
+ (*stuck)++;
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+}
+
static long
xfsaild_push(
struct xfs_ail *ailp)
@@ -505,62 +579,11 @@ xfsaild_push(
lsn = lip->li_lsn;
while ((XFS_LSN_CMP(lip->li_lsn, ailp->ail_target) <= 0)) {
- int lock_result;
if (test_bit(XFS_LI_FLUSHING, &lip->li_flags))
goto next_item;
- /*
- * Note that iop_push may unlock and reacquire the AIL lock. We
- * rely on the AIL cursor implementation to be able to deal with
- * the dropped lock.
- */
- lock_result = xfsaild_push_item(ailp, lip);
- switch (lock_result) {
- case XFS_ITEM_SUCCESS:
- XFS_STATS_INC(mp, xs_push_ail_success);
- trace_xfs_ail_push(lip);
-
- ailp->ail_last_pushed_lsn = lsn;
- break;
-
- case XFS_ITEM_FLUSHING:
- /*
- * The item or its backing buffer is already being
- * flushed. The typical reason for that is that an
- * inode buffer is locked because we already pushed the
- * updates to it as part of inode clustering.
- *
- * We do not want to stop flushing just because lots
- * of items are already being flushed, but we need to
- * re-try the flushing relatively soon if most of the
- * AIL is being flushed.
- */
- XFS_STATS_INC(mp, xs_push_ail_flushing);
- trace_xfs_ail_flushing(lip);
-
- flushing++;
- ailp->ail_last_pushed_lsn = lsn;
- break;
-
- case XFS_ITEM_PINNED:
- XFS_STATS_INC(mp, xs_push_ail_pinned);
- trace_xfs_ail_pinned(lip);
-
- stuck++;
- ailp->ail_log_flush++;
- break;
- case XFS_ITEM_LOCKED:
- XFS_STATS_INC(mp, xs_push_ail_locked);
- trace_xfs_ail_locked(lip);
-
- stuck++;
- break;
- default:
- ASSERT(0);
- break;
- }
-
+ xfsaild_process_logitem(ailp, lip, &stuck, &flushing);
count++;
/*
diff --git a/fs/xfs/xfs_verify_media.c b/fs/xfs/xfs_verify_media.c
index 8bbd4ec567f8..5ead3976d511 100644
--- a/fs/xfs/xfs_verify_media.c
+++ b/fs/xfs/xfs_verify_media.c
@@ -183,10 +183,9 @@ xfs_verify_iosize(
min_not_zero(SZ_1M, me->me_max_io_size);
BUILD_BUG_ON(BBSHIFT != SECTOR_SHIFT);
- ASSERT(BBTOB(bbcount) >= bdev_logical_block_size(btp->bt_bdev));
+ ASSERT(BBTOB(bbcount) >= btp->bt_logical_sectorsize);
- return clamp(iosize, bdev_logical_block_size(btp->bt_bdev),
- BBTOB(bbcount));
+ return clamp(iosize, btp->bt_logical_sectorsize, BBTOB(bbcount));
}
/* Allocate as much memory as we can get for verification buffer. */
@@ -218,8 +217,8 @@ xfs_verify_media_error(
unsigned int bio_bbcount,
blk_status_t bio_status)
{
- trace_xfs_verify_media_error(mp, me, btp->bt_bdev->bd_dev, daddr,
- bio_bbcount, bio_status);
+ trace_xfs_verify_media_error(mp, me, btp->bt_dev, daddr, bio_bbcount,
+ bio_status);
/*
* Pass any error, I/O or otherwise, up to the caller if we didn't
@@ -280,7 +279,7 @@ xfs_verify_media(
btp = mp->m_ddev_targp;
break;
case XFS_DEV_LOG:
- if (mp->m_logdev_targp->bt_bdev != mp->m_ddev_targp->bt_bdev)
+ if (mp->m_logdev_targp != mp->m_ddev_targp)
btp = mp->m_logdev_targp;
break;
case XFS_DEV_RT:
@@ -299,7 +298,7 @@ xfs_verify_media(
/* start and end have to be aligned to the lba size */
if (!IS_ALIGNED(BBTOB(me->me_start_daddr | me->me_end_daddr),
- bdev_logical_block_size(btp->bt_bdev)))
+ btp->bt_logical_sectorsize))
return -EINVAL;
/*
@@ -331,8 +330,7 @@ xfs_verify_media(
if (!folio)
return -ENOMEM;
- trace_xfs_verify_media(mp, me, btp->bt_bdev->bd_dev, daddr, bbcount,
- folio);
+ trace_xfs_verify_media(mp, me, btp->bt_dev, daddr, bbcount, folio);
bio = bio_alloc(btp->bt_bdev, 1, REQ_OP_READ, GFP_KERNEL);
if (!bio) {
@@ -400,7 +398,7 @@ out_folio:
* an operational error.
*/
me->me_start_daddr = daddr;
- trace_xfs_verify_media_end(mp, me, btp->bt_bdev->bd_dev);
+ trace_xfs_verify_media_end(mp, me, btp->bt_dev);
return 0;
}
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index a735f16d9cd8..544213067d59 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -332,8 +332,8 @@ xfs_vn_listxattr(
memset(&context, 0, sizeof(context));
context.dp = XFS_I(inode);
context.resynch = 1;
- context.buffer = size ? data : NULL;
context.bufsize = size;
+ context.buffer = size ? data : NULL;
context.firstu = context.bufsize;
context.put_listent = xfs_xattr_put_listent;