From e3ea75ee651daf5e434afbfdb7dbf75e200ea1f6 Mon Sep 17 00:00:00 2001
From: Lukas Czerner
Date: Tue, 4 Oct 2022 15:58:03 +0200
Subject: ext4: journal_path mount options should follow links

Before the commit 461c3af045d3 ("ext4: Change handle_mount_opt() to use
fs_parameter") ext4 mount option journal_path did follow links in the
provided path. Bring this behavior back by allowing to pass pathwalk
flags to fs_lookup_param().

Fixes: 461c3af045d3 ("ext4: Change handle_mount_opt() to use fs_parameter")
Signed-off-by: Lukas Czerner
Reviewed-by: Darrick J. Wong
Link: https://lore.kernel.org/r/20221004135803.32283-1-lczerner@redhat.com
Signed-off-by: Theodore Ts'o
Cc: stable@kernel.org
---
 include/linux/fs_parser.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index f103c91139d4..01542c4b87a2 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -76,6 +76,7 @@ static inline int fs_parse(struct fs_context *fc,
 extern int fs_lookup_param(struct fs_context *fc,
 			   struct fs_parameter *param,
 			   bool want_bdev,
+			   unsigned int flags,
 			   struct path *_path);
 
 extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found);
-- cgit v1.2.3
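The ext4 caller side of this change falls outside the "limited to
'include/linux'" view above. A minimal sketch of how a filesystem could
use the new flags argument, assuming the standard LOOKUP_FOLLOW pathwalk
flag from <linux/namei.h>; the function name example_parse_journal_path
and its body are illustrative, not taken from the ext4 patch:

#include <linux/fs_parser.h>
#include <linux/namei.h>

/*
 * Illustrative only: resolve a path-valued mount parameter, following
 * symlinks the way journal_path did before commit 461c3af045d3.
 */
static int example_parse_journal_path(struct fs_context *fc,
				      struct fs_parameter *param)
{
	struct path path;
	int err;

	/* want_bdev == false: the parameter names a file, not a block device */
	err = fs_lookup_param(fc, param, false, LOOKUP_FOLLOW, &path);
	if (err) {
		errorfc(fc, "journal_path: path lookup failed");
		return err;
	}

	/* ... use path.dentry / path.mnt here ... */

	path_put(&path);
	return 0;
}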
From a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 23 Nov 2022 20:39:50 +0100
Subject: ext4: fix deadlock due to mbcache entry corruption

When manipulating xattr blocks, we can deadlock infinitely looping
inside ext4_xattr_block_set() where we constantly keep finding xattr
block for reuse in mbcache but we are unable to reuse it because its
reference count is too big. This happens because cache entry for the
xattr block is marked as reusable (e_reusable set) although its
reference count is too big. When this inconsistency happens, this
inconsistent state is kept indefinitely and so ext4_xattr_block_set()
keeps retrying indefinitely.

The inconsistent state is caused by non-atomic update of e_reusable
bit. e_reusable is part of a bitfield and e_reusable update can race
with update of e_referenced bit in the same bitfield resulting in loss
of one of the updates. Fix the problem by using atomic bitops instead.

This bug has been around for many years, but it became *much* easier
to hit after commit 65f8b80053a1 ("ext4: fix race when reusing xattr
blocks").

Cc: stable@vger.kernel.org
Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries")
Fixes: 65f8b80053a1 ("ext4: fix race when reusing xattr blocks")
Reported-and-tested-by: Jeremi Piotrowski
Reported-by: Thilo Fromm
Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/
Signed-off-by: Jan Kara
Reviewed-by: Andreas Dilger
Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz
Signed-off-by: Theodore Ts'o
---
 fs/ext4/xattr.c         |  4 ++--
 fs/mbcache.c            | 14 ++++++++------
 include/linux/mbcache.h |  9 +++++++--
 3 files changed, 17 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 4d1c701f0eec..6bdd502527f8 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1281,7 +1281,7 @@ retry_ref:
 			ce = mb_cache_entry_get(ea_block_cache, hash,
 						bh->b_blocknr);
 			if (ce) {
-				ce->e_reusable = 1;
+				set_bit(MBE_REUSABLE_B, &ce->e_flags);
 				mb_cache_entry_put(ea_block_cache, ce);
 			}
 		}
@@ -2043,7 +2043,7 @@ inserted:
 				}
 				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
 				if (ref == EXT4_XATTR_REFCOUNT_MAX)
-					ce->e_reusable = 0;
+					clear_bit(MBE_REUSABLE_B, &ce->e_flags);
 				ea_bdebug(new_bh, "reusing; refcount now=%d",
 					  ref);
 				ext4_xattr_block_csum_set(inode, new_bh);
diff --git a/fs/mbcache.c b/fs/mbcache.c
index e272ad738faf..2a4b8b549e93 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -100,8 +100,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
 	atomic_set(&entry->e_refcnt, 2);
 	entry->e_key = key;
 	entry->e_value = value;
-	entry->e_reusable = reusable;
-	entry->e_referenced = 0;
+	entry->e_flags = 0;
+	if (reusable)
+		set_bit(MBE_REUSABLE_B, &entry->e_flags);
 	head = mb_cache_entry_head(cache, key);
 	hlist_bl_lock(head);
 	hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
@@ -165,7 +166,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
 	while (node) {
 		entry = hlist_bl_entry(node, struct mb_cache_entry,
 				       e_hash_list);
-		if (entry->e_key == key && entry->e_reusable &&
+		if (entry->e_key == key &&
+		    test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
 		    atomic_inc_not_zero(&entry->e_refcnt))
 			goto out;
 		node = node->next;
@@ -284,7 +286,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
 void mb_cache_entry_touch(struct mb_cache *cache,
 			  struct mb_cache_entry *entry)
 {
-	entry->e_referenced = 1;
+	set_bit(MBE_REFERENCED_B, &entry->e_flags);
 }
 EXPORT_SYMBOL(mb_cache_entry_touch);
 
@@ -309,9 +311,9 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
 		entry = list_first_entry(&cache->c_list,
 					 struct mb_cache_entry, e_list);
 		/* Drop initial hash reference if there is no user */
-		if (entry->e_referenced ||
+		if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
 		    atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
-			entry->e_referenced = 0;
+			clear_bit(MBE_REFERENCED_B, &entry->e_flags);
 			list_move_tail(&entry->e_list, &cache->c_list);
 			continue;
 		}
diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
index 2da63fd7b98f..97e64184767d 100644
--- a/include/linux/mbcache.h
+++ b/include/linux/mbcache.h
@@ -10,6 +10,12 @@
 
 struct mb_cache;
 
+/* Cache entry flags */
+enum {
+	MBE_REFERENCED_B = 0,
+	MBE_REUSABLE_B
+};
+
 struct mb_cache_entry {
 	/* List of entries in cache - protected by cache->c_list_lock */
 	struct list_head e_list;
@@ -26,8 +32,7 @@ struct mb_cache_entry {
 	atomic_t e_refcnt;
 	/* Key in hash - stable during lifetime of the entry */
 	u32 e_key;
-	u32 e_referenced:1;
-	u32 e_reusable:1;
+	unsigned long e_flags;
 	/* User provided value - stable during lifetime of the entry */
 	u64 e_value;
 };
-- cgit v1.2.3
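The bug described above is a classic lost update on adjacent bitfield
members: e_referenced and e_reusable share one machine word, and each
bitfield assignment is a non-atomic read-modify-write of that word. A
minimal sketch of the before/after pattern; the names entry_old,
entry_new and the EX_* constants are illustrative, not the mbcache code
itself:

#include <linux/bitops.h>
#include <linux/types.h>

/* Before: both flags share one word; every update is a plain RMW. */
struct entry_old {
	u32 referenced:1;
	u32 reusable:1;
};
/*
 * CPU0: e->referenced = 1;   loads the word, sets bit 0, stores it back
 * CPU1: e->reusable = 0;     loads the word, clears bit 1, stores it back
 * If both load the old value before either stores, the later store wins
 * and one of the two updates is silently lost.
 */

/* After: a flags word that is only ever touched with atomic bitops. */
enum { EX_REFERENCED_B = 0, EX_REUSABLE_B };

struct entry_new {
	unsigned long flags;
};

static inline void entry_touch(struct entry_new *e)
{
	set_bit(EX_REFERENCED_B, &e->flags);	/* atomic; cannot be lost */
}

static inline void entry_mark_unreusable(struct entry_new *e)
{
	clear_bit(EX_REUSABLE_B, &e->flags);	/* atomic; cannot be lost */
}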
From f30ff35f6266993405c8659e48fddc3180692164 Mon Sep 17 00:00:00 2001
From: Jan Kara
Date: Wed, 7 Dec 2022 12:27:12 +0100
Subject: jbd2: switch jbd2_submit_inode_data() to use fs-provided hook for data writeout

jbd2_submit_inode_data() hardcoded use of
jbd2_journal_submit_inode_data_buffers() for submission of data pages.
Make it use j_submit_inode_data_buffers hook instead. This effectively
switches ext4 fastcommits to use ext4_writepages() for data writeout
instead of generic_writepages().

Signed-off-by: Jan Kara
Link: https://lore.kernel.org/r/20221207112722.22220-9-jack@suse.cz
Signed-off-by: Theodore Ts'o
---
 fs/ext4/fast_commit.c | 2 +-
 fs/jbd2/commit.c      | 5 ++---
 include/linux/jbd2.h  | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index 1da85f626116..4594b62f147b 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -981,7 +981,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
 			finish_wait(&ei->i_fc_wait, &wait);
 		}
 		spin_unlock(&sbi->s_fc_lock);
-		ret = jbd2_submit_inode_data(ei->jinode);
+		ret = jbd2_submit_inode_data(journal, ei->jinode);
 		if (ret)
 			return ret;
 		spin_lock(&sbi->s_fc_lock);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 885a7a6cc53e..4810438b7856 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -207,14 +207,13 @@ int jbd2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
 }
 
 /* Send all the data buffers related to an inode */
-int jbd2_submit_inode_data(struct jbd2_inode *jinode)
+int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
 {
-
 	if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
 		return 0;
 
 	trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
-	return jbd2_journal_submit_inode_data_buffers(jinode);
+	return journal->j_submit_inode_data_buffers(jinode);
 }
 EXPORT_SYMBOL(jbd2_submit_inode_data);
 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 0b7242370b56..2170e0cc279d 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1662,7 +1662,7 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid);
 int jbd2_fc_end_commit(journal_t *journal);
 int jbd2_fc_end_commit_fallback(journal_t *journal);
 int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out);
-int jbd2_submit_inode_data(struct jbd2_inode *jinode);
+int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode);
 int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode);
 int jbd2_fc_wait_bufs(journal_t *journal, int num_blks);
 int jbd2_fc_release_bufs(journal_t *journal);
-- cgit v1.2.3
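After this patch, the fast-commit data submission goes through whatever
callback the filesystem installed on the journal rather than the jbd2
default. A minimal sketch of that wiring, assuming the
j_submit_inode_data_buffers field shown in the diff; the my_fs_* names
and bodies are illustrative (ext4's real hook ends up in
ext4_writepages() per the commit message):

#include <linux/jbd2.h>

/*
 * Illustrative filesystem-specific replacement for
 * jbd2_journal_submit_inode_data_buffers().
 */
static int my_fs_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
	/* write back the dirty data pages of jinode->i_vfs_inode here */
	return 0;
}

static void my_fs_init_journal(journal_t *journal)
{
	/*
	 * Install the hook; both the regular commit path and, after this
	 * patch, jbd2_submit_inode_data() (fast commits) dispatch through it.
	 */
	journal->j_submit_inode_data_buffers = my_fs_submit_inode_data_buffers;
}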