From 878520ac45f9f698432d4276db3d9144b83931b6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Tue, 19 Nov 2019 21:54:15 -0500 Subject: ext4: save the error code which triggered an ext4_error() in the superblock This allows the cause of an ext4_error() report to be categorized based on whether it was triggered due to an I/O error, or an memory allocation error, or other possible causes. Most errors are caused by a detected file system inconsistency, so the default code stored in the superblock will be EXT4_ERR_EFSCORRUPTED. Link: https://lore.kernel.org/r/20191204032335.7683-1-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ext4/namei.c') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1cb42d940784..1bb6099397af 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -156,6 +156,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (ext4_dx_csum_verify(inode, dirent)) set_buffer_verified(bh); else { + ext4_set_errno(inode->i_sb, EFSBADCRC); ext4_error_inode(inode, func, line, block, "Directory index failed checksum"); brelse(bh); @@ -166,6 +167,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, if (ext4_dirblock_csum_verify(inode, bh)) set_buffer_verified(bh); else { + ext4_set_errno(inode->i_sb, EFSBADCRC); ext4_error_inode(inode, func, line, block, "Directory block failed checksum"); brelse(bh); @@ -1527,6 +1529,7 @@ restart: goto next; wait_on_buffer(bh); if (!buffer_uptodate(bh)) { + ext4_set_errno(sb, EIO); EXT4_ERROR_INODE(dir, "reading directory lblock %lu", (unsigned long) block); brelse(bh); @@ -1537,6 +1540,7 @@ restart: !is_dx_internal_node(dir, block, (struct ext4_dir_entry *)bh->b_data) && !ext4_dirblock_csum_verify(dir, bh)) { + ext4_set_errno(sb, EFSBADCRC); EXT4_ERROR_INODE(dir, "checksumming directory " "block %lu", (unsigned long)block); brelse(bh); -- cgit v1.2.3 From 46f870d690fecc792a66730dcbbf0aa109f5f9ab Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 21 Nov 2019 13:09:43 -0500 Subject: ext4: simulate various I/O and checksum errors when reading metadata This allows us to test various error handling code paths Link: https://lore.kernel.org/r/20191209012317.59398-1-tytso@mit.edu Signed-off-by: Theodore Ts'o --- fs/ext4/balloc.c | 4 +++- fs/ext4/ext4.h | 37 +++++++++++++++++++++++++++++++++++++ fs/ext4/ialloc.c | 4 +++- fs/ext4/inode.c | 6 +++++- fs/ext4/namei.c | 11 ++++++++--- fs/ext4/sysfs.c | 23 +++++++++++++++++++++++ 6 files changed, 79 insertions(+), 6 deletions(-) (limited to 'fs/ext4/namei.c') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 102c38527a10..5f993a411251 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -371,7 +371,8 @@ static int ext4_validate_block_bitmap(struct super_block *sb, if (buffer_verified(bh)) goto verified; if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group, - desc, bh))) { + desc, bh) || + ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) { ext4_unlock_group(sb, block_group); ext4_error(sb, "bg %u: bad block bitmap checksum", block_group); ext4_mark_group_bitmap_corrupted(sb, block_group, @@ -505,6 +506,7 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group, if (!desc) return -EFSCORRUPTED; wait_on_buffer(bh); + ext4_simulate_fail_bh(sb, bh, EXT4_SIM_BBITMAP_EIO); if (!buffer_uptodate(bh)) { ext4_set_errno(sb, EIO); ext4_error(sb, "Cannot read block bitmap - " diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b00d07bad45b..5edc16d36a96 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1557,6 +1557,9 @@ struct ext4_sb_info { /* Barrier between changing inodes' journal flags and writepages ops. */ struct percpu_rw_semaphore s_journal_flag_rwsem; struct dax_device *s_daxdev; +#ifdef CONFIG_EXT4_DEBUG + unsigned long s_simulate_fail; +#endif }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) @@ -1575,6 +1578,40 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } +/* + * Simulate_fail codes + */ +#define EXT4_SIM_BBITMAP_EIO 1 +#define EXT4_SIM_BBITMAP_CRC 2 +#define EXT4_SIM_IBITMAP_EIO 3 +#define EXT4_SIM_IBITMAP_CRC 4 +#define EXT4_SIM_INODE_EIO 5 +#define EXT4_SIM_INODE_CRC 6 +#define EXT4_SIM_DIRBLOCK_EIO 7 +#define EXT4_SIM_DIRBLOCK_CRC 8 + +static inline bool ext4_simulate_fail(struct super_block *sb, + unsigned long code) +{ +#ifdef CONFIG_EXT4_DEBUG + struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (unlikely(sbi->s_simulate_fail == code)) { + sbi->s_simulate_fail = 0; + return true; + } +#endif + return false; +} + +static inline void ext4_simulate_fail_bh(struct super_block *sb, + struct buffer_head *bh, + unsigned long code) +{ + if (!IS_ERR(bh) && ext4_simulate_fail(sb, code)) + clear_buffer_uptodate(bh); +} + /* * Error number codes for s_{first,last}_error_errno * diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 0151ba8ea439..c66e8f9451a2 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -94,7 +94,8 @@ static int ext4_validate_inode_bitmap(struct super_block *sb, goto verified; blk = ext4_inode_bitmap(sb, desc); if (!ext4_inode_bitmap_csum_verify(sb, block_group, desc, bh, - EXT4_INODES_PER_GROUP(sb) / 8)) { + EXT4_INODES_PER_GROUP(sb) / 8) || + ext4_simulate_fail(sb, EXT4_SIM_IBITMAP_CRC)) { ext4_unlock_group(sb, block_group); ext4_error(sb, "Corrupt inode bitmap - block_group = %u, " "inode_bitmap = %llu", block_group, blk); @@ -192,6 +193,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) get_bh(bh); submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); + ext4_simulate_fail_bh(sb, bh, EXT4_SIM_IBITMAP_EIO); if (!buffer_uptodate(bh)) { put_bh(bh); ext4_set_errno(sb, EIO); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 23fa585206f0..c3270aaa2b75 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4243,6 +4243,8 @@ static int __ext4_get_inode_loc(struct inode *inode, bh = sb_getblk(sb, block); if (unlikely(!bh)) return -ENOMEM; + if (ext4_simulate_fail(sb, EXT4_SIM_INODE_EIO)) + goto simulate_eio; if (!buffer_uptodate(bh)) { lock_buffer(bh); @@ -4341,6 +4343,7 @@ make_io: blk_finish_plug(&plug); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { + simulate_eio: ext4_set_errno(inode->i_sb, EIO); EXT4_ERROR_INODE_BLOCK(inode, block, "unable to read itable block"); @@ -4555,7 +4558,8 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, sizeof(gen)); } - if (!ext4_inode_csum_verify(inode, raw_inode, ei)) { + if (!ext4_inode_csum_verify(inode, raw_inode, ei) || + ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) { ext4_set_errno(inode->i_sb, EFSBADCRC); ext4_error_inode(inode, function, line, 0, "iget: checksum invalid"); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 1bb6099397af..d4c0d7a18d64 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -109,7 +109,10 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, struct ext4_dir_entry *dirent; int is_dx_block = 0; - bh = ext4_bread(NULL, inode, block, 0); + if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO)) + bh = ERR_PTR(-EIO); + else + bh = ext4_bread(NULL, inode, block, 0); if (IS_ERR(bh)) { __ext4_warning(inode->i_sb, func, line, "inode #%lu: lblock %lu: comm %s: " @@ -153,7 +156,8 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, * caller is sure it should be an index block. */ if (is_dx_block && type == INDEX) { - if (ext4_dx_csum_verify(inode, dirent)) + if (ext4_dx_csum_verify(inode, dirent) && + !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC)) set_buffer_verified(bh); else { ext4_set_errno(inode->i_sb, EFSBADCRC); @@ -164,7 +168,8 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, } } if (!is_dx_block) { - if (ext4_dirblock_csum_verify(inode, bh)) + if (ext4_dirblock_csum_verify(inode, bh) && + !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC)) set_buffer_verified(bh); else { ext4_set_errno(inode->i_sb, EFSBADCRC); diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index eb1efad0e20a..a990d28d191b 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -29,6 +29,7 @@ typedef enum { attr_last_error_time, attr_feature, attr_pointer_ui, + attr_pointer_ul, attr_pointer_atomic, attr_journal_task, } attr_id_t; @@ -160,6 +161,9 @@ static struct ext4_attr ext4_attr_##_name = { \ #define EXT4_RW_ATTR_SBI_UI(_name,_elname) \ EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname) +#define EXT4_RW_ATTR_SBI_UL(_name,_elname) \ + EXT4_ATTR_OFFSET(_name, 0644, pointer_ul, ext4_sb_info, _elname) + #define EXT4_ATTR_PTR(_name,_mode,_id,_ptr) \ static struct ext4_attr ext4_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = _mode }, \ @@ -194,6 +198,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); +#ifdef CONFIG_EXT4_DEBUG +EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail); +#endif EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); EXT4_ATTR(first_error_time, 0444, first_error_time); EXT4_ATTR(last_error_time, 0444, last_error_time); @@ -228,6 +235,9 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(first_error_time), ATTR_LIST(last_error_time), ATTR_LIST(journal_task), +#ifdef CONFIG_EXT4_DEBUG + ATTR_LIST(simulate_fail), +#endif NULL, }; ATTRIBUTE_GROUPS(ext4); @@ -318,6 +328,11 @@ static ssize_t ext4_attr_show(struct kobject *kobj, else return snprintf(buf, PAGE_SIZE, "%u\n", *((unsigned int *) ptr)); + case attr_pointer_ul: + if (!ptr) + return 0; + return snprintf(buf, PAGE_SIZE, "%lu\n", + *((unsigned long *) ptr)); case attr_pointer_atomic: if (!ptr) return 0; @@ -361,6 +376,14 @@ static ssize_t ext4_attr_store(struct kobject *kobj, else *((unsigned int *) ptr) = t; return len; + case attr_pointer_ul: + if (!ptr) + return 0; + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + *((unsigned long *) ptr) = t; + return len; case attr_inode_readahead: return inode_readahead_blks_store(sbi, buf, len); case attr_trigger_test_error: -- cgit v1.2.3 From 64c314ff822786b4634bb5cfe91ed5a34ba8743e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Dec 2019 13:32:25 -0800 Subject: ext4: remove unnecessary ifdefs in htree_dirblock_to_tree() The ifdefs for CONFIG_FS_ENCRYPTION in htree_dirblock_to_tree() are unnecessary, as the called functions are already stubbed out when !CONFIG_FS_ENCRYPTION. Remove them. Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20191209213225.18477-1-ebiggers@kernel.org Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/namei.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/ext4/namei.c') diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d4c0d7a18d64..129d2ebae00d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1009,7 +1009,6 @@ static int htree_dirblock_to_tree(struct file *dir_file, top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0)); -#ifdef CONFIG_FS_ENCRYPTION /* Check if the directory is encrypted */ if (IS_ENCRYPTED(dir)) { err = fscrypt_get_encryption_info(dir); @@ -1024,7 +1023,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, return err; } } -#endif + for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) { if (ext4_check_dir_entry(dir, NULL, de, bh, bh->b_data, bh->b_size, @@ -1072,9 +1071,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, } errout: brelse(bh); -#ifdef CONFIG_FS_ENCRYPTION fscrypt_fname_free_buffer(&fname_crypto_str); -#endif return count; } -- cgit v1.2.3 From 48a34311953d921235f4d7bbd2111690d2e469cf Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 10 Feb 2020 15:43:16 +0100 Subject: ext4: fix checksum errors with indexed dirs DIR_INDEX has been introduced as a compat ext4 feature. That means that even kernels / tools that don't understand the feature may modify the filesystem. This works because for kernels not understanding indexed dir format, internal htree nodes appear just as empty directory entries. Index dir aware kernels then check the htree structure is still consistent before using the data. This all worked reasonably well until metadata checksums were introduced. The problem is that these effectively made DIR_INDEX only ro-compatible because internal htree nodes store checksums in a different place than normal directory blocks. Thus any modification ignorant to DIR_INDEX (or just clearing EXT4_INDEX_FL from the inode) will effectively cause checksum mismatch and trigger kernel errors. So we have to be more careful when dealing with indexed directories on filesystems with checksumming enabled. 1) We just disallow loading any directory inodes with EXT4_INDEX_FL when DIR_INDEX is not enabled. This is harsh but it should be very rare (it means someone disabled DIR_INDEX on existing filesystem and didn't run e2fsck), e2fsck can fix the problem, and we don't want to answer the difficult question: "Should we rather corrupt the directory more or should we ignore that DIR_INDEX feature is not set?" 2) When we find out htree structure is corrupted (but the filesystem and the directory should in support htrees), we continue just ignoring htree information for reading but we refuse to add new entries to the directory to avoid corrupting it more. Link: https://lore.kernel.org/r/20200210144316.22081-1-jack@suse.cz Fixes: dbe89444042a ("ext4: Calculate and verify checksums for htree nodes") Reviewed-by: Andreas Dilger Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@kernel.org --- fs/ext4/dir.c | 14 ++++++++------ fs/ext4/ext4.h | 5 ++++- fs/ext4/inode.c | 12 ++++++++++++ fs/ext4/namei.c | 7 +++++++ 4 files changed, 31 insertions(+), 7 deletions(-) (limited to 'fs/ext4/namei.c') diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 1f340743c9a8..9aa1f75409b0 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -129,12 +129,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (err != ERR_BAD_DX_DIR) { return err; } - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - ext4_clear_inode_flag(file_inode(file), - EXT4_INODE_INDEX); + /* Can we just clear INDEX flag to ignore htree information? */ + if (!ext4_has_metadata_csum(sb)) { + /* + * We don't set the inode dirty flag since it's not + * critical that it gets flushed back to the disk. + */ + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } if (ext4_has_inline_data(inode)) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9a2ee2428ecc..4441331d06cc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2544,8 +2544,11 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) + if (!ext4_has_feature_dir_index(inode->i_sb)) { + /* ext4_iget() should have caught this... */ + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } static const unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 3313168b680f..c04a15fc8b6a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4644,6 +4644,18 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ret = -EFSCORRUPTED; goto bad_inode; } + /* + * If dir_index is not enabled but there's dir with INDEX flag set, + * we'd normally treat htree data as empty space. But with metadata + * checksumming that corrupts checksums so forbid that. + */ + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { + ext4_error_inode(inode, function, line, 0, + "iget: Dir with htree data on filesystem without dir_index feature."); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 129d2ebae00d..ceff4b4b1877 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2213,6 +2213,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; + /* Can we just ignore htree data? */ + if (ext4_has_metadata_csum(sb)) { + EXT4_ERROR_INODE(dir, + "Directory has corrupted htree index."); + retval = -EFSCORRUPTED; + goto out; + } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); -- cgit v1.2.3