diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-25 05:04:46 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-05-25 05:04:46 +0300 |
commit | fea3043314f30a87ca04fd1219661810600e256f (patch) | |
tree | 9cb37f5f3ad5cc3c8f4d3428e741945f3dd5bec8 /fs/ext4 | |
parent | 7208c9842c50f97327aac20be62edc8ad230f05c (diff) | |
parent | 5f41fdaea63ddf96d921ab36b2af4a90ccdb5744 (diff) | |
download | linux-fea3043314f30a87ca04fd1219661810600e256f.tar.xz |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"Various bug fixes and cleanups for ext4.
In particular, move the crypto related functions from fs/ext4/super.c
into a new fs/ext4/crypto.c, and fix a number of bugs found by fuzzers
and error injection tools"
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
ext4: only allow test_dummy_encryption when supported
ext4: fix bug_on in __es_tree_search
ext4: avoid cycles in directory h-tree
ext4: verify dir block before splitting it
ext4: filter out EXT4_FC_REPLAY from on-disk superblock field s_state
ext4: fix bug_on in ext4_writepages
ext4: refactor and move ext4_ioctl_get_encryption_pwsalt()
ext4: cleanup function defs from ext4.h into crypto.c
ext4: move ext4 crypto code to its own file crypto.c
ext4: fix memory leak in parse_apply_sb_mount_options()
ext4: reject the 'commit' option on ext2 filesystems
ext4: remove duplicated #include of dax.h in inode.c
ext4: fix race condition between ext4_write and ext4_convert_inline_data
ext4: convert symlink external data block mapping to bdev
ext4: add nowait mode for ext4_getblk()
ext4: fix journal_ioprio mount option handling
ext4: mark group as trimmed only if it was fully scanned
ext4: fix use-after-free in ext4_rename_dir_prepare
ext4: add unmount filesystem message
ext4: remove unnecessary conditionals
...
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/Makefile | 1 | ||||
-rw-r--r-- | fs/ext4/crypto.c | 246 | ||||
-rw-r--r-- | fs/ext4/dir.c | 6 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 84 | ||||
-rw-r--r-- | fs/ext4/extents.c | 20 | ||||
-rw-r--r-- | fs/ext4/fast_commit.c | 13 | ||||
-rw-r--r-- | fs/ext4/inline.c | 18 | ||||
-rw-r--r-- | fs/ext4/inode.c | 37 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 59 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 25 | ||||
-rw-r--r-- | fs/ext4/mmp.c | 2 | ||||
-rw-r--r-- | fs/ext4/namei.c | 214 | ||||
-rw-r--r-- | fs/ext4/super.c | 217 | ||||
-rw-r--r-- | fs/ext4/symlink.c | 51 |
14 files changed, 564 insertions, 429 deletions
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 7d89142e1421..72206a292676 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -17,3 +17,4 @@ ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o ext4-inode-test-objs += inode-test.o obj-$(CONFIG_EXT4_KUNIT_TESTS) += ext4-inode-test.o ext4-$(CONFIG_FS_VERITY) += verity.o +ext4-$(CONFIG_FS_ENCRYPTION) += crypto.o diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c new file mode 100644 index 000000000000..e20ac0654b3f --- /dev/null +++ b/fs/ext4/crypto.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/quotaops.h> +#include <linux/uuid.h> + +#include "ext4.h" +#include "xattr.h" +#include "ext4_jbd2.h" + +static void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, + const struct fscrypt_name *src) +{ + memset(dst, 0, sizeof(*dst)); + + dst->usr_fname = src->usr_fname; + dst->disk_name = src->disk_name; + dst->hinfo.hash = src->hash; + dst->hinfo.minor_hash = src->minor_hash; + dst->crypto_buf = src->crypto_buf; +} + +int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct ext4_filename *fname) +{ + struct fscrypt_name name; + int err; + + err = fscrypt_setup_filename(dir, iname, lookup, &name); + if (err) + return err; + + ext4_fname_from_fscrypt_name(fname, &name); + +#if IS_ENABLED(CONFIG_UNICODE) + err = ext4_fname_setup_ci_filename(dir, iname, fname); +#endif + return err; +} + +int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct ext4_filename *fname) +{ + struct fscrypt_name name; + int err; + + err = fscrypt_prepare_lookup(dir, dentry, &name); + if (err) + return err; + + ext4_fname_from_fscrypt_name(fname, &name); + +#if IS_ENABLED(CONFIG_UNICODE) + err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); +#endif + return err; +} + +void ext4_fname_free_filename(struct ext4_filename *fname) +{ + struct fscrypt_name name; + + name.crypto_buf = fname->crypto_buf; + fscrypt_free_filename(&name); + + 
fname->crypto_buf.name = NULL; + fname->usr_fname = NULL; + fname->disk_name.name = NULL; + +#if IS_ENABLED(CONFIG_UNICODE) + kfree(fname->cf_name.name); + fname->cf_name.name = NULL; +#endif +} + +static bool uuid_is_zero(__u8 u[16]) +{ + int i; + + for (i = 0; i < 16; i++) + if (u[i]) + return false; + return true; +} + +int ext4_ioctl_get_encryption_pwsalt(struct file *filp, void __user *arg) +{ + struct super_block *sb = file_inode(filp)->i_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); + int err, err2; + handle_t *handle; + + if (!ext4_has_feature_encrypt(sb)) + return -EOPNOTSUPP; + + if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) { + err = mnt_want_write_file(filp); + if (err) + return err; + handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto pwsalt_err_exit; + } + err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh, + EXT4_JTR_NONE); + if (err) + goto pwsalt_err_journal; + lock_buffer(sbi->s_sbh); + generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); + ext4_superblock_csum_set(sb); + unlock_buffer(sbi->s_sbh); + err = ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); +pwsalt_err_journal: + err2 = ext4_journal_stop(handle); + if (err2 && !err) + err = err2; +pwsalt_err_exit: + mnt_drop_write_file(filp); + if (err) + return err; + } + + if (copy_to_user(arg, sbi->s_es->s_encrypt_pw_salt, 16)) + return -EFAULT; + return 0; +} + +static int ext4_get_context(struct inode *inode, void *ctx, size_t len) +{ + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); +} + +static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, + void *fs_data) +{ + handle_t *handle = fs_data; + int res, res2, credits, retries = 0; + + /* + * Encrypting the root directory is not allowed because e2fsck expects + * lost+found to exist and be unencrypted, and encrypting the root + * directory would imply encrypting the lost+found directory as well 
as + * the filename "lost+found" itself. + */ + if (inode->i_ino == EXT4_ROOT_INO) + return -EPERM; + + if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode))) + return -EINVAL; + + if (ext4_test_inode_flag(inode, EXT4_INODE_DAX)) + return -EOPNOTSUPP; + + res = ext4_convert_inline_data(inode); + if (res) + return res; + + /* + * If a journal handle was specified, then the encryption context is + * being set on a new inode via inheritance and is part of a larger + * transaction to create the inode. Otherwise the encryption context is + * being set on an existing inode in its own transaction. Only in the + * latter case should the "retry on ENOSPC" logic be used. + */ + + if (handle) { + res = ext4_xattr_set_handle(handle, inode, + EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, + ctx, len, 0); + if (!res) { + ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); + ext4_clear_inode_state(inode, + EXT4_STATE_MAY_INLINE_DATA); + /* + * Update inode->i_flags - S_ENCRYPTED will be enabled, + * S_DAX may be disabled + */ + ext4_set_inode_flags(inode, false); + } + return res; + } + + res = dquot_initialize(inode); + if (res) + return res; +retry: + res = ext4_xattr_set_credits(inode, len, false /* is_create */, + &credits); + if (res) + return res; + + handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, + EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, + ctx, len, 0); + if (!res) { + ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); + /* + * Update inode->i_flags - S_ENCRYPTED will be enabled, + * S_DAX may be disabled + */ + ext4_set_inode_flags(inode, false); + res = ext4_mark_inode_dirty(handle, inode); + if (res) + EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); + } + res2 = ext4_journal_stop(handle); + + if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + if (!res) + res = res2; + return res; +} + 
+static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb) +{ + return EXT4_SB(sb)->s_dummy_enc_policy.policy; +} + +static bool ext4_has_stable_inodes(struct super_block *sb) +{ + return ext4_has_feature_stable_inodes(sb); +} + +static void ext4_get_ino_and_lblk_bits(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret) +{ + *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); + *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); +} + +const struct fscrypt_operations ext4_cryptops = { + .key_prefix = "ext4:", + .get_context = ext4_get_context, + .set_context = ext4_set_context, + .get_dummy_policy = ext4_get_dummy_policy, + .empty_dir = ext4_empty_dir, + .has_stable_inodes = ext4_has_stable_inodes, + .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, +}; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index a6bb86f52b9a..3985f8c33f95 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -412,7 +412,7 @@ struct fname { }; /* - * This functoin implements a non-recursive way of freeing all of the + * This function implements a non-recursive way of freeing all of the * nodes in the red-black tree. */ static void free_rb_tree_fname(struct rb_root *root) @@ -515,7 +515,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, /* * This is a helper function for ext4_dx_readdir. It calls filldir - * for all entres on the fname linked list. (Normally there is only + * for all entries on the fname linked list. (Normally there is only * one entry on the linked list, unless there are 62 bit hash collisions.) 
*/ static int call_filldir(struct file *file, struct dir_context *ctx, @@ -648,7 +648,7 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, unsigned int offset = 0; char *top; - de = (struct ext4_dir_entry_2 *)buf; + de = buf; top = buf + buf_size; while ((char *) de < top) { if (ext4_check_dir_entry(dir, NULL, de, bh, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a743b1e3b89e..d5cea9c2e2a2 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -673,6 +673,8 @@ enum { /* Caller will submit data before dropping transaction handle. This * allows jbd2 to avoid submitting data before commit. */ #define EXT4_GET_BLOCKS_IO_SUBMIT 0x0400 + /* Caller is in the atomic contex, find extent if it has been cached */ +#define EXT4_GET_BLOCKS_CACHED_NOWAIT 0x0800 /* * The bit position of these flags must not overlap with any of the @@ -1440,12 +1442,6 @@ struct ext4_super_block { #ifdef __KERNEL__ -#ifdef CONFIG_FS_ENCRYPTION -#define DUMMY_ENCRYPTION_ENABLED(sbi) ((sbi)->s_dummy_enc_policy.policy != NULL) -#else -#define DUMMY_ENCRYPTION_ENABLED(sbi) (0) -#endif - /* Number of quota types we support */ #define EXT4_MAXQUOTAS 3 @@ -2731,74 +2727,20 @@ extern int ext4_fname_setup_ci_filename(struct inode *dir, struct ext4_filename *fname); #endif +/* ext4 encryption related stuff goes here crypto.c */ #ifdef CONFIG_FS_ENCRYPTION -static inline void ext4_fname_from_fscrypt_name(struct ext4_filename *dst, - const struct fscrypt_name *src) -{ - memset(dst, 0, sizeof(*dst)); - - dst->usr_fname = src->usr_fname; - dst->disk_name = src->disk_name; - dst->hinfo.hash = src->hash; - dst->hinfo.minor_hash = src->minor_hash; - dst->crypto_buf = src->crypto_buf; -} - -static inline int ext4_fname_setup_filename(struct inode *dir, - const struct qstr *iname, - int lookup, - struct ext4_filename *fname) -{ - struct fscrypt_name name; - int err; - - err = fscrypt_setup_filename(dir, iname, lookup, &name); - if (err) - return err; - - 
ext4_fname_from_fscrypt_name(fname, &name); - -#if IS_ENABLED(CONFIG_UNICODE) - err = ext4_fname_setup_ci_filename(dir, iname, fname); -#endif - return err; -} +extern const struct fscrypt_operations ext4_cryptops; -static inline int ext4_fname_prepare_lookup(struct inode *dir, - struct dentry *dentry, - struct ext4_filename *fname) -{ - struct fscrypt_name name; - int err; +int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, + int lookup, struct ext4_filename *fname); - err = fscrypt_prepare_lookup(dir, dentry, &name); - if (err) - return err; +int ext4_fname_prepare_lookup(struct inode *dir, struct dentry *dentry, + struct ext4_filename *fname); - ext4_fname_from_fscrypt_name(fname, &name); +void ext4_fname_free_filename(struct ext4_filename *fname); -#if IS_ENABLED(CONFIG_UNICODE) - err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); -#endif - return err; -} - -static inline void ext4_fname_free_filename(struct ext4_filename *fname) -{ - struct fscrypt_name name; +int ext4_ioctl_get_encryption_pwsalt(struct file *filp, void __user *arg); - name.crypto_buf = fname->crypto_buf; - fscrypt_free_filename(&name); - - fname->crypto_buf.name = NULL; - fname->usr_fname = NULL; - fname->disk_name.name = NULL; - -#if IS_ENABLED(CONFIG_UNICODE) - kfree(fname->cf_name.name); - fname->cf_name.name = NULL; -#endif -} #else /* !CONFIG_FS_ENCRYPTION */ static inline int ext4_fname_setup_filename(struct inode *dir, const struct qstr *iname, @@ -2831,6 +2773,12 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->cf_name.name = NULL; #endif } + +static inline int ext4_ioctl_get_encryption_pwsalt(struct file *filp, + void __user *arg) +{ + return -EOPNOTSUPP; +} #endif /* !CONFIG_FS_ENCRYPTION */ /* dir.c */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e473fde6b64b..c148bb97b527 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -372,7 +372,7 @@ static int ext4_valid_extent_entries(struct inode 
*inode, { unsigned short entries; ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; + ext4_lblk_t cur = 0; if (eh->eh_entries == 0) return 1; @@ -396,11 +396,11 @@ static int ext4_valid_extent_entries(struct inode *inode, /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - if ((lblock <= prev) && prev) { + if (lblock < cur) { *pblk = ext4_ext_pblock(ext); return 0; } - prev = lblock + ext4_ext_get_actual_len(ext) - 1; + cur = lblock + ext4_ext_get_actual_len(ext); ext++; entries--; } @@ -420,13 +420,13 @@ static int ext4_valid_extent_entries(struct inode *inode, /* Check for overlapping index extents */ lblock = le32_to_cpu(ext_idx->ei_block); - if ((lblock <= prev) && prev) { + if (lblock < cur) { *pblk = ext4_idx_pblock(ext_idx); return 0; } ext_idx++; entries--; - prev = lblock; + cur = lblock + 1; } } return 1; @@ -4693,15 +4693,17 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) FALLOC_FL_INSERT_RANGE)) return -EOPNOTSUPP; + inode_lock(inode); + ret = ext4_convert_inline_data(inode); + inode_unlock(inode); + if (ret) + goto exit; + if (mode & FALLOC_FL_PUNCH_HOLE) { ret = ext4_punch_hole(file, offset, len); goto exit; } - ret = ext4_convert_inline_data(inode); - if (ret) - goto exit; - if (mode & FALLOC_FL_COLLAPSE_RANGE) { ret = ext4_collapse_range(file, offset, len); goto exit; diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 3d72565ec6e8..795a60ad1897 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -970,7 +970,7 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) /* Submit data for all the fast commit inodes */ static int ext4_fc_submit_inode_data_all(journal_t *journal) { - struct super_block *sb = (struct super_block *)(journal->j_private); + struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_inode_info *ei; int ret = 0; @@ -1004,7 +1004,7 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal) /* 
Wait for completion of data for all the fast commit inodes */ static int ext4_fc_wait_inode_data_all(journal_t *journal) { - struct super_block *sb = (struct super_block *)(journal->j_private); + struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_inode_info *pos, *n; int ret = 0; @@ -1031,7 +1031,7 @@ static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc) __acquires(&sbi->s_fc_lock) __releases(&sbi->s_fc_lock) { - struct super_block *sb = (struct super_block *)(journal->j_private); + struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n; struct inode *inode; @@ -1093,7 +1093,7 @@ lock_and_exit: static int ext4_fc_perform_commit(journal_t *journal) { - struct super_block *sb = (struct super_block *)(journal->j_private); + struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_inode_info *iter; struct ext4_fc_head head; @@ -1198,7 +1198,7 @@ static void ext4_fc_update_stats(struct super_block *sb, int status, */ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) { - struct super_block *sb = (struct super_block *)(journal->j_private); + struct super_block *sb = journal->j_private; struct ext4_sb_info *sbi = EXT4_SB(sb); int nblks = 0, ret, bsize = journal->j_blocksize; int subtid = atomic_read(&sbi->s_fc_subtid); @@ -1659,8 +1659,7 @@ static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl, set_nlink(inode, 1); ext4_mark_inode_dirty(NULL, inode); out: - if (inode) - iput(inode); + iput(inode); return ret; } diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 9c076262770d..513762c087a9 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1083,14 +1083,14 @@ static void ext4_update_final_de(void *de_buf, int old_size, int new_size) void *limit; int de_len; - de = (struct ext4_dir_entry_2 *)de_buf; + de = de_buf; if (old_size) { limit = de_buf + 
old_size; do { prev_de = de; de_len = ext4_rec_len_from_disk(de->rec_len, old_size); de_buf += de_len; - de = (struct ext4_dir_entry_2 *)de_buf; + de = de_buf; } while (de_buf < limit); prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size - @@ -1155,7 +1155,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle, * First create "." and ".." and then copy the dir information * back to the block. */ - de = (struct ext4_dir_entry_2 *)target; + de = target; de = ext4_init_dot_dotdot(inode, de, inode->i_sb->s_blocksize, csum_size, le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1); @@ -2005,6 +2005,18 @@ int ext4_convert_inline_data(struct inode *inode) if (!ext4_has_inline_data(inode)) { ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); return 0; + } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { + /* + * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is + * cleared. This means we are in the middle of moving of + * inline data to delay allocated block. Just force writeout + * here to finish conversion. 
+ */ + error = filemap_flush(inode->i_mapping); + if (error) + return error; + if (!ext4_has_inline_data(inode)) + return 0; } needed_blocks = ext4_writepage_trans_blocks(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 646ece9b3455..7555cbe77148 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -41,7 +41,6 @@ #include <linux/bitops.h> #include <linux/iomap.h> #include <linux/iversion.h> -#include <linux/dax.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -199,8 +198,7 @@ void ext4_evict_inode(struct inode *inode) */ if (inode->i_ino != EXT4_JOURNAL_INO && ext4_should_journal_data(inode) && - (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && - inode->i_data.nrpages) { + S_ISREG(inode->i_mode) && inode->i_data.nrpages) { journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; @@ -545,12 +543,21 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, } else { BUG(); } + + if (flags & EXT4_GET_BLOCKS_CACHED_NOWAIT) + return retval; #ifdef ES_AGGRESSIVE_TEST ext4_map_blocks_es_recheck(handle, inode, map, &orig_map, flags); #endif goto found; } + /* + * In the query cache no-wait mode, nothing we can do more if we + * cannot find extent in the cache. 
+ */ + if (flags & EXT4_GET_BLOCKS_CACHED_NOWAIT) + return 0; /* * Try to see if we can get the block without requesting a new @@ -837,10 +844,12 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, struct ext4_map_blocks map; struct buffer_head *bh; int create = map_flags & EXT4_GET_BLOCKS_CREATE; + bool nowait = map_flags & EXT4_GET_BLOCKS_CACHED_NOWAIT; int err; ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) || handle != NULL || create == 0); + ASSERT(create == 0 || !nowait); map.m_lblk = block; map.m_len = 1; @@ -851,6 +860,9 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, if (err < 0) return ERR_PTR(err); + if (nowait) + return sb_find_get_block(inode->i_sb, map.m_pblk); + bh = sb_getblk(inode->i_sb, map.m_pblk); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -2944,8 +2956,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, index = pos >> PAGE_SHIFT; - if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) || - ext4_verity_in_progress(inode)) { + if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) { *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; return ext4_write_begin(file, mapping, pos, len, flags, pagep, fsdata); @@ -3967,15 +3978,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) trace_ext4_punch_hole(inode, offset, length, 0); - ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); - if (ext4_has_inline_data(inode)) { - filemap_invalidate_lock(mapping); - ret = ext4_convert_inline_data(inode); - filemap_invalidate_unlock(mapping); - if (ret) - return ret; - } - /* * Write out all dirty pages to avoid race conditions * Then release them. 
@@ -4991,7 +4993,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, } if (IS_ENCRYPTED(inode)) { inode->i_op = &ext4_encrypted_symlink_inode_operations; - ext4_set_aops(inode); } else if (ext4_inode_is_fast_symlink(inode)) { inode->i_link = (char *)ei->i_data; inode->i_op = &ext4_fast_symlink_inode_operations; @@ -4999,9 +5000,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, sizeof(ei->i_data) - 1); } else { inode->i_op = &ext4_symlink_inode_operations; - ext4_set_aops(inode); } - inode_nohighmem(inode); } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { inode->i_op = &ext4_special_inode_operations; @@ -5398,6 +5397,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, if (attr->ia_valid & ATTR_SIZE) { handle_t *handle; loff_t oldsize = inode->i_size; + loff_t old_disksize; int shrink = (attr->ia_size < inode->i_size); if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { @@ -5469,6 +5469,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, inode->i_sb->s_blocksize_bits); down_write(&EXT4_I(inode)->i_data_sem); + old_disksize = EXT4_I(inode)->i_disksize; EXT4_I(inode)->i_disksize = attr->ia_size; rc = ext4_mark_inode_dirty(handle, inode); if (!error) @@ -5480,6 +5481,8 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, */ if (!error) i_size_write(inode, attr->ia_size); + else + EXT4_I(inode)->i_disksize = old_disksize; up_write(&EXT4_I(inode)->i_data_sem); ext4_journal_stop(handle); if (error) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 4d1d2326eee9..cb01c1da0f9d 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -16,7 +16,6 @@ #include <linux/file.h> #include <linux/quotaops.h> #include <linux/random.h> -#include <linux/uuid.h> #include <linux/uaccess.h> #include <linux/delay.h> #include <linux/iversion.h> @@ -504,18 +503,6 @@ journal_err_out: return err; } 
-#ifdef CONFIG_FS_ENCRYPTION -static int uuid_is_zero(__u8 u[16]) -{ - int i; - - for (i = 0; i < 16; i++) - if (u[i]) - return 0; - return 1; -} -#endif - /* * If immutable is set and we are not clearing it, we're not allowed to change * anything else in the inode. Don't error out if we're only trying to set @@ -1428,51 +1415,9 @@ resizefs_out: return -EOPNOTSUPP; return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); - case FS_IOC_GET_ENCRYPTION_PWSALT: { -#ifdef CONFIG_FS_ENCRYPTION - int err, err2; - struct ext4_sb_info *sbi = EXT4_SB(sb); - handle_t *handle; + case FS_IOC_GET_ENCRYPTION_PWSALT: + return ext4_ioctl_get_encryption_pwsalt(filp, (void __user *)arg); - if (!ext4_has_feature_encrypt(sb)) - return -EOPNOTSUPP; - if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) { - err = mnt_want_write_file(filp); - if (err) - return err; - handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - goto pwsalt_err_exit; - } - err = ext4_journal_get_write_access(handle, sb, - sbi->s_sbh, - EXT4_JTR_NONE); - if (err) - goto pwsalt_err_journal; - lock_buffer(sbi->s_sbh); - generate_random_uuid(sbi->s_es->s_encrypt_pw_salt); - ext4_superblock_csum_set(sb); - unlock_buffer(sbi->s_sbh); - err = ext4_handle_dirty_metadata(handle, NULL, - sbi->s_sbh); - pwsalt_err_journal: - err2 = ext4_journal_stop(handle); - if (err2 && !err) - err = err2; - pwsalt_err_exit: - mnt_drop_write_file(filp); - if (err) - return err; - } - if (copy_to_user((void __user *) arg, - sbi->s_es->s_encrypt_pw_salt, 16)) - return -EFAULT; - return 0; -#else - return -EOPNOTSUPP; -#endif - } case FS_IOC_GET_ENCRYPTION_POLICY: if (!ext4_has_feature_encrypt(sb)) return -EOPNOTSUPP; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index ea653d19f9ec..9f12f29bc346 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -695,13 +695,10 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, for (i = 0; i < max; i++) { if (mb_test_bit(i, 
buddy)) { - /* only single bit in buddy2 may be 1 */ + /* only single bit in buddy2 may be 0 */ if (!mb_test_bit(i << 1, buddy2)) { MB_CHECK_ASSERT( mb_test_bit((i<<1)+1, buddy2)); - } else if (!mb_test_bit((i << 1) + 1, buddy2)) { - MB_CHECK_ASSERT( - mb_test_bit(i << 1, buddy2)); } continue; } @@ -2919,7 +2916,7 @@ const struct seq_operations ext4_mb_seq_groups_ops = { int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) { - struct super_block *sb = (struct super_block *)seq->private; + struct super_block *sb = seq->private; struct ext4_sb_info *sbi = EXT4_SB(sb); seq_puts(seq, "mballoc:\n"); @@ -6398,6 +6395,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) * @start: first group block to examine * @max: last group block to examine * @minblocks: minimum extent block count + * @set_trimmed: set the trimmed flag if at least one block is trimmed * * ext4_trim_all_free walks through group's block bitmap searching for free * extents. When the free extent is found, mark it as used in group buddy @@ -6407,7 +6405,7 @@ __releases(ext4_group_lock_ptr(sb, e4b->bd_group)) static ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ext4_grpblk_t start, ext4_grpblk_t max, - ext4_grpblk_t minblocks) + ext4_grpblk_t minblocks, bool set_trimmed) { struct ext4_buddy e4b; int ret; @@ -6426,7 +6424,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, if (!EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) || minblocks < EXT4_SB(sb)->s_last_trim_minblks) { ret = ext4_try_to_trim_range(sb, &e4b, start, max, minblocks); - if (ret >= 0) + if (ret >= 0 && set_trimmed) EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info); } else { ret = 0; @@ -6463,6 +6461,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) ext4_fsblk_t first_data_blk = le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); ext4_fsblk_t max_blks = ext4_blocks_count(EXT4_SB(sb)->s_es); + bool whole_group, eof = false; int ret = 0; start = range->start >> sb->s_blocksize_bits; @@ 
-6481,8 +6480,10 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) goto out; } - if (end >= max_blks) + if (end >= max_blks - 1) { end = max_blks - 1; + eof = true; + } if (end <= first_data_blk) goto out; if (start < first_data_blk) @@ -6496,6 +6497,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) /* end now represents the last cluster to discard in this group */ end = EXT4_CLUSTERS_PER_GROUP(sb) - 1; + whole_group = true; for (group = first_group; group <= last_group; group++) { grp = ext4_get_group_info(sb, group); @@ -6512,12 +6514,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) * change it for the last group, note that last_cluster is * already computed earlier by ext4_get_group_no_and_offset() */ - if (group == last_group) + if (group == last_group) { end = last_cluster; - + whole_group = eof ? true : end == EXT4_CLUSTERS_PER_GROUP(sb) - 1; + } if (grp->bb_free >= minlen) { cnt = ext4_trim_all_free(sb, group, first_cluster, - end, minlen); + end, minlen, whole_group); if (cnt < 0) { ret = cnt; break; diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index cebea4270817..79d05e464c43 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -127,7 +127,7 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, */ static int kmmpd(void *data) { - struct super_block *sb = (struct super_block *) data; + struct super_block *sb = data; struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh; struct mmp_struct *mmp; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 767b4bfe39c3..47d0ca4c795b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -277,9 +277,9 @@ static struct dx_frame *dx_probe(struct ext4_filename *fname, struct dx_hash_info *hinfo, struct dx_frame *frame); static void dx_release(struct dx_frame *frames); -static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, - unsigned 
blocksize, struct dx_hash_info *hinfo, - struct dx_map_entry map[]); +static int dx_make_map(struct inode *dir, struct buffer_head *bh, + struct dx_hash_info *hinfo, + struct dx_map_entry *map_tail); static void dx_sort_map(struct dx_map_entry *map, unsigned count); static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from, char *to, struct dx_map_entry *offsets, @@ -777,12 +777,14 @@ static struct dx_frame * dx_probe(struct ext4_filename *fname, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in) { - unsigned count, indirect; + unsigned count, indirect, level, i; struct dx_entry *at, *entries, *p, *q, *m; struct dx_root *root; struct dx_frame *frame = frame_in; struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR); u32 hash; + ext4_lblk_t block; + ext4_lblk_t blocks[EXT4_HTREE_LEVEL]; memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0])); frame->bh = ext4_read_dirblock(dir, 0, INDEX); @@ -854,6 +856,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, } dxtrace(printk("Look up %x", hash)); + level = 0; + blocks[0] = 0; while (1) { count = dx_get_count(entries); if (!count || count > dx_get_limit(entries)) { @@ -882,15 +886,27 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, dx_get_block(at))); frame->entries = entries; frame->at = at; - if (!indirect--) + + block = dx_get_block(at); + for (i = 0; i <= level; i++) { + if (blocks[i] == block) { + ext4_warning_inode(dir, + "dx entry: tree cycle block %u points back to block %u", + blocks[level], block); + goto fail; + } + } + if (++level > indirect) return frame; + blocks[level] = block; frame++; - frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); + frame->bh = ext4_read_dirblock(dir, block, INDEX); if (IS_ERR(frame->bh)) { ret_err = (struct dx_frame *) frame->bh; frame->bh = NULL; goto fail; } + entries = ((struct dx_node *) frame->bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit(dir)) { @@ -1249,15 +1265,23 @@ static inline 
int search_dirblock(struct buffer_head *bh, * Create map of hash values, offsets, and sizes, stored at end of block. * Returns number of entries mapped. */ -static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, - unsigned blocksize, struct dx_hash_info *hinfo, +static int dx_make_map(struct inode *dir, struct buffer_head *bh, + struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { int count = 0; - char *base = (char *) de; + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data; + unsigned int buflen = bh->b_size; + char *base = bh->b_data; struct dx_hash_info h = *hinfo; - while ((char *) de < base + blocksize) { + if (ext4_has_metadata_csum(dir->i_sb)) + buflen -= sizeof(struct ext4_dir_entry_tail); + + while ((char *) de < base + buflen) { + if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen, + ((char *)de) - base)) + return -EFSCORRUPTED; if (de->name_len && de->inode) { if (ext4_hash_in_dirent(dir)) h.hash = EXT4_DIRENT_HASH(de); @@ -1270,8 +1294,7 @@ static int dx_make_map(struct inode *dir, struct ext4_dir_entry_2 *de, count++; cond_resched(); } - /* XXX: do we need to check rec_len == 0 case? 
-Chris */ - de = ext4_next_entry(de, blocksize); + de = ext4_next_entry(de, dir->i_sb->s_blocksize); } return count; } @@ -1943,8 +1966,11 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* create map in the end of data2 block */ map = (struct dx_map_entry *) (data2 + blocksize); - count = dx_make_map(dir, (struct ext4_dir_entry_2 *) data1, - blocksize, hinfo, map); + count = dx_make_map(dir, *bh, hinfo, map); + if (count < 0) { + err = count; + goto journal_error; + } map -= count; dx_sort_map(map, count); /* Ensure that neither split block is over half full */ @@ -2031,7 +2057,7 @@ int ext4_find_dest_de(struct inode *dir, struct inode *inode, unsigned int offset = 0; char *top; - de = (struct ext4_dir_entry_2 *)buf; + de = buf; top = buf + buf_size - reclen; while ((char *) de <= top) { if (ext4_check_dir_entry(dir, NULL, de, bh, @@ -2587,7 +2613,7 @@ int ext4_generic_delete_entry(struct inode *dir, i = 0; pde = NULL; - de = (struct ext4_dir_entry_2 *)entry_buf; + de = entry_buf; while (i < buf_size - csum_size) { if (ext4_check_dir_entry(dir, NULL, de, bh, entry_buf, buf_size, i)) @@ -3249,6 +3275,32 @@ out_trace: return retval; } +static int ext4_init_symlink_block(handle_t *handle, struct inode *inode, + struct fscrypt_str *disk_link) +{ + struct buffer_head *bh; + char *kaddr; + int err = 0; + + bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE); + if (IS_ERR(bh)) + return PTR_ERR(bh); + + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE); + if (err) + goto out; + + kaddr = (char *)bh->b_data; + memcpy(kaddr, disk_link->name, disk_link->len); + inode->i_size = disk_link->len - 1; + EXT4_I(inode)->i_disksize = inode->i_size; + err = ext4_handle_dirty_metadata(handle, inode, bh); +out: + brelse(bh); + return err; +} + static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, struct dentry *dentry, const char *symname) { @@ -3257,6 
+3309,7 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, int err, len = strlen(symname); int credits; struct fscrypt_str disk_link; + int retries = 0; if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb)))) return -EIO; @@ -3270,26 +3323,15 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (err) return err; - if ((disk_link.len > EXT4_N_BLOCKS * 4)) { - /* - * For non-fast symlinks, we just allocate inode and put it on - * orphan list in the first transaction => we need bitmap, - * group descriptor, sb, inode block, quota blocks, and - * possibly selinux xattr blocks. - */ - credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + - EXT4_XATTR_TRANS_BLOCKS; - } else { - /* - * Fast symlink. We have to add entry to directory - * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS), - * allocate new inode (bitmap, group descriptor, inode block, - * quota blocks, sb is already counted in previous macros). - */ - credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; - } - + /* + * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the + * directory. +3 for inode, inode bitmap, group descriptor allocation. + * EXT4_DATA_TRANS_BLOCKS for the data block allocation and + * modification. 
+ */ + credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; +retry: inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO, &dentry->d_name, 0, NULL, EXT4_HT_DIR, credits); @@ -3297,7 +3339,8 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (IS_ERR(inode)) { if (handle) ext4_journal_stop(handle); - return PTR_ERR(inode); + err = PTR_ERR(inode); + goto out_retry; } if (IS_ENCRYPTED(inode)) { @@ -3305,75 +3348,44 @@ static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir, if (err) goto err_drop_inode; inode->i_op = &ext4_encrypted_symlink_inode_operations; + } else { + if ((disk_link.len > EXT4_N_BLOCKS * 4)) { + inode->i_op = &ext4_symlink_inode_operations; + } else { + inode->i_op = &ext4_fast_symlink_inode_operations; + inode->i_link = (char *)&EXT4_I(inode)->i_data; + } } if ((disk_link.len > EXT4_N_BLOCKS * 4)) { - if (!IS_ENCRYPTED(inode)) - inode->i_op = &ext4_symlink_inode_operations; - inode_nohighmem(inode); - ext4_set_aops(inode); - /* - * We cannot call page_symlink() with transaction started - * because it calls into ext4_write_begin() which can wait - * for transaction commit if we are running out of space - * and thus we deadlock. So we have to stop transaction now - * and restart it when symlink contents is written. - * - * To keep fs consistent in case of crash, we have to put inode - * to orphan list in the mean time. 
- */ - drop_nlink(inode); - err = ext4_orphan_add(handle, inode); - if (handle) - ext4_journal_stop(handle); - handle = NULL; - if (err) - goto err_drop_inode; - err = __page_symlink(inode, disk_link.name, disk_link.len, 1); - if (err) - goto err_drop_inode; - /* - * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS - * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified - */ - handle = ext4_journal_start(dir, EXT4_HT_DIR, - EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); - if (IS_ERR(handle)) { - err = PTR_ERR(handle); - handle = NULL; - goto err_drop_inode; - } - set_nlink(inode, 1); - err = ext4_orphan_del(handle, inode); + /* alloc symlink block and fill it */ + err = ext4_init_symlink_block(handle, inode, &disk_link); if (err) goto err_drop_inode; } else { /* clear the extent format for fast symlink */ ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); - if (!IS_ENCRYPTED(inode)) { - inode->i_op = &ext4_fast_symlink_inode_operations; - inode->i_link = (char *)&EXT4_I(inode)->i_data; - } memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name, disk_link.len); inode->i_size = disk_link.len - 1; + EXT4_I(inode)->i_disksize = inode->i_size; } - EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_add_nondir(handle, dentry, &inode); if (handle) ext4_journal_stop(handle); - if (inode) - iput(inode); - goto out_free_encrypted_link; + iput(inode); + goto out_retry; err_drop_inode: - if (handle) - ext4_journal_stop(handle); clear_nlink(inode); + ext4_orphan_add(handle, inode); unlock_new_inode(inode); + if (handle) + ext4_journal_stop(handle); iput(inode); -out_free_encrypted_link: +out_retry: + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; if (disk_link.name != (unsigned char *)symname) kfree(disk_link.name); return err; @@ -3455,6 +3467,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { + struct ext4_dir_entry_2 
*de; + unsigned int offset; + /* The first directory block must not be a hole, so * treat it as DIRENT_HTREE */ @@ -3463,9 +3478,30 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, *retval = PTR_ERR(bh); return NULL; } - *parent_de = ext4_next_entry( - (struct ext4_dir_entry_2 *)bh->b_data, - inode->i_sb->s_blocksize); + + de = (struct ext4_dir_entry_2 *) bh->b_data; + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, + bh->b_size, 0) || + le32_to_cpu(de->inode) != inode->i_ino || + strcmp(".", de->name)) { + EXT4_ERROR_INODE(inode, "directory missing '.'"); + brelse(bh); + *retval = -EFSCORRUPTED; + return NULL; + } + offset = ext4_rec_len_from_disk(de->rec_len, + inode->i_sb->s_blocksize); + de = ext4_next_entry(de, inode->i_sb->s_blocksize); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, + bh->b_size, offset) || + le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { + EXT4_ERROR_INODE(inode, "directory missing '..'"); + brelse(bh); + *retval = -EFSCORRUPTED; + return NULL; + } + *parent_de = de; + return bh; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6900da973ce2..450c918d68fc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1211,6 +1211,9 @@ static void ext4_put_super(struct super_block *sb) */ ext4_unregister_sysfs(sb); + if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount")) + ext4_msg(sb, KERN_INFO, "unmounting filesystem."); + ext4_unregister_li_request(sb); ext4_quota_off_umount(sb); @@ -1397,7 +1400,7 @@ static void ext4_destroy_inode(struct inode *inode) static void init_once(void *foo) { - struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; + struct ext4_inode_info *ei = foo; INIT_LIST_HEAD(&ei->i_orphan); init_rwsem(&ei->xattr_sem); @@ -1492,128 +1495,6 @@ static int ext4_nfs_commit_metadata(struct inode *inode) return ext4_write_inode(inode, &wbc); } -#ifdef CONFIG_FS_ENCRYPTION -static int ext4_get_context(struct inode *inode, void *ctx, size_t len) -{ - return 
ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); -} - -static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, - void *fs_data) -{ - handle_t *handle = fs_data; - int res, res2, credits, retries = 0; - - /* - * Encrypting the root directory is not allowed because e2fsck expects - * lost+found to exist and be unencrypted, and encrypting the root - * directory would imply encrypting the lost+found directory as well as - * the filename "lost+found" itself. - */ - if (inode->i_ino == EXT4_ROOT_INO) - return -EPERM; - - if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode))) - return -EINVAL; - - if (ext4_test_inode_flag(inode, EXT4_INODE_DAX)) - return -EOPNOTSUPP; - - res = ext4_convert_inline_data(inode); - if (res) - return res; - - /* - * If a journal handle was specified, then the encryption context is - * being set on a new inode via inheritance and is part of a larger - * transaction to create the inode. Otherwise the encryption context is - * being set on an existing inode in its own transaction. Only in the - * latter case should the "retry on ENOSPC" logic be used. 
- */ - - if (handle) { - res = ext4_xattr_set_handle(handle, inode, - EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - ctx, len, 0); - if (!res) { - ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - ext4_clear_inode_state(inode, - EXT4_STATE_MAY_INLINE_DATA); - /* - * Update inode->i_flags - S_ENCRYPTED will be enabled, - * S_DAX may be disabled - */ - ext4_set_inode_flags(inode, false); - } - return res; - } - - res = dquot_initialize(inode); - if (res) - return res; -retry: - res = ext4_xattr_set_credits(inode, len, false /* is_create */, - &credits); - if (res) - return res; - - handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - ctx, len, 0); - if (!res) { - ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - /* - * Update inode->i_flags - S_ENCRYPTED will be enabled, - * S_DAX may be disabled - */ - ext4_set_inode_flags(inode, false); - res = ext4_mark_inode_dirty(handle, inode); - if (res) - EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); - } - res2 = ext4_journal_stop(handle); - - if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - if (!res) - res = res2; - return res; -} - -static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb) -{ - return EXT4_SB(sb)->s_dummy_enc_policy.policy; -} - -static bool ext4_has_stable_inodes(struct super_block *sb) -{ - return ext4_has_feature_stable_inodes(sb); -} - -static void ext4_get_ino_and_lblk_bits(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret) -{ - *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); - *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); -} - -static const struct fscrypt_operations ext4_cryptops = { - .key_prefix = "ext4:", - .get_context = ext4_get_context, - .set_context = ext4_set_context, - .get_dummy_policy = ext4_get_dummy_policy, 
- .empty_dir = ext4_empty_dir, - .has_stable_inodes = ext4_has_stable_inodes, - .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, -}; -#endif - #ifdef CONFIG_QUOTA static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) @@ -1867,7 +1748,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = { }; #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) -#define DEFAULT_MB_OPTIMIZE_SCAN (-1) static const char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n" @@ -1913,6 +1793,7 @@ static const struct mount_opts { MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, + {Opt_commit, 0, MOPT_NO_EXT2}, {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, @@ -2427,11 +2308,12 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size, GFP_KERNEL); + return 0; #else ext4_msg(NULL, KERN_WARNING, - "Test dummy encryption mount option ignored"); + "test_dummy_encryption option not supported"); + return -EINVAL; #endif - return 0; case Opt_dax: case Opt_dax_type: #ifdef CONFIG_FS_DAX @@ -2625,8 +2507,10 @@ parse_failed: ret = ext4_apply_options(fc, sb); out_free: - kfree(s_ctx); - kfree(fc); + if (fc) { + ext4_fc_free(fc); + kfree(fc); + } kfree(s_mount_opts); return ret; } @@ -2786,12 +2670,44 @@ err_jquota_specified: #endif } +static int ext4_check_test_dummy_encryption(const struct fs_context *fc, + struct super_block *sb) +{ +#ifdef CONFIG_FS_ENCRYPTION + const struct ext4_fs_context *ctx = fc->fs_private; + const struct ext4_sb_info *sbi = EXT4_SB(sb); + + if (!(ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION)) + return 0; + + if (!ext4_has_feature_encrypt(sb)) { + ext4_msg(NULL, KERN_WARNING, + 
"test_dummy_encryption requires encrypt feature"); + return -EINVAL; + } + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. + */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE && + !sbi->s_dummy_enc_policy.policy) { + ext4_msg(NULL, KERN_WARNING, + "Can't set test_dummy_encryption on remount"); + return -EINVAL; + } +#endif /* CONFIG_FS_ENCRYPTION */ + return 0; +} + static int ext4_check_opt_consistency(struct fs_context *fc, struct super_block *sb) { struct ext4_fs_context *ctx = fc->fs_private; struct ext4_sb_info *sbi = fc->s_fs_info; int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; + int err; if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { ext4_msg(NULL, KERN_ERR, @@ -2821,20 +2737,9 @@ static int ext4_check_opt_consistency(struct fs_context *fc, "for blocksize < PAGE_SIZE"); } -#ifdef CONFIG_FS_ENCRYPTION - /* - * This mount option is just for testing, and it's not worthwhile to - * implement the extra complexity (e.g. RCU protection) that would be - * needed to allow it to be set or changed during remount. We do allow - * it to be specified during remount, but only if there is no change. 
- */ - if ((ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) && - is_remount && !sbi->s_dummy_enc_policy.policy) { - ext4_msg(NULL, KERN_WARNING, - "Can't set test_dummy_encryption on remount"); - return -1; - } -#endif + err = ext4_check_test_dummy_encryption(fc, sb); + if (err) + return err; if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) { if (!sbi->s_journal) { @@ -3837,7 +3742,7 @@ static struct task_struct *ext4_lazyinit_task; */ static int ext4_lazyinit_thread(void *arg) { - struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; + struct ext4_lazy_init *eli = arg; struct list_head *pos, *n; struct ext4_li_request *elr; unsigned long next_wakeup, cur; @@ -4409,7 +4314,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) int silent = fc->sb_flags & SB_SILENT; /* Set defaults for the variables that will be set during parsing */ - ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sectors_written_start = @@ -4886,7 +4792,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_inodes_per_block; sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); sbi->s_sbh = bh; - sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY; sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); @@ -5279,12 +5185,6 @@ no_journal: goto failed_mount_wq; } - if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && - !ext4_has_feature_encrypt(sb)) { - ext4_set_feature_encrypt(sb); - ext4_commit_super(sb); - } - /* * Get the # of file system overhead blocks from the * superblock if present. 
@@ -6272,7 +6172,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) char *to_free[EXT4_MAXQUOTAS]; #endif - ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; /* Store the original options */ old_sb_flags = sb->s_flags; @@ -6298,9 +6197,14 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) } else old_opts.s_qf_names[i] = NULL; #endif - if (sbi->s_journal && sbi->s_journal->j_task->io_context) - ctx->journal_ioprio = - sbi->s_journal->j_task->io_context->ioprio; + if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) { + if (sbi->s_journal && sbi->s_journal->j_task->io_context) + ctx->journal_ioprio = + sbi->s_journal->j_task->io_context->ioprio; + else + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + + } ext4_apply_options(fc, sb); @@ -6441,7 +6345,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) if (err) goto restore_opts; } - sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_mount_state = (le16_to_cpu(es->s_state) & + ~EXT4_FC_REPLAY); err = ext4_setup_super(sb, es, 0); if (err) diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 69109746e6e2..d281f5bcc526 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -27,7 +27,7 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { - struct page *cpage = NULL; + struct buffer_head *bh = NULL; const void *caddr; unsigned int max_size; const char *paddr; @@ -39,16 +39,19 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry, caddr = EXT4_I(inode)->i_data; max_size = sizeof(EXT4_I(inode)->i_data); } else { - cpage = read_mapping_page(inode->i_mapping, 0, NULL); - if (IS_ERR(cpage)) - return ERR_CAST(cpage); - caddr = page_address(cpage); + bh = ext4_bread(NULL, inode, 0, 0); + if (IS_ERR(bh)) + return ERR_CAST(bh); + if (!bh) { + EXT4_ERROR_INODE(inode, "bad symlink."); + return ERR_PTR(-EFSCORRUPTED); + } + caddr = bh->b_data; max_size = inode->i_sb->s_blocksize; 
} paddr = fscrypt_get_symlink(inode, caddr, max_size, done); - if (cpage) - put_page(cpage); + brelse(bh); return paddr; } @@ -62,6 +65,38 @@ static int ext4_encrypted_symlink_getattr(struct user_namespace *mnt_userns, return fscrypt_symlink_getattr(path, stat); } +static void ext4_free_link(void *bh) +{ + brelse(bh); +} + +static const char *ext4_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + struct buffer_head *bh; + + if (!dentry) { + bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT); + if (IS_ERR(bh)) + return ERR_CAST(bh); + if (!bh || !ext4_buffer_uptodate(bh)) + return ERR_PTR(-ECHILD); + } else { + bh = ext4_bread(NULL, inode, 0, 0); + if (IS_ERR(bh)) + return ERR_CAST(bh); + if (!bh) { + EXT4_ERROR_INODE(inode, "bad symlink."); + return ERR_PTR(-EFSCORRUPTED); + } + } + + set_delayed_call(callback, ext4_free_link, bh); + nd_terminate_link(bh->b_data, inode->i_size, + inode->i_sb->s_blocksize - 1); + return bh->b_data; +} + const struct inode_operations ext4_encrypted_symlink_inode_operations = { .get_link = ext4_encrypted_get_link, .setattr = ext4_setattr, @@ -70,7 +105,7 @@ const struct inode_operations ext4_encrypted_symlink_inode_operations = { }; const struct inode_operations ext4_symlink_inode_operations = { - .get_link = page_get_link, + .get_link = ext4_get_link, .setattr = ext4_setattr, .getattr = ext4_getattr, .listxattr = ext4_listxattr, |