summaryrefslogtreecommitdiff
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r--fs/ext4/super.c731
1 files changed, 474 insertions, 257 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0c7c4adb664e..ea425b49b345 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -43,7 +43,7 @@
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
-
+#include <linux/part_stat.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static int ext4_commit_super(struct super_block *sb, int sync);
-static void ext4_mark_recovery_complete(struct super_block *sb,
+static int ext4_mark_recovery_complete(struct super_block *sb,
struct ext4_super_block *es);
-static void ext4_clear_journal_err(struct super_block *sb,
- struct ext4_super_block *es);
+static int ext4_clear_journal_err(struct super_block *sb,
+ struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_remount(struct super_block *sb, int *flags, char *data);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -93,11 +93,11 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* i_mmap_rwsem (inode->i_mmap_rwsem)!
*
* page fault path:
- * mmap_sem -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
+ * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
* page lock -> i_data_sem (rw)
*
* buffered write path:
- * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> mmap_lock
* sb_start_write -> i_mutex -> transaction start -> page lock ->
* i_data_sem (rw)
*
@@ -107,7 +107,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* i_data_sem (rw)
*
* direct IO:
- * sb_start_write -> i_mutex -> mmap_sem
+ * sb_start_write -> i_mutex -> mmap_lock
* sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
*
* writepages:
@@ -335,10 +335,12 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
#define ext4_get_tstamp(es, tstamp) \
__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
-static void __save_error_info(struct super_block *sb, const char *func,
- unsigned int line)
+static void __save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
{
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+ int err;
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
if (bdev_read_only(sb->s_bdev))
@@ -347,8 +349,62 @@ static void __save_error_info(struct super_block *sb, const char *func,
ext4_update_tstamp(es, s_last_error_time);
strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
es->s_last_error_line = cpu_to_le32(line);
- if (es->s_last_error_errcode == 0)
- es->s_last_error_errcode = EXT4_ERR_EFSCORRUPTED;
+ es->s_last_error_ino = cpu_to_le32(ino);
+ es->s_last_error_block = cpu_to_le64(block);
+ switch (error) {
+ case EIO:
+ err = EXT4_ERR_EIO;
+ break;
+ case ENOMEM:
+ err = EXT4_ERR_ENOMEM;
+ break;
+ case EFSBADCRC:
+ err = EXT4_ERR_EFSBADCRC;
+ break;
+ case 0:
+ case EFSCORRUPTED:
+ err = EXT4_ERR_EFSCORRUPTED;
+ break;
+ case ENOSPC:
+ err = EXT4_ERR_ENOSPC;
+ break;
+ case ENOKEY:
+ err = EXT4_ERR_ENOKEY;
+ break;
+ case EROFS:
+ err = EXT4_ERR_EROFS;
+ break;
+ case EFBIG:
+ err = EXT4_ERR_EFBIG;
+ break;
+ case EEXIST:
+ err = EXT4_ERR_EEXIST;
+ break;
+ case ERANGE:
+ err = EXT4_ERR_ERANGE;
+ break;
+ case EOVERFLOW:
+ err = EXT4_ERR_EOVERFLOW;
+ break;
+ case EBUSY:
+ err = EXT4_ERR_EBUSY;
+ break;
+ case ENOTDIR:
+ err = EXT4_ERR_ENOTDIR;
+ break;
+ case ENOTEMPTY:
+ err = EXT4_ERR_ENOTEMPTY;
+ break;
+ case ESHUTDOWN:
+ err = EXT4_ERR_ESHUTDOWN;
+ break;
+ case EFAULT:
+ err = EXT4_ERR_EFAULT;
+ break;
+ default:
+ err = EXT4_ERR_UNKNOWN;
+ }
+ es->s_last_error_errcode = err;
if (!es->s_first_error_time) {
es->s_first_error_time = es->s_last_error_time;
es->s_first_error_time_hi = es->s_last_error_time_hi;
@@ -368,11 +424,13 @@ static void __save_error_info(struct super_block *sb, const char *func,
le32_add_cpu(&es->s_error_count, 1);
}
-static void save_error_info(struct super_block *sb, const char *func,
- unsigned int line)
+static void save_error_info(struct super_block *sb, int error,
+ __u32 ino, __u64 block,
+ const char *func, unsigned int line)
{
- __save_error_info(sb, func, line);
- ext4_commit_super(sb, 1);
+ __save_error_info(sb, error, ino, block, func, line);
+ if (!bdev_read_only(sb->s_bdev))
+ ext4_commit_super(sb, 1);
}
/*
@@ -464,9 +522,6 @@ static void ext4_handle_error(struct super_block *sb)
smp_wmb();
sb->s_flags |= SB_RDONLY;
} else if (test_opt(sb, ERRORS_PANIC)) {
- if (EXT4_SB(sb)->s_journal &&
- !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
- return;
panic("EXT4-fs (device %s): panic forced after error\n",
sb->s_id);
}
@@ -477,7 +532,8 @@ static void ext4_handle_error(struct super_block *sb)
"EXT4-fs error")
void __ext4_error(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
+ unsigned int line, int error, __u64 block,
+ const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -495,24 +551,21 @@ void __ext4_error(struct super_block *sb, const char *function,
sb->s_id, function, line, current->comm, &vaf);
va_end(args);
}
- save_error_info(sb, function, line);
+ save_error_info(sb, error, 0, block, function, line);
ext4_handle_error(sb);
}
void __ext4_error_inode(struct inode *inode, const char *function,
- unsigned int line, ext4_fsblk_t block,
+ unsigned int line, ext4_fsblk_t block, int error,
const char *fmt, ...)
{
va_list args;
struct va_format vaf;
- struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return;
trace_ext4_error(inode->i_sb, function, line);
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
- es->s_last_error_block = cpu_to_le64(block);
if (ext4_error_ratelimit(inode->i_sb)) {
va_start(args, fmt);
vaf.fmt = fmt;
@@ -529,7 +582,8 @@ void __ext4_error_inode(struct inode *inode, const char *function,
current->comm, &vaf);
va_end(args);
}
- save_error_info(inode->i_sb, function, line);
+ save_error_info(inode->i_sb, error, inode->i_ino, block,
+ function, line);
ext4_handle_error(inode->i_sb);
}
@@ -539,7 +593,6 @@ void __ext4_error_file(struct file *file, const char *function,
{
va_list args;
struct va_format vaf;
- struct ext4_super_block *es;
struct inode *inode = file_inode(file);
char pathname[80], *path;
@@ -547,8 +600,6 @@ void __ext4_error_file(struct file *file, const char *function,
return;
trace_ext4_error(inode->i_sb, function, line);
- es = EXT4_SB(inode->i_sb)->s_es;
- es->s_last_error_ino = cpu_to_le32(inode->i_ino);
if (ext4_error_ratelimit(inode->i_sb)) {
path = file_path(file, pathname, sizeof(pathname));
if (IS_ERR(path))
@@ -570,7 +621,8 @@ void __ext4_error_file(struct file *file, const char *function,
current->comm, path, &vaf);
va_end(args);
}
- save_error_info(inode->i_sb, function, line);
+ save_error_info(inode->i_sb, EFSCORRUPTED, inode->i_ino, block,
+ function, line);
ext4_handle_error(inode->i_sb);
}
@@ -614,66 +666,6 @@ const char *ext4_decode_error(struct super_block *sb, int errno,
return errstr;
}
-void ext4_set_errno(struct super_block *sb, int err)
-{
- if (err < 0)
- err = -err;
-
- switch (err) {
- case EIO:
- err = EXT4_ERR_EIO;
- break;
- case ENOMEM:
- err = EXT4_ERR_ENOMEM;
- break;
- case EFSBADCRC:
- err = EXT4_ERR_EFSBADCRC;
- break;
- case EFSCORRUPTED:
- err = EXT4_ERR_EFSCORRUPTED;
- break;
- case ENOSPC:
- err = EXT4_ERR_ENOSPC;
- break;
- case ENOKEY:
- err = EXT4_ERR_ENOKEY;
- break;
- case EROFS:
- err = EXT4_ERR_EROFS;
- break;
- case EFBIG:
- err = EXT4_ERR_EFBIG;
- break;
- case EEXIST:
- err = EXT4_ERR_EEXIST;
- break;
- case ERANGE:
- err = EXT4_ERR_ERANGE;
- break;
- case EOVERFLOW:
- err = EXT4_ERR_EOVERFLOW;
- break;
- case EBUSY:
- err = EXT4_ERR_EBUSY;
- break;
- case ENOTDIR:
- err = EXT4_ERR_ENOTDIR;
- break;
- case ENOTEMPTY:
- err = EXT4_ERR_ENOTEMPTY;
- break;
- case ESHUTDOWN:
- err = EXT4_ERR_ESHUTDOWN;
- break;
- case EFAULT:
- err = EXT4_ERR_EFAULT;
- break;
- default:
- err = EXT4_ERR_UNKNOWN;
- }
- EXT4_SB(sb)->s_es->s_last_error_errcode = err;
-}
-
/* __ext4_std_error decodes expected errors from journaling functions
* automatically and invokes the appropriate error response. */
@@ -698,8 +690,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
sb->s_id, function, line, errstr);
}
- ext4_set_errno(sb, -errno);
- save_error_info(sb, function, line);
+ save_error_info(sb, -errno, 0, 0, function, line);
ext4_handle_error(sb);
}
@@ -714,7 +705,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
*/
void __ext4_abort(struct super_block *sb, const char *function,
- unsigned int line, const char *fmt, ...)
+ unsigned int line, int error, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -722,7 +713,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
return;
- save_error_info(sb, function, line);
+ save_error_info(sb, error, 0, 0, function, line);
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
@@ -731,24 +722,20 @@ void __ext4_abort(struct super_block *sb, const char *function,
va_end(args);
if (sb_rdonly(sb) == 0) {
- ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+ if (EXT4_SB(sb)->s_journal)
+ jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+
+ ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
/*
* Make sure updated value of ->s_mount_flags will be visible
* before ->s_flags update
*/
smp_wmb();
sb->s_flags |= SB_RDONLY;
- if (EXT4_SB(sb)->s_journal)
- jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
- save_error_info(sb, function, line);
}
- if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
- if (EXT4_SB(sb)->s_journal &&
- !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
- return;
+ if (test_opt(sb, ERRORS_PANIC) && !system_going_down())
panic("EXT4-fs panic from previous error\n");
- }
}
void __ext4_msg(struct super_block *sb,
@@ -757,6 +744,7 @@ void __ext4_msg(struct super_block *sb,
struct va_format vaf;
va_list args;
+ atomic_inc(&EXT4_SB(sb)->s_msg_count);
if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs"))
return;
@@ -767,9 +755,12 @@ void __ext4_msg(struct super_block *sb,
va_end(args);
}
-#define ext4_warning_ratelimit(sb) \
- ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), \
- "EXT4-fs warning")
+static int ext4_warning_ratelimit(struct super_block *sb)
+{
+ atomic_inc(&EXT4_SB(sb)->s_warning_count);
+ return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
+ "EXT4-fs warning");
+}
void __ext4_warning(struct super_block *sb, const char *function,
unsigned int line, const char *fmt, ...)
@@ -815,15 +806,12 @@ __acquires(bitlock)
{
struct va_format vaf;
va_list args;
- struct ext4_super_block *es = EXT4_SB(sb)->s_es;
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
return;
trace_ext4_error(sb, function, line);
- es->s_last_error_ino = cpu_to_le32(ino);
- es->s_last_error_block = cpu_to_le64(block);
- __save_error_info(sb, function, line);
+ __save_error_info(sb, EFSCORRUPTED, ino, block, function, line);
if (ext4_error_ratelimit(sb)) {
va_start(args, fmt);
@@ -927,7 +915,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
{
struct block_device *bdev;
- char b[BDEVNAME_SIZE];
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
if (IS_ERR(bdev))
@@ -935,8 +922,9 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
return bdev;
fail:
- ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld",
- __bdevname(dev, b), PTR_ERR(bdev));
+ ext4_msg(sb, KERN_ERR,
+ "failed to open journal device unknown-block(%u,%u) %ld",
+ MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
return NULL;
}
@@ -1024,17 +1012,22 @@ static void ext4_put_super(struct super_block *sb)
destroy_workqueue(sbi->rsv_conversion_wq);
+ /*
+ * Unregister sysfs before destroying jbd2 journal.
+ * Since we could still access attr_journal_task attribute via sysfs
+ * path which could have sbi->s_journal->j_task as NULL
+ */
+ ext4_unregister_sysfs(sb);
+
if (sbi->s_journal) {
aborted = is_journal_aborted(sbi->s_journal);
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
if ((err < 0) && !aborted) {
- ext4_set_errno(sb, -err);
- ext4_abort(sb, "Couldn't clean up the journal");
+ ext4_abort(sb, -err, "Couldn't clean up the journal");
}
}
- ext4_unregister_sysfs(sb);
ext4_es_unregister_shrinker(sbi);
del_timer_sync(&sbi->s_err_report);
ext4_release_system_zone(sb);
@@ -1111,6 +1104,7 @@ static void ext4_put_super(struct super_block *sb)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->s_blockgroup_lock);
fs_put_dax(sbi->s_daxdev);
+ fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
#ifdef CONFIG_UNICODE
utf8_unload(sbi->s_encoding);
#endif
@@ -1133,6 +1127,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
inode_set_iversion(&ei->vfs_inode, 1);
spin_lock_init(&ei->i_raw_lock);
INIT_LIST_HEAD(&ei->i_prealloc_list);
+ atomic_set(&ei->i_prealloc_active, 0);
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
@@ -1226,7 +1221,7 @@ void ext4_clear_inode(struct inode *inode)
{
invalidate_inode_buffers(inode);
clear_inode(inode);
- ext4_discard_preallocations(inode);
+ ext4_discard_preallocations(inode, 0);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
dquot_drop(inode);
if (EXT4_I(inode)->jinode) {
@@ -1298,8 +1293,8 @@ static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
if (!page_has_buffers(page))
return 0;
if (journal)
- return jbd2_journal_try_to_free_buffers(journal, page,
- wait & ~__GFP_DIRECT_RECLAIM);
+ return jbd2_journal_try_to_free_buffers(journal, page);
+
return try_to_free_buffers(page);
}
@@ -1328,6 +1323,9 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode)))
return -EINVAL;
+ if (ext4_test_inode_flag(inode, EXT4_INODE_DAX))
+ return -EOPNOTSUPP;
+
res = ext4_convert_inline_data(inode);
if (res)
return res;
@@ -1353,7 +1351,7 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,
* Update inode->i_flags - S_ENCRYPTED will be enabled,
* S_DAX may be disabled
*/
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
}
return res;
}
@@ -1380,7 +1378,7 @@ retry:
* Update inode->i_flags - S_ENCRYPTED will be enabled,
* S_DAX may be disabled
*/
- ext4_set_inode_flags(inode);
+ ext4_set_inode_flags(inode, false);
res = ext4_mark_inode_dirty(handle, inode);
if (res)
EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
@@ -1394,9 +1392,10 @@ retry:
return res;
}
-static bool ext4_dummy_context(struct inode *inode)
+static const union fscrypt_context *
+ext4_get_dummy_context(struct super_block *sb)
{
- return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb));
+ return EXT4_SB(sb)->s_dummy_enc_ctx.ctx;
}
static bool ext4_has_stable_inodes(struct super_block *sb)
@@ -1415,7 +1414,7 @@ static const struct fscrypt_operations ext4_cryptops = {
.key_prefix = "ext4:",
.get_context = ext4_get_context,
.set_context = ext4_set_context,
- .dummy_context = ext4_dummy_context,
+ .get_dummy_context = ext4_get_dummy_context,
.empty_dir = ext4_empty_dir,
.max_namelen = EXT4_NAME_LEN,
.has_stable_inodes = ext4_has_stable_inodes,
@@ -1514,10 +1513,12 @@ enum {
Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
+ Opt_inlinecrypt,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
- Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, Opt_dax,
+ Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version,
+ Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
Opt_nowarn_on_error, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime, Opt_debug_want_extra_isize,
@@ -1526,6 +1527,7 @@ enum {
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
+ Opt_prefetch_block_bitmaps,
};
static const match_table_t tokens = {
@@ -1584,6 +1586,9 @@ static const match_table_t tokens = {
{Opt_nobarrier, "nobarrier"},
{Opt_i_version, "i_version"},
{Opt_dax, "dax"},
+ {Opt_dax_always, "dax=always"},
+ {Opt_dax_inode, "dax=inode"},
+ {Opt_dax_never, "dax=never"},
{Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"},
{Opt_warn_on_error, "warn_on_error"},
@@ -1610,9 +1615,12 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
+ {Opt_test_dummy_encryption, "test_dummy_encryption=%s"},
{Opt_test_dummy_encryption, "test_dummy_encryption"},
+ {Opt_inlinecrypt, "inlinecrypt"},
{Opt_nombcache, "nombcache"},
{Opt_nombcache, "no_mbcache"}, /* for backward compatibility */
+ {Opt_prefetch_block_bitmaps, "prefetch_block_bitmaps"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1731,6 +1739,7 @@ static int clear_qf_name(struct super_block *sb, int qtype)
#define MOPT_NO_EXT3 0x0200
#define MOPT_EXT4_ONLY (MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_STRING 0x0400
+#define MOPT_SKIP 0x0800
static const struct mount_opts {
int token;
@@ -1780,7 +1789,13 @@ static const struct mount_opts {
{Opt_min_batch_time, 0, MOPT_GTE0},
{Opt_inode_readahead_blks, 0, MOPT_GTE0},
{Opt_init_itable, 0, MOPT_GTE0},
- {Opt_dax, EXT4_MOUNT_DAX, MOPT_SET},
+ {Opt_dax, EXT4_MOUNT_DAX_ALWAYS, MOPT_SET | MOPT_SKIP},
+ {Opt_dax_always, EXT4_MOUNT_DAX_ALWAYS,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+ {Opt_dax_inode, EXT4_MOUNT2_DAX_INODE,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
+ {Opt_dax_never, EXT4_MOUNT2_DAX_NEVER,
+ MOPT_EXT4_ONLY | MOPT_SET | MOPT_SKIP},
{Opt_stripe, 0, MOPT_GTE0},
{Opt_resuid, 0, MOPT_GTE0},
{Opt_resgid, 0, MOPT_GTE0},
@@ -1821,8 +1836,10 @@ static const struct mount_opts {
{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
- {Opt_test_dummy_encryption, 0, MOPT_GTE0},
+ {Opt_test_dummy_encryption, 0, MOPT_STRING},
{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
+ {Opt_prefetch_block_bitmaps, EXT4_MOUNT_PREFETCH_BLOCK_BITMAPS,
+ MOPT_SET},
{Opt_err, 0, 0}
};
@@ -1856,6 +1873,48 @@ static int ext4_sb_read_encoding(const struct ext4_super_block *es,
}
#endif
+static int ext4_set_test_dummy_encryption(struct super_block *sb,
+ const char *opt,
+ const substring_t *arg,
+ bool is_remount)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
+
+ /*
+ * This mount option is just for testing, and it's not worthwhile to
+ * implement the extra complexity (e.g. RCU protection) that would be
+ * needed to allow it to be set or changed during remount. We do allow
+ * it to be specified during remount, but only if there is no change.
+ */
+ if (is_remount && !sbi->s_dummy_enc_ctx.ctx) {
+ ext4_msg(sb, KERN_WARNING,
+ "Can't set test_dummy_encryption on remount");
+ return -1;
+ }
+ err = fscrypt_set_test_dummy_encryption(sb, arg, &sbi->s_dummy_enc_ctx);
+ if (err) {
+ if (err == -EEXIST)
+ ext4_msg(sb, KERN_WARNING,
+ "Can't change test_dummy_encryption on remount");
+ else if (err == -EINVAL)
+ ext4_msg(sb, KERN_WARNING,
+ "Value of option \"%s\" is unrecognized", opt);
+ else
+ ext4_msg(sb, KERN_WARNING,
+ "Error processing option \"%s\" [%d]",
+ opt, err);
+ return -1;
+ }
+ ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
+#else
+ ext4_msg(sb, KERN_WARNING,
+ "Test dummy encryption mount option ignored");
+#endif
+ return 1;
+}
+
static int handle_mount_opt(struct super_block *sb, char *opt, int token,
substring_t *args, unsigned long *journal_devnum,
unsigned int *journal_ioprio, int is_remount)
@@ -1898,6 +1957,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
case Opt_nolazytime:
sb->s_flags &= ~SB_LAZYTIME;
return 1;
+ case Opt_inlinecrypt:
+#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
+ sb->s_flags |= SB_INLINECRYPT;
+#else
+ ext4_msg(sb, KERN_ERR, "inline encryption not supported");
+#endif
+ return 1;
}
for (m = ext4_mount_opts; m->token != Opt_err; m++)
@@ -2052,14 +2118,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
*journal_ioprio =
IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg);
} else if (token == Opt_test_dummy_encryption) {
-#ifdef CONFIG_FS_ENCRYPTION
- sbi->s_mount_flags |= EXT4_MF_TEST_DUMMY_ENCRYPTION;
- ext4_msg(sb, KERN_WARNING,
- "Test dummy encryption mode enabled");
-#else
- ext4_msg(sb, KERN_WARNING,
- "Test dummy encryption mount option ignored");
-#endif
+ return ext4_set_test_dummy_encryption(sb, opt, &args[0],
+ is_remount);
} else if (m->flags & MOPT_DATAJ) {
if (is_remount) {
if (!sbi->s_journal)
@@ -2089,13 +2149,56 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
- } else if (token == Opt_dax) {
+ } else if (token == Opt_dax || token == Opt_dax_always ||
+ token == Opt_dax_inode || token == Opt_dax_never) {
#ifdef CONFIG_FS_DAX
- ext4_msg(sb, KERN_WARNING,
- "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
- sbi->s_mount_opt |= m->mount_opt;
+ switch (token) {
+ case Opt_dax:
+ case Opt_dax_always:
+ if (is_remount &&
+ (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
+ fail_dax_change_remount:
+ ext4_msg(sb, KERN_ERR, "can't change "
+ "dax mount option while remounting");
+ return -1;
+ }
+ if (is_remount &&
+ (test_opt(sb, DATA_FLAGS) ==
+ EXT4_MOUNT_JOURNAL_DATA)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dax");
+ return -1;
+ }
+ ext4_msg(sb, KERN_WARNING,
+ "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
+ sbi->s_mount_opt |= EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+ break;
+ case Opt_dax_never:
+ if (is_remount &&
+ (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+ (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS)))
+ goto fail_dax_change_remount;
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ break;
+ case Opt_dax_inode:
+ if (is_remount &&
+ ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
+ (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
+ !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE)))
+ goto fail_dax_change_remount;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
+ sbi->s_mount_opt2 &= ~EXT4_MOUNT2_DAX_NEVER;
+ /* Strictly for printing options */
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_INODE;
+ break;
+ }
#else
ext4_msg(sb, KERN_INFO, "dax option not supported");
+ sbi->s_mount_opt2 |= EXT4_MOUNT2_DAX_NEVER;
+ sbi->s_mount_opt &= ~EXT4_MOUNT_DAX_ALWAYS;
return -1;
#endif
} else if (token == Opt_data_err_abort) {
@@ -2180,6 +2283,14 @@ static int parse_options(char *options, struct super_block *sb,
}
}
#endif
+ if (test_opt(sb, DIOREAD_NOLOCK)) {
+ int blocksize =
+ BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+ if (blocksize < PAGE_SIZE)
+ ext4_msg(sb, KERN_WARNING, "Warning: mounting with an "
+ "experimental mount option 'dioread_nolock' "
+ "for blocksize < PAGE_SIZE");
+ }
return 1;
}
@@ -2251,7 +2362,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
for (m = ext4_mount_opts; m->token != Opt_err; m++) {
int want_set = m->flags & MOPT_SET;
if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
- (m->flags & MOPT_CLEAR_ERR))
+ (m->flags & MOPT_CLEAR_ERR) || m->flags & MOPT_SKIP)
continue;
if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
continue; /* skip if same as the default */
@@ -2308,8 +2419,22 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
if (test_opt(sb, DATA_ERR_ABORT))
SEQ_OPTS_PUTS("data_err=abort");
- if (DUMMY_ENCRYPTION_ENABLED(sbi))
- SEQ_OPTS_PUTS("test_dummy_encryption");
+
+ fscrypt_show_test_dummy_encryption(seq, sep, sb);
+
+ if (sb->s_flags & SB_INLINECRYPT)
+ SEQ_OPTS_PUTS("inlinecrypt");
+
+ if (test_opt(sb, DAX_ALWAYS)) {
+ if (IS_EXT2_SB(sb))
+ SEQ_OPTS_PUTS("dax");
+ else
+ SEQ_OPTS_PUTS("dax=always");
+ } else if (test_opt2(sb, DAX_NEVER)) {
+ SEQ_OPTS_PUTS("dax=never");
+ } else if (test_opt2(sb, DAX_INODE)) {
+ SEQ_OPTS_PUTS("dax=inode");
+ }
ext4_show_quota_options(seq, sb);
return 0;
@@ -2341,6 +2466,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
ext4_msg(sb, KERN_ERR, "revision level too high, "
"forcing read-only mode");
err = -EROFS;
+ goto done;
}
if (read_only)
goto done;
@@ -3096,15 +3222,34 @@ static void print_daily_error_info(struct timer_list *t)
static int ext4_run_li_request(struct ext4_li_request *elr)
{
struct ext4_group_desc *gdp = NULL;
- ext4_group_t group, ngroups;
- struct super_block *sb;
+ struct super_block *sb = elr->lr_super;
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ ext4_group_t group = elr->lr_next_group;
unsigned long timeout = 0;
+ unsigned int prefetch_ios = 0;
int ret = 0;
- sb = elr->lr_super;
- ngroups = EXT4_SB(sb)->s_groups_count;
+ if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
+ elr->lr_next_group = ext4_mb_prefetch(sb, group,
+ EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
+ if (prefetch_ios)
+ ext4_mb_prefetch_fini(sb, elr->lr_next_group,
+ prefetch_ios);
+ trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
+ prefetch_ios);
+ if (group >= elr->lr_next_group) {
+ ret = 1;
+ if (elr->lr_first_not_zeroed != ngroups &&
+ !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
+ elr->lr_next_group = elr->lr_first_not_zeroed;
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
+ ret = 0;
+ }
+ }
+ return ret;
+ }
- for (group = elr->lr_next_group; group < ngroups; group++) {
+ for (; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
ret = 1;
@@ -3122,9 +3267,10 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
timeout = jiffies;
ret = ext4_init_inode_table(sb, group,
elr->lr_timeout ? 0 : 1);
+ trace_ext4_lazy_itable_init(sb, group);
if (elr->lr_timeout == 0) {
timeout = (jiffies - timeout) *
- elr->lr_sbi->s_li_wait_mult;
+ EXT4_SB(elr->lr_super)->s_li_wait_mult;
elr->lr_timeout = timeout;
}
elr->lr_next_sched = jiffies + elr->lr_timeout;
@@ -3139,15 +3285,11 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
*/
static void ext4_remove_li_request(struct ext4_li_request *elr)
{
- struct ext4_sb_info *sbi;
-
if (!elr)
return;
- sbi = elr->lr_sbi;
-
list_del(&elr->lr_request);
- sbi->s_li_request = NULL;
+ EXT4_SB(elr->lr_super)->s_li_request = NULL;
kfree(elr);
}
@@ -3356,7 +3498,6 @@ static int ext4_li_info_new(void)
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
ext4_group_t start)
{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_li_request *elr;
elr = kzalloc(sizeof(*elr), GFP_KERNEL);
@@ -3364,8 +3505,13 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
return NULL;
elr->lr_super = sb;
- elr->lr_sbi = sbi;
- elr->lr_next_group = start;
+ elr->lr_first_not_zeroed = start;
+ if (test_opt(sb, PREFETCH_BLOCK_BITMAPS))
+ elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
+ else {
+ elr->lr_mode = EXT4_LI_MODE_ITABLE;
+ elr->lr_next_group = start;
+ }
/*
* Randomize first schedule time of the request to
@@ -3395,8 +3541,9 @@ int ext4_register_li_request(struct super_block *sb,
goto out;
}
- if (first_not_zeroed == ngroups || sb_rdonly(sb) ||
- !test_opt(sb, INIT_INODE_TABLE))
+ if (!test_opt(sb, PREFETCH_BLOCK_BITMAPS) &&
+ (first_not_zeroed == ngroups || sb_rdonly(sb) ||
+ !test_opt(sb, INIT_INODE_TABLE)))
goto out;
elr = ext4_li_request_new(sb, first_not_zeroed);
@@ -3609,7 +3756,8 @@ int ext4_calculate_overhead(struct super_block *sb)
*/
if (sbi->s_journal && !sbi->journal_bdev)
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
- else if (ext4_has_feature_journal(sb) && !sbi->s_journal) {
+ else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
+ /* j_inum for internal journal is non-zero */
j_inode = ext4_get_journal_inode(sb, j_inum);
if (j_inode) {
j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
@@ -3675,7 +3823,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
int blocksize, clustersize;
unsigned int db_count;
unsigned int i;
- int needs_recovery, has_huge_files, has_bigalloc;
+ int needs_recovery, has_huge_files;
__u64 blocks_count;
int err = 0;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
@@ -3785,7 +3933,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sb, NO_UID32);
/* xattr user namespace & acls are now defaulted on */
set_opt(sb, XATTR_USER);
- set_opt(sb, DIOREAD_NOLOCK);
#ifdef CONFIG_EXT4_FS_POSIX_ACL
set_opt(sb, POSIX_ACL);
#endif
@@ -3835,6 +3982,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
+
+ if (blocksize == PAGE_SIZE)
+ set_opt(sb, DIOREAD_NOLOCK);
+
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
blocksize > EXT4_MAX_BLOCK_SIZE) {
ext4_msg(sb, KERN_ERR,
@@ -3964,18 +4115,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, and O_DIRECT support!\n");
+ /* can't mount with both data=journal and dioread_nolock. */
clear_opt(sb, DIOREAD_NOLOCK);
if (test_opt2(sb, EXPLICIT_DELALLOC)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and delalloc");
goto failed_mount;
}
- if (test_opt(sb, DIOREAD_NOLOCK)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and dioread_nolock");
- goto failed_mount;
- }
- if (test_opt(sb, DAX)) {
+ if (test_opt(sb, DAX_ALWAYS)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"both data=journal and dax");
goto failed_mount;
@@ -4085,13 +4232,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
- if (sbi->s_mount_opt & EXT4_MOUNT_DAX) {
+ if (bdev_dax_supported(sb->s_bdev, blocksize))
+ set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
+
+ if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data");
goto failed_mount;
}
- if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
+ if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device.");
goto failed_mount;
@@ -4157,7 +4307,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
sbi->s_inodes_per_group > blocksize * 8) {
ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
- sbi->s_blocks_per_group);
+ sbi->s_inodes_per_group);
goto failed_mount;
}
sbi->s_itb_per_group = sbi->s_inodes_per_group /
@@ -4191,8 +4341,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Handle clustersize */
clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
- has_bigalloc = ext4_has_feature_bigalloc(sb);
- if (has_bigalloc) {
+ if (ext4_has_feature_bigalloc(sb)) {
if (clustersize < blocksize) {
ext4_msg(sb, KERN_ERR,
"cluster size (%d) smaller than "
@@ -4286,9 +4435,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
EXT4_BLOCKS_PER_GROUP(sb) - 1);
do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
- ext4_msg(sb, KERN_WARNING, "groups count too large: %u "
+ ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
"(block count %llu, first data block %u, "
- "blocks per group %lu)", sbi->s_groups_count,
+ "blocks per group %lu)", blocks_count,
ext4_blocks_count(es),
le32_to_cpu(es->s_first_data_block),
EXT4_BLOCKS_PER_GROUP(sb));
@@ -4331,7 +4480,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* Pre-read the descriptors into the buffer cache */
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logical_sb_block, i);
- sb_breadahead(sb, block);
+ sb_breadahead_unmovable(sb, block);
}
for (i = 0; i < db_count; i++) {
@@ -4591,11 +4740,13 @@ no_journal:
ext4_set_resv_clusters(sb);
- err = ext4_setup_system_zone(sb);
- if (err) {
- ext4_msg(sb, KERN_ERR, "failed to initialize system "
- "zone (%d)", err);
- goto failed_mount4a;
+ if (test_opt(sb, BLOCK_VALIDITY)) {
+ err = ext4_setup_system_zone(sb);
+ if (err) {
+ ext4_msg(sb, KERN_ERR, "failed to initialize system "
+ "zone (%d)", err);
+ goto failed_mount4a;
+ }
}
ext4_ext_init(sb);
@@ -4658,12 +4809,23 @@ no_journal:
}
#endif /* CONFIG_QUOTA */
+ /*
+ * Save the original bdev mapping's wb_err value which could be
+ * used to detect the metadata async write error.
+ */
+ spin_lock_init(&sbi->s_bdev_wb_lock);
+ if (!sb_rdonly(sb))
+ errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
+ &sbi->s_bdev_wb_err);
+ sb->s_bdev->bd_super = sb;
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
if (needs_recovery) {
ext4_msg(sb, KERN_INFO, "recovery complete");
- ext4_mark_recovery_complete(sb, es);
+ err = ext4_mark_recovery_complete(sb, es);
+ if (err)
+ goto failed_mount8;
}
if (EXT4_SB(sb)->s_journal) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -4697,6 +4859,8 @@ no_journal:
ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
+ atomic_set(&sbi->s_warning_count, 0);
+ atomic_set(&sbi->s_msg_count, 0);
kfree(orig_data);
return 0;
@@ -4706,10 +4870,8 @@ cantfind_ext4:
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
goto failed_mount;
-#ifdef CONFIG_QUOTA
failed_mount8:
ext4_unregister_sysfs(sb);
-#endif
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
@@ -4773,6 +4935,7 @@ failed_mount:
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(get_qf_name(sb, sbi, i));
#endif
+ fscrypt_free_dummy_context(&sbi->s_dummy_enc_ctx);
ext4_blkdev_remove(sbi);
brelse(bh);
out_fail:
@@ -4848,7 +5011,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
struct inode *journal_inode;
journal_t *journal;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return NULL;
journal_inode = ext4_get_journal_inode(sb, journal_inum);
if (!journal_inode)
@@ -4878,7 +5042,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
struct ext4_super_block *es;
struct block_device *bdev;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return NULL;
bdev = ext4_blkdev_get(j_dev, sb);
if (bdev == NULL)
@@ -4969,8 +5134,10 @@ static int ext4_load_journal(struct super_block *sb,
dev_t journal_dev;
int err = 0;
int really_read_only;
+ int journal_dev_ro;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
+ return -EFSCORRUPTED;
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -4980,7 +5147,31 @@ static int ext4_load_journal(struct super_block *sb,
} else
journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
- really_read_only = bdev_read_only(sb->s_bdev);
+ if (journal_inum && journal_dev) {
+ ext4_msg(sb, KERN_ERR,
+ "filesystem has both journal inode and journal device!");
+ return -EINVAL;
+ }
+
+ if (journal_inum) {
+ journal = ext4_get_journal(sb, journal_inum);
+ if (!journal)
+ return -EINVAL;
+ } else {
+ journal = ext4_get_dev_journal(sb, journal_dev);
+ if (!journal)
+ return -EINVAL;
+ }
+
+ journal_dev_ro = bdev_read_only(journal->j_dev);
+ really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
+
+ if (journal_dev_ro && !sb_rdonly(sb)) {
+ ext4_msg(sb, KERN_ERR,
+ "journal device read-only, try mounting with '-o ro'");
+ err = -EROFS;
+ goto err_out;
+ }
/*
* Are we loading a blank journal or performing recovery after a
@@ -4995,27 +5186,14 @@ static int ext4_load_journal(struct super_block *sb,
ext4_msg(sb, KERN_ERR, "write access "
"unavailable, cannot proceed "
"(try mounting with noload)");
- return -EROFS;
+ err = -EROFS;
+ goto err_out;
}
ext4_msg(sb, KERN_INFO, "write access will "
"be enabled during recovery");
}
}
- if (journal_inum && journal_dev) {
- ext4_msg(sb, KERN_ERR, "filesystem has both journal "
- "and inode journals!");
- return -EINVAL;
- }
-
- if (journal_inum) {
- if (!(journal = ext4_get_journal(sb, journal_inum)))
- return -EINVAL;
- } else {
- if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
- return -EINVAL;
- }
-
if (!(journal->j_flags & JBD2_BARRIER))
ext4_msg(sb, KERN_INFO, "barriers disabled");
@@ -5035,12 +5213,16 @@ static int ext4_load_journal(struct super_block *sb,
if (err) {
ext4_msg(sb, KERN_ERR, "error loading journal");
- jbd2_journal_destroy(journal);
- return err;
+ goto err_out;
}
EXT4_SB(sb)->s_journal = journal;
- ext4_clear_journal_err(sb, es);
+ err = ext4_clear_journal_err(sb, es);
+ if (err) {
+ EXT4_SB(sb)->s_journal = NULL;
+ jbd2_journal_destroy(journal);
+ return err;
+ }
if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
@@ -5051,6 +5233,10 @@ static int ext4_load_journal(struct super_block *sb,
}
return 0;
+
+err_out:
+ jbd2_journal_destroy(journal);
+ return err;
}
static int ext4_commit_super(struct super_block *sb, int sync)
@@ -5063,13 +5249,6 @@ static int ext4_commit_super(struct super_block *sb, int sync)
return error;
/*
- * The superblock bh should be mapped, but it might not be if the
- * device was hot-removed. Not much we can do but fail the I/O.
- */
- if (!buffer_mapped(sbh))
- return error;
-
- /*
* If the file system is mounted read-only, don't update the
* superblock write time. This avoids updating the superblock
* write time when we are mounting the root file system
@@ -5136,26 +5315,32 @@ static int ext4_commit_super(struct super_block *sb, int sync)
* remounting) the filesystem readonly, then we will end up with a
* consistent fs on disk. Record that fact.
*/
-static void ext4_mark_recovery_complete(struct super_block *sb,
- struct ext4_super_block *es)
+static int ext4_mark_recovery_complete(struct super_block *sb,
+ struct ext4_super_block *es)
{
+ int err;
journal_t *journal = EXT4_SB(sb)->s_journal;
if (!ext4_has_feature_journal(sb)) {
- BUG_ON(journal != NULL);
- return;
+ if (journal != NULL) {
+ ext4_error(sb, "Journal got removed while the fs was "
+ "mounted!");
+ return -EFSCORRUPTED;
+ }
+ return 0;
}
jbd2_journal_lock_updates(journal);
- if (jbd2_journal_flush(journal) < 0)
+ err = jbd2_journal_flush(journal);
+ if (err < 0)
goto out;
if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
ext4_clear_feature_journal_needs_recovery(sb);
ext4_commit_super(sb, 1);
}
-
out:
jbd2_journal_unlock_updates(journal);
+ return err;
}
/*
@@ -5163,14 +5348,17 @@ out:
* has recorded an error from a previous lifetime, move that error to the
* main filesystem now.
*/
-static void ext4_clear_journal_err(struct super_block *sb,
+static int ext4_clear_journal_err(struct super_block *sb,
struct ext4_super_block *es)
{
journal_t *journal;
int j_errno;
const char *errstr;
- BUG_ON(!ext4_has_feature_journal(sb));
+ if (!ext4_has_feature_journal(sb)) {
+ ext4_error(sb, "Journal got removed while the fs was mounted!");
+ return -EFSCORRUPTED;
+ }
journal = EXT4_SB(sb)->s_journal;
@@ -5195,6 +5383,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);
}
+ return 0;
}
/*
@@ -5249,7 +5438,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
needs_barrier = true;
if (needs_barrier) {
int err;
- err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+ err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
if (!ret)
ret = err;
}
@@ -5337,7 +5526,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
{
struct ext4_super_block *es;
struct ext4_sb_info *sbi = EXT4_SB(sb);
- unsigned long old_sb_flags;
+ unsigned long old_sb_flags, vfs_flags;
struct ext4_mount_options old_opts;
int enable_quota = 0;
ext4_group_t g;
@@ -5380,6 +5569,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (sbi->s_journal && sbi->s_journal->j_task->io_context)
journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
+ /*
+ * Some options can be enabled by ext4 and/or by VFS mount flag
+ * either way we need to make sure it matches in both *flags and
+ * s_flags. Copy those selected flags from *flags to s_flags
+ */
+ vfs_flags = SB_LAZYTIME | SB_I_VERSION;
+ sb->s_flags = (sb->s_flags & ~vfs_flags) | (*flags & vfs_flags);
+
if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {
err = -EINVAL;
goto restore_opts;
@@ -5405,12 +5602,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
err = -EINVAL;
goto restore_opts;
}
- if (test_opt(sb, DAX)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and dax");
- err = -EINVAL;
- goto restore_opts;
- }
} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
@@ -5426,14 +5617,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) {
- ext4_msg(sb, KERN_WARNING, "warning: refusing change of "
- "dax flag with busy inodes while remounting");
- sbi->s_mount_opt ^= EXT4_MOUNT_DAX;
- }
-
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
- ext4_abort(sb, "Abort forced by user");
+ ext4_abort(sb, EXT4_ERR_ESHUTDOWN, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
@@ -5445,9 +5630,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
}
- if (*flags & SB_LAZYTIME)
- sb->s_flags |= SB_LAZYTIME;
-
if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {
err = -EROFS;
@@ -5477,8 +5659,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
(sbi->s_mount_state & EXT4_VALID_FS))
es->s_state = cpu_to_le16(sbi->s_mount_state);
- if (sbi->s_journal)
+ if (sbi->s_journal) {
+ /*
+ * We let remount-ro finish even if marking fs
+ * as clean failed...
+ */
ext4_mark_recovery_complete(sb, es);
+ }
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
} else {
@@ -5521,13 +5708,24 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
/*
+ * Update the original bdev mapping's wb_err value
+ * which could be used to detect the metadata async
+ * write error.
+ */
+ errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
+ &sbi->s_bdev_wb_err);
+
+ /*
* Mounting a RDONLY partition read-write, so reread
* and store the current valid flag. (It may have
* been changed by e2fsck since we originally mounted
* the partition.)
*/
- if (sbi->s_journal)
- ext4_clear_journal_err(sb, es);
+ if (sbi->s_journal) {
+ err = ext4_clear_journal_err(sb, es);
+ if (err)
+ goto restore_opts;
+ }
sbi->s_mount_state = le16_to_cpu(es->s_state);
err = ext4_setup_super(sb, es, 0);
@@ -5557,7 +5755,17 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
ext4_register_li_request(sb, first_not_zeroed);
}
- ext4_setup_system_zone(sb);
+ /*
+ * Handle creation of system zone data early because it can fail.
+ * Releasing of existing data is done when we are sure remount will
+ * succeed.
+ */
+ if (test_opt(sb, BLOCK_VALIDITY) && !sbi->system_blks) {
+ err = ext4_setup_system_zone(sb);
+ if (err)
+ goto restore_opts;
+ }
+
if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
err = ext4_commit_super(sb, 1);
if (err)
@@ -5578,8 +5786,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
}
}
#endif
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks)
+ ext4_release_system_zone(sb);
+
+ /*
+ * Some options can be enabled by ext4 and/or by VFS mount flag
+ * either way we need to make sure it matches in both *flags and
+ * s_flags. Copy those selected flags from s_flags to *flags
+ */
+ *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags);
- *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data);
return 0;
@@ -5593,6 +5809,8 @@ restore_opts:
sbi->s_commit_interval = old_opts.s_commit_interval;
sbi->s_min_batch_time = old_opts.s_min_batch_time;
sbi->s_max_batch_time = old_opts.s_max_batch_time;
+ if (!test_opt(sb, BLOCK_VALIDITY) && sbi->system_blks)
+ ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
for (i = 0; i < EXT4_MAXQUOTAS; i++) {
@@ -5622,10 +5840,8 @@ static int ext4_statfs_project(struct super_block *sb,
return PTR_ERR(dquot);
spin_lock(&dquot->dq_dqb_lock);
- limit = dquot->dq_dqb.dqb_bsoftlimit;
- if (dquot->dq_dqb.dqb_bhardlimit &&
- (!limit || dquot->dq_dqb.dqb_bhardlimit < limit))
- limit = dquot->dq_dqb.dqb_bhardlimit;
+ limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
+ dquot->dq_dqb.dqb_bhardlimit);
limit >>= sb->s_blocksize_bits;
if (limit && buf->f_blocks > limit) {
@@ -5637,11 +5853,8 @@ static int ext4_statfs_project(struct super_block *sb,
(buf->f_blocks - curblock) : 0;
}
- limit = dquot->dq_dqb.dqb_isoftlimit;
- if (dquot->dq_dqb.dqb_ihardlimit &&
- (!limit || dquot->dq_dqb.dqb_ihardlimit < limit))
- limit = dquot->dq_dqb.dqb_ihardlimit;
-
+ limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
+ dquot->dq_dqb.dqb_ihardlimit);
if (limit && buf->f_files > limit) {
buf->f_files = limit;
buf->f_ffree =
@@ -5883,7 +6096,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
S_NOATIME | S_IMMUTABLE);
- ext4_mark_inode_dirty(handle, inode);
+ err = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
unlock_inode:
inode_unlock(inode);
@@ -5985,12 +6198,14 @@ static int ext4_quota_off(struct super_block *sb, int type)
* this is not a hard failure and quotas are already disabled.
*/
handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
- if (IS_ERR(handle))
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
goto out_unlock;
+ }
EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
inode->i_mtime = inode->i_ctime = current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
+ err = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out_unlock:
inode_unlock(inode);
@@ -6048,7 +6263,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
{
struct inode *inode = sb_dqopt(sb)->files[type];
ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
- int err, offset = off & (sb->s_blocksize - 1);
+ int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
int retries = 0;
struct buffer_head *bh;
handle_t *handle = journal_current_handle();
@@ -6096,9 +6311,11 @@ out:
if (inode->i_size < off + len) {
i_size_write(inode, off + len);
EXT4_I(inode)->i_disksize = inode->i_size;
- ext4_mark_inode_dirty(handle, inode);
+ err2 = ext4_mark_inode_dirty(handle, inode);
+ if (unlikely(err2 && !err))
+ err = err2;
}
- return len;
+ return err ? err : len;
}
#endif