summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/block_validity.c5
-rw-r--r--fs/ext4/ext4.h5
-rw-r--r--fs/ext4/extents.c19
-rw-r--r--fs/ext4/inode.c88
-rw-r--r--fs/ext4/mballoc.c3
-rw-r--r--fs/ext4/page-io.c16
-rw-r--r--fs/ext4/super.c19
8 files changed, 117 insertions, 42 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 8042ad873808..c48fd36b2d74 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -649,8 +649,8 @@ static int ext4_has_free_clusters(struct ext4_sb_info *sbi,
/* Hm, nope. Are (enough) root reserved clusters available? */
if (uid_eq(sbi->s_resuid, current_fsuid()) ||
(!gid_eq(sbi->s_resgid, GLOBAL_ROOT_GID) && in_group_p(sbi->s_resgid)) ||
- capable(CAP_SYS_RESOURCE) ||
- (flags & EXT4_MB_USE_ROOT_BLOCKS)) {
+ (flags & EXT4_MB_USE_ROOT_BLOCKS) ||
+ capable(CAP_SYS_RESOURCE)) {
if (free_clusters >= (nclusters + dirty_clusters +
resv_clusters))
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 87ee3a17bd29..e8c5525afc67 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -351,10 +351,9 @@ int ext4_check_blockref(const char *function, unsigned int line,
{
__le32 *bref = p;
unsigned int blk;
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
- if (ext4_has_feature_journal(inode->i_sb) &&
- (inode->i_ino ==
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ if (journal && inode == journal->j_inode)
return 0;
while (bref < p+max) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index df30d9f23512..4f81983b7918 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -278,7 +278,8 @@ struct ext4_system_blocks {
/*
* Flags for ext4_io_end->flags
*/
-#define EXT4_IO_END_UNWRITTEN 0x0001
+#define EXT4_IO_END_UNWRITTEN 0x0001
+#define EXT4_IO_END_FAILED 0x0002
struct ext4_io_end_vec {
struct list_head list; /* list of io_end_vec */
@@ -3010,6 +3011,8 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
extern int ext4_can_truncate(struct inode *inode);
extern int ext4_truncate(struct inode *);
extern int ext4_break_layouts(struct inode *);
+extern int ext4_truncate_page_cache_block_range(struct inode *inode,
+ loff_t start, loff_t end);
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
extern void ext4_set_inode_flags(struct inode *, bool init);
extern int ext4_alloc_da_blocks(struct inode *inode);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a07a98a4b97a..8dc6b4271b15 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4667,22 +4667,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
goto out_mutex;
}
- /*
- * For journalled data we need to write (and checkpoint) pages
- * before discarding page cache to avoid inconsitent data on
- * disk in case of crash before zeroing trans is committed.
- */
- if (ext4_should_journal_data(inode)) {
- ret = filemap_write_and_wait_range(mapping, start,
- end - 1);
- if (ret) {
- filemap_invalidate_unlock(mapping);
- goto out_mutex;
- }
+ /* Now release the pages and zero block aligned part of pages */
+ ret = ext4_truncate_page_cache_block_range(inode, start, end);
+ if (ret) {
+ filemap_invalidate_unlock(mapping);
+ goto out_mutex;
}
- /* Now release the pages and zero block aligned part of pages */
- truncate_pagecache_range(inode, start, end - 1);
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4108b7d1696f..44780bb537f9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -31,6 +31,7 @@
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
+#include <linux/rmap.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
@@ -383,10 +384,11 @@ static int __check_block_validity(struct inode *inode, const char *func,
unsigned int line,
struct ext4_map_blocks *map)
{
- if (ext4_has_feature_journal(inode->i_sb) &&
- (inode->i_ino ==
- le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
+ journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+
+ if (journal && inode == journal->j_inode)
return 0;
+
if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
ext4_error_inode(inode, func, line, map->m_pblk,
"lblock %lu mapped to illegal pblock %llu "
@@ -3902,6 +3904,68 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
return ret;
}
+static inline void ext4_truncate_folio(struct inode *inode,
+ loff_t start, loff_t end)
+{
+ unsigned long blocksize = i_blocksize(inode);
+ struct folio *folio;
+
+ /* Nothing to be done if no complete block needs to be truncated. */
+ if (round_up(start, blocksize) >= round_down(end, blocksize))
+ return;
+
+ folio = filemap_lock_folio(inode->i_mapping, start >> PAGE_SHIFT);
+ if (IS_ERR(folio))
+ return;
+
+ if (folio_mkclean(folio))
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
+}
+
+int ext4_truncate_page_cache_block_range(struct inode *inode,
+ loff_t start, loff_t end)
+{
+ unsigned long blocksize = i_blocksize(inode);
+ int ret;
+
+ /*
+ * For journalled data we need to write (and checkpoint) pages
+ * before discarding page cache to avoid inconsitent data on disk
+ * in case of crash before freeing or unwritten converting trans
+ * is committed.
+ */
+ if (ext4_should_journal_data(inode)) {
+ ret = filemap_write_and_wait_range(inode->i_mapping, start,
+ end - 1);
+ if (ret)
+ return ret;
+ goto truncate_pagecache;
+ }
+
+ /*
+ * If the block size is less than the page size, the file's mapped
+ * blocks within one page could be freed or converted to unwritten.
+ * So it's necessary to remove writable userspace mappings, and then
+ * ext4_page_mkwrite() can be called during subsequent write access
+ * to these partial folios.
+ */
+ if (!IS_ALIGNED(start | end, PAGE_SIZE) &&
+ blocksize < PAGE_SIZE && start < inode->i_size) {
+ loff_t page_boundary = round_up(start, PAGE_SIZE);
+
+ ext4_truncate_folio(inode, start, min(page_boundary, end));
+ if (end > page_boundary)
+ ext4_truncate_folio(inode,
+ round_down(end, PAGE_SIZE), end);
+ }
+
+truncate_pagecache:
+ truncate_pagecache_range(inode, start, end - 1);
+ return 0;
+}
+
static void ext4_wait_dax_page(struct inode *inode)
{
filemap_invalidate_unlock(inode->i_mapping);
@@ -3956,17 +4020,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
trace_ext4_punch_hole(inode, offset, length, 0);
- /*
- * Write out all dirty pages to avoid race conditions
- * Then release them.
- */
- if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- ret = filemap_write_and_wait_range(mapping, offset,
- offset + length - 1);
- if (ret)
- return ret;
- }
-
inode_lock(inode);
/* No need to punch hole beyond i_size */
@@ -4028,8 +4081,11 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
ret = ext4_update_disksize_before_punch(inode, offset, length);
if (ret)
goto out_dio;
- truncate_pagecache_range(inode, first_block_offset,
- last_block_offset);
+
+ ret = ext4_truncate_page_cache_block_range(inode,
+ first_block_offset, last_block_offset + 1);
+ if (ret)
+ goto out_dio;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b25a27c86696..d6f1e61c6dc8 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -6644,7 +6644,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
for (i = 0; i < count; i++) {
cond_resched();
if (is_metadata)
- bh = sb_find_get_block(inode->i_sb, block + i);
+ bh = sb_find_get_block_nonatomic(inode->i_sb,
+ block + i);
ext4_forget(handle, is_metadata, inode, bh, block + i);
}
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 69b8a7221a2b..f041a5d93716 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -181,14 +181,25 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
"list->prev 0x%p\n",
io_end, inode->i_ino, io_end->list.next, io_end->list.prev);
- io_end->handle = NULL; /* Following call will use up the handle */
- ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
+ /*
+ * Do not convert the unwritten extents if data writeback fails,
+ * or stale data may be exposed.
+ */
+ io_end->handle = NULL; /* Following call will use up the handle */
+ if (unlikely(io_end->flag & EXT4_IO_END_FAILED)) {
+ ret = -EIO;
+ if (handle)
+ jbd2_journal_free_reserved(handle);
+ } else {
+ ret = ext4_convert_unwritten_io_end_vec(handle, io_end);
+ }
if (ret < 0 && !ext4_forced_shutdown(inode->i_sb)) {
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
"extents -- potential data loss! "
"(inode %lu, error %d)", inode->i_ino, ret);
}
+
ext4_clear_io_unwritten_flag(io_end);
ext4_release_io_end(io_end);
return ret;
@@ -344,6 +355,7 @@ static void ext4_end_bio(struct bio *bio)
bio->bi_status, inode->i_ino,
(unsigned long long)
bi_sector >> (inode->i_blkbits - 9));
+ io_end->flag |= EXT4_IO_END_FAILED;
mapping_set_error(inode->i_mapping,
blk_status_to_errno(bio->bi_status));
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 528979de0f7c..b956e1ee9829 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2785,6 +2785,13 @@ static int ext4_check_opt_consistency(struct fs_context *fc,
}
if (is_remount) {
+ if (!sbi->s_journal &&
+ ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Remounting fs w/o journal so ignoring data_err option");
+ ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_ERR_ABORT);
+ }
+
if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
ext4_msg(NULL, KERN_ERR, "can't mount with "
@@ -5428,6 +5435,11 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
"data=, fs mounted w/o journal");
goto failed_mount3a;
}
+ if (test_opt(sb, DATA_ERR_ABORT)) {
+ ext4_msg(sb, KERN_ERR,
+ "can't mount with data_err=abort, fs mounted w/o journal");
+ goto failed_mount3a;
+ }
sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
clear_opt(sb, DATA_FLAGS);
@@ -6776,6 +6788,7 @@ static int ext4_reconfigure(struct fs_context *fc)
{
struct super_block *sb = fc->root->d_sb;
int ret;
+ bool old_ro = sb_rdonly(sb);
fc->s_fs_info = EXT4_SB(sb);
@@ -6787,9 +6800,9 @@ static int ext4_reconfigure(struct fs_context *fc)
if (ret < 0)
return ret;
- ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
- &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
- ext4_quota_mode(sb));
+ ext4_msg(sb, KERN_INFO, "re-mounted %pU%s.",
+ &sb->s_uuid,
+ (old_ro != sb_rdonly(sb)) ? (sb_rdonly(sb) ? " ro" : " r/w") : "");
return 0;
}