summaryrefslogtreecommitdiff
path: root/fs/f2fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs')
-rw-r--r--fs/f2fs/acl.c3
-rw-r--r--fs/f2fs/checkpoint.c101
-rw-r--r--fs/f2fs/compress.c100
-rw-r--r--fs/f2fs/data.c603
-rw-r--r--fs/f2fs/debug.c9
-rw-r--r--fs/f2fs/dir.c171
-rw-r--r--fs/f2fs/extent_cache.c12
-rw-r--r--fs/f2fs/f2fs.h353
-rw-r--r--fs/f2fs/file.c474
-rw-r--r--fs/f2fs/gc.c158
-rw-r--r--fs/f2fs/gc.h35
-rw-r--r--fs/f2fs/inline.c74
-rw-r--r--fs/f2fs/inode.c148
-rw-r--r--fs/f2fs/namei.c90
-rw-r--r--fs/f2fs/node.c176
-rw-r--r--fs/f2fs/recovery.c52
-rw-r--r--fs/f2fs/segment.c533
-rw-r--r--fs/f2fs/segment.h85
-rw-r--r--fs/f2fs/super.c508
-rw-r--r--fs/f2fs/sysfs.c306
-rw-r--r--fs/f2fs/verity.c29
-rw-r--r--fs/f2fs/xattr.c4
-rw-r--r--fs/f2fs/xattr.h2
23 files changed, 2503 insertions, 1523 deletions
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index ec2aeccb69a3..8bffdeccdbc3 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -219,8 +219,7 @@ static int f2fs_acl_update_mode(struct mnt_idmap *idmap,
return error;
if (error == 0)
*acl = NULL;
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
- !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+ if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c6317596e695..86228f82f54d 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -99,7 +99,7 @@ repeat:
}
if (unlikely(!PageUptodate(page))) {
- f2fs_handle_page_eio(sbi, page->index, META);
+ f2fs_handle_page_eio(sbi, page_folio(page), META);
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
@@ -154,49 +154,47 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
if (unlikely(f2fs_cp_error(sbi)))
return exist;
- if (exist && type == DATA_GENERIC_ENHANCE_UPDATE) {
- f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
- blkaddr, exist);
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- return exist;
- }
+ if ((exist && type == DATA_GENERIC_ENHANCE_UPDATE) ||
+ (!exist && type == DATA_GENERIC_ENHANCE))
+ goto out_err;
+ if (!exist && type != DATA_GENERIC_ENHANCE_UPDATE)
+ goto out_handle;
+ return exist;
- if (!exist && type == DATA_GENERIC_ENHANCE) {
- f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
- blkaddr, exist);
- set_sbi_flag(sbi, SBI_NEED_FSCK);
- dump_stack();
- }
+out_err:
+ f2fs_err(sbi, "Inconsistent error blkaddr:%u, sit bitmap:%d",
+ blkaddr, exist);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ dump_stack();
+out_handle:
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return exist;
}
-bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type)
{
- if (time_to_inject(sbi, FAULT_BLKADDR))
- return false;
-
switch (type) {
case META_NAT:
break;
case META_SIT:
if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
- return false;
+ goto check_only;
break;
case META_SSA:
if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
blkaddr < SM_I(sbi)->ssa_blkaddr))
- return false;
+ goto check_only;
break;
case META_CP:
if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
blkaddr < __start_cp_addr(sbi)))
- return false;
+ goto check_only;
break;
case META_POR:
if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
blkaddr < MAIN_BLKADDR(sbi)))
- return false;
+ goto check_only;
break;
case DATA_GENERIC:
case DATA_GENERIC_ENHANCE:
@@ -213,7 +211,7 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
blkaddr);
set_sbi_flag(sbi, SBI_NEED_FSCK);
dump_stack();
- return false;
+ goto err;
} else {
return __is_bitmap_valid(sbi, blkaddr, type);
}
@@ -221,13 +219,31 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
case META_GENERIC:
if (unlikely(blkaddr < SEG0_BLKADDR(sbi) ||
blkaddr >= MAIN_BLKADDR(sbi)))
- return false;
+ goto err;
break;
default:
BUG();
}
return true;
+err:
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+check_only:
+ return false;
+}
+
+bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ if (time_to_inject(sbi, FAULT_BLKADDR_VALIDITY))
+ return false;
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
+}
+
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type)
+{
+ return __f2fs_is_valid_blkaddr(sbi, blkaddr, type);
}
/*
@@ -329,30 +345,31 @@ static int __f2fs_write_meta_page(struct page *page,
enum iostat_type io_type)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+ struct folio *folio = page_folio(page);
- trace_f2fs_writepage(page, META);
+ trace_f2fs_writepage(folio, META);
if (unlikely(f2fs_cp_error(sbi))) {
if (is_sbi_flag_set(sbi, SBI_IS_CLOSE)) {
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
dec_page_count(sbi, F2FS_DIRTY_META);
- unlock_page(page);
+ folio_unlock(folio);
return 0;
}
goto redirty_out;
}
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
- if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
+ if (wbc->for_reclaim && folio->index < GET_SUM_BLOCK(sbi, 0))
goto redirty_out;
- f2fs_do_write_meta_page(sbi, page, io_type);
+ f2fs_do_write_meta_page(sbi, folio, io_type);
dec_page_count(sbi, F2FS_DIRTY_META);
if (wbc->for_reclaim)
f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META);
- unlock_page(page);
+ folio_unlock(folio);
if (unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_write(sbi, META);
@@ -477,7 +494,7 @@ stop:
static bool f2fs_dirty_meta_folio(struct address_space *mapping,
struct folio *folio)
{
- trace_f2fs_set_page_dirty(&folio->page, META);
+ trace_f2fs_set_page_dirty(folio, META);
if (!folio_test_uptodate(folio))
folio_mark_uptodate(folio);
@@ -1327,21 +1344,13 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
unsigned long flags;
- if (cpc->reason & CP_UMOUNT) {
- if (le32_to_cpu(ckpt->cp_pack_total_block_count) +
- NM_I(sbi)->nat_bits_blocks > BLKS_PER_SEG(sbi)) {
- clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
- f2fs_notice(sbi, "Disable nat_bits due to no space");
- } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) &&
- f2fs_nat_bitmap_enabled(sbi)) {
- f2fs_enable_nat_bits(sbi);
- set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
- f2fs_notice(sbi, "Rebuild and enable nat_bits");
- }
- }
-
spin_lock_irqsave(&sbi->cp_lock, flags);
+ if ((cpc->reason & CP_UMOUNT) &&
+ le32_to_cpu(ckpt->cp_pack_total_block_count) >
+ sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
+ disable_nat_bits(sbi, false);
+
if (cpc->reason & CP_TRIMMED)
__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
else
@@ -1524,8 +1533,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
start_blk = __start_cp_next_addr(sbi);
/* write nat bits */
- if ((cpc->reason & CP_UMOUNT) &&
- is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) {
+ if (enabled_nat_bits(sbi, cpc)) {
__u64 cp_ver = cur_cp_version(ckpt);
block_t blk;
@@ -1535,7 +1543,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
blk = start_blk + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++)
f2fs_update_meta_page(sbi, nm_i->nat_bits +
- (i << F2FS_BLKSIZE_BITS), blk + i);
+ F2FS_BLK_TO_BYTES(i), blk + i);
}
/* write out checkpoint buffer at block 0 */
@@ -1702,6 +1710,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
f2fs_restore_inmem_curseg(sbi);
+ f2fs_reinit_atgc_curseg(sbi);
stat_inc_cp_count(sbi);
stop:
unblock_operations(sbi);
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index f7ef69f44f3d..b05bb7bfa14c 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -90,11 +90,13 @@ bool f2fs_is_compressed_page(struct page *page)
static void f2fs_set_compressed_page(struct page *page,
struct inode *inode, pgoff_t index, void *data)
{
- attach_page_private(page, (void *)data);
+ struct folio *folio = page_folio(page);
+
+ folio_attach_private(folio, (void *)data);
/* i_crypto_info and iv index */
- page->index = index;
- page->mapping = inode->i_mapping;
+ folio->index = index;
+ folio->mapping = inode->i_mapping;
}
static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock)
@@ -160,24 +162,23 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
cc->cluster_idx = NULL_CLUSTER;
}
-void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page)
+void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio)
{
unsigned int cluster_ofs;
- if (!f2fs_cluster_can_merge_page(cc, page->index))
+ if (!f2fs_cluster_can_merge_page(cc, folio->index))
f2fs_bug_on(F2FS_I_SB(cc->inode), 1);
- cluster_ofs = offset_in_cluster(cc, page->index);
- cc->rpages[cluster_ofs] = page;
+ cluster_ofs = offset_in_cluster(cc, folio->index);
+ cc->rpages[cluster_ofs] = folio_page(folio, 0);
cc->nr_rpages++;
- cc->cluster_idx = cluster_idx(cc, page->index);
+ cc->cluster_idx = cluster_idx(cc, folio->index);
}
#ifdef CONFIG_F2FS_FS_LZO
static int lzo_init_compress_ctx(struct compress_ctx *cc)
{
- cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
- LZO1X_MEM_COMPRESS, GFP_NOFS);
+ cc->private = f2fs_vmalloc(LZO1X_MEM_COMPRESS);
if (!cc->private)
return -ENOMEM;
@@ -187,7 +188,7 @@ static int lzo_init_compress_ctx(struct compress_ctx *cc)
static void lzo_destroy_compress_ctx(struct compress_ctx *cc)
{
- kvfree(cc->private);
+ vfree(cc->private);
cc->private = NULL;
}
@@ -244,7 +245,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc)
size = LZ4HC_MEM_COMPRESS;
#endif
- cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), size, GFP_NOFS);
+ cc->private = f2fs_vmalloc(size);
if (!cc->private)
return -ENOMEM;
@@ -259,7 +260,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc)
static void lz4_destroy_compress_ctx(struct compress_ctx *cc)
{
- kvfree(cc->private);
+ vfree(cc->private);
cc->private = NULL;
}
@@ -340,8 +341,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
params = zstd_get_params(level, cc->rlen);
workspace_size = zstd_cstream_workspace_bound(&params.cParams);
- workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
- workspace_size, GFP_NOFS);
+ workspace = f2fs_vmalloc(workspace_size);
if (!workspace)
return -ENOMEM;
@@ -349,7 +349,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
if (!stream) {
f2fs_err_ratelimited(F2FS_I_SB(cc->inode),
"%s zstd_init_cstream failed", __func__);
- kvfree(workspace);
+ vfree(workspace);
return -EIO;
}
@@ -362,7 +362,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
static void zstd_destroy_compress_ctx(struct compress_ctx *cc)
{
- kvfree(cc->private);
+ vfree(cc->private);
cc->private = NULL;
cc->private2 = NULL;
}
@@ -421,8 +421,7 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
workspace_size = zstd_dstream_workspace_bound(max_window_size);
- workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
- workspace_size, GFP_NOFS);
+ workspace = f2fs_vmalloc(workspace_size);
if (!workspace)
return -ENOMEM;
@@ -430,7 +429,7 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
if (!stream) {
f2fs_err_ratelimited(F2FS_I_SB(dic->inode),
"%s zstd_init_dstream failed", __func__);
- kvfree(workspace);
+ vfree(workspace);
return -EIO;
}
@@ -442,7 +441,7 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic)
{
- kvfree(dic->private);
+ vfree(dic->private);
dic->private = NULL;
dic->private2 = NULL;
}
@@ -879,7 +878,7 @@ static bool cluster_has_invalid_data(struct compress_ctx *cc)
f2fs_bug_on(F2FS_I_SB(cc->inode), !page);
/* beyond EOF */
- if (page->index >= nr_pages)
+ if (page_folio(page)->index >= nr_pages)
return true;
}
return false;
@@ -1045,6 +1044,31 @@ static void set_cluster_writeback(struct compress_ctx *cc)
}
}
+static void cancel_cluster_writeback(struct compress_ctx *cc,
+ struct compress_io_ctx *cic, int submitted)
+{
+ int i;
+
+ /* Wait for submitted IOs. */
+ if (submitted > 1) {
+ f2fs_submit_merged_write(F2FS_I_SB(cc->inode), DATA);
+ while (atomic_read(&cic->pending_pages) !=
+ (cc->valid_nr_cpages - submitted + 1))
+ f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
+ }
+
+ /* Cancel writeback and stay locked. */
+ for (i = 0; i < cc->cluster_size; i++) {
+ if (i < submitted) {
+ inode_inc_dirty_pages(cc->inode);
+ lock_page(cc->rpages[i]);
+ }
+ clear_page_private_gcing(cc->rpages[i]);
+ if (folio_test_writeback(page_folio(cc->rpages[i])))
+ end_page_writeback(cc->rpages[i]);
+ }
+}
+
static void set_cluster_dirty(struct compress_ctx *cc)
{
int i;
@@ -1088,14 +1112,14 @@ retry:
if (PageUptodate(page))
f2fs_put_page(page, 1);
else
- f2fs_compress_ctx_add_page(cc, page);
+ f2fs_compress_ctx_add_page(cc, page_folio(page));
}
if (!f2fs_cluster_is_empty(cc)) {
struct bio *bio = NULL;
ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
- &last_block_in_bio, false, true);
+ &last_block_in_bio, NULL, true);
f2fs_put_rpages(cc);
f2fs_destroy_compress_ctx(cc, true);
if (ret)
@@ -1118,7 +1142,7 @@ retry:
}
f2fs_wait_on_page_writeback(page, DATA, true, true);
- f2fs_compress_ctx_add_page(cc, page);
+ f2fs_compress_ctx_add_page(cc, page_folio(page));
if (!PageUptodate(page)) {
release_and_retry:
@@ -1246,7 +1270,6 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
.page = NULL,
.encrypted_page = NULL,
.compressed_page = NULL,
- .submitted = 0,
.io_type = io_type,
.io_wbc = wbc,
.encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ?
@@ -1372,7 +1395,16 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
fio.compressed_page = cc->cpages[i - 1];
cc->cpages[i - 1] = NULL;
+ fio.submitted = 0;
f2fs_outplace_write_data(&dn, &fio);
+ if (unlikely(!fio.submitted)) {
+ cancel_cluster_writeback(cc, cic, i);
+
+ /* To call fscrypt_finalize_bounce_page */
+ i = cc->valid_nr_cpages;
+ *submitted = 0;
+ goto out_destroy_crypt;
+ }
(*submitted)++;
unlock_continue:
inode_dec_dirty_pages(cc->inode);
@@ -1406,8 +1438,11 @@ unlock_continue:
out_destroy_crypt:
page_array_free(cc->inode, cic->rpages, cc->cluster_size);
- for (--i; i >= 0; i--)
+ for (--i; i >= 0; i--) {
+ if (!cc->cpages[i])
+ continue;
fscrypt_finalize_bounce_page(&cc->cpages[i]);
+ }
out_put_cic:
kmem_cache_free(cic_entry_slab, cic);
out_put_dnode:
@@ -1498,7 +1533,7 @@ continue_unlock:
if (!PageDirty(cc->rpages[i]))
goto continue_unlock;
- if (PageWriteback(cc->rpages[i])) {
+ if (folio_test_writeback(page_folio(cc->rpages[i]))) {
if (wbc->sync_mode == WB_SYNC_NONE)
goto continue_unlock;
f2fs_wait_on_page_writeback(cc->rpages[i], DATA, true, true);
@@ -1507,7 +1542,8 @@ continue_unlock:
if (!clear_page_dirty_for_io(cc->rpages[i]))
goto continue_unlock;
- ret = f2fs_write_single_data_page(cc->rpages[i], &submitted,
+ ret = f2fs_write_single_data_page(page_folio(cc->rpages[i]),
+ &submitted,
NULL, NULL, wbc, io_type,
compr_blocks, false);
if (ret) {
@@ -1905,12 +1941,8 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page,
set_page_private_data(cpage, ino);
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE_READ))
- goto out;
-
memcpy(page_address(cpage), page_address(page), PAGE_SIZE);
SetPageUptodate(cpage);
-out:
f2fs_put_page(cpage, 1);
}
@@ -1971,7 +2003,7 @@ void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino)
continue;
}
- generic_error_remove_page(mapping, &folio->page);
+ generic_error_remove_folio(mapping, folio);
folio_unlock(folio);
}
folio_batch_release(&fbatch);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index acd0764b0286..efc30626760a 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -7,7 +7,6 @@
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
-#include <linux/buffer_head.h>
#include <linux/sched/mm.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
@@ -54,8 +53,8 @@ bool f2fs_is_cp_guaranteed(struct page *page)
struct inode *inode;
struct f2fs_sb_info *sbi;
- if (!mapping)
- return false;
+ if (fscrypt_is_bounce_page(page))
+ return page_private_gcing(fscrypt_pagecache_page(page));
inode = mapping->host;
sbi = F2FS_I_SB(inode);
@@ -288,7 +287,7 @@ static void f2fs_read_end_io(struct bio *bio)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
struct bio_post_read_ctx *ctx;
- bool intask = in_task();
+ bool intask = in_task() && !irqs_disabled();
iostat_update_and_unbind_ctx(bio);
ctx = bio->bi_private;
@@ -355,7 +354,7 @@ static void f2fs_write_end_io(struct bio *bio)
}
f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
- page->index != nid_of_node(page));
+ page_folio(page)->index != nid_of_node(page));
dec_page_count(sbi, type);
if (f2fs_in_warm_node_list(sbi, page))
@@ -465,6 +464,8 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
} else {
bio->bi_end_io = f2fs_write_end_io;
bio->bi_private = sbi;
+ bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
+ fio->type, fio->temp);
}
iostat_alloc_and_bind_ctx(sbi, bio, NULL);
@@ -593,17 +594,20 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
return -ENOMEM;
for (j = HOT; j < n; j++) {
- init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
- sbi->write_io[i][j].sbi = sbi;
- sbi->write_io[i][j].bio = NULL;
- spin_lock_init(&sbi->write_io[i][j].io_lock);
- INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
- INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
- init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
+ struct f2fs_bio_info *io = &sbi->write_io[i][j];
+
+ init_f2fs_rwsem(&io->io_rwsem);
+ io->sbi = sbi;
+ io->bio = NULL;
+ io->last_block_in_bio = 0;
+ spin_lock_init(&io->io_lock);
+ INIT_LIST_HEAD(&io->io_list);
+ INIT_LIST_HEAD(&io->bio_list);
+ init_f2fs_rwsem(&io->bio_list_lock);
#ifdef CONFIG_BLK_DEV_ZONED
- init_completion(&sbi->write_io[i][j].zone_wait);
- sbi->write_io[i][j].zone_pending_bio = NULL;
- sbi->write_io[i][j].bi_private = NULL;
+ init_completion(&io->zone_wait);
+ io->zone_pending_bio = NULL;
+ io->bi_private = NULL;
#endif
}
}
@@ -690,10 +694,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
fio->is_por ? META_POR : (__is_meta_io(fio) ?
- META_GENERIC : DATA_GENERIC_ENHANCE))) {
- f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
+ META_GENERIC : DATA_GENERIC_ENHANCE)))
return -EFSCORRUPTED;
- }
trace_f2fs_submit_page_bio(page, fio);
@@ -701,7 +703,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
bio = __bio_alloc(fio, 1);
f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
- fio->page->index, fio, GFP_NOIO);
+ page_folio(fio->page)->index, fio, GFP_NOIO);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
@@ -709,7 +711,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
}
if (fio->io_wbc && !is_read_io(fio->op))
- wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
+ PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
__read_io_type(page) : WB_DATA_TYPE(fio->page, false));
@@ -800,7 +803,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
fio->new_blkaddr));
if (f2fs_crypt_mergeable_bio(*bio,
fio->page->mapping->host,
- fio->page->index, fio) &&
+ page_folio(fio->page)->index, fio) &&
bio_add_page(*bio, page, PAGE_SIZE, 0) ==
PAGE_SIZE) {
ret = 0;
@@ -888,10 +891,8 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
fio->encrypted_page : fio->page;
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
- __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
- f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
+ __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
return -EFSCORRUPTED;
- }
trace_f2fs_submit_page_bio(page, fio);
@@ -902,7 +903,7 @@ alloc_new:
if (!bio) {
bio = __bio_alloc(fio, BIO_MAX_VECS);
f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
- fio->page->index, fio, GFP_NOIO);
+ page_folio(fio->page)->index, fio, GFP_NOIO);
add_bio_entry(fio->sbi, bio, page, fio->temp);
} else {
@@ -911,7 +912,8 @@ alloc_new:
}
if (fio->io_wbc)
- wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
+ PAGE_SIZE);
inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
@@ -995,13 +997,13 @@ next:
(!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
fio->new_blkaddr) ||
!f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
- bio_page->index, fio)))
+ page_folio(bio_page)->index, fio)))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
io->bio = __bio_alloc(fio, BIO_MAX_VECS);
f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
- bio_page->index, fio, GFP_NOIO);
+ page_folio(bio_page)->index, fio, GFP_NOIO);
io->fio = *fio;
}
@@ -1011,7 +1013,8 @@ alloc_new:
}
if (fio->io_wbc)
- wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
+ PAGE_SIZE);
io->last_block_in_bio = fio->new_blkaddr;
@@ -1086,7 +1089,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
}
/* This can handle encryption stuffs */
-static int f2fs_submit_page_read(struct inode *inode, struct page *page,
+static int f2fs_submit_page_read(struct inode *inode, struct folio *folio,
block_t blkaddr, blk_opf_t op_flags,
bool for_write)
{
@@ -1094,14 +1097,14 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
struct bio *bio;
bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
- page->index, for_write);
+ folio->index, for_write);
if (IS_ERR(bio))
return PTR_ERR(bio);
/* wait for GCed page writeback via META_MAPPING */
f2fs_wait_on_block_writeback(inode, blkaddr);
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
+ if (!bio_add_folio(bio, folio, PAGE_SIZE, 0)) {
iostat_update_and_unbind_ctx(bio);
if (bio->bi_private)
mempool_free(bio->bi_private, bio_post_read_ctx_pool);
@@ -1221,8 +1224,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(F2FS_I_SB(inode),
- ERROR_INVALID_BLKADDR);
goto put_err;
}
goto got_it;
@@ -1248,8 +1249,6 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
dn.data_blkaddr,
DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(F2FS_I_SB(inode),
- ERROR_INVALID_BLKADDR);
goto put_err;
}
got_it:
@@ -1273,7 +1272,7 @@ got_it:
return page;
}
- err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
+ err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr,
op_flags, for_write);
if (err)
goto put_err;
@@ -1418,8 +1417,11 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
old_blkaddr = dn->data_blkaddr;
- f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
- &sum, seg_type, NULL);
+ err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
+ &dn->data_blkaddr, &sum, seg_type, NULL);
+ if (err)
+ return err;
+
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
f2fs_invalidate_internal_cache(sbi, old_blkaddr);
@@ -1514,6 +1516,25 @@ static bool f2fs_map_blocks_cached(struct inode *inode,
return true;
}
+static bool map_is_mergeable(struct f2fs_sb_info *sbi,
+ struct f2fs_map_blocks *map,
+ block_t blkaddr, int flag, int bidx,
+ int ofs)
+{
+ if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
+ return false;
+ if (map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs))
+ return true;
+ if (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR)
+ return true;
+ if (flag == F2FS_GET_BLOCK_PRE_DIO)
+ return true;
+ if (flag == F2FS_GET_BLOCK_DIO &&
+ map->m_pblk == NULL_ADDR && blkaddr == NULL_ADDR)
+ return true;
+ return false;
+}
+
/*
* f2fs_map_blocks() tries to find or build mapping relationship which
* maps continuous logical blocks to physical blocks, and return such
@@ -1552,8 +1573,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
end = pgofs + maxblocks;
next_dnode:
- if (map->m_may_create)
+ if (map->m_may_create) {
+ if (f2fs_lfs_mode(sbi))
+ f2fs_balance_fs(sbi, true);
f2fs_map_lock(sbi, flag);
+ }
/* When reading holes, we need its node page */
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -1577,13 +1601,13 @@ next_block:
if (!is_hole &&
!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto sync_out;
}
/* use out-place-update for direct IO under LFS mode */
- if (map->m_may_create &&
- (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
+ if (map->m_may_create && (is_hole ||
+ (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
+ !f2fs_is_pinned_file(inode)))) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
goto sync_out;
@@ -1636,6 +1660,10 @@ next_block:
goto sync_out;
}
break;
+ case F2FS_GET_BLOCK_DIO:
+ if (map->m_next_pgofs)
+ *map->m_next_pgofs = pgofs + 1;
+ break;
default:
/* for defragment case */
if (map->m_next_pgofs)
@@ -1654,19 +1682,16 @@ next_block:
/* reserved delalloc block should be mapped for fiemap. */
if (blkaddr == NEW_ADDR)
map->m_flags |= F2FS_MAP_DELALLOC;
- map->m_flags |= F2FS_MAP_MAPPED;
+ /* DIO READ and hole case, should not map the blocks. */
+ if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create))
+ map->m_flags |= F2FS_MAP_MAPPED;
map->m_pblk = blkaddr;
map->m_len = 1;
if (map->m_multidev_dio)
map->m_bdev = FDEV(bidx).bdev;
- } else if ((map->m_pblk != NEW_ADDR &&
- blkaddr == (map->m_pblk + ofs)) ||
- (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
- flag == F2FS_GET_BLOCK_PRE_DIO) {
- if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
- goto sync_out;
+ } else if (map_is_mergeable(sbi, map, blkaddr, flag, bidx, ofs)) {
ofs++;
map->m_len++;
} else {
@@ -1694,6 +1719,14 @@ skip:
dn.ofs_in_node = end_offset;
}
+ if (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
+ map->m_may_create) {
+ /* the next block to be allocated may not be contiguous. */
+ if (GET_SEGOFF_FROM_SEG0(sbi, blkaddr) % BLKS_PER_SEC(sbi) ==
+ CAP_BLKS_PER_SEC(sbi) - 1)
+ goto sync_out;
+ }
+
if (pgofs >= end)
goto sync_out;
else if (dn.ofs_in_node < end_offset)
@@ -1792,16 +1825,6 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
return true;
}
-static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
-{
- return (bytes >> inode->i_blkbits);
-}
-
-static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
-{
- return (blks << inode->i_blkbits);
-}
-
static int f2fs_xattr_fiemap(struct inode *inode,
struct fiemap_extent_info *fieinfo)
{
@@ -1827,7 +1850,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
return err;
}
- phys = blks_to_bytes(inode, ni.blk_addr);
+ phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
offset = offsetof(struct f2fs_inode, i_addr) +
sizeof(__le32) * (DEF_ADDRS_PER_INODE -
get_inline_xattr_addrs(inode));
@@ -1859,7 +1882,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
return err;
}
- phys = blks_to_bytes(inode, ni.blk_addr);
+ phys = F2FS_BLK_TO_BYTES(ni.blk_addr);
len = inode->i_sb->s_blocksize;
f2fs_put_page(page, 1);
@@ -1879,7 +1902,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
struct f2fs_map_blocks map;
- sector_t start_blk, last_blk;
+ sector_t start_blk, last_blk, blk_len, max_len;
pgoff_t next_pgofs;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
@@ -1899,9 +1922,9 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
if (ret)
return ret;
- inode_lock(inode);
+ inode_lock_shared(inode);
- maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
+ maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
if (start > maxbytes) {
ret = -EFBIG;
goto out;
@@ -1921,16 +1944,15 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
goto out;
}
- if (bytes_to_blks(inode, len) == 0)
- len = blks_to_bytes(inode, 1);
-
- start_blk = bytes_to_blks(inode, start);
- last_blk = bytes_to_blks(inode, start + len - 1);
+ start_blk = F2FS_BYTES_TO_BLK(start);
+ last_blk = F2FS_BYTES_TO_BLK(start + len - 1);
+ blk_len = last_blk - start_blk + 1;
+ max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk;
next:
memset(&map, 0, sizeof(map));
map.m_lblk = start_blk;
- map.m_len = bytes_to_blks(inode, len);
+ map.m_len = blk_len;
map.m_next_pgofs = &next_pgofs;
map.m_seg_type = NO_CHECK_TYPE;
@@ -1947,12 +1969,23 @@ next:
if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
start_blk = next_pgofs;
- if (blks_to_bytes(inode, start_blk) < maxbytes)
+ if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes)
goto prep_next;
flags |= FIEMAP_EXTENT_LAST;
}
+ /*
+ * current extent may cross boundary of inquiry, increase len to
+ * requery.
+ */
+ if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) &&
+ map.m_lblk + map.m_len - 1 == last_blk &&
+ blk_len != max_len) {
+ blk_len = max_len;
+ goto next;
+ }
+
compr_appended = false;
/* In a case of compressed cluster, append this to the last extent */
if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
@@ -1984,14 +2017,14 @@ skip_fill:
} else if (compr_appended) {
unsigned int appended_blks = cluster_size -
count_in_cluster + 1;
- size += blks_to_bytes(inode, appended_blks);
+ size += F2FS_BLK_TO_BYTES(appended_blks);
start_blk += appended_blks;
compr_cluster = false;
} else {
- logical = blks_to_bytes(inode, start_blk);
+ logical = F2FS_BLK_TO_BYTES(start_blk);
phys = __is_valid_data_blkaddr(map.m_pblk) ?
- blks_to_bytes(inode, map.m_pblk) : 0;
- size = blks_to_bytes(inode, map.m_len);
+ F2FS_BLK_TO_BYTES(map.m_pblk) : 0;
+ size = F2FS_BLK_TO_BYTES(map.m_len);
flags = 0;
if (compr_cluster) {
@@ -1999,13 +2032,13 @@ skip_fill:
count_in_cluster += map.m_len;
if (count_in_cluster == cluster_size) {
compr_cluster = false;
- size += blks_to_bytes(inode, 1);
+ size += F2FS_BLKSIZE;
}
} else if (map.m_flags & F2FS_MAP_DELALLOC) {
flags = FIEMAP_EXTENT_UNWRITTEN;
}
- start_blk += bytes_to_blks(inode, size);
+ start_blk += F2FS_BYTES_TO_BLK(size);
}
prep_next:
@@ -2018,37 +2051,43 @@ out:
if (ret == 1)
ret = 0;
- inode_unlock(inode);
+ inode_unlock_shared(inode);
return ret;
}
static inline loff_t f2fs_readpage_limit(struct inode *inode)
{
if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
- return inode->i_sb->s_maxbytes;
+ return F2FS_BLK_TO_BYTES(max_file_blocks(inode));
return i_size_read(inode);
}
-static int f2fs_read_single_page(struct inode *inode, struct page *page,
+static inline blk_opf_t f2fs_ra_op_flags(struct readahead_control *rac)
+{
+ return rac ? REQ_RAHEAD : 0;
+}
+
+static int f2fs_read_single_page(struct inode *inode, struct folio *folio,
unsigned nr_pages,
struct f2fs_map_blocks *map,
struct bio **bio_ret,
sector_t *last_block_in_bio,
- bool is_readahead)
+ struct readahead_control *rac)
{
struct bio *bio = *bio_ret;
- const unsigned blocksize = blks_to_bytes(inode, 1);
+ const unsigned int blocksize = F2FS_BLKSIZE;
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
sector_t block_nr;
+ pgoff_t index = folio_index(folio);
int ret = 0;
- block_in_file = (sector_t)page_index(page);
+ block_in_file = (sector_t)index;
last_block = block_in_file + nr_pages;
- last_block_in_file = bytes_to_blks(inode,
- f2fs_readpage_limit(inode) + blocksize - 1);
+ last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
+ blocksize - 1);
if (last_block > last_block_in_file)
last_block = last_block_in_file;
@@ -2076,26 +2115,24 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
got_it:
if ((map->m_flags & F2FS_MAP_MAPPED)) {
block_nr = map->m_pblk + block_in_file - map->m_lblk;
- SetPageMappedToDisk(page);
+ folio_set_mappedtodisk(folio);
if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
DATA_GENERIC_ENHANCE_READ)) {
ret = -EFSCORRUPTED;
- f2fs_handle_error(F2FS_I_SB(inode),
- ERROR_INVALID_BLKADDR);
goto out;
}
} else {
zero_out:
- zero_user_segment(page, 0, PAGE_SIZE);
- if (f2fs_need_verity(inode, page->index) &&
- !fsverity_verify_page(page)) {
+ folio_zero_segment(folio, 0, folio_size(folio));
+ if (f2fs_need_verity(inode, index) &&
+ !fsverity_verify_folio(folio)) {
ret = -EIO;
goto out;
}
- if (!PageUptodate(page))
- SetPageUptodate(page);
- unlock_page(page);
+ if (!folio_test_uptodate(folio))
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
goto out;
}
@@ -2105,14 +2142,14 @@ zero_out:
*/
if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
*last_block_in_bio, block_nr) ||
- !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
+ !f2fs_crypt_mergeable_bio(bio, inode, index, NULL))) {
submit_and_realloc:
f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
bio = NULL;
}
if (bio == NULL) {
bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0, page->index,
+ f2fs_ra_op_flags(rac), index,
false);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
@@ -2127,7 +2164,7 @@ submit_and_realloc:
*/
f2fs_wait_on_block_writeback(inode, block_nr);
- if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ if (!bio_add_folio(bio, folio, blocksize, 0))
goto submit_and_realloc;
inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
@@ -2142,7 +2179,7 @@ out:
#ifdef CONFIG_F2FS_FS_COMPRESSION
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
- bool is_readahead, bool for_write)
+ struct readahead_control *rac, bool for_write)
{
struct dnode_of_data dn;
struct inode *inode = cc->inode;
@@ -2150,7 +2187,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
struct bio *bio = *bio_ret;
unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
sector_t last_block_in_file;
- const unsigned blocksize = blks_to_bytes(inode, 1);
+ const unsigned int blocksize = F2FS_BLKSIZE;
struct decompress_io_ctx *dic = NULL;
struct extent_info ei = {};
bool from_dnode = true;
@@ -2159,25 +2196,28 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
- last_block_in_file = bytes_to_blks(inode,
- f2fs_readpage_limit(inode) + blocksize - 1);
+ last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) +
+ blocksize - 1);
/* get rid of pages beyond EOF */
for (i = 0; i < cc->cluster_size; i++) {
struct page *page = cc->rpages[i];
+ struct folio *folio;
if (!page)
continue;
- if ((sector_t)page->index >= last_block_in_file) {
- zero_user_segment(page, 0, PAGE_SIZE);
- if (!PageUptodate(page))
- SetPageUptodate(page);
- } else if (!PageUptodate(page)) {
+
+ folio = page_folio(page);
+ if ((sector_t)folio->index >= last_block_in_file) {
+ folio_zero_segment(folio, 0, folio_size(folio));
+ if (!folio_test_uptodate(folio))
+ folio_mark_uptodate(folio);
+ } else if (!folio_test_uptodate(folio)) {
continue;
}
- unlock_page(page);
+ folio_unlock(folio);
if (for_write)
- put_page(page);
+ folio_put(folio);
cc->rpages[i] = NULL;
cc->nr_rpages--;
}
@@ -2237,7 +2277,7 @@ skip_reading_dnode:
}
for (i = 0; i < cc->nr_cpages; i++) {
- struct page *page = dic->cpages[i];
+ struct folio *folio = page_folio(dic->cpages[i]);
block_t blkaddr;
struct bio_post_read_ctx *ctx;
@@ -2247,7 +2287,8 @@ skip_reading_dnode:
f2fs_wait_on_block_writeback(inode, blkaddr);
- if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
+ if (f2fs_load_compressed_page(sbi, folio_page(folio, 0),
+ blkaddr)) {
if (atomic_dec_and_test(&dic->remaining_pages)) {
f2fs_decompress_cluster(dic, true);
break;
@@ -2257,7 +2298,7 @@ skip_reading_dnode:
if (bio && (!page_is_mergeable(sbi, bio,
*last_block_in_bio, blkaddr) ||
- !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
+ !f2fs_crypt_mergeable_bio(bio, inode, folio->index, NULL))) {
submit_and_realloc:
f2fs_submit_read_bio(sbi, bio, DATA);
bio = NULL;
@@ -2265,8 +2306,8 @@ submit_and_realloc:
if (!bio) {
bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
- is_readahead ? REQ_RAHEAD : 0,
- page->index, for_write);
+ f2fs_ra_op_flags(rac),
+ folio->index, for_write);
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
f2fs_decompress_end_io(dic, ret, true);
@@ -2276,7 +2317,7 @@ submit_and_realloc:
}
}
- if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ if (!bio_add_folio(bio, folio, blocksize, 0))
goto submit_and_realloc;
ctx = get_post_read_ctx(bio);
@@ -2314,7 +2355,7 @@ out:
* Major change was from block_size == page_size in f2fs by default.
*/
static int f2fs_mpage_readpages(struct inode *inode,
- struct readahead_control *rac, struct page *page)
+ struct readahead_control *rac, struct folio *folio)
{
struct bio *bio = NULL;
sector_t last_block_in_bio = 0;
@@ -2331,6 +2372,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
.nr_cpages = 0,
};
pgoff_t nc_cluster_idx = NULL_CLUSTER;
+ pgoff_t index;
#endif
unsigned nr_pages = rac ? readahead_count(rac) : 1;
unsigned max_nr_pages = nr_pages;
@@ -2347,64 +2389,63 @@ static int f2fs_mpage_readpages(struct inode *inode,
for (; nr_pages; nr_pages--) {
if (rac) {
- page = readahead_page(rac);
- prefetchw(&page->flags);
+ folio = readahead_folio(rac);
+ prefetchw(&folio->flags);
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (f2fs_compressed_file(inode)) {
- /* there are remained compressed pages, submit them */
- if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
- ret = f2fs_read_multi_pages(&cc, &bio,
- max_nr_pages,
- &last_block_in_bio,
- rac != NULL, false);
- f2fs_destroy_compress_ctx(&cc, false);
- if (ret)
- goto set_error_page;
- }
- if (cc.cluster_idx == NULL_CLUSTER) {
- if (nc_cluster_idx ==
- page->index >> cc.log_cluster_size) {
- goto read_single_page;
- }
-
- ret = f2fs_is_compressed_cluster(inode, page->index);
- if (ret < 0)
- goto set_error_page;
- else if (!ret) {
- nc_cluster_idx =
- page->index >> cc.log_cluster_size;
- goto read_single_page;
- }
-
- nc_cluster_idx = NULL_CLUSTER;
- }
- ret = f2fs_init_compress_ctx(&cc);
+ index = folio_index(folio);
+
+ if (!f2fs_compressed_file(inode))
+ goto read_single_page;
+
+ /* there are remained compressed pages, submit them */
+ if (!f2fs_cluster_can_merge_page(&cc, index)) {
+ ret = f2fs_read_multi_pages(&cc, &bio,
+ max_nr_pages,
+ &last_block_in_bio,
+ rac, false);
+ f2fs_destroy_compress_ctx(&cc, false);
if (ret)
goto set_error_page;
+ }
+ if (cc.cluster_idx == NULL_CLUSTER) {
+ if (nc_cluster_idx == index >> cc.log_cluster_size)
+ goto read_single_page;
- f2fs_compress_ctx_add_page(&cc, page);
+ ret = f2fs_is_compressed_cluster(inode, index);
+ if (ret < 0)
+ goto set_error_page;
+ else if (!ret) {
+ nc_cluster_idx =
+ index >> cc.log_cluster_size;
+ goto read_single_page;
+ }
- goto next_page;
+ nc_cluster_idx = NULL_CLUSTER;
}
+ ret = f2fs_init_compress_ctx(&cc);
+ if (ret)
+ goto set_error_page;
+
+ f2fs_compress_ctx_add_page(&cc, folio);
+
+ goto next_page;
read_single_page:
#endif
- ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
+ ret = f2fs_read_single_page(inode, folio, max_nr_pages, &map,
&bio, &last_block_in_bio, rac);
if (ret) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
set_error_page:
#endif
- zero_user_segment(page, 0, PAGE_SIZE);
- unlock_page(page);
+ folio_zero_segment(folio, 0, folio_size(folio));
+ folio_unlock(folio);
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
next_page:
#endif
- if (rac)
- put_page(page);
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_compressed_file(inode)) {
@@ -2413,7 +2454,7 @@ next_page:
ret = f2fs_read_multi_pages(&cc, &bio,
max_nr_pages,
&last_block_in_bio,
- rac != NULL, false);
+ rac, false);
f2fs_destroy_compress_ctx(&cc, false);
}
}
@@ -2426,22 +2467,21 @@ next_page:
static int f2fs_read_data_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = folio_file_mapping(folio)->host;
int ret = -EAGAIN;
- trace_f2fs_readpage(page, DATA);
+ trace_f2fs_readpage(folio, DATA);
if (!f2fs_is_compress_backend_ready(inode)) {
- unlock_page(page);
+ folio_unlock(folio);
return -EOPNOTSUPP;
}
/* If the file has inline data, try to read it directly */
if (f2fs_has_inline_data(inode))
- ret = f2fs_read_inline_data(inode, page);
+ ret = f2fs_read_inline_data(inode, folio);
if (ret == -EAGAIN)
- ret = f2fs_mpage_readpages(inode, NULL, page);
+ ret = f2fs_mpage_readpages(inode, NULL, folio);
return ret;
}
@@ -2568,6 +2608,11 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
return true;
if (f2fs_used_in_atomic_write(inode))
return true;
+ /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */
+ if (f2fs_compressed_file(inode) &&
+ F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER &&
+ is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
+ return true;
/* swap file is migrating in aligned write mode */
if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
@@ -2598,8 +2643,8 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio)
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
- struct page *page = fio->page;
- struct inode *inode = page->mapping->host;
+ struct folio *folio = page_folio(fio->page);
+ struct inode *inode = folio->mapping->host;
struct dnode_of_data dn;
struct node_info ni;
bool ipu_force = false;
@@ -2608,21 +2653,18 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
/* Use COW inode to make dnode_of_data for atomic write */
atomic_commit = f2fs_is_atomic_file(inode) &&
- page_private_atomic(fio->page);
+ page_private_atomic(folio_page(folio, 0));
if (atomic_commit)
set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
else
set_new_dnode(&dn, inode, NULL, NULL, 0);
if (need_inplace_update(fio) &&
- f2fs_lookup_read_extent_cache_block(inode, page->index,
+ f2fs_lookup_read_extent_cache_block(inode, folio->index,
&fio->old_blkaddr)) {
if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
- DATA_GENERIC_ENHANCE)) {
- f2fs_handle_error(fio->sbi,
- ERROR_INVALID_BLKADDR);
+ DATA_GENERIC_ENHANCE))
return -EFSCORRUPTED;
- }
ipu_force = true;
fio->need_lock = LOCK_DONE;
@@ -2633,7 +2675,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
return -EAGAIN;
- err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+ err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
if (err)
goto out;
@@ -2641,8 +2683,8 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
/* This page is already truncated */
if (fio->old_blkaddr == NULL_ADDR) {
- ClearPageUptodate(page);
- clear_page_private_gcing(page);
+ folio_clear_uptodate(folio);
+ clear_page_private_gcing(folio_page(folio, 0));
goto out_writepage;
}
got_it:
@@ -2650,7 +2692,6 @@ got_it:
!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
DATA_GENERIC_ENHANCE)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
goto out_writepage;
}
@@ -2669,7 +2710,7 @@ got_it:
if (err)
goto out_writepage;
- set_page_writeback(page);
+ folio_start_writeback(folio);
f2fs_put_dnode(&dn);
if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
@@ -2677,12 +2718,11 @@ got_it:
if (err) {
if (fscrypt_inode_uses_fs_layer_crypto(inode))
fscrypt_finalize_bounce_page(&fio->encrypted_page);
- if (PageWriteback(page))
- end_page_writeback(page);
+ folio_end_writeback(folio);
} else {
set_inode_flag(inode, FI_UPDATE_WRITE);
}
- trace_f2fs_do_write_data_page(fio->page, IPU);
+ trace_f2fs_do_write_data_page(folio, IPU);
return err;
}
@@ -2704,17 +2744,17 @@ got_it:
if (err)
goto out_writepage;
- set_page_writeback(page);
+ folio_start_writeback(folio);
if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
/* LFS mode write path */
f2fs_outplace_write_data(&dn, fio);
- trace_f2fs_do_write_data_page(page, OPU);
+ trace_f2fs_do_write_data_page(folio, OPU);
set_inode_flag(inode, FI_APPEND_WRITE);
if (atomic_commit)
- clear_page_private_atomic(page);
+ clear_page_private_atomic(folio_page(folio, 0));
out_writepage:
f2fs_put_dnode(&dn);
out:
@@ -2723,7 +2763,7 @@ out:
return err;
}
-int f2fs_write_single_data_page(struct page *page, int *submitted,
+int f2fs_write_single_data_page(struct folio *folio, int *submitted,
struct bio **bio,
sector_t *last_block,
struct writeback_control *wbc,
@@ -2731,12 +2771,13 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
int compr_blocks,
bool allow_balance)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
+ struct page *page = folio_page(folio, 0);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = ((unsigned long long)i_size)
>> PAGE_SHIFT;
- loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
+ loff_t psize = (loff_t)(folio->index + 1) << PAGE_SHIFT;
unsigned offset = 0;
bool need_balance_fs = false;
bool quota_inode = IS_NOQUOTA(inode);
@@ -2760,11 +2801,11 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
.last_block = last_block,
};
- trace_f2fs_writepage(page, DATA);
+ trace_f2fs_writepage(folio, DATA);
/* we should bypass data pages to proceed the kworker jobs */
if (unlikely(f2fs_cp_error(sbi))) {
- mapping_set_error(page->mapping, -EIO);
+ mapping_set_error(folio->mapping, -EIO);
/*
* don't drop any dirty dentry pages for keeping lastest
* directory structure.
@@ -2782,7 +2823,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
- if (page->index < end_index ||
+ if (folio->index < end_index ||
f2fs_verity_in_progress(inode) ||
compr_blocks)
goto write;
@@ -2792,10 +2833,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
* this page does not have to be written to disk.
*/
offset = i_size & (PAGE_SIZE - 1);
- if ((page->index >= end_index + 1) || !offset)
+ if ((folio->index >= end_index + 1) || !offset)
goto out;
- zero_user_segment(page, offset, PAGE_SIZE);
+ folio_zero_segment(folio, offset, folio_size(folio));
write:
/* Dentry/quota blocks are controlled by checkpoint */
if (S_ISDIR(inode->i_mode) || quota_inode) {
@@ -2825,7 +2866,7 @@ write:
err = -EAGAIN;
if (f2fs_has_inline_data(inode)) {
- err = f2fs_write_inline_data(inode, page);
+ err = f2fs_write_inline_data(inode, folio);
if (!err)
goto out;
}
@@ -2855,7 +2896,7 @@ done:
out:
inode_dec_dirty_pages(inode);
if (err) {
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
clear_page_private_gcing(page);
}
@@ -2865,7 +2906,7 @@ out:
f2fs_remove_dirty_inode(inode);
submitted = NULL;
}
- unlock_page(page);
+ folio_unlock(folio);
if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
!F2FS_I(inode)->wb_task && allow_balance)
f2fs_balance_fs(sbi, need_balance_fs);
@@ -2883,7 +2924,7 @@ out:
return 0;
redirty_out:
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
/*
* pageout() in MM translates EAGAIN, so calls handle_write_error()
* -> mapping_set_error() -> set_bit(AS_EIO, ...).
@@ -2892,29 +2933,30 @@ redirty_out:
*/
if (!err || wbc->for_reclaim)
return AOP_WRITEPAGE_ACTIVATE;
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
static int f2fs_write_data_page(struct page *page,
struct writeback_control *wbc)
{
+ struct folio *folio = page_folio(page);
#ifdef CONFIG_F2FS_FS_COMPRESSION
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
goto out;
if (f2fs_compressed_file(inode)) {
- if (f2fs_is_compressed_cluster(inode, page->index)) {
- redirty_page_for_writepage(wbc, page);
+ if (f2fs_is_compressed_cluster(inode, folio->index)) {
+ folio_redirty_for_writepage(wbc, folio);
return AOP_WRITEPAGE_ACTIVATE;
}
}
out:
#endif
- return f2fs_write_single_data_page(page, NULL, NULL, NULL,
+ return f2fs_write_single_data_page(folio, NULL, NULL, NULL,
wbc, FS_DATA_IO, 0, true);
}
@@ -3120,11 +3162,11 @@ continue_unlock:
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_compressed_file(inode)) {
folio_get(folio);
- f2fs_compress_ctx_add_page(&cc, &folio->page);
+ f2fs_compress_ctx_add_page(&cc, folio);
continue;
}
#endif
- ret = f2fs_write_single_data_page(&folio->page,
+ ret = f2fs_write_single_data_page(folio,
&submitted, &bio, &last_block,
wbc, io_type, 0, true);
if (ret == AOP_WRITEPAGE_ACTIVATE)
@@ -3332,11 +3374,11 @@ void f2fs_write_failed(struct inode *inode, loff_t to)
}
static int prepare_write_begin(struct f2fs_sb_info *sbi,
- struct page *page, loff_t pos, unsigned len,
+ struct folio *folio, loff_t pos, unsigned int len,
block_t *blk_addr, bool *node_changed)
{
- struct inode *inode = page->mapping->host;
- pgoff_t index = page->index;
+ struct inode *inode = folio->mapping->host;
+ pgoff_t index = folio->index;
struct dnode_of_data dn;
struct page *ipage;
bool locked = false;
@@ -3373,13 +3415,13 @@ restart:
if (f2fs_has_inline_data(inode)) {
if (pos + len <= MAX_INLINE_DATA(inode)) {
- f2fs_do_read_inline_data(page, ipage);
+ f2fs_do_read_inline_data(folio, ipage);
set_inode_flag(inode, FI_DATA_EXIST);
if (inode->i_nlink)
set_page_private_inline(ipage);
goto out;
}
- err = f2fs_convert_inline_page(&dn, page);
+ err = f2fs_convert_inline_page(&dn, folio_page(folio, 0));
if (err || dn.data_blkaddr != NULL_ADDR)
goto out;
}
@@ -3472,12 +3514,12 @@ unlock_out:
}
static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
- struct page *page, loff_t pos, unsigned int len,
+ struct folio *folio, loff_t pos, unsigned int len,
block_t *blk_addr, bool *node_changed, bool *use_cow)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct inode *cow_inode = F2FS_I(inode)->cow_inode;
- pgoff_t index = page->index;
+ pgoff_t index = folio->index;
int err = 0;
block_t ori_blk_addr = NULL_ADDR;
@@ -3515,12 +3557,12 @@ reserve_block:
}
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, struct page **pagep, void **fsdata)
+ loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct page *page = NULL;
- pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
+ struct folio *folio;
+ pgoff_t index = pos >> PAGE_SHIFT;
bool need_balance = false;
bool use_cow = false;
block_t blkaddr = NULL_ADDR;
@@ -3536,7 +3578,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
/*
* We should check this at this moment to avoid deadlock on inode page
* and #0 page. The locking rule for inline_data conversion should be:
- * lock_page(page #0) -> lock_page(inode_page)
+ * folio_lock(folio #0) -> folio_lock(inode_page)
*/
if (index != 0) {
err = f2fs_convert_inline_inode(inode);
@@ -3547,18 +3589,20 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_compressed_file(inode)) {
int ret;
+ struct page *page;
*fsdata = NULL;
if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
goto repeat;
- ret = f2fs_prepare_compress_overwrite(inode, pagep,
+ ret = f2fs_prepare_compress_overwrite(inode, &page,
index, fsdata);
if (ret < 0) {
err = ret;
goto fail;
} else if (ret) {
+ *foliop = page_folio(page);
return 0;
}
}
@@ -3566,82 +3610,85 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
repeat:
/*
- * Do not use grab_cache_page_write_begin() to avoid deadlock due to
- * wait_for_stable_page. Will wait that below with our IO control.
+ * Do not use FGP_STABLE to avoid deadlock.
+ * Will wait that below with our IO control.
*/
- page = f2fs_pagecache_get_page(mapping, index,
+ folio = __filemap_get_folio(mapping, index,
FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
- if (!page) {
- err = -ENOMEM;
+ if (IS_ERR(folio)) {
+ err = PTR_ERR(folio);
goto fail;
}
/* TODO: cluster can be compressed due to race with .writepage */
- *pagep = page;
+ *foliop = folio;
if (f2fs_is_atomic_file(inode))
- err = prepare_atomic_write_begin(sbi, page, pos, len,
+ err = prepare_atomic_write_begin(sbi, folio, pos, len,
&blkaddr, &need_balance, &use_cow);
else
- err = prepare_write_begin(sbi, page, pos, len,
+ err = prepare_write_begin(sbi, folio, pos, len,
&blkaddr, &need_balance);
if (err)
- goto fail;
+ goto put_folio;
if (need_balance && !IS_NOQUOTA(inode) &&
has_not_enough_free_secs(sbi, 0, 0)) {
- unlock_page(page);
+ folio_unlock(folio);
f2fs_balance_fs(sbi, true);
- lock_page(page);
- if (page->mapping != mapping) {
- /* The page got truncated from under us */
- f2fs_put_page(page, 1);
+ folio_lock(folio);
+ if (folio->mapping != mapping) {
+ /* The folio got truncated from under us */
+ folio_unlock(folio);
+ folio_put(folio);
goto repeat;
}
}
- f2fs_wait_on_page_writeback(page, DATA, false, true);
+ f2fs_wait_on_page_writeback(&folio->page, DATA, false, true);
- if (len == PAGE_SIZE || PageUptodate(page))
+ if (len == folio_size(folio) || folio_test_uptodate(folio))
return 0;
if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
!f2fs_verity_in_progress(inode)) {
- zero_user_segment(page, len, PAGE_SIZE);
+ folio_zero_segment(folio, len, folio_size(folio));
return 0;
}
if (blkaddr == NEW_ADDR) {
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
+ folio_zero_segment(folio, 0, folio_size(folio));
+ folio_mark_uptodate(folio);
} else {
if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
DATA_GENERIC_ENHANCE_READ)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
- goto fail;
+ goto put_folio;
}
err = f2fs_submit_page_read(use_cow ?
- F2FS_I(inode)->cow_inode : inode, page,
- blkaddr, 0, true);
+ F2FS_I(inode)->cow_inode : inode,
+ folio, blkaddr, 0, true);
if (err)
- goto fail;
+ goto put_folio;
- lock_page(page);
- if (unlikely(page->mapping != mapping)) {
- f2fs_put_page(page, 1);
+ folio_lock(folio);
+ if (unlikely(folio->mapping != mapping)) {
+ folio_unlock(folio);
+ folio_put(folio);
goto repeat;
}
- if (unlikely(!PageUptodate(page))) {
+ if (unlikely(!folio_test_uptodate(folio))) {
err = -EIO;
- goto fail;
+ goto put_folio;
}
}
return 0;
+put_folio:
+ folio_unlock(folio);
+ folio_put(folio);
fail:
- f2fs_put_page(page, 1);
f2fs_write_failed(inode, pos + len);
return err;
}
@@ -3649,9 +3696,9 @@ fail:
static int f2fs_write_end(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
trace_f2fs_write_end(inode, pos, len, copied);
@@ -3660,17 +3707,17 @@ static int f2fs_write_end(struct file *file,
* should be PAGE_SIZE. Otherwise, we treat it with zero copied and
* let generic_perform_write() try to copy data again through copied=0.
*/
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
if (unlikely(copied != len))
copied = 0;
else
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
/* overwrite compressed file */
if (f2fs_compressed_file(inode) && fsdata) {
- f2fs_compress_write_end(inode, fsdata, page->index, copied);
+ f2fs_compress_write_end(inode, fsdata, folio->index, copied);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
if (pos + copied > i_size_read(inode) &&
@@ -3683,10 +3730,10 @@ static int f2fs_write_end(struct file *file,
if (!copied)
goto unlock_out;
- set_page_dirty(page);
+ folio_mark_dirty(folio);
if (f2fs_is_atomic_file(inode))
- set_page_private_atomic(page);
+ set_page_private_atomic(folio_page(folio, 0));
if (pos + copied > i_size_read(inode) &&
!f2fs_verity_in_progress(inode)) {
@@ -3696,7 +3743,8 @@ static int f2fs_write_end(struct file *file,
pos + copied);
}
unlock_out:
- f2fs_put_page(page, 1);
+ folio_unlock(folio);
+ folio_put(folio);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
return copied;
}
@@ -3738,7 +3786,7 @@ static bool f2fs_dirty_data_folio(struct address_space *mapping,
{
struct inode *inode = mapping->host;
- trace_f2fs_set_page_dirty(&folio->page, DATA);
+ trace_f2fs_set_page_dirty(folio, DATA);
if (!folio_test_uptodate(folio))
folio_mark_uptodate(folio);
@@ -3823,13 +3871,14 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned int blkofs;
unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
+ unsigned int end_blk = start_blk + blkcnt - 1;
unsigned int secidx = start_blk / blk_per_sec;
unsigned int end_sec;
int ret = 0;
if (!blkcnt)
return 0;
- end_sec = secidx + (blkcnt - 1) / blk_per_sec;
+ end_sec = end_blk / blk_per_sec;
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
@@ -3839,7 +3888,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
for (; secidx <= end_sec; secidx++) {
unsigned int blkofs_end = secidx == end_sec ?
- (blkcnt - 1) % blk_per_sec : blk_per_sec - 1;
+ end_blk % blk_per_sec : blk_per_sec - 1;
f2fs_down_write(&sbi->pin_sem);
@@ -3909,7 +3958,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
* to be very smart.
*/
cur_lblock = 0;
- last_lblock = bytes_to_blks(inode, i_size_read(inode));
+ last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode));
while (cur_lblock < last_lblock && cur_lblock < sis->max) {
struct f2fs_map_blocks map;
@@ -3940,7 +3989,7 @@ retry:
if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec ||
nr_pblocks % blks_per_sec ||
- !f2fs_valid_pinned_area(sbi, pblock)) {
+ f2fs_is_sequential_zone_area(sbi, pblock)) {
bool last_extent = false;
not_aligned++;
@@ -3995,7 +4044,7 @@ retry:
sis->highest_bit = cur_lblock - 1;
out:
if (not_aligned)
- f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%u * N)",
+ f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
not_aligned, blks_per_sec * F2FS_BLKSIZE);
return ret;
}
@@ -4025,12 +4074,12 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (!f2fs_disable_compressed_file(inode))
return -EINVAL;
- f2fs_precache_extents(inode);
-
ret = filemap_fdatawrite(inode->i_mapping);
if (ret < 0)
return ret;
+ f2fs_precache_extents(inode);
+
ret = check_swap_activate(sis, file, span);
if (ret < 0)
return ret;
@@ -4076,13 +4125,13 @@ const struct address_space_operations f2fs_dblock_aops = {
.swap_deactivate = f2fs_swap_deactivate,
};
-void f2fs_clear_page_cache_dirty_tag(struct page *page)
+void f2fs_clear_page_cache_dirty_tag(struct folio *folio)
{
- struct address_space *mapping = page_mapping(page);
+ struct address_space *mapping = folio->mapping;
unsigned long flags;
xa_lock_irqsave(&mapping->i_pages, flags);
- __xa_clear_mark(&mapping->i_pages, page_index(page),
+ __xa_clear_mark(&mapping->i_pages, folio->index,
PAGECACHE_TAG_DIRTY);
xa_unlock_irqrestore(&mapping->i_pages, flags);
}
@@ -4152,10 +4201,11 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
pgoff_t next_pgofs = 0;
int err;
- map.m_lblk = bytes_to_blks(inode, offset);
- map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
+ map.m_lblk = F2FS_BYTES_TO_BLK(offset);
+ map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1;
map.m_next_pgofs = &next_pgofs;
- map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
+ inode->i_write_hint);
if (flags & IOMAP_WRITE)
map.m_may_create = true;
@@ -4163,7 +4213,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
if (err)
return err;
- iomap->offset = blks_to_bytes(inode, map.m_lblk);
+ iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk);
/*
* When inline encryption is enabled, sometimes I/O to an encrypted file
@@ -4176,23 +4226,32 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
* We should never see delalloc or compressed extents here based on
* prior flushing and checks.
*/
- if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
- return -EINVAL;
if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
return -EINVAL;
if (map.m_flags & F2FS_MAP_MAPPED) {
- iomap->length = blks_to_bytes(inode, map.m_len);
+ if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
+ return -EINVAL;
+
+ iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
iomap->type = IOMAP_MAPPED;
iomap->flags |= IOMAP_F_MERGED;
iomap->bdev = map.m_bdev;
- iomap->addr = blks_to_bytes(inode, map.m_pblk);
+ iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk);
} else {
if (flags & IOMAP_WRITE)
return -ENOTBLK;
- iomap->length = blks_to_bytes(inode, next_pgofs) -
- iomap->offset;
- iomap->type = IOMAP_HOLE;
+
+ if (map.m_pblk == NULL_ADDR) {
+ iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) -
+ iomap->offset;
+ iomap->type = IOMAP_HOLE;
+ } else if (map.m_pblk == NEW_ADDR) {
+ iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
+ iomap->type = IOMAP_UNWRITTEN;
+ } else {
+ f2fs_bug_on(F2FS_I_SB(inode), 1);
+ }
iomap->addr = IOMAP_NULL_ADDR;
}
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 0d02224b99b7..546b8ba91261 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -135,7 +135,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->cur_ckpt_time = sbi->cprc_info.cur_time;
si->peak_ckpt_time = sbi->cprc_info.peak_time;
spin_unlock(&sbi->cprc_info.stat_lock);
- si->total_count = (int)sbi->user_block_count / BLKS_PER_SEG(sbi);
+ si->total_count = BLKS_TO_SEGS(sbi, (int)sbi->user_block_count);
si->rsvd_segs = reserved_segments(sbi);
si->overp_segs = overprovision_segments(sbi);
si->valid_count = valid_user_blocks(sbi);
@@ -176,11 +176,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
si->io_skip_bggc = sbi->io_skip_bggc;
si->other_skip_bggc = sbi->other_skip_bggc;
- si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg)
+ si->util_free = (int)(BLKS_TO_SEGS(sbi, free_user_blocks(sbi)))
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
- si->util_valid = (int)(written_block_count(sbi) >>
- sbi->log_blocks_per_seg)
+ si->util_valid = (int)(BLKS_TO_SEGS(sbi, written_block_count(sbi)))
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
si->util_invalid = 50 - si->util_free - si->util_valid;
@@ -276,7 +275,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build nm */
si->base_mem += sizeof(struct f2fs_nm_info);
si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
- si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
+ si->base_mem += F2FS_BLK_TO_BYTES(NM_I(sbi)->nat_bits_blocks);
si->base_mem += NM_I(sbi)->nat_blocks *
f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK);
si->base_mem += NM_I(sbi)->nat_blocks / 8;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 166ec8942595..54dd52de7269 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -5,7 +5,7 @@
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched/signal.h>
@@ -42,35 +42,49 @@ static unsigned int bucket_blocks(unsigned int level)
return 4;
}
+#if IS_ENABLED(CONFIG_UNICODE)
/* If @dir is casefolded, initialize @fname->cf_name from @fname->usr_fname. */
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
{
-#if IS_ENABLED(CONFIG_UNICODE)
struct super_block *sb = dir->i_sb;
+ unsigned char *buf;
+ int len;
if (IS_CASEFOLDED(dir) &&
!is_dot_dotdot(fname->usr_fname->name, fname->usr_fname->len)) {
- fname->cf_name.name = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
- GFP_NOFS, false, F2FS_SB(sb));
- if (!fname->cf_name.name)
+ buf = f2fs_kmem_cache_alloc(f2fs_cf_name_slab,
+ GFP_NOFS, false, F2FS_SB(sb));
+ if (!buf)
return -ENOMEM;
- fname->cf_name.len = utf8_casefold(sb->s_encoding,
- fname->usr_fname,
- fname->cf_name.name,
- F2FS_NAME_LEN);
- if ((int)fname->cf_name.len <= 0) {
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
+
+ len = utf8_casefold(sb->s_encoding, fname->usr_fname,
+ buf, F2FS_NAME_LEN);
+ if (len <= 0) {
+ kmem_cache_free(f2fs_cf_name_slab, buf);
if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
+ return 0;
}
+ fname->cf_name.name = buf;
+ fname->cf_name.len = len;
}
-#endif
+
return 0;
}
+void f2fs_free_casefolded_name(struct f2fs_filename *fname)
+{
+ unsigned char *buf = (unsigned char *)fname->cf_name.name;
+
+ if (buf) {
+ kmem_cache_free(f2fs_cf_name_slab, buf);
+ fname->cf_name.name = NULL;
+ }
+}
+#endif /* CONFIG_UNICODE */
+
static int __f2fs_setup_filename(const struct inode *dir,
const struct fscrypt_name *crypt_name,
struct f2fs_filename *fname)
@@ -142,12 +156,7 @@ void f2fs_free_filename(struct f2fs_filename *fname)
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
#endif
-#if IS_ENABLED(CONFIG_UNICODE)
- if (fname->cf_name.name) {
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
- }
-#endif
+ f2fs_free_casefolded_name(fname);
}
static unsigned long dir_block_index(unsigned int level,
@@ -166,7 +175,8 @@ static unsigned long dir_block_index(unsigned int level,
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
const struct f2fs_filename *fname,
- int *max_slots)
+ int *max_slots,
+ bool use_hash)
{
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dentry_ptr d;
@@ -174,60 +184,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(dir, &d, dentry_blk);
- return f2fs_find_target_dentry(&d, fname, max_slots);
-}
-
-#if IS_ENABLED(CONFIG_UNICODE)
-/*
- * Test whether a case-insensitive directory entry matches the filename
- * being searched for.
- *
- * Returns 1 for a match, 0 for no match, and -errno on an error.
- */
-static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
- const u8 *de_name, u32 de_name_len)
-{
- const struct super_block *sb = dir->i_sb;
- const struct unicode_map *um = sb->s_encoding;
- struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
- struct qstr entry = QSTR_INIT(de_name, de_name_len);
- int res;
-
- if (IS_ENCRYPTED(dir)) {
- const struct fscrypt_str encrypted_name =
- FSTR_INIT((u8 *)de_name, de_name_len);
-
- if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
- return -EINVAL;
-
- decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
- if (!decrypted_name.name)
- return -ENOMEM;
- res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
- &decrypted_name);
- if (res < 0)
- goto out;
- entry.name = decrypted_name.name;
- entry.len = decrypted_name.len;
- }
-
- res = utf8_strncasecmp_folded(um, name, &entry);
- /*
- * In strict mode, ignore invalid names. In non-strict mode,
- * fall back to treating them as opaque byte sequences.
- */
- if (res < 0 && !sb_has_strict_encoding(sb)) {
- res = name->len == entry.len &&
- memcmp(name->name, entry.name, name->len) == 0;
- } else {
- /* utf8_strncasecmp_folded returns 0 on match */
- res = (res == 0);
- }
-out:
- kfree(decrypted_name.name);
- return res;
+ return f2fs_find_target_dentry(&d, fname, max_slots, use_hash);
}
-#endif /* CONFIG_UNICODE */
static inline int f2fs_match_name(const struct inode *dir,
const struct f2fs_filename *fname,
@@ -236,11 +194,11 @@ static inline int f2fs_match_name(const struct inode *dir,
struct fscrypt_name f;
#if IS_ENABLED(CONFIG_UNICODE)
- if (fname->cf_name.name) {
- struct qstr cf = FSTR_TO_QSTR(&fname->cf_name);
+ if (fname->cf_name.name)
+ return generic_ci_match(dir, fname->usr_fname,
+ &fname->cf_name,
+ de_name, de_name_len);
- return f2fs_match_ci_name(dir, &cf, de_name, de_name_len);
- }
#endif
f.usr_fname = fname->usr_fname;
f.disk_name = fname->disk_name;
@@ -251,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir,
}
struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
- const struct f2fs_filename *fname, int *max_slots)
+ const struct f2fs_filename *fname, int *max_slots,
+ bool use_hash)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
@@ -274,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
continue;
}
- if (de->hash_code == fname->hash) {
+ if (!use_hash || de->hash_code == fname->hash) {
res = f2fs_match_name(d->inode, fname,
d->filename[bit_pos],
le16_to_cpu(de->name_len));
@@ -301,11 +260,12 @@ found:
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int level,
const struct f2fs_filename *fname,
- struct page **res_page)
+ struct page **res_page,
+ bool use_hash)
{
int s = GET_DENTRY_SLOTS(fname->disk_name.len);
unsigned int nbucket, nblock;
- unsigned int bidx, end_block;
+ unsigned int bidx, end_block, bucket_no;
struct page *dentry_page;
struct f2fs_dir_entry *de = NULL;
pgoff_t next_pgofs;
@@ -315,8 +275,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level);
nblock = bucket_blocks(level);
+ bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0;
+
+start_find_bucket:
bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level,
- le32_to_cpu(fname->hash) % nbucket);
+ bucket_no);
end_block = bidx + nblock;
while (bidx < end_block) {
@@ -333,7 +296,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
}
- de = find_in_block(dir, dentry_page, fname, &max_slots);
+ de = find_in_block(dir, dentry_page, fname, &max_slots, use_hash);
if (IS_ERR(de)) {
*res_page = ERR_CAST(de);
de = NULL;
@@ -350,12 +313,18 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
bidx++;
}
- if (!de && room && F2FS_I(dir)->chash != fname->hash) {
- F2FS_I(dir)->chash = fname->hash;
- F2FS_I(dir)->clevel = level;
- }
+ if (de)
+ return de;
- return de;
+ if (likely(use_hash)) {
+ if (room && F2FS_I(dir)->chash != fname->hash) {
+ F2FS_I(dir)->chash = fname->hash;
+ F2FS_I(dir)->clevel = level;
+ }
+ } else if (++bucket_no < nbucket) {
+ goto start_find_bucket;
+ }
+ return NULL;
}
struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
@@ -366,11 +335,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
struct f2fs_dir_entry *de = NULL;
unsigned int max_depth;
unsigned int level;
+ bool use_hash = true;
*res_page = NULL;
+#if IS_ENABLED(CONFIG_UNICODE)
+start_find_entry:
+#endif
if (f2fs_has_inline_dentry(dir)) {
- de = f2fs_find_in_inline_dir(dir, fname, res_page);
+ de = f2fs_find_in_inline_dir(dir, fname, res_page, use_hash);
goto out;
}
@@ -386,11 +359,18 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
}
for (level = 0; level < max_depth; level++) {
- de = find_in_level(dir, level, fname, res_page);
+ de = find_in_level(dir, level, fname, res_page, use_hash);
if (de || IS_ERR(*res_page))
break;
}
+
out:
+#if IS_ENABLED(CONFIG_UNICODE)
+ if (IS_CASEFOLDED(dir) && !de && use_hash) {
+ use_hash = false;
+ goto start_find_entry;
+ }
+#endif
/* This is to increase the speed of f2fs_create */
if (!de)
F2FS_I(dir)->task = current;
@@ -456,7 +436,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
de->file_type = fs_umode_to_ftype(inode->i_mode);
set_page_dirty(page);
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
f2fs_mark_inode_dirty_sync(dir, false);
f2fs_put_page(page, 1);
}
@@ -610,7 +590,7 @@ void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
f2fs_i_links_write(dir, true);
clear_inode_flag(inode, FI_NEW_INODE);
}
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
f2fs_mark_inode_dirty_sync(dir, false);
if (F2FS_I(dir)->i_current_depth != current_depth)
@@ -885,6 +865,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
struct f2fs_dentry_block *dentry_blk;
unsigned int bit_pos;
int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len));
+ pgoff_t index = page_folio(page)->index;
int i;
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
@@ -910,8 +891,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
set_page_dirty(page);
if (bit_pos == NR_DENTRY_IN_BLOCK &&
- !f2fs_truncate_hole(dir, page->index, page->index + 1)) {
- f2fs_clear_page_cache_dirty_tag(page);
+ !f2fs_truncate_hole(dir, index, index + 1)) {
+ f2fs_clear_page_cache_dirty_tag(page_folio(page));
clear_page_dirty_for_io(page);
ClearPageUptodate(page);
clear_page_private_all(page);
@@ -921,7 +902,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
}
f2fs_put_page(page, 1);
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index bfa2d89dc9ea..2ccc86875099 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -33,7 +33,6 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
!f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
DATA_GENERIC_ENHANCE)) {
- set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
__func__, inode->i_ino,
ei.blk, ei.fofs, ei.len);
@@ -382,7 +381,7 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
struct extent_tree *et;
struct extent_node *en;
- struct extent_info ei;
+ struct extent_info ei = {0};
if (!__may_extent_tree(inode, EX_READ)) {
/* drop largest read extent */
@@ -675,7 +674,9 @@ static void __update_extent_tree_range(struct inode *inode,
}
if (end < org_end && (type != EX_READ ||
- org_end - end >= F2FS_MIN_EXTENT_LEN)) {
+ (org_end - end >= F2FS_MIN_EXTENT_LEN &&
+ atomic_read(&et->node_cnt) <
+ sbi->max_read_extent_count))) {
if (parts) {
__set_extent_info(&ei,
end, org_end - end,
@@ -871,10 +872,8 @@ static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
goto out;
if (__is_valid_data_blkaddr(blkaddr) &&
- !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
- f2fs_bug_on(sbi, 1);
+ !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
return -EINVAL;
- }
out:
/*
* init block age with zero, this can happen when the block age extent
@@ -1172,6 +1171,7 @@ void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
sbi->last_age_weight = LAST_AGE_WEIGHT;
+ sbi->max_read_extent_count = DEF_MAX_READ_EXTENT_COUNT;
}
int __init f2fs_create_extent_cache(void)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 33620642ae5e..2dec22f2ea63 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -11,7 +11,6 @@
#include <linux/uio.h>
#include <linux/types.h>
#include <linux/page-flags.h>
-#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/crc32.h>
#include <linux/magic.h>
@@ -24,6 +23,7 @@
#include <linux/blkdev.h>
#include <linux/quotaops.h>
#include <linux/part_stat.h>
+#include <linux/rw_hint.h>
#include <crypto/hash.h>
#include <linux/fscrypt.h>
@@ -60,7 +60,9 @@ enum {
FAULT_SLAB_ALLOC,
FAULT_DQUOT_INIT,
FAULT_LOCK_OP,
- FAULT_BLKADDR,
+ FAULT_BLKADDR_VALIDITY,
+ FAULT_BLKADDR_CONSISTENCE,
+ FAULT_NO_SEGMENT,
FAULT_MAX,
};
@@ -131,6 +133,12 @@ typedef u32 nid_t;
#define COMPRESS_EXT_NUM 16
+enum blkzone_allocation_policy {
+ BLKZONE_ALLOC_PRIOR_SEQ, /* Prioritize writing to sequential zones */
+ BLKZONE_ALLOC_ONLY_SEQ, /* Only allow writing to sequential zones */
+ BLKZONE_ALLOC_PRIOR_CONV, /* Prioritize writing to conventional zones */
+};
+
/*
* An implementation of an rwsem that is explicitly unfair to readers. This
* prevents priority inversion when a low-priority reader acquires the read lock
@@ -379,6 +387,12 @@ enum {
MAX_DPOLICY,
};
+enum {
+ DPOLICY_IO_AWARE_DISABLE, /* force to not be aware of IO */
+ DPOLICY_IO_AWARE_ENABLE, /* force to be aware of IO */
+ DPOLICY_IO_AWARE_MAX,
+};
+
struct discard_policy {
int type; /* type of discard */
unsigned int min_interval; /* used for candidates exist */
@@ -411,6 +425,7 @@ struct discard_cmd_control {
unsigned int discard_urgent_util; /* utilization which issue discard proactively */
unsigned int discard_granularity; /* discard granularity */
unsigned int max_ordered_discard; /* maximum discard granularity issued by lba order */
+ unsigned int discard_io_aware; /* io_aware policy */
unsigned int undiscard_blks; /* # of undiscard blocks */
unsigned int next_pos; /* next discard position */
atomic_t issued_discard; /* # of issued discard */
@@ -522,7 +537,7 @@ struct f2fs_filename {
* internal operation where usr_fname is also NULL. In all these cases
* we fall back to treating the name as an opaque byte sequence.
*/
- struct fscrypt_str cf_name;
+ struct qstr cf_name;
#endif
};
@@ -619,6 +634,9 @@ enum {
#define DEF_HOT_DATA_AGE_THRESHOLD 262144
#define DEF_WARM_DATA_AGE_THRESHOLD 2621440
+/* default max read extent count per inode */
+#define DEF_MAX_READ_EXTENT_COUNT 10240
+
/* extent cache type */
enum extent_type {
EX_READ,
@@ -756,11 +774,6 @@ enum {
#define DEF_DIR_LEVEL 0
-enum {
- GC_FAILURE_PIN,
- MAX_GC_FAILURE
-};
-
/* used for f2fs_inode_info->flags */
enum {
FI_NEW_INODE, /* indicate newly allocated inode */
@@ -808,9 +821,10 @@ struct f2fs_inode_info {
unsigned long i_flags; /* keep an inode flags for ioctl */
unsigned char i_advise; /* use to give file attribute hints */
unsigned char i_dir_level; /* use for dentry level for large dir */
- unsigned int i_current_depth; /* only for directory depth */
- /* for gc failure statistic */
- unsigned int i_gc_failures[MAX_GC_FAILURE];
+ union {
+ unsigned int i_current_depth; /* only for directory depth */
+ unsigned short i_gc_failures; /* for gc failure statistic */
+ };
unsigned int i_pino; /* parent inode number */
umode_t i_acl_mode; /* keep file acl mode temporarily */
@@ -1244,8 +1258,9 @@ struct f2fs_bio_info {
#define FDEV(i) (sbi->devs[i])
#define RDEV(i) (raw_super->devs[i])
struct f2fs_dev_info {
+ struct file *bdev_file;
struct block_device *bdev;
- char path[MAX_PATH_LEN];
+ char path[MAX_PATH_LEN + 1];
unsigned int total_segments;
block_t start_blk;
block_t end_blk;
@@ -1288,6 +1303,7 @@ struct f2fs_gc_control {
bool no_bg_gc; /* check the space and stop bg_gc */
bool should_migrate_blocks; /* should migrate blocks */
bool err_gc_skipped; /* return EAGAIN if GC skipped */
+ bool one_time; /* require one time GC in one migration unit */
unsigned int nr_free_secs; /* # of free sections to do GC */
};
@@ -1555,6 +1571,9 @@ struct f2fs_sb_info {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
+ unsigned int max_open_zones; /* max open zone resources of the zoned device */
+ /* For adjust the priority writing position of data in zone UFS */
+ unsigned int blkzone_alloc_policy;
#endif
/* for node-related operations */
@@ -1603,6 +1622,7 @@ struct f2fs_sb_info {
/* for extent tree cache */
struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
atomic64_t allocated_data_blocks; /* for block age extent_cache */
+ unsigned int max_read_extent_count; /* max read extent count per inode */
/* The threshold used for hot and warm data seperation*/
unsigned int hot_data_age_threshold;
@@ -1674,13 +1694,15 @@ struct f2fs_sb_info {
unsigned long long skipped_gc_rwsem; /* FG_GC only */
/* threshold for gc trials on pinned files */
- u64 gc_pin_file_threshold;
+ unsigned short gc_pin_file_threshold;
struct f2fs_rwsem pin_sem;
/* maximum # of trials to find a victim segment for SSR and GC */
unsigned int max_victim_search;
/* migration granularity of garbage collection, unit: segment */
unsigned int migration_granularity;
+ /* migration window granularity of garbage collection, unit: segment */
+ unsigned int migration_window_granularity;
/*
* for stat information.
@@ -1740,6 +1762,7 @@ struct f2fs_sb_info {
unsigned int dirty_device; /* for checkpoint data flush */
spinlock_t dev_lock; /* protect dirty_device */
bool aligned_blksize; /* all devices has the same logical blksize */
+ unsigned int first_seq_zone_segno; /* first segno in sequential zone */
/* For write statistics */
u64 sectors_written_start;
@@ -1814,12 +1837,14 @@ struct f2fs_sb_info {
};
/* Definitions to access f2fs_sb_info */
-#define BLKS_PER_SEG(sbi) \
- ((sbi)->blocks_per_seg)
-#define BLKS_PER_SEC(sbi) \
- ((sbi)->segs_per_sec << (sbi)->log_blocks_per_seg)
-#define SEGS_PER_SEC(sbi) \
- ((sbi)->segs_per_sec)
+#define SEGS_TO_BLKS(sbi, segs) \
+ ((segs) << (sbi)->log_blocks_per_seg)
+#define BLKS_TO_SEGS(sbi, blks) \
+ ((blks) >> (sbi)->log_blocks_per_seg)
+
+#define BLKS_PER_SEG(sbi) ((sbi)->blocks_per_seg)
+#define BLKS_PER_SEC(sbi) (SEGS_TO_BLKS(sbi, (sbi)->segs_per_sec))
+#define SEGS_PER_SEC(sbi) ((sbi)->segs_per_sec)
__printf(3, 4)
void f2fs_printk(struct f2fs_sb_info *sbi, bool limit_rate, const char *fmt, ...);
@@ -1988,6 +2013,16 @@ static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi)
return (struct f2fs_super_block *)(sbi->raw_super);
}
+static inline struct f2fs_super_block *F2FS_SUPER_BLOCK(struct folio *folio,
+ pgoff_t index)
+{
+ pgoff_t idx_in_folio = index % (1 << folio_order(folio));
+
+ return (struct f2fs_super_block *)
+ (page_address(folio_page(folio, idx_in_folio)) +
+ F2FS_SUPER_OFFSET);
+}
+
static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi)
{
return (struct f2fs_checkpoint *)(sbi->ckpt);
@@ -2197,6 +2232,36 @@ static inline void f2fs_up_write(struct f2fs_rwsem *sem)
#endif
}
+static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock)
+{
+ unsigned long flags;
+ unsigned char *nat_bits;
+
+ /*
+ * In order to re-enable nat_bits we need to call fsck.f2fs by
+ * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost,
+ * so let's rely on regular fsck or unclean shutdown.
+ */
+
+ if (lock)
+ spin_lock_irqsave(&sbi->cp_lock, flags);
+ __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
+ nat_bits = NM_I(sbi)->nat_bits;
+ NM_I(sbi)->nat_bits = NULL;
+ if (lock)
+ spin_unlock_irqrestore(&sbi->cp_lock, flags);
+
+ kvfree(nat_bits);
+}
+
+static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi,
+ struct cp_control *cpc)
+{
+ bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
+
+ return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set;
+}
+
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
f2fs_down_read(&sbi->cp_rwsem);
@@ -2444,8 +2509,14 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi,
blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK;
spin_lock(&sbi->stat_lock);
- f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count);
- sbi->total_valid_block_count -= (block_t)count;
+ if (unlikely(sbi->total_valid_block_count < count)) {
+ f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u",
+ sbi->total_valid_block_count, inode->i_ino, count);
+ sbi->total_valid_block_count = 0;
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ } else {
+ sbi->total_valid_block_count -= count;
+ }
if (sbi->reserved_blocks &&
sbi->current_reserved_blocks < sbi->reserved_blocks)
sbi->current_reserved_blocks = min(sbi->reserved_blocks,
@@ -2852,13 +2923,26 @@ static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type)
return false;
}
+static inline bool is_inflight_read_io(struct f2fs_sb_info *sbi)
+{
+ return get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_DIO_READ);
+}
+
static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
{
+ bool zoned_gc = (type == GC_TIME &&
+ F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_BLKZONED));
+
if (sbi->gc_mode == GC_URGENT_HIGH)
return true;
- if (is_inflight_io(sbi, type))
- return false;
+ if (zoned_gc) {
+ if (is_inflight_read_io(sbi))
+ return false;
+ } else {
+ if (is_inflight_io(sbi, type))
+ return false;
+ }
if (sbi->gc_mode == GC_URGENT_MID)
return true;
@@ -2867,6 +2951,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
(type == DISCARD_TIME || type == GC_TIME))
return true;
+ if (zoned_gc)
+ return true;
+
return f2fs_time_over(sbi, type);
}
@@ -2898,26 +2985,27 @@ static inline __le32 *blkaddr_in_node(struct f2fs_node *node)
}
static inline int f2fs_has_extra_attr(struct inode *inode);
-static inline block_t data_blkaddr(struct inode *inode,
- struct page *node_page, unsigned int offset)
+static inline unsigned int get_dnode_base(struct inode *inode,
+ struct page *node_page)
{
- struct f2fs_node *raw_node;
- __le32 *addr_array;
- int base = 0;
- bool is_inode = IS_INODE(node_page);
+ if (!IS_INODE(node_page))
+ return 0;
- raw_node = F2FS_NODE(node_page);
+ return inode ? get_extra_isize(inode) :
+ offset_in_addr(&F2FS_NODE(node_page)->i);
+}
- if (is_inode) {
- if (!inode)
- /* from GC path only */
- base = offset_in_addr(&raw_node->i);
- else if (f2fs_has_extra_attr(inode))
- base = get_extra_isize(inode);
- }
+static inline __le32 *get_dnode_addr(struct inode *inode,
+ struct page *node_page)
+{
+ return blkaddr_in_node(F2FS_NODE(node_page)) +
+ get_dnode_base(inode, node_page);
+}
- addr_array = blkaddr_in_node(raw_node);
- return le32_to_cpu(addr_array[base + offset]);
+static inline block_t data_blkaddr(struct inode *inode,
+ struct page *node_page, unsigned int offset)
+{
+ return le32_to_cpu(*(get_dnode_addr(inode, node_page) + offset));
}
static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn)
@@ -3131,7 +3219,7 @@ static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth)
static inline void f2fs_i_gc_failures_write(struct inode *inode,
unsigned int count)
{
- F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] = count;
+ F2FS_I(inode)->i_gc_failures = count;
f2fs_mark_inode_dirty_sync(inode, true);
}
@@ -3219,21 +3307,15 @@ static inline bool f2fs_need_compress_data(struct inode *inode)
return false;
}
-static inline unsigned int addrs_per_inode(struct inode *inode)
+static inline unsigned int addrs_per_page(struct inode *inode,
+ bool is_inode)
{
- unsigned int addrs = CUR_ADDRS_PER_INODE(inode) -
- get_inline_xattr_addrs(inode);
+ unsigned int addrs = is_inode ? (CUR_ADDRS_PER_INODE(inode) -
+ get_inline_xattr_addrs(inode)) : DEF_ADDRS_PER_BLOCK;
- if (!f2fs_compressed_file(inode))
- return addrs;
- return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size);
-}
-
-static inline unsigned int addrs_per_block(struct inode *inode)
-{
- if (!f2fs_compressed_file(inode))
- return DEF_ADDRS_PER_BLOCK;
- return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size);
+ if (f2fs_compressed_file(inode))
+ return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size);
+ return addrs;
}
static inline void *inline_xattr_addr(struct inode *inode, struct page *page)
@@ -3285,8 +3367,6 @@ static inline bool f2fs_is_cow_file(struct inode *inode)
return is_inode_flag_set(inode, FI_COW_FILE);
}
-static inline __le32 *get_dnode_addr(struct inode *inode,
- struct page *node_page);
static inline void *inline_data_addr(struct inode *inode, struct page *page)
{
__le32 *addr = get_dnode_addr(inode, page);
@@ -3322,13 +3402,15 @@ static inline void clear_file(struct inode *inode, int type)
static inline bool f2fs_is_time_consistent(struct inode *inode)
{
- struct timespec64 ctime = inode_get_ctime(inode);
+ struct timespec64 ts = inode_get_atime(inode);
- if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &ts))
return false;
- if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ctime))
+ ts = inode_get_ctime(inode);
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ts))
return false;
- if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
+ ts = inode_get_mtime(inode);
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &ts))
return false;
return true;
}
@@ -3413,6 +3495,11 @@ static inline void *f2fs_kvzalloc(struct f2fs_sb_info *sbi,
return f2fs_kvmalloc(sbi, size, flags | __GFP_ZERO);
}
+static inline void *f2fs_vmalloc(size_t size)
+{
+ return vmalloc(size);
+}
+
static inline int get_extra_isize(struct inode *inode)
{
return F2FS_I(inode)->i_extra_isize / sizeof(__le32);
@@ -3423,17 +3510,6 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
return F2FS_I(inode)->i_inline_xattr_size;
}
-static inline __le32 *get_dnode_addr(struct inode *inode,
- struct page *node_page)
-{
- int base = 0;
-
- if (IS_INODE(node_page) && f2fs_has_extra_attr(inode))
- base = get_extra_isize(inode);
-
- return blkaddr_in_node(F2FS_NODE(node_page)) + base;
-}
-
#define f2fs_get_inode_mode(i) \
((is_inode_flag_set(i, FI_ACL_MODE)) ? \
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
@@ -3523,15 +3599,30 @@ int f2fs_get_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
/*
* dir.c
*/
+#if IS_ENABLED(CONFIG_UNICODE)
int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname);
+void f2fs_free_casefolded_name(struct f2fs_filename *fname);
+#else
+static inline int f2fs_init_casefolded_name(const struct inode *dir,
+ struct f2fs_filename *fname)
+{
+ return 0;
+}
+
+static inline void f2fs_free_casefolded_name(struct f2fs_filename *fname)
+{
+}
+#endif /* CONFIG_UNICODE */
+
int f2fs_setup_filename(struct inode *dir, const struct qstr *iname,
int lookup, struct f2fs_filename *fname);
int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry,
struct f2fs_filename *fname);
void f2fs_free_filename(struct f2fs_filename *fname);
struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
- const struct f2fs_filename *fname, int *max_slots);
+ const struct f2fs_filename *fname, int *max_slots,
+ bool use_hash);
int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
unsigned int start_pos, struct fscrypt_str *fstr);
void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent,
@@ -3622,7 +3713,6 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from);
int f2fs_truncate_xattr_node(struct inode *inode);
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
unsigned int seq_id);
-bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi);
int f2fs_remove_inode_page(struct inode *inode);
struct page *f2fs_new_inode_page(struct inode *inode);
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs);
@@ -3647,7 +3737,6 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page);
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page);
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
unsigned int segno, struct f2fs_summary_block *sum);
-void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi);
int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc);
int f2fs_build_node_manager(struct f2fs_sb_info *sbi);
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi);
@@ -3680,23 +3769,22 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno);
-void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi);
void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
-void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
- unsigned int *newseg, bool new_sec, int dir);
-void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi);
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
+int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno);
void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src,
block_t blk_addr);
-void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
+void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
enum iostat_type io_type);
void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio);
void f2fs_outplace_write_data(struct dnode_of_data *dn,
@@ -3710,7 +3798,8 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
block_t old_addr, block_t new_addr,
unsigned char version, bool recover_curseg,
bool recover_newaddr);
-void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+int f2fs_get_segment_temp(int seg_type);
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio);
@@ -3732,9 +3821,10 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi);
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi);
int __init f2fs_create_segment_manager_caches(void);
void f2fs_destroy_segment_manager_caches(void);
-int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
-unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
- unsigned int segno);
+int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint);
+enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+ enum page_type type, enum temp_type temp);
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi);
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
unsigned int segno);
@@ -3760,6 +3850,8 @@ struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
+bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi,
+ block_t blkaddr, int type);
int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync);
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
@@ -3840,7 +3932,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
int f2fs_encrypt_one_page(struct f2fs_io_info *fio);
bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio);
bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio);
-int f2fs_write_single_data_page(struct page *page, int *submitted,
+int f2fs_write_single_data_page(struct folio *folio, int *submitted,
struct bio **bio, sector_t *last_block,
struct writeback_control *wbc,
enum iostat_type io_type,
@@ -3849,7 +3941,7 @@ void f2fs_write_failed(struct inode *inode, loff_t to);
void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
bool f2fs_release_folio(struct folio *folio, gfp_t wait);
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
-void f2fs_clear_page_cache_dirty_tag(struct page *page);
+void f2fs_clear_page_cache_dirty_tag(struct folio *folio);
int f2fs_init_post_read_processing(void);
void f2fs_destroy_post_read_processing(void);
int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
@@ -3873,7 +3965,7 @@ void f2fs_destroy_garbage_collection_cache(void);
/* victim selection function for cleaning and SSR */
int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
int gc_type, int type, char alloc_mode,
- unsigned long long age);
+ unsigned long long age, bool one_time);
/*
* recovery.c
@@ -3959,7 +4051,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
#define stat_inc_cp_call_count(sbi, foreground) \
atomic_inc(&sbi->cp_call_count[(foreground)])
-#define stat_inc_cp_count(si) (F2FS_STAT(sbi)->cp_count++)
+#define stat_inc_cp_count(sbi) (F2FS_STAT(sbi)->cp_count++)
#define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++)
#define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++)
#define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++)
@@ -4137,18 +4229,19 @@ extern struct kmem_cache *f2fs_inode_entry_slab;
bool f2fs_may_inline_data(struct inode *inode);
bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage);
bool f2fs_may_inline_dentry(struct inode *inode);
-void f2fs_do_read_inline_data(struct page *page, struct page *ipage);
+void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage);
void f2fs_truncate_inline_inode(struct inode *inode,
struct page *ipage, u64 from);
-int f2fs_read_inline_data(struct inode *inode, struct page *page);
+int f2fs_read_inline_data(struct inode *inode, struct folio *folio);
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page);
int f2fs_convert_inline_inode(struct inode *inode);
int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry);
-int f2fs_write_inline_data(struct inode *inode, struct page *page);
+int f2fs_write_inline_data(struct inode *inode, struct folio *folio);
int f2fs_recover_inline_data(struct inode *inode, struct page *npage);
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
const struct f2fs_filename *fname,
- struct page **res_page);
+ struct page **res_page,
+ bool use_hash);
int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent,
struct page *ipage);
int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname,
@@ -4286,7 +4379,7 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages,
int index, int nr_pages, bool uptodate);
bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
-void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
+void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio);
int f2fs_write_multi_pages(struct compress_ctx *cc,
int *submitted,
struct writeback_control *wbc,
@@ -4298,7 +4391,7 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
unsigned int llen, unsigned int c_len);
int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
unsigned nr_pages, sector_t *last_block_in_bio,
- bool is_readahead, bool for_write);
+ struct readahead_control *rac, bool for_write);
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed,
bool in_task);
@@ -4395,22 +4488,18 @@ static inline int set_compress_context(struct inode *inode)
{
#ifdef CONFIG_F2FS_FS_COMPRESSION
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
- F2FS_I(inode)->i_compress_algorithm =
- F2FS_OPTION(sbi).compress_algorithm;
- F2FS_I(inode)->i_log_cluster_size =
- F2FS_OPTION(sbi).compress_log_size;
- F2FS_I(inode)->i_compress_flag =
- F2FS_OPTION(sbi).compress_chksum ?
- BIT(COMPRESS_CHKSUM) : 0;
- F2FS_I(inode)->i_cluster_size =
- BIT(F2FS_I(inode)->i_log_cluster_size);
- if ((F2FS_I(inode)->i_compress_algorithm == COMPRESS_LZ4 ||
- F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD) &&
+ fi->i_compress_algorithm = F2FS_OPTION(sbi).compress_algorithm;
+ fi->i_log_cluster_size = F2FS_OPTION(sbi).compress_log_size;
+ fi->i_compress_flag = F2FS_OPTION(sbi).compress_chksum ?
+ BIT(COMPRESS_CHKSUM) : 0;
+ fi->i_cluster_size = BIT(fi->i_log_cluster_size);
+ if ((fi->i_compress_algorithm == COMPRESS_LZ4 ||
+ fi->i_compress_algorithm == COMPRESS_ZSTD) &&
F2FS_OPTION(sbi).compress_level)
- F2FS_I(inode)->i_compress_level =
- F2FS_OPTION(sbi).compress_level;
- F2FS_I(inode)->i_flags |= F2FS_COMPR_FL;
+ fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
+ fi->i_flags |= F2FS_COMPR_FL;
set_inode_flag(inode, FI_COMPRESSED_FILE);
stat_inc_compr_inode(inode);
inc_compr_inode_stat(inode);
@@ -4425,15 +4514,15 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
- f2fs_down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&fi->i_sem);
if (!f2fs_compressed_file(inode)) {
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
return true;
}
if (f2fs_is_mmap_file(inode) ||
(S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))) {
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
return false;
}
@@ -4442,7 +4531,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
clear_inode_flag(inode, FI_COMPRESSED_FILE);
f2fs_mark_inode_dirty_sync(inode, true);
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
return true;
}
@@ -4468,12 +4557,16 @@ F2FS_FEATURE_FUNCS(compression, COMPRESSION);
F2FS_FEATURE_FUNCS(readonly, RO);
#ifdef CONFIG_BLK_DEV_ZONED
-static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
- block_t blkaddr)
+static inline bool f2fs_zone_is_seq(struct f2fs_sb_info *sbi, int devi,
+ unsigned int zone)
{
- unsigned int zno = blkaddr / sbi->blocks_per_blkz;
+ return test_bit(zone, FDEV(devi).blkz_seq);
+}
- return test_bit(zno, FDEV(devi).blkz_seq);
+static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
+ block_t blkaddr)
+{
+ return f2fs_zone_is_seq(sbi, devi, blkaddr / sbi->blocks_per_blkz);
}
#endif
@@ -4545,15 +4638,31 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi)
return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS;
}
-static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi,
+static inline bool f2fs_is_sequential_zone_area(struct f2fs_sb_info *sbi,
block_t blkaddr)
{
if (f2fs_sb_has_blkzoned(sbi)) {
+#ifdef CONFIG_BLK_DEV_ZONED
int devi = f2fs_target_device_index(sbi, blkaddr);
- return !bdev_is_zoned(FDEV(devi).bdev);
+ if (!bdev_is_zoned(FDEV(devi).bdev))
+ return false;
+
+ if (f2fs_is_multi_device(sbi)) {
+ if (blkaddr < FDEV(devi).start_blk ||
+ blkaddr > FDEV(devi).end_blk) {
+ f2fs_err(sbi, "Invalid block %x", blkaddr);
+ return false;
+ }
+ blkaddr -= FDEV(devi).start_blk;
+ }
+
+ return f2fs_blkz_is_seq(sbi, devi, blkaddr);
+#else
+ return false;
+#endif
}
- return true;
+ return false;
}
static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi)
@@ -4641,9 +4750,11 @@ static inline void f2fs_io_schedule_timeout(long timeout)
io_schedule_timeout(timeout);
}
-static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs,
- enum page_type type)
+static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi,
+ struct folio *folio, enum page_type type)
{
+ pgoff_t ofs = folio->index;
+
if (unlikely(f2fs_cp_error(sbi)))
return;
@@ -4672,7 +4783,7 @@ static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi,
page = find_get_page(META_MAPPING(sbi), blkaddr + i);
if (page) {
- if (PageWriteback(page))
+ if (folio_test_writeback(page_folio(page)))
need_submit = true;
f2fs_put_page(page, 0);
}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 196755a34833..fa77841f3e2c 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -8,7 +8,6 @@
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/stat.h>
-#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/falloc.h>
@@ -36,9 +35,21 @@
#include <trace/events/f2fs.h>
#include <uapi/linux/f2fs.h>
+static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size)
+{
+ loff_t old_size = i_size_read(inode);
+
+ if (old_size >= new_size)
+ return;
+
+ /* zero or drop pages only in range of [old_size, new_size] */
+ truncate_pagecache(inode, old_size);
+}
+
static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
+ vm_flags_t flags = vmf->vma->vm_flags;
vm_fault_t ret;
ret = filemap_fault(vmf);
@@ -46,47 +57,50 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
f2fs_update_iostat(F2FS_I_SB(inode), inode,
APP_MAPPED_READ_IO, F2FS_BLKSIZE);
- trace_f2fs_filemap_fault(inode, vmf->pgoff, (unsigned long)ret);
+ trace_f2fs_filemap_fault(inode, vmf->pgoff, flags, ret);
return ret;
}
static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
- bool need_alloc = true;
+ bool need_alloc = !f2fs_is_pinned_file(inode);
int err = 0;
+ vm_fault_t ret;
if (unlikely(IS_IMMUTABLE(inode)))
return VM_FAULT_SIGBUS;
- if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED))
- return VM_FAULT_SIGBUS;
+ if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
+ err = -EIO;
+ goto out;
+ }
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
- goto err;
+ goto out;
}
if (!f2fs_is_checkpoint_ready(sbi)) {
err = -ENOSPC;
- goto err;
+ goto out;
}
err = f2fs_convert_inline_inode(inode);
if (err)
- goto err;
+ goto out;
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (f2fs_compressed_file(inode)) {
- int ret = f2fs_is_compressed_cluster(inode, page->index);
+ int ret = f2fs_is_compressed_cluster(inode, folio->index);
if (ret < 0) {
err = ret;
- goto err;
+ goto out;
} else if (ret) {
need_alloc = false;
}
@@ -100,36 +114,40 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT);
+ filemap_invalidate_unlock(inode->i_mapping);
+
file_update_time(vmf->vma->vm_file);
filemap_invalidate_lock_shared(inode->i_mapping);
- lock_page(page);
- if (unlikely(page->mapping != inode->i_mapping ||
- page_offset(page) > i_size_read(inode) ||
- !PageUptodate(page))) {
- unlock_page(page);
+
+ folio_lock(folio);
+ if (unlikely(folio->mapping != inode->i_mapping ||
+ folio_pos(folio) > i_size_read(inode) ||
+ !folio_test_uptodate(folio))) {
+ folio_unlock(folio);
err = -EFAULT;
goto out_sem;
}
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
if (need_alloc) {
/* block allocation */
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_get_block_locked(&dn, page->index);
- }
-
-#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (!need_alloc) {
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
+ err = f2fs_get_block_locked(&dn, folio->index);
+ } else {
+ err = f2fs_get_dnode_of_data(&dn, folio->index, LOOKUP_NODE);
f2fs_put_dnode(&dn);
+ if (f2fs_is_pinned_file(inode) &&
+ !__is_valid_data_blkaddr(dn.data_blkaddr))
+ err = -EIO;
}
-#endif
+
if (err) {
- unlock_page(page);
+ folio_unlock(folio);
goto out_sem;
}
- f2fs_wait_on_page_writeback(page, DATA, false, true);
+ f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true);
/* wait for GCed page writeback via META_MAPPING */
f2fs_wait_on_block_writeback(inode, dn.data_blkaddr);
@@ -137,29 +155,31 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
/*
* check to see if the page is mapped already (no holes)
*/
- if (PageMappedToDisk(page))
+ if (folio_test_mappedtodisk(folio))
goto out_sem;
/* page is wholly or partially inside EOF */
- if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
+ if (((loff_t)(folio->index + 1) << PAGE_SHIFT) >
i_size_read(inode)) {
loff_t offset;
offset = i_size_read(inode) & ~PAGE_MASK;
- zero_user_segment(page, offset, PAGE_SIZE);
+ folio_zero_segment(folio, offset, folio_size(folio));
}
- set_page_dirty(page);
+ folio_mark_dirty(folio);
f2fs_update_iostat(sbi, inode, APP_MAPPED_IO, F2FS_BLKSIZE);
f2fs_update_time(sbi, REQ_TIME);
- trace_f2fs_vm_page_mkwrite(page, DATA);
out_sem:
filemap_invalidate_unlock_shared(inode->i_mapping);
sb_end_pagefault(inode->i_sb);
-err:
- return vmf_fs_error(err);
+out:
+ ret = vmf_fs_error(err);
+
+ trace_f2fs_vm_page_mkwrite(inode, folio->index, vmf->vma->vm_flags, ret);
+ return ret;
}
static const struct vm_operations_struct f2fs_file_vm_ops = {
@@ -180,7 +200,7 @@ static int get_parent_ino(struct inode *inode, nid_t *pino)
if (!dentry)
return 0;
- *pino = parent_ino(dentry);
+ *pino = d_parent_ino(dentry);
dput(dentry);
return 1;
}
@@ -371,8 +391,7 @@ sync_nodes:
f2fs_remove_ino_entry(sbi, ino, APPEND_INO);
clear_inode_flag(inode, FI_APPEND_WRITE);
flush_out:
- if ((!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER) ||
- (atomic && !test_opt(sbi, NOBARRIER) && f2fs_sb_has_blkzoned(sbi)))
+ if (!atomic && F2FS_OPTION(sbi).fsync_mode != FSYNC_MODE_NOBARRIER)
ret = f2fs_issue_flush(sbi, inode->i_ino);
if (!ret) {
f2fs_remove_ino_entry(sbi, ino, UPDATE_INO);
@@ -392,9 +411,20 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return f2fs_do_sync_file(file, start, end, datasync, false);
}
-static bool __found_offset(struct address_space *mapping, block_t blkaddr,
- pgoff_t index, int whence)
+static bool __found_offset(struct address_space *mapping,
+ struct dnode_of_data *dn, pgoff_t index, int whence)
{
+ block_t blkaddr = f2fs_data_blkaddr(dn);
+ struct inode *inode = mapping->host;
+ bool compressed_cluster = false;
+
+ if (f2fs_compressed_file(inode)) {
+ block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page,
+ ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size));
+
+ compressed_cluster = first_blkaddr == COMPRESS_ADDR;
+ }
+
switch (whence) {
case SEEK_DATA:
if (__is_valid_data_blkaddr(blkaddr))
@@ -402,8 +432,12 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr,
if (blkaddr == NEW_ADDR &&
xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
return true;
+ if (compressed_cluster)
+ return true;
break;
case SEEK_HOLE:
+ if (compressed_cluster)
+ return false;
if (blkaddr == NULL_ADDR)
return true;
break;
@@ -414,14 +448,14 @@ static bool __found_offset(struct address_space *mapping, block_t blkaddr,
static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
- loff_t maxbytes = inode->i_sb->s_maxbytes;
+ loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
struct dnode_of_data dn;
pgoff_t pgofs, end_offset;
loff_t data_ofs = offset;
loff_t isize;
int err = 0;
- inode_lock(inode);
+ inode_lock_shared(inode);
isize = i_size_read(inode);
if (offset >= isize)
@@ -472,7 +506,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto fail;
}
- if (__found_offset(file->f_mapping, blkaddr,
+ if (__found_offset(file->f_mapping, &dn,
pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
@@ -486,20 +520,17 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
found:
if (whence == SEEK_HOLE && data_ofs > isize)
data_ofs = isize;
- inode_unlock(inode);
+ inode_unlock_shared(inode);
return vfs_setpos(file, data_ofs, maxbytes);
fail:
- inode_unlock(inode);
+ inode_unlock_shared(inode);
return -ENXIO;
}
static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
- loff_t maxbytes = inode->i_sb->s_maxbytes;
-
- if (f2fs_compressed_file(inode))
- maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
+ loff_t maxbytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode));
switch (whence) {
case SEEK_SET:
@@ -587,7 +618,7 @@ static int f2fs_file_open(struct inode *inode, struct file *filp)
if (err)
return err;
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+ filp->f_mode |= FMODE_NOWAIT;
filp->f_mode |= FMODE_CAN_ODIRECT;
err = dquot_file_open(inode, filp);
@@ -628,8 +659,10 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
f2fs_set_data_blkaddr(dn, NULL_ADDR);
if (__is_valid_data_blkaddr(blkaddr)) {
- if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))
+ if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE))
+ continue;
+ if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr,
+ DATA_GENERIC_ENHANCE))
continue;
if (compressed_cluster)
valid_blocks++;
@@ -831,7 +864,7 @@ int f2fs_truncate(struct inode *inode)
if (err)
return err;
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
f2fs_mark_inode_dirty_sync(inode, false);
return 0;
}
@@ -860,7 +893,8 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw)
* for blkzoned device, fallback direct IO to buffered IO, so
* all IOs can be serialized by log-structured write.
*/
- if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE))
+ if (f2fs_sb_has_blkzoned(sbi) && (rw == WRITE) &&
+ !f2fs_is_pinned_file(inode))
return true;
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
return true;
@@ -942,17 +976,15 @@ static void __setattr_copy(struct mnt_idmap *idmap,
i_uid_update(idmap, attr, inode);
i_gid_update(idmap, attr, inode);
if (ia_valid & ATTR_ATIME)
- inode->i_atime = attr->ia_atime;
+ inode_set_atime_to_ts(inode, attr->ia_atime);
if (ia_valid & ATTR_MTIME)
- inode->i_mtime = attr->ia_mtime;
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
if (ia_valid & ATTR_CTIME)
inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
- vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
- if (!vfsgid_in_group_p(vfsgid) &&
- !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+ if (!in_group_or_capable(idmap, inode, i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
set_acl_inode(inode, mode);
}
@@ -965,11 +997,24 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
int err;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
+ err = setattr_prepare(idmap, dentry, attr);
+ if (err)
+ return err;
+
+ err = fscrypt_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
+ err = fsverity_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
if (unlikely(IS_IMMUTABLE(inode)))
return -EPERM;
@@ -983,22 +1028,10 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return -EOPNOTSUPP;
if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) &&
!IS_ALIGNED(attr->ia_size,
- F2FS_BLK_TO_BYTES(F2FS_I(inode)->i_cluster_size)))
+ F2FS_BLK_TO_BYTES(fi->i_cluster_size)))
return -EINVAL;
}
- err = setattr_prepare(idmap, dentry, attr);
- if (err)
- return err;
-
- err = fscrypt_prepare_setattr(dentry, attr);
- if (err)
- return err;
-
- err = fsverity_prepare_setattr(dentry, attr);
- if (err)
- return err;
-
if (is_quota_modification(idmap, inode, attr)) {
err = f2fs_dquot_initialize(inode);
if (err)
@@ -1037,9 +1070,18 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return err;
}
- f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ /*
+ * wait for inflight dio, blocks should be removed after
+ * IO completion.
+ */
+ if (attr->ia_size < old_size)
+ inode_dio_wait(inode);
+
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
+ if (attr->ia_size > old_size)
+ f2fs_zero_post_eof_page(inode, attr->ia_size);
truncate_setsize(inode, attr->ia_size);
if (attr->ia_size <= old_size)
@@ -1049,14 +1091,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
* larger than i_size.
*/
filemap_invalidate_unlock(inode->i_mapping);
- f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
if (err)
return err;
- spin_lock(&F2FS_I(inode)->i_size_lock);
- inode->i_mtime = inode_set_ctime_current(inode);
- F2FS_I(inode)->last_disk_size = i_size_read(inode);
- spin_unlock(&F2FS_I(inode)->i_size_lock);
+ spin_lock(&fi->i_size_lock);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ fi->last_disk_size = i_size_read(inode);
+ spin_unlock(&fi->i_size_lock);
}
__setattr_copy(idmap, inode, attr);
@@ -1066,7 +1108,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (is_inode_flag_set(inode, FI_ACL_MODE)) {
if (!err)
- inode->i_mode = F2FS_I(inode)->i_acl_mode;
+ inode->i_mode = fi->i_acl_mode;
clear_inode_flag(inode, FI_ACL_MODE);
}
}
@@ -1158,6 +1200,10 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(inode->i_mapping);
+
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1239,7 +1285,6 @@ next_dnode:
!f2fs_is_valid_blkaddr(sbi, *blkaddr,
DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
return -EFSCORRUPTED;
}
@@ -1442,6 +1487,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+
f2fs_lock_op(sbi);
f2fs_drop_extent_tree(inode);
truncate_pagecache(inode, offset);
@@ -1528,7 +1575,6 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
if (!f2fs_is_valid_blkaddr(sbi, dn->data_blkaddr,
DATA_GENERIC_ENHANCE)) {
ret = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
break;
}
@@ -1564,6 +1610,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret)
return ret;
+ filemap_invalidate_lock(mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(mapping);
+
pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
@@ -1695,6 +1745,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
/* avoid gc operation during block exchange */
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(mapping);
+
+ f2fs_zero_post_eof_page(inode, offset + len);
truncate_pagecache(inode, offset);
while (!ret && idx > pg_start) {
@@ -1712,10 +1764,12 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
}
filemap_invalidate_unlock(mapping);
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ if (ret)
+ return ret;
/* write out all moved pages, if possible */
filemap_invalidate_lock(mapping);
- filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
+ ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
truncate_pagecache(inode, offset);
filemap_invalidate_unlock(mapping);
@@ -1750,6 +1804,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
if (err)
return err;
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, offset + len);
+ filemap_invalidate_unlock(inode->i_mapping);
+
f2fs_balance_fs(sbi, true);
pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
@@ -1770,7 +1828,8 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset,
map.m_len = sec_blks;
next_alloc:
- if (has_not_enough_free_secs(sbi, 0,
+ if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ?
+ ZONED_PIN_SEC_REQUIRED_COUNT :
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
f2fs_down_write(&sbi->gc_lock);
stat_inc_gc_call_count(sbi, FOREGROUND);
@@ -1873,6 +1932,12 @@ static long f2fs_fallocate(struct file *file, int mode,
if (ret)
goto out;
+ /*
+ * wait for inflight dio, blocks should be removed after IO
+ * completion.
+ */
+ inode_dio_wait(inode);
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
if (offset >= inode->i_size)
goto out;
@@ -1889,7 +1954,7 @@ static long f2fs_fallocate(struct file *file, int mode,
}
if (!ret) {
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
f2fs_mark_inode_dirty_sync(inode, false);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
@@ -1974,15 +2039,15 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
if (err)
return err;
- f2fs_down_write(&F2FS_I(inode)->i_sem);
+ f2fs_down_write(&fi->i_sem);
if (!f2fs_may_compress(inode) ||
(S_ISREG(inode->i_mode) &&
F2FS_HAS_BLOCKS(inode))) {
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
return -EINVAL;
}
err = set_compress_context(inode);
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
if (err)
return err;
@@ -2122,7 +2187,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
inode_lock(inode);
- if (!f2fs_disable_compressed_file(inode)) {
+ if (!f2fs_disable_compressed_file(inode) ||
+ f2fs_is_pinned_file(inode)) {
ret = -EINVAL;
goto out;
}
@@ -2135,6 +2201,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
goto out;
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&fi->i_gc_rwsem[READ]);
/*
* Should wait end_io to count F2FS_WB_CP_DATA correctly by
@@ -2144,10 +2211,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
f2fs_warn(sbi, "Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
- if (ret) {
- f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
- goto out;
- }
+ if (ret)
+ goto out_unlock;
/* Check if the inode already has a COW inode */
if (fi->cow_inode == NULL) {
@@ -2156,10 +2221,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
struct inode *dir = d_inode(dentry->d_parent);
ret = f2fs_get_tmpfile(idmap, dir, &fi->cow_inode);
- if (ret) {
- f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
- goto out;
- }
+ if (ret)
+ goto out_unlock;
set_inode_flag(fi->cow_inode, FI_COW_FILE);
clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
@@ -2173,10 +2236,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
invalidate_mapping_pages(fi->cow_inode->i_mapping, 0, -1);
ret = f2fs_do_truncate_blocks(fi->cow_inode, 0, true);
- if (ret) {
- f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
- goto out;
- }
+ if (ret)
+ goto out_unlock;
}
f2fs_write_inode(inode, NULL);
@@ -2195,7 +2256,11 @@ static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
}
f2fs_i_size_write(fi->cow_inode, isize);
+out_unlock:
+ f2fs_up_write(&fi->i_gc_rwsem[READ]);
f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
+ if (ret)
+ goto out;
f2fs_update_time(sbi, REQ_TIME);
fi->atomic_write_task = current;
@@ -2277,17 +2342,20 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
switch (flag) {
case F2FS_GOING_DOWN_FULLSYNC:
- ret = freeze_bdev(sb->s_bdev);
+ ret = bdev_freeze(sb->s_bdev);
if (ret)
goto out;
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
- thaw_bdev(sb->s_bdev);
+ bdev_thaw(sb->s_bdev);
break;
case F2FS_GOING_DOWN_METASYNC:
/* do checkpoint only */
ret = f2fs_sync_fs(sb, 1);
- if (ret)
+ if (ret) {
+ if (ret == -EIO)
+ ret = 0;
goto out;
+ }
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
break;
case F2FS_GOING_DOWN_NOSYNC:
@@ -2303,6 +2371,8 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag,
set_sbi_flag(sbi, SBI_IS_DIRTY);
/* do checkpoint only */
ret = f2fs_sync_fs(sb, 1);
+ if (ret == -EIO)
+ ret = 0;
goto out;
default:
ret = -EINVAL;
@@ -2420,13 +2490,14 @@ static bool uuid_is_nonzero(__u8 u[16])
static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ int ret;
if (!f2fs_sb_has_encrypt(F2FS_I_SB(inode)))
return -EOPNOTSUPP;
+ ret = fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
-
- return fscrypt_ioctl_set_policy(filp, (const void __user *)arg);
+ return ret;
}
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
@@ -2673,12 +2744,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
bool fragmented = false;
int err;
- pg_start = range->start >> PAGE_SHIFT;
- pg_end = (range->start + range->len) >> PAGE_SHIFT;
-
f2fs_balance_fs(sbi, true);
inode_lock(inode);
+ pg_start = range->start >> PAGE_SHIFT;
+ pg_end = min_t(pgoff_t,
+ (range->start + range->len) >> PAGE_SHIFT,
+ DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) ||
f2fs_is_atomic_file(inode)) {
@@ -2694,8 +2766,9 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
}
/* writeback all dirty pages in the range */
- err = filemap_write_and_wait_range(inode->i_mapping, range->start,
- range->start + range->len - 1);
+ err = filemap_write_and_wait_range(inode->i_mapping,
+ pg_start << PAGE_SHIFT,
+ (pg_end << PAGE_SHIFT) - 1);
if (err)
goto out;
@@ -2855,7 +2928,8 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg)
err = f2fs_defragment_range(sbi, filp, &range);
mnt_drop_write_file(filp);
- f2fs_update_time(sbi, REQ_TIME);
+ if (range.len)
+ f2fs_update_time(sbi, REQ_TIME);
if (err < 0)
return err;
@@ -2968,9 +3042,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
}
f2fs_lock_op(sbi);
- ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS,
- pos_out >> F2FS_BLKSIZE_BITS,
- len >> F2FS_BLKSIZE_BITS, false);
+ ret = __exchange_data_block(src, dst, F2FS_BYTES_TO_BLK(pos_in),
+ F2FS_BYTES_TO_BLK(pos_out),
+ F2FS_BYTES_TO_BLK(len), false);
if (!ret) {
if (dst_max_i_size)
@@ -2987,10 +3061,10 @@ out_src:
if (ret)
goto out_unlock;
- src->i_mtime = inode_set_ctime_current(src);
+ inode_set_mtime_to_ts(src, inode_set_ctime_current(src));
f2fs_mark_inode_dirty_sync(src, false);
if (src != dst) {
- dst->i_mtime = inode_set_ctime_current(dst);
+ inode_set_mtime_to_ts(dst, inode_set_ctime_current(dst));
f2fs_mark_inode_dirty_sync(dst, false);
}
f2fs_update_time(sbi, REQ_TIME);
@@ -3014,10 +3088,10 @@ static int __f2fs_ioc_move_range(struct file *filp,
return -EBADF;
dst = fdget(range->dst_fd);
- if (!dst.file)
+ if (!fd_file(dst))
return -EBADF;
- if (!(dst.file->f_mode & FMODE_WRITE)) {
+ if (!(fd_file(dst)->f_mode & FMODE_WRITE)) {
err = -EBADF;
goto err_out;
}
@@ -3026,7 +3100,7 @@ static int __f2fs_ioc_move_range(struct file *filp,
if (err)
goto err_out;
- err = f2fs_move_file_range(filp, range->pos_in, dst.file,
+ err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst),
range->pos_out, range->len);
mnt_drop_write_file(filp);
@@ -3264,18 +3338,17 @@ int f2fs_pin_file_control(struct inode *inode, bool inc)
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- /* Use i_gc_failures for normal file as a risk signal. */
- if (inc)
- f2fs_i_gc_failures_write(inode,
- fi->i_gc_failures[GC_FAILURE_PIN] + 1);
-
- if (fi->i_gc_failures[GC_FAILURE_PIN] > sbi->gc_pin_file_threshold) {
+ if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) {
f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials",
- __func__, inode->i_ino,
- fi->i_gc_failures[GC_FAILURE_PIN]);
+ __func__, inode->i_ino, fi->i_gc_failures);
clear_inode_flag(inode, FI_PIN_FILE);
return -EAGAIN;
}
+
+ /* Use i_gc_failures for normal file as a risk signal. */
+ if (inc)
+ f2fs_i_gc_failures_write(inode, fi->i_gc_failures + 1);
+
return 0;
}
@@ -3314,7 +3387,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
goto done;
}
- if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
+ if (F2FS_HAS_BLOCKS(inode)) {
ret = -EFBIG;
goto out;
}
@@ -3341,7 +3414,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
}
set_inode_flag(inode, FI_PIN_FILE);
- ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
+ ret = F2FS_I(inode)->i_gc_failures;
done:
f2fs_update_time(sbi, REQ_TIME);
out:
@@ -3356,7 +3429,7 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg)
__u32 pin = 0;
if (is_inode_flag_set(inode, FI_PIN_FILE))
- pin = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN];
+ pin = F2FS_I(inode)->i_gc_failures;
return put_user(pin, (u32 __user *)arg);
}
@@ -3377,7 +3450,7 @@ int f2fs_precache_extents(struct inode *inode)
map.m_next_extent = &m_next_extent;
map.m_seg_type = NO_CHECK_TYPE;
map.m_may_create = false;
- end = max_file_blocks(inode);
+ end = F2FS_BLK_ALIGN(i_size_read(inode));
while (map.m_lblk < end) {
map.m_len = end - map.m_lblk;
@@ -3385,7 +3458,7 @@ int f2fs_precache_extents(struct inode *inode)
f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRECACHE);
f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
- if (err)
+ if (err || !map.m_len)
return err;
map.m_lblk = m_next_extent;
@@ -3552,10 +3625,8 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
if (!__is_valid_data_blkaddr(blkaddr))
continue;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))) {
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+ DATA_GENERIC_ENHANCE)))
return -EFSCORRUPTED;
- }
}
while (count) {
@@ -3595,6 +3666,7 @@ next:
static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t page_idx = 0, last_idx;
unsigned int released_blocks = 0;
@@ -3632,7 +3704,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
goto out;
- if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ if (!atomic_read(&fi->i_compr_blocks)) {
ret = -EPERM;
goto out;
}
@@ -3641,7 +3713,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, true);
- f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3667,7 +3739,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
- count = round_up(count, F2FS_I(inode)->i_cluster_size);
+ count = round_up(count, fi->i_cluster_size);
ret = release_compress_blocks(&dn, count);
@@ -3683,8 +3755,10 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
}
filemap_invalidate_unlock(inode->i_mapping);
- f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
out:
+ if (released_blocks)
+ f2fs_update_time(sbi, REQ_TIME);
inode_unlock(inode);
mnt_drop_write_file(filp);
@@ -3692,14 +3766,14 @@ out:
if (ret >= 0) {
ret = put_user(released_blocks, (u64 __user *)arg);
} else if (released_blocks &&
- atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ atomic_read(&fi->i_compr_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
"iblocks=%llu, released=%u, compr_blocks=%u, "
"run fsck to fix.",
__func__, inode->i_ino, inode->i_blocks,
released_blocks,
- atomic_read(&F2FS_I(inode)->i_compr_blocks));
+ atomic_read(&fi->i_compr_blocks));
}
return ret;
@@ -3720,10 +3794,8 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count,
if (!__is_valid_data_blkaddr(blkaddr))
continue;
if (unlikely(!f2fs_is_valid_blkaddr(sbi, blkaddr,
- DATA_GENERIC_ENHANCE))) {
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+ DATA_GENERIC_ENHANCE)))
return -EFSCORRUPTED;
- }
}
while (count) {
@@ -3790,6 +3862,7 @@ next:
static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
pgoff_t page_idx = 0, last_idx;
unsigned int reserved_blocks = 0;
@@ -3815,10 +3888,10 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
goto unlock_inode;
}
- if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
+ if (atomic_read(&fi->i_compr_blocks))
goto unlock_inode;
- f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
filemap_invalidate_lock(inode->i_mapping);
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
@@ -3844,7 +3917,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
count = min(end_offset - dn.ofs_in_node, last_idx - page_idx);
- count = round_up(count, F2FS_I(inode)->i_cluster_size);
+ count = round_up(count, fi->i_cluster_size);
ret = reserve_compress_blocks(&dn, count, &reserved_blocks);
@@ -3859,7 +3932,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
}
filemap_invalidate_unlock(inode->i_mapping);
- f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
if (!ret) {
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
@@ -3867,20 +3940,22 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
f2fs_mark_inode_dirty_sync(inode, true);
}
unlock_inode:
+ if (reserved_blocks)
+ f2fs_update_time(sbi, REQ_TIME);
inode_unlock(inode);
mnt_drop_write_file(filp);
if (!ret) {
ret = put_user(reserved_blocks, (u64 __user *)arg);
} else if (reserved_blocks &&
- atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
+ atomic_read(&fi->i_compr_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
- f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
+ f2fs_warn(sbi, "%s: partial blocks were reserved i_ino=%lx "
"iblocks=%llu, reserved=%u, compr_blocks=%u, "
"run fsck to fix.",
__func__, inode->i_ino, inode->i_blocks,
reserved_blocks,
- atomic_read(&F2FS_I(inode)->i_compr_blocks));
+ atomic_read(&fi->i_compr_blocks));
}
return ret;
@@ -3943,7 +4018,9 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
IS_ENCRYPTED(inode) && f2fs_is_multi_device(sbi)))
return -EOPNOTSUPP;
- file_start_write(filp);
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
inode_lock(inode);
if (f2fs_is_atomic_file(inode) || f2fs_compressed_file(inode) ||
@@ -4015,8 +4092,6 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
DATA_GENERIC_ENHANCE)) {
ret = -EFSCORRUPTED;
f2fs_put_dnode(&dn);
- f2fs_handle_error(sbi,
- ERROR_INVALID_BLKADDR);
goto out;
}
@@ -4065,12 +4140,13 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
if (len)
ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
prev_block, len, range.flags);
+ f2fs_update_time(sbi, REQ_TIME);
out:
filemap_invalidate_unlock(mapping);
f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
err:
inode_unlock(inode);
- file_end_write(filp);
+ mnt_drop_write_file(filp);
return ret;
}
@@ -4105,6 +4181,7 @@ static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_comp_option option;
int ret = 0;
@@ -4124,7 +4201,9 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
option.algorithm >= COMPRESS_MAX)
return -EINVAL;
- file_start_write(filp);
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
inode_lock(inode);
f2fs_down_write(&F2FS_I(inode)->i_sem);
@@ -4143,27 +4222,27 @@ static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
goto out;
}
- F2FS_I(inode)->i_compress_algorithm = option.algorithm;
- F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
- F2FS_I(inode)->i_cluster_size = BIT(option.log_cluster_size);
+ fi->i_compress_algorithm = option.algorithm;
+ fi->i_log_cluster_size = option.log_cluster_size;
+ fi->i_cluster_size = BIT(option.log_cluster_size);
/* Set default level */
- if (F2FS_I(inode)->i_compress_algorithm == COMPRESS_ZSTD)
- F2FS_I(inode)->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
+ if (fi->i_compress_algorithm == COMPRESS_ZSTD)
+ fi->i_compress_level = F2FS_ZSTD_DEFAULT_CLEVEL;
else
- F2FS_I(inode)->i_compress_level = 0;
+ fi->i_compress_level = 0;
/* Adjust mount option level */
if (option.algorithm == F2FS_OPTION(sbi).compress_algorithm &&
F2FS_OPTION(sbi).compress_level)
- F2FS_I(inode)->i_compress_level = F2FS_OPTION(sbi).compress_level;
+ fi->i_compress_level = F2FS_OPTION(sbi).compress_level;
f2fs_mark_inode_dirty_sync(inode, true);
if (!f2fs_is_compress_backend_ready(inode))
f2fs_warn(sbi, "compression algorithm is successfully set, "
"but current kernel doesn't support this algorithm.");
out:
- f2fs_up_write(&F2FS_I(inode)->i_sem);
+ f2fs_up_write(&fi->i_sem);
inode_unlock(inode);
- file_end_write(filp);
+ mnt_drop_write_file(filp);
return ret;
}
@@ -4221,7 +4300,9 @@ static int f2fs_ioc_decompress_file(struct file *filp)
f2fs_balance_fs(sbi, true);
- file_start_write(filp);
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
inode_lock(inode);
if (!f2fs_is_compress_backend_ready(inode)) {
@@ -4275,9 +4356,10 @@ static int f2fs_ioc_decompress_file(struct file *filp)
if (ret)
f2fs_warn(sbi, "%s: The file might be partially decompressed (errno=%d). Please delete the file.",
__func__, ret);
+ f2fs_update_time(sbi, REQ_TIME);
out:
inode_unlock(inode);
- file_end_write(filp);
+ mnt_drop_write_file(filp);
return ret;
}
@@ -4299,7 +4381,9 @@ static int f2fs_ioc_compress_file(struct file *filp)
f2fs_balance_fs(sbi, true);
- file_start_write(filp);
+ ret = mnt_want_write_file(filp);
+ if (ret)
+ return ret;
inode_lock(inode);
if (!f2fs_is_compress_backend_ready(inode)) {
@@ -4354,9 +4438,10 @@ static int f2fs_ioc_compress_file(struct file *filp)
if (ret)
f2fs_warn(sbi, "%s: The file might be partially compressed (errno=%d). Please delete the file.",
__func__, ret);
+ f2fs_update_time(sbi, REQ_TIME);
out:
inode_unlock(inode);
- file_end_write(filp);
+ mnt_drop_write_file(filp);
return ret;
}
@@ -4537,6 +4622,13 @@ static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
f2fs_down_read(&fi->i_gc_rwsem[READ]);
}
+ /* dio is not compatible w/ atomic file */
+ if (f2fs_is_atomic_file(inode)) {
+ f2fs_up_read(&fi->i_gc_rwsem[READ]);
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
/*
* We have to use __iomap_dio_rw() and iomap_dio_complete() instead of
* the higher-level function iomap_dio_rw() in order to ensure that the
@@ -4597,7 +4689,8 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
iov_iter_count(to), READ);
/* In LFS mode, if there is inflight dio, wait for its completion */
- if (f2fs_lfs_mode(F2FS_I_SB(inode)))
+ if (f2fs_lfs_mode(F2FS_I_SB(inode)) &&
+ get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE))
inode_dio_wait(inode);
if (f2fs_should_use_dio(inode, iocb, to)) {
@@ -4657,6 +4750,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from)
err = file_modified(file);
if (err)
return err;
+
+ filemap_invalidate_lock(inode->i_mapping);
+ f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from));
+ filemap_invalidate_unlock(inode->i_mapping);
return count;
}
@@ -4712,10 +4809,12 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
if (map.m_len > map.m_lblk)
map.m_len -= map.m_lblk;
else
- map.m_len = 0;
+ return 0;
+
map.m_may_create = true;
if (dio) {
- map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi,
+ inode->i_write_hint);
flag = F2FS_GET_BLOCK_PRE_DIO;
} else {
map.m_seg_type = NO_CHECK_TYPE;
@@ -4763,8 +4862,21 @@ static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error,
return 0;
}
+static void f2fs_dio_write_submit_io(const struct iomap_iter *iter,
+ struct bio *bio, loff_t file_offset)
+{
+ struct inode *inode = iter->inode;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ int seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint);
+ enum temp_type temp = f2fs_get_segment_temp(seg_type);
+
+ bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp);
+ submit_bio(bio);
+}
+
static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = {
- .end_io = f2fs_dio_write_end_io,
+ .end_io = f2fs_dio_write_end_io,
+ .submit_io = f2fs_dio_write_submit_io,
};
static void f2fs_flush_buffered_write(struct address_space *mapping,
@@ -4901,6 +5013,8 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
bool dio;
bool may_need_sync = true;
int preallocated;
+ const loff_t pos = iocb->ki_pos;
+ const ssize_t count = iov_iter_count(from);
ssize_t ret;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
@@ -4922,6 +5036,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
inode_lock(inode);
}
+ if (f2fs_is_pinned_file(inode) &&
+ !f2fs_overwrite_io(inode, pos, count)) {
+ ret = -EIO;
+ goto out_unlock;
+ }
+
ret = f2fs_write_checks(iocb, from);
if (ret <= 0)
goto out_unlock;
@@ -4929,6 +5049,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
/* Determine whether we will do a direct write or a buffered write. */
dio = f2fs_should_use_dio(inode, iocb, from);
+ /* dio is not compatible w/ atomic write */
+ if (dio && f2fs_is_atomic_file(inode)) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
/* Possibly preallocate the blocks for the write. */
target_size = iocb->ki_pos + iov_iter_count(from);
preallocated = f2fs_preallocate_blocks(iocb, from, dio);
@@ -5003,6 +5129,9 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len,
filp->f_mode &= ~FMODE_RANDOM;
spin_unlock(&filp->f_lock);
return 0;
+ } else if (advice == POSIX_FADV_WILLNEED && offset == 0) {
+ /* Load extent cache at the first readahead. */
+ f2fs_precache_extents(inode);
}
err = generic_fadvise(filp, offset, len, advice);
@@ -5145,4 +5274,5 @@ const struct file_operations f2fs_file_operations = {
.splice_read = f2fs_file_splice_read,
.splice_write = iter_file_splice_write,
.fadvise = f2fs_file_fadvise,
+ .fop_flags = FOP_BUFFER_RASYNC,
};
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index e99041582414..c0e43d6056a0 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -46,8 +46,8 @@ static int gc_thread_func(void *data)
do {
bool sync_mode, foreground = false;
- wait_event_interruptible_timeout(*wq,
- kthread_should_stop() || freezing(current) ||
+ wait_event_freezable_timeout(*wq,
+ kthread_should_stop() ||
waitqueue_active(fggc_wq) ||
gc_th->gc_wake,
msecs_to_jiffies(wait_ms));
@@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
if (gc_th->gc_wake)
gc_th->gc_wake = false;
- if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
+ if (f2fs_readonly(sbi->sb)) {
stat_other_skip_bggc_count(sbi);
continue;
}
@@ -81,6 +81,8 @@ static int gc_thread_func(void *data)
continue;
}
+ gc_control.one_time = false;
+
/*
* [GC triggering condition]
* 0. GC is not conducted currently.
@@ -116,15 +118,30 @@ static int gc_thread_func(void *data)
goto next;
}
- if (has_enough_invalid_blocks(sbi))
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ if (has_enough_free_blocks(sbi,
+ gc_th->no_zoned_gc_percent)) {
+ wait_ms = gc_th->no_gc_sleep_time;
+ f2fs_up_write(&sbi->gc_lock);
+ goto next;
+ }
+ if (wait_ms == gc_th->no_gc_sleep_time)
+ wait_ms = gc_th->max_sleep_time;
+ }
+
+ if (need_to_boost_gc(sbi)) {
decrease_sleep_time(gc_th, &wait_ms);
- else
+ if (f2fs_sb_has_blkzoned(sbi))
+ gc_control.one_time = true;
+ } else {
increase_sleep_time(gc_th, &wait_ms);
+ }
do_gc:
stat_inc_gc_call_count(sbi, foreground ?
FOREGROUND : BACKGROUND);
- sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;
+ sync_mode = (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC) ||
+ gc_control.one_time;
/* foreground GC was been triggered via f2fs_balance_fs() */
if (foreground)
@@ -179,9 +196,21 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
return -ENOMEM;
gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
- gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
- gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
- gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
+ gc_th->valid_thresh_ratio = DEF_GC_THREAD_VALID_THRESH_RATIO;
+
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED;
+ gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED;
+ gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED;
+ gc_th->no_zoned_gc_percent = LIMIT_NO_ZONED_GC;
+ gc_th->boost_zoned_gc_percent = LIMIT_BOOST_ZONED_GC;
+ } else {
+ gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
+ gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
+ gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
+ gc_th->no_zoned_gc_percent = 0;
+ gc_th->boost_zoned_gc_percent = 0;
+ }
gc_th->gc_wake = false;
@@ -303,7 +332,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
/* LFS */
if (p->gc_mode == GC_GREEDY)
- return 2 * BLKS_PER_SEG(sbi) * p->ofs_unit;
+ return SEGS_TO_BLKS(sbi, 2 * p->ofs_unit);
else if (p->gc_mode == GC_CB)
return UINT_MAX;
else if (p->gc_mode == GC_AT)
@@ -341,7 +370,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned char age = 0;
unsigned char u;
unsigned int i;
- unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno);
+ unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi);
for (i = 0; i < usable_segs_per_sec; i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
@@ -350,7 +379,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
mtime = div_u64(mtime, usable_segs_per_sec);
vblocks = div_u64(vblocks, usable_segs_per_sec);
- u = (vblocks * 100) >> sbi->log_blocks_per_seg;
+ u = BLKS_TO_SEGS(sbi, vblocks * 100);
/* Handle if the system time has changed by the user */
if (mtime < sit_i->min_mtime)
@@ -370,6 +399,11 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
if (p->alloc_mode == SSR)
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+ if (p->one_time_gc && (get_valid_blocks(sbi, segno, true) >=
+ CAP_BLKS_PER_SEC(sbi) * sbi->gc_thread->valid_thresh_ratio /
+ 100))
+ return UINT_MAX;
+
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
return get_valid_blocks(sbi, segno, true);
@@ -744,7 +778,7 @@ static int f2fs_gc_pinned_control(struct inode *inode, int gc_type,
*/
int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
int gc_type, int type, char alloc_mode,
- unsigned long long age)
+ unsigned long long age, bool one_time)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
@@ -761,6 +795,7 @@ int f2fs_get_victim(struct f2fs_sb_info *sbi, unsigned int *result,
p.alloc_mode = alloc_mode;
p.age = age;
p.age_threshold = sbi->am.age_threshold;
+ p.one_time_gc = one_time;
retry:
select_policy(sbi, gc_type, type, &p);
@@ -1198,7 +1233,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE_READ))) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto put_page;
}
goto got_it;
@@ -1217,7 +1251,6 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
DATA_GENERIC_ENHANCE))) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto put_page;
}
got_it:
@@ -1366,8 +1399,13 @@ static int move_data_block(struct inode *inode, block_t bidx,
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* allocate block address */
- f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
+ err = f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
&sum, type, NULL);
+ if (err) {
+ f2fs_put_page(mpage, 1);
+ /* filesystem should shut down, no need to recover the block */
+ goto up_out;
+ }
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -1436,7 +1474,7 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type,
goto out;
if (gc_type == BG_GC) {
- if (PageWriteback(page)) {
+ if (folio_test_writeback(page_folio(page))) {
err = -EAGAIN;
goto out;
}
@@ -1669,13 +1707,14 @@ next_step:
}
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
- int gc_type)
+ int gc_type, bool one_time)
{
struct sit_info *sit_i = SIT_I(sbi);
int ret;
down_write(&sit_i->sentry_lock);
- ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE, LFS, 0);
+ ret = f2fs_get_victim(sbi, victim, gc_type, NO_CHECK_TYPE,
+ LFS, 0, one_time);
up_write(&sit_i->sentry_lock);
return ret;
}
@@ -1683,30 +1722,49 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
static int do_garbage_collect(struct f2fs_sb_info *sbi,
unsigned int start_segno,
struct gc_inode_list *gc_list, int gc_type,
- bool force_migrate)
+ bool force_migrate, bool one_time)
{
struct page *sum_page;
struct f2fs_summary_block *sum;
struct blk_plug plug;
unsigned int segno = start_segno;
unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi);
+ unsigned int sec_end_segno;
int seg_freed = 0, migrated = 0;
unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
SUM_TYPE_DATA : SUM_TYPE_NODE;
unsigned char data_type = (type == SUM_TYPE_DATA) ? DATA : NODE;
int submitted = 0;
- if (__is_large_section(sbi))
- end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi));
+ if (__is_large_section(sbi)) {
+ sec_end_segno = rounddown(end_segno, SEGS_PER_SEC(sbi));
- /*
- * zone-capacity can be less than zone-size in zoned devices,
- * resulting in less than expected usable segments in the zone,
- * calculate the end segno in the zone which can be garbage collected
- */
- if (f2fs_sb_has_blkzoned(sbi))
- end_segno -= SEGS_PER_SEC(sbi) -
- f2fs_usable_segs_in_sec(sbi, segno);
+ /*
+ * zone-capacity can be less than zone-size in zoned devices,
+ * resulting in less than expected usable segments in the zone,
+ * calculate the end segno in the zone which can be garbage
+ * collected
+ */
+ if (f2fs_sb_has_blkzoned(sbi))
+ sec_end_segno -= SEGS_PER_SEC(sbi) -
+ f2fs_usable_segs_in_sec(sbi);
+
+ if (gc_type == BG_GC || one_time) {
+ unsigned int window_granularity =
+ sbi->migration_window_granularity;
+
+ if (f2fs_sb_has_blkzoned(sbi) &&
+ !has_enough_free_blocks(sbi,
+ sbi->gc_thread->boost_zoned_gc_percent))
+ window_granularity *=
+ BOOST_GC_MULTIPLE;
+
+ end_segno = start_segno + window_granularity;
+ }
+
+ if (end_segno > sec_end_segno)
+ end_segno = sec_end_segno;
+ }
sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
@@ -1754,7 +1812,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
if (type != GET_SUM_TYPE((&sum->footer))) {
f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT",
segno, type, GET_SUM_TYPE((&sum->footer)));
- set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_stop_checkpoint(sbi, false,
STOP_CP_REASON_CORRUPTED_SUMMARY);
goto skip;
@@ -1786,7 +1843,8 @@ freed:
if (__is_large_section(sbi))
sbi->next_victim_seg[gc_type] =
- (segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO;
+ (segno + 1 < sec_end_segno) ?
+ segno + 1 : NULL_SEGNO;
skip:
f2fs_put_page(sum_page, 0);
}
@@ -1841,6 +1899,7 @@ gc_more:
/* Let's run FG_GC, if we don't have enough space. */
if (has_not_enough_free_secs(sbi, 0, 0)) {
gc_type = FG_GC;
+ gc_control->one_time = false;
/*
* For example, if there are many prefree_segments below given
@@ -1863,7 +1922,7 @@ gc_more:
goto stop;
}
retry:
- ret = __get_victim(sbi, &segno, gc_type);
+ ret = __get_victim(sbi, &segno, gc_type, gc_control->one_time);
if (ret) {
/* allow to search victim from sections has pinned data */
if (ret == -ENODATA && gc_type == FG_GC &&
@@ -1875,14 +1934,21 @@ retry:
}
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type,
- gc_control->should_migrate_blocks);
+ gc_control->should_migrate_blocks,
+ gc_control->one_time);
+ if (seg_freed < 0)
+ goto stop;
+
total_freed += seg_freed;
- if (seg_freed == f2fs_usable_segs_in_sec(sbi, segno)) {
+ if (seg_freed == f2fs_usable_segs_in_sec(sbi)) {
sec_freed++;
total_sec_freed++;
}
+ if (gc_control->one_time)
+ goto stop;
+
if (gc_type == FG_GC) {
sbi->cur_victim_sec = NULL_SEGNO;
@@ -1998,14 +2064,19 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi,
unsigned int segno;
unsigned int gc_secs = dry_run_sections;
+ if (unlikely(f2fs_cp_error(sbi)))
+ return -EIO;
+
for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) {
struct gc_inode_list gc_list = {
.ilist = LIST_HEAD_INIT(gc_list.ilist),
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
- do_garbage_collect(sbi, segno, &gc_list, FG_GC,
- dry_run_sections == 0);
+ if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno)))
+ continue;
+
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false);
put_gc_inode(&gc_list);
if (!dry_run && get_valid_blocks(sbi, segno, true))
@@ -2046,8 +2117,11 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
/* Move out cursegs from the target range */
- for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
- f2fs_allocate_segment_for_resize(sbi, type, start, end);
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++) {
+ err = f2fs_allocate_segment_for_resize(sbi, type, start, end);
+ if (err)
+ goto out;
+ }
/* do GC to move out valid blocks in the range */
err = f2fs_gc_range(sbi, start, end, dry_run, 0);
@@ -2090,7 +2164,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
raw_sb->segment_count = cpu_to_le32(segment_count + segs);
raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs);
raw_sb->block_count = cpu_to_le64(block_count +
- (long long)(segs << sbi->log_blocks_per_seg));
+ (long long)SEGS_TO_BLKS(sbi, segs));
if (f2fs_is_multi_device(sbi)) {
int last_dev = sbi->s_ndevs - 1;
int dev_segs =
@@ -2106,7 +2180,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs)
static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs)
{
int segs = secs * SEGS_PER_SEC(sbi);
- long long blks = (long long)segs << sbi->log_blocks_per_seg;
+ long long blks = SEGS_TO_BLKS(sbi, segs);
long long user_block_count =
le64_to_cpu(F2FS_CKPT(sbi)->user_block_count);
@@ -2148,7 +2222,7 @@ int f2fs_resize_fs(struct file *filp, __u64 block_count)
int last_dev = sbi->s_ndevs - 1;
__u64 last_segs = FDEV(last_dev).total_segments;
- if (block_count + (last_segs << sbi->log_blocks_per_seg) <=
+ if (block_count + SEGS_TO_BLKS(sbi, last_segs) <=
old_block_count)
return -EINVAL;
}
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 28a00942802c..5c1eaf55e127 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -15,17 +15,30 @@
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
+/* GC sleep parameters for zoned devices */
+#define DEF_GC_THREAD_MIN_SLEEP_TIME_ZONED 10
+#define DEF_GC_THREAD_MAX_SLEEP_TIME_ZONED 20
+#define DEF_GC_THREAD_NOGC_SLEEP_TIME_ZONED 60000
+
/* choose candidates from sections which has age of more than 7 days */
#define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7)
#define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */
#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */
#define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */
+#define DEF_GC_THREAD_VALID_THRESH_RATIO 95 /* do not GC over 95% valid block ratio for one time GC */
#define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
+#define LIMIT_NO_ZONED_GC 60 /* percentage of total sections; zoned devices skip GC while free sections exceed it */
+#define LIMIT_BOOST_ZONED_GC 25 /* percentage of total sections; zoned devices boost GC once free sections are at or below it */
+#define DEF_MIGRATION_WINDOW_GRANULARITY_ZONED 3
+#define BOOST_GC_MULTIPLE 5
+#define ZONED_PIN_SEC_REQUIRED_COUNT 1
+
#define DEF_GC_FAILED_PINNED_FILES 2048
+#define MAX_GC_FAILED_PINNED_FILES USHRT_MAX
/* Search max. number of dirty segments to select a victim segment */
#define DEF_MAX_VICTIM_SEARCH 4096 /* covers 8GB */
@@ -50,6 +63,11 @@ struct f2fs_gc_kthread {
* caller of f2fs_balance_fs()
* will wait on this wait queue.
*/
+
+ /* for gc control for zoned devices */
+ unsigned int no_zoned_gc_percent;
+ unsigned int boost_zoned_gc_percent;
+ unsigned int valid_thresh_ratio;
};
struct gc_inode_list {
@@ -96,7 +114,7 @@ static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
if (f2fs_sb_has_blkzoned(sbi))
return free_segs_blk_count_zoned(sbi);
- return free_segments(sbi) << sbi->log_blocks_per_seg;
+ return SEGS_TO_BLKS(sbi, free_segments(sbi));
}
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
@@ -104,7 +122,7 @@ static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
block_t free_blks, ovp_blks;
free_blks = free_segs_blk_count(sbi);
- ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+ ovp_blks = SEGS_TO_BLKS(sbi, overprovision_segments(sbi));
if (free_blks < ovp_blks)
return 0;
@@ -151,6 +169,12 @@ static inline void decrease_sleep_time(struct f2fs_gc_kthread *gc_th,
*wait -= min_time;
}
+static inline bool has_enough_free_blocks(struct f2fs_sb_info *sbi,
+ unsigned int limit_perc)
+{
+ return free_sections(sbi) > ((sbi->total_sections * limit_perc) / 100);
+}
+
static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
{
block_t user_block_count = sbi->user_block_count;
@@ -166,3 +190,10 @@ static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
free_user_blocks(sbi) <
limit_free_user_blocks(invalid_user_blocks));
}
+
+static inline bool need_to_boost_gc(struct f2fs_sb_info *sbi)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return !has_enough_free_blocks(sbi, LIMIT_BOOST_ZONED_GC);
+ return has_enough_invalid_blocks(sbi);
+}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index a3f8b4ed495e..3b91a95d4276 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -79,22 +79,22 @@ bool f2fs_may_inline_dentry(struct inode *inode)
return true;
}
-void f2fs_do_read_inline_data(struct page *page, struct page *ipage)
+void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio_file_mapping(folio)->host;
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
return;
- f2fs_bug_on(F2FS_P_SB(page), page->index);
+ f2fs_bug_on(F2FS_I_SB(inode), folio_index(folio));
- zero_user_segment(page, MAX_INLINE_DATA(inode), PAGE_SIZE);
+ folio_zero_segment(folio, MAX_INLINE_DATA(inode), folio_size(folio));
/* Copy the whole inline data block */
- memcpy_to_page(page, 0, inline_data_addr(inode, ipage),
+ memcpy_to_folio(folio, 0, inline_data_addr(inode, ipage),
MAX_INLINE_DATA(inode));
- if (!PageUptodate(page))
- SetPageUptodate(page);
+ if (!folio_test_uptodate(folio))
+ folio_mark_uptodate(folio);
}
void f2fs_truncate_inline_inode(struct inode *inode,
@@ -115,13 +115,13 @@ void f2fs_truncate_inline_inode(struct inode *inode,
clear_inode_flag(inode, FI_DATA_EXIST);
}
-int f2fs_read_inline_data(struct inode *inode, struct page *page)
+int f2fs_read_inline_data(struct inode *inode, struct folio *folio)
{
struct page *ipage;
ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
if (IS_ERR(ipage)) {
- unlock_page(page);
+ folio_unlock(folio);
return PTR_ERR(ipage);
}
@@ -130,15 +130,15 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
return -EAGAIN;
}
- if (page->index)
- zero_user_segment(page, 0, PAGE_SIZE);
+ if (folio_index(folio))
+ folio_zero_segment(folio, 0, folio_size(folio));
else
- f2fs_do_read_inline_data(page, ipage);
+ f2fs_do_read_inline_data(folio, ipage);
- if (!PageUptodate(page))
- SetPageUptodate(page);
+ if (!folio_test_uptodate(folio))
+ folio_mark_uptodate(folio);
f2fs_put_page(ipage, 1);
- unlock_page(page);
+ folio_unlock(folio);
return 0;
}
@@ -182,9 +182,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
return -EFSCORRUPTED;
}
- f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
+ f2fs_bug_on(F2FS_P_SB(page), folio_test_writeback(page_folio(page)));
- f2fs_do_read_inline_data(page, dn->inode_page);
+ f2fs_do_read_inline_data(page_folio(page), dn->inode_page);
set_page_dirty(page);
/* clear dirty state */
@@ -260,35 +260,34 @@ out:
return err;
}
-int f2fs_write_inline_data(struct inode *inode, struct page *page)
+int f2fs_write_inline_data(struct inode *inode, struct folio *folio)
{
- struct dnode_of_data dn;
- int err;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct page *ipage;
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
- if (err)
- return err;
+ ipage = f2fs_get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage))
+ return PTR_ERR(ipage);
if (!f2fs_has_inline_data(inode)) {
- f2fs_put_dnode(&dn);
+ f2fs_put_page(ipage, 1);
return -EAGAIN;
}
- f2fs_bug_on(F2FS_I_SB(inode), page->index);
+ f2fs_bug_on(F2FS_I_SB(inode), folio->index);
- f2fs_wait_on_page_writeback(dn.inode_page, NODE, true, true);
- memcpy_from_page(inline_data_addr(inode, dn.inode_page),
- page, 0, MAX_INLINE_DATA(inode));
- set_page_dirty(dn.inode_page);
+ f2fs_wait_on_page_writeback(ipage, NODE, true, true);
+ memcpy_from_folio(inline_data_addr(inode, ipage),
+ folio, 0, MAX_INLINE_DATA(inode));
+ set_page_dirty(ipage);
- f2fs_clear_page_cache_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(folio);
set_inode_flag(inode, FI_APPEND_WRITE);
set_inode_flag(inode, FI_DATA_EXIST);
- clear_page_private_inline(dn.inode_page);
- f2fs_put_dnode(&dn);
+ clear_page_private_inline(ipage);
+ f2fs_put_page(ipage, 1);
return 0;
}
@@ -353,7 +352,8 @@ process_inline:
struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
const struct f2fs_filename *fname,
- struct page **res_page)
+ struct page **res_page,
+ bool use_hash)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct f2fs_dir_entry *de;
@@ -370,7 +370,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
inline_dentry = inline_data_addr(dir, ipage);
make_dentry_ptr_inline(dir, &d, inline_dentry);
- de = f2fs_find_target_dentry(&d, fname, NULL);
+ de = f2fs_find_target_dentry(&d, fname, NULL, use_hash);
unlock_page(ipage);
if (IS_ERR(de)) {
*res_page = ERR_CAST(de);
@@ -719,7 +719,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
set_page_dirty(page);
f2fs_put_page(page, 1);
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 7ad4a9241759..41ead6c772e4 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -7,7 +7,6 @@
*/
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
-#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/sched/mm.h>
#include <linux/lz4.h>
@@ -35,10 +34,10 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
if (f2fs_inode_dirtied(inode, sync))
return;
- if (f2fs_is_atomic_file(inode)) {
- set_inode_flag(inode, FI_ATOMIC_DIRTIED);
+ /* only atomic file w/ FI_ATOMIC_COMMITTED can be set vfs dirty */
+ if (f2fs_is_atomic_file(inode) &&
+ !is_inode_flag_set(inode, FI_ATOMIC_COMMITTED))
return;
- }
mark_inode_dirty_sync(inode);
}
@@ -169,7 +168,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
if (!f2fs_enable_inode_chksum(sbi, page))
#else
if (!f2fs_enable_inode_chksum(sbi, page) ||
- PageDirty(page) || PageWriteback(page))
+ PageDirty(page) ||
+ folio_test_writeback(page_folio(page)))
#endif
return true;
@@ -179,7 +179,8 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page)
if (provided != calculated)
f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x",
- page->index, ino_of_node(page), provided, calculated);
+ page_folio(page)->index, ino_of_node(page),
+ provided, calculated);
return provided == calculated;
}
@@ -287,6 +288,12 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
+ if (ino_of_node(node_page) == fi->i_xattr_nid) {
+ f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.",
+ __func__, inode->i_ino, fi->i_xattr_nid);
+ return false;
+ }
+
if (f2fs_has_extra_attr(inode)) {
if (!f2fs_sb_has_extra_attr(sbi)) {
f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off",
@@ -305,7 +312,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
f2fs_has_inline_xattr(inode) &&
(!fi->i_inline_xattr_size ||
fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
- f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %zu",
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %lu",
__func__, inode->i_ino, fi->i_inline_xattr_size,
MAX_INLINE_XATTR_SIZE);
return false;
@@ -317,10 +324,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
if (!sanity_check_compress_inode(inode, ri))
return false;
}
- } else if (f2fs_sb_has_flexible_inline_xattr(sbi)) {
- f2fs_warn(sbi, "%s: corrupted inode ino=%lx, run fsck to fix.",
- __func__, inode->i_ino);
- return false;
}
if (!f2fs_sb_has_extra_attr(sbi)) {
@@ -382,9 +385,9 @@ static void init_idisk_time(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
- fi->i_disk_time[0] = inode->i_atime;
+ fi->i_disk_time[0] = inode_get_atime(inode);
fi->i_disk_time[1] = inode_get_ctime(inode);
- fi->i_disk_time[2] = inode->i_mtime;
+ fi->i_disk_time[2] = inode_get_mtime(inode);
}
static int do_read_inode(struct inode *inode)
@@ -412,18 +415,17 @@ static int do_read_inode(struct inode *inode)
inode->i_size = le64_to_cpu(ri->i_size);
inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
- inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
+ inode_set_atime(inode, le64_to_cpu(ri->i_atime),
+ le32_to_cpu(ri->i_atime_nsec));
inode_set_ctime(inode, le64_to_cpu(ri->i_ctime),
le32_to_cpu(ri->i_ctime_nsec));
- inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
- inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
- inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
+ inode_set_mtime(inode, le64_to_cpu(ri->i_mtime),
+ le32_to_cpu(ri->i_mtime_nsec));
inode->i_generation = le32_to_cpu(ri->i_generation);
if (S_ISDIR(inode->i_mode))
fi->i_current_depth = le32_to_cpu(ri->i_current_depth);
else if (S_ISREG(inode->i_mode))
- fi->i_gc_failures[GC_FAILURE_PIN] =
- le16_to_cpu(ri->i_gc_failures);
+ fi->i_gc_failures = le16_to_cpu(ri->i_gc_failures);
fi->i_xattr_nid = le32_to_cpu(ri->i_xattr_nid);
fi->i_flags = le32_to_cpu(ri->i_flags);
if (S_ISREG(inode->i_mode))
@@ -585,7 +587,7 @@ make_now:
#ifdef CONFIG_F2FS_FS_COMPRESSION
inode->i_mapping->a_ops = &f2fs_compress_aops;
/*
- * generic_error_remove_page only truncates pages of regular
+ * generic_error_remove_folio only truncates pages of regular
* inode
*/
inode->i_mode |= S_IFREG;
@@ -645,8 +647,9 @@ retry:
void f2fs_update_inode(struct inode *inode, struct page *node_page)
{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_inode *ri;
- struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
+ struct extent_tree *et = fi->extent_tree[EX_READ];
f2fs_wait_on_page_writeback(node_page, NODE, true, true);
set_page_dirty(node_page);
@@ -656,7 +659,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
ri = F2FS_INODE(node_page);
ri->i_mode = cpu_to_le16(inode->i_mode);
- ri->i_advise = F2FS_I(inode)->i_advise;
+ ri->i_advise = fi->i_advise;
ri->i_uid = cpu_to_le32(i_uid_read(inode));
ri->i_gid = cpu_to_le32(i_gid_read(inode));
ri->i_links = cpu_to_le32(inode->i_nlink);
@@ -675,66 +678,56 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
}
set_raw_inline(inode, ri);
- ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
- ri->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
- ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
- ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
- ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ ri->i_atime = cpu_to_le64(inode_get_atime_sec(inode));
+ ri->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ ri->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
+ ri->i_atime_nsec = cpu_to_le32(inode_get_atime_nsec(inode));
+ ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ ri->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
if (S_ISDIR(inode->i_mode))
- ri->i_current_depth =
- cpu_to_le32(F2FS_I(inode)->i_current_depth);
+ ri->i_current_depth = cpu_to_le32(fi->i_current_depth);
else if (S_ISREG(inode->i_mode))
- ri->i_gc_failures =
- cpu_to_le16(F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]);
- ri->i_xattr_nid = cpu_to_le32(F2FS_I(inode)->i_xattr_nid);
- ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags);
- ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino);
+ ri->i_gc_failures = cpu_to_le16(fi->i_gc_failures);
+ ri->i_xattr_nid = cpu_to_le32(fi->i_xattr_nid);
+ ri->i_flags = cpu_to_le32(fi->i_flags);
+ ri->i_pino = cpu_to_le32(fi->i_pino);
ri->i_generation = cpu_to_le32(inode->i_generation);
- ri->i_dir_level = F2FS_I(inode)->i_dir_level;
+ ri->i_dir_level = fi->i_dir_level;
if (f2fs_has_extra_attr(inode)) {
- ri->i_extra_isize = cpu_to_le16(F2FS_I(inode)->i_extra_isize);
+ ri->i_extra_isize = cpu_to_le16(fi->i_extra_isize);
if (f2fs_sb_has_flexible_inline_xattr(F2FS_I_SB(inode)))
ri->i_inline_xattr_size =
- cpu_to_le16(F2FS_I(inode)->i_inline_xattr_size);
+ cpu_to_le16(fi->i_inline_xattr_size);
if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)) &&
- F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
- i_projid)) {
+ F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) {
projid_t i_projid;
- i_projid = from_kprojid(&init_user_ns,
- F2FS_I(inode)->i_projid);
+ i_projid = from_kprojid(&init_user_ns, fi->i_projid);
ri->i_projid = cpu_to_le32(i_projid);
}
if (f2fs_sb_has_inode_crtime(F2FS_I_SB(inode)) &&
- F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
- i_crtime)) {
- ri->i_crtime =
- cpu_to_le64(F2FS_I(inode)->i_crtime.tv_sec);
- ri->i_crtime_nsec =
- cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec);
+ F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_crtime)) {
+ ri->i_crtime = cpu_to_le64(fi->i_crtime.tv_sec);
+ ri->i_crtime_nsec = cpu_to_le32(fi->i_crtime.tv_nsec);
}
if (f2fs_sb_has_compression(F2FS_I_SB(inode)) &&
- F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
+ F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_compress_flag)) {
unsigned short compress_flag;
- ri->i_compr_blocks =
- cpu_to_le64(atomic_read(
- &F2FS_I(inode)->i_compr_blocks));
- ri->i_compress_algorithm =
- F2FS_I(inode)->i_compress_algorithm;
- compress_flag = F2FS_I(inode)->i_compress_flag |
- F2FS_I(inode)->i_compress_level <<
+ ri->i_compr_blocks = cpu_to_le64(
+ atomic_read(&fi->i_compr_blocks));
+ ri->i_compress_algorithm = fi->i_compress_algorithm;
+ compress_flag = fi->i_compress_flag |
+ fi->i_compress_level <<
COMPRESS_LEVEL_OFFSET;
ri->i_compress_flag = cpu_to_le16(compress_flag);
- ri->i_log_cluster_size =
- F2FS_I(inode)->i_log_cluster_size;
+ ri->i_log_cluster_size = fi->i_log_cluster_size;
}
}
@@ -764,8 +757,12 @@ retry:
if (err == -ENOENT)
return;
+ if (err == -EFSCORRUPTED)
+ goto stop_checkpoint;
+
if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT)
goto retry;
+stop_checkpoint:
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_UPDATE_INODE);
return;
}
@@ -788,6 +785,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
!is_inode_flag_set(inode, FI_DIRTY_INODE))
return 0;
+ /*
+ * no need to update inode page, ultimately f2fs_evict_inode() will
+ * clear dirty status of inode.
+ */
+ if (f2fs_cp_error(sbi))
+ return -EIO;
+
if (!f2fs_is_checkpoint_ready(sbi)) {
f2fs_mark_inode_dirty_sync(inode, true);
return -ENOSPC;
@@ -812,6 +816,7 @@ void f2fs_evict_inode(struct inode *inode)
struct f2fs_inode_info *fi = F2FS_I(inode);
nid_t xnid = fi->i_xattr_nid;
int err = 0;
+ bool freeze_protected = false;
f2fs_abort_atomic_write(inode, true);
@@ -852,8 +857,10 @@ void f2fs_evict_inode(struct inode *inode)
f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO);
- if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+ if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) {
sb_start_intwrite(inode->i_sb);
+ freeze_protected = true;
+ }
set_inode_flag(inode, FI_NO_ALLOC);
i_size_write(inode, 0);
retry:
@@ -895,8 +902,21 @@ retry:
f2fs_update_inode_page(inode);
if (dquot_initialize_needed(inode))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+
+ /*
+ * If both f2fs_truncate() and f2fs_update_inode_page() failed
+ * due to fuzzed corrupted inode, call f2fs_inode_synced() to
+ * avoid triggering later f2fs_bug_on().
+ */
+ if (is_inode_flag_set(inode, FI_DIRTY_INODE)) {
+ f2fs_warn(sbi,
+ "f2fs_evict_inode: inode is dirty, ino:%lu",
+ inode->i_ino);
+ f2fs_inode_synced(inode);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
}
- if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING))
+ if (freeze_protected)
sb_end_intwrite(inode->i_sb);
no_delete:
dquot_drop(inode);
@@ -911,8 +931,12 @@ no_delete:
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
- else
- f2fs_inode_synced(inode);
+
+ /*
+ * anyway, it needs to remove the inode from sbi->inode_list[DIRTY_META]
+ * list to avoid UAF in f2fs_sync_inode_meta() during checkpoint.
+ */
+ f2fs_inode_synced(inode);
/* for the case f2fs_new_inode() was failed, .i_ino is zero, skip it */
if (inode->i_ino)
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 2e08e1fdf485..781b872fac8c 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -221,6 +221,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
const char *name)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
+ struct f2fs_inode_info *fi;
nid_t ino;
struct inode *inode;
bool nid_free = false;
@@ -241,14 +242,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
inode_init_owner(idmap, inode, dir, mode);
+ fi = F2FS_I(inode);
inode->i_ino = ino;
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
- F2FS_I(inode)->i_crtime = inode->i_mtime;
+ simple_inode_init_ts(inode);
+ fi->i_crtime = inode_get_mtime(inode);
inode->i_generation = get_random_u32();
if (S_ISDIR(inode->i_mode))
- F2FS_I(inode)->i_current_depth = 1;
+ fi->i_current_depth = 1;
err = insert_inode_locked(inode);
if (err) {
@@ -258,9 +260,9 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
if (f2fs_sb_has_project_quota(sbi) &&
(F2FS_I(dir)->i_flags & F2FS_PROJINHERIT_FL))
- F2FS_I(inode)->i_projid = F2FS_I(dir)->i_projid;
+ fi->i_projid = F2FS_I(dir)->i_projid;
else
- F2FS_I(inode)->i_projid = make_kprojid(&init_user_ns,
+ fi->i_projid = make_kprojid(&init_user_ns,
F2FS_DEF_PROJID);
err = fscrypt_prepare_new_inode(dir, inode, &encrypt);
@@ -278,7 +280,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
if (f2fs_sb_has_extra_attr(sbi)) {
set_inode_flag(inode, FI_EXTRA_ATTR);
- F2FS_I(inode)->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
+ fi->i_extra_isize = F2FS_TOTAL_EXTRA_ATTR_SIZE;
}
if (test_opt(sbi, INLINE_XATTR))
@@ -296,15 +298,15 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
f2fs_has_inline_dentry(inode)) {
xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
}
- F2FS_I(inode)->i_inline_xattr_size = xattr_size;
+ fi->i_inline_xattr_size = xattr_size;
- F2FS_I(inode)->i_flags =
+ fi->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
if (S_ISDIR(inode->i_mode))
- F2FS_I(inode)->i_flags |= F2FS_INDEX_FL;
+ fi->i_flags |= F2FS_INDEX_FL;
- if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
+ if (fi->i_flags & F2FS_PROJINHERIT_FL)
set_inode_flag(inode, FI_PROJ_INHERIT);
/* Check compression first. */
@@ -411,7 +413,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
if (is_inode_flag_set(dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)))
+ F2FS_I(inode)->i_projid)))
return -EXDEV;
err = f2fs_dquot_initialize(dir);
@@ -474,7 +476,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
}
err = f2fs_prepare_lookup(dir, dentry, &fname);
- generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
goto out_splice;
if (err)
@@ -509,8 +510,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
goto out_iput;
}
out_splice:
-#if IS_ENABLED(CONFIG_UNICODE)
- if (!inode && IS_CASEFOLDED(dir)) {
+ if (IS_ENABLED(CONFIG_UNICODE) && !inode && IS_CASEFOLDED(dir)) {
/* Eventually we want to call d_add_ci(dentry, NULL)
* for negative dentries in the encoding case as
* well. For now, prevent the negative dentry
@@ -519,7 +519,7 @@ out_splice:
trace_f2fs_lookup_end(dir, dentry, ino, err);
return NULL;
}
-#endif
+
new = d_splice_alias(inode, dentry);
trace_f2fs_lookup_end(dir, !IS_ERR_OR_NULL(new) ? new : dentry,
ino, IS_ERR(new) ? PTR_ERR(new) : err);
@@ -560,6 +560,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
goto fail;
}
+ if (unlikely(inode->i_nlink == 0)) {
+ f2fs_warn(F2FS_I_SB(inode), "%s: inode (ino=%lx) has zero i_nlink",
+ __func__, inode->i_ino);
+ err = -EFSCORRUPTED;
+ set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
+ f2fs_put_page(page, 0);
+ goto fail;
+ }
+
f2fs_balance_fs(sbi, true);
f2fs_lock_op(sbi);
@@ -572,16 +581,15 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
f2fs_delete_entry(de, page, dir, inode);
f2fs_unlock_op(sbi);
-#if IS_ENABLED(CONFIG_UNICODE)
/* VFS negative dentries are incompatible with Encoding and
* Case-insensitiveness. Eventually we'll want avoid
* invalidating the dentries here, alongside with returning the
* negative dentries at f2fs_lookup(), when it is better
* supported by the VFS for the CI case.
*/
- if (IS_CASEFOLDED(dir))
+ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
d_invalidate(dentry);
-#endif
+
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
fail:
@@ -896,6 +904,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
+ bool old_is_dir = S_ISDIR(old_inode->i_mode);
int err;
if (unlikely(f2fs_cp_error(sbi)))
@@ -905,7 +914,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
(!projid_eq(F2FS_I(new_dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)))
+ F2FS_I(old_inode)->i_projid)))
return -EXDEV;
/*
@@ -957,7 +966,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
goto out;
}
- if (S_ISDIR(old_inode->i_mode)) {
+ if (old_is_dir && old_dir != new_dir) {
old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page);
if (!old_dir_entry) {
if (IS_ERR(old_dir_page))
@@ -969,7 +978,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (new_inode) {
err = -ENOTEMPTY;
- if (old_dir_entry && !f2fs_empty_dir(new_inode))
+ if (old_is_dir && !f2fs_empty_dir(new_inode))
goto out_dir;
err = -ENOENT;
@@ -994,7 +1003,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
inode_set_ctime_current(new_inode);
f2fs_down_write(&F2FS_I(new_inode)->i_sem);
- if (old_dir_entry)
+ if (old_is_dir)
f2fs_i_links_write(new_inode, false);
f2fs_i_links_write(new_inode, false);
f2fs_up_write(&F2FS_I(new_inode)->i_sem);
@@ -1014,12 +1023,12 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
goto out_dir;
}
- if (old_dir_entry)
+ if (old_is_dir)
f2fs_i_links_write(new_dir, true);
}
f2fs_down_write(&F2FS_I(old_inode)->i_sem);
- if (!old_dir_entry || whiteout)
+ if (!old_is_dir || whiteout)
file_lost_pino(old_inode);
else
/* adjust dir's i_pino to pass fsck check */
@@ -1045,14 +1054,11 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
iput(whiteout);
}
- if (old_dir_entry) {
- if (old_dir != new_dir)
- f2fs_set_link(old_inode, old_dir_entry,
- old_dir_page, new_dir);
- else
- f2fs_put_page(old_dir_page, 0);
+ if (old_dir_entry)
+ f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir);
+ if (old_is_dir)
f2fs_i_links_write(old_dir, false);
- }
+
if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT) {
f2fs_add_ino_entry(sbi, new_dir->i_ino, TRANS_DIR_INO);
if (S_ISDIR(old_inode->i_mode))
@@ -1101,10 +1107,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(new_dir)->i_projid,
- F2FS_I(old_dentry->d_inode)->i_projid)) ||
- (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
+ F2FS_I(old_inode)->i_projid)) ||
+ (is_inode_flag_set(old_dir, FI_PROJ_INHERIT) &&
!projid_eq(F2FS_I(old_dir)->i_projid,
- F2FS_I(new_dentry->d_inode)->i_projid)))
+ F2FS_I(new_inode)->i_projid)))
return -EXDEV;
err = f2fs_dquot_initialize(old_dir);
@@ -1256,21 +1262,27 @@ static int f2fs_rename2(struct mnt_idmap *idmap,
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
+ trace_f2fs_rename_start(old_dir, old_dentry, new_dir, new_dentry,
+ flags);
+
err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
flags);
if (err)
return err;
- if (flags & RENAME_EXCHANGE) {
- return f2fs_cross_rename(old_dir, old_dentry,
- new_dir, new_dentry);
- }
+ if (flags & RENAME_EXCHANGE)
+ err = f2fs_cross_rename(old_dir, old_dentry,
+ new_dir, new_dentry);
+ else
/*
* VFS has already handled the new dentry existence case,
* here, we just deal with "RENAME_NOREPLACE" as regular rename.
*/
- return f2fs_rename(idmap, old_dir, old_dentry,
+ err = f2fs_rename(idmap, old_dir, old_dentry,
new_dir, new_dentry, flags);
+
+ trace_f2fs_rename_end(old_dentry, new_dentry, flags, err);
+ return err;
}
static const char *f2fs_encrypted_get_link(struct dentry *dentry,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index dedba481b66d..12c76e3d1cd4 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -20,7 +20,7 @@
#include "iostat.h"
#include <trace/events/f2fs.h>
-#define on_f2fs_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock)
+#define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock)
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
@@ -123,7 +123,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
static void clear_node_page_dirty(struct page *page)
{
if (PageDirty(page)) {
- f2fs_clear_page_cache_dirty_tag(page);
+ f2fs_clear_page_cache_dirty_tag(page_folio(page));
clear_page_dirty_for_io(page);
dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
}
@@ -633,7 +633,7 @@ static void f2fs_ra_node_pages(struct page *parent, int start, int n)
/* Then, try readahead for siblings of the desired node */
end = start + n;
- end = min(end, NIDS_PER_BLOCK);
+ end = min(end, (int)NIDS_PER_BLOCK);
for (i = start; i < end; i++) {
nid = get_nid(parent, i, false);
f2fs_ra_node_page(sbi, nid);
@@ -929,7 +929,7 @@ static int truncate_node(struct dnode_of_data *dn)
clear_node_page_dirty(dn->node_page);
set_sbi_flag(sbi, SBI_IS_DIRTY);
- index = dn->node_page->index;
+ index = page_folio(dn->node_page)->index;
f2fs_put_page(dn->node_page, 1);
invalidate_mapping_pages(NODE_MAPPING(sbi),
@@ -1134,7 +1134,14 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
trace_f2fs_truncate_inode_blocks_enter(inode, from);
level = get_node_path(inode, from, offset, noffset);
- if (level < 0) {
+ if (level <= 0) {
+ if (!level) {
+ level = -EFSCORRUPTED;
+ f2fs_err(sbi, "%s: inode ino=%lx has corrupted node block, from:%lu addrs:%u",
+ __func__, inode->i_ino,
+ from, ADDRS_PER_INODE(inode));
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ }
trace_f2fs_truncate_inode_blocks_exit(inode, level);
return level;
}
@@ -1197,7 +1204,17 @@ skip_partial:
default:
BUG();
}
- if (err < 0 && err != -ENOENT)
+ if (err == -ENOENT) {
+ set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
+ f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
+ f2fs_err_ratelimited(sbi,
+ "truncate node fail, ino:%lu, nid:%u, "
+ "offset[0]:%d, offset[1]:%d, nofs:%d",
+ inode->i_ino, dn.nid, offset[0],
+ offset[1], nofs);
+ err = 0;
+ }
+ if (err < 0)
goto fail;
if (offset[1] == 0 &&
ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
@@ -1374,6 +1391,7 @@ fail:
*/
static int read_node_page(struct page *page, blk_opf_t op_flags)
{
+ struct folio *folio = page_folio(page);
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
struct node_info ni;
struct f2fs_io_info fio = {
@@ -1386,21 +1404,21 @@ static int read_node_page(struct page *page, blk_opf_t op_flags)
};
int err;
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
if (!f2fs_inode_chksum_verify(sbi, page)) {
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
return -EFSBADCRC;
}
return LOCKED_PAGE;
}
- err = f2fs_get_node_info(sbi, page->index, &ni, false);
+ err = f2fs_get_node_info(sbi, folio->index, &ni, false);
if (err)
return err;
/* NEW_ADDR can be seen, after cp_error drops some dirty node pages */
if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) {
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
return -ENOENT;
}
@@ -1497,7 +1515,7 @@ out_err:
out_put_err:
/* ENOENT comes from read_node_page which is not an error. */
if (err != -ENOENT)
- f2fs_handle_page_eio(sbi, page->index, NODE);
+ f2fs_handle_page_eio(sbi, page_folio(page), NODE);
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
@@ -1540,7 +1558,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
if (!clear_page_dirty_for_io(page))
goto page_out;
- ret = f2fs_write_inline_data(inode, page);
+ ret = f2fs_write_inline_data(inode, page_folio(page));
inode_dec_dirty_pages(inode);
f2fs_remove_dirty_inode(inode);
if (ret)
@@ -1613,6 +1631,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
enum iostat_type io_type, unsigned int *seq_id)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
+ struct folio *folio = page_folio(page);
nid_t nid;
struct node_info ni;
struct f2fs_io_info fio = {
@@ -1629,15 +1648,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
};
unsigned int seq;
- trace_f2fs_writepage(page, NODE);
+ trace_f2fs_writepage(folio, NODE);
if (unlikely(f2fs_cp_error(sbi))) {
/* keep node pages in remount-ro mode */
if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
goto redirty_out;
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
dec_page_count(sbi, F2FS_DIRTY_NODES);
- unlock_page(page);
+ folio_unlock(folio);
return 0;
}
@@ -1651,7 +1670,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
/* get old block addr of this node page */
nid = nid_of_node(page);
- f2fs_bug_on(sbi, page->index != nid);
+ f2fs_bug_on(sbi, folio->index != nid);
if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
goto redirty_out;
@@ -1665,10 +1684,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
/* This page is already truncated */
if (unlikely(ni.blk_addr == NULL_ADDR)) {
- ClearPageUptodate(page);
+ folio_clear_uptodate(folio);
dec_page_count(sbi, F2FS_DIRTY_NODES);
f2fs_up_read(&sbi->node_write);
- unlock_page(page);
+ folio_unlock(folio);
return 0;
}
@@ -1679,7 +1698,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
goto redirty_out;
}
- if (atomic && !test_opt(sbi, NOBARRIER) && !f2fs_sb_has_blkzoned(sbi))
+ if (atomic && !test_opt(sbi, NOBARRIER))
fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
/* should add to global list before clearing PAGECACHE status */
@@ -1689,7 +1708,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
*seq_id = seq;
}
- set_page_writeback(page);
+ folio_start_writeback(folio);
fio.old_blkaddr = ni.blk_addr;
f2fs_do_write_node_page(nid, &fio);
@@ -1702,7 +1721,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
submitted = NULL;
}
- unlock_page(page);
+ folio_unlock(folio);
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_submit_merged_write(sbi, NODE);
@@ -1716,7 +1735,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted,
return 0;
redirty_out:
- redirty_page_for_writepage(wbc, page);
+ folio_redirty_for_writepage(wbc, folio);
return AOP_WRITEPAGE_ACTIVATE;
}
@@ -1748,7 +1767,7 @@ int f2fs_move_node_page(struct page *node_page, int gc_type)
goto release_page;
} else {
/* set page dirty and write it */
- if (!PageWriteback(node_page))
+ if (!folio_test_writeback(page_folio(node_page)))
set_page_dirty(node_page);
}
out_page:
@@ -1872,7 +1891,7 @@ continue_unlock:
}
if (!ret && atomic && !marked) {
f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx",
- ino, last_page->index);
+ ino, page_folio(last_page)->index);
lock_page(last_page);
f2fs_wait_on_page_writeback(last_page, NODE, true, true);
set_page_dirty(last_page);
@@ -1942,7 +1961,7 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
for (i = 0; i < nr_folios; i++) {
struct page *page = &fbatch.folios[i]->page;
- if (!IS_DNODE(page))
+ if (!IS_INODE(page))
continue;
lock_page(page);
@@ -2176,7 +2195,7 @@ skip_write:
static bool f2fs_dirty_node_folio(struct address_space *mapping,
struct folio *folio)
{
- trace_f2fs_set_page_dirty(&folio->page, NODE);
+ trace_f2fs_set_page_dirty(folio, NODE);
if (!folio_test_uptodate(folio))
folio_mark_uptodate(folio);
@@ -2258,24 +2277,6 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
}
}
-bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi)
-{
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- unsigned int i;
- bool ret = true;
-
- f2fs_down_read(&nm_i->nat_tree_lock);
- for (i = 0; i < nm_i->nat_blocks; i++) {
- if (!test_bit_le(i, nm_i->nat_block_bitmap)) {
- ret = false;
- break;
- }
- }
- f2fs_up_read(&nm_i->nat_tree_lock);
-
- return ret;
-}
-
static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
bool set, bool build)
{
@@ -2954,23 +2955,7 @@ add_out:
list_add_tail(&nes->set_list, head);
}
-static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs,
- unsigned int valid)
-{
- if (valid == 0) {
- __set_bit_le(nat_ofs, nm_i->empty_nat_bits);
- __clear_bit_le(nat_ofs, nm_i->full_nat_bits);
- return;
- }
-
- __clear_bit_le(nat_ofs, nm_i->empty_nat_bits);
- if (valid == NAT_ENTRY_PER_BLOCK)
- __set_bit_le(nat_ofs, nm_i->full_nat_bits);
- else
- __clear_bit_le(nat_ofs, nm_i->full_nat_bits);
-}
-
-static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
+static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
struct page *page)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -2979,7 +2964,7 @@ static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
int valid = 0;
int i = 0;
- if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+ if (!enabled_nat_bits(sbi, NULL))
return;
if (nat_index == 0) {
@@ -2990,36 +2975,17 @@ static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
valid++;
}
-
- __update_nat_bits(nm_i, nat_index, valid);
-}
-
-void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
-{
- struct f2fs_nm_info *nm_i = NM_I(sbi);
- unsigned int nat_ofs;
-
- f2fs_down_read(&nm_i->nat_tree_lock);
-
- for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) {
- unsigned int valid = 0, nid_ofs = 0;
-
- /* handle nid zero due to it should never be used */
- if (unlikely(nat_ofs == 0)) {
- valid = 1;
- nid_ofs = 1;
- }
-
- for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) {
- if (!test_bit_le(nid_ofs,
- nm_i->free_nid_bitmap[nat_ofs]))
- valid++;
- }
-
- __update_nat_bits(nm_i, nat_ofs, valid);
+ if (valid == 0) {
+ __set_bit_le(nat_index, nm_i->empty_nat_bits);
+ __clear_bit_le(nat_index, nm_i->full_nat_bits);
+ return;
}
- f2fs_up_read(&nm_i->nat_tree_lock);
+ __clear_bit_le(nat_index, nm_i->empty_nat_bits);
+ if (valid == NAT_ENTRY_PER_BLOCK)
+ __set_bit_le(nat_index, nm_i->full_nat_bits);
+ else
+ __clear_bit_le(nat_index, nm_i->full_nat_bits);
}
static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
@@ -3038,7 +3004,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
- if ((cpc->reason & CP_UMOUNT) ||
+ if (enabled_nat_bits(sbi, cpc) ||
!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
to_journal = false;
@@ -3085,7 +3051,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
if (to_journal) {
up_write(&curseg->journal_rwsem);
} else {
- update_nat_bits(sbi, start_nid, page);
+ __update_nat_bits(sbi, start_nid, page);
f2fs_put_page(page, 1);
}
@@ -3116,7 +3082,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* during unmount, let's flush nat_bits before checking
* nat_cnt[DIRTY_NAT].
*/
- if (cpc->reason & CP_UMOUNT) {
+ if (enabled_nat_bits(sbi, cpc)) {
f2fs_down_write(&nm_i->nat_tree_lock);
remove_nats_in_journal(sbi);
f2fs_up_write(&nm_i->nat_tree_lock);
@@ -3132,7 +3098,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (cpc->reason & CP_UMOUNT ||
+ if (enabled_nat_bits(sbi, cpc) ||
!__has_cursum_space(journal,
nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
remove_nats_in_journal(sbi);
@@ -3169,18 +3135,15 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
__u64 cp_ver = cur_cp_version(ckpt);
block_t nat_bits_addr;
+ if (!enabled_nat_bits(sbi, NULL))
+ return 0;
+
nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
nm_i->nat_bits = f2fs_kvzalloc(sbi,
- nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
+ F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL);
if (!nm_i->nat_bits)
return -ENOMEM;
- nm_i->full_nat_bits = nm_i->nat_bits + 8;
- nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
-
- if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
- return 0;
-
nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) -
nm_i->nat_bits_blocks;
for (i = 0; i < nm_i->nat_bits_blocks; i++) {
@@ -3190,19 +3153,20 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
if (IS_ERR(page))
return PTR_ERR(page);
- memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
+ memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i),
page_address(page), F2FS_BLKSIZE);
f2fs_put_page(page, 1);
}
cp_ver |= (cur_cp_crc(ckpt) << 32);
if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
- clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
- f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)",
- cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits));
+ disable_nat_bits(sbi, true);
return 0;
}
+ nm_i->full_nat_bits = nm_i->nat_bits + 8;
+ nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
+
f2fs_notice(sbi, "Found nat_bits in checkpoint");
return 0;
}
@@ -3213,7 +3177,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
unsigned int i = 0;
nid_t nid, last_nid;
- if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
+ if (!enabled_nat_bits(sbi, NULL))
return;
for (i = 0; i < nm_i->nat_blocks; i++) {
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index f8852aa52640..e4d81b8705d1 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -5,7 +5,7 @@
* Copyright (c) 2012 Samsung Electronics Co., Ltd.
* http://www.samsung.com/
*/
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/sched/mm.h>
@@ -46,10 +46,6 @@
static struct kmem_cache *fsync_entry_slab;
-#if IS_ENABLED(CONFIG_UNICODE)
-extern struct kmem_cache *f2fs_cf_name_slab;
-#endif
-
bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
{
s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);
@@ -153,11 +149,8 @@ static int init_recovered_filename(const struct inode *dir,
if (err)
return err;
f2fs_hash_filename(dir, fname);
-#if IS_ENABLED(CONFIG_UNICODE)
/* Case-sensitive match is fine for recovery */
- kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name);
- fname->cf_name.name = NULL;
-#endif
+ f2fs_free_casefolded_name(fname);
} else {
f2fs_hash_filename(dir, fname);
}
@@ -287,6 +280,7 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
static int recover_inode(struct inode *inode, struct page *page)
{
struct f2fs_inode *raw = F2FS_INODE(page);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
char *name;
int err;
@@ -309,29 +303,28 @@ static int recover_inode(struct inode *inode, struct page *page)
i_projid = (projid_t)le32_to_cpu(raw->i_projid);
kprojid = make_kprojid(&init_user_ns, i_projid);
- if (!projid_eq(kprojid, F2FS_I(inode)->i_projid)) {
+ if (!projid_eq(kprojid, fi->i_projid)) {
err = f2fs_transfer_project_quota(inode,
kprojid);
if (err)
return err;
- F2FS_I(inode)->i_projid = kprojid;
+ fi->i_projid = kprojid;
}
}
}
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
- inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
+ inode_set_atime(inode, le64_to_cpu(raw->i_atime),
+ le32_to_cpu(raw->i_atime_nsec));
inode_set_ctime(inode, le64_to_cpu(raw->i_ctime),
le32_to_cpu(raw->i_ctime_nsec));
- inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
- inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
- inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
+ inode_set_mtime(inode, le64_to_cpu(raw->i_mtime),
+ le32_to_cpu(raw->i_mtime_nsec));
- F2FS_I(inode)->i_advise = raw->i_advise;
- F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
+ fi->i_advise = raw->i_advise;
+ fi->i_flags = le32_to_cpu(raw->i_flags);
f2fs_set_inode_flags(inode);
- F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] =
- le16_to_cpu(raw->i_gc_failures);
+ fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures);
recover_inline_flags(inode, raw);
@@ -693,14 +686,12 @@ retry_dn:
if (__is_valid_data_blkaddr(src) &&
!f2fs_is_valid_blkaddr(sbi, src, META_POR)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto err;
}
if (__is_valid_data_blkaddr(dest) &&
!f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
goto err;
}
@@ -755,8 +746,6 @@ retry_prev:
f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
dest, inode->i_ino, dn.ofs_in_node);
err = -EFSCORRUPTED;
- f2fs_handle_error(sbi,
- ERROR_INVALID_BLKADDR);
goto err;
}
@@ -851,7 +840,7 @@ next:
f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks);
}
if (!err)
- f2fs_allocate_new_segments(sbi);
+ err = f2fs_allocate_new_segments(sbi);
return err;
}
@@ -863,7 +852,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
int ret = 0;
unsigned long s_flags = sbi->sb->s_flags;
bool need_writecp = false;
- bool fix_curseg_write_pointer = false;
if (is_sbi_flag_set(sbi, SBI_IS_WRITABLE))
f2fs_info(sbi, "recover fsync data on readonly fs");
@@ -894,8 +882,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
else
f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
skip:
- fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
-
destroy_fsync_dnodes(&inode_list, err);
destroy_fsync_dnodes(&tmp_inode_list, err);
@@ -913,11 +899,13 @@ skip:
* and the f2fs is not read only, check and fix zoned block devices'
* write pointer consistency.
*/
- if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) &&
- f2fs_sb_has_blkzoned(sbi)) {
- err = f2fs_fix_curseg_write_pointer(sbi);
- if (!err)
- err = f2fs_check_write_pointer(sbi);
+ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sbi->sb)) {
+ int err2 = f2fs_fix_curseg_write_pointer(sbi);
+
+ if (!err2)
+ err2 = f2fs_check_write_pointer(sbi);
+ if (err2)
+ err = err2;
ret = err;
}
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 156d92b94525..e48b5e2efea2 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -340,8 +340,6 @@ static int __f2fs_commit_atomic_write(struct inode *inode)
DATA_GENERIC_ENHANCE)) {
f2fs_put_dnode(&dn);
ret = -EFSCORRUPTED;
- f2fs_handle_error(sbi,
- ERROR_INVALID_BLKADDR);
goto out;
}
@@ -372,7 +370,13 @@ out:
} else {
sbi->committed_atomic_block += fi->atomic_write_cnt;
set_inode_flag(inode, FI_ATOMIC_COMMITTED);
+
+ /*
+ * inode may have no FI_ATOMIC_DIRTIED flag due to no write
+ * before commit.
+ */
if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
+ /* clear atomic dirty status and set vfs dirty status */
clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
f2fs_mark_inode_dirty_sync(inode, true);
}
@@ -410,6 +414,9 @@ int f2fs_commit_atomic_write(struct inode *inode)
*/
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
+ if (f2fs_cp_error(sbi))
+ return;
+
if (time_to_inject(sbi, FAULT_CHECKPOINT))
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);
@@ -458,8 +465,8 @@ static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
- unsigned int threshold = (factor * DEFAULT_DIRTY_THRESHOLD) <<
- sbi->log_blocks_per_seg;
+ unsigned int threshold =
+ SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD));
unsigned int global_threshold = threshold * 3 / 2;
if (dents >= threshold || qdata >= threshold ||
@@ -778,8 +785,10 @@ static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
block_t valid_blocks =
get_valid_blocks(sbi, segno, true);
- f2fs_bug_on(sbi, unlikely(!valid_blocks ||
- valid_blocks == CAP_BLKS_PER_SEC(sbi)));
+ f2fs_bug_on(sbi,
+ (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+ !valid_blocks) ||
+ valid_blocks == CAP_BLKS_PER_SEC(sbi));
if (!IS_CURSEC(sbi, secno))
set_bit(secno, dirty_i->dirty_secmap);
@@ -882,7 +891,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
int ovp_hole_segs =
(overprovision_segments(sbi) - reserved_segments(sbi));
- block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
+ block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
block_t holes[2] = {0, 0}; /* DATA and NODE */
block_t unusable;
@@ -911,11 +920,16 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
int ovp_hole_segs =
(overprovision_segments(sbi) - reserved_segments(sbi));
+
+ if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
+ return 0;
if (unusable > F2FS_OPTION(sbi).unusable_cap)
return -EAGAIN;
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
dirty_segments(sbi) > ovp_hole_segs)
return -EAGAIN;
+ if (has_not_enough_free_secs(sbi, 0, 0))
+ return -EAGAIN;
return 0;
}
@@ -1180,7 +1194,10 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
dpolicy->min_interval = dcc->min_discard_issue_time;
dpolicy->mid_interval = dcc->mid_discard_issue_time;
dpolicy->max_interval = dcc->max_discard_issue_time;
- dpolicy->io_aware = true;
+ if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE)
+ dpolicy->io_aware = true;
+ else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE)
+ dpolicy->io_aware = false;
dpolicy->sync = false;
dpolicy->ordered = true;
if (utilization(sbi) > dcc->discard_urgent_util) {
@@ -1397,7 +1414,8 @@ static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
p = &(*p)->rb_right;
leftmost = false;
} else {
- f2fs_bug_on(sbi, 1);
+ /* Skip adding the entry if it already exists */
+ return;
}
}
@@ -1900,9 +1918,8 @@ static int issue_discard_thread(void *data)
set_freezable();
do {
- wait_event_interruptible_timeout(*q,
- kthread_should_stop() || freezing(current) ||
- dcc->discard_wake,
+ wait_event_freezable_timeout(*q,
+ kthread_should_stop() || dcc->discard_wake,
msecs_to_jiffies(wait_ms));
if (sbi->gc_mode == GC_URGENT_HIGH ||
@@ -1920,8 +1937,6 @@ static int issue_discard_thread(void *data)
if (atomic_read(&dcc->queued_discard))
__wait_all_discard_cmd(sbi, NULL);
- if (try_to_freeze())
- continue;
if (f2fs_readonly(sbi->sb))
continue;
if (kthread_should_stop())
@@ -1987,9 +2002,15 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
}
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
+ unsigned int nofs_flags;
+ int ret;
+
trace_f2fs_issue_reset_zone(bdev, blkstart);
- return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
- sector, nr_sects, GFP_NOFS);
+ nofs_flags = memalloc_nofs_save();
+ ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
+ sector, nr_sects);
+ memalloc_nofs_restore(nofs_flags);
+ return ret;
}
__queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
@@ -2197,7 +2218,7 @@ void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
if (!f2fs_sb_has_blkzoned(sbi) &&
(!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
- (end - start) << sbi->log_blocks_per_seg);
+ SEGS_TO_BLKS(sbi, end - start));
continue;
}
next:
@@ -2262,6 +2283,12 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
int err = 0;
+ if (f2fs_sb_has_readonly(sbi)) {
+ f2fs_info(sbi,
+ "Skip to start discard thread for readonly image");
+ return 0;
+ }
+
if (!f2fs_realtime_discard_enable(sbi))
return 0;
@@ -2292,6 +2319,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
dcc->discard_io_aware_gran = MAX_PLIST_NUM;
dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
+ dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
dcc->discard_granularity = BLKS_PER_SEG(sbi);
else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
@@ -2307,7 +2335,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
atomic_set(&dcc->queued_discard, 0);
atomic_set(&dcc->discard_cmd_cnt, 0);
dcc->nr_discards = 0;
- dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+ dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi));
dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
@@ -2641,7 +2669,7 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
}
static int is_next_segment_free(struct f2fs_sb_info *sbi,
- struct curseg_info *curseg, int type)
+ struct curseg_info *curseg)
{
unsigned int segno = curseg->segno + 1;
struct free_segmap_info *free_i = FREE_I(sbi);
@@ -2655,7 +2683,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi,
* Find a new segment from the free segments bitmap to right order
* This function should be returned with success, otherwise BUG
*/
-static void get_new_segment(struct f2fs_sb_info *sbi,
+static int get_new_segment(struct f2fs_sb_info *sbi,
unsigned int *newseg, bool new_sec, bool pinning)
{
struct free_segmap_info *free_i = FREE_I(sbi);
@@ -2669,6 +2697,11 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
spin_lock(&free_i->segmap_lock);
+ if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
segno = find_next_zero_bit(free_i->free_segmap,
GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
@@ -2676,22 +2709,47 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
goto got_it;
}
+#ifdef CONFIG_BLK_DEV_ZONED
/*
* If we format f2fs on zoned storage, let's try to get pinned sections
* from beginning of the storage, which should be a conventional one.
*/
if (f2fs_sb_has_blkzoned(sbi)) {
- segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg);
+ /* Prioritize writing to conventional zones */
+ if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning)
+ segno = 0;
+ else
+ segno = max(sbi->first_seq_zone_segno, *newseg);
hint = GET_SEC_FROM_SEG(sbi, segno);
}
+#endif
find_other_zone:
secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) {
+ /* Write only to sequential zones */
+ if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) {
+ hint = GET_SEC_FROM_SEG(sbi, sbi->first_seq_zone_segno);
+ secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
+ } else
+ secno = find_first_zero_bit(free_i->free_secmap,
+ MAIN_SECS(sbi));
+ if (secno >= MAIN_SECS(sbi)) {
+ ret = -ENOSPC;
+ f2fs_bug_on(sbi, 1);
+ goto out_unlock;
+ }
+ }
+#endif
+
if (secno >= MAIN_SECS(sbi)) {
secno = find_first_zero_bit(free_i->free_secmap,
MAIN_SECS(sbi));
if (secno >= MAIN_SECS(sbi)) {
ret = -ENOSPC;
+ f2fs_bug_on(sbi, 1);
goto out_unlock;
}
}
@@ -2720,16 +2778,26 @@ find_other_zone:
}
got_it:
/* set it as dirty segment in free segmap */
- f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
+ if (test_bit(segno, free_i->free_segmap)) {
+ ret = -EFSCORRUPTED;
+ f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_FREE_BITMAP);
+ goto out_unlock;
+ }
+
+ /* no free section in conventional device or conventional zone */
+ if (new_sec && pinning &&
+ f2fs_is_sequential_zone_area(sbi, START_BLOCK(sbi, segno))) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
__set_inuse(sbi, segno);
*newseg = segno;
out_unlock:
spin_unlock(&free_i->segmap_lock);
- if (ret) {
+ if (ret == -ENOSPC)
f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
- f2fs_bug_on(sbi, 1);
- }
+ return ret;
}
static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
@@ -2738,6 +2806,10 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
struct summary_footer *sum_footer;
unsigned short seg_type = curseg->seg_type;
+ /* only happens when get_new_segment() fails */
+ if (curseg->next_segno == NULL_SEGNO)
+ return;
+
curseg->inited = true;
curseg->segno = curseg->next_segno;
curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
@@ -2762,11 +2834,19 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
unsigned short seg_type = curseg->seg_type;
sanity_check_seg_type(sbi, seg_type);
- if (f2fs_need_rand_seg(sbi))
- return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
+ if (__is_large_section(sbi)) {
+ if (f2fs_need_rand_seg(sbi)) {
+ unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno);
- if (__is_large_section(sbi))
+ if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint)
+ return curseg->segno;
+ return get_random_u32_inclusive(curseg->segno + 1,
+ GET_SEG_FROM_SEC(sbi, hint + 1) - 1);
+ }
return curseg->segno;
+ } else if (f2fs_need_rand_seg(sbi)) {
+ return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
+ }
/* inmem log may not locate on any segment after mount */
if (!curseg->inited)
@@ -2797,16 +2877,17 @@ static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno = curseg->segno;
bool pinning = type == CURSEG_COLD_DATA_PINNED;
+ int ret;
if (curseg->inited)
write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
segno = __get_next_segno(sbi, type);
- get_new_segment(sbi, &segno, new_sec, pinning);
- if (new_sec && pinning &&
- !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
- __set_free(sbi, segno);
- return -EAGAIN;
+ ret = get_new_segment(sbi, &segno, new_sec, pinning);
+ if (ret) {
+ if (ret == -ENOSPC)
+ curseg->segno = NULL_SEGNO;
+ return ret;
}
curseg->next_segno = segno;
@@ -2849,7 +2930,7 @@ bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
*/
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static int change_curseg(struct f2fs_sb_info *sbi, int type)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2875,21 +2956,23 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
if (IS_ERR(sum_page)) {
/* GC won't be able to use stale summary pages by cp_error */
memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
- return;
+ return PTR_ERR(sum_page);
}
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
+ return 0;
}
static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
int alloc_mode, unsigned long long age);
-static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
int target_type, int alloc_mode,
unsigned long long age)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ int ret = 0;
curseg->seg_type = target_type;
@@ -2897,38 +2980,62 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
curseg->seg_type = se->type;
- change_curseg(sbi, type);
+ ret = change_curseg(sbi, type);
} else {
/* allocate cold segment by default */
curseg->seg_type = CURSEG_COLD_DATA;
- new_curseg(sbi, type, true);
+ ret = new_curseg(sbi, type, true);
}
stat_inc_seg_type(sbi, curseg);
+ return ret;
}
-static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+ int ret = 0;
- if (!sbi->am.atgc_enabled)
- return;
+ if (!sbi->am.atgc_enabled && !force)
+ return 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&SIT_I(sbi)->sentry_lock);
- get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+ ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC,
+ CURSEG_COLD_DATA, SSR, 0);
up_write(&SIT_I(sbi)->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+ return ret;
+}
+int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ return __f2fs_init_atgc_curseg(sbi, false);
}
-void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+
+int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi)
{
- __f2fs_init_atgc_curseg(sbi);
+ int ret;
+
+ if (!test_opt(sbi, ATGC))
+ return 0;
+ if (sbi->am.atgc_enabled)
+ return 0;
+ if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) <
+ sbi->am.age_threshold)
+ return 0;
+
+ ret = __f2fs_init_atgc_curseg(sbi, true);
+ if (!ret) {
+ sbi->am.atgc_enabled = true;
+ f2fs_info(sbi, "reenabled age threshold GC");
+ }
+ return ret;
}
static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
@@ -2996,7 +3103,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
sanity_check_seg_type(sbi, seg_type);
/* f2fs_need_SSR() already forces to do this */
- if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
+ if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type,
+ alloc_mode, age, false)) {
curseg->next_segno = segno;
return 1;
}
@@ -3023,7 +3131,8 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
for (; cnt-- > 0; reversed ? i-- : i++) {
if (i == seg_type)
continue;
- if (!f2fs_get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
+ if (!f2fs_get_victim(sbi, &segno, BG_GC, i,
+ alloc_mode, age, false)) {
curseg->next_segno = segno;
return 1;
}
@@ -3047,8 +3156,7 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
curseg->seg_type == CURSEG_WARM_NODE)
return true;
- if (curseg->alloc_type == LFS &&
- is_next_segment_free(sbi, curseg, type) &&
+ if (curseg->alloc_type == LFS && is_next_segment_free(sbi, curseg) &&
likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return true;
if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
@@ -3056,11 +3164,12 @@ static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
return false;
}
-void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
+int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno;
+ int ret = 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
@@ -3071,9 +3180,9 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
goto unlock;
if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
- change_curseg(sbi, type);
+ ret = change_curseg(sbi, type);
else
- new_curseg(sbi, type, true);
+ ret = new_curseg(sbi, type, true);
stat_inc_seg_type(sbi, curseg);
@@ -3087,6 +3196,7 @@ unlock:
mutex_unlock(&curseg->curseg_mutex);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+ return ret;
}
static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
@@ -3094,6 +3204,10 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
+ int err = 0;
+
+ if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited)
+ goto allocate;
if (!force && curseg->inited &&
!curseg->next_blkoff &&
@@ -3101,9 +3215,11 @@ static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
return 0;
+allocate:
old_segno = curseg->segno;
- if (new_curseg(sbi, type, true))
- return -EAGAIN;
+ err = new_curseg(sbi, type, true);
+ if (err)
+ return err;
stat_inc_seg_type(sbi, curseg);
locate_dirty_segment(sbi, old_segno);
return 0;
@@ -3132,28 +3248,33 @@ retry:
err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
f2fs_unlock_op(sbi);
- if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) {
+ if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
f2fs_down_write(&sbi->gc_lock);
- f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
+ err = f2fs_gc_range(sbi, 0, sbi->first_seq_zone_segno - 1,
+ true, ZONED_PIN_SEC_REQUIRED_COUNT);
f2fs_up_write(&sbi->gc_lock);
gc_required = false;
- goto retry;
+ if (!err)
+ goto retry;
}
return err;
}
-void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
+int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
int i;
+ int err = 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
- __allocate_new_segment(sbi, i, false, false);
+ err += __allocate_new_segment(sbi, i, false, false);
up_write(&SIT_I(sbi)->sentry_lock);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+
+ return err;
}
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
@@ -3314,8 +3435,14 @@ out:
return err;
}
-int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
+int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint)
{
+ if (F2FS_OPTION(sbi).active_logs == 2)
+ return CURSEG_HOT_DATA;
+ else if (F2FS_OPTION(sbi).active_logs == 4)
+ return CURSEG_COLD_DATA;
+
+ /* active_logs == 6 */
switch (hint) {
case WRITE_LIFE_SHORT:
return CURSEG_HOT_DATA;
@@ -3326,6 +3453,65 @@ int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
}
}
+/*
+ * This returns write hints for each segment type. These hints will be
+ * passed down to block layer as below by default.
+ *
+ * User F2FS Block
+ * ---- ---- -----
+ * META WRITE_LIFE_NONE|REQ_META
+ * HOT_NODE WRITE_LIFE_NONE
+ * WARM_NODE WRITE_LIFE_MEDIUM
+ * COLD_NODE WRITE_LIFE_LONG
+ * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
+ * extension list " "
+ *
+ * -- buffered io
+ * COLD_DATA WRITE_LIFE_EXTREME
+ * HOT_DATA WRITE_LIFE_SHORT
+ * WARM_DATA WRITE_LIFE_NOT_SET
+ *
+ * -- direct io
+ * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
+ * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
+ * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
+ * WRITE_LIFE_NONE " WRITE_LIFE_NONE
+ * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
+ * WRITE_LIFE_LONG " WRITE_LIFE_LONG
+ */
+enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
+ enum page_type type, enum temp_type temp)
+{
+ switch (type) {
+ case DATA:
+ switch (temp) {
+ case WARM:
+ return WRITE_LIFE_NOT_SET;
+ case HOT:
+ return WRITE_LIFE_SHORT;
+ case COLD:
+ return WRITE_LIFE_EXTREME;
+ default:
+ return WRITE_LIFE_NONE;
+ }
+ case NODE:
+ switch (temp) {
+ case WARM:
+ return WRITE_LIFE_MEDIUM;
+ case HOT:
+ return WRITE_LIFE_NONE;
+ case COLD:
+ return WRITE_LIFE_LONG;
+ default:
+ return WRITE_LIFE_NONE;
+ }
+ case META:
+ return WRITE_LIFE_NONE;
+ default:
+ return WRITE_LIFE_NONE;
+ }
+}
+
static int __get_segment_type_2(struct f2fs_io_info *fio)
{
if (fio->type == DATA)
@@ -3390,7 +3576,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (file_is_cold(inode) || f2fs_need_compress_data(inode))
return CURSEG_COLD_DATA;
- type = __get_age_segment_type(inode, fio->page->index);
+ type = __get_age_segment_type(inode,
+ page_folio(fio->page)->index);
if (type != NO_CHECK_TYPE)
return type;
@@ -3398,7 +3585,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
is_inode_flag_set(inode, FI_HOT_DATA) ||
f2fs_is_cow_file(inode))
return CURSEG_HOT_DATA;
- return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
+ return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
+ inode->i_write_hint);
} else {
if (IS_DNODE(fio->page))
return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
@@ -3407,6 +3595,15 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
}
}
+int f2fs_get_segment_temp(int seg_type)
+{
+ if (IS_HOT(seg_type))
+ return HOT;
+ else if (IS_WARM(seg_type))
+ return WARM;
+ return COLD;
+}
+
static int __get_segment_type(struct f2fs_io_info *fio)
{
int type = 0;
@@ -3425,12 +3622,8 @@ static int __get_segment_type(struct f2fs_io_info *fio)
f2fs_bug_on(fio->sbi, true);
}
- if (IS_HOT(type))
- fio->temp = HOT;
- else if (IS_WARM(type))
- fio->temp = WARM;
- else
- fio->temp = COLD;
+ fio->temp = f2fs_get_segment_temp(type);
+
return type;
}
@@ -3447,7 +3640,14 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
get_random_u32_inclusive(1, sbi->max_fragment_hole);
}
-void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
+static void reset_curseg_fields(struct curseg_info *curseg)
+{
+ curseg->inited = false;
+ curseg->segno = NULL_SEGNO;
+ curseg->next_segno = 0;
+}
+
+int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
block_t old_blkaddr, block_t *new_blkaddr,
struct f2fs_summary *sum, int type,
struct f2fs_io_info *fio)
@@ -3458,12 +3658,18 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
struct seg_entry *se = NULL;
bool segment_full = false;
+ int ret = 0;
f2fs_down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&sit_i->sentry_lock);
+ if (curseg->segno == NULL_SEGNO) {
+ ret = -ENOSPC;
+ goto out_err;
+ }
+
if (from_gc) {
f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
@@ -3512,19 +3718,23 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
!((curseg->segno + 1) % sbi->segs_per_sec)) {
write_sum_page(sbi, curseg->sum_blk,
GET_SUM_BLOCK(sbi, curseg->segno));
+ reset_curseg_fields(curseg);
goto skip_new_segment;
}
if (from_gc) {
- get_atssr_segment(sbi, type, se->type,
+ ret = get_atssr_segment(sbi, type, se->type,
AT_SSR, se->mtime);
} else {
if (need_new_seg(sbi, type))
- new_curseg(sbi, type, false);
+ ret = new_curseg(sbi, type, false);
else
- change_curseg(sbi, type);
+ ret = change_curseg(sbi, type);
stat_inc_seg_type(sbi, curseg);
}
+
+ if (ret)
+ goto out_err;
}
skip_new_segment:
@@ -3559,8 +3769,15 @@ skip_new_segment:
}
mutex_unlock(&curseg->curseg_mutex);
+ f2fs_up_read(&SM_I(sbi)->curseg_lock);
+ return 0;
+out_err:
+ *new_blkaddr = NULL_ADDR;
+ up_write(&sit_i->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
f2fs_up_read(&SM_I(sbi)->curseg_lock);
+ return ret;
}
void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
@@ -3598,8 +3815,15 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
if (keep_order)
f2fs_down_read(&fio->sbi->io_order_lock);
- f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
- &fio->new_blkaddr, sum, type, fio);
+ if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
+ &fio->new_blkaddr, sum, type, fio)) {
+ if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host))
+ fscrypt_finalize_bounce_page(&fio->encrypted_page);
+ end_page_writeback(fio->page);
+ if (f2fs_in_warm_node_list(fio->sbi, fio->page))
+ f2fs_del_fsync_node_entry(fio->sbi, fio->page);
+ goto out;
+ }
if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
@@ -3607,12 +3831,12 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
f2fs_submit_page_write(fio);
f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
-
+out:
if (keep_order)
f2fs_up_read(&fio->sbi->io_order_lock);
}
-void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
+void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
enum iostat_type io_type)
{
struct f2fs_io_info fio = {
@@ -3621,20 +3845,20 @@ void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
.temp = HOT,
.op = REQ_OP_WRITE,
.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
- .old_blkaddr = page->index,
- .new_blkaddr = page->index,
- .page = page,
+ .old_blkaddr = folio->index,
+ .new_blkaddr = folio->index,
+ .page = folio_page(folio, 0),
.encrypted_page = NULL,
.in_list = 0,
};
- if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
+ if (unlikely(folio->index >= MAIN_BLKADDR(sbi)))
fio.op_flags &= ~REQ_META;
- set_page_writeback(page);
+ folio_start_writeback(folio);
f2fs_submit_page_write(&fio);
- stat_inc_meta_count(sbi, page->index);
+ stat_inc_meta_count(sbi, folio->index);
f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
}
@@ -3780,7 +4004,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
/* change the current segment */
if (segno != curseg->segno) {
curseg->next_segno = segno;
- change_curseg(sbi, type);
+ if (change_curseg(sbi, type))
+ goto out_unlock;
}
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
@@ -3806,12 +4031,14 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (recover_curseg) {
if (old_cursegno != curseg->segno) {
curseg->next_segno = old_cursegno;
- change_curseg(sbi, type);
+ if (change_curseg(sbi, type))
+ goto out_unlock;
}
curseg->next_blkoff = old_blkoff;
curseg->alloc_type = old_alloc_type;
}
+out_unlock:
up_write(&sit_i->sentry_lock);
mutex_unlock(&curseg->curseg_mutex);
f2fs_up_write(&SM_I(sbi)->curseg_lock);
@@ -3835,7 +4062,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
void f2fs_wait_on_page_writeback(struct page *page,
enum page_type type, bool ordered, bool locked)
{
- if (PageWriteback(page)) {
+ if (folio_test_writeback(page_folio(page))) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
/* submit cached LFS IO */
@@ -3844,7 +4071,8 @@ void f2fs_wait_on_page_writeback(struct page *page,
f2fs_submit_merged_ipu_write(sbi, NULL, page);
if (ordered) {
wait_on_page_writeback(page);
- f2fs_bug_on(sbi, locked && PageWriteback(page));
+ f2fs_bug_on(sbi, locked &&
+ folio_test_writeback(page_folio(page)));
} else {
wait_for_stable_page(page);
}
@@ -4497,7 +4725,7 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
#endif
sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
- sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
+ sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs);
sit_i->written_valid_blocks = 0;
sit_i->bitmap_size = sit_bitmap_size;
sit_i->dirty_sentries = 0;
@@ -4570,9 +4798,7 @@ static int build_curseg(struct f2fs_sb_info *sbi)
array[i].seg_type = CURSEG_COLD_DATA;
else if (i == CURSEG_ALL_DATA_ATGC)
array[i].seg_type = CURSEG_COLD_DATA;
- array[i].segno = NULL_SEGNO;
- array[i].next_blkoff = 0;
- array[i].inited = false;
+ reset_curseg_fields(&array[i]);
}
return restore_curseg_summaries(sbi);
}
@@ -4892,96 +5118,71 @@ out:
}
#ifdef CONFIG_BLK_DEV_ZONED
-
static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
struct f2fs_dev_info *fdev,
struct blk_zone *zone)
{
- unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
- block_t zone_block, wp_block, last_valid_block;
+ unsigned int zone_segno;
+ block_t zone_block, valid_block_cnt;
unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
- int i, s, b, ret;
- struct seg_entry *se;
+ int ret;
+ unsigned int nofs_flags;
if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
- wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
- wp_segno = GET_SEGNO(sbi, wp_block);
- wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
zone_segno = GET_SEGNO(sbi, zone_block);
- zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
-
- if (zone_segno >= MAIN_SEGS(sbi))
- return 0;
/*
* Skip check of zones cursegs point to, since
* fix_curseg_write_pointer() checks them.
*/
- for (i = 0; i < NO_CHECK_TYPE; i++)
- if (zone_secno == GET_SEC_FROM_SEG(sbi,
- CURSEG_I(sbi, i)->segno))
- return 0;
+ if (zone_segno >= MAIN_SEGS(sbi))
+ return 0;
/*
- * Get last valid block of the zone.
+ * Get # of valid block of the zone.
*/
- last_valid_block = zone_block - 1;
- for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
- segno = zone_segno + s;
- se = get_seg_entry(sbi, segno);
- for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
- if (f2fs_test_bit(b, se->cur_valid_map)) {
- last_valid_block = START_BLOCK(sbi, segno) + b;
- break;
- }
- if (last_valid_block >= zone_block)
- break;
+ valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
+ if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
+ f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
+ zone_segno, valid_block_cnt,
+ blk_zone_cond_str(zone->cond));
+ return 0;
}
- /*
- * The write pointer matches with the valid blocks or
- * already points to the end of the zone.
- */
- if ((last_valid_block + 1 == wp_block) ||
- (zone->wp == zone->start + zone->len))
+ if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
+ (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
return 0;
- if (last_valid_block + 1 == zone_block) {
- /*
- * If there is no valid block in the zone and if write pointer
- * is not at zone start, reset the write pointer.
- */
- f2fs_notice(sbi,
- "Zone without valid block has non-zero write "
- "pointer. Reset the write pointer: wp[0x%x,0x%x]",
- wp_segno, wp_blkoff);
+ if (!valid_block_cnt) {
+ f2fs_notice(sbi, "Zone without valid block has non-zero write "
+ "pointer. Reset the write pointer: cond[%s]",
+ blk_zone_cond_str(zone->cond));
ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
zone->len >> log_sectors_per_block);
if (ret)
f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
fdev->path, ret);
-
return ret;
}
/*
- * If there are valid blocks and the write pointer doesn't
- * match with them, we need to report the inconsistency and
- * fill the zone till the end to close the zone. This inconsistency
- * does not cause write error because the zone will not be selected
- * for write operation until it get discarded.
+ * If there are valid blocks and the write pointer doesn't match
+ * with them, we need to report the inconsistency and fill
+ * the zone till the end to close the zone. This inconsistency
+ * does not cause write error because the zone will not be
+ * selected for write operation until it get discarded.
*/
- f2fs_notice(sbi, "Valid blocks are not aligned with write pointer: "
- "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
- GET_SEGNO(sbi, last_valid_block),
- GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
- wp_segno, wp_blkoff);
+ f2fs_notice(sbi, "Valid blocks are not aligned with write "
+ "pointer: valid block[0x%x,0x%x] cond[%s]",
+ zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond));
+ nofs_flags = memalloc_nofs_save();
ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
- zone->start, zone->len, GFP_NOFS);
+ zone->start, zone->len);
+ memalloc_nofs_restore(nofs_flags);
if (ret == -EOPNOTSUPP) {
ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
zone->len - (zone->wp - zone->start),
@@ -5052,23 +5253,36 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
return 0;
- wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
- wp_segno = GET_SEGNO(sbi, wp_block);
- wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
- wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
-
- if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
- wp_sector_off == 0)
- return 0;
+ /*
+ * When safely unmounted in the previous mount, we could use current
+ * segments. Otherwise, allocate new sections.
+ */
+ if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
+ wp_segno = GET_SEGNO(sbi, wp_block);
+ wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
+ wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
+
+ if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
+ wp_sector_off == 0)
+ return 0;
- f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
- "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
- type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
+ f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
+ "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
+ cs->next_blkoff, wp_segno, wp_blkoff);
+ }
- f2fs_notice(sbi, "Assign new section to curseg[%d]: "
- "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
+ /* Allocate a new section if it's not new. */
+ if (cs->next_blkoff ||
+ cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) {
+ unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff;
- f2fs_allocate_new_section(sbi, type, true);
+ f2fs_allocate_new_section(sbi, type, true);
+ f2fs_notice(sbi, "Assign new section to curseg[%d]: "
+ "[0x%x,0x%x] -> [0x%x,0x%x]",
+ type, old_segno, old_blkoff,
+ cs->segno, cs->next_blkoff);
+ }
/* check consistency of the zone curseg pointed to */
if (check_zone_write_pointer(sbi, zbd, &zone))
@@ -5222,8 +5436,7 @@ unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
return BLKS_PER_SEG(sbi);
}
-unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
- unsigned int segno)
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi)
{
if (f2fs_sb_has_blkzoned(sbi))
return CAP_SEGS_PER_SEC(sbi);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index cd2ec6acc717..f8f94301350c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -77,21 +77,21 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
#define TOTAL_SEGS(sbi) \
(SM_I(sbi) ? SM_I(sbi)->segment_count : \
le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count))
-#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg)
+#define TOTAL_BLKS(sbi) (SEGS_TO_BLKS(sbi, TOTAL_SEGS(sbi)))
#define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi))
#define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \
(sbi)->log_blocks_per_seg))
#define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \
- (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg))
+ (SEGS_TO_BLKS(sbi, GET_R2L_SEGNO(FREE_I(sbi), segno))))
#define NEXT_FREE_BLKADDR(sbi, curseg) \
(START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff)
#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi))
#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \
- (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg)
+ (BLKS_TO_SEGS(sbi, GET_SEGOFF_FROM_SEG0(sbi, blk_addr)))
#define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (BLKS_PER_SEG(sbi) - 1))
@@ -100,11 +100,10 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \
GET_SEGNO_FROM_SEG0(sbi, blk_addr)))
#define CAP_BLKS_PER_SEC(sbi) \
- (SEGS_PER_SEC(sbi) * BLKS_PER_SEG(sbi) - \
- (sbi)->unusable_blocks_per_sec)
+ (BLKS_PER_SEC(sbi) - (sbi)->unusable_blocks_per_sec)
#define CAP_SEGS_PER_SEC(sbi) \
- (SEGS_PER_SEC(sbi) - ((sbi)->unusable_blocks_per_sec >> \
- (sbi)->log_blocks_per_seg))
+ (SEGS_PER_SEC(sbi) - \
+ BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec))
#define GET_SEC_FROM_SEG(sbi, segno) \
(((segno) == -1) ? -1 : (segno) / SEGS_PER_SEC(sbi))
#define GET_SEG_FROM_SEC(sbi, secno) \
@@ -189,6 +188,7 @@ struct victim_sel_policy {
unsigned int min_segno; /* segment # having min. cost */
unsigned long long age; /* mtime of GCed section*/
unsigned long long age_threshold;/* age threshold */
+ bool one_time_gc; /* one time GC */
};
struct seg_entry {
@@ -431,7 +431,6 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
- unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
clear_bit(segno, free_i->free_segmap);
@@ -439,7 +438,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
next = find_next_bit(free_i->free_segmap,
start_segno + SEGS_PER_SEC(sbi), start_segno);
- if (next >= start_segno + usable_segs) {
+ if (next >= start_segno + f2fs_usable_segs_in_sec(sbi)) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
}
@@ -465,22 +464,36 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
- unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
+ bool ret;
spin_lock(&free_i->segmap_lock);
- if (test_and_clear_bit(segno, free_i->free_segmap)) {
- free_i->free_segments++;
-
- if (!inmem && IS_CURSEC(sbi, secno))
- goto skip_free;
- next = find_next_bit(free_i->free_segmap,
- start_segno + SEGS_PER_SEC(sbi), start_segno);
- if (next >= start_segno + usable_segs) {
- if (test_and_clear_bit(secno, free_i->free_secmap))
- free_i->free_sections++;
- }
- }
-skip_free:
+ ret = test_and_clear_bit(segno, free_i->free_segmap);
+ if (!ret)
+ goto unlock_out;
+
+ free_i->free_segments++;
+
+ if (!inmem && IS_CURSEC(sbi, secno))
+ goto unlock_out;
+
+ /* check large section */
+ next = find_next_bit(free_i->free_segmap,
+ start_segno + SEGS_PER_SEC(sbi), start_segno);
+ if (next < start_segno + f2fs_usable_segs_in_sec(sbi))
+ goto unlock_out;
+
+ ret = test_and_clear_bit(secno, free_i->free_secmap);
+ if (!ret)
+ goto unlock_out;
+
+ free_i->free_sections++;
+
+ if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[BG_GC]) == secno)
+ sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
+ if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[FG_GC]) == secno)
+ sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
+
+unlock_out:
spin_unlock(&free_i->segmap_lock);
}
@@ -562,13 +575,16 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
unsigned int node_blocks, unsigned int data_blocks,
unsigned int dent_blocks)
{
-
unsigned int segno, left_blocks, blocks;
int i;
/* check current data/node sections in the worst case. */
for (i = CURSEG_HOT_DATA; i < NR_PERSISTENT_LOG; i++) {
segno = CURSEG_I(sbi, i)->segno;
+
+ if (unlikely(segno == NULL_SEGNO))
+ return false;
+
left_blocks = CAP_BLKS_PER_SEC(sbi) -
get_ckpt_valid_blocks(sbi, segno, true);
@@ -579,6 +595,10 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi,
/* check current data section for dentry blocks. */
segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
+
+ if (unlikely(segno == NULL_SEGNO))
+ return false;
+
left_blocks = CAP_BLKS_PER_SEC(sbi) -
get_ckpt_valid_blocks(sbi, segno, true);
if (dent_blocks > left_blocks)
@@ -606,8 +626,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi,
unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
unsigned int data_blocks = 0;
- if (f2fs_lfs_mode(sbi) &&
- unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (f2fs_lfs_mode(sbi)) {
total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA);
data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi);
data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi);
@@ -616,7 +635,7 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi,
if (lower_p)
*lower_p = node_secs + dent_secs + data_secs;
if (upper_p)
- *upper_p = node_secs + dent_secs +
+ *upper_p = node_secs + dent_secs + data_secs +
(node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) +
(data_blocks ? 1 : 0);
if (curseg_p)
@@ -920,7 +939,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
if (type == DATA)
return BLKS_PER_SEG(sbi);
else if (type == NODE)
- return 8 * BLKS_PER_SEG(sbi);
+ return SEGS_TO_BLKS(sbi, 8);
else if (type == META)
return 8 * BIO_MAX_VECS;
else
@@ -972,13 +991,3 @@ wake_up:
dcc->discard_wake = true;
wake_up_interruptible_all(&dcc->discard_wait_queue);
}
-
-static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi)
-{
- int devi;
-
- for (devi = 0; devi < sbi->s_ndevs; devi++)
- if (bdev_is_zoned(FDEV(devi).bdev))
- return GET_SEGNO(sbi, FDEV(devi).start_blk);
- return 0;
-}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index b72fa103b963..875aef2fc520 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -11,7 +11,6 @@
#include <linux/fs_context.h>
#include <linux/sched/mm.h>
#include <linux/statfs.h>
-#include <linux/buffer_head.h>
#include <linux/kthread.h>
#include <linux/parser.h>
#include <linux/mount.h>
@@ -44,24 +43,26 @@ static struct kmem_cache *f2fs_inode_cachep;
#ifdef CONFIG_F2FS_FAULT_INJECTION
const char *f2fs_fault_name[FAULT_MAX] = {
- [FAULT_KMALLOC] = "kmalloc",
- [FAULT_KVMALLOC] = "kvmalloc",
- [FAULT_PAGE_ALLOC] = "page alloc",
- [FAULT_PAGE_GET] = "page get",
- [FAULT_ALLOC_NID] = "alloc nid",
- [FAULT_ORPHAN] = "orphan",
- [FAULT_BLOCK] = "no more block",
- [FAULT_DIR_DEPTH] = "too big dir depth",
- [FAULT_EVICT_INODE] = "evict_inode fail",
- [FAULT_TRUNCATE] = "truncate fail",
- [FAULT_READ_IO] = "read IO error",
- [FAULT_CHECKPOINT] = "checkpoint error",
- [FAULT_DISCARD] = "discard error",
- [FAULT_WRITE_IO] = "write IO error",
- [FAULT_SLAB_ALLOC] = "slab alloc",
- [FAULT_DQUOT_INIT] = "dquot initialize",
- [FAULT_LOCK_OP] = "lock_op",
- [FAULT_BLKADDR] = "invalid blkaddr",
+ [FAULT_KMALLOC] = "kmalloc",
+ [FAULT_KVMALLOC] = "kvmalloc",
+ [FAULT_PAGE_ALLOC] = "page alloc",
+ [FAULT_PAGE_GET] = "page get",
+ [FAULT_ALLOC_NID] = "alloc nid",
+ [FAULT_ORPHAN] = "orphan",
+ [FAULT_BLOCK] = "no more block",
+ [FAULT_DIR_DEPTH] = "too big dir depth",
+ [FAULT_EVICT_INODE] = "evict_inode fail",
+ [FAULT_TRUNCATE] = "truncate fail",
+ [FAULT_READ_IO] = "read IO error",
+ [FAULT_CHECKPOINT] = "checkpoint error",
+ [FAULT_DISCARD] = "discard error",
+ [FAULT_WRITE_IO] = "write IO error",
+ [FAULT_SLAB_ALLOC] = "slab alloc",
+ [FAULT_DQUOT_INIT] = "dquot initialize",
+ [FAULT_LOCK_OP] = "lock_op",
+ [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr",
+ [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr",
+ [FAULT_NO_SEGMENT] = "no free segment",
};
int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate,
@@ -93,11 +94,26 @@ int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate,
#endif
/* f2fs-wide shrinker description */
-static struct shrinker f2fs_shrinker_info = {
- .scan_objects = f2fs_shrink_scan,
- .count_objects = f2fs_shrink_count,
- .seeks = DEFAULT_SEEKS,
-};
+static struct shrinker *f2fs_shrinker_info;
+
+static int __init f2fs_init_shrinker(void)
+{
+ f2fs_shrinker_info = shrinker_alloc(0, "f2fs-shrinker");
+ if (!f2fs_shrinker_info)
+ return -ENOMEM;
+
+ f2fs_shrinker_info->count_objects = f2fs_shrink_count;
+ f2fs_shrinker_info->scan_objects = f2fs_shrink_scan;
+
+ shrinker_register(f2fs_shrinker_info);
+
+ return 0;
+}
+
+static void f2fs_exit_shrinker(void)
+{
+ shrinker_free(f2fs_shrinker_info);
+}
enum {
Opt_gc_background,
@@ -304,7 +320,7 @@ struct kmem_cache *f2fs_cf_name_slab;
static int __init f2fs_create_casefold_cache(void)
{
f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name",
- F2FS_NAME_LEN);
+ F2FS_NAME_LEN);
return f2fs_cf_name_slab ? 0 : -ENOMEM;
}
@@ -694,6 +710,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
if (!strcmp(name, "on")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
} else if (!strcmp(name, "off")) {
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_warn(sbi, "zoned devices need bggc");
+ kfree(name);
+ return -EINVAL;
+ }
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_OFF;
} else if (!strcmp(name, "sync")) {
F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_SYNC;
@@ -1309,13 +1330,13 @@ default_check:
return -EINVAL;
}
#endif
-#if !IS_ENABLED(CONFIG_UNICODE)
- if (f2fs_sb_has_casefold(sbi)) {
+
+ if (!IS_ENABLED(CONFIG_UNICODE) && f2fs_sb_has_casefold(sbi)) {
f2fs_err(sbi,
"Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE");
return -EINVAL;
}
-#endif
+
/*
* The BLKZONED feature indicates that the drive was formatted with
* zone alignment optimization. This is optional for host-aware
@@ -1371,11 +1392,6 @@ default_check:
}
}
- if (test_opt(sbi, DISABLE_CHECKPOINT) && f2fs_lfs_mode(sbi)) {
- f2fs_err(sbi, "LFS is not compatible with checkpoint=disable");
- return -EINVAL;
- }
-
if (test_opt(sbi, ATGC) && f2fs_lfs_mode(sbi)) {
f2fs_err(sbi, "LFS is not compatible with ATGC");
return -EINVAL;
@@ -1499,6 +1515,12 @@ int f2fs_inode_dirtied(struct inode *inode, bool sync)
inc_page_count(sbi, F2FS_DIRTY_IMETA);
}
spin_unlock(&sbi->inode_lock[DIRTY_META]);
+
+ /* if atomic write is not committed, set inode w/ atomic dirty */
+ if (!ret && f2fs_is_atomic_file(inode) &&
+ !is_inode_flag_set(inode, FI_ATOMIC_COMMITTED))
+ set_inode_flag(inode, FI_ATOMIC_DIRTIED);
+
return ret;
}
@@ -1559,7 +1581,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
for (i = 0; i < sbi->s_ndevs; i++) {
if (i > 0)
- blkdev_put(FDEV(i).bdev, sbi->sb);
+ bdev_fput(FDEV(i).bdev_file);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
#endif
@@ -1666,12 +1688,10 @@ static void f2fs_put_super(struct super_block *sb)
kvfree(sbi->ckpt);
- sb->s_fs_info = NULL;
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->raw_super);
- destroy_device_list(sbi);
f2fs_destroy_page_array_cache(sbi);
f2fs_destroy_xattr_caches(sbi);
#ifdef CONFIG_QUOTA
@@ -1686,7 +1706,6 @@ static void f2fs_put_super(struct super_block *sb)
#if IS_ENABLED(CONFIG_UNICODE)
utf8_unload(sb->s_encoding);
#endif
- kfree(sbi);
}
int f2fs_sync_fs(struct super_block *sb, int sync)
@@ -1768,26 +1787,32 @@ static int f2fs_statfs_project(struct super_block *sb,
limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
dquot->dq_dqb.dqb_bhardlimit);
- if (limit)
- limit >>= sb->s_blocksize_bits;
+ limit >>= sb->s_blocksize_bits;
+
+ if (limit) {
+ uint64_t remaining = 0;
- if (limit && buf->f_blocks > limit) {
curblock = (dquot->dq_dqb.dqb_curspace +
dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
- buf->f_blocks = limit;
- buf->f_bfree = buf->f_bavail =
- (buf->f_blocks > curblock) ?
- (buf->f_blocks - curblock) : 0;
+ if (limit > curblock)
+ remaining = limit - curblock;
+
+ buf->f_blocks = min(buf->f_blocks, limit);
+ buf->f_bfree = min(buf->f_bfree, remaining);
+ buf->f_bavail = min(buf->f_bavail, remaining);
}
limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
dquot->dq_dqb.dqb_ihardlimit);
- if (limit && buf->f_files > limit) {
- buf->f_files = limit;
- buf->f_ffree =
- (buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
- (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
+ if (limit) {
+ uint64_t remaining = 0;
+
+ if (limit > dquot->dq_dqb.dqb_curinodes)
+ remaining = limit - dquot->dq_dqb.dqb_curinodes;
+
+ buf->f_files = min(buf->f_files, limit);
+ buf->f_ffree = min(buf->f_ffree, remaining);
}
spin_unlock(&dquot->dq_dqb_lock);
@@ -1845,9 +1870,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_fsid = u64_to_fsid(id);
#ifdef CONFIG_QUOTA
- if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) &&
+ if (is_inode_flag_set(d_inode(dentry), FI_PROJ_INHERIT) &&
sb_has_quota_limits_enabled(sb, PRJQUOTA)) {
- f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf);
+ f2fs_statfs_project(sb, F2FS_I(d_inode(dentry))->i_projid, buf);
}
#endif
return 0;
@@ -2205,6 +2230,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
.init_gc_type = FG_GC,
.should_migrate_blocks = false,
.err_gc_skipped = true,
+ .no_bg_gc = true,
.nr_free_secs = 1 };
f2fs_down_write(&sbi->gc_lock);
@@ -2284,9 +2310,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
unsigned long old_sb_flags;
int err;
bool need_restart_gc = false, need_stop_gc = false;
- bool need_restart_ckpt = false, need_stop_ckpt = false;
bool need_restart_flush = false, need_stop_flush = false;
bool need_restart_discard = false, need_stop_discard = false;
+ bool need_enable_checkpoint = false, need_disable_checkpoint = false;
bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
@@ -2339,6 +2365,17 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
if (err)
goto restore_opts;
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi) &&
+ sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
+ f2fs_err(sbi,
+ "zoned: max open zones %u is too small, need at least %u open zones",
+ sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
+ err = -EINVAL;
+ goto restore_opts;
+ }
+#endif
+
/* flush outstanding errors before changing fs state */
flush_work(&sbi->s_error_work);
@@ -2443,24 +2480,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
clear_sbi_flag(sbi, SBI_IS_CLOSE);
}
- if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
- !test_opt(sbi, MERGE_CHECKPOINT)) {
- f2fs_stop_ckpt_thread(sbi);
- need_restart_ckpt = true;
- } else {
- /* Flush if the prevous checkpoint, if exists. */
- f2fs_flush_ckpt_thread(sbi);
-
- err = f2fs_start_ckpt_thread(sbi);
- if (err) {
- f2fs_err(sbi,
- "Failed to start F2FS issue_checkpoint_thread (%d)",
- err);
- goto restore_gc;
- }
- need_stop_ckpt = true;
- }
-
/*
* We stop issue flush thread if FS is mounted as RO
* or if flush_merge is not passed in mount option.
@@ -2472,7 +2491,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
} else {
err = f2fs_create_flush_cmd_control(sbi);
if (err)
- goto restore_ckpt;
+ goto restore_gc;
need_stop_flush = true;
}
@@ -2494,8 +2513,31 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
err = f2fs_disable_checkpoint(sbi);
if (err)
goto restore_discard;
+ need_enable_checkpoint = true;
} else {
f2fs_enable_checkpoint(sbi);
+ need_disable_checkpoint = true;
+ }
+ }
+
+ /*
+ * Place this routine at the end, since a new checkpoint would be
+ * triggered while remount and we need to take care of it before
+ * returning from remount.
+ */
+ if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
+ !test_opt(sbi, MERGE_CHECKPOINT)) {
+ f2fs_stop_ckpt_thread(sbi);
+ } else {
+ /* Flush if the prevous checkpoint, if exists. */
+ f2fs_flush_ckpt_thread(sbi);
+
+ err = f2fs_start_ckpt_thread(sbi);
+ if (err) {
+ f2fs_err(sbi,
+ "Failed to start F2FS issue_checkpoint_thread (%d)",
+ err);
+ goto restore_checkpoint;
}
}
@@ -2513,6 +2555,13 @@ skip:
adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
+restore_checkpoint:
+ if (need_enable_checkpoint) {
+ f2fs_enable_checkpoint(sbi);
+ } else if (need_disable_checkpoint) {
+ if (f2fs_disable_checkpoint(sbi))
+ f2fs_warn(sbi, "checkpoint has not been disabled");
+ }
restore_discard:
if (need_restart_discard) {
if (f2fs_start_discard_thread(sbi))
@@ -2528,13 +2577,6 @@ restore_flush:
clear_opt(sbi, FLUSH_MERGE);
f2fs_destroy_flush_cmd_control(sbi, false);
}
-restore_ckpt:
- if (need_restart_ckpt) {
- if (f2fs_start_ckpt_thread(sbi))
- f2fs_warn(sbi, "background ckpt thread has stopped");
- } else if (need_stop_ckpt) {
- f2fs_stop_ckpt_thread(sbi);
- }
restore_gc:
if (need_restart_gc) {
if (f2fs_start_gc_thread(sbi))
@@ -2673,7 +2715,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
const struct address_space_operations *a_ops = mapping->a_ops;
int offset = off & (sb->s_blocksize - 1);
size_t towrite = len;
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int err = 0;
int tocopy;
@@ -2683,7 +2725,7 @@ static ssize_t f2fs_quota_write(struct super_block *sb, int type,
towrite);
retry:
err = a_ops->write_begin(NULL, mapping, off, tocopy,
- &page, &fsdata);
+ &folio, &fsdata);
if (unlikely(err)) {
if (err == -ENOMEM) {
f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
@@ -2693,10 +2735,10 @@ retry:
break;
}
- memcpy_to_page(page, offset, data, tocopy);
+ memcpy_to_folio(folio, offset_in_folio(folio, off), data, tocopy);
a_ops->write_end(NULL, mapping, off, tocopy, tocopy,
- page, fsdata);
+ folio, fsdata);
offset = 0;
towrite -= tocopy;
off += tocopy;
@@ -2706,7 +2748,7 @@ retry:
if (len == towrite)
return err;
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
f2fs_mark_inode_dirty_sync(inode, false);
return len - towrite;
}
@@ -3200,13 +3242,6 @@ static bool f2fs_has_stable_inodes(struct super_block *sb)
return true;
}
-static void f2fs_get_ino_and_lblk_bits(struct super_block *sb,
- int *ino_bits_ret, int *lblk_bits_ret)
-{
- *ino_bits_ret = 8 * sizeof(nid_t);
- *lblk_bits_ret = 8 * sizeof(block_t);
-}
-
static struct block_device **f2fs_get_devices(struct super_block *sb,
unsigned int *num_devs)
{
@@ -3228,13 +3263,15 @@ static struct block_device **f2fs_get_devices(struct super_block *sb,
}
static const struct fscrypt_operations f2fs_cryptops = {
- .key_prefix = "f2fs:",
+ .needs_bounce_pages = 1,
+ .has_32bit_inodes = 1,
+ .supports_subblock_data_units = 1,
+ .legacy_key_prefix = "f2fs:",
.get_context = f2fs_get_context,
.set_context = f2fs_set_context,
.get_dummy_policy = f2fs_get_dummy_policy,
.empty_dir = f2fs_empty_dir,
.has_stable_inodes = f2fs_has_stable_inodes,
- .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits,
.get_devices = f2fs_get_devices,
};
#endif
@@ -3279,6 +3316,7 @@ static struct dentry *f2fs_fh_to_parent(struct super_block *sb, struct fid *fid,
}
static const struct export_operations f2fs_export_ops = {
+ .encode_fh = generic_encode_ino32_fh,
.fh_to_dentry = f2fs_fh_to_dentry,
.fh_to_parent = f2fs_fh_to_parent,
.get_parent = f2fs_get_parent,
@@ -3312,27 +3350,54 @@ loff_t max_file_blocks(struct inode *inode)
leaf_count *= NIDS_PER_BLOCK;
result += leaf_count;
+ /*
+ * For compatibility with FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{64,32} with
+ * a 4K crypto data unit, we must restrict the max filesize to what can
+ * fit within U32_MAX + 1 data units.
+ */
+
+ result = umin(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096));
+
return result;
}
-static int __f2fs_commit_super(struct buffer_head *bh,
- struct f2fs_super_block *super)
+static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio,
+ pgoff_t index, bool update)
{
- lock_buffer(bh);
- if (super)
- memcpy(bh->b_data + F2FS_SUPER_OFFSET, super, sizeof(*super));
- set_buffer_dirty(bh);
- unlock_buffer(bh);
-
+ struct bio *bio;
/* it's rare case, we can do fua all the time */
- return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
+ blk_opf_t opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA;
+ int ret;
+
+ folio_lock(folio);
+ folio_wait_writeback(folio);
+ if (update)
+ memcpy(F2FS_SUPER_BLOCK(folio, index), F2FS_RAW_SUPER(sbi),
+ sizeof(struct f2fs_super_block));
+ folio_mark_dirty(folio);
+ folio_clear_dirty_for_io(folio);
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+
+ bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS);
+
+ /* it doesn't need to set crypto context for superblock update */
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio_index(folio));
+
+ if (!bio_add_folio(bio, folio, folio_size(folio), 0))
+ f2fs_bug_on(sbi, 1);
+
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ folio_end_writeback(folio);
+
+ return ret;
}
static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
- struct buffer_head *bh)
+ struct folio *folio, pgoff_t index)
{
- struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
- (bh->b_data + F2FS_SUPER_OFFSET);
+ struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index);
struct super_block *sb = sbi->sb;
u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
@@ -3407,7 +3472,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
set_sbi_flag(sbi, SBI_NEED_SB_WRITE);
res = "internally";
} else {
- err = __f2fs_commit_super(bh, NULL);
+ err = __f2fs_commit_super(sbi, folio, index, false);
res = err ? "failed" : "done";
}
f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)",
@@ -3420,12 +3485,11 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
}
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
- struct buffer_head *bh)
+ struct folio *folio, pgoff_t index)
{
block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main;
block_t total_sections, blocks_per_seg;
- struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
- (bh->b_data + F2FS_SUPER_OFFSET);
+ struct f2fs_super_block *raw_super = F2FS_SUPER_BLOCK(folio, index);
size_t crc_offset = 0;
__u32 crc = 0;
@@ -3451,7 +3515,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
}
- /* Currently, support only 4KB block size */
+ /* only a block size equal to PAGE_SIZE is supported */
if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) {
f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u",
le32_to_cpu(raw_super->log_blocksize),
@@ -3466,7 +3530,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
- /* Currently, support 512/1024/2048/4096 bytes sector size */
+ /* Currently, 512/1024/2048/4096/16K byte sector sizes are supported */
if (le32_to_cpu(raw_super->log_sectorsize) >
F2FS_MAX_LOG_SECTOR_SIZE ||
le32_to_cpu(raw_super->log_sectorsize) <
@@ -3583,7 +3647,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
/* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
- if (sanity_check_area_boundary(sbi, bh))
+ if (sanity_check_area_boundary(sbi, folio, index))
return -EFSCORRUPTED;
return 0;
@@ -3604,6 +3668,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
block_t user_block_count, valid_user_blocks;
block_t avail_node_count, valid_node_count;
unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks;
+ unsigned int sit_blk_cnt;
int i, j;
total = le32_to_cpu(raw_super->segment_count);
@@ -3715,6 +3780,13 @@ skip_cross:
return 1;
}
+ sit_blk_cnt = DIV_ROUND_UP(main_segs, SIT_ENTRY_PER_BLOCK);
+ if (sit_bitmap_size * 8 < sit_blk_cnt) {
+ f2fs_err(sbi, "Wrong bitmap size: sit: %u, sit_blk_cnt:%u",
+ sit_bitmap_size, sit_blk_cnt);
+ return 1;
+ }
+
cp_pack_start_sum = __start_sum_addr(sbi);
cp_payload = __cp_payload(sbi);
if (cp_pack_start_sum < cp_payload + 1 ||
@@ -3766,9 +3838,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
sbi->secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
sbi->total_sections = le32_to_cpu(raw_super->section_count);
- sbi->total_node_count =
- ((le32_to_cpu(raw_super->segment_count_nat) / 2) *
- NAT_ENTRY_PER_BLOCK) << sbi->log_blocks_per_seg;
+ sbi->total_node_count = SEGS_TO_BLKS(sbi,
+ ((le32_to_cpu(raw_super->segment_count_nat) / 2) *
+ NAT_ENTRY_PER_BLOCK));
F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
@@ -3778,6 +3850,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
sbi->migration_granularity = SEGS_PER_SEC(sbi);
+ sbi->migration_window_granularity = f2fs_sb_has_blkzoned(sbi) ?
+ DEF_MIGRATION_WINDOW_GRANULARITY_ZONED : SEGS_PER_SEC(sbi);
sbi->seq_file_ra_mul = MIN_RA_MUL;
sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
@@ -3872,11 +3946,24 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
sector_t nr_sectors = bdev_nr_sectors(bdev);
struct f2fs_report_zones_args rep_zone_arg;
u64 zone_sectors;
+ unsigned int max_open_zones;
int ret;
if (!f2fs_sb_has_blkzoned(sbi))
return 0;
+ if (bdev_is_zoned(FDEV(devi).bdev)) {
+ max_open_zones = bdev_max_open_zones(bdev);
+ if (max_open_zones && (max_open_zones < sbi->max_open_zones))
+ sbi->max_open_zones = max_open_zones;
+ if (sbi->max_open_zones < F2FS_OPTION(sbi).active_logs) {
+ f2fs_err(sbi,
+ "zoned: max open zones %u is too small, need at least %u open zones",
+ sbi->max_open_zones, F2FS_OPTION(sbi).active_logs);
+ return -EINVAL;
+ }
+ }
+
zone_sectors = bdev_zone_sectors(bdev);
if (sbi->blocks_per_blkz && sbi->blocks_per_blkz !=
SECTOR_TO_BLOCK(zone_sectors))
@@ -3917,7 +4004,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
{
struct super_block *sb = sbi->sb;
int block;
- struct buffer_head *bh;
+ struct folio *folio;
struct f2fs_super_block *super;
int err = 0;
@@ -3926,32 +4013,32 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
return -ENOMEM;
for (block = 0; block < 2; block++) {
- bh = sb_bread(sb, block);
- if (!bh) {
+ folio = read_mapping_folio(sb->s_bdev->bd_mapping, block, NULL);
+ if (IS_ERR(folio)) {
f2fs_err(sbi, "Unable to read %dth superblock",
block + 1);
- err = -EIO;
+ err = PTR_ERR(folio);
*recovery = 1;
continue;
}
/* sanity checking of raw super */
- err = sanity_check_raw_super(sbi, bh);
+ err = sanity_check_raw_super(sbi, folio, block);
if (err) {
f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
block + 1);
- brelse(bh);
+ folio_put(folio);
*recovery = 1;
continue;
}
if (!*raw_super) {
- memcpy(super, bh->b_data + F2FS_SUPER_OFFSET,
+ memcpy(super, F2FS_SUPER_BLOCK(folio, block),
sizeof(*super));
*valid_super_block = block;
*raw_super = super;
}
- brelse(bh);
+ folio_put(folio);
}
/* No valid superblock */
@@ -3965,7 +4052,8 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
{
- struct buffer_head *bh;
+ struct folio *folio;
+ pgoff_t index;
__u32 crc = 0;
int err;
@@ -3983,22 +4071,24 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
}
/* write back-up superblock first */
- bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
- if (!bh)
- return -EIO;
- err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
- brelse(bh);
+ index = sbi->valid_super_block ? 0 : 1;
+ folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ err = __f2fs_commit_super(sbi, folio, index, true);
+ folio_put(folio);
/* if we are in recovery path, skip writing valid superblock */
if (recover || err)
return err;
/* write current valid superblock */
- bh = sb_bread(sbi->sb, sbi->valid_super_block);
- if (!bh)
- return -EIO;
- err = __f2fs_commit_super(bh, F2FS_RAW_SUPER(sbi));
- brelse(bh);
+ index = sbi->valid_super_block;
+ folio = read_mapping_folio(sbi->sb->s_bdev->bd_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ err = __f2fs_commit_super(sbi, folio, index, true);
+ folio_put(folio);
return err;
}
@@ -4033,7 +4123,9 @@ static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
f2fs_up_write(&sbi->sb_lock);
if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_commit_super fails to record stop_reason, err:%d",
+ err);
}
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
@@ -4076,8 +4168,9 @@ static void f2fs_record_errors(struct f2fs_sb_info *sbi, unsigned char error)
err = f2fs_commit_super(sbi, false);
if (err)
- f2fs_err(sbi, "f2fs_commit_super fails to record errors:%u, err:%d",
- error, err);
+ f2fs_err_ratelimited(sbi,
+ "f2fs_commit_super fails to record errors:%u, err:%d",
+ error, err);
out_unlock:
f2fs_up_write(&sbi->sb_lock);
}
@@ -4168,6 +4261,37 @@ static void f2fs_record_error_work(struct work_struct *work)
f2fs_record_stop_reason(sbi);
}
+static inline unsigned int get_first_seq_zone_segno(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+ unsigned int zoneno, total_zones;
+ int devi;
+
+ if (!f2fs_sb_has_blkzoned(sbi))
+ return NULL_SEGNO;
+
+ for (devi = 0; devi < sbi->s_ndevs; devi++) {
+ if (!bdev_is_zoned(FDEV(devi).bdev))
+ continue;
+
+ total_zones = GET_ZONE_FROM_SEG(sbi, FDEV(devi).total_segments);
+
+ for (zoneno = 0; zoneno < total_zones; zoneno++) {
+ unsigned int segs, blks;
+
+ if (!f2fs_zone_is_seq(sbi, devi, zoneno))
+ continue;
+
+ segs = GET_SEG_FROM_SEC(sbi,
+ zoneno * sbi->secs_per_zone);
+ blks = SEGS_TO_BLKS(sbi, segs);
+ return GET_SEGNO(sbi, FDEV(devi).start_blk + blks);
+ }
+ }
+#endif
+ return NULL_SEGNO;
+}
+
static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
{
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -4196,10 +4320,22 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
sbi->aligned_blksize = true;
+#ifdef CONFIG_BLK_DEV_ZONED
+ sbi->max_open_zones = UINT_MAX;
+ sbi->blkzone_alloc_policy = BLKZONE_ALLOC_PRIOR_SEQ;
+#endif
for (i = 0; i < max_devices; i++) {
+ if (max_devices == 1) {
+ FDEV(i).total_segments =
+ le32_to_cpu(raw_super->segment_count_main);
+ FDEV(i).start_blk = 0;
+ FDEV(i).end_blk = FDEV(i).total_segments *
+ BLKS_PER_SEG(sbi);
+ }
+
if (i == 0)
- FDEV(0).bdev = sbi->sb->s_bdev;
+ FDEV(0).bdev_file = sbi->sb->s_bdev_file;
else if (!RDEV(i).path[0])
break;
@@ -4211,21 +4347,22 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
if (i == 0) {
FDEV(i).start_blk = 0;
FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1 +
- le32_to_cpu(raw_super->segment0_blkaddr);
+ SEGS_TO_BLKS(sbi,
+ FDEV(i).total_segments) - 1 +
+ le32_to_cpu(raw_super->segment0_blkaddr);
} else {
FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
FDEV(i).end_blk = FDEV(i).start_blk +
- (FDEV(i).total_segments <<
- sbi->log_blocks_per_seg) - 1;
- FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
- mode, sbi->sb, NULL);
+ SEGS_TO_BLKS(sbi,
+ FDEV(i).total_segments) - 1;
+ FDEV(i).bdev_file = bdev_file_open_by_path(
+ FDEV(i).path, mode, sbi->sb, NULL);
}
}
- if (IS_ERR(FDEV(i).bdev))
- return PTR_ERR(FDEV(i).bdev);
+ if (IS_ERR(FDEV(i).bdev_file))
+ return PTR_ERR(FDEV(i).bdev_file);
+ FDEV(i).bdev = file_bdev(FDEV(i).bdev_file);
/* to release errored devices */
sbi->s_ndevs = i + 1;
@@ -4233,24 +4370,21 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
sbi->aligned_blksize = false;
#ifdef CONFIG_BLK_DEV_ZONED
- if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
- !f2fs_sb_has_blkzoned(sbi)) {
- f2fs_err(sbi, "Zoned block device feature not enabled");
- return -EINVAL;
- }
- if (bdev_zoned_model(FDEV(i).bdev) != BLK_ZONED_NONE) {
+ if (bdev_is_zoned(FDEV(i).bdev)) {
+ if (!f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device feature not enabled");
+ return -EINVAL;
+ }
if (init_blkz_info(sbi, i)) {
f2fs_err(sbi, "Failed to initialize F2FS blkzone information");
return -EINVAL;
}
if (max_devices == 1)
break;
- f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
+ f2fs_info(sbi, "Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: Host-managed)",
i, FDEV(i).path,
FDEV(i).total_segments,
- FDEV(i).start_blk, FDEV(i).end_blk,
- bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HA ?
- "Host-aware" : "Host-managed");
+ FDEV(i).start_blk, FDEV(i).end_blk);
continue;
}
#endif
@@ -4448,7 +4582,8 @@ try_onemore:
sb->s_time_gran = 1;
sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
(test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0);
- memcpy(&sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid));
+ super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid));
+ super_set_sysfs_name_bdev(sb);
sb->s_iflags |= SB_I_CGROUPWB;
/* init f2fs-specific super block info */
@@ -4565,6 +4700,9 @@ try_onemore:
/* For write statistics */
sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
+ /* get segno of the first sequential zone on a zoned block device */
+ sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi);
+
/* Read accumulated write IO statistics if exists */
seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
if (__exist_node_summaries(sbi))
@@ -4599,6 +4737,7 @@ try_onemore:
goto free_node_inode;
}
+ generic_set_sb_d_ops(sb);
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
err = -ENOMEM;
@@ -4682,19 +4821,25 @@ try_onemore:
#ifdef CONFIG_QUOTA
f2fs_recover_quota_end(sbi, quota_enabled);
#endif
-
+reset_checkpoint:
/*
* If the f2fs is not readonly and fsync data recovery succeeds,
* check zoned block devices' write pointer consistency.
*/
- if (!err && !f2fs_readonly(sb) && f2fs_sb_has_blkzoned(sbi)) {
- err = f2fs_check_write_pointer(sbi);
- if (err)
- goto free_meta;
+ if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sb)) {
+ int err2;
+
+ f2fs_notice(sbi, "Checking entire write pointers");
+ err2 = f2fs_check_write_pointer(sbi);
+ if (err2)
+ err = err2;
}
+ if (err)
+ goto free_meta;
-reset_checkpoint:
- f2fs_init_inmem_curseg(sbi);
+ err = f2fs_init_inmem_curseg(sbi);
+ if (err)
+ goto sync_free_meta;
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -4818,6 +4963,7 @@ free_sbi:
if (sbi->s_chksum_driver)
crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi);
+ sb->s_fs_info = NULL;
/* give only one another chance */
if (retry_cnt > 0 && skip_recovery) {
@@ -4836,9 +4982,9 @@ static struct dentry *f2fs_mount(struct file_system_type *fs_type, int flags,
static void kill_f2fs_super(struct super_block *sb)
{
- if (sb->s_root) {
- struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ if (sb->s_root) {
set_sbi_flag(sbi, SBI_IS_CLOSE);
f2fs_stop_gc_thread(sbi);
f2fs_stop_discard_thread(sbi);
@@ -4865,6 +5011,12 @@ static void kill_f2fs_super(struct super_block *sb)
sb->s_flags &= ~SB_RDONLY;
}
kill_block_super(sb);
+ /* Release block devices last, after fscrypt_destroy_keyring(). */
+ if (sbi) {
+ destroy_device_list(sbi);
+ kfree(sbi);
+ sb->s_fs_info = NULL;
+ }
}
static struct file_system_type f2fs_fs_type = {
@@ -4898,12 +5050,6 @@ static int __init init_f2fs_fs(void)
{
int err;
- if (PAGE_SIZE != F2FS_BLKSIZE) {
- printk("F2FS not supported on PAGE_SIZE(%lu) != %d\n",
- PAGE_SIZE, F2FS_BLKSIZE);
- return -EINVAL;
- }
-
err = init_inodecache();
if (err)
goto fail;
@@ -4928,12 +5074,9 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_sysfs();
if (err)
goto free_garbage_collection_cache;
- err = register_shrinker(&f2fs_shrinker_info, "f2fs-shrinker");
+ err = f2fs_init_shrinker();
if (err)
goto free_sysfs;
- err = register_filesystem(&f2fs_fs_type);
- if (err)
- goto free_shrinker;
f2fs_create_root_stats();
err = f2fs_init_post_read_processing();
if (err)
@@ -4956,7 +5099,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_create_casefold_cache();
if (err)
goto free_compress_cache;
+ err = register_filesystem(&f2fs_fs_type);
+ if (err)
+ goto free_casefold_cache;
return 0;
+free_casefold_cache:
+ f2fs_destroy_casefold_cache();
free_compress_cache:
f2fs_destroy_compress_cache();
free_compress_mempool:
@@ -4971,9 +5119,7 @@ free_post_read:
f2fs_destroy_post_read_processing();
free_root_stats:
f2fs_destroy_root_stats();
- unregister_filesystem(&f2fs_fs_type);
-free_shrinker:
- unregister_shrinker(&f2fs_shrinker_info);
+ f2fs_exit_shrinker();
free_sysfs:
f2fs_exit_sysfs();
free_garbage_collection_cache:
@@ -4996,6 +5142,7 @@ fail:
static void __exit exit_f2fs_fs(void)
{
+ unregister_filesystem(&f2fs_fs_type);
f2fs_destroy_casefold_cache();
f2fs_destroy_compress_cache();
f2fs_destroy_compress_mempool();
@@ -5004,8 +5151,7 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_iostat_processing();
f2fs_destroy_post_read_processing();
f2fs_destroy_root_stats();
- unregister_filesystem(&f2fs_fs_type);
- unregister_shrinker(&f2fs_shrinker_info);
+ f2fs_exit_shrinker();
f2fs_exit_sysfs();
f2fs_destroy_garbage_collection_cache();
f2fs_destroy_extent_cache();
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 180feefc4a9c..eb84b9418ac1 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -61,6 +61,12 @@ struct f2fs_attr {
int id;
};
+struct f2fs_base_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct f2fs_base_attr *a, char *buf);
+ ssize_t (*store)(struct f2fs_base_attr *a, const char *buf, size_t len);
+};
+
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf);
@@ -143,6 +149,39 @@ static ssize_t pending_discard_show(struct f2fs_attr *a,
&SM_I(sbi)->dcc_info->discard_cmd_cnt));
}
+static ssize_t issued_discard_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ if (!SM_I(sbi)->dcc_info)
+ return -EINVAL;
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic_read(
+ &SM_I(sbi)->dcc_info->issued_discard));
+}
+
+static ssize_t queued_discard_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ if (!SM_I(sbi)->dcc_info)
+ return -EINVAL;
+ return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic_read(
+ &SM_I(sbi)->dcc_info->queued_discard));
+}
+
+static ssize_t undiscard_blks_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ if (!SM_I(sbi)->dcc_info)
+ return -EINVAL;
+ return sysfs_emit(buf, "%u\n",
+ SM_I(sbi)->dcc_info->undiscard_blks);
+}
+
+static ssize_t atgc_enabled_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", sbi->am.atgc_enabled ? 1 : 0);
+}
+
static ssize_t gc_mode_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -155,50 +194,50 @@ static ssize_t features_show(struct f2fs_attr *a,
int len = 0;
if (f2fs_sb_has_encrypt(sbi))
- len += scnprintf(buf, PAGE_SIZE - len, "%s",
+ len += sysfs_emit_at(buf, len, "%s",
"encryption");
if (f2fs_sb_has_blkzoned(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "blkzoned");
if (f2fs_sb_has_extra_attr(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "extra_attr");
if (f2fs_sb_has_project_quota(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "projquota");
if (f2fs_sb_has_inode_chksum(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "inode_checksum");
if (f2fs_sb_has_flexible_inline_xattr(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "flexible_inline_xattr");
if (f2fs_sb_has_quota_ino(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "quota_ino");
if (f2fs_sb_has_inode_crtime(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "inode_crtime");
if (f2fs_sb_has_lost_found(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "lost_found");
if (f2fs_sb_has_verity(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "verity");
if (f2fs_sb_has_sb_chksum(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "sb_checksum");
if (f2fs_sb_has_casefold(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "casefold");
if (f2fs_sb_has_readonly(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "readonly");
if (f2fs_sb_has_compression(sbi))
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "compression");
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s%s",
+ len += sysfs_emit_at(buf, len, "%s%s",
len ? ", " : "", "pin_file");
- len += scnprintf(buf + len, PAGE_SIZE - len, "\n");
+ len += sysfs_emit_at(buf, len, "\n");
return len;
}
@@ -296,30 +335,27 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
int hot_count = sbi->raw_super->hot_ext_count;
int len = 0, i;
- len += scnprintf(buf + len, PAGE_SIZE - len,
- "cold file extension:\n");
+ len += sysfs_emit_at(buf, len, "cold file extension:\n");
for (i = 0; i < cold_count; i++)
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
- extlist[i]);
+ len += sysfs_emit_at(buf, len, "%s\n", extlist[i]);
- len += scnprintf(buf + len, PAGE_SIZE - len,
- "hot file extension:\n");
+ len += sysfs_emit_at(buf, len, "hot file extension:\n");
for (i = cold_count; i < cold_count + hot_count; i++)
- len += scnprintf(buf + len, PAGE_SIZE - len, "%s\n",
- extlist[i]);
+ len += sysfs_emit_at(buf, len, "%s\n", extlist[i]);
+
return len;
}
if (!strcmp(a->attr.name, "ckpt_thread_ioprio")) {
struct ckpt_req_control *cprc = &sbi->cprc_info;
int class = IOPRIO_PRIO_CLASS(cprc->ckpt_thread_ioprio);
- int data = IOPRIO_PRIO_DATA(cprc->ckpt_thread_ioprio);
+ int level = IOPRIO_PRIO_LEVEL(cprc->ckpt_thread_ioprio);
if (class != IOPRIO_CLASS_RT && class != IOPRIO_CLASS_BE)
return -EINVAL;
return sysfs_emit(buf, "%s,%d\n",
- class == IOPRIO_CLASS_RT ? "rt" : "be", data);
+ class == IOPRIO_CLASS_RT ? "rt" : "be", level);
}
#ifdef CONFIG_F2FS_FS_COMPRESSION
@@ -423,7 +459,7 @@ out:
const char *name = strim((char *)buf);
struct ckpt_req_control *cprc = &sbi->cprc_info;
int class;
- long data;
+ long level;
int ret;
if (!strncmp(name, "rt,", 3))
@@ -434,13 +470,13 @@ out:
return -EINVAL;
name += 3;
- ret = kstrtol(name, 10, &data);
+ ret = kstrtol(name, 10, &level);
if (ret)
return ret;
- if (data >= IOPRIO_NR_LEVELS || data < 0)
+ if (level >= IOPRIO_NR_LEVELS || level < 0)
return -EINVAL;
- cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, data);
+ cprc->ckpt_thread_ioprio = IOPRIO_PRIO_VALUE(class, level);
if (test_opt(sbi, MERGE_CHECKPOINT)) {
ret = set_task_ioprio(cprc->f2fs_issue_ckpt,
cprc->ckpt_thread_ioprio);
@@ -472,8 +508,8 @@ out:
spin_lock(&sbi->stat_lock);
if (t > (unsigned long)(sbi->user_block_count -
F2FS_OPTION(sbi).root_reserved_blocks -
- (SM_I(sbi)->additional_reserved_segments <<
- sbi->log_blocks_per_seg))) {
+ SEGS_TO_BLKS(sbi,
+ SM_I(sbi)->additional_reserved_segments))) {
spin_unlock(&sbi->stat_lock);
return -EINVAL;
}
@@ -522,11 +558,23 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "discard_io_aware")) {
+ if (t >= DPOLICY_IO_AWARE_MAX)
+ return -EINVAL;
+ *ui = t;
+ return count;
+ }
+
if (!strcmp(a->attr.name, "migration_granularity")) {
if (t == 0 || t > SEGS_PER_SEC(sbi))
return -EINVAL;
}
+ if (!strcmp(a->attr.name, "migration_window_granularity")) {
+ if (t == 0 || t > SEGS_PER_SEC(sbi))
+ return -EINVAL;
+ }
+
if (!strcmp(a->attr.name, "gc_urgent")) {
if (t == 0) {
sbi->gc_mode = GC_NORMAL;
@@ -575,6 +623,27 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "gc_no_zoned_gc_percent")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_boost_zoned_gc_percent")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "gc_valid_thresh_ratio")) {
+ if (t > 100)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
#ifdef CONFIG_F2FS_IOSTAT
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
@@ -593,6 +662,15 @@ out:
}
#endif
+#ifdef CONFIG_BLK_DEV_ZONED
+ if (!strcmp(a->attr.name, "blkzone_alloc_policy")) {
+ if (t < BLKZONE_ALLOC_PRIOR_SEQ || t > BLKZONE_ALLOC_PRIOR_CONV)
+ return -EINVAL;
+ sbi->blkzone_alloc_policy = t;
+ return count;
+ }
+#endif
+
#ifdef CONFIG_F2FS_FS_COMPRESSION
if (!strcmp(a->attr.name, "compr_written_block") ||
!strcmp(a->attr.name, "compr_saved_block")) {
@@ -647,6 +725,13 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "gc_pin_file_thresh")) { /* sysfs entry name, not the sbi field name */
+ if (t > MAX_GC_FAILED_PINNED_FILES)
+ return -EINVAL;
+ sbi->gc_pin_file_threshold = t;
+ return count;
+ }
+
if (!strcmp(a->attr.name, "gc_reclaimed_segments")) {
if (t != 0)
return -EINVAL;
@@ -731,15 +816,30 @@ out:
return count;
}
+ if (!strcmp(a->attr.name, "max_read_extent_count")) {
+ if (t > UINT_MAX)
+ return -EINVAL;
+ *ui = (unsigned int)t;
+ return count;
+ }
+
if (!strcmp(a->attr.name, "ipu_policy")) {
if (t >= BIT(F2FS_IPU_MAX))
return -EINVAL;
- if (t && f2fs_lfs_mode(sbi))
+ /* allow F2FS_IPU_NOCACHE only for IPU in the pinned file */
+ if (f2fs_lfs_mode(sbi) && (t & ~BIT(F2FS_IPU_NOCACHE)))
return -EINVAL;
SM_I(sbi)->ipu_policy = (unsigned int)t;
return count;
}
+ if (!strcmp(a->attr.name, "dir_level")) {
+ if (t > MAX_DIR_HASH_DEPTH)
+ return -EINVAL;
+ sbi->dir_level = t;
+ return count;
+ }
+
*ui = (unsigned int)t;
return count;
@@ -791,6 +891,25 @@ static void f2fs_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
+static ssize_t f2fs_base_attr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct f2fs_base_attr *a = container_of(attr,
+ struct f2fs_base_attr, attr);
+
+ return a->show ? a->show(a, buf) : 0;
+}
+
+static ssize_t f2fs_base_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct f2fs_base_attr *a = container_of(attr,
+ struct f2fs_base_attr, attr);
+
+ return a->store ? a->store(a, buf, len) : 0;
+}
+
/*
* Note that there are three feature list entries:
* 1) /sys/fs/f2fs/features
@@ -809,14 +928,13 @@ static void f2fs_sb_release(struct kobject *kobj)
* please add new on-disk feature in this list only.
* - ref. F2FS_SB_FEATURE_RO_ATTR()
*/
-static ssize_t f2fs_feature_show(struct f2fs_attr *a,
- struct f2fs_sb_info *sbi, char *buf)
+static ssize_t f2fs_feature_show(struct f2fs_base_attr *a, char *buf)
{
return sysfs_emit(buf, "supported\n");
}
#define F2FS_FEATURE_RO_ATTR(_name) \
-static struct f2fs_attr f2fs_attr_##_name = { \
+static struct f2fs_base_attr f2fs_base_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = 0444 }, \
.show = f2fs_feature_show, \
}
@@ -912,6 +1030,9 @@ GC_THREAD_RW_ATTR(gc_urgent_sleep_time, urgent_sleep_time);
GC_THREAD_RW_ATTR(gc_min_sleep_time, min_sleep_time);
GC_THREAD_RW_ATTR(gc_max_sleep_time, max_sleep_time);
GC_THREAD_RW_ATTR(gc_no_gc_sleep_time, no_gc_sleep_time);
+GC_THREAD_RW_ATTR(gc_no_zoned_gc_percent, no_zoned_gc_percent);
+GC_THREAD_RW_ATTR(gc_boost_zoned_gc_percent, boost_zoned_gc_percent);
+GC_THREAD_RW_ATTR(gc_valid_thresh_ratio, valid_thresh_ratio);
/* SM_INFO ATTR */
SM_INFO_RW_ATTR(reclaim_segments, rec_prefree_segments);
@@ -921,6 +1042,7 @@ SM_INFO_GENERAL_RW_ATTR(min_fsync_blocks);
SM_INFO_GENERAL_RW_ATTR(min_seq_blocks);
SM_INFO_GENERAL_RW_ATTR(min_hot_blocks);
SM_INFO_GENERAL_RW_ATTR(min_ssr_sections);
+SM_INFO_GENERAL_RW_ATTR(reserved_segments);
/* DCC_INFO ATTR */
DCC_INFO_RW_ATTR(max_small_discards, max_discards);
@@ -932,6 +1054,7 @@ DCC_INFO_GENERAL_RW_ATTR(discard_io_aware_gran);
DCC_INFO_GENERAL_RW_ATTR(discard_urgent_util);
DCC_INFO_GENERAL_RW_ATTR(discard_granularity);
DCC_INFO_GENERAL_RW_ATTR(max_ordered_discard);
+DCC_INFO_GENERAL_RW_ATTR(discard_io_aware);
/* NM_INFO ATTR */
NM_INFO_RW_ATTR(max_roll_forward_node_blocks, max_rf_node_blocks);
@@ -952,6 +1075,7 @@ F2FS_SBI_RW_ATTR(gc_pin_file_thresh, gc_pin_file_threshold);
F2FS_SBI_RW_ATTR(gc_reclaimed_segments, gc_reclaimed_segs);
F2FS_SBI_GENERAL_RW_ATTR(max_victim_search);
F2FS_SBI_GENERAL_RW_ATTR(migration_granularity);
+F2FS_SBI_GENERAL_RW_ATTR(migration_window_granularity);
F2FS_SBI_GENERAL_RW_ATTR(dir_level);
#ifdef CONFIG_F2FS_IOSTAT
F2FS_SBI_GENERAL_RW_ATTR(iostat_enable);
@@ -982,8 +1106,11 @@ F2FS_SBI_GENERAL_RW_ATTR(revoked_atomic_block);
F2FS_SBI_GENERAL_RW_ATTR(hot_data_age_threshold);
F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold);
F2FS_SBI_GENERAL_RW_ATTR(last_age_weight);
+/* read extent cache */
+F2FS_SBI_GENERAL_RW_ATTR(max_read_extent_count);
#ifdef CONFIG_BLK_DEV_ZONED
F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec);
+F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy);
#endif
/* STAT_INFO ATTR */
@@ -1023,6 +1150,7 @@ F2FS_GENERAL_RO_ATTR(encoding);
F2FS_GENERAL_RO_ATTR(mounted_time_sec);
F2FS_GENERAL_RO_ATTR(main_blkaddr);
F2FS_GENERAL_RO_ATTR(pending_discard);
+F2FS_GENERAL_RO_ATTR(atgc_enabled);
F2FS_GENERAL_RO_ATTR(gc_mode);
#ifdef CONFIG_F2FS_STAT_FS
F2FS_GENERAL_RO_ATTR(moved_blocks_background);
@@ -1067,6 +1195,9 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(gc_min_sleep_time),
ATTR_LIST(gc_max_sleep_time),
ATTR_LIST(gc_no_gc_sleep_time),
+ ATTR_LIST(gc_no_zoned_gc_percent),
+ ATTR_LIST(gc_boost_zoned_gc_percent),
+ ATTR_LIST(gc_valid_thresh_ratio),
ATTR_LIST(gc_idle),
ATTR_LIST(gc_urgent),
ATTR_LIST(reclaim_segments),
@@ -1080,6 +1211,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(discard_urgent_util),
ATTR_LIST(discard_granularity),
ATTR_LIST(max_ordered_discard),
+ ATTR_LIST(discard_io_aware),
ATTR_LIST(pending_discard),
ATTR_LIST(gc_mode),
ATTR_LIST(ipu_policy),
@@ -1088,8 +1220,10 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(min_seq_blocks),
ATTR_LIST(min_hot_blocks),
ATTR_LIST(min_ssr_sections),
+ ATTR_LIST(reserved_segments),
ATTR_LIST(max_victim_search),
ATTR_LIST(migration_granularity),
+ ATTR_LIST(migration_window_granularity),
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
@@ -1137,6 +1271,7 @@ static struct attribute *f2fs_attrs[] = {
#endif
#ifdef CONFIG_BLK_DEV_ZONED
ATTR_LIST(unusable_blocks_per_sec),
+ ATTR_LIST(blkzone_alloc_policy),
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
ATTR_LIST(compr_written_block),
@@ -1150,6 +1285,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(atgc_candidate_count),
ATTR_LIST(atgc_age_weight),
ATTR_LIST(atgc_age_threshold),
+ ATTR_LIST(atgc_enabled),
ATTR_LIST(seq_file_ra_mul),
ATTR_LIST(gc_segment_mode),
ATTR_LIST(gc_reclaimed_segments),
@@ -1162,50 +1298,59 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(hot_data_age_threshold),
ATTR_LIST(warm_data_age_threshold),
ATTR_LIST(last_age_weight),
+ ATTR_LIST(max_read_extent_count),
NULL,
};
ATTRIBUTE_GROUPS(f2fs);
+#define BASE_ATTR_LIST(name) (&f2fs_base_attr_##name.attr)
static struct attribute *f2fs_feat_attrs[] = {
#ifdef CONFIG_FS_ENCRYPTION
- ATTR_LIST(encryption),
- ATTR_LIST(test_dummy_encryption_v2),
+ BASE_ATTR_LIST(encryption),
+ BASE_ATTR_LIST(test_dummy_encryption_v2),
#if IS_ENABLED(CONFIG_UNICODE)
- ATTR_LIST(encrypted_casefold),
+ BASE_ATTR_LIST(encrypted_casefold),
#endif
#endif /* CONFIG_FS_ENCRYPTION */
#ifdef CONFIG_BLK_DEV_ZONED
- ATTR_LIST(block_zoned),
+ BASE_ATTR_LIST(block_zoned),
#endif
- ATTR_LIST(atomic_write),
- ATTR_LIST(extra_attr),
- ATTR_LIST(project_quota),
- ATTR_LIST(inode_checksum),
- ATTR_LIST(flexible_inline_xattr),
- ATTR_LIST(quota_ino),
- ATTR_LIST(inode_crtime),
- ATTR_LIST(lost_found),
+ BASE_ATTR_LIST(atomic_write),
+ BASE_ATTR_LIST(extra_attr),
+ BASE_ATTR_LIST(project_quota),
+ BASE_ATTR_LIST(inode_checksum),
+ BASE_ATTR_LIST(flexible_inline_xattr),
+ BASE_ATTR_LIST(quota_ino),
+ BASE_ATTR_LIST(inode_crtime),
+ BASE_ATTR_LIST(lost_found),
#ifdef CONFIG_FS_VERITY
- ATTR_LIST(verity),
+ BASE_ATTR_LIST(verity),
#endif
- ATTR_LIST(sb_checksum),
+ BASE_ATTR_LIST(sb_checksum),
#if IS_ENABLED(CONFIG_UNICODE)
- ATTR_LIST(casefold),
+ BASE_ATTR_LIST(casefold),
#endif
- ATTR_LIST(readonly),
+ BASE_ATTR_LIST(readonly),
#ifdef CONFIG_F2FS_FS_COMPRESSION
- ATTR_LIST(compression),
+ BASE_ATTR_LIST(compression),
#endif
- ATTR_LIST(pin_file),
+ BASE_ATTR_LIST(pin_file),
NULL,
};
ATTRIBUTE_GROUPS(f2fs_feat);
F2FS_GENERAL_RO_ATTR(sb_status);
F2FS_GENERAL_RO_ATTR(cp_status);
+F2FS_GENERAL_RO_ATTR(issued_discard);
+F2FS_GENERAL_RO_ATTR(queued_discard);
+F2FS_GENERAL_RO_ATTR(undiscard_blks);
+
static struct attribute *f2fs_stat_attrs[] = {
ATTR_LIST(sb_status),
ATTR_LIST(cp_status),
+ ATTR_LIST(issued_discard),
+ ATTR_LIST(queued_discard),
+ ATTR_LIST(undiscard_blks),
NULL,
};
ATTRIBUTE_GROUPS(f2fs_stat);
@@ -1263,9 +1408,14 @@ static struct kset f2fs_kset = {
.kobj = {.ktype = &f2fs_ktype},
};
+static const struct sysfs_ops f2fs_feat_attr_ops = { /* routes show/store to the f2fs_base_attr handlers */
+ .show = f2fs_base_attr_show,
+ .store = f2fs_base_attr_store,
+};
+/* ktype pairing f2fs_feat_groups with the f2fs_feat_attr_ops show/store callbacks. */
static const struct kobj_type f2fs_feat_ktype = {
.default_groups = f2fs_feat_groups,
- .sysfs_ops = &f2fs_attr_ops,
+ .sysfs_ops = &f2fs_feat_attr_ops,
};
static struct kobject f2fs_feat = {
@@ -1448,6 +1598,50 @@ static int __maybe_unused discard_plist_seq_show(struct seq_file *seq,
return 0;
}
+static int __maybe_unused disk_map_seq_show(struct seq_file *seq,
+ void *offset) /* offset is not used */
+{
+ struct super_block *sb = seq->private;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
+ int i;
+ /* Backs the "disk_map" proc entry: block size, then start address and segment count per area. */
+ seq_printf(seq, "Address Layout : %5luB Block address (# of Segments)\n",
+ F2FS_BLKSIZE);
+ seq_printf(seq, " SB : %12s\n", "0/1024B");
+ seq_printf(seq, " seg0_blkaddr : 0x%010x\n", SEG0_BLKADDR(sbi));
+ seq_printf(seq, " Checkpoint : 0x%010x (%10d)\n",
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr), 2);
+ seq_printf(seq, " SIT : 0x%010x (%10d)\n",
+ SIT_I(sbi)->sit_base_addr,
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_sit));
+ seq_printf(seq, " NAT : 0x%010x (%10d)\n",
+ NM_I(sbi)->nat_blkaddr,
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_nat));
+ seq_printf(seq, " SSA : 0x%010x (%10d)\n",
+ SM_I(sbi)->ssa_blkaddr,
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_ssa));
+ seq_printf(seq, " Main : 0x%010x (%10d)\n",
+ SM_I(sbi)->main_blkaddr,
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main));
+ seq_printf(seq, " # of Sections : %12d\n",
+ le32_to_cpu(F2FS_RAW_SUPER(sbi)->section_count));
+ seq_printf(seq, " Segs/Sections : %12d\n",
+ SEGS_PER_SEC(sbi));
+ seq_printf(seq, " Section size : %12d MB\n",
+ SEGS_PER_SEC(sbi) << 1); /* NOTE(review): "<< 1" assumes 2 MB per segment -- confirm for non-4K F2FS_BLKSIZE */
+ /* Filesystems that are not multi-device have no per-device map to print. */
+ if (!f2fs_is_multi_device(sbi))
+ return 0;
+ /* One row per device: index, zoned flag, start/end block, path. */
+ seq_puts(seq, "\nDisk Map for multi devices:\n");
+ for (i = 0; i < sbi->s_ndevs; i++)
+ seq_printf(seq, "Disk:%2d (zoned=%d): 0x%010x - 0x%010x on %s\n",
+ i, bdev_is_zoned(FDEV(i).bdev),
+ FDEV(i).start_blk, FDEV(i).end_blk,
+ FDEV(i).path);
+ return 0;
+}
+
int __init f2fs_init_sysfs(void)
{
int ret;
@@ -1529,6 +1723,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
victim_bits_seq_show, sb);
proc_create_single_data("discard_plist_info", 0444, sbi->s_proc,
discard_plist_seq_show, sb);
+ proc_create_single_data("disk_map", 0444, sbi->s_proc,
+ disk_map_seq_show, sb);
return 0;
put_feature_list_kobj:
kobject_put(&sbi->s_feature_list_kobj);
diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c
index 4fc95f353a7a..2287f238ae09 100644
--- a/fs/f2fs/verity.c
+++ b/fs/f2fs/verity.c
@@ -74,23 +74,23 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
struct address_space *mapping = inode->i_mapping;
const struct address_space_operations *aops = mapping->a_ops;
- if (pos + count > inode->i_sb->s_maxbytes)
+ if (pos + count > F2FS_BLK_TO_BYTES(max_file_blocks(inode)))
return -EFBIG;
while (count) {
size_t n = min_t(size_t, count,
PAGE_SIZE - offset_in_page(pos));
- struct page *page;
+ struct folio *folio;
void *fsdata = NULL;
int res;
- res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata);
+ res = aops->write_begin(NULL, mapping, pos, n, &folio, &fsdata);
if (res)
return res;
- memcpy_to_page(page, offset_in_page(pos), buf, n);
+ memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, n);
- res = aops->write_end(NULL, mapping, pos, n, n, page, fsdata);
+ res = aops->write_end(NULL, mapping, pos, n, n, folio, fsdata);
if (res < 0)
return res;
if (res != n)
@@ -237,7 +237,8 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
pos = le64_to_cpu(dloc.pos);
/* Get the descriptor */
- if (pos + size < pos || pos + size > inode->i_sb->s_maxbytes ||
+ if (pos + size < pos ||
+ pos + size > F2FS_BLK_TO_BYTES(max_file_blocks(inode)) ||
pos < f2fs_verity_metadata_pos(inode) || size > INT_MAX) {
f2fs_warn(F2FS_I_SB(inode), "invalid verity xattr");
f2fs_handle_error(F2FS_I_SB(inode),
@@ -258,21 +259,23 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
pgoff_t index,
unsigned long num_ra_pages)
{
- struct page *page;
+ struct folio *folio;
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
- page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED);
- if (!page || !PageUptodate(page)) {
+ folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0); /* failure is reported as an error pointer, checked with IS_ERR() below */
+ if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);
- if (page)
- put_page(page);
+ if (!IS_ERR(folio))
+ folio_put(folio); /* release the cached but not-uptodate folio before re-reading */
else if (num_ra_pages > 1)
page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
- page = read_mapping_page(inode->i_mapping, index, NULL);
+ folio = read_mapping_folio(inode->i_mapping, index, NULL);
+ if (IS_ERR(folio))
+ return ERR_CAST(folio); /* pass the read error back as a struct page * error pointer */
}
- return page;
+ return folio_file_page(folio, index); /* the function's interface still returns a struct page * */
}
static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf,
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 54ab9caaae4d..3f3874943679 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -189,7 +189,7 @@ const struct xattr_handler f2fs_xattr_security_handler = {
.set = f2fs_xattr_generic_set,
};
-static const struct xattr_handler *f2fs_xattr_handler_map[] = {
+static const struct xattr_handler * const f2fs_xattr_handler_map[] = {
[F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler,
#ifdef CONFIG_F2FS_FS_POSIX_ACL
[F2FS_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access,
@@ -202,7 +202,7 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = {
[F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler,
};
-const struct xattr_handler *f2fs_xattr_handlers[] = {
+const struct xattr_handler * const f2fs_xattr_handlers[] = {
&f2fs_xattr_user_handler,
&f2fs_xattr_trusted_handler,
#ifdef CONFIG_F2FS_FS_SECURITY
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index b1811c392e6f..a005ffdcf717 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -125,7 +125,7 @@ extern const struct xattr_handler f2fs_xattr_trusted_handler;
extern const struct xattr_handler f2fs_xattr_advise_handler;
extern const struct xattr_handler f2fs_xattr_security_handler;
-extern const struct xattr_handler *f2fs_xattr_handlers[];
+extern const struct xattr_handler * const f2fs_xattr_handlers[];
extern int f2fs_setxattr(struct inode *, int, const char *,
const void *, size_t, struct page *, int);