diff options
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- | fs/f2fs/Kconfig | 3 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 38 | ||||
-rw-r--r-- | fs/f2fs/acl.h | 10 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 311 | ||||
-rw-r--r-- | fs/f2fs/compress.c | 203 | ||||
-rw-r--r-- | fs/f2fs/data.c | 602 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 114 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 286 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 129 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 489 | ||||
-rw-r--r-- | fs/f2fs/file.c | 477 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 223 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 1 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 315 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 186 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 154 | ||||
-rw-r--r-- | fs/f2fs/node.c | 997 | ||||
-rw-r--r-- | fs/f2fs/node.h | 25 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 189 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 617 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 213 | ||||
-rw-r--r-- | fs/f2fs/shrinker.c | 99 | ||||
-rw-r--r-- | fs/f2fs/super.c | 455 | ||||
-rw-r--r-- | fs/f2fs/sysfs.c | 199 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 116 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 24 |
26 files changed, 3835 insertions, 2640 deletions
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 68a1e23e1557..5916a02fb46d 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -4,8 +4,7 @@ config F2FS_FS depends on BLOCK select BUFFER_HEAD select NLS - select CRYPTO - select CRYPTO_CRC32 + select CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION select FS_ENCRYPTION_ALGS if FS_ENCRYPTION select FS_IOMAP diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 8bffdeccdbc3..d4d7f329d23f 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -166,7 +166,7 @@ fail: } static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, - struct page *dpage) + struct folio *dfolio) { int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; void *value = NULL; @@ -176,13 +176,13 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); + retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dfolio); if (retval > 0) { value = f2fs_kmalloc(F2FS_I_SB(inode), retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); retval = f2fs_getxattr(inode, name_index, "", value, - retval, dpage); + retval, dfolio); } if (retval > 0) @@ -227,7 +227,7 @@ static int f2fs_acl_update_mode(struct mnt_idmap *idmap, static int __f2fs_set_acl(struct mnt_idmap *idmap, struct inode *inode, int type, - struct posix_acl *acl, struct page *ipage) + struct posix_acl *acl, struct folio *ifolio) { int name_index; void *value = NULL; @@ -238,9 +238,8 @@ static int __f2fs_set_acl(struct mnt_idmap *idmap, switch (type) { case ACL_TYPE_ACCESS: name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl && !ipage) { - error = f2fs_acl_update_mode(idmap, inode, - &mode, &acl); + if (acl && !ifolio) { + error = f2fs_acl_update_mode(idmap, inode, &mode, &acl); if (error) return error; set_acl_inode(inode, mode); @@ -265,7 +264,7 @@ static int __f2fs_set_acl(struct mnt_idmap *idmap, } } - error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); + error = f2fs_setxattr(inode, name_index, "", value, size, ifolio, 0); kfree(value); if (!error) @@ -296,9 +295,8 @@ static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, struct posix_acl *clone = NULL; if (acl) { - int size = sizeof(struct posix_acl) + acl->a_count * - sizeof(struct posix_acl_entry); - clone = kmemdup(acl, size, flags); + clone = kmemdup(acl, struct_size(acl, a_entries, acl->a_count), + flags); if (clone) refcount_set(&clone->a_refcount, 1); } @@ -361,7 +359,7 @@ static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) static int f2fs_acl_create(struct inode *dir, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl, - struct page *dpage) + struct folio *dfolio) { struct posix_acl *p; struct posix_acl *clone; @@ -373,7 +371,7 @@ static int f2fs_acl_create(struct inode *dir, umode_t *mode, if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) return 0; - p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); + p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dfolio); if (!p || p == ERR_PTR(-EOPNOTSUPP)) { *mode &= ~current_umask(); return 0; @@ -410,29 +408,29 @@ release_acl: return ret; } -int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, - struct page *dpage) +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct folio *ifolio, + struct folio *dfolio) { struct posix_acl *default_acl = NULL, *acl = NULL; int error; - error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); + error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dfolio); if (error) return error; f2fs_mark_inode_dirty_sync(inode, true); if (default_acl) { - error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, default_acl, - ipage); + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, + default_acl, ifolio); posix_acl_release(default_acl); } else { inode->i_default_acl = NULL; } if (acl) { if (!error) - error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, acl, - ipage); + error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, + acl, ifolio); posix_acl_release(acl); } else { inode->i_acl = NULL; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 94ebfbfbdc6f..20e87e63c089 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -33,17 +33,17 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL -extern struct posix_acl *f2fs_get_acl(struct inode *, int, bool); -extern int f2fs_set_acl(struct mnt_idmap *, struct dentry *, +struct posix_acl *f2fs_get_acl(struct inode *, int, bool); +int f2fs_set_acl(struct mnt_idmap *, struct dentry *, struct posix_acl *, int); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, - struct page *); +int f2fs_init_acl(struct inode *, struct inode *, struct folio *ifolio, + struct folio *dfolio); #else #define f2fs_get_acl NULL #define f2fs_set_acl NULL static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *ipage, struct page *dpage) + struct folio *ifolio, struct folio *dfolio) { return 0; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7f76460b721f..f149ec28aefd 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -21,7 +21,7 @@ #include "iostat.h" #include <trace/events/f2fs.h> -#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) +#define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 3)) static struct kmem_cache *ino_entry_slab; struct kmem_cache *f2fs_inode_entry_slab; @@ -29,36 +29,36 @@ struct kmem_cache *f2fs_inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, unsigned char reason) { - f2fs_build_fault_attr(sbi, 0, 0); + f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); if (!end_io) f2fs_flush_merged_writes(sbi); - f2fs_handle_critical_error(sbi, reason, end_io); + f2fs_handle_critical_error(sbi, reason); } /* * We guarantee no failure on the returned page. */ -struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct folio *f2fs_grab_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index) { struct address_space *mapping = META_MAPPING(sbi); - struct page *page; + struct folio *folio; repeat: - page = f2fs_grab_cache_page(mapping, index, false); - if (!page) { + folio = f2fs_grab_cache_folio(mapping, index, false); + if (IS_ERR(folio)) { cond_resched(); goto repeat; } - f2fs_wait_on_page_writeback(page, META, true, true); - if (!PageUptodate(page)) - SetPageUptodate(page); - return page; + f2fs_folio_wait_writeback(folio, META, true, true); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + return folio; } -static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, +static struct folio *__get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index, bool is_meta) { struct address_space *mapping = META_MAPPING(sbi); - struct page *page; + struct folio *folio; struct f2fs_io_info fio = { .sbi = sbi, .type = META, @@ -74,64 +74,64 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index, if (unlikely(!is_meta)) fio.op_flags &= ~REQ_META; repeat: - page = f2fs_grab_cache_page(mapping, index, false); - if (!page) { + folio = f2fs_grab_cache_folio(mapping, index, false); + if (IS_ERR(folio)) { cond_resched(); goto repeat; } - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) goto out; - fio.page = page; + fio.page = &folio->page; err = f2fs_submit_page_bio(&fio); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return ERR_PTR(err); } f2fs_update_iostat(sbi, NULL, FS_META_READ_IO, F2FS_BLKSIZE); - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + folio_lock(folio); + if (unlikely(!is_meta_folio(folio))) { + f2fs_folio_put(folio, true); goto repeat; } - if (unlikely(!PageUptodate(page))) { - f2fs_handle_page_eio(sbi, page_folio(page), META); - f2fs_put_page(page, 1); + if (unlikely(!folio_test_uptodate(folio))) { + f2fs_handle_page_eio(sbi, folio, META); + f2fs_folio_put(folio, true); return ERR_PTR(-EIO); } out: - return page; + return folio; } -struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct folio *f2fs_get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index) { - return __get_meta_page(sbi, index, true); + return __get_meta_folio(sbi, index, true); } -struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index) +struct folio *f2fs_get_meta_folio_retry(struct f2fs_sb_info *sbi, pgoff_t index) { - struct page *page; + struct folio *folio; int count = 0; retry: - page = __get_meta_page(sbi, index, true); - if (IS_ERR(page)) { - if (PTR_ERR(page) == -EIO && + folio = __get_meta_folio(sbi, index, true); + if (IS_ERR(folio)) { + if (PTR_ERR(folio) == -EIO && ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_META_PAGE); } - return page; + return folio; } /* for POR only */ -struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index) +struct folio *f2fs_get_tmp_folio(struct f2fs_sb_info *sbi, pgoff_t index) { - return __get_meta_page(sbi, index, false); + return __get_meta_folio(sbi, index, false); } static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr, @@ -252,7 +252,6 @@ bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync) { - struct page *page; block_t blkno = start; struct f2fs_io_info fio = { .sbi = sbi, @@ -271,6 +270,7 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, blk_start_plug(&plug); for (; nrpages-- > 0; blkno++) { + struct folio *folio; if (!f2fs_is_valid_blkaddr(sbi, blkno, type)) goto out; @@ -300,18 +300,18 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, BUG(); } - page = f2fs_grab_cache_page(META_MAPPING(sbi), + folio = f2fs_grab_cache_folio(META_MAPPING(sbi), fio.new_blkaddr, false); - if (!page) + if (IS_ERR(folio)) continue; - if (PageUptodate(page)) { - f2fs_put_page(page, 1); + if (folio_test_uptodate(folio)) { + f2fs_folio_put(folio, true); continue; } - fio.page = page; + fio.page = &folio->page; err = f2fs_submit_page_bio(&fio); - f2fs_put_page(page, err ? 1 : 0); + f2fs_folio_put(folio, err ? true : false); if (!err) f2fs_update_iostat(sbi, NULL, FS_META_READ_IO, @@ -325,27 +325,26 @@ out: void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, unsigned int ra_blocks) { - struct page *page; + struct folio *folio; bool readahead = false; if (ra_blocks == RECOVERY_MIN_RA_BLOCKS) return; - page = find_get_page(META_MAPPING(sbi), index); - if (!page || !PageUptodate(page)) + folio = filemap_get_folio(META_MAPPING(sbi), index); + if (IS_ERR(folio) || !folio_test_uptodate(folio)) readahead = true; - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); if (readahead) f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true); } -static int __f2fs_write_meta_page(struct page *page, +static bool __f2fs_write_meta_folio(struct folio *folio, struct writeback_control *wbc, enum iostat_type io_type) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); - struct folio *folio = page_folio(page); + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); trace_f2fs_writepage(folio, META); @@ -354,37 +353,26 @@ static int __f2fs_write_meta_page(struct page *page, folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_META); folio_unlock(folio); - return 0; + return true; } goto redirty_out; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (wbc->for_reclaim && folio->index < GET_SUM_BLOCK(sbi, 0)) - goto redirty_out; f2fs_do_write_meta_page(sbi, folio, io_type); dec_page_count(sbi, F2FS_DIRTY_META); - if (wbc->for_reclaim) - f2fs_submit_merged_write_cond(sbi, NULL, page, 0, META); - folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) f2fs_submit_merged_write(sbi, META); - return 0; + return true; redirty_out: - redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; -} - -static int f2fs_write_meta_page(struct page *page, - struct writeback_control *wbc) -{ - return __f2fs_write_meta_page(page, wbc, FS_META_IO); + folio_redirty_for_writepage(wbc, folio); + return false; } static int f2fs_write_meta_pages(struct address_space *mapping, @@ -427,9 +415,7 @@ long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, struct folio_batch fbatch; long nwritten = 0; int nr_folios; - struct writeback_control wbc = { - .for_reclaim = 0, - }; + struct writeback_control wbc = {}; struct blk_plug plug; folio_batch_init(&fbatch); @@ -453,7 +439,7 @@ long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, folio_lock(folio); - if (unlikely(folio->mapping != mapping)) { + if (unlikely(!is_meta_folio(folio))) { continue_unlock: folio_unlock(folio); continue; @@ -463,13 +449,12 @@ continue_unlock: goto continue_unlock; } - f2fs_wait_on_page_writeback(&folio->page, META, - true, true); + f2fs_folio_wait_writeback(folio, META, true, true); if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - if (__f2fs_write_meta_page(&folio->page, &wbc, + if (!__f2fs_write_meta_folio(folio, &wbc, io_type)) { folio_unlock(folio); break; @@ -507,7 +492,6 @@ static bool f2fs_dirty_meta_folio(struct address_space *mapping, } const struct address_space_operations f2fs_meta_aops = { - .writepage = f2fs_write_meta_page, .writepages = f2fs_write_meta_pages, .dirty_folio = f2fs_dirty_meta_folio, .invalidate_folio = f2fs_invalidate_folio, @@ -520,6 +504,7 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, { struct inode_management *im = &sbi->im[type]; struct ino_entry *e = NULL, *new = NULL; + int ret; if (type == FLUSH_INO) { rcu_read_lock(); @@ -532,7 +517,8 @@ retry: new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS, true, NULL); - radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); + ret = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); + f2fs_bug_on(sbi, ret); spin_lock(&im->ino_lock); e = radix_tree_lookup(&im->ino_root, ino); @@ -757,26 +743,26 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) f2fs_ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true); for (i = 0; i < orphan_blocks; i++) { - struct page *page; + struct folio *folio; struct f2fs_orphan_block *orphan_blk; - page = f2fs_get_meta_page(sbi, start_blk + i); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_meta_folio(sbi, start_blk + i); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto out; } - orphan_blk = (struct f2fs_orphan_block *)page_address(page); + orphan_blk = folio_address(folio); for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) { nid_t ino = le32_to_cpu(orphan_blk->ino[j]); err = recover_orphan_inode(sbi, ino); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); goto out; } } - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } /* clear Orphan Flag */ clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); @@ -793,7 +779,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) unsigned int nentries = 0; unsigned short index = 1; unsigned short orphan_blocks; - struct page *page = NULL; + struct folio *folio = NULL; struct ino_entry *orphan = NULL; struct inode_management *im = &sbi->im[ORPHAN_INO]; @@ -808,10 +794,9 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) /* loop for each orphan inode entry and write them in journal block */ list_for_each_entry(orphan, head, list) { - if (!page) { - page = f2fs_grab_meta_page(sbi, start_blk++); - orphan_blk = - (struct f2fs_orphan_block *)page_address(page); + if (!folio) { + folio = f2fs_grab_meta_folio(sbi, start_blk++); + orphan_blk = folio_address(folio); memset(orphan_blk, 0, sizeof(*orphan_blk)); } @@ -826,62 +811,61 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) orphan_blk->blk_addr = cpu_to_le16(index); orphan_blk->blk_count = cpu_to_le16(orphan_blocks); orphan_blk->entry_count = cpu_to_le32(nentries); - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); index++; nentries = 0; - page = NULL; + folio = NULL; } } - if (page) { + if (folio) { orphan_blk->blk_addr = cpu_to_le16(index); orphan_blk->blk_count = cpu_to_le16(orphan_blocks); orphan_blk->entry_count = cpu_to_le32(nentries); - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } } -static __u32 f2fs_checkpoint_chksum(struct f2fs_sb_info *sbi, - struct f2fs_checkpoint *ckpt) +static __u32 f2fs_checkpoint_chksum(struct f2fs_checkpoint *ckpt) { unsigned int chksum_ofs = le32_to_cpu(ckpt->checksum_offset); __u32 chksum; - chksum = f2fs_crc32(sbi, ckpt, chksum_ofs); + chksum = f2fs_crc32(ckpt, chksum_ofs); if (chksum_ofs < CP_CHKSUM_OFFSET) { chksum_ofs += sizeof(chksum); - chksum = f2fs_chksum(sbi, chksum, (__u8 *)ckpt + chksum_ofs, - F2FS_BLKSIZE - chksum_ofs); + chksum = f2fs_chksum(chksum, (__u8 *)ckpt + chksum_ofs, + F2FS_BLKSIZE - chksum_ofs); } return chksum; } static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, - struct f2fs_checkpoint **cp_block, struct page **cp_page, + struct f2fs_checkpoint **cp_block, struct folio **cp_folio, unsigned long long *version) { size_t crc_offset = 0; __u32 crc; - *cp_page = f2fs_get_meta_page(sbi, cp_addr); - if (IS_ERR(*cp_page)) - return PTR_ERR(*cp_page); + *cp_folio = f2fs_get_meta_folio(sbi, cp_addr); + if (IS_ERR(*cp_folio)) + return PTR_ERR(*cp_folio); - *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); + *cp_block = folio_address(*cp_folio); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); if (crc_offset < CP_MIN_CHKSUM_OFFSET || crc_offset > CP_CHKSUM_OFFSET) { - f2fs_put_page(*cp_page, 1); + f2fs_folio_put(*cp_folio, true); f2fs_warn(sbi, "invalid crc_offset: %zu", crc_offset); return -EINVAL; } - crc = f2fs_checkpoint_chksum(sbi, *cp_block); + crc = f2fs_checkpoint_chksum(*cp_block); if (crc != cur_cp_crc(*cp_block)) { - f2fs_put_page(*cp_page, 1); + f2fs_folio_put(*cp_folio, true); f2fs_warn(sbi, "invalid crc value"); return -EINVAL; } @@ -890,17 +874,17 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, return 0; } -static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, +static struct folio *validate_checkpoint(struct f2fs_sb_info *sbi, block_t cp_addr, unsigned long long *version) { - struct page *cp_page_1 = NULL, *cp_page_2 = NULL; + struct folio *cp_folio_1 = NULL, *cp_folio_2 = NULL; struct f2fs_checkpoint *cp_block = NULL; unsigned long long cur_version = 0, pre_version = 0; unsigned int cp_blocks; int err; err = get_checkpoint_version(sbi, cp_addr, &cp_block, - &cp_page_1, version); + &cp_folio_1, version); if (err) return NULL; @@ -915,19 +899,19 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, cp_addr += cp_blocks - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, - &cp_page_2, version); + &cp_folio_2, version); if (err) goto invalid_cp; cur_version = *version; if (cur_version == pre_version) { *version = cur_version; - f2fs_put_page(cp_page_2, 1); - return cp_page_1; + f2fs_folio_put(cp_folio_2, true); + return cp_folio_1; } - f2fs_put_page(cp_page_2, 1); + f2fs_folio_put(cp_folio_2, true); invalid_cp: - f2fs_put_page(cp_page_1, 1); + f2fs_folio_put(cp_folio_1, true); return NULL; } @@ -935,7 +919,7 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) { struct f2fs_checkpoint *cp_block; struct f2fs_super_block *fsb = sbi->raw_super; - struct page *cp1, *cp2, *cur_page; + struct folio *cp1, *cp2, *cur_folio; unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; @@ -962,22 +946,22 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) if (cp1 && cp2) { if (ver_after(cp2_version, cp1_version)) - cur_page = cp2; + cur_folio = cp2; else - cur_page = cp1; + cur_folio = cp1; } else if (cp1) { - cur_page = cp1; + cur_folio = cp1; } else if (cp2) { - cur_page = cp2; + cur_folio = cp2; } else { err = -EFSCORRUPTED; goto fail_no_cp; } - cp_block = (struct f2fs_checkpoint *)page_address(cur_page); + cp_block = folio_address(cur_folio); memcpy(sbi->ckpt, cp_block, blk_size); - if (cur_page == cp1) + if (cur_folio == cp1) sbi->cur_cp_pack = 1; else sbi->cur_cp_pack = 2; @@ -992,30 +976,30 @@ int f2fs_get_valid_checkpoint(struct f2fs_sb_info *sbi) goto done; cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); - if (cur_page == cp2) + if (cur_folio == cp2) cp_blk_no += BIT(le32_to_cpu(fsb->log_blocks_per_seg)); for (i = 1; i < cp_blks; i++) { void *sit_bitmap_ptr; unsigned char *ckpt = (unsigned char *)sbi->ckpt; - cur_page = f2fs_get_meta_page(sbi, cp_blk_no + i); - if (IS_ERR(cur_page)) { - err = PTR_ERR(cur_page); + cur_folio = f2fs_get_meta_folio(sbi, cp_blk_no + i); + if (IS_ERR(cur_folio)) { + err = PTR_ERR(cur_folio); goto free_fail_no_cp; } - sit_bitmap_ptr = page_address(cur_page); + sit_bitmap_ptr = folio_address(cur_folio); memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); - f2fs_put_page(cur_page, 1); + f2fs_folio_put(cur_folio, true); } done: - f2fs_put_page(cp1, 1); - f2fs_put_page(cp2, 1); + f2fs_folio_put(cp1, true); + f2fs_folio_put(cp2, true); return 0; free_fail_no_cp: - f2fs_put_page(cp1, 1); - f2fs_put_page(cp2, 1); + f2fs_folio_put(cp1, true); + f2fs_folio_put(cp2, true); fail_no_cp: kvfree(sbi->ckpt); return err; @@ -1225,7 +1209,6 @@ static int block_operations(struct f2fs_sb_info *sbi) struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, - .for_reclaim = 0, }; int err = 0, cnt = 0; @@ -1237,7 +1220,7 @@ static int block_operations(struct f2fs_sb_info *sbi) retry_flush_quotas: f2fs_lock_all(sbi); if (__need_flush_quota(sbi)) { - int locked; + bool need_lock = sbi->umount_lock_holder != current; if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) { set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH); @@ -1246,11 +1229,13 @@ retry_flush_quotas: } f2fs_unlock_all(sbi); - /* only failed during mount/umount/freeze/quotactl */ - locked = down_read_trylock(&sbi->sb->s_umount); - f2fs_quota_sync(sbi->sb, -1); - if (locked) + /* don't grab s_umount lock during mount/umount/remount/freeze/quotactl */ + if (!need_lock) { + f2fs_do_quota_sync(sbi->sb, -1); + } else if (down_read_trylock(&sbi->sb->s_umount)) { + f2fs_do_quota_sync(sbi->sb, -1); up_read(&sbi->sb->s_umount); + } cond_resched(); goto retry_flush_quotas; } @@ -1344,21 +1329,13 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); unsigned long flags; - if (cpc->reason & CP_UMOUNT) { - if (le32_to_cpu(ckpt->cp_pack_total_block_count) + - NM_I(sbi)->nat_bits_blocks > BLKS_PER_SEG(sbi)) { - clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG); - f2fs_notice(sbi, "Disable nat_bits due to no space"); - } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) && - f2fs_nat_bitmap_enabled(sbi)) { - f2fs_enable_nat_bits(sbi); - set_ckpt_flags(sbi, CP_NAT_BITS_FLAG); - f2fs_notice(sbi, "Rebuild and enable nat_bits"); - } - } - spin_lock_irqsave(&sbi->cp_lock, flags); + if ((cpc->reason & CP_UMOUNT) && + le32_to_cpu(ckpt->cp_pack_total_block_count) > + sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) + disable_nat_bits(sbi, false); + if (cpc->reason & CP_TRIMMED) __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); else @@ -1415,35 +1392,31 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) static void commit_checkpoint(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { - struct writeback_control wbc = { - .for_reclaim = 0, - }; + struct writeback_control wbc = {}; /* - * filemap_get_folios_tag and lock_page again will take + * filemap_get_folios_tag and folio_lock again will take * some extra time. Therefore, f2fs_update_meta_pages and * f2fs_sync_meta_pages are combined in this function. */ - struct page *page = f2fs_grab_meta_page(sbi, blk_addr); - int err; - - f2fs_wait_on_page_writeback(page, META, true, true); + struct folio *folio = f2fs_grab_meta_folio(sbi, blk_addr); - memcpy(page_address(page), src, PAGE_SIZE); + memcpy(folio_address(folio), src, PAGE_SIZE); - set_page_dirty(page); - if (unlikely(!clear_page_dirty_for_io(page))) + folio_mark_dirty(folio); + if (unlikely(!folio_clear_dirty_for_io(folio))) f2fs_bug_on(sbi, 1); /* writeout cp pack 2 page */ - err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO); - if (unlikely(err && f2fs_cp_error(sbi))) { - f2fs_put_page(page, 1); - return; + if (unlikely(!__f2fs_write_meta_folio(folio, &wbc, FS_CP_META_IO))) { + if (f2fs_cp_error(sbi)) { + f2fs_folio_put(folio, true); + return; + } + f2fs_bug_on(sbi, true); } - f2fs_bug_on(sbi, err); - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); /* submit checkpoint (with barrier if NOBARRIER is not set) */ f2fs_submit_merged_write(sbi, META_FLUSH); @@ -1533,7 +1506,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); - crc32 = f2fs_checkpoint_chksum(sbi, ckpt); + crc32 = f2fs_checkpoint_chksum(ckpt); *((__le32 *)((unsigned char *)ckpt + le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); @@ -1541,8 +1514,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) start_blk = __start_cp_next_addr(sbi); /* write nat bits */ - if ((cpc->reason & CP_UMOUNT) && - is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) { + if (enabled_nat_bits(sbi, cpc)) { __u64 cp_ver = cur_cp_version(ckpt); block_t blk; @@ -1867,7 +1839,8 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi) struct cp_control cpc; cpc.reason = __get_cp_reason(sbi); - if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) { + if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC || + sbi->umount_lock_holder == current) { int ret; f2fs_down_write(&sbi->gc_lock); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 7f26440e8595..b3c1df93a163 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -82,7 +82,7 @@ bool f2fs_is_compressed_page(struct page *page) if (page_private_nonpointer(page)) return false; - f2fs_bug_on(F2FS_M_SB(page->mapping), + f2fs_bug_on(F2FS_P_SB(page), *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); return true; } @@ -137,9 +137,11 @@ static void f2fs_put_rpages_wbc(struct compress_ctx *cc, } } -struct page *f2fs_compress_control_page(struct page *page) +struct folio *f2fs_compress_control_folio(struct folio *folio) { - return ((struct compress_io_ctx *)page_private(page))->rpages[0]; + struct compress_io_ctx *ctx = folio->private; + + return page_folio(ctx->rpages[0]); } int f2fs_init_compress_ctx(struct compress_ctx *cc) @@ -178,8 +180,8 @@ void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct folio *folio) #ifdef CONFIG_F2FS_FS_LZO static int lzo_init_compress_ctx(struct compress_ctx *cc) { - cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), - LZO1X_MEM_COMPRESS, GFP_NOFS); + cc->private = f2fs_vmalloc(F2FS_I_SB(cc->inode), + LZO1X_MEM_COMPRESS); if (!cc->private) return -ENOMEM; @@ -189,7 +191,7 @@ static int lzo_init_compress_ctx(struct compress_ctx *cc) static void lzo_destroy_compress_ctx(struct compress_ctx *cc) { - kvfree(cc->private); + vfree(cc->private); cc->private = NULL; } @@ -246,7 +248,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc) size = LZ4HC_MEM_COMPRESS; #endif - cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), size, GFP_NOFS); + cc->private = f2fs_vmalloc(F2FS_I_SB(cc->inode), size); if (!cc->private) return -ENOMEM; @@ -261,7 +263,7 @@ static int lz4_init_compress_ctx(struct compress_ctx *cc) static void lz4_destroy_compress_ctx(struct compress_ctx *cc) { - kvfree(cc->private); + vfree(cc->private); cc->private = NULL; } @@ -342,8 +344,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) params = zstd_get_params(level, cc->rlen); workspace_size = zstd_cstream_workspace_bound(¶ms.cParams); - workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), - workspace_size, GFP_NOFS); + workspace = f2fs_vmalloc(F2FS_I_SB(cc->inode), workspace_size); if (!workspace) return -ENOMEM; @@ -351,7 +352,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) if (!stream) { f2fs_err_ratelimited(F2FS_I_SB(cc->inode), "%s zstd_init_cstream failed", __func__); - kvfree(workspace); + vfree(workspace); return -EIO; } @@ -364,7 +365,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc) static void zstd_destroy_compress_ctx(struct compress_ctx *cc) { - kvfree(cc->private); + vfree(cc->private); cc->private = NULL; cc->private2 = NULL; } @@ -423,8 +424,7 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) workspace_size = zstd_dstream_workspace_bound(max_window_size); - workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode), - workspace_size, GFP_NOFS); + workspace = f2fs_vmalloc(F2FS_I_SB(dic->inode), workspace_size); if (!workspace) return -ENOMEM; @@ -432,7 +432,7 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) if (!stream) { f2fs_err_ratelimited(F2FS_I_SB(dic->inode), "%s zstd_init_dstream failed", __func__); - kvfree(workspace); + vfree(workspace); return -EIO; } @@ -444,7 +444,7 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic) { - kvfree(dic->private); + vfree(dic->private); dic->private = NULL; dic->private2 = NULL; } @@ -593,11 +593,14 @@ static struct page *f2fs_compress_alloc_page(void) static void f2fs_compress_free_page(struct page *page) { + struct folio *folio; + if (!page) return; - detach_page_private(page); - page->mapping = NULL; - unlock_page(page); + folio = page_folio(page); + folio_detach_private(folio); + folio->mapping = NULL; + folio_unlock(folio); mempool_free(page, compress_page_pool); } @@ -674,8 +677,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) cc->cbuf->clen = cpu_to_le32(cc->clen); if (fi->i_compress_flag & BIT(COMPRESS_CHKSUM)) - chksum = f2fs_crc32(F2FS_I_SB(cc->inode), - cc->cbuf->cdata, cc->clen); + chksum = f2fs_crc32(cc->cbuf->cdata, cc->clen); cc->cbuf->chksum = cpu_to_le32(chksum); for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) @@ -771,7 +773,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) if (!ret && (fi->i_compress_flag & BIT(COMPRESS_CHKSUM))) { u32 provided = le32_to_cpu(dic->cbuf->chksum); - u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen); + u32 calculated = f2fs_crc32(dic->cbuf->cdata, dic->clen); if (provided != calculated) { if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) { @@ -846,7 +848,7 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, int index, int nr_pages, bool uptodate) { - unsigned long pgidx = pages[index]->index; + unsigned long pgidx = page_folio(pages[index])->index; int i = uptodate ? 0 : 1; /* @@ -860,9 +862,11 @@ bool f2fs_all_cluster_page_ready(struct compress_ctx *cc, struct page **pages, return false; for (; i < cc->cluster_size; i++) { - if (pages[index + i]->index != pgidx + i) + struct folio *folio = page_folio(pages[index + i]); + + if (folio->index != pgidx + i) return false; - if (uptodate && !PageUptodate(pages[index + i])) + if (uptodate && !folio_test_uptodate(folio)) return false; } @@ -907,7 +911,7 @@ bool f2fs_sanity_check_cluster(struct dnode_of_data *dn) } for (i = 1, count = 1; i < cluster_size; i++, count++) { - block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, + block_t blkaddr = data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node + i); /* [COMPR_ADDR, ..., COMPR_ADDR] */ @@ -948,7 +952,7 @@ static int __f2fs_get_cluster_blocks(struct inode *inode, int count, i; for (i = 0, count = 0; i < cluster_size; i++) { - block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, + block_t blkaddr = data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node + i); if (__is_valid_data_blkaddr(blkaddr)) @@ -1088,7 +1092,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, { struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); struct address_space *mapping = cc->inode->i_mapping; - struct page *page; + struct folio *folio; sector_t last_block_in_bio; fgf_t fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT; pgoff_t start_idx = start_idx_of_cluster(cc); @@ -1103,19 +1107,19 @@ retry: if (ret) return ret; - /* keep page reference to avoid page reclaim */ + /* keep folio reference to avoid page reclaim */ for (i = 0; i < cc->cluster_size; i++) { - page = f2fs_pagecache_get_page(mapping, start_idx + i, - fgp_flag, GFP_NOFS); - if (!page) { - ret = -ENOMEM; + folio = f2fs_filemap_get_folio(mapping, start_idx + i, + fgp_flag, GFP_NOFS); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); goto unlock_pages; } - if (PageUptodate(page)) - f2fs_put_page(page, 1); + if (folio_test_uptodate(folio)) + f2fs_folio_put(folio, true); else - f2fs_compress_ctx_add_page(cc, page_folio(page)); + f2fs_compress_ctx_add_page(cc, folio); } if (!f2fs_cluster_is_empty(cc)) { @@ -1138,16 +1142,17 @@ retry: for (i = 0; i < cc->cluster_size; i++) { f2fs_bug_on(sbi, cc->rpages[i]); - page = find_lock_page(mapping, start_idx + i); - if (!page) { - /* page can be truncated */ + folio = filemap_lock_folio(mapping, start_idx + i); + if (IS_ERR(folio)) { + /* folio could be truncated */ goto release_and_retry; } - f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_compress_ctx_add_page(cc, page_folio(page)); + f2fs_folio_wait_writeback(folio, DATA, true, true); + f2fs_compress_ctx_add_page(cc, folio); - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { + f2fs_handle_page_eio(sbi, folio, DATA); release_and_retry: f2fs_put_rpages(cc); f2fs_unlock_rpages(cc, i + 1); @@ -1195,7 +1200,8 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, .cluster_size = F2FS_I(inode)->i_cluster_size, .rpages = fsdata, }; - bool first_index = (index == cc.rpages[0]->index); + struct folio *folio = page_folio(cc.rpages[0]); + bool first_index = (index == folio->index); if (copied) set_cluster_dirty(&cc); @@ -1239,13 +1245,14 @@ int f2fs_truncate_partial_cluster(struct inode *inode, u64 from, bool lock) int i; for (i = cluster_size - 1; i >= 0; i--) { - loff_t start = rpages[i]->index << PAGE_SHIFT; + struct folio *folio = page_folio(rpages[i]); + loff_t start = folio->index << PAGE_SHIFT; if (from <= start) { - zero_user_segment(rpages[i], 0, PAGE_SIZE); + folio_zero_segment(folio, 0, folio_size(folio)); } else { - zero_user_segment(rpages[i], from - start, - PAGE_SIZE); + folio_zero_segment(folio, from - start, + folio_size(folio)); break; } } @@ -1278,6 +1285,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, .encrypted = fscrypt_inode_uses_fs_layer_crypto(cc->inode) ? 1 : 0, }; + struct folio *folio; struct dnode_of_data dn; struct node_info ni; struct compress_io_ctx *cic; @@ -1289,7 +1297,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, /* we should bypass data pages to proceed the kworker jobs */ if (unlikely(f2fs_cp_error(sbi))) { - mapping_set_error(cc->rpages[0]->mapping, -EIO); + mapping_set_error(inode->i_mapping, -EIO); goto out_free; } @@ -1311,12 +1319,13 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, goto out_unlock_op; for (i = 0; i < cc->cluster_size; i++) { - if (data_blkaddr(dn.inode, dn.node_page, + if (data_blkaddr(dn.inode, dn.node_folio, dn.ofs_in_node + i) == NULL_ADDR) goto out_put_dnode; } - psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; + folio = page_folio(cc->rpages[last_index]); + psize = folio_pos(folio) + folio_size(folio); err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); if (err) @@ -1339,10 +1348,10 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, for (i = 0; i < cc->valid_nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, - cc->rpages[i + 1]->index, cic); + page_folio(cc->rpages[i + 1])->index, cic); fio.compressed_page = cc->cpages[i]; - fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_page, + fio.old_blkaddr = data_blkaddr(dn.inode, dn.node_folio, dn.ofs_in_node + i + 1); /* wait for GCed page writeback via META_MAPPING */ @@ -1374,7 +1383,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (blkaddr == COMPRESS_ADDR) fio.compr_blocks++; if (__is_valid_data_blkaddr(blkaddr)) - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); f2fs_update_data_blkaddr(&dn, COMPRESS_ADDR); goto unlock_continue; } @@ -1384,7 +1393,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (i > cc->valid_nr_cpages) { if (__is_valid_data_blkaddr(blkaddr)) { - f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_invalidate_blocks(sbi, blkaddr, 1); f2fs_update_data_blkaddr(&dn, NEW_ADDR); } goto unlock_continue; @@ -1474,7 +1483,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page) f2fs_is_compressed_page(page)); int i; - if (unlikely(bio->bi_status)) + if (unlikely(bio->bi_status != BLK_STS_OK)) mapping_set_error(cic->inode->i_mapping, -EIO); f2fs_compress_free_page(page); @@ -1522,36 +1531,38 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, f2fs_lock_op(sbi); for (i = 0; i < cc->cluster_size; i++) { + struct folio *folio; + if (!cc->rpages[i]) continue; + folio = page_folio(cc->rpages[i]); retry_write: - lock_page(cc->rpages[i]); + folio_lock(folio); - if (cc->rpages[i]->mapping != mapping) { + if (folio->mapping != mapping) { continue_unlock: - unlock_page(cc->rpages[i]); + folio_unlock(folio); continue; } - if (!PageDirty(cc->rpages[i])) + if (!folio_test_dirty(folio)) goto continue_unlock; - if (folio_test_writeback(page_folio(cc->rpages[i]))) { + if (folio_test_writeback(folio)) { if (wbc->sync_mode == WB_SYNC_NONE) goto continue_unlock; - f2fs_wait_on_page_writeback(cc->rpages[i], DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); } - if (!clear_page_dirty_for_io(cc->rpages[i])) + if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - ret = f2fs_write_single_data_page(page_folio(cc->rpages[i]), - &submitted, + submitted = 0; + ret = f2fs_write_single_data_page(folio, &submitted, NULL, NULL, wbc, io_type, compr_blocks, false); if (ret) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(cc->rpages[i]); + if (ret == 1) { ret = 0; } else if (ret == -EAGAIN) { ret = 0; @@ -1854,14 +1865,13 @@ void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, } /* - * Put a reference to a compressed page's decompress_io_ctx. + * Put a reference to a compressed folio's decompress_io_ctx. * - * This is called when the page is no longer needed and can be freed. + * This is called when the folio is no longer needed and can be freed. */ -void f2fs_put_page_dic(struct page *page, bool in_task) +void f2fs_put_folio_dic(struct folio *folio, bool in_task) { - struct decompress_io_ctx *dic = - (struct decompress_io_ctx *)page_private(page); + struct decompress_io_ctx *dic = folio->private; f2fs_put_dic(dic, in_task); } @@ -1873,14 +1883,14 @@ void f2fs_put_page_dic(struct page *page, bool in_task) unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, unsigned int ofs_in_node) { - bool compressed = data_blkaddr(dn->inode, dn->node_page, + bool compressed = data_blkaddr(dn->inode, dn->node_folio, ofs_in_node) == COMPRESS_ADDR; int i = compressed ? 1 : 0; - block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, + block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, ofs_in_node + i); for (i += 1; i < F2FS_I(dn->inode)->i_cluster_size; i++) { - block_t blkaddr = data_blkaddr(dn->inode, dn->node_page, + block_t blkaddr = data_blkaddr(dn->inode, dn->node_folio, ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) @@ -1903,17 +1913,18 @@ struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi) return sbi->compress_inode->i_mapping; } -void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr) +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { if (!sbi->compress_inode) return; - invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr); + invalidate_mapping_pages(COMPRESS_MAPPING(sbi), blkaddr, blkaddr + len - 1); } void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, nid_t ino, block_t blkaddr) { - struct page *cpage; + struct folio *cfolio; int ret; if (!test_opt(sbi, COMPRESS_CACHE)) @@ -1925,49 +1936,49 @@ void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, if (!f2fs_available_free_memory(sbi, COMPRESS_PAGE)) return; - cpage = find_get_page(COMPRESS_MAPPING(sbi), blkaddr); - if (cpage) { - f2fs_put_page(cpage, 0); + cfolio = filemap_get_folio(COMPRESS_MAPPING(sbi), blkaddr); + if (!IS_ERR(cfolio)) { + f2fs_folio_put(cfolio, false); return; } - cpage = alloc_page(__GFP_NOWARN | __GFP_IO); - if (!cpage) + cfolio = filemap_alloc_folio(__GFP_NOWARN | __GFP_IO, 0); + if (!cfolio) return; - ret = add_to_page_cache_lru(cpage, COMPRESS_MAPPING(sbi), + ret = filemap_add_folio(COMPRESS_MAPPING(sbi), cfolio, blkaddr, GFP_NOFS); if (ret) { - f2fs_put_page(cpage, 0); + f2fs_folio_put(cfolio, false); return; } - set_page_private_data(cpage, ino); + set_page_private_data(&cfolio->page, ino); - memcpy(page_address(cpage), page_address(page), PAGE_SIZE); - SetPageUptodate(cpage); - f2fs_put_page(cpage, 1); + memcpy(folio_address(cfolio), page_address(page), PAGE_SIZE); + folio_mark_uptodate(cfolio); + f2fs_folio_put(cfolio, true); } -bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, +bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr) { - struct page *cpage; + struct folio *cfolio; bool hitted = false; if (!test_opt(sbi, COMPRESS_CACHE)) return false; - cpage = f2fs_pagecache_get_page(COMPRESS_MAPPING(sbi), + cfolio = f2fs_filemap_get_folio(COMPRESS_MAPPING(sbi), blkaddr, FGP_LOCK | FGP_NOWAIT, GFP_NOFS); - if (cpage) { - if (PageUptodate(cpage)) { + if (!IS_ERR(cfolio)) { + if (folio_test_uptodate(cfolio)) { atomic_inc(&sbi->compress_page_hit); - memcpy(page_address(page), - page_address(cpage), PAGE_SIZE); + memcpy(folio_address(folio), + folio_address(cfolio), folio_size(folio)); hitted = true; } - f2fs_put_page(cpage, 1); + f2fs_folio_put(cfolio, true); } return hitted; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 94f7b084f601..31e892842625 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -49,12 +49,12 @@ void f2fs_destroy_bioset(void) bool f2fs_is_cp_guaranteed(struct page *page) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = page_folio(page)->mapping; struct inode *inode; struct f2fs_sb_info *sbi; - if (!mapping) - return false; + if (fscrypt_is_bounce_page(page)) + return page_private_gcing(fscrypt_pagecache_page(page)); inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -70,9 +70,9 @@ bool f2fs_is_cp_guaranteed(struct page *page) return false; } -static enum count_type __read_io_type(struct page *page) +static enum count_type __read_io_type(struct folio *folio) { - struct address_space *mapping = page_file_mapping(page); + struct address_space *mapping = folio->mapping; if (mapping) { struct inode *inode = mapping->host; @@ -136,27 +136,22 @@ struct bio_post_read_ctx { */ static void f2fs_finish_read_bio(struct bio *bio, bool in_task) { - struct bio_vec *bv; - struct bvec_iter_all iter_all; + struct folio_iter fi; struct bio_post_read_ctx *ctx = bio->bi_private; - bio_for_each_segment_all(bv, bio, iter_all) { - struct page *page = bv->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - if (f2fs_is_compressed_page(page)) { + if (f2fs_is_compressed_page(&folio->page)) { if (ctx && !ctx->decompression_attempted) - f2fs_end_read_compressed_page(page, true, 0, + f2fs_end_read_compressed_page(&folio->page, true, 0, in_task); - f2fs_put_page_dic(page, in_task); + f2fs_put_folio_dic(folio, in_task); continue; } - if (bio->bi_status) - ClearPageUptodate(page); - else - SetPageUptodate(page); - dec_page_count(F2FS_P_SB(page), __read_io_type(page)); - unlock_page(page); + dec_page_count(F2FS_F_SB(folio), __read_io_type(folio)); + folio_end_read(folio, bio->bi_status == BLK_STS_OK); } if (ctx) @@ -295,7 +290,7 @@ static void f2fs_read_end_io(struct bio *bio) if (time_to_inject(sbi, FAULT_READ_IO)) bio->bi_status = BLK_STS_IOERR; - if (bio->bi_status) { + if (bio->bi_status != BLK_STS_OK) { f2fs_finish_read_bio(bio, intask); return; } @@ -324,8 +319,7 @@ static void f2fs_read_end_io(struct bio *bio) static void f2fs_write_end_io(struct bio *bio) { struct f2fs_sb_info *sbi; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; iostat_update_and_unbind_ctx(bio); sbi = bio->bi_private; @@ -333,34 +327,41 @@ static void f2fs_write_end_io(struct bio *bio) if (time_to_inject(sbi, FAULT_WRITE_IO)) bio->bi_status = BLK_STS_IOERR; - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - enum count_type type = WB_DATA_TYPE(page, false); + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; + enum count_type type; - fscrypt_finalize_bounce_page(&page); + if (fscrypt_is_bounce_folio(folio)) { + struct folio *io_folio = folio; + + folio = fscrypt_pagecache_folio(io_folio); + fscrypt_free_bounce_page(&io_folio->page); + } #ifdef CONFIG_F2FS_FS_COMPRESSION - if (f2fs_is_compressed_page(page)) { - f2fs_compress_write_end_io(bio, page); + if (f2fs_is_compressed_page(&folio->page)) { + f2fs_compress_write_end_io(bio, &folio->page); continue; } #endif - if (unlikely(bio->bi_status)) { - mapping_set_error(page->mapping, -EIO); + type = WB_DATA_TYPE(&folio->page, false); + + if (unlikely(bio->bi_status != BLK_STS_OK)) { + mapping_set_error(folio->mapping, -EIO); if (type == F2FS_WB_CP_DATA) f2fs_stop_checkpoint(sbi, true, STOP_CP_REASON_WRITE_FAIL); } - f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) && - page_folio(page)->index != nid_of_node(page)); + f2fs_bug_on(sbi, is_node_folio(folio) && + folio->index != nid_of_node(&folio->page)); dec_page_count(sbi, type); - if (f2fs_in_warm_node_list(sbi, page)) - f2fs_del_fsync_node_entry(sbi, page); - clear_page_private_gcing(page); - end_page_writeback(page); + if (f2fs_in_warm_node_list(sbi, folio)) + f2fs_del_fsync_node_entry(sbi, folio); + clear_page_private_gcing(&folio->page); + folio_end_writeback(folio); } if (!get_pages(sbi, F2FS_WB_CP_DATA) && wq_has_sleeper(&sbi->cp_wait)) @@ -418,6 +419,7 @@ int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr) static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) { unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0); + struct folio *fio_folio = page_folio(fio->page); unsigned int fua_flag, meta_flag, io_flag; blk_opf_t op_flags = 0; @@ -443,6 +445,11 @@ static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio) op_flags |= REQ_META; if (BIT(fio->temp) & fua_flag) op_flags |= REQ_FUA; + + if (fio->type == DATA && + F2FS_I(fio_folio->mapping->host)->ioprio_hint == F2FS_IOPRIO_WRITE) + op_flags |= REQ_PRIO; + return op_flags; } @@ -516,10 +523,6 @@ static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { WARN_ON_ONCE(is_read_io(bio_op(bio))); - - if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type)) - blk_finish_plug(current->plug); - trace_f2fs_submit_write_bio(sbi->sb, type, bio); iostat_update_submit_ctx(bio, type); submit_bio(bio); @@ -545,8 +548,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) static bool __has_merged_page(struct bio *bio, struct inode *inode, struct page *page, nid_t ino) { - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; if (!bio) return false; @@ -554,25 +556,25 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, if (!inode && !page && !ino) return true; - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *target = bvec->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *target = fi.folio; - if (fscrypt_is_bounce_page(target)) { - target = fscrypt_pagecache_page(target); + if (fscrypt_is_bounce_folio(target)) { + target = fscrypt_pagecache_folio(target); if (IS_ERR(target)) continue; } - if (f2fs_is_compressed_page(target)) { - target = f2fs_compress_control_page(target); + if (f2fs_is_compressed_page(&target->page)) { + target = f2fs_compress_control_folio(target); if (IS_ERR(target)) continue; } if (inode && inode == target->mapping->host) return true; - if (page && page == target) + if (page && page == &target->page) return true; - if (ino && ino == ino_of_node(target)) + if (ino && ino == ino_of_node(&target->page)) return true; } @@ -689,32 +691,29 @@ void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi) int f2fs_submit_page_bio(struct f2fs_io_info *fio) { struct bio *bio; - struct page *page = fio->encrypted_page ? - fio->encrypted_page : fio->page; + struct folio *fio_folio = page_folio(fio->page); + struct folio *data_folio = fio->encrypted_page ? + page_folio(fio->encrypted_page) : fio_folio; if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, fio->is_por ? META_POR : (__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(data_folio, fio); /* Allocate a new bio */ bio = __bio_alloc(fio, 1); - f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - page_folio(fio->page)->index, fio, GFP_NOIO); - - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - bio_put(bio); - return -EFAULT; - } + f2fs_set_bio_crypt_ctx(bio, fio_folio->mapping->host, + fio_folio->index, fio, GFP_NOIO); + bio_add_folio_nofail(bio, data_folio, folio_size(data_folio), 0); if (fio->io_wbc && !is_read_io(fio->op)) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, fio_folio, PAGE_SIZE); inc_page_count(fio->sbi, is_read_io(fio->op) ? - __read_io_type(page) : WB_DATA_TYPE(fio->page, false)); + __read_io_type(data_folio) : WB_DATA_TYPE(fio->page, false)); if (is_read_io(bio_op(bio))) f2fs_submit_read_bio(fio->sbi, bio, fio->type); @@ -780,6 +779,7 @@ static void del_bio_entry(struct bio_entry *be) static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct page *page) { + struct folio *fio_folio = page_folio(fio->page); struct f2fs_sb_info *sbi = fio->sbi; enum temp_type temp; bool found = false; @@ -801,8 +801,8 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, *fio->last_block, fio->new_blkaddr)); if (f2fs_crypt_mergeable_bio(*bio, - fio->page->mapping->host, - page_folio(fio->page)->index, fio) && + fio_folio->mapping->host, + fio_folio->index, fio) && bio_add_page(*bio, page, PAGE_SIZE, 0) == PAGE_SIZE) { ret = 0; @@ -826,13 +826,13 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, } void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, - struct bio **bio, struct page *page) + struct bio **bio, struct folio *folio) { enum temp_type temp; bool found = false; struct bio *target = bio ? *bio : NULL; - f2fs_bug_on(sbi, !target && !page); + f2fs_bug_on(sbi, !target && !folio); for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; @@ -848,7 +848,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - page, 0); + &folio->page, 0); if (found) break; } @@ -865,7 +865,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, found = (target == be->bio); else found = __has_merged_page(be->bio, NULL, - page, 0); + &folio->page, 0); if (found) { target = be->bio; del_bio_entry(be); @@ -888,12 +888,13 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) struct bio *bio = *fio->bio; struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page; + struct folio *folio = page_folio(fio->page); if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr, __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(page_folio(page), fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) @@ -901,8 +902,8 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_VECS); - f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, - page_folio(fio->page)->index, fio, GFP_NOIO); + f2fs_set_bio_crypt_ctx(bio, folio->mapping->host, + folio->index, fio, GFP_NOIO); add_bio_entry(fio->sbi, bio, page, fio->temp); } else { @@ -911,7 +912,7 @@ alloc_new: } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, folio, folio_size(folio)); inc_page_count(fio->sbi, WB_DATA_TYPE(page, false)); @@ -994,13 +995,13 @@ next: if (io->bio && (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, fio->new_blkaddr) || - !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, + !f2fs_crypt_mergeable_bio(io->bio, fio_inode(fio), page_folio(bio_page)->index, fio))) __submit_merged_bio(io); alloc_new: if (io->bio == NULL) { io->bio = __bio_alloc(fio, BIO_MAX_VECS); - f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, + f2fs_set_bio_crypt_ctx(io->bio, fio_inode(fio), page_folio(bio_page)->index, fio, GFP_NOIO); io->fio = *fio; } @@ -1011,11 +1012,12 @@ alloc_new: } if (fio->io_wbc) - wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE); + wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page), + PAGE_SIZE); io->last_block_in_bio = fio->new_blkaddr; - trace_f2fs_submit_page_write(fio->page, fio); + trace_f2fs_submit_folio_write(page_folio(fio->page), fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { @@ -1051,8 +1053,6 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages), REQ_OP_READ | op_flag, for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset); - if (!bio) - return ERR_PTR(-ENOMEM); bio->bi_iter.bi_sector = sector; f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS); bio->bi_end_io = f2fs_read_end_io; @@ -1116,7 +1116,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct folio *folio, static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - __le32 *addr = get_dnode_addr(dn->inode, dn->node_page); + __le32 *addr = get_dnode_addr(dn->inode, dn->node_folio); dn->data_blkaddr = blkaddr; addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr); @@ -1125,14 +1125,14 @@ static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) /* * Lock ordering for the change of data block address: * ->data_page - * ->node_page + * ->node_folio * update block addresses in the node page */ void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr) { - f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); + f2fs_folio_wait_writeback(dn->node_folio, NODE, true, true); __set_data_blkaddr(dn, blkaddr); - if (set_page_dirty(dn->node_page)) + if (folio_mark_dirty(dn->node_folio)) dn->node_changed = true; } @@ -1160,7 +1160,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) trace_f2fs_reserve_new_blocks(dn->inode, dn->nid, dn->ofs_in_node, count); - f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true); + f2fs_folio_wait_writeback(dn->node_folio, NODE, true, true); for (; count > 0; dn->ofs_in_node++) { block_t blkaddr = f2fs_data_blkaddr(dn); @@ -1171,7 +1171,7 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) } } - if (set_page_dirty(dn->node_page)) + if (folio_mark_dirty(dn->node_folio)) dn->node_changed = true; return 0; } @@ -1189,7 +1189,7 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn) int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) { - bool need_put = dn->inode_page ? false : true; + bool need_put = dn->inode_folio ? false : true; int err; err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE); @@ -1203,18 +1203,17 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) return err; } -struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - blk_opf_t op_flags, bool for_write, - pgoff_t *next_pgofs) +struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index, + blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; struct dnode_of_data dn; - struct page *page; + struct folio *folio; int err; - page = f2fs_grab_cache_page(mapping, index, for_write); - if (!page) - return ERR_PTR(-ENOMEM); + folio = f2fs_grab_cache_folio(mapping, index, for_write); + if (IS_ERR(folio)) + return folio; if (f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { @@ -1249,61 +1248,64 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, goto put_err; } got_it: - if (PageUptodate(page)) { - unlock_page(page); - return page; + if (folio_test_uptodate(folio)) { + folio_unlock(folio); + return folio; } /* * A new dentry page is allocated but not able to be written, since its * new inode page couldn't be allocated due to -ENOSPC. * In such the case, its blkaddr can be remained as NEW_ADDR. - * see, f2fs_add_link -> f2fs_get_new_data_page -> + * see, f2fs_add_link -> f2fs_get_new_data_folio -> * f2fs_init_inode_metadata. */ if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); - unlock_page(page); - return page; + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + folio_unlock(folio); + return folio; } - err = f2fs_submit_page_read(inode, page_folio(page), dn.data_blkaddr, + err = f2fs_submit_page_read(inode, folio, dn.data_blkaddr, op_flags, for_write); if (err) goto put_err; - return page; + return folio; put_err: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return ERR_PTR(err); } -struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, +struct folio *f2fs_find_data_folio(struct inode *inode, pgoff_t index, pgoff_t *next_pgofs) { struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; - page = find_get_page(mapping, index); - if (page && PageUptodate(page)) - return page; - f2fs_put_page(page, 0); + folio = __filemap_get_folio(mapping, index, FGP_ACCESSED, 0); + if (IS_ERR(folio)) + goto read; + if (folio_test_uptodate(folio)) + return folio; + f2fs_folio_put(folio, false); - page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs); - if (IS_ERR(page)) - return page; +read: + folio = f2fs_get_read_data_folio(inode, index, 0, false, next_pgofs); + if (IS_ERR(folio)) + return folio; - if (PageUptodate(page)) - return page; + if (folio_test_uptodate(folio)) + return folio; - wait_on_page_locked(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 0); + folio_wait_locked(folio); + if (unlikely(!folio_test_uptodate(folio))) { + f2fs_folio_put(folio, false); return ERR_PTR(-EIO); } - return page; + return folio; } /* @@ -1311,23 +1313,23 @@ struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, * Because, the callers, functions in dir.c and GC, should be able to know * whether this page exists or not. */ -struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, +struct folio *f2fs_get_lock_data_folio(struct inode *inode, pgoff_t index, bool for_write) { struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; - page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL); - if (IS_ERR(page)) - return page; + folio = f2fs_get_read_data_folio(inode, index, 0, for_write, NULL); + if (IS_ERR(folio)) + return folio; /* wait for read completion */ - lock_page(page); - if (unlikely(page->mapping != mapping || !PageUptodate(page))) { - f2fs_put_page(page, 1); + folio_lock(folio); + if (unlikely(folio->mapping != mapping || !folio_test_uptodate(folio))) { + f2fs_folio_put(folio, true); return ERR_PTR(-EIO); } - return page; + return folio; } /* @@ -1336,57 +1338,57 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, * * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). - * Note that, ipage is set only by make_empty_dir, and if any error occur, - * ipage should be released by this function. + * Note that, ifolio is set only by make_empty_dir, and if any error occur, + * ifolio should be released by this function. */ -struct page *f2fs_get_new_data_page(struct inode *inode, - struct page *ipage, pgoff_t index, bool new_i_size) +struct folio *f2fs_get_new_data_folio(struct inode *inode, + struct folio *ifolio, pgoff_t index, bool new_i_size) { struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; struct dnode_of_data dn; int err; - page = f2fs_grab_cache_page(mapping, index, true); - if (!page) { + folio = f2fs_grab_cache_folio(mapping, index, true); + if (IS_ERR(folio)) { /* - * before exiting, we should make sure ipage will be released + * before exiting, we should make sure ifolio will be released * if any error occur. */ - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return ERR_PTR(-ENOMEM); } - set_new_dnode(&dn, inode, ipage, NULL, 0); + set_new_dnode(&dn, inode, ifolio, NULL, 0); err = f2fs_reserve_block(&dn, index); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return ERR_PTR(err); } - if (!ipage) + if (!ifolio) f2fs_put_dnode(&dn); - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) goto got_it; if (dn.data_blkaddr == NEW_ADDR) { - zero_user_segment(page, 0, PAGE_SIZE); - if (!PageUptodate(page)) - SetPageUptodate(page); + folio_zero_segment(folio, 0, folio_size(folio)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); } else { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); - /* if ipage exists, blkaddr should be NEW_ADDR */ - f2fs_bug_on(F2FS_I_SB(inode), ipage); - page = f2fs_get_lock_data_page(inode, index, true); - if (IS_ERR(page)) - return page; + /* if ifolio exists, blkaddr should be NEW_ADDR */ + f2fs_bug_on(F2FS_I_SB(inode), ifolio); + folio = f2fs_get_lock_data_folio(inode, index, true); + if (IS_ERR(folio)) + return folio; } got_it: if (new_i_size && i_size_read(inode) < ((loff_t)(index + 1) << PAGE_SHIFT)) f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); - return page; + return folio; } static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) @@ -1420,7 +1422,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) return err; if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) - f2fs_invalidate_internal_cache(sbi, old_blkaddr); + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); f2fs_update_data_blkaddr(dn, dn->data_blkaddr); return 0; @@ -1587,7 +1589,7 @@ next_dnode: start_pgofs = pgofs; prealloc = 0; last_ofs_in_node = ofs_in_node = dn.ofs_in_node; - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); next_block: blkaddr = f2fs_data_blkaddr(&dn); @@ -1676,7 +1678,8 @@ next_block: /* reserved delalloc block should be mapped for fiemap. */ if (blkaddr == NEW_ADDR) map->m_flags |= F2FS_MAP_DELALLOC; - if (flag != F2FS_GET_BLOCK_DIO || !is_hole) + /* DIO READ and hole case, should not map the blocks. */ + if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create)) map->m_flags |= F2FS_MAP_MAPPED; map->m_pblk = blkaddr; @@ -1818,21 +1821,10 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len) return true; } -static inline u64 bytes_to_blks(struct inode *inode, u64 bytes) -{ - return (bytes >> inode->i_blkbits); -} - -static inline u64 blks_to_bytes(struct inode *inode, u64 blks) -{ - return (blks << inode->i_blkbits); -} - static int f2fs_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; struct node_info ni; __u64 phys = 0, len; __u32 flags; @@ -1841,19 +1833,19 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (f2fs_has_inline_xattr(inode)) { int offset; + struct folio *folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), + inode->i_ino, false); - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), - inode->i_ino, false); - if (!page) - return -ENOMEM; + if (IS_ERR(folio)) + return PTR_ERR(folio); err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } - phys = blks_to_bytes(inode, ni.blk_addr); + phys = F2FS_BLK_TO_BYTES(ni.blk_addr); offset = offsetof(struct f2fs_inode, i_addr) + sizeof(__le32) * (DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)); @@ -1861,7 +1853,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, phys += offset; len = inline_xattr_size(inode); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED; @@ -1875,20 +1867,22 @@ static int f2fs_xattr_fiemap(struct inode *inode, } if (xnid) { - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false); - if (!page) - return -ENOMEM; + struct folio *folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), + xnid, false); + + if (IS_ERR(folio)) + return PTR_ERR(folio); err = f2fs_get_node_info(sbi, xnid, &ni, false); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } - phys = blks_to_bytes(inode, ni.blk_addr); + phys = F2FS_BLK_TO_BYTES(ni.blk_addr); len = inode->i_sb->s_blocksize; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); flags = FIEMAP_EXTENT_LAST; } @@ -1901,30 +1895,11 @@ static int f2fs_xattr_fiemap(struct inode *inode, return (err < 0 ? err : 0); } -static loff_t max_inode_blocks(struct inode *inode) -{ - loff_t result = ADDRS_PER_INODE(inode); - loff_t leaf_count = ADDRS_PER_BLOCK(inode); - - /* two direct node blocks */ - result += (leaf_count * 2); - - /* two indirect node blocks */ - leaf_count *= NIDS_PER_BLOCK; - result += (leaf_count * 2); - - /* one double indirect node block */ - leaf_count *= NIDS_PER_BLOCK; - result += leaf_count; - - return result; -} - int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { struct f2fs_map_blocks map; - sector_t start_blk, last_blk; + sector_t start_blk, last_blk, blk_len, max_len; pgoff_t next_pgofs; u64 logical = 0, phys = 0, size = 0; u32 flags = 0; @@ -1966,16 +1941,15 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - if (bytes_to_blks(inode, len) == 0) - len = blks_to_bytes(inode, 1); - - start_blk = bytes_to_blks(inode, start); - last_blk = bytes_to_blks(inode, start + len - 1); + start_blk = F2FS_BYTES_TO_BLK(start); + last_blk = F2FS_BYTES_TO_BLK(start + len - 1); + blk_len = last_blk - start_blk + 1; + max_len = F2FS_BYTES_TO_BLK(maxbytes) - start_blk; next: memset(&map, 0, sizeof(map)); map.m_lblk = start_blk; - map.m_len = bytes_to_blks(inode, len); + map.m_len = blk_len; map.m_next_pgofs = &next_pgofs; map.m_seg_type = NO_CHECK_TYPE; @@ -1992,13 +1966,23 @@ next: if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) { start_blk = next_pgofs; - if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode, - max_inode_blocks(inode))) + if (F2FS_BLK_TO_BYTES(start_blk) < maxbytes) goto prep_next; flags |= FIEMAP_EXTENT_LAST; } + /* + * current extent may cross boundary of inquiry, increase len to + * requery. + */ + if (!compr_cluster && (map.m_flags & F2FS_MAP_MAPPED) && + map.m_lblk + map.m_len - 1 == last_blk && + blk_len != max_len) { + blk_len = max_len; + goto next; + } + compr_appended = false; /* In a case of compressed cluster, append this to the last extent */ if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) || @@ -2030,14 +2014,14 @@ skip_fill: } else if (compr_appended) { unsigned int appended_blks = cluster_size - count_in_cluster + 1; - size += blks_to_bytes(inode, appended_blks); + size += F2FS_BLK_TO_BYTES(appended_blks); start_blk += appended_blks; compr_cluster = false; } else { - logical = blks_to_bytes(inode, start_blk); + logical = F2FS_BLK_TO_BYTES(start_blk); phys = __is_valid_data_blkaddr(map.m_pblk) ? - blks_to_bytes(inode, map.m_pblk) : 0; - size = blks_to_bytes(inode, map.m_len); + F2FS_BLK_TO_BYTES(map.m_pblk) : 0; + size = F2FS_BLK_TO_BYTES(map.m_len); flags = 0; if (compr_cluster) { @@ -2045,13 +2029,13 @@ skip_fill: count_in_cluster += map.m_len; if (count_in_cluster == cluster_size) { compr_cluster = false; - size += blks_to_bytes(inode, 1); + size += F2FS_BLKSIZE; } } else if (map.m_flags & F2FS_MAP_DELALLOC) { flags = FIEMAP_EXTENT_UNWRITTEN; } - start_blk += bytes_to_blks(inode, size); + start_blk += F2FS_BYTES_TO_BLK(size); } prep_next: @@ -2089,18 +2073,18 @@ static int f2fs_read_single_page(struct inode *inode, struct folio *folio, struct readahead_control *rac) { struct bio *bio = *bio_ret; - const unsigned blocksize = blks_to_bytes(inode, 1); + const unsigned int blocksize = F2FS_BLKSIZE; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; sector_t block_nr; - pgoff_t index = folio_index(folio); + pgoff_t index = folio->index; int ret = 0; block_in_file = (sector_t)index; last_block = block_in_file + nr_pages; - last_block_in_file = bytes_to_blks(inode, - f2fs_readpage_limit(inode) + blocksize - 1); + last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) + + blocksize - 1); if (last_block > last_block_in_file) last_block = last_block_in_file; @@ -2200,17 +2184,23 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct bio *bio = *bio_ret; unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; sector_t last_block_in_file; - const unsigned blocksize = blks_to_bytes(inode, 1); + const unsigned int blocksize = F2FS_BLKSIZE; struct decompress_io_ctx *dic = NULL; struct extent_info ei = {}; bool from_dnode = true; int i; int ret = 0; + if (unlikely(f2fs_cp_error(sbi))) { + ret = -EIO; + from_dnode = false; + goto out_put_dnode; + } + f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); - last_block_in_file = bytes_to_blks(inode, - f2fs_readpage_limit(inode) + blocksize - 1); + last_block_in_file = F2FS_BYTES_TO_BLK(f2fs_readpage_limit(inode) + + blocksize - 1); /* get rid of pages beyond EOF */ for (i = 0; i < cc->cluster_size; i++) { @@ -2250,17 +2240,13 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, if (ret) goto out; - if (unlikely(f2fs_cp_error(sbi))) { - ret = -EIO; - goto out_put_dnode; - } f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); skip_reading_dnode: for (i = 1; i < cc->cluster_size; i++) { block_t blkaddr; - blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, + blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_folio, dn.ofs_in_node + i) : ei.blk + i - 1; @@ -2294,14 +2280,13 @@ skip_reading_dnode: block_t blkaddr; struct bio_post_read_ctx *ctx; - blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, + blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_folio, dn.ofs_in_node + i + 1) : ei.blk + i; f2fs_wait_on_block_writeback(inode, blkaddr); - if (f2fs_load_compressed_page(sbi, folio_page(folio, 0), - blkaddr)) { + if (f2fs_load_compressed_folio(sbi, folio, blkaddr)) { if (atomic_dec_and_test(&dic->remaining_pages)) { f2fs_decompress_cluster(dic, true); break; @@ -2385,10 +2370,10 @@ static int f2fs_mpage_readpages(struct inode *inode, .nr_cpages = 0, }; pgoff_t nc_cluster_idx = NULL_CLUSTER; + pgoff_t index; #endif unsigned nr_pages = rac ? readahead_count(rac) : 1; unsigned max_nr_pages = nr_pages; - pgoff_t index; int ret = 0; map.m_pblk = 0; @@ -2406,9 +2391,9 @@ static int f2fs_mpage_readpages(struct inode *inode, prefetchw(&folio->flags); } - index = folio_index(folio); - #ifdef CONFIG_F2FS_FS_COMPRESSION + index = folio->index; + if (!f2fs_compressed_file(inode)) goto read_single_page; @@ -2480,7 +2465,7 @@ next_page: static int f2fs_read_data_folio(struct file *file, struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; int ret = -EAGAIN; trace_f2fs_readpage(folio, DATA); @@ -2516,8 +2501,9 @@ static void f2fs_readahead(struct readahead_control *rac) int f2fs_encrypt_one_page(struct f2fs_io_info *fio) { - struct inode *inode = fio->page->mapping->host; - struct page *mpage, *page; + struct inode *inode = fio_inode(fio); + struct folio *mfolio; + struct page *page; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) @@ -2529,7 +2515,7 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio) return 0; retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, + fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page_folio(page), PAGE_SIZE, 0, gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ @@ -2542,12 +2528,12 @@ retry_encrypt: return PTR_ERR(fio->encrypted_page); } - mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr); - if (mpage) { - if (PageUptodate(mpage)) - memcpy(page_address(mpage), + mfolio = filemap_lock_folio(META_MAPPING(fio->sbi), fio->old_blkaddr); + if (!IS_ERR(mfolio)) { + if (folio_test_uptodate(mfolio)) + memcpy(folio_address(mfolio), page_address(fio->encrypted_page), PAGE_SIZE); - f2fs_put_page(mpage, 1); + f2fs_folio_put(mfolio, true); } return 0; } @@ -2646,7 +2632,7 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) static inline bool need_inplace_update(struct f2fs_io_info *fio) { - struct inode *inode = fio->page->mapping->host; + struct inode *inode = fio_inode(fio); if (f2fs_should_update_outplace(inode, fio)) return false; @@ -2870,13 +2856,7 @@ write: goto done; } - if (!wbc->for_reclaim) - need_balance_fs = true; - else if (has_not_enough_free_secs(sbi, 0, 0)) - goto redirty_out; - else - set_inode_flag(inode, FI_HOT_DATA); - + need_balance_fs = true; err = -EAGAIN; if (f2fs_has_inline_data(inode)) { err = f2fs_write_inline_data(inode, folio); @@ -2912,13 +2892,6 @@ out: folio_clear_uptodate(folio); clear_page_private_gcing(page); } - - if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); - clear_inode_flag(inode, FI_HOT_DATA); - f2fs_remove_dirty_inode(inode); - submitted = NULL; - } folio_unlock(folio); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && !F2FS_I(inode)->wb_task && allow_balance) @@ -2944,35 +2917,12 @@ redirty_out: * file_write_and_wait_range() will see EIO error, which is critical * to return value of fsync() followed by atomic_write failure to user. */ - if (!err || wbc->for_reclaim) - return AOP_WRITEPAGE_ACTIVATE; folio_unlock(folio); + if (!err) + return 1; return err; } -static int f2fs_write_data_page(struct page *page, - struct writeback_control *wbc) -{ - struct folio *folio = page_folio(page); -#ifdef CONFIG_F2FS_FS_COMPRESSION - struct inode *inode = folio->mapping->host; - - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) - goto out; - - if (f2fs_compressed_file(inode)) { - if (f2fs_is_compressed_cluster(inode, folio->index)) { - folio_redirty_for_writepage(wbc, folio); - return AOP_WRITEPAGE_ACTIVATE; - } - } -out: -#endif - - return f2fs_write_single_data_page(folio, NULL, NULL, NULL, - wbc, FS_DATA_IO, 0, true); -} - /* * This function was copied from write_cache_pages from mm/page-writeback.c. * The major change is making write step of cold data page separately from @@ -3166,7 +3116,7 @@ continue_unlock: if (folio_test_writeback(folio)) { if (wbc->sync_mode == WB_SYNC_NONE) goto continue_unlock; - f2fs_wait_on_page_writeback(&folio->page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); } if (!folio_clear_dirty_for_io(folio)) @@ -3179,11 +3129,10 @@ continue_unlock: continue; } #endif + submitted = 0; ret = f2fs_write_single_data_page(folio, &submitted, &bio, &last_block, wbc, io_type, 0, true); - if (ret == AOP_WRITEPAGE_ACTIVATE) - folio_unlock(folio); #ifdef CONFIG_F2FS_FS_COMPRESSION result: #endif @@ -3195,7 +3144,7 @@ result: * keep nr_to_write, since vfs uses this to * get # of written pages. */ - if (ret == AOP_WRITEPAGE_ACTIVATE) { + if (ret == 1) { ret = 0; goto next; } else if (ret == -EAGAIN) { @@ -3294,10 +3243,6 @@ static int __f2fs_write_data_pages(struct address_space *mapping, int ret; bool locked = false; - /* deal with chardevs and other special file */ - if (!mapping->a_ops->writepage) - return 0; - /* skip writing if there is no dirty page in this inode */ if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) return 0; @@ -3393,7 +3338,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, struct inode *inode = folio->mapping->host; pgoff_t index = folio->index; struct dnode_of_data dn; - struct page *ipage; + struct folio *ifolio; bool locked = false; int flag = F2FS_GET_BLOCK_PRE_AIO; int err = 0; @@ -3418,29 +3363,34 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, restart: /* check inline_data */ - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); goto unlock_out; } - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ifolio, ifolio, 0); if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA(inode)) { - f2fs_do_read_inline_data(folio, ipage); + f2fs_do_read_inline_data(folio, ifolio); set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) - set_page_private_inline(ipage); + set_page_private_inline(&ifolio->page); goto out; } - err = f2fs_convert_inline_page(&dn, folio_page(folio, 0)); + err = f2fs_convert_inline_folio(&dn, folio); if (err || dn.data_blkaddr != NULL_ADDR) goto out; } if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { + if (IS_DEVICE_ALIASING(inode)) { + err = -ENODATA; + goto out; + } + if (locked) { err = f2fs_reserve_block(&dn, index); goto out; @@ -3473,14 +3423,14 @@ static int __find_data_block(struct inode *inode, pgoff_t index, block_t *blk_addr) { struct dnode_of_data dn; - struct page *ipage; + struct folio *ifolio; int err = 0; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ifolio, ifolio, 0); if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { @@ -3501,17 +3451,17 @@ static int __reserve_data_block(struct inode *inode, pgoff_t index, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - struct page *ipage; + struct folio *ifolio; int err = 0; f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO); - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); goto unlock_out; } - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ifolio, ifolio, 0); if (!f2fs_lookup_read_extent_cache_block(dn.inode, index, &dn.data_blkaddr)) @@ -3659,7 +3609,7 @@ repeat: } } - f2fs_wait_on_page_writeback(&folio->page, DATA, false, true); + f2fs_folio_wait_writeback(folio, DATA, false, true); if (len == folio_size(folio) || folio_test_uptodate(folio)) return 0; @@ -3914,18 +3864,18 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, set_inode_flag(inode, FI_SKIP_WRITES); for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { - struct page *page; + struct folio *folio; unsigned int blkidx = secidx * blk_per_sec + blkofs; - page = f2fs_get_lock_data_page(inode, blkidx, true); - if (IS_ERR(page)) { + folio = f2fs_get_lock_data_folio(inode, blkidx, true); + if (IS_ERR(folio)) { f2fs_up_write(&sbi->pin_sem); - ret = PTR_ERR(page); + ret = PTR_ERR(folio); goto done; } - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } clear_inode_flag(inode, FI_SKIP_WRITES); @@ -3971,7 +3921,7 @@ static int check_swap_activate(struct swap_info_struct *sis, * to be very smart. */ cur_lblock = 0; - last_lblock = bytes_to_blks(inode, i_size_read(inode)); + last_lblock = F2FS_BYTES_TO_BLK(i_size_read(inode)); while (cur_lblock < last_lblock && cur_lblock < sis->max) { struct f2fs_map_blocks map; @@ -4002,7 +3952,7 @@ retry: if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec || nr_pblocks % blks_per_sec || - !f2fs_valid_pinned_area(sbi, pblock)) { + f2fs_is_sequential_zone_area(sbi, pblock)) { bool last_extent = false; not_aligned++; @@ -4054,7 +4004,6 @@ retry: cur_lblock = 1; /* force Empty message */ sis->max = cur_lblock; sis->pages = cur_lblock - 1; - sis->highest_bit = cur_lblock - 1; out: if (not_aligned) f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)", @@ -4125,7 +4074,6 @@ static void f2fs_swap_deactivate(struct file *file) const struct address_space_operations f2fs_dblock_aops = { .read_folio = f2fs_read_data_folio, .readahead = f2fs_readahead, - .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, .write_end = f2fs_write_end, @@ -4214,19 +4162,25 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, pgoff_t next_pgofs = 0; int err; - map.m_lblk = bytes_to_blks(inode, offset); - map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1; + map.m_lblk = F2FS_BYTES_TO_BLK(offset); + map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1; map.m_next_pgofs = &next_pgofs; map.m_seg_type = f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint); - if (flags & IOMAP_WRITE) + + /* + * If the blocks being overwritten are already allocated, + * f2fs_map_lock and f2fs_balance_fs are not necessary. + */ + if ((flags & IOMAP_WRITE) && + !f2fs_overwrite_io(inode, offset, length)) map.m_may_create = true; err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO); if (err) return err; - iomap->offset = blks_to_bytes(inode, map.m_lblk); + iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk); /* * When inline encryption is enabled, sometimes I/O to an encrypted file @@ -4246,21 +4200,21 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR)) return -EINVAL; - iomap->length = blks_to_bytes(inode, map.m_len); + iomap->length = F2FS_BLK_TO_BYTES(map.m_len); iomap->type = IOMAP_MAPPED; iomap->flags |= IOMAP_F_MERGED; iomap->bdev = map.m_bdev; - iomap->addr = blks_to_bytes(inode, map.m_pblk); + iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk); } else { if (flags & IOMAP_WRITE) return -ENOTBLK; if (map.m_pblk == NULL_ADDR) { - iomap->length = blks_to_bytes(inode, next_pgofs) - - iomap->offset; + iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) - + iomap->offset; iomap->type = IOMAP_HOLE; } else if (map.m_pblk == NEW_ADDR) { - iomap->length = blks_to_bytes(inode, map.m_len); + iomap->length = F2FS_BLK_TO_BYTES(map.m_len); iomap->type = IOMAP_UNWRITTEN; } else { f2fs_bug_on(F2FS_I_SB(inode), 1); diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 546b8ba91261..16c2dfb4f595 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -60,6 +60,70 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi) } #ifdef CONFIG_DEBUG_FS +static void update_multidevice_stats(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + struct f2fs_dev_stats *dev_stats = si->dev_stats; + int i, j; + + if (!f2fs_is_multi_device(sbi)) + return; + + memset(dev_stats, 0, sizeof(struct f2fs_dev_stats) * sbi->s_ndevs); + for (i = 0; i < sbi->s_ndevs; i++) { + unsigned int start_segno, end_segno; + block_t start_blk, end_blk; + + if (i == 0) { + start_blk = MAIN_BLKADDR(sbi); + end_blk = FDEV(i).end_blk + 1 - SEG0_BLKADDR(sbi); + } else { + start_blk = FDEV(i).start_blk; + end_blk = FDEV(i).end_blk + 1; + } + + start_segno = GET_SEGNO(sbi, start_blk); + end_segno = GET_SEGNO(sbi, end_blk); + + for (j = start_segno; j < end_segno; j++) { + unsigned int seg_blks, sec_blks; + + seg_blks = get_seg_entry(sbi, j)->valid_blocks; + + /* update segment stats */ + if (IS_CURSEG(sbi, j)) + dev_stats[i].devstats[0][DEVSTAT_INUSE]++; + else if (seg_blks == BLKS_PER_SEG(sbi)) + dev_stats[i].devstats[0][DEVSTAT_FULL]++; + else if (seg_blks != 0) + dev_stats[i].devstats[0][DEVSTAT_DIRTY]++; + else if (!test_bit(j, FREE_I(sbi)->free_segmap)) + dev_stats[i].devstats[0][DEVSTAT_FREE]++; + else + dev_stats[i].devstats[0][DEVSTAT_PREFREE]++; + + if (!__is_large_section(sbi) || + (j % SEGS_PER_SEC(sbi)) != 0) + continue; + + sec_blks = get_sec_entry(sbi, j)->valid_blocks; + + /* update section stats */ + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, j))) + dev_stats[i].devstats[1][DEVSTAT_INUSE]++; + else if (sec_blks == BLKS_PER_SEC(sbi)) + dev_stats[i].devstats[1][DEVSTAT_FULL]++; + else if (sec_blks != 0) + dev_stats[i].devstats[1][DEVSTAT_DIRTY]++; + else if (!test_bit(GET_SEC_FROM_SEG(sbi, j), + FREE_I(sbi)->free_secmap)) + dev_stats[i].devstats[1][DEVSTAT_FREE]++; + else + dev_stats[i].devstats[1][DEVSTAT_PREFREE]++; + } + } +} + static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); @@ -100,6 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + si->ndonate_files = sbi->donate_files; si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->aw_cnt = atomic_read(&sbi->atomic_files); @@ -214,6 +279,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->valid_blks[type] += blks; } + update_multidevice_stats(sbi); + for (i = 0; i < MAX_CALL_TYPE; i++) si->cp_call_count[i] = atomic_read(&sbi->cp_call_count[i]); @@ -435,6 +502,8 @@ static int stat_show(struct seq_file *s, void *v) si->compr_inode, si->compr_blocks); seq_printf(s, " - Swapfile Inode: %u\n", si->swapfile_inode); + seq_printf(s, " - Donate Inode: %u\n", + si->ndonate_files); seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", @@ -498,6 +567,36 @@ static int stat_show(struct seq_file *s, void *v) si->dirty_count); seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", si->prefree_count, si->free_segs, si->free_secs); + if (f2fs_is_multi_device(sbi)) { + seq_puts(s, "Multidevice stats:\n"); + seq_printf(s, " [seg: %8s %8s %8s %8s %8s]", + "inuse", "dirty", "full", "free", "prefree"); + if (__is_large_section(sbi)) + seq_printf(s, " [sec: %8s %8s %8s %8s %8s]\n", + "inuse", "dirty", "full", "free", "prefree"); + else + seq_puts(s, "\n"); + + for (i = 0; i < sbi->s_ndevs; i++) { + seq_printf(s, " #%-2d %8u %8u %8u %8u %8u", i, + si->dev_stats[i].devstats[0][DEVSTAT_INUSE], + si->dev_stats[i].devstats[0][DEVSTAT_DIRTY], + si->dev_stats[i].devstats[0][DEVSTAT_FULL], + si->dev_stats[i].devstats[0][DEVSTAT_FREE], + si->dev_stats[i].devstats[0][DEVSTAT_PREFREE]); + if (!__is_large_section(sbi)) { + seq_puts(s, "\n"); + continue; + } + seq_printf(s, " %8u %8u %8u %8u %8u\n", + si->dev_stats[i].devstats[1][DEVSTAT_INUSE], + si->dev_stats[i].devstats[1][DEVSTAT_DIRTY], + si->dev_stats[i].devstats[1][DEVSTAT_FULL], + si->dev_stats[i].devstats[1][DEVSTAT_FREE], + si->dev_stats[i].devstats[1][DEVSTAT_PREFREE]); + } + seq_puts(s, "\n"); + } seq_printf(s, "CP calls: %d (BG: %d)\n", si->cp_call_count[TOTAL_CALL], si->cp_call_count[BACKGROUND]); @@ -598,9 +697,9 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); - seq_printf(s, " - datas: %4d in files:%4d\n", + seq_printf(s, " - data: %4d in files:%4d\n", si->ndirty_data, si->ndirty_files); - seq_printf(s, " - quota datas: %4d in quota files:%4d\n", + seq_printf(s, " - quota data: %4d in quota files:%4d\n", si->ndirty_qdata, si->nquota_files); seq_printf(s, " - meta: %4d in %4d\n", si->ndirty_meta, si->meta_pages); @@ -665,6 +764,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + struct f2fs_dev_stats *dev_stats; unsigned long flags; int i; @@ -672,6 +772,15 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) if (!si) return -ENOMEM; + dev_stats = f2fs_kzalloc(sbi, sizeof(struct f2fs_dev_stats) * + sbi->s_ndevs, GFP_KERNEL); + if (!dev_stats) { + kfree(si); + return -ENOMEM; + } + + si->dev_stats = dev_stats; + si->all_area_segs = le32_to_cpu(raw_super->segment_count); si->sit_area_segs = le32_to_cpu(raw_super->segment_count_sit); si->nat_area_segs = le32_to_cpu(raw_super->segment_count_nat); @@ -724,6 +833,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) list_del(&si->stat_list); raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); + kfree(si->dev_stats); kfree(si); } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 47a5c806cf16..c36b3b22bfff 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -173,17 +173,18 @@ static unsigned long dir_block_index(unsigned int level, } static struct f2fs_dir_entry *find_in_block(struct inode *dir, - struct page *dentry_page, + struct folio *dentry_folio, const struct f2fs_filename *fname, - int *max_slots) + int *max_slots, + bool use_hash) { struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; - dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(dir, &d, dentry_blk); - return f2fs_find_target_dentry(&d, fname, max_slots); + return f2fs_find_target_dentry(&d, fname, max_slots, use_hash); } static inline int f2fs_match_name(const struct inode *dir, @@ -208,7 +209,8 @@ static inline int f2fs_match_name(const struct inode *dir, } struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots) + const struct f2fs_filename *fname, int *max_slots, + bool use_hash) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; @@ -231,7 +233,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, continue; } - if (de->hash_code == fname->hash) { + if (!use_hash || de->hash_code == fname->hash) { res = f2fs_match_name(d->inode, fname, d->filename[bit_pos], le16_to_cpu(de->name_len)); @@ -258,12 +260,12 @@ found: static struct f2fs_dir_entry *find_in_level(struct inode *dir, unsigned int level, const struct f2fs_filename *fname, - struct page **res_page) + struct folio **res_folio, + bool use_hash) { int s = GET_DENTRY_SLOTS(fname->disk_name.len); unsigned int nbucket, nblock; - unsigned int bidx, end_block; - struct page *dentry_page; + unsigned int bidx, end_block, bucket_no; struct f2fs_dir_entry *de = NULL; pgoff_t next_pgofs; bool room = false; @@ -272,62 +274,76 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); + bucket_no = use_hash ? le32_to_cpu(fname->hash) % nbucket : 0; + +start_find_bucket: bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, - le32_to_cpu(fname->hash) % nbucket); + bucket_no); end_block = bidx + nblock; while (bidx < end_block) { /* no need to allocate new dentry pages to all the indices */ - dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs); - if (IS_ERR(dentry_page)) { - if (PTR_ERR(dentry_page) == -ENOENT) { + struct folio *dentry_folio; + dentry_folio = f2fs_find_data_folio(dir, bidx, &next_pgofs); + if (IS_ERR(dentry_folio)) { + if (PTR_ERR(dentry_folio) == -ENOENT) { room = true; bidx = next_pgofs; continue; } else { - *res_page = dentry_page; + *res_folio = dentry_folio; break; } } - de = find_in_block(dir, dentry_page, fname, &max_slots); + de = find_in_block(dir, dentry_folio, fname, &max_slots, use_hash); if (IS_ERR(de)) { - *res_page = ERR_CAST(de); + *res_folio = ERR_CAST(de); de = NULL; break; } else if (de) { - *res_page = dentry_page; + *res_folio = dentry_folio; break; } if (max_slots >= s) room = true; - f2fs_put_page(dentry_page, 0); + f2fs_folio_put(dentry_folio, false); bidx++; } - if (!de && room && F2FS_I(dir)->chash != fname->hash) { - F2FS_I(dir)->chash = fname->hash; - F2FS_I(dir)->clevel = level; - } + if (de) + return de; - return de; + if (likely(use_hash)) { + if (room && F2FS_I(dir)->chash != fname->hash) { + F2FS_I(dir)->chash = fname->hash; + F2FS_I(dir)->clevel = level; + } + } else if (++bucket_no < nbucket) { + goto start_find_bucket; + } + return NULL; } struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page) + struct folio **res_folio) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; + bool use_hash = true; - *res_page = NULL; + *res_folio = NULL; +#if IS_ENABLED(CONFIG_UNICODE) +start_find_entry: +#endif if (f2fs_has_inline_dentry(dir)) { - de = f2fs_find_in_inline_dir(dir, fname, res_page); + de = f2fs_find_in_inline_dir(dir, fname, res_folio, use_hash); goto out; } @@ -343,11 +359,19 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, } for (level = 0; level < max_depth; level++) { - de = find_in_level(dir, level, fname, res_page); - if (de || IS_ERR(*res_page)) + de = find_in_level(dir, level, fname, res_folio, use_hash); + if (de || IS_ERR(*res_folio)) break; } + out: +#if IS_ENABLED(CONFIG_UNICODE) + if (!sb_no_casefold_compat_fallback(dir->i_sb) && + IS_CASEFOLDED(dir) && !de && use_hash) { + use_hash = false; + goto start_find_entry; + } +#endif /* This is to increase the speed of f2fs_create */ if (!de) F2FS_I(dir)->task = current; @@ -361,7 +385,7 @@ out: * Entry is guaranteed to be valid. */ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - const struct qstr *child, struct page **res_page) + const struct qstr *child, struct folio **res_folio) { struct f2fs_dir_entry *de = NULL; struct f2fs_filename fname; @@ -370,67 +394,67 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, err = f2fs_setup_filename(dir, child, 1, &fname); if (err) { if (err == -ENOENT) - *res_page = NULL; + *res_folio = NULL; else - *res_page = ERR_PTR(err); + *res_folio = ERR_PTR(err); return NULL; } - de = __f2fs_find_entry(dir, &fname, res_page); + de = __f2fs_find_entry(dir, &fname, res_folio); f2fs_free_filename(&fname); return de; } -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct folio **f) { - return f2fs_find_entry(dir, &dotdot_name, p); + return f2fs_find_entry(dir, &dotdot_name, f); } ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, - struct page **page) + struct folio **folio) { ino_t res = 0; struct f2fs_dir_entry *de; - de = f2fs_find_entry(dir, qstr, page); + de = f2fs_find_entry(dir, qstr, folio); if (de) { res = le32_to_cpu(de->ino); - f2fs_put_page(*page, 0); + f2fs_folio_put(*folio, false); } return res; } void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, - struct page *page, struct inode *inode) + struct folio *folio, struct inode *inode) { enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA; - lock_page(page); - f2fs_wait_on_page_writeback(page, type, true, true); + folio_lock(folio); + f2fs_folio_wait_writeback(folio, type, true, true); de->ino = cpu_to_le32(inode->i_ino); de->file_type = fs_umode_to_ftype(inode->i_mode); - set_page_dirty(page); + folio_mark_dirty(folio); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } static void init_dent_inode(struct inode *dir, struct inode *inode, const struct f2fs_filename *fname, - struct page *ipage) + struct folio *ifolio) { struct f2fs_inode *ri; if (!fname) /* tmpfile case? */ return; - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); - /* copy name info. to this inode page */ - ri = F2FS_INODE(ipage); + /* copy name info. to this inode folio */ + ri = F2FS_INODE(&ifolio->page); ri->i_namelen = cpu_to_le32(fname->disk_name.len); memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len); if (IS_ENCRYPTED(dir)) { @@ -451,7 +475,7 @@ static void init_dent_inode(struct inode *dir, struct inode *inode, file_lost_pino(inode); } } - set_page_dirty(ipage); + folio_mark_dirty(ifolio); } void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, @@ -468,72 +492,73 @@ void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, } static int make_empty_dir(struct inode *inode, - struct inode *parent, struct page *page) + struct inode *parent, struct folio *folio) { - struct page *dentry_page; + struct folio *dentry_folio; struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) - return f2fs_make_empty_inline_dir(inode, parent, page); + return f2fs_make_empty_inline_dir(inode, parent, folio); - dentry_page = f2fs_get_new_data_page(inode, page, 0, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); + dentry_folio = f2fs_get_new_data_folio(inode, folio, 0, true); + if (IS_ERR(dentry_folio)) + return PTR_ERR(dentry_folio); - dentry_blk = page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_do_make_empty_dir(inode, parent, &d); - set_page_dirty(dentry_page); - f2fs_put_page(dentry_page, 1); + folio_mark_dirty(dentry_folio); + f2fs_folio_put(dentry_folio, true); return 0; } -struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, - const struct f2fs_filename *fname, struct page *dpage) +struct folio *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, + const struct f2fs_filename *fname, struct folio *dfolio) { - struct page *page; + struct folio *folio; int err; if (is_inode_flag_set(inode, FI_NEW_INODE)) { - page = f2fs_new_inode_page(inode); - if (IS_ERR(page)) - return page; + folio = f2fs_new_inode_folio(inode); + if (IS_ERR(folio)) + return folio; if (S_ISDIR(inode->i_mode)) { /* in order to handle error case */ - get_page(page); - err = make_empty_dir(inode, dir, page); + folio_get(folio); + err = make_empty_dir(inode, dir, folio); if (err) { - lock_page(page); + folio_lock(folio); goto put_error; } - put_page(page); + folio_put(folio); } - err = f2fs_init_acl(inode, dir, page, dpage); + err = f2fs_init_acl(inode, dir, folio, dfolio); if (err) goto put_error; err = f2fs_init_security(inode, dir, - fname ? fname->usr_fname : NULL, page); + fname ? fname->usr_fname : NULL, + folio); if (err) goto put_error; if (IS_ENCRYPTED(inode)) { - err = fscrypt_set_context(inode, page); + err = fscrypt_set_context(inode, folio); if (err) goto put_error; } } else { - page = f2fs_get_node_page(F2FS_I_SB(dir), inode->i_ino); - if (IS_ERR(page)) - return page; + folio = f2fs_get_inode_folio(F2FS_I_SB(dir), inode->i_ino); + if (IS_ERR(folio)) + return folio; } - init_dent_inode(dir, inode, fname, page); + init_dent_inode(dir, inode, fname, folio); /* * This file should be checkpointed during fsync. @@ -550,12 +575,12 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, f2fs_remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); f2fs_i_links_write(inode, true); } - return page; + return folio; put_error: clear_nlink(inode); - f2fs_update_inode(inode, page); - f2fs_put_page(page, 1); + f2fs_update_inode(inode, folio); + f2fs_folio_put(folio, true); return ERR_PTR(err); } @@ -597,14 +622,14 @@ next: goto next; } -bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, +bool f2fs_has_enough_room(struct inode *dir, struct folio *ifolio, const struct f2fs_filename *fname) { struct f2fs_dentry_ptr d; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(fname->disk_name.len); - make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage)); + make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ifolio)); bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); @@ -641,10 +666,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct f2fs_filename *fname, unsigned int current_depth; unsigned long bidx, block; unsigned int nbucket, nblock; - struct page *dentry_page = NULL; + struct folio *dentry_folio = NULL; struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dentry_ptr d; - struct page *page = NULL; + struct folio *folio = NULL; int slots, err = 0; level = 0; @@ -674,30 +699,30 @@ start: (le32_to_cpu(fname->hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { - dentry_page = f2fs_get_new_data_page(dir, NULL, block, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); + dentry_folio = f2fs_get_new_data_folio(dir, NULL, block, true); + if (IS_ERR(dentry_folio)) + return PTR_ERR(dentry_folio); - dentry_blk = page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); bit_pos = f2fs_room_for_filename(&dentry_blk->dentry_bitmap, slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; - f2fs_put_page(dentry_page, 1); + f2fs_folio_put(dentry_folio, true); } /* Move to next level to find the empty slot for new dentry */ ++level; goto start; add_dentry: - f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); + f2fs_folio_wait_writeback(dentry_folio, DATA, true, true); if (inode) { f2fs_down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, fname, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_init_inode_metadata(inode, dir, fname, NULL); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto fail; } } @@ -706,16 +731,16 @@ add_dentry: f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash, bit_pos); - set_page_dirty(dentry_page); + folio_mark_dirty(dentry_folio); if (inode) { f2fs_i_pino_write(inode, dir->i_ino); /* synchronize inode page's data from inode cache */ if (is_inode_flag_set(inode, FI_NEW_INODE)) - f2fs_update_inode(inode, page); + f2fs_update_inode(inode, folio); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } f2fs_update_parent_metadata(dir, inode, current_depth); @@ -723,7 +748,7 @@ fail: if (inode) f2fs_up_write(&F2FS_I(inode)->i_sem); - f2fs_put_page(dentry_page, 1); + f2fs_folio_put(dentry_folio, true); return err; } @@ -757,7 +782,7 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_filename fname; - struct page *page = NULL; + struct folio *folio = NULL; struct f2fs_dir_entry *de = NULL; int err; @@ -773,14 +798,14 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, * consistency more. */ if (current != F2FS_I(dir)->task) { - de = __f2fs_find_entry(dir, &fname, &page); + de = __f2fs_find_entry(dir, &fname, &folio); F2FS_I(dir)->task = NULL; } if (de) { - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); err = -EEXIST; - } else if (IS_ERR(page)) { - err = PTR_ERR(page); + } else if (IS_ERR(folio)) { + err = PTR_ERR(folio); } else { err = f2fs_add_dentry(dir, &fname, inode, ino, mode); } @@ -791,16 +816,16 @@ int f2fs_do_add_link(struct inode *dir, const struct qstr *name, int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, struct f2fs_filename *fname) { - struct page *page; + struct folio *folio; int err = 0; f2fs_down_write(&F2FS_I(inode)->i_sem); - page = f2fs_init_inode_metadata(inode, dir, fname, NULL); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_init_inode_metadata(inode, dir, fname, NULL); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto fail; } - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); clear_inode_flag(inode, FI_NEW_INODE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); @@ -836,13 +861,13 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ -void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, struct inode *dir, struct inode *inode) { - struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); - pgoff_t index = page_folio(page)->index; + pgoff_t index = folio->index; int i; f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -851,12 +876,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, f2fs_add_ino_entry(F2FS_I_SB(dir), dir->i_ino, TRANS_DIR_INO); if (f2fs_has_inline_dentry(dir)) - return f2fs_delete_inline_entry(dentry, page, dir, inode); + return f2fs_delete_inline_entry(dentry, folio, dir, inode); - lock_page(page); - f2fs_wait_on_page_writeback(page, DATA, true, true); + folio_lock(folio); + f2fs_folio_wait_writeback(folio, DATA, true, true); - dentry_blk = page_address(page); + dentry_blk = folio_address(folio); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) __clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); @@ -865,19 +890,19 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, NR_DENTRY_IN_BLOCK, 0); - set_page_dirty(page); + folio_mark_dirty(folio); if (bit_pos == NR_DENTRY_IN_BLOCK && !f2fs_truncate_hole(dir, index, index + 1)) { - f2fs_clear_page_cache_dirty_tag(page_folio(page)); - clear_page_dirty_for_io(page); - ClearPageUptodate(page); - clear_page_private_all(page); + f2fs_clear_page_cache_dirty_tag(folio); + folio_clear_dirty_for_io(folio); + folio_clear_uptodate(folio); + clear_page_private_all(&folio->page); inode_dec_dirty_pages(dir); f2fs_remove_dirty_inode(dir); } - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); @@ -889,7 +914,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, bool f2fs_empty_dir(struct inode *dir) { unsigned long bidx = 0; - struct page *dentry_page; unsigned int bit_pos; struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); @@ -899,10 +923,11 @@ bool f2fs_empty_dir(struct inode *dir) while (bidx < nblock) { pgoff_t next_pgofs; + struct folio *dentry_folio; - dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs); - if (IS_ERR(dentry_page)) { - if (PTR_ERR(dentry_page) == -ENOENT) { + dentry_folio = f2fs_find_data_folio(dir, bidx, &next_pgofs); + if (IS_ERR(dentry_folio)) { + if (PTR_ERR(dentry_folio) == -ENOENT) { bidx = next_pgofs; continue; } else { @@ -910,7 +935,7 @@ bool f2fs_empty_dir(struct inode *dir) } } - dentry_blk = page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); if (bidx == 0) bit_pos = 2; else @@ -919,7 +944,7 @@ bool f2fs_empty_dir(struct inode *dir) NR_DENTRY_IN_BLOCK, bit_pos); - f2fs_put_page(dentry_page, 0); + f2fs_folio_put(dentry_folio, false); if (bit_pos < NR_DENTRY_IN_BLOCK) return false; @@ -1018,7 +1043,6 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); struct f2fs_dentry_block *dentry_blk = NULL; - struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; loff_t start_pos = ctx->pos; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); @@ -1042,6 +1066,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) } for (; n < npages; ctx->pos = n * NR_DENTRY_IN_BLOCK) { + struct folio *dentry_folio; pgoff_t next_pgofs; /* allow readdir() to be interrupted */ @@ -1056,9 +1081,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) page_cache_sync_readahead(inode->i_mapping, ra, file, n, min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); - dentry_page = f2fs_find_data_page(inode, n, &next_pgofs); - if (IS_ERR(dentry_page)) { - err = PTR_ERR(dentry_page); + dentry_folio = f2fs_find_data_folio(inode, n, &next_pgofs); + if (IS_ERR(dentry_folio)) { + err = PTR_ERR(dentry_folio); if (err == -ENOENT) { err = 0; n = next_pgofs; @@ -1068,18 +1093,15 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) } } - dentry_blk = page_address(dentry_page); + dentry_blk = folio_address(dentry_folio); make_dentry_ptr_block(inode, &d, dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); - if (err) { - f2fs_put_page(dentry_page, 0); + f2fs_folio_put(dentry_folio, false); + if (err) break; - } - - f2fs_put_page(dentry_page, 0); n++; } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 62ac440d9416..cfe925a3d555 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -24,6 +24,7 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; struct extent_info ei; + int devi; get_read_extent_info(&ei, i_ext); @@ -38,7 +39,36 @@ bool sanity_check_extent_cache(struct inode *inode, struct page *ipage) ei.blk, ei.fofs, ei.len); return false; } - return true; + + if (!IS_DEVICE_ALIASING(inode)) + return true; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (FDEV(devi).start_blk != ei.blk || + FDEV(devi).end_blk != ei.blk + ei.len - 1) + continue; + + if (devi == 0) { + f2fs_warn(sbi, + "%s: inode (ino=%lx) is an alias of meta device", + __func__, inode->i_ino); + return false; + } + + if (bdev_is_zoned(FDEV(devi).bdev)) { + f2fs_warn(sbi, + "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] maps to zoned block device", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; + } + return true; + } + + f2fs_warn(sbi, "%s: device alias inode (ino=%lx)'s extent info " + "[%u, %u, %u] is inconsistent w/ any devices", + __func__, inode->i_ino, ei.blk, ei.fofs, ei.len); + return false; } static void __set_extent_info(struct extent_info *ei, @@ -76,6 +106,9 @@ static bool __init_may_extent_tree(struct inode *inode, enum extent_type type) static bool __may_extent_tree(struct inode *inode, enum extent_type type) { + if (IS_DEVICE_ALIASING(inode) && type == EX_READ) + return true; + /* * for recovered files during mount do not create extents * if shrinker is not registered. @@ -346,21 +379,22 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode, } static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et) + struct extent_tree *et, unsigned int nr_shrink) { struct rb_node *node, *next; struct extent_node *en; - unsigned int count = atomic_read(&et->node_cnt); + unsigned int count; node = rb_first_cached(&et->root); - while (node) { + + for (count = 0; node && count < nr_shrink; count++) { next = rb_next(node); en = rb_entry(node, struct extent_node, rb_node); __release_extent_node(sbi, et, en); node = next; } - return count - atomic_read(&et->node_cnt); + return count; } static void __drop_largest_extent(struct extent_tree *et, @@ -373,11 +407,11 @@ static void __drop_largest_extent(struct extent_tree *et, } } -void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) +void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree_info *eti = &sbi->extent_tree[EX_READ]; - struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext; + struct f2fs_extent *i_ext = &F2FS_INODE(&ifolio->page)->i_ext; struct extent_tree *et; struct extent_node *en; struct extent_info ei; @@ -385,9 +419,9 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) if (!__may_extent_tree(inode, EX_READ)) { /* drop largest read extent */ if (i_ext->len) { - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); i_ext->len = 0; - set_page_dirty(ipage); + folio_mark_dirty(ifolio); } set_inode_flag(inode, FI_NO_EXTENT); return; @@ -401,6 +435,11 @@ void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage) if (atomic_read(&et->node_cnt) || !ei.len) goto skip; + if (IS_DEVICE_ALIASING(inode)) { + et->largest = ei; + goto skip; + } + en = __attach_extent_node(sbi, et, &ei, NULL, &et->root.rb_root.rb_node, true); if (en) { @@ -463,6 +502,11 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; } + if (IS_DEVICE_ALIASING(inode)) { + ret = false; + goto out; + } + en = __lookup_extent_node(&et->root, et->cached_en, pgofs); if (!en) goto out; @@ -579,6 +623,30 @@ do_insert: return en; } +static unsigned int __destroy_extent_node(struct inode *inode, + enum extent_type type) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; + unsigned int nr_shrink = type == EX_READ ? + READ_EXTENT_CACHE_SHRINK_NUMBER : + AGE_EXTENT_CACHE_SHRINK_NUMBER; + unsigned int node_cnt = 0; + + if (!et || !atomic_read(&et->node_cnt)) + return 0; + + while (atomic_read(&et->node_cnt)) { + write_lock(&et->lock); + node_cnt += __free_extent_tree(sbi, et, nr_shrink); + write_unlock(&et->lock); + } + + f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); + + return node_cnt; +} + static void __update_extent_tree_range(struct inode *inode, struct extent_info *tei, enum extent_type type) { @@ -649,7 +717,9 @@ static void __update_extent_tree_range(struct inode *inode, } if (end < org_end && (type != EX_READ || - org_end - end >= F2FS_MIN_EXTENT_LEN)) { + (org_end - end >= F2FS_MIN_EXTENT_LEN && + atomic_read(&et->node_cnt) < + sbi->max_read_extent_count))) { if (parts) { __set_extent_info(&ei, end, org_end - end, @@ -717,9 +787,6 @@ static void __update_extent_tree_range(struct inode *inode, } } - if (is_inode_flag_set(inode, FI_NO_EXTENT)) - __free_extent_tree(sbi, et); - if (et->largest_updated) { et->largest_updated = false; updated = true; @@ -737,6 +804,9 @@ update_age_extent_cache: out_read_extent_cache: write_unlock(&et->lock); + if (is_inode_flag_set(inode, FI_NO_EXTENT)) + __destroy_extent_node(inode, EX_READ); + if (updated) f2fs_mark_inode_dirty_sync(inode, true); } @@ -864,7 +934,7 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ if (!__may_extent_tree(dn->inode, type)) return; - ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + + ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), dn->inode) + dn->ofs_in_node; ei.len = 1; @@ -899,10 +969,14 @@ static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink list_for_each_entry_safe(et, next, &eti->zombie_list, list) { if (atomic_read(&et->node_cnt)) { write_lock(&et->lock); - node_cnt += __free_extent_tree(sbi, et); + node_cnt += __free_extent_tree(sbi, et, + nr_shrink - node_cnt - tree_cnt); write_unlock(&et->lock); } - f2fs_bug_on(sbi, atomic_read(&et->node_cnt)); + + if (atomic_read(&et->node_cnt)) + goto unlock_out; + list_del_init(&et->list); radix_tree_delete(&eti->extent_tree_root, et->ino); kmem_cache_free(extent_tree_slab, et); @@ -1041,23 +1115,6 @@ unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE); } -static unsigned int __destroy_extent_node(struct inode *inode, - enum extent_type type) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; - unsigned int node_cnt = 0; - - if (!et || !atomic_read(&et->node_cnt)) - return 0; - - write_lock(&et->lock); - node_cnt = __free_extent_tree(sbi, et); - write_unlock(&et->lock); - - return node_cnt; -} - void f2fs_destroy_extent_node(struct inode *inode) { __destroy_extent_node(inode, EX_READ); @@ -1066,7 +1123,6 @@ void f2fs_destroy_extent_node(struct inode *inode) static void __drop_extent_tree(struct inode *inode, enum extent_type type) { - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et = F2FS_I(inode)->extent_tree[type]; bool updated = false; @@ -1074,7 +1130,6 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type) return; write_lock(&et->lock); - __free_extent_tree(sbi, et); if (type == EX_READ) { set_inode_flag(inode, FI_NO_EXTENT); if (et->largest.len) { @@ -1083,6 +1138,9 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type) } } write_unlock(&et->lock); + + __destroy_extent_node(inode, type); + if (updated) f2fs_mark_inode_dirty_sync(inode, true); } @@ -1156,6 +1214,7 @@ void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi) sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD; sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD; sbi->last_age_weight = LAST_AGE_WEIGHT; + sbi->max_read_extent_count = DEF_MAX_READ_EXTENT_COUNT; } int __init f2fs_create_extent_cache(void) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 33f5449dc22d..9333a22b9a01 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -24,7 +24,6 @@ #include <linux/quotaops.h> #include <linux/part_stat.h> #include <linux/rw_hint.h> -#include <crypto/hash.h> #include <linux/fscrypt.h> #include <linux/fsverity.h> @@ -63,16 +62,26 @@ enum { FAULT_BLKADDR_VALIDITY, FAULT_BLKADDR_CONSISTENCE, FAULT_NO_SEGMENT, + FAULT_INCONSISTENT_FOOTER, + FAULT_TIMEOUT, + FAULT_VMALLOC, FAULT_MAX, }; -#ifdef CONFIG_F2FS_FAULT_INJECTION -#define F2FS_ALL_FAULT_TYPE (GENMASK(FAULT_MAX - 1, 0)) +/* indicate which option to update */ +enum fault_option { + FAULT_RATE = 1, /* only update fault rate */ + FAULT_TYPE = 2, /* only update fault type */ + FAULT_ALL = 4, /* reset all fault injection options/stats */ +}; +#ifdef CONFIG_F2FS_FAULT_INJECTION struct f2fs_fault_info { atomic_t inject_ops; int inject_rate; unsigned int inject_type; + /* Used to account total count of injection for each type */ + unsigned int inject_count[FAULT_MAX]; }; extern const char *f2fs_fault_name[FAULT_MAX]; @@ -115,6 +124,13 @@ extern const char *f2fs_fault_name[FAULT_MAX]; #define F2FS_MOUNT_GC_MERGE 0x02000000 #define F2FS_MOUNT_COMPRESS_CACHE 0x04000000 #define F2FS_MOUNT_AGE_EXTENT_CACHE 0x08000000 +#define F2FS_MOUNT_NAT_BITS 0x10000000 +#define F2FS_MOUNT_INLINECRYPT 0x20000000 +/* + * Some f2fs environments expect to be able to pass the "lazytime" option + * string rather than using the MS_LAZYTIME flag, so this must remain. + */ +#define F2FS_MOUNT_LAZYTIME 0x40000000 #define F2FS_OPTION(sbi) ((sbi)->mount_opt) #define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option) @@ -213,6 +229,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_CASEFOLD 0x00001000 #define F2FS_FEATURE_COMPRESSION 0x00002000 #define F2FS_FEATURE_RO 0x00004000 +#define F2FS_FEATURE_DEVICE_ALIAS 0x00008000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -309,7 +326,7 @@ struct inode_entry { struct fsync_node_entry { struct list_head list; /* list head */ - struct page *page; /* warm node page pointer */ + struct folio *folio; /* warm node folio pointer */ unsigned int seq_id; /* sequence id */ }; @@ -598,6 +615,9 @@ enum { /* congestion wait timeout value, default: 20ms */ #define DEFAULT_IO_TIMEOUT (msecs_to_jiffies(20)) +/* timeout value injected, default: 1000ms */ +#define DEFAULT_FAULT_TIMEOUT (msecs_to_jiffies(1000)) + /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 @@ -634,6 +654,9 @@ enum { #define DEF_HOT_DATA_AGE_THRESHOLD 262144 #define DEF_WARM_DATA_AGE_THRESHOLD 2621440 +/* default max read extent count per inode */ +#define DEF_MAX_READ_EXTENT_COUNT 10240 + /* extent cache type */ enum extent_type { EX_READ, @@ -810,6 +833,7 @@ enum { FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */ FI_ATOMIC_REPLACE, /* indicate atomic replace */ FI_OPENED_FILE, /* indicate file has been opened */ + FI_DONATE_FINISHED, /* indicate page donation of file has been finished */ FI_MAX, /* max flag, never be used */ }; @@ -827,6 +851,7 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ + unsigned int ioprio_hint; /* hint for IO priority */ struct f2fs_rwsem i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ @@ -846,6 +871,11 @@ struct f2fs_inode_info { #endif struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ + + /* linked in global inode list for cache donation */ + struct list_head gdonate_list; + pgoff_t donate_start, donate_end; /* inclusive */ + struct task_struct *atomic_write_task; /* store atomic write task */ struct extent_tree *extent_tree[NR_EXTENT_CACHES]; /* cached extent_tree entry */ @@ -977,11 +1007,11 @@ struct f2fs_nm_info { */ struct dnode_of_data { struct inode *inode; /* vfs inode pointer */ - struct page *inode_page; /* its inode page, NULL is possible */ - struct page *node_page; /* cached direct node page */ + struct folio *inode_folio; /* its inode folio, NULL is possible */ + struct folio *node_folio; /* cached direct node folio */ nid_t nid; /* node id of the direct node block */ unsigned int ofs_in_node; /* data offset in the node page */ - bool inode_page_locked; /* inode page is locked or not */ + bool inode_folio_locked; /* inode folio is locked or not */ bool node_changed; /* is node block changed */ char cur_level; /* level of hole node page */ char max_level; /* level of current page located */ @@ -989,12 +1019,12 @@ struct dnode_of_data { }; static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, - struct page *ipage, struct page *npage, nid_t nid) + struct folio *ifolio, struct folio *nfolio, nid_t nid) { memset(dn, 0, sizeof(*dn)); dn->inode = inode; - dn->inode_page = ipage; - dn->node_page = npage; + dn->inode_folio = ifolio; + dn->node_folio = nfolio; dn->nid = nid; } @@ -1018,7 +1048,7 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, #define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) #define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE) -enum { +enum log_type { CURSEG_HOT_DATA = 0, /* directory entry blocks */ CURSEG_WARM_DATA, /* data blocks */ CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ @@ -1063,7 +1093,6 @@ struct f2fs_sm_info { unsigned int segment_count; /* total # of segments */ unsigned int main_segments; /* # of segments in main area */ unsigned int reserved_segments; /* # of reserved segments */ - unsigned int additional_reserved_segments;/* reserved segs for IO align feature */ unsigned int ovp_segments; /* # of overprovision segments */ /* a threshold to reclaim prefree segments */ @@ -1271,6 +1300,7 @@ enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ DIRTY_META, /* for all dirtied inode metadata */ + DONATE_INODE, /* for all inode to donate pages */ NR_INODE_TYPE, }; @@ -1619,12 +1649,16 @@ struct f2fs_sb_info { /* for extent tree cache */ struct extent_tree_info extent_tree[NR_EXTENT_CACHES]; atomic64_t allocated_data_blocks; /* for block age extent_cache */ + unsigned int max_read_extent_count; /* max read extent count per inode */ /* The threshold used for hot and warm data seperation*/ unsigned int hot_data_age_threshold; unsigned int warm_data_age_threshold; unsigned int last_age_weight; + /* control donate caches */ + unsigned int donate_files; + /* basic filesystem units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ unsigned int log_blocksize; /* log2 block size */ @@ -1656,6 +1690,7 @@ struct f2fs_sb_info { unsigned int nquota_files; /* # of quota sysfile */ struct f2fs_rwsem quota_sem; /* blocking cp for flags */ + struct task_struct *umount_lock_holder; /* s_umount lock holder */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; @@ -1758,14 +1793,12 @@ struct f2fs_sb_info { unsigned int dirty_device; /* for checkpoint data flush */ spinlock_t dev_lock; /* protect dirty_device */ bool aligned_blksize; /* all devices has the same logical blksize */ + unsigned int first_seq_zone_segno; /* first segno in sequential zone */ /* For write statistics */ u64 sectors_written_start; u64 kbytes_written; - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; - /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; @@ -1799,6 +1832,9 @@ struct f2fs_sb_info { u64 committed_atomic_block; u64 revoked_atomic_block; + /* carve out reserved_blocks from total blocks */ + bool carve_out; + #ifdef CONFIG_F2FS_FS_COMPRESSION struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ @@ -1879,6 +1915,7 @@ static inline bool __time_to_inject(struct f2fs_sb_info *sbi, int type, atomic_inc(&ffi->inject_ops); if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { atomic_set(&ffi->inject_ops, 0); + ffi->inject_count[type]++; f2fs_info_ratelimited(sbi, "inject %s in %s of %pS", f2fs_fault_name[type], func, parent_func); return true; @@ -1940,42 +1977,20 @@ static inline unsigned int f2fs_time_to_wait(struct f2fs_sb_info *sbi, /* * Inline functions */ -static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc, - const void *address, unsigned int length) +static inline u32 __f2fs_crc32(u32 crc, const void *address, + unsigned int length) { - struct { - struct shash_desc shash; - char ctx[4]; - } desc; - int err; - - BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) != sizeof(desc.ctx)); - - desc.shash.tfm = sbi->s_chksum_driver; - *(u32 *)desc.ctx = crc; - - err = crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; -} - -static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, - unsigned int length) -{ - return __f2fs_crc32(sbi, F2FS_SUPER_MAGIC, address, length); + return crc32(crc, address, length); } -static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc, - void *buf, size_t buf_size) +static inline u32 f2fs_crc32(const void *address, unsigned int length) { - return f2fs_crc32(sbi, buf, buf_size) == blk_crc; + return __f2fs_crc32(F2FS_SUPER_MAGIC, address, length); } -static inline u32 f2fs_chksum(struct f2fs_sb_info *sbi, u32 crc, - const void *address, unsigned int length) +static inline u32 f2fs_chksum(u32 crc, const void *address, unsigned int length) { - return __f2fs_crc32(sbi, crc, address, length); + return __f2fs_crc32(crc, address, length); } static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) @@ -1998,9 +2013,14 @@ static inline struct f2fs_sb_info *F2FS_M_SB(struct address_space *mapping) return F2FS_I_SB(mapping->host); } +static inline struct f2fs_sb_info *F2FS_F_SB(struct folio *folio) +{ + return F2FS_M_SB(folio->mapping); +} + static inline struct f2fs_sb_info *F2FS_P_SB(struct page *page) { - return F2FS_M_SB(page_file_mapping(page)); + return F2FS_F_SB(page_folio(page)); } static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) @@ -2023,7 +2043,7 @@ static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) return (struct f2fs_checkpoint *)(sbi->ckpt); } -static inline struct f2fs_node *F2FS_NODE(struct page *page) +static inline struct f2fs_node *F2FS_NODE(const struct page *page) { return (struct f2fs_node *)page_address(page); } @@ -2068,6 +2088,16 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) return sbi->node_inode->i_mapping; } +static inline bool is_meta_folio(struct folio *folio) +{ + return folio->mapping == META_MAPPING(F2FS_F_SB(folio)); +} + +static inline bool is_node_folio(struct folio *folio) +{ + return folio->mapping == NODE_MAPPING(F2FS_F_SB(folio)); +} + static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) { return test_bit(type, &sbi->s_flag); @@ -2227,6 +2257,36 @@ static inline void f2fs_up_write(struct f2fs_rwsem *sem) #endif } +static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) +{ + unsigned long flags; + unsigned char *nat_bits; + + /* + * In order to re-enable nat_bits we need to call fsck.f2fs by + * set_sbi_flag(sbi, SBI_NEED_FSCK). But it may give huge cost, + * so let's rely on regular fsck or unclean shutdown. + */ + + if (lock) + spin_lock_irqsave(&sbi->cp_lock, flags); + __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG); + nat_bits = NM_I(sbi)->nat_bits; + NM_I(sbi)->nat_bits = NULL; + if (lock) + spin_unlock_irqrestore(&sbi->cp_lock, flags); + + kvfree(nat_bits); +} + +static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, + struct cp_control *cpc) +{ + bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG); + + return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set; +} + static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { f2fs_down_read(&sbi->cp_rwsem); @@ -2474,8 +2534,14 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t sectors = count << F2FS_LOG_SECTORS_PER_BLOCK; spin_lock(&sbi->stat_lock); - f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); - sbi->total_valid_block_count -= (block_t)count; + if (unlikely(sbi->total_valid_block_count < count)) { + f2fs_warn(sbi, "Inconsistent total_valid_block_count:%u, ino:%lu, count:%u", + sbi->total_valid_block_count, inode->i_ino, count); + sbi->total_valid_block_count = 0; + set_sbi_flag(sbi, SBI_NEED_FSCK); + } else { + sbi->total_valid_block_count -= count; + } if (sbi->reserved_blocks && sbi->current_reserved_blocks < sbi->reserved_blocks) sbi->current_reserved_blocks = min(sbi->reserved_blocks, @@ -2773,33 +2839,46 @@ static inline s64 valid_inode_count(struct f2fs_sb_info *sbi) return percpu_counter_sum_positive(&sbi->total_valid_inode_count); } -static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, - pgoff_t index, bool for_write) +static inline struct folio *f2fs_grab_cache_folio(struct address_space *mapping, + pgoff_t index, bool for_write) { - struct page *page; + struct folio *folio; unsigned int flags; if (IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION)) { + fgf_t fgf_flags; + if (!for_write) - page = find_get_page_flags(mapping, index, - FGP_LOCK | FGP_ACCESSED); + fgf_flags = FGP_LOCK | FGP_ACCESSED; else - page = find_lock_page(mapping, index); - if (page) - return page; + fgf_flags = FGP_LOCK; + folio = __filemap_get_folio(mapping, index, fgf_flags, 0); + if (!IS_ERR(folio)) + return folio; if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) - return NULL; + return ERR_PTR(-ENOMEM); } if (!for_write) - return grab_cache_page(mapping, index); + return filemap_grab_folio(mapping, index); flags = memalloc_nofs_save(); - page = grab_cache_page_write_begin(mapping, index); + folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, + mapping_gfp_mask(mapping)); memalloc_nofs_restore(flags); - return page; + return folio; +} + +static inline struct folio *f2fs_filemap_get_folio( + struct address_space *mapping, pgoff_t index, + fgf_t fgp_flags, gfp_t gfp_mask) +{ + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) + return ERR_PTR(-ENOMEM); + + return __filemap_get_folio(mapping, index, fgp_flags, gfp_mask); } static inline struct page *f2fs_pagecache_get_page( @@ -2812,26 +2891,33 @@ static inline struct page *f2fs_pagecache_get_page( return pagecache_get_page(mapping, index, fgp_flags, gfp_mask); } -static inline void f2fs_put_page(struct page *page, int unlock) +static inline void f2fs_folio_put(struct folio *folio, bool unlock) { - if (!page) + if (IS_ERR_OR_NULL(folio)) return; if (unlock) { - f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page)); - unlock_page(page); + f2fs_bug_on(F2FS_F_SB(folio), !folio_test_locked(folio)); + folio_unlock(folio); } - put_page(page); + folio_put(folio); +} + +static inline void f2fs_put_page(struct page *page, int unlock) +{ + if (!page) + return; + f2fs_folio_put(page_folio(page), unlock); } static inline void f2fs_put_dnode(struct dnode_of_data *dn) { - if (dn->node_page) - f2fs_put_page(dn->node_page, 1); - if (dn->inode_page && dn->node_page != dn->inode_page) - f2fs_put_page(dn->inode_page, 0); - dn->node_page = NULL; - dn->inode_page = NULL; + if (dn->node_folio) + f2fs_folio_put(dn->node_folio, true); + if (dn->inode_folio && dn->node_folio != dn->inode_folio) + f2fs_folio_put(dn->inode_folio, false); + dn->node_folio = NULL; + dn->inode_folio = NULL; } static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, @@ -2955,21 +3041,21 @@ static inline unsigned int get_dnode_base(struct inode *inode, } static inline __le32 *get_dnode_addr(struct inode *inode, - struct page *node_page) + struct folio *node_folio) { - return blkaddr_in_node(F2FS_NODE(node_page)) + - get_dnode_base(inode, node_page); + return blkaddr_in_node(F2FS_NODE(&node_folio->page)) + + get_dnode_base(inode, &node_folio->page); } static inline block_t data_blkaddr(struct inode *inode, - struct page *node_page, unsigned int offset) + struct folio *node_folio, unsigned int offset) { - return le32_to_cpu(*(get_dnode_addr(inode, node_page) + offset)); + return le32_to_cpu(*(get_dnode_addr(inode, node_folio) + offset)); } static inline block_t f2fs_data_blkaddr(struct dnode_of_data *dn) { - return data_blkaddr(dn->inode, dn->node_page, dn->ofs_in_node); + return data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node); } static inline int f2fs_test_bit(unsigned int nr, char *addr) @@ -3046,6 +3132,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ #define F2FS_CASEFOLD_FL 0x40000000 /* Casefolded file */ +#define F2FS_DEVICE_ALIAS_FL 0x80000000 /* File for aliasing a device */ #define F2FS_QUOTA_DEFAULT_FL (F2FS_NOATIME_FL | F2FS_IMMUTABLE_FL) @@ -3061,6 +3148,8 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that are appropriate for non-directories/regular files. */ #define F2FS_OTHER_FLMASK (F2FS_NODUMP_FL | F2FS_NOATIME_FL) +#define IS_DEVICE_ALIASING(inode) (F2FS_I(inode)->i_flags & F2FS_DEVICE_ALIAS_FL) + static inline __u32 f2fs_mask_flags(umode_t mode, __u32 flags) { if (S_ISDIR(mode)) @@ -3277,9 +3366,9 @@ static inline unsigned int addrs_per_page(struct inode *inode, return addrs; } -static inline void *inline_xattr_addr(struct inode *inode, struct page *page) +static inline void *inline_xattr_addr(struct inode *inode, struct folio *folio) { - struct f2fs_inode *ri = F2FS_INODE(page); + struct f2fs_inode *ri = F2FS_INODE(&folio->page); return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - get_inline_xattr_addrs(inode)]); @@ -3294,7 +3383,7 @@ static inline int inline_xattr_size(struct inode *inode) /* * Notice: check inline_data flag without inode page lock is unsafe. - * It could change at any time by f2fs_convert_inline_page(). + * It could change at any time by f2fs_convert_inline_folio(). */ static inline int f2fs_has_inline_data(struct inode *inode) { @@ -3326,9 +3415,9 @@ static inline bool f2fs_is_cow_file(struct inode *inode) return is_inode_flag_set(inode, FI_COW_FILE); } -static inline void *inline_data_addr(struct inode *inode, struct page *page) +static inline void *inline_data_addr(struct inode *inode, struct folio *folio) { - __le32 *addr = get_dnode_addr(inode, page); + __le32 *addr = get_dnode_addr(inode, folio); return (void *)(addr + DEF_INLINE_RESERVED_SIZE); } @@ -3454,6 +3543,14 @@ static inline void *f2fs_kvzalloc(struct f2fs_sb_info *sbi, return f2fs_kvmalloc(sbi, size, flags | __GFP_ZERO); } +static inline void *f2fs_vmalloc(struct f2fs_sb_info *sbi, size_t size) +{ + if (time_to_inject(sbi, FAULT_VMALLOC)) + return NULL; + + return vmalloc(size); +} + static inline int get_extra_isize(struct inode *inode) { return F2FS_I(inode)->i_extra_isize / sizeof(__le32); @@ -3530,12 +3627,12 @@ int f2fs_pin_file_control(struct inode *inode, bool inc); * inode.c */ void f2fs_set_inode_flags(struct inode *inode); -bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page); +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio); void f2fs_inode_chksum_set(struct f2fs_sb_info *sbi, struct page *page); struct inode *f2fs_iget(struct super_block *sb, unsigned long ino); struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino); int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink); -void f2fs_update_inode(struct inode *inode, struct page *node_page); +void f2fs_update_inode(struct inode *inode, struct folio *node_folio); void f2fs_update_inode_page(struct inode *inode); int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc); void f2fs_evict_inode(struct inode *inode); @@ -3575,28 +3672,28 @@ int f2fs_prepare_lookup(struct inode *dir, struct dentry *dentry, struct f2fs_filename *fname); void f2fs_free_filename(struct f2fs_filename *fname); struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d, - const struct f2fs_filename *fname, int *max_slots); + const struct f2fs_filename *fname, int *max_slots, + bool use_hash); int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, unsigned int start_pos, struct fscrypt_str *fstr); void f2fs_do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d); -struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, - const struct f2fs_filename *fname, struct page *dpage); +struct folio *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir, + const struct f2fs_filename *fname, struct folio *dfolio); void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth); int f2fs_room_for_filename(const void *bitmap, int slots, int max_slots); void f2fs_drop_nlink(struct inode *dir, struct inode *inode); struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, - const struct f2fs_filename *fname, - struct page **res_page); + const struct f2fs_filename *fname, struct folio **res_folio); struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - const struct qstr *child, struct page **res_page); -struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p); + const struct qstr *child, struct folio **res_folio); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct folio **f); ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, - struct page **page); + struct folio **folio); void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, - struct page *page, struct inode *inode); -bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, + struct folio *folio, struct inode *inode); +bool f2fs_has_enough_room(struct inode *dir, struct folio *ifolio, const struct f2fs_filename *fname); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct fscrypt_str *name, f2fs_hash_t name_hash, @@ -3607,7 +3704,7 @@ int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); int f2fs_do_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode); -void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, +void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct folio *folio, struct inode *dir, struct inode *inode); int f2fs_do_tmpfile(struct inode *inode, struct inode *dir, struct f2fs_filename *fname); @@ -3628,12 +3725,11 @@ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); int f2fs_dquot_initialize(struct inode *inode); int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); -int f2fs_quota_sync(struct super_block *sb, int type); +int f2fs_do_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); void f2fs_quota_off_umount(struct super_block *sb); void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag); -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, - bool irq_context); +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason); void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error); void f2fs_handle_error_async(struct f2fs_sb_info *sbi, unsigned char error); int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover); @@ -3652,9 +3748,9 @@ struct node_info; int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type); -bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page); +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio); void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi); -void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page); +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio); void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi); int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); @@ -3667,14 +3763,14 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); int f2fs_truncate_xattr_node(struct inode *inode); int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, unsigned int seq_id); -bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi); int f2fs_remove_inode_page(struct inode *inode); -struct page *f2fs_new_inode_page(struct inode *inode); -struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs); +struct folio *f2fs_new_inode_folio(struct inode *inode); +struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs); void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid); -struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid); -struct page *f2fs_get_node_page_ra(struct page *parent, int start); -int f2fs_move_node_page(struct page *node_page, int gc_type); +struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid); +struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino); +struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid); +int f2fs_move_node_folio(struct folio *node_folio, int gc_type); void f2fs_flush_inline_data(struct f2fs_sb_info *sbi); int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, @@ -3687,12 +3783,11 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid); void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid); void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid); int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink); -int f2fs_recover_inline_xattr(struct inode *inode, struct page *page); +int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio); int f2fs_recover_xattr_data(struct inode *inode, struct page *page); int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page); int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, unsigned int segno, struct f2fs_summary_block *sum); -void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi); int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); int f2fs_build_node_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi); @@ -3711,7 +3806,8 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi); int f2fs_flush_device_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len); bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); int f2fs_start_discard_thread(struct f2fs_sb_info *sbi); void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi); @@ -3737,7 +3833,7 @@ int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); -struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno); +struct folio *f2fs_get_sum_folio(struct f2fs_sb_info *sbi, unsigned int segno); void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio, @@ -3754,15 +3850,18 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, block_t old_addr, block_t new_addr, unsigned char version, bool recover_curseg, bool recover_newaddr); -int f2fs_get_segment_temp(int seg_type); +enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, + enum log_type seg_type); int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, block_t blkaddr, unsigned int blkcnt); -void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered, bool locked); +void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, + bool ordered, bool locked); +#define f2fs_wait_on_page_writeback(page, type, ordered, locked) \ + f2fs_folio_wait_writeback(page_folio(page), type, ordered, locked) void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr, block_t len); @@ -3771,8 +3870,7 @@ void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk); int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, unsigned int val, int alloc); void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc); -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi); -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi); +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi); int f2fs_build_segment_manager(struct f2fs_sb_info *sbi); void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi); int __init f2fs_create_segment_manager_caches(void); @@ -3783,6 +3881,13 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno); + +static inline struct inode *fio_inode(struct f2fs_io_info *fio) +{ + return page_folio(fio->page)->mapping->host; +} #define DEF_FRAGMENT_SIZE 4 #define MIN_FRAGMENT_SIZE 1 @@ -3800,10 +3905,10 @@ static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi) void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io, unsigned char reason); void f2fs_flush_ckpt_thread(struct f2fs_sb_info *sbi); -struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index); -struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_grab_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_meta_folio(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_meta_folio_retry(struct f2fs_sb_info *sbi, pgoff_t index); +struct folio *f2fs_get_tmp_folio(struct f2fs_sb_info *sbi, pgoff_t index); bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); bool f2fs_is_valid_blkaddr_raw(struct f2fs_sb_info *sbi, @@ -3859,7 +3964,7 @@ void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, nid_t ino, enum page_type type); void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, - struct bio **bio, struct page *page); + struct bio **bio, struct folio *folio); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_merge_page_bio(struct f2fs_io_info *fio); @@ -3873,14 +3978,14 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); -struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, - blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs); -struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index, - pgoff_t *next_pgofs); -struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index, +struct folio *f2fs_get_read_data_folio(struct inode *inode, pgoff_t index, + blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs); +struct folio *f2fs_find_data_folio(struct inode *inode, pgoff_t index, + pgoff_t *next_pgofs); +struct folio *f2fs_get_lock_data_folio(struct inode *inode, pgoff_t index, bool for_write); -struct page *f2fs_get_new_data_page(struct inode *inode, - struct page *ipage, pgoff_t index, bool new_i_size); +struct folio *f2fs_get_new_data_folio(struct inode *inode, + struct folio *ifolio, pgoff_t index, bool new_i_size); int f2fs_do_write_data_page(struct f2fs_io_info *fio); int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, @@ -3935,6 +4040,19 @@ void f2fs_destroy_recovery_cache(void); * debug.c */ #ifdef CONFIG_F2FS_STAT_FS +enum { + DEVSTAT_INUSE, + DEVSTAT_DIRTY, + DEVSTAT_FULL, + DEVSTAT_FREE, + DEVSTAT_PREFREE, + DEVSTAT_MAX, +}; + +struct f2fs_dev_stats { + unsigned int devstats[2][DEVSTAT_MAX]; /* 0: segs, 1: secs */ +}; + struct f2fs_stat_info { struct list_head stat_list; struct f2fs_sb_info *sbi; @@ -3955,7 +4073,8 @@ struct f2fs_stat_info { unsigned long long allocated_data_blocks; int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta; int ndirty_data, ndirty_qdata; - unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all; + unsigned int ndirty_dirs, ndirty_files, ndirty_all; + unsigned int nquota_files, ndonate_files; int nats, dirty_nats, sits, dirty_sits; int free_nids, avail_nids, alloc_nids; int total_count, utilization; @@ -3998,6 +4117,7 @@ struct f2fs_stat_info { unsigned int block_count[2]; unsigned int inplace_count; unsigned long long base_mem, cache_mem, page_mem; + struct f2fs_dev_stats *dev_stats; }; static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) @@ -4185,25 +4305,24 @@ extern struct kmem_cache *f2fs_inode_entry_slab; bool f2fs_may_inline_data(struct inode *inode); bool f2fs_sanity_check_inline_data(struct inode *inode, struct page *ipage); bool f2fs_may_inline_dentry(struct inode *inode); -void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage); -void f2fs_truncate_inline_inode(struct inode *inode, - struct page *ipage, u64 from); +void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio); +void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio, + u64 from); int f2fs_read_inline_data(struct inode *inode, struct folio *folio); -int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); +int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio); int f2fs_convert_inline_inode(struct inode *inode); int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); int f2fs_write_inline_data(struct inode *inode, struct folio *folio); -int f2fs_recover_inline_data(struct inode *inode, struct page *npage); +int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, - const struct f2fs_filename *fname, - struct page **res_page); + const struct f2fs_filename *fname, struct folio **res_folio, + bool use_hash); int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, - struct page *ipage); + struct folio *ifolio); int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, - struct page *page, struct inode *dir, - struct inode *inode); + struct folio *folio, struct inode *dir, struct inode *inode); bool f2fs_empty_inline_dir(struct inode *dir); int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr); @@ -4218,6 +4337,8 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, struct shrink_control *sc); unsigned long f2fs_shrink_scan(struct shrinker *shrink, struct shrink_control *sc); +unsigned int f2fs_donate_files(void); +void f2fs_reclaim_caches(unsigned int reclaim_caches_kb); void f2fs_join_shrinker(struct f2fs_sb_info *sbi); void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); @@ -4234,7 +4355,7 @@ int __init f2fs_create_extent_cache(void); void f2fs_destroy_extent_cache(void); /* read extent cache ops */ -void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage); +void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio); bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs, struct extent_info *ei); bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index, @@ -4315,7 +4436,7 @@ enum cluster_check_type { CLUSTER_RAW_BLKS /* return # of raw blocks in a cluster */ }; bool f2fs_is_compressed_page(struct page *page); -struct page *f2fs_compress_control_page(struct page *page); +struct folio *f2fs_compress_control_folio(struct folio *folio); int f2fs_prepare_compress_overwrite(struct inode *inode, struct page **pagep, pgoff_t index, void **fsdata); bool f2fs_compress_write_end(struct inode *inode, void *fsdata, @@ -4350,7 +4471,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed, bool in_task); -void f2fs_put_page_dic(struct page *page, bool in_task); +void f2fs_put_folio_dic(struct folio *folio, bool in_task); unsigned int f2fs_cluster_blocks_are_contiguous(struct dnode_of_data *dn, unsigned int ofs_in_node); int f2fs_init_compress_ctx(struct compress_ctx *cc); @@ -4363,10 +4484,11 @@ void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); int __init f2fs_init_compress_cache(void); void f2fs_destroy_compress_cache(void); struct address_space *COMPRESS_MAPPING(struct f2fs_sb_info *sbi); -void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len); void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, nid_t ino, block_t blkaddr); -bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, struct page *page, +bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, struct folio *folio, block_t blkaddr); void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino); #define inc_compr_inode_stat(inode) \ @@ -4391,7 +4513,7 @@ static inline bool f2fs_is_compress_backend_ready(struct inode *inode) return false; } static inline bool f2fs_is_compress_level_valid(int alg, int lvl) { return false; } -static inline struct page *f2fs_compress_control_page(struct page *page) +static inline struct folio *f2fs_compress_control_folio(struct folio *folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); @@ -4405,7 +4527,7 @@ static inline void f2fs_end_read_compressed_page(struct page *page, { WARN_ON_ONCE(1); } -static inline void f2fs_put_page_dic(struct page *page, bool in_task) +static inline void f2fs_put_folio_dic(struct folio *folio, bool in_task) { WARN_ON_ONCE(1); } @@ -4418,12 +4540,12 @@ static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { } static inline int __init f2fs_init_compress_cache(void) { return 0; } static inline void f2fs_destroy_compress_cache(void) { } -static inline void f2fs_invalidate_compress_page(struct f2fs_sb_info *sbi, - block_t blkaddr) { } +static inline void f2fs_invalidate_compress_pages_range(struct f2fs_sb_info *sbi, + block_t blkaddr, unsigned int len) { } static inline void f2fs_cache_compressed_page(struct f2fs_sb_info *sbi, struct page *page, nid_t ino, block_t blkaddr) { } -static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi, - struct page *page, block_t blkaddr) { return false; } +static inline bool f2fs_load_compressed_folio(struct f2fs_sb_info *sbi, + struct folio *folio, block_t blkaddr) { return false; } static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi, nid_t ino) { } #define inc_compr_inode_stat(inode) do { } while (0) @@ -4510,14 +4632,19 @@ F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); F2FS_FEATURE_FUNCS(casefold, CASEFOLD); F2FS_FEATURE_FUNCS(compression, COMPRESSION); F2FS_FEATURE_FUNCS(readonly, RO); +F2FS_FEATURE_FUNCS(device_alias, DEVICE_ALIAS); #ifdef CONFIG_BLK_DEV_ZONED -static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, - block_t blkaddr) +static inline bool f2fs_zone_is_seq(struct f2fs_sb_info *sbi, int devi, + unsigned int zone) { - unsigned int zno = blkaddr / sbi->blocks_per_blkz; + return test_bit(zone, FDEV(devi).blkz_seq); +} - return test_bit(zno, FDEV(devi).blkz_seq); +static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, + block_t blkaddr) +{ + return f2fs_zone_is_seq(sbi, devi, blkaddr / sbi->blocks_per_blkz); } #endif @@ -4589,15 +4716,31 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } -static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, +static inline bool f2fs_is_sequential_zone_area(struct f2fs_sb_info *sbi, block_t blkaddr) { if (f2fs_sb_has_blkzoned(sbi)) { +#ifdef CONFIG_BLK_DEV_ZONED int devi = f2fs_target_device_index(sbi, blkaddr); - return !bdev_is_zoned(FDEV(devi).bdev); + if (!bdev_is_zoned(FDEV(devi).bdev)) + return false; + + if (f2fs_is_multi_device(sbi)) { + if (blkaddr < FDEV(devi).start_blk || + blkaddr > FDEV(devi).end_blk) { + f2fs_err(sbi, "Invalid block %x", blkaddr); + return false; + } + blkaddr -= FDEV(devi).start_blk; + } + + return f2fs_blkz_is_seq(sbi, devi, blkaddr); +#else + return false; +#endif } - return true; + return false; } static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) @@ -4652,10 +4795,11 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx) #ifdef CONFIG_F2FS_FAULT_INJECTION extern int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, - unsigned long type); + unsigned long type, enum fault_option fo); #else static inline int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, - unsigned long rate, unsigned long type) + unsigned long rate, unsigned long type, + enum fault_option fo) { return 0; } @@ -4685,6 +4829,19 @@ static inline void f2fs_io_schedule_timeout(long timeout) io_schedule_timeout(timeout); } +static inline void f2fs_io_schedule_timeout_killable(long timeout) +{ + while (timeout) { + if (fatal_signal_pending(current)) + return; + set_current_state(TASK_UNINTERRUPTIBLE); + io_schedule_timeout(DEFAULT_IO_TIMEOUT); + if (timeout <= DEFAULT_IO_TIMEOUT) + return; + timeout -= DEFAULT_IO_TIMEOUT; + } +} + static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, struct folio *folio, enum page_type type) { @@ -4714,13 +4871,13 @@ static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, int i = 0; do { - struct page *page; + struct folio *folio; - page = find_get_page(META_MAPPING(sbi), blkaddr + i); - if (page) { - if (folio_test_writeback(page_folio(page))) + folio = filemap_get_folio(META_MAPPING(sbi), blkaddr + i); + if (!IS_ERR(folio)) { + if (folio_test_writeback(folio)) need_submit = true; - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); } } while (++i < cnt && !need_submit); @@ -4734,10 +4891,10 @@ static inline void f2fs_truncate_meta_inode_pages(struct f2fs_sb_info *sbi, } static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, - block_t blkaddr) + block_t blkaddr, unsigned int len) { - f2fs_truncate_meta_inode_pages(sbi, blkaddr, 1); - f2fs_invalidate_compress_page(sbi, blkaddr); + f2fs_truncate_meta_inode_pages(sbi, blkaddr, len); + f2fs_invalidate_compress_pages_range(sbi, blkaddr, len); } #define EFSBADCRC EBADMSG /* Bad CRC detected */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 321d8ffbab6e..696131e655ed 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,6 +35,17 @@ #include <trace/events/f2fs.h> #include <uapi/linux/f2fs.h> +static void f2fs_zero_post_eof_page(struct inode *inode, loff_t new_size) +{ + loff_t old_size = i_size_read(inode); + + if (old_size >= new_size) + return; + + /* zero or drop pages only in range of [old_size, new_size] */ + truncate_pagecache(inode, old_size); +} + static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) { struct inode *inode = file_inode(vmf->vma->vm_file); @@ -103,8 +114,13 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, (folio->index + 1) << PAGE_SHIFT); + filemap_invalidate_unlock(inode->i_mapping); + file_update_time(vmf->vma->vm_file); filemap_invalidate_lock_shared(inode->i_mapping); + folio_lock(folio); if (unlikely(folio->mapping != inode->i_mapping || folio_pos(folio) > i_size_read(inode) || @@ -131,7 +147,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto out_sem; } - f2fs_wait_on_page_writeback(folio_page(folio, 0), DATA, false, true); + f2fs_folio_wait_writeback(folio, DATA, false, true); /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -226,12 +242,13 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) { - struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + struct folio *i = filemap_get_folio(NODE_MAPPING(sbi), ino); bool ret = false; /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || f2fs_need_inode_block_update(sbi, ino)) + if ((!IS_ERR(i) && folio_test_dirty(i)) || + f2fs_need_inode_block_update(sbi, ino)) ret = true; - f2fs_put_page(i, 0); + f2fs_folio_put(i, false); return ret; } @@ -260,7 +277,6 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, - .for_reclaim = 0, }; unsigned int seq_id = 0; @@ -403,7 +419,7 @@ static bool __found_offset(struct address_space *mapping, bool compressed_cluster = false; if (f2fs_compressed_file(inode)) { - block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_page, + block_t first_blkaddr = data_blkaddr(dn->inode, dn->node_folio, ALIGN_DOWN(dn->ofs_in_node, F2FS_I(inode)->i_cluster_size)); compressed_cluster = first_blkaddr == COMPRESS_ADDR; @@ -473,7 +489,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) } } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); /* find data/hole in dnode block */ for (; dn.ofs_in_node < end_offset; @@ -554,19 +570,21 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) static int finish_preallocate_blocks(struct inode *inode) { - int ret; + int ret = 0; + bool opened; - inode_lock(inode); - if (is_inode_flag_set(inode, FI_OPENED_FILE)) { - inode_unlock(inode); + f2fs_down_read(&F2FS_I(inode)->i_sem); + opened = is_inode_flag_set(inode, FI_OPENED_FILE); + f2fs_up_read(&F2FS_I(inode)->i_sem); + if (opened) return 0; - } - if (!file_should_truncate(inode)) { - set_inode_flag(inode, FI_OPENED_FILE); - inode_unlock(inode); - return 0; - } + inode_lock(inode); + if (is_inode_flag_set(inode, FI_OPENED_FILE)) + goto out_unlock; + + if (!file_should_truncate(inode)) + goto out_update; f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); @@ -576,16 +594,17 @@ static int finish_preallocate_blocks(struct inode *inode) filemap_invalidate_unlock(inode->i_mapping); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - - if (!ret) - set_inode_flag(inode, FI_OPENED_FILE); - - inode_unlock(inode); if (ret) - return ret; + goto out_unlock; file_dont_truncate(inode); - return 0; +out_update: + f2fs_down_write(&F2FS_I(inode)->i_sem); + set_inode_flag(inode, FI_OPENED_FILE); + f2fs_up_write(&F2FS_I(inode)->i_sem); +out_unlock: + inode_unlock(inode); + return ret; } static int f2fs_file_open(struct inode *inode, struct file *filp) @@ -621,8 +640,11 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) int cluster_index = 0, valid_blocks = 0; int cluster_size = F2FS_I(dn->inode)->i_cluster_size; bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks); + block_t blkstart; + int blklen = 0; - addr = get_dnode_addr(dn->inode, dn->node_page) + ofs; + addr = get_dnode_addr(dn->inode, dn->node_folio) + ofs; + blkstart = le32_to_cpu(*addr); /* Assumption: truncation starts with cluster */ for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { @@ -638,26 +660,44 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) } if (blkaddr == NULL_ADDR) - continue; + goto next; f2fs_set_data_blkaddr(dn, NULL_ADDR); if (__is_valid_data_blkaddr(blkaddr)) { if (time_to_inject(sbi, FAULT_BLKADDR_CONSISTENCE)) - continue; + goto next; if (!f2fs_is_valid_blkaddr_raw(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - continue; + goto next; if (compressed_cluster) valid_blocks++; } - f2fs_invalidate_blocks(sbi, blkaddr); + if (blkstart + blklen == blkaddr) { + blklen++; + } else { + f2fs_invalidate_blocks(sbi, blkstart, blklen); + blkstart = blkaddr; + blklen = 1; + } if (!released || blkaddr != COMPRESS_ADDR) nr_free++; + + continue; + +next: + if (blklen) + f2fs_invalidate_blocks(sbi, blkstart, blklen); + + blkstart = le32_to_cpu(*(addr + 1)); + blklen = 0; } + if (blklen) + f2fs_invalidate_blocks(sbi, blkstart, blklen); + if (compressed_cluster) f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); @@ -667,7 +707,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) * once we invalidate valid blkaddr in range [ofs, ofs + count], * we will invalidate all blkaddr in the whole range. */ - fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), + fofs = f2fs_start_bidx_of_node(ofs_of_node(&dn->node_folio->page), dn->inode) + ofs; f2fs_update_read_extent_cache_range(dn, fofs, 0, len); f2fs_update_age_extent_cache_range(dn, fofs, len); @@ -686,31 +726,33 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, loff_t offset = from & (PAGE_SIZE - 1); pgoff_t index = from >> PAGE_SHIFT; struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; if (!offset && !cache_only) return 0; if (cache_only) { - page = find_lock_page(mapping, index); - if (page && PageUptodate(page)) + folio = filemap_lock_folio(mapping, index); + if (IS_ERR(folio)) + return 0; + if (folio_test_uptodate(folio)) goto truncate_out; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return 0; } - page = f2fs_get_lock_data_page(inode, index, true); - if (IS_ERR(page)) - return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); + folio = f2fs_get_lock_data_folio(inode, index, true); + if (IS_ERR(folio)) + return PTR_ERR(folio) == -ENOENT ? 0 : PTR_ERR(folio); truncate_out: - f2fs_wait_on_page_writeback(page, DATA, true, true); - zero_user(page, offset, PAGE_SIZE - offset); + f2fs_folio_wait_writeback(folio, DATA, true, true); + folio_zero_segment(folio, offset, folio_size(folio)); /* An encrypted inode should have a key and truncate the last page. */ f2fs_bug_on(F2FS_I_SB(inode), cache_only && IS_ENCRYPTED(inode)); if (!cache_only) - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); return 0; } @@ -720,11 +762,16 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; - struct page *ipage; + struct folio *ifolio; bool truncate_page = false; trace_f2fs_truncate_blocks_enter(inode, from); + if (IS_DEVICE_ALIASING(inode) && from) { + err = -EINVAL; + goto out_err; + } + free_from = (pgoff_t)F2FS_BLK_ALIGN(from); if (free_from >= max_file_blocks(inode)) @@ -733,20 +780,33 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) if (lock) f2fs_lock_op(sbi); - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); + goto out; + } + + if (IS_DEVICE_ALIASING(inode)) { + struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ]; + struct extent_info ei = et->largest; + + f2fs_invalidate_blocks(sbi, ei.blk, ei.len); + + dec_valid_block_count(sbi, inode, ei.len); + f2fs_update_time(sbi, REQ_TIME); + + f2fs_folio_put(ifolio, true); goto out; } if (f2fs_has_inline_data(inode)) { - f2fs_truncate_inline_inode(inode, ipage, from); - f2fs_put_page(ipage, 1); + f2fs_truncate_inline_inode(inode, ifolio, from); + f2fs_folio_put(ifolio, true); truncate_page = true; goto out; } - set_new_dnode(&dn, inode, ipage, NULL, 0); + set_new_dnode(&dn, inode, ifolio, NULL, 0); err = f2fs_get_dnode_of_data(&dn, free_from, LOOKUP_NODE_RA); if (err) { if (err == -ENOENT) @@ -754,12 +814,12 @@ int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock) goto out; } - count = ADDRS_PER_PAGE(dn.node_page, inode); + count = ADDRS_PER_PAGE(&dn.node_folio->page, inode); count -= dn.ofs_in_node; f2fs_bug_on(sbi, count < 0); - if (dn.ofs_in_node || IS_INODE(dn.node_page)) { + if (dn.ofs_in_node || IS_INODE(&dn.node_folio->page)) { f2fs_truncate_data_blocks_range(&dn, count); free_from += count; } @@ -774,7 +834,7 @@ free_partial: /* lastly zero out the first data page */ if (!err) err = truncate_partial_data_page(inode, from, truncate_page); - +out_err: trace_f2fs_truncate_blocks_exit(inode, err); return err; } @@ -863,7 +923,11 @@ static bool f2fs_force_buffered_io(struct inode *inode, int rw) return true; if (f2fs_compressed_file(inode)) return true; - if (f2fs_has_inline_data(inode)) + /* + * only force direct read to use buffered IO, for direct write, + * it expects inline data conversion before committing IO. + */ + if (f2fs_has_inline_data(inode) && rw == READ) return true; /* disallow direct IO if any of devices has unaligned blksize */ @@ -992,7 +1056,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, return -EPERM; if ((attr->ia_valid & ATTR_SIZE)) { - if (!f2fs_is_compress_backend_ready(inode)) + if (!f2fs_is_compress_backend_ready(inode) || + IS_DEVICE_ALIASING(inode)) return -EOPNOTSUPP; if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && !IS_ALIGNED(attr->ia_size, @@ -1060,6 +1125,8 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, f2fs_down_write(&fi->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + if (attr->ia_size > old_size) + f2fs_zero_post_eof_page(inode, attr->ia_size); truncate_setsize(inode, attr->ia_size); if (attr->ia_size <= old_size) @@ -1115,7 +1182,7 @@ static int fill_zero(struct inode *inode, pgoff_t index, loff_t start, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; + struct folio *folio; if (!len) return 0; @@ -1123,16 +1190,16 @@ static int fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - page = f2fs_get_new_data_page(inode, NULL, index, false); + folio = f2fs_get_new_data_folio(inode, NULL, index, false); f2fs_unlock_op(sbi); - if (IS_ERR(page)) - return PTR_ERR(page); + if (IS_ERR(folio)) + return PTR_ERR(folio); - f2fs_wait_on_page_writeback(page, DATA, true, true); - zero_user(page, start, len); - set_page_dirty(page); - f2fs_put_page(page, 1); + f2fs_folio_wait_writeback(folio, DATA, true, true); + folio_zero_range(folio, start, len); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); return 0; } @@ -1155,7 +1222,7 @@ int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end) return err; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); count = min(end_offset - dn.ofs_in_node, pg_end - pg_start); f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset); @@ -1178,6 +1245,10 @@ static int f2fs_punch_hole(struct inode *inode, loff_t offset, loff_t len) if (ret) return ret; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1250,7 +1321,7 @@ next_dnode: goto next; } - done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - + done = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, inode) - dn.ofs_in_node, len); for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { *blkaddr = f2fs_data_blkaddr(&dn); @@ -1298,7 +1369,7 @@ static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, ret = f2fs_get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); if (ret) { dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, *blkaddr); + f2fs_invalidate_blocks(sbi, *blkaddr, 1); } else { f2fs_update_data_blkaddr(&dn, *blkaddr); } @@ -1339,7 +1410,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } ilen = min((pgoff_t) - ADDRS_PER_PAGE(dn.node_page, dst_inode) - + ADDRS_PER_PAGE(&dn.node_folio->page, dst_inode) - dn.ofs_in_node, len - i); do { dn.data_blkaddr = f2fs_data_blkaddr(&dn); @@ -1364,26 +1435,26 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_dnode(&dn); } else { - struct page *psrc, *pdst; + struct folio *fsrc, *fdst; - psrc = f2fs_get_lock_data_page(src_inode, + fsrc = f2fs_get_lock_data_folio(src_inode, src + i, true); - if (IS_ERR(psrc)) - return PTR_ERR(psrc); - pdst = f2fs_get_new_data_page(dst_inode, NULL, dst + i, + if (IS_ERR(fsrc)) + return PTR_ERR(fsrc); + fdst = f2fs_get_new_data_folio(dst_inode, NULL, dst + i, true); - if (IS_ERR(pdst)) { - f2fs_put_page(psrc, 1); - return PTR_ERR(pdst); + if (IS_ERR(fdst)) { + f2fs_folio_put(fsrc, true); + return PTR_ERR(fdst); } - f2fs_wait_on_page_writeback(pdst, DATA, true, true); + f2fs_folio_wait_writeback(fdst, DATA, true, true); - memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); - set_page_dirty(pdst); - set_page_private_gcing(pdst); - f2fs_put_page(pdst, 1); - f2fs_put_page(psrc, 1); + memcpy_folio(fdst, 0, fsrc, 0, PAGE_SIZE); + folio_mark_dirty(fdst); + set_page_private_gcing(&fdst->page); + f2fs_folio_put(fdst, true); + f2fs_folio_put(fsrc, true); ret = f2fs_truncate_hole(src_inode, src + i, src + i + 1); @@ -1461,6 +1532,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + f2fs_lock_op(sbi); f2fs_drop_extent_tree(inode); truncate_pagecache(inode, offset); @@ -1550,7 +1623,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start, break; } - f2fs_invalidate_blocks(sbi, dn->data_blkaddr); + f2fs_invalidate_blocks(sbi, dn->data_blkaddr, 1); f2fs_set_data_blkaddr(dn, NEW_ADDR); } @@ -1582,6 +1655,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; + filemap_invalidate_lock(mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(mapping); + pg_start = ((unsigned long long) offset) >> PAGE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT; @@ -1629,7 +1706,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, goto out; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); end = min(pg_end, end_offset - dn.ofs_in_node + index); ret = f2fs_do_zero_range(&dn, index, end); @@ -1713,6 +1790,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) /* avoid gc operation during block exchange */ f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(mapping); + + f2fs_zero_post_eof_page(inode, offset + len); truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { @@ -1770,6 +1849,10 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, if (err) return err; + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, offset + len); + filemap_invalidate_unlock(inode->i_mapping); + f2fs_balance_fs(sbi, true); pg_start = ((unsigned long long)offset) >> PAGE_SHIFT; @@ -1790,17 +1873,32 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, map.m_len = sec_blks; next_alloc: - if (has_not_enough_free_secs(sbi, 0, + f2fs_down_write(&sbi->pin_sem); + + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + if (has_not_enough_free_secs(sbi, 0, 0)) { + f2fs_up_write(&sbi->pin_sem); + err = -ENOSPC; + f2fs_warn_ratelimited(sbi, + "ino:%lu, start:%lu, end:%lu, need to trigger GC to " + "reclaim enough free segment when checkpoint is enabled", + inode->i_ino, pg_start, pg_end); + goto out_err; + } + } + + if (has_not_enough_free_secs(sbi, 0, f2fs_sb_has_blkzoned(sbi) ? + ZONED_PIN_SEC_REQUIRED_COUNT : GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { f2fs_down_write(&sbi->gc_lock); stat_inc_gc_call_count(sbi, FOREGROUND); err = f2fs_gc(sbi, &gc_control); - if (err && err != -ENODATA) + if (err && err != -ENODATA) { + f2fs_up_write(&sbi->pin_sem); goto out_err; + } } - f2fs_down_write(&sbi->pin_sem); - err = f2fs_allocate_pinning_section(sbi); if (err) { f2fs_up_write(&sbi->pin_sem); @@ -1860,7 +1958,7 @@ static long f2fs_fallocate(struct file *file, int mode, return -EIO; if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) return -ENOSPC; - if (!f2fs_is_compress_backend_ready(inode)) + if (!f2fs_is_compress_backend_ready(inode) || IS_DEVICE_ALIASING(inode)) return -EOPNOTSUPP; /* f2fs only support ->fallocate for regular file */ @@ -2343,9 +2441,12 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, if (readonly) goto out; - /* grab sb->s_umount to avoid racing w/ remount() */ + /* + * grab sb->s_umount to avoid racing w/ remount() and other shutdown + * paths. + */ if (need_lock) - down_read(&sbi->sb->s_umount); + down_write(&sbi->sb->s_umount); f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); @@ -2354,7 +2455,7 @@ int f2fs_do_shutdown(struct f2fs_sb_info *sbi, unsigned int flag, clear_opt(sbi, DISCARD); if (need_lock) - up_read(&sbi->sb->s_umount); + up_write(&sbi->sb->s_umount); f2fs_update_time(sbi, REQ_TIME); out: @@ -2400,6 +2501,61 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg) return ret; } +static int f2fs_keep_noreuse_range(struct inode *inode, + loff_t offset, loff_t len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + u64 max_bytes = F2FS_BLK_TO_BYTES(max_file_blocks(inode)); + u64 start, end; + int ret = 0; + + if (!S_ISREG(inode->i_mode)) + return 0; + + if (offset >= max_bytes || len > max_bytes || + (offset + len) > max_bytes) + return 0; + + start = offset >> PAGE_SHIFT; + end = DIV_ROUND_UP(offset + len, PAGE_SIZE); + + inode_lock(inode); + if (f2fs_is_atomic_file(inode)) { + inode_unlock(inode); + return 0; + } + + spin_lock(&sbi->inode_lock[DONATE_INODE]); + /* let's remove the range, if len = 0 */ + if (!len) { + if (!list_empty(&F2FS_I(inode)->gdonate_list)) { + list_del_init(&F2FS_I(inode)->gdonate_list); + sbi->donate_files--; + if (is_inode_flag_set(inode, FI_DONATE_FINISHED)) + ret = -EALREADY; + else + set_inode_flag(inode, FI_DONATE_FINISHED); + } else + ret = -ENOENT; + } else { + if (list_empty(&F2FS_I(inode)->gdonate_list)) { + list_add_tail(&F2FS_I(inode)->gdonate_list, + &sbi->inode_list[DONATE_INODE]); + sbi->donate_files++; + } else { + list_move_tail(&F2FS_I(inode)->gdonate_list, + &sbi->inode_list[DONATE_INODE]); + } + F2FS_I(inode)->donate_start = start; + F2FS_I(inode)->donate_end = end - 1; + clear_inode_flag(inode, FI_DONATE_FINISHED); + } + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + inode_unlock(inode); + + return ret; +} + static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -2810,19 +2966,19 @@ do_map: idx = map.m_lblk; while (idx < map.m_lblk + map.m_len && cnt < BLKS_PER_SEG(sbi)) { - struct page *page; + struct folio *folio; - page = f2fs_get_lock_data_page(inode, idx, true); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_lock_data_folio(inode, idx, true); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto clear_out; } - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); - set_page_dirty(page); - set_page_private_gcing(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + set_page_private_gcing(&folio->page); + f2fs_folio_put(folio, true); idx++; cnt++; @@ -2861,7 +3017,7 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode)) + if (!S_ISREG(inode->i_mode)) return -EINVAL; if (f2fs_readonly(sbi->sb)) @@ -3038,32 +3194,27 @@ out: static int __f2fs_ioc_move_range(struct file *filp, struct f2fs_move_range *range) { - struct fd dst; int err; if (!(filp->f_mode & FMODE_READ) || !(filp->f_mode & FMODE_WRITE)) return -EBADF; - dst = fdget(range->dst_fd); - if (!fd_file(dst)) + CLASS(fd, dst)(range->dst_fd); + if (fd_empty(dst)) return -EBADF; - if (!(fd_file(dst)->f_mode & FMODE_WRITE)) { - err = -EBADF; - goto err_out; - } + if (!(fd_file(dst)->f_mode & FMODE_WRITE)) + return -EBADF; err = mnt_want_write_file(filp); if (err) - goto err_out; + return err; err = f2fs_move_file_range(filp, range->pos_in, fd_file(dst), range->pos_out, range->len); mnt_drop_write_file(filp); -err_out: - fdput(dst); return err; } @@ -3296,6 +3447,9 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + if (IS_DEVICE_ALIASING(inode)) + return -EINVAL; + if (fi->i_gc_failures >= sbi->gc_pin_file_threshold) { f2fs_warn(sbi, "%s: Enable GC = ino %lx after %x GC trials", __func__, inode->i_ino, fi->i_gc_failures); @@ -3326,6 +3480,9 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) if (f2fs_readonly(sbi->sb)) return -EROFS; + if (!pin && IS_DEVICE_ALIASING(inode)) + return -EOPNOTSUPP; + ret = mnt_want_write_file(filp); if (ret) return ret; @@ -3391,6 +3548,29 @@ static int f2fs_ioc_get_pin_file(struct file *filp, unsigned long arg) return put_user(pin, (u32 __user *)arg); } +static int f2fs_ioc_get_dev_alias_file(struct file *filp, unsigned long arg) +{ + return put_user(IS_DEVICE_ALIASING(file_inode(filp)) ? 1 : 0, + (u32 __user *)arg); +} + +static int f2fs_ioc_io_prio(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + __u32 level; + + if (get_user(level, (__u32 __user *)arg)) + return -EFAULT; + + if (!S_ISREG(inode->i_mode) || level >= F2FS_IOPRIO_MAX) + return -EINVAL; + + inode_lock(inode); + F2FS_I(inode)->ioprio_hint = level; + inode_unlock(inode); + return 0; +} + int f2fs_precache_extents(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); @@ -3577,7 +3757,7 @@ static int release_compress_blocks(struct dnode_of_data *dn, pgoff_t count) int i; for (i = 0; i < count; i++) { - blkaddr = data_blkaddr(dn->inode, dn->node_page, + blkaddr = data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) @@ -3695,7 +3875,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, fi->i_cluster_size); @@ -3746,7 +3926,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, int i; for (i = 0; i < count; i++) { - blkaddr = data_blkaddr(dn->inode, dn->node_page, + blkaddr = data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node + i); if (!__is_valid_data_blkaddr(blkaddr)) @@ -3763,7 +3943,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, int ret; for (i = 0; i < cluster_size; i++) { - blkaddr = data_blkaddr(dn->inode, dn->node_page, + blkaddr = data_blkaddr(dn->inode, dn->node_folio, dn->ofs_in_node + i); if (i == 0) { @@ -3792,7 +3972,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, to_reserved = cluster_size - compr_blocks - reserved; /* for the case all blocks in cluster were reserved */ - if (to_reserved == 1) { + if (reserved && to_reserved == 1) { dn->ofs_in_node += cluster_size; goto next; } @@ -3873,7 +4053,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) break; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); count = min(end_offset - dn.ofs_in_node, last_idx - page_idx); count = round_up(count, fi->i_cluster_size); @@ -4037,7 +4217,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) goto out; } - end_offset = ADDRS_PER_PAGE(dn.node_page, inode); + end_offset = ADDRS_PER_PAGE(&dn.node_folio->page, inode); count = min(end_offset - dn.ofs_in_node, pg_end - index); for (i = 0; i < count; i++, index++, dn.ofs_in_node++) { struct block_device *cur_bdev; @@ -4209,34 +4389,36 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) { DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, page_idx); struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; pgoff_t redirty_idx = page_idx; - int i, page_len = 0, ret = 0; + int page_len = 0, ret = 0; page_cache_ra_unbounded(&ractl, len, 0); - for (i = 0; i < len; i++, page_idx++) { - page = read_cache_page(mapping, page_idx, NULL, NULL); - if (IS_ERR(page)) { - ret = PTR_ERR(page); + do { + folio = read_cache_folio(mapping, page_idx, NULL, NULL); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); break; } - page_len++; - } + page_len += folio_nr_pages(folio) - (page_idx - folio->index); + page_idx = folio_next_index(folio); + } while (page_len < len); - for (i = 0; i < page_len; i++, redirty_idx++) { - page = find_lock_page(mapping, redirty_idx); + do { + folio = filemap_lock_folio(mapping, redirty_idx); - /* It will never fail, when page has pinned above */ - f2fs_bug_on(F2FS_I_SB(inode), !page); + /* It will never fail, when folio has pinned above */ + f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(folio)); - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); - set_page_dirty(page); - set_page_private_gcing(page); - f2fs_put_page(page, 1); - f2fs_put_page(page, 0); - } + folio_mark_dirty(folio); + set_page_private_gcing(&folio->page); + redirty_idx = folio_next_index(folio); + folio_unlock(folio); + folio_put_refs(folio, 2); + } while (redirty_idx < page_idx); return ret; } @@ -4490,6 +4672,10 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_decompress_file(filp); case F2FS_IOC_COMPRESS_FILE: return f2fs_ioc_compress_file(filp); + case F2FS_IOC_GET_DEV_ALIAS_FILE: + return f2fs_ioc_get_dev_alias_file(filp, arg); + case F2FS_IOC_IO_PRIO: + return f2fs_ioc_io_prio(filp, arg); default: return -ENOTTY; } @@ -4708,6 +4894,10 @@ static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) err = file_modified(file); if (err) return err; + + filemap_invalidate_lock(inode->i_mapping); + f2fs_zero_post_eof_page(inode, iocb->ki_pos + iov_iter_count(from)); + filemap_invalidate_unlock(inode->i_mapping); return count; } @@ -4765,7 +4955,8 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, else return 0; - map.m_may_create = true; + if (!IS_DEVICE_ALIASING(inode)) + map.m_may_create = true; if (dio) { map.m_seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); @@ -4821,8 +5012,8 @@ static void f2fs_dio_write_submit_io(const struct iomap_iter *iter, { struct inode *inode = iter->inode; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int seg_type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); - enum temp_type temp = f2fs_get_segment_temp(seg_type); + enum log_type type = f2fs_rw_hint_to_seg_type(sbi, inode->i_write_hint); + enum temp_type temp = f2fs_get_segment_temp(sbi, type); bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, DATA, temp); submit_bio(bio); @@ -5089,11 +5280,15 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, } err = generic_fadvise(filp, offset, len, advice); - if (!err && advice == POSIX_FADV_DONTNEED && - test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && - f2fs_compressed_file(inode)) - f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); + if (err) + return err; + if (advice == POSIX_FADV_DONTNEED && + (test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && + f2fs_compressed_file(inode))) + f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); + else if (advice == POSIX_FADV_NOREUSE) + err = f2fs_keep_noreuse_range(inode, offset, len); return err; } @@ -5202,6 +5397,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_SET_COMPRESS_OPTION: case F2FS_IOC_DECOMPRESS_FILE: case F2FS_IOC_COMPRESS_FILE: + case F2FS_IOC_GET_DEV_ALIAS_FILE: + case F2FS_IOC_IO_PRIO: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9322a7200e31..3cb5242f4ddf 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -257,6 +257,8 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type) switch (sbi->gc_mode) { case GC_IDLE_CB: + case GC_URGENT_LOW: + case GC_URGENT_MID: gc_mode = GC_CB; break; case GC_IDLE_GREEDY: @@ -361,20 +363,15 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; unsigned int vblocks; unsigned char age = 0; unsigned char u; - unsigned int i; unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); - for (i = 0; i < usable_segs_per_sec; i++) - mtime += get_seg_entry(sbi, start + i)->mtime; + mtime = f2fs_get_section_mtime(sbi, segno); + f2fs_bug_on(sbi, mtime == INVALID_MTIME); vblocks = get_valid_blocks(sbi, segno, true); - - mtime = div_u64(mtime, usable_segs_per_sec); vblocks = div_u64(vblocks, usable_segs_per_sec); u = BLKS_TO_SEGS(sbi, vblocks * 100); @@ -519,10 +516,7 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, struct victim_sel_policy *p, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; - unsigned int i; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (p->gc_mode == GC_AT && @@ -530,9 +524,8 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, return; } - for (i = 0; i < SEGS_PER_SEC(sbi); i++) - mtime += get_seg_entry(sbi, start + i)->mtime; - mtime = div_u64(mtime, SEGS_PER_SEC(sbi)); + mtime = f2fs_get_section_mtime(sbi, segno); + f2fs_bug_on(sbi, mtime == INVALID_MTIME); /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) @@ -813,11 +806,14 @@ retry: goto out; } - if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + if (sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) { ret = -EBUSY; - else - p.min_segno = *result; - goto out; + goto out; + } + if (gc_type == FG_GC) + clear_bit(GET_SEC_FROM_SEG(sbi, *result), dirty_i->victim_secmap); + p.min_segno = *result; + goto got_result; } ret = -ENODATA; @@ -1049,7 +1045,7 @@ next_step: for (off = 0; off < usable_blks_in_seg; off++, entry++) { nid_t nid = le32_to_cpu(entry->nid); - struct page *node_page; + struct folio *node_folio; struct node_info ni; int err; @@ -1072,27 +1068,27 @@ next_step: } /* phase == 2 */ - node_page = f2fs_get_node_page(sbi, nid); - if (IS_ERR(node_page)) + node_folio = f2fs_get_node_folio(sbi, nid); + if (IS_ERR(node_folio)) continue; - /* block may become invalid during f2fs_get_node_page */ + /* block may become invalid during f2fs_get_node_folio */ if (check_valid_map(sbi, segno, off) == 0) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); continue; } if (f2fs_get_node_info(sbi, nid, &ni, false)) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); continue; } if (ni.blk_addr != start_addr + off) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); continue; } - err = f2fs_move_node_page(node_page, gc_type); + err = f2fs_move_node_folio(node_folio, gc_type); if (!err && gc_type == FG_GC) submitted++; stat_inc_node_blk_count(sbi, 1, gc_type); @@ -1138,7 +1134,7 @@ block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode) static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, struct node_info *dni, block_t blkaddr, unsigned int *nofs) { - struct page *node_page; + struct folio *node_folio; nid_t nid; unsigned int ofs_in_node, max_addrs, base; block_t source_blkaddr; @@ -1146,12 +1142,12 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, nid = le32_to_cpu(sum->nid); ofs_in_node = le16_to_cpu(sum->ofs_in_node); - node_page = f2fs_get_node_page(sbi, nid); - if (IS_ERR(node_page)) + node_folio = f2fs_get_node_folio(sbi, nid); + if (IS_ERR(node_folio)) return false; if (f2fs_get_node_info(sbi, nid, dni, false)) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); return false; } @@ -1162,12 +1158,12 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } if (f2fs_check_nid_range(sbi, dni->ino)) { - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); return false; } - if (IS_INODE(node_page)) { - base = offset_in_addr(F2FS_INODE(node_page)); + if (IS_INODE(&node_folio->page)) { + base = offset_in_addr(F2FS_INODE(&node_folio->page)); max_addrs = DEF_ADDRS_PER_INODE; } else { base = 0; @@ -1177,13 +1173,13 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (base + ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u", base, ofs_in_node, max_addrs, dni->ino, dni->nid); - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); return false; } - *nofs = ofs_of_node(node_page); - source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); - f2fs_put_page(node_page, 1); + *nofs = ofs_of_node(&node_folio->page); + source_blkaddr = data_blkaddr(NULL, node_folio, ofs_in_node); + f2fs_folio_put(node_folio, true); if (source_blkaddr != blkaddr) { #ifdef CONFIG_F2FS_CHECK_FS @@ -1209,7 +1205,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index) struct address_space *mapping = f2fs_is_cow_file(inode) ? F2FS_I(inode)->atomic_inode->i_mapping : inode->i_mapping; struct dnode_of_data dn; - struct page *page; + struct folio *folio; struct f2fs_io_info fio = { .sbi = sbi, .ino = inode->i_ino, @@ -1222,16 +1218,16 @@ static int ra_data_block(struct inode *inode, pgoff_t index) }; int err; - page = f2fs_grab_cache_page(mapping, index, true); - if (!page) - return -ENOMEM; + folio = f2fs_grab_cache_folio(mapping, index, true); + if (IS_ERR(folio)) + return PTR_ERR(folio); if (f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE_READ))) { err = -EFSCORRUPTED; - goto put_page; + goto put_folio; } goto got_it; } @@ -1239,28 +1235,28 @@ static int ra_data_block(struct inode *inode, pgoff_t index) set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err) - goto put_page; + goto put_folio; f2fs_put_dnode(&dn); if (!__is_valid_data_blkaddr(dn.data_blkaddr)) { err = -ENOENT; - goto put_page; + goto put_folio; } if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr, DATA_GENERIC_ENHANCE))) { err = -EFSCORRUPTED; - goto put_page; + goto put_folio; } got_it: - /* read page */ - fio.page = page; + /* read folio */ + fio.page = &folio->page; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; /* * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -1269,14 +1265,14 @@ got_it: FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; - goto put_page; + goto put_folio; } err = f2fs_submit_page_bio(&fio); if (err) goto put_encrypted_page; f2fs_put_page(fio.encrypted_page, 0); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE); f2fs_update_iostat(sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE); @@ -1284,8 +1280,8 @@ got_it: return 0; put_encrypted_page: f2fs_put_page(fio.encrypted_page, 1); -put_page: - f2fs_put_page(page, 1); +put_folio: + f2fs_folio_put(folio, true); return err; } @@ -1311,7 +1307,7 @@ static int move_data_block(struct inode *inode, block_t bidx, struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - struct page *page, *mpage; + struct folio *folio, *mfolio; block_t newaddr; int err = 0; bool lfs_mode = f2fs_lfs_mode(fio.sbi); @@ -1320,9 +1316,9 @@ static int move_data_block(struct inode *inode, block_t bidx, CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA; /* do not read out */ - page = f2fs_grab_cache_page(mapping, bidx, false); - if (!page) - return -ENOMEM; + folio = f2fs_grab_cache_folio(mapping, bidx, false); + if (IS_ERR(folio)) + return PTR_ERR(folio); if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { err = -ENOENT; @@ -1339,7 +1335,7 @@ static int move_data_block(struct inode *inode, block_t bidx, goto out; if (unlikely(dn.data_blkaddr == NULL_ADDR)) { - ClearPageUptodate(page); + folio_clear_uptodate(folio); err = -ENOENT; goto put_out; } @@ -1348,7 +1344,7 @@ static int move_data_block(struct inode *inode, block_t bidx, * don't cache encrypted data into meta inode until previous dirty * data were writebacked to avoid racing between GC and flush. */ - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); @@ -1357,26 +1353,26 @@ static int move_data_block(struct inode *inode, block_t bidx, goto put_out; /* read page */ - fio.page = page; + fio.page = &folio->page; fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; if (lfs_mode) f2fs_down_write(&fio.sbi->io_order_lock); - mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), + mfolio = f2fs_grab_cache_folio(META_MAPPING(fio.sbi), fio.old_blkaddr, false); - if (!mpage) { - err = -ENOMEM; + if (IS_ERR(mfolio)) { + err = PTR_ERR(mfolio); goto up_out; } - fio.encrypted_page = mpage; + fio.encrypted_page = folio_file_page(mfolio, fio.old_blkaddr); - /* read source block in mpage */ - if (!PageUptodate(mpage)) { + /* read source block in mfolio */ + if (!folio_test_uptodate(mfolio)) { err = f2fs_submit_page_bio(&fio); if (err) { - f2fs_put_page(mpage, 1); + f2fs_folio_put(mfolio, true); goto up_out; } @@ -1385,11 +1381,11 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_update_iostat(fio.sbi, NULL, FS_GDATA_READ_IO, F2FS_BLKSIZE); - lock_page(mpage); - if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) || - !PageUptodate(mpage))) { + folio_lock(mfolio); + if (unlikely(!is_meta_folio(mfolio) || + !folio_test_uptodate(mfolio))) { err = -EIO; - f2fs_put_page(mpage, 1); + f2fs_folio_put(mfolio, true); goto up_out; } } @@ -1400,7 +1396,7 @@ static int move_data_block(struct inode *inode, block_t bidx, err = f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, &sum, type, NULL); if (err) { - f2fs_put_page(mpage, 1); + f2fs_folio_put(mfolio, true); /* filesystem should shutdown, no need to recovery block */ goto up_out; } @@ -1409,17 +1405,17 @@ static int move_data_block(struct inode *inode, block_t bidx, newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); if (!fio.encrypted_page) { err = -ENOMEM; - f2fs_put_page(mpage, 1); + f2fs_folio_put(mfolio, true); goto recover_block; } /* write target block */ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); memcpy(page_address(fio.encrypted_page), - page_address(mpage), PAGE_SIZE); - f2fs_put_page(mpage, 1); + folio_address(mfolio), PAGE_SIZE); + f2fs_folio_put(mfolio, true); - f2fs_invalidate_internal_cache(fio.sbi, fio.old_blkaddr); + f2fs_invalidate_internal_cache(fio.sbi, fio.old_blkaddr, 1); set_page_dirty(fio.encrypted_page); if (clear_page_dirty_for_io(fio.encrypted_page)) @@ -1448,19 +1444,19 @@ up_out: put_out: f2fs_put_dnode(&dn); out: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } static int move_data_page(struct inode *inode, block_t bidx, int gc_type, - unsigned int segno, int off) + unsigned int segno, int off) { - struct page *page; + struct folio *folio; int err = 0; - page = f2fs_get_lock_data_page(inode, bidx, true); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_lock_data_folio(inode, bidx, true); + if (IS_ERR(folio)) + return PTR_ERR(folio); if (!check_valid_map(F2FS_I_SB(inode), segno, off)) { err = -ENOENT; @@ -1472,12 +1468,12 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, goto out; if (gc_type == BG_GC) { - if (folio_test_writeback(page_folio(page))) { + if (folio_test_writeback(folio)) { err = -EAGAIN; goto out; } - set_page_dirty(page); - set_page_private_gcing(page); + folio_mark_dirty(folio); + set_page_private_gcing(&folio->page); } else { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(inode), @@ -1487,37 +1483,37 @@ static int move_data_page(struct inode *inode, block_t bidx, int gc_type, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, .old_blkaddr = NULL_ADDR, - .page = page, + .page = &folio->page, .encrypted_page = NULL, .need_lock = LOCK_REQ, .io_type = FS_GC_DATA_IO, }; - bool is_dirty = PageDirty(page); + bool is_dirty = folio_test_dirty(folio); retry: - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); - set_page_dirty(page); - if (clear_page_dirty_for_io(page)) { + folio_mark_dirty(folio); + if (folio_clear_dirty_for_io(folio)) { inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); } - set_page_private_gcing(page); + set_page_private_gcing(&folio->page); err = f2fs_do_write_data_page(&fio); if (err) { - clear_page_private_gcing(page); + clear_page_private_gcing(&folio->page); if (err == -ENOMEM) { memalloc_retry_wait(GFP_NOFS); goto retry; } if (is_dirty) - set_page_dirty(page); + folio_mark_dirty(folio); } } out: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } @@ -1546,7 +1542,6 @@ next_step: entry = sum; for (off = 0; off < usable_blks_in_seg; off++, entry++) { - struct page *data_page; struct inode *inode; struct node_info dni; /* dnode info for the data */ unsigned int ofs_in_node, nofs; @@ -1589,6 +1584,7 @@ next_step: ofs_in_node = le16_to_cpu(entry->ofs_in_node); if (phase == 3) { + struct folio *data_folio; int err; inode = f2fs_iget(sb, dni.ino); @@ -1639,15 +1635,15 @@ next_step: continue; } - data_page = f2fs_get_read_data_page(inode, start_bidx, + data_folio = f2fs_get_read_data_folio(inode, start_bidx, REQ_RAHEAD, true, NULL); f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - if (IS_ERR(data_page)) { + if (IS_ERR(data_folio)) { iput(inode); continue; } - f2fs_put_page(data_page, 0); + f2fs_folio_put(data_folio, false); add_gc_inode(gc_list, inode); continue; } @@ -1722,8 +1718,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct gc_inode_list *gc_list, int gc_type, bool force_migrate, bool one_time) { - struct page *sum_page; - struct f2fs_summary_block *sum; struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + SEGS_PER_SEC(sbi); @@ -1773,40 +1767,40 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, /* reference all summary page */ while (segno < end_segno) { - sum_page = f2fs_get_sum_page(sbi, segno++); - if (IS_ERR(sum_page)) { - int err = PTR_ERR(sum_page); + struct folio *sum_folio = f2fs_get_sum_folio(sbi, segno++); + if (IS_ERR(sum_folio)) { + int err = PTR_ERR(sum_folio); end_segno = segno - 1; for (segno = start_segno; segno < end_segno; segno++) { - sum_page = find_get_page(META_MAPPING(sbi), + sum_folio = filemap_get_folio(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); - f2fs_put_page(sum_page, 0); - f2fs_put_page(sum_page, 0); + folio_put_refs(sum_folio, 2); } return err; } - unlock_page(sum_page); + folio_unlock(sum_folio); } blk_start_plug(&plug); for (segno = start_segno; segno < end_segno; segno++) { + struct f2fs_summary_block *sum; /* find segment summary of victim */ - sum_page = find_get_page(META_MAPPING(sbi), + struct folio *sum_folio = filemap_get_folio(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); - f2fs_put_page(sum_page, 0); if (get_valid_blocks(sbi, segno, false) == 0) goto freed; if (gc_type == BG_GC && __is_large_section(sbi) && migrated >= sbi->migration_granularity) goto skip; - if (!PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) + if (!folio_test_uptodate(sum_folio) || + unlikely(f2fs_cp_error(sbi))) goto skip; - sum = page_address(sum_page); + sum = folio_address(sum_folio); if (type != GET_SUM_TYPE((&sum->footer))) { f2fs_err(sbi, "Inconsistent segment (%u) type [%d, %d] in SSA and SIT", segno, type, GET_SUM_TYPE((&sum->footer))); @@ -1844,7 +1838,7 @@ freed: (segno + 1 < sec_end_segno) ? segno + 1 : NULL_SEGNO; skip: - f2fs_put_page(sum_page, 0); + folio_put_refs(sum_folio, 2); } if (submitted) @@ -2070,6 +2064,9 @@ int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; + if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, segno))) + continue; + do_garbage_collect(sbi, segno, &gc_list, FG_GC, true, false); put_gc_inode(&gc_list); @@ -2275,12 +2272,12 @@ out_drop_write: if (err) return err; - err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE); + err = freeze_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); if (err) return err; if (f2fs_readonly(sbi->sb)) { - err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE); + err = thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); if (err) return err; return -EROFS; @@ -2337,6 +2334,6 @@ recover_out: out_err: f2fs_up_write(&sbi->cp_global_sem); f2fs_up_write(&sbi->gc_lock); - thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE); + thaw_super(sbi->sb, FREEZE_HOLDER_KERNEL, NULL); return err; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 2914b678bf8f..5c1eaf55e127 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -35,6 +35,7 @@ #define LIMIT_BOOST_ZONED_GC 25 /* percentage over total user space of boosted gc for zoned devices */ #define DEF_MIGRATION_WINDOW_GRANULARITY_ZONED 3 #define BOOST_GC_MULTIPLE 5 +#define ZONED_PIN_SEC_REQUIRED_COUNT 1 #define DEF_GC_FAILED_PINNED_FILES 2048 #define MAX_GC_FAILED_PINNED_FILES USHRT_MAX diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 005babf1bed1..901c630685ce 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -79,37 +79,37 @@ bool f2fs_may_inline_dentry(struct inode *inode) return true; } -void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage) +void f2fs_do_read_inline_data(struct folio *folio, struct folio *ifolio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; if (folio_test_uptodate(folio)) return; - f2fs_bug_on(F2FS_I_SB(inode), folio_index(folio)); + f2fs_bug_on(F2FS_I_SB(inode), folio->index); folio_zero_segment(folio, MAX_INLINE_DATA(inode), folio_size(folio)); /* Copy the whole inline data block */ - memcpy_to_folio(folio, 0, inline_data_addr(inode, ipage), + memcpy_to_folio(folio, 0, inline_data_addr(inode, ifolio), MAX_INLINE_DATA(inode)); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); } -void f2fs_truncate_inline_inode(struct inode *inode, - struct page *ipage, u64 from) +void f2fs_truncate_inline_inode(struct inode *inode, struct folio *ifolio, + u64 from) { void *addr; if (from >= MAX_INLINE_DATA(inode)) return; - addr = inline_data_addr(inode, ipage); + addr = inline_data_addr(inode, ifolio); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); memset(addr + from, 0, MAX_INLINE_DATA(inode) - from); - set_page_dirty(ipage); + folio_mark_dirty(ifolio); if (from == 0) clear_inode_flag(inode, FI_DATA_EXIST); @@ -117,32 +117,32 @@ void f2fs_truncate_inline_inode(struct inode *inode, int f2fs_read_inline_data(struct inode *inode, struct folio *folio) { - struct page *ipage; + struct folio *ifolio; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) { + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) { folio_unlock(folio); - return PTR_ERR(ipage); + return PTR_ERR(ifolio); } if (!f2fs_has_inline_data(inode)) { - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return -EAGAIN; } - if (folio_index(folio)) + if (folio->index) folio_zero_segment(folio, 0, folio_size(folio)); else - f2fs_do_read_inline_data(folio, ipage); + f2fs_do_read_inline_data(folio, ifolio); if (!folio_test_uptodate(folio)) folio_mark_uptodate(folio); - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); folio_unlock(folio); return 0; } -int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) +int f2fs_convert_inline_folio(struct dnode_of_data *dn, struct folio *folio) { struct f2fs_io_info fio = { .sbi = F2FS_I_SB(dn->inode), @@ -150,7 +150,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, - .page = page, + .page = &folio->page, .encrypted_page = NULL, .io_type = FS_DATA_IO, }; @@ -182,20 +182,20 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) return -EFSCORRUPTED; } - f2fs_bug_on(F2FS_P_SB(page), folio_test_writeback(page_folio(page))); + f2fs_bug_on(F2FS_F_SB(folio), folio_test_writeback(folio)); - f2fs_do_read_inline_data(page_folio(page), dn->inode_page); - set_page_dirty(page); + f2fs_do_read_inline_data(folio, dn->inode_folio); + folio_mark_dirty(folio); /* clear dirty state */ - dirty = clear_page_dirty_for_io(page); + dirty = folio_clear_dirty_for_io(folio); /* write data page to try to make data consistent */ - set_page_writeback(page); + folio_start_writeback(folio); fio.old_blkaddr = dn->data_blkaddr; set_inode_flag(dn->inode, FI_HOT_DATA); f2fs_outplace_write_data(dn, &fio); - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); if (dirty) { inode_dec_dirty_pages(dn->inode); f2fs_remove_dirty_inode(dn->inode); @@ -205,8 +205,8 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - f2fs_truncate_inline_inode(dn->inode, dn->inode_page, 0); - clear_page_private_inline(dn->inode_page); + f2fs_truncate_inline_inode(dn->inode, dn->inode_folio, 0); + clear_page_private_inline(&dn->inode_folio->page); clear_out: stat_dec_inline_inode(dn->inode); clear_inode_flag(dn->inode, FI_INLINE_DATA); @@ -218,7 +218,7 @@ int f2fs_convert_inline_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - struct page *ipage, *page; + struct folio *ifolio, *folio; int err = 0; if (f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) @@ -231,28 +231,28 @@ int f2fs_convert_inline_inode(struct inode *inode) if (err) return err; - page = f2fs_grab_cache_page(inode->i_mapping, 0, false); - if (!page) - return -ENOMEM; + folio = f2fs_grab_cache_folio(inode->i_mapping, 0, false); + if (IS_ERR(folio)) + return PTR_ERR(folio); f2fs_lock_op(sbi); - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); goto out; } - set_new_dnode(&dn, inode, ipage, ipage, 0); + set_new_dnode(&dn, inode, ifolio, ifolio, 0); if (f2fs_has_inline_data(inode)) - err = f2fs_convert_inline_page(&dn, page); + err = f2fs_convert_inline_folio(&dn, folio); f2fs_put_dnode(&dn); out: f2fs_unlock_op(sbi); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); if (!err) f2fs_balance_fs(sbi, dn.node_changed); @@ -263,40 +263,39 @@ out: int f2fs_write_inline_data(struct inode *inode, struct folio *folio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *ipage; + struct folio *ifolio; - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); if (!f2fs_has_inline_data(inode)) { - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return -EAGAIN; } f2fs_bug_on(F2FS_I_SB(inode), folio->index); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); - memcpy_from_folio(inline_data_addr(inode, ipage), + f2fs_folio_wait_writeback(ifolio, NODE, true, true); + memcpy_from_folio(inline_data_addr(inode, ifolio), folio, 0, MAX_INLINE_DATA(inode)); - set_page_dirty(ipage); + folio_mark_dirty(ifolio); f2fs_clear_page_cache_dirty_tag(folio); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - clear_page_private_inline(ipage); - f2fs_put_page(ipage, 1); + clear_page_private_inline(&ifolio->page); + f2fs_folio_put(ifolio, 1); return 0; } -int f2fs_recover_inline_data(struct inode *inode, struct page *npage) +int f2fs_recover_inline_data(struct inode *inode, struct folio *nfolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode *ri = NULL; void *src_addr, *dst_addr; - struct page *ipage; /* * The inline_data recovery policy is as follows. @@ -306,38 +305,39 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage) * x o -> remove data blocks, and then recover inline_data * x x -> recover data blocks */ - if (IS_INODE(npage)) - ri = F2FS_INODE(npage); + if (IS_INODE(&nfolio->page)) + ri = F2FS_INODE(&nfolio->page); if (f2fs_has_inline_data(inode) && ri && (ri->i_inline & F2FS_INLINE_DATA)) { + struct folio *ifolio; process_inline: - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); - src_addr = inline_data_addr(inode, npage); - dst_addr = inline_data_addr(inode, ipage); + src_addr = inline_data_addr(inode, nfolio); + dst_addr = inline_data_addr(inode, ifolio); memcpy(dst_addr, src_addr, MAX_INLINE_DATA(inode)); set_inode_flag(inode, FI_INLINE_DATA); set_inode_flag(inode, FI_DATA_EXIST); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + folio_mark_dirty(ifolio); + f2fs_folio_put(ifolio, true); return 1; } if (f2fs_has_inline_data(inode)) { - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); - f2fs_truncate_inline_inode(inode, ipage, 0); + struct folio *ifolio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); + f2fs_truncate_inline_inode(inode, ifolio, 0); stat_dec_inline_inode(inode); clear_inode_flag(inode, FI_INLINE_DATA); - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { int ret; @@ -352,49 +352,50 @@ process_inline: struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, const struct f2fs_filename *fname, - struct page **res_page) + struct folio **res_folio, + bool use_hash) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_dir_entry *de; struct f2fs_dentry_ptr d; - struct page *ipage; + struct folio *ifolio; void *inline_dentry; - ipage = f2fs_get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) { - *res_page = ipage; + ifolio = f2fs_get_inode_folio(sbi, dir->i_ino); + if (IS_ERR(ifolio)) { + *res_folio = ifolio; return NULL; } - inline_dentry = inline_data_addr(dir, ipage); + inline_dentry = inline_data_addr(dir, ifolio); make_dentry_ptr_inline(dir, &d, inline_dentry); - de = f2fs_find_target_dentry(&d, fname, NULL); - unlock_page(ipage); + de = f2fs_find_target_dentry(&d, fname, NULL, use_hash); + folio_unlock(ifolio); if (IS_ERR(de)) { - *res_page = ERR_CAST(de); + *res_folio = ERR_CAST(de); de = NULL; } if (de) - *res_page = ipage; + *res_folio = ifolio; else - f2fs_put_page(ipage, 0); + f2fs_folio_put(ifolio, false); return de; } int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, - struct page *ipage) + struct folio *ifolio) { struct f2fs_dentry_ptr d; void *inline_dentry; - inline_dentry = inline_data_addr(inode, ipage); + inline_dentry = inline_data_addr(inode, ifolio); make_dentry_ptr_inline(inode, &d, inline_dentry); f2fs_do_make_empty_dir(inode, parent, &d); - set_page_dirty(ipage); + folio_mark_dirty(ifolio); /* update i_size to MAX_INLINE_DATA */ if (i_size_read(inode) < MAX_INLINE_DATA(inode)) @@ -406,39 +407,39 @@ int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, * NOTE: ipage is grabbed by caller, but if any error occurs, we should * release ipage in this function. */ -static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, +static int f2fs_move_inline_dirents(struct inode *dir, struct folio *ifolio, void *inline_dentry) { - struct page *page; + struct folio *folio; struct dnode_of_data dn; struct f2fs_dentry_block *dentry_blk; struct f2fs_dentry_ptr src, dst; int err; - page = f2fs_grab_cache_page(dir->i_mapping, 0, true); - if (!page) { - f2fs_put_page(ipage, 1); - return -ENOMEM; + folio = f2fs_grab_cache_folio(dir->i_mapping, 0, true); + if (IS_ERR(folio)) { + f2fs_folio_put(ifolio, true); + return PTR_ERR(folio); } - set_new_dnode(&dn, dir, ipage, NULL, 0); + set_new_dnode(&dn, dir, ifolio, NULL, 0); err = f2fs_reserve_block(&dn, 0); if (err) goto out; if (unlikely(dn.data_blkaddr != NEW_ADDR)) { f2fs_put_dnode(&dn); - set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); - f2fs_warn(F2FS_P_SB(page), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", + set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK); + f2fs_warn(F2FS_F_SB(folio), "%s: corrupted inline inode ino=%lx, i_addr[0]:0x%x, run fsck to fix.", __func__, dir->i_ino, dn.data_blkaddr); - f2fs_handle_error(F2FS_P_SB(page), ERROR_INVALID_BLKADDR); + f2fs_handle_error(F2FS_F_SB(folio), ERROR_INVALID_BLKADDR); err = -EFSCORRUPTED; goto out; } - f2fs_wait_on_page_writeback(page, DATA, true, true); + f2fs_folio_wait_writeback(folio, DATA, true, true); - dentry_blk = page_address(page); + dentry_blk = folio_address(folio); /* * Start by zeroing the full block, to ensure that all unused space is @@ -454,12 +455,12 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max); memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN); - if (!PageUptodate(page)) - SetPageUptodate(page); - set_page_dirty(page); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + folio_mark_dirty(folio); /* clear inline dir and flag after data writeback */ - f2fs_truncate_inline_inode(dir, ipage, 0); + f2fs_truncate_inline_inode(dir, ifolio, 0); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); @@ -476,7 +477,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, if (i_size_read(dir) < PAGE_SIZE) f2fs_i_size_write(dir, PAGE_SIZE); out: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } @@ -532,7 +533,7 @@ punch_dentry_pages: return err; } -static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, +static int f2fs_move_rehashed_dirents(struct inode *dir, struct folio *ifolio, void *inline_dentry) { void *backup_dentry; @@ -541,20 +542,20 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir), MAX_INLINE_DATA(dir), GFP_F2FS_ZERO); if (!backup_dentry) { - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return -ENOMEM; } memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA(dir)); - f2fs_truncate_inline_inode(dir, ipage, 0); + f2fs_truncate_inline_inode(dir, ifolio, 0); - unlock_page(ipage); + folio_unlock(ifolio); err = f2fs_add_inline_entries(dir, backup_dentry); if (err) goto recover; - lock_page(ipage); + folio_lock(ifolio); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); @@ -570,31 +571,31 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, kfree(backup_dentry); return 0; recover: - lock_page(ipage); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + folio_lock(ifolio); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA(dir)); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA(dir)); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + folio_mark_dirty(ifolio); + f2fs_folio_put(ifolio, 1); kfree(backup_dentry); return err; } -static int do_convert_inline_dir(struct inode *dir, struct page *ipage, +static int do_convert_inline_dir(struct inode *dir, struct folio *ifolio, void *inline_dentry) { if (!F2FS_I(dir)->i_dir_level) - return f2fs_move_inline_dirents(dir, ipage, inline_dentry); + return f2fs_move_inline_dirents(dir, ifolio, inline_dentry); else - return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); + return f2fs_move_rehashed_dirents(dir, ifolio, inline_dentry); } int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct page *ipage; + struct folio *ifolio; struct f2fs_filename fname; void *inline_dentry = NULL; int err = 0; @@ -608,22 +609,22 @@ int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) if (err) goto out; - ipage = f2fs_get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, dir->i_ino); + if (IS_ERR(ifolio)) { + err = PTR_ERR(ifolio); goto out_fname; } - if (f2fs_has_enough_room(dir, ipage, &fname)) { - f2fs_put_page(ipage, 1); + if (f2fs_has_enough_room(dir, ifolio, &fname)) { + f2fs_folio_put(ifolio, true); goto out_fname; } - inline_dentry = inline_data_addr(dir, ipage); + inline_dentry = inline_data_addr(dir, ifolio); - err = do_convert_inline_dir(dir, ipage, inline_dentry); + err = do_convert_inline_dir(dir, ifolio, inline_dentry); if (!err) - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); out_fname: f2fs_free_filename(&fname); out: @@ -635,24 +636,24 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct page *ipage; + struct folio *ifolio; unsigned int bit_pos; void *inline_dentry = NULL; struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(fname->disk_name.len); - struct page *page = NULL; + struct folio *folio = NULL; int err = 0; - ipage = f2fs_get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(sbi, dir->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); - inline_dentry = inline_data_addr(dir, ipage); + inline_dentry = inline_data_addr(dir, ifolio); make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); if (bit_pos >= d.max) { - err = do_convert_inline_dir(dir, ipage, inline_dentry); + err = do_convert_inline_dir(dir, ifolio, inline_dentry); if (err) return err; err = -EAGAIN; @@ -662,19 +663,19 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, if (inode) { f2fs_down_write_nested(&F2FS_I(inode)->i_sem, SINGLE_DEPTH_NESTING); - page = f2fs_init_inode_metadata(inode, dir, fname, ipage); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_init_inode_metadata(inode, dir, fname, ifolio); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto fail; } } - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); f2fs_update_dentry(ino, mode, &d, &fname->disk_name, fname->hash, bit_pos); - set_page_dirty(ipage); + folio_mark_dirty(ifolio); /* we don't need to mark_inode_dirty now */ if (inode) { @@ -682,9 +683,9 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, /* synchronize inode page's data from inode cache */ if (is_inode_flag_set(inode, FI_NEW_INODE)) - f2fs_update_inode(inode, page); + f2fs_update_inode(inode, folio); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } f2fs_update_parent_metadata(dir, inode, 0); @@ -692,12 +693,12 @@ fail: if (inode) f2fs_up_write(&F2FS_I(inode)->i_sem); out: - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return err; } -void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *dir, struct inode *inode) +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, + struct folio *folio, struct inode *dir, struct inode *inode) { struct f2fs_dentry_ptr d; void *inline_dentry; @@ -705,18 +706,18 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, unsigned int bit_pos; int i; - lock_page(page); - f2fs_wait_on_page_writeback(page, NODE, true, true); + folio_lock(folio); + f2fs_folio_wait_writeback(folio, NODE, true, true); - inline_dentry = inline_data_addr(dir, page); + inline_dentry = inline_data_addr(dir, folio); make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = dentry - d.dentry; for (i = 0; i < slots; i++) __clear_bit_le(bit_pos + i, d.bitmap); - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); f2fs_mark_inode_dirty_sync(dir, false); @@ -728,21 +729,21 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, bool f2fs_empty_inline_dir(struct inode *dir) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct page *ipage; + struct folio *ifolio; unsigned int bit_pos = 2; void *inline_dentry; struct f2fs_dentry_ptr d; - ipage = f2fs_get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) + ifolio = f2fs_get_inode_folio(sbi, dir->i_ino); + if (IS_ERR(ifolio)) return false; - inline_dentry = inline_data_addr(dir, ipage); + inline_dentry = inline_data_addr(dir, ifolio); make_dentry_ptr_inline(dir, &d, inline_dentry); bit_pos = find_next_bit_le(d.bitmap, d.max, bit_pos); - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); if (bit_pos < d.max) return false; @@ -754,7 +755,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, struct fscrypt_str *fstr) { struct inode *inode = file_inode(file); - struct page *ipage = NULL; + struct folio *ifolio = NULL; struct f2fs_dentry_ptr d; void *inline_dentry = NULL; int err; @@ -764,17 +765,17 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (ctx->pos == d.max) return 0; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); /* * f2fs_readdir was protected by inode.i_rwsem, it is safe to access * ipage without page's lock held. */ - unlock_page(ipage); + folio_unlock(ifolio); - inline_dentry = inline_data_addr(inode, ipage); + inline_dentry = inline_data_addr(inode, ifolio); make_dentry_ptr_inline(inode, &d, inline_dentry); @@ -782,7 +783,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, if (!err) ctx->pos = d.max; - f2fs_put_page(ipage, 0); + f2fs_folio_put(ifolio, false); return err < 0 ? err : 0; } @@ -793,12 +794,12 @@ int f2fs_inline_data_fiemap(struct inode *inode, __u32 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED | FIEMAP_EXTENT_LAST; struct node_info ni; - struct page *ipage; + struct folio *ifolio; int err = 0; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); if ((S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) && !f2fs_has_inline_data(inode)) { @@ -823,11 +824,11 @@ int f2fs_inline_data_fiemap(struct inode *inode, goto out; byteaddr = (__u64)ni.blk_addr << inode->i_sb->s_blocksize_bits; - byteaddr += (char *)inline_data_addr(inode, ipage) - - (char *)F2FS_INODE(ipage); + byteaddr += (char *)inline_data_addr(inode, ifolio) - + (char *)F2FS_INODE(&ifolio->page); err = fiemap_fill_next_extent(fieinfo, start, byteaddr, ilen, flags); trace_f2fs_fiemap(inode, start, byteaddr, ilen, flags, err); out: - f2fs_put_page(ipage, 1); + f2fs_folio_put(ifolio, true); return err; } diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1ed86df343a5..083d52a42bfb 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -34,10 +34,10 @@ void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync) if (f2fs_inode_dirtied(inode, sync)) return; - if (f2fs_is_atomic_file(inode)) { - set_inode_flag(inode, FI_ATOMIC_DIRTIED); + /* only atomic file w/ FI_ATOMIC_COMMITTED can be set vfs dirty */ + if (f2fs_is_atomic_file(inode) && + !is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) return; - } mark_inode_dirty_sync(inode); } @@ -68,9 +68,9 @@ void f2fs_set_inode_flags(struct inode *inode) S_ENCRYPTED|S_VERITY|S_CASEFOLD); } -static void __get_inode_rdev(struct inode *inode, struct page *node_page) +static void __get_inode_rdev(struct inode *inode, struct folio *node_folio) { - __le32 *addr = get_dnode_addr(inode, node_page); + __le32 *addr = get_dnode_addr(inode, node_folio); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { @@ -81,9 +81,9 @@ static void __get_inode_rdev(struct inode *inode, struct page *node_page) } } -static void __set_inode_rdev(struct inode *inode, struct page *node_page) +static void __set_inode_rdev(struct inode *inode, struct folio *node_folio) { - __le32 *addr = get_dnode_addr(inode, node_page); + __le32 *addr = get_dnode_addr(inode, node_folio); if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { if (old_valid_dev(inode->i_rdev)) { @@ -97,19 +97,19 @@ static void __set_inode_rdev(struct inode *inode, struct page *node_page) } } -static void __recover_inline_status(struct inode *inode, struct page *ipage) +static void __recover_inline_status(struct inode *inode, struct folio *ifolio) { - void *inline_data = inline_data_addr(inode, ipage); + void *inline_data = inline_data_addr(inode, ifolio); __le32 *start = inline_data; __le32 *end = start + MAX_INLINE_DATA(inode) / sizeof(__le32); while (start < end) { if (*start++) { - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); set_inode_flag(inode, FI_DATA_EXIST); - set_raw_inline(inode, F2FS_INODE(ipage)); - set_page_dirty(ipage); + set_raw_inline(inode, F2FS_INODE(&ifolio->page)); + folio_mark_dirty(ifolio); return; } } @@ -144,19 +144,18 @@ static __u32 f2fs_inode_chksum(struct f2fs_sb_info *sbi, struct page *page) unsigned int offset = offsetof(struct f2fs_inode, i_inode_checksum); unsigned int cs_size = sizeof(dummy_cs); - chksum = f2fs_chksum(sbi, sbi->s_chksum_seed, (__u8 *)&ino, - sizeof(ino)); - chksum_seed = f2fs_chksum(sbi, chksum, (__u8 *)&gen, sizeof(gen)); + chksum = f2fs_chksum(sbi->s_chksum_seed, (__u8 *)&ino, sizeof(ino)); + chksum_seed = f2fs_chksum(chksum, (__u8 *)&gen, sizeof(gen)); - chksum = f2fs_chksum(sbi, chksum_seed, (__u8 *)ri, offset); - chksum = f2fs_chksum(sbi, chksum, (__u8 *)&dummy_cs, cs_size); + chksum = f2fs_chksum(chksum_seed, (__u8 *)ri, offset); + chksum = f2fs_chksum(chksum, (__u8 *)&dummy_cs, cs_size); offset += cs_size; - chksum = f2fs_chksum(sbi, chksum, (__u8 *)ri + offset, - F2FS_BLKSIZE - offset); + chksum = f2fs_chksum(chksum, (__u8 *)ri + offset, + F2FS_BLKSIZE - offset); return chksum; } -bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) +bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct folio *folio) { struct f2fs_inode *ri; __u32 provided, calculated; @@ -165,21 +164,21 @@ bool f2fs_inode_chksum_verify(struct f2fs_sb_info *sbi, struct page *page) return true; #ifdef CONFIG_F2FS_CHECK_FS - if (!f2fs_enable_inode_chksum(sbi, page)) + if (!f2fs_enable_inode_chksum(sbi, &folio->page)) #else - if (!f2fs_enable_inode_chksum(sbi, page) || - PageDirty(page) || - folio_test_writeback(page_folio(page))) + if (!f2fs_enable_inode_chksum(sbi, &folio->page) || + folio_test_dirty(folio) || + folio_test_writeback(folio)) #endif return true; - ri = &F2FS_NODE(page)->i; + ri = &F2FS_NODE(&folio->page)->i; provided = le32_to_cpu(ri->i_inode_checksum); - calculated = f2fs_inode_chksum(sbi, page); + calculated = f2fs_inode_chksum(sbi, &folio->page); if (provided != calculated) f2fs_warn(sbi, "checksum invalid, nid = %lu, ino_of_node = %x, %x vs. %x", - page_folio(page)->index, ino_of_node(page), + folio->index, ino_of_node(&folio->page), provided, calculated); return provided == calculated; @@ -288,6 +287,12 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (ino_of_node(node_page) == fi->i_xattr_nid) { + f2fs_warn(sbi, "%s: corrupted inode i_ino=%lx, xnid=%x, run fsck to fix.", + __func__, inode->i_ino, fi->i_xattr_nid); + return false; + } + if (f2fs_has_extra_attr(inode)) { if (!f2fs_sb_has_extra_attr(sbi)) { f2fs_warn(sbi, "%s: inode (ino=%lx) is with extra_attr, but extra_attr feature is off", @@ -302,15 +307,6 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) F2FS_TOTAL_EXTRA_ATTR_SIZE); return false; } - if (f2fs_sb_has_flexible_inline_xattr(sbi) && - f2fs_has_inline_xattr(inode) && - (!fi->i_inline_xattr_size || - fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { - f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, max: %lu", - __func__, inode->i_ino, fi->i_inline_xattr_size, - MAX_INLINE_XATTR_SIZE); - return false; - } if (f2fs_sb_has_compression(sbi) && fi->i_flags & F2FS_COMPR_FL && F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, @@ -320,6 +316,16 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) } } + if (f2fs_sb_has_flexible_inline_xattr(sbi) && + f2fs_has_inline_xattr(inode) && + (fi->i_inline_xattr_size < MIN_INLINE_XATTR_SIZE || + fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_inline_xattr_size: %d, min: %zu, max: %lu", + __func__, inode->i_ino, fi->i_inline_xattr_size, + MIN_INLINE_XATTR_SIZE, MAX_INLINE_XATTR_SIZE); + return false; + } + if (!f2fs_sb_has_extra_attr(sbi)) { if (f2fs_sb_has_project_quota(sbi)) { f2fs_warn(sbi, "%s: corrupted inode ino=%lx, wrong feature flag: %u, run fsck to fix.", @@ -372,6 +378,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (IS_DEVICE_ALIASING(inode)) { + if (!f2fs_sb_has_device_alias(sbi)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but the feature is off", + __func__, inode->i_ino); + return false; + } + if (!f2fs_is_pinned_file(inode)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has device alias flag, but is not pinned", + __func__, inode->i_ino); + return false; + } + } + return true; } @@ -388,7 +407,7 @@ static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); - struct page *node_page; + struct folio *node_folio; struct f2fs_inode *ri; projid_t i_projid; @@ -396,11 +415,11 @@ static int do_read_inode(struct inode *inode) if (f2fs_check_nid_range(sbi, inode->i_ino)) return -EINVAL; - node_page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(node_page)) - return PTR_ERR(node_page); + node_folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(node_folio)) + return PTR_ERR(node_folio); - ri = F2FS_INODE(node_page); + ri = F2FS_INODE(&node_folio->page); inode->i_mode = le16_to_cpu(ri->i_mode); i_uid_write(inode, le32_to_cpu(ri->i_uid)); @@ -450,8 +469,8 @@ static int do_read_inode(struct inode *inode) fi->i_inline_xattr_size = 0; } - if (!sanity_check_inode(inode, node_page)) { - f2fs_put_page(node_page, 1); + if (!sanity_check_inode(inode, &node_folio->page)) { + f2fs_folio_put(node_folio, true); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; @@ -459,17 +478,17 @@ static int do_read_inode(struct inode *inode) /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) - __recover_inline_status(inode, node_page); + __recover_inline_status(inode, node_folio); /* try to recover cold bit for non-dir inode */ - if (!S_ISDIR(inode->i_mode) && !is_cold_node(node_page)) { - f2fs_wait_on_page_writeback(node_page, NODE, true, true); - set_cold_node(node_page, false); - set_page_dirty(node_page); + if (!S_ISDIR(inode->i_mode) && !is_cold_node(&node_folio->page)) { + f2fs_folio_wait_writeback(node_folio, NODE, true, true); + set_cold_node(&node_folio->page, false); + folio_mark_dirty(node_folio); } /* get rdev by using inline_info */ - __get_inode_rdev(inode, node_page); + __get_inode_rdev(inode, node_folio); if (!f2fs_need_inode_block_update(sbi, inode->i_ino)) fi->last_disk_size = inode->i_size; @@ -512,17 +531,17 @@ static int do_read_inode(struct inode *inode) init_idisk_time(inode); - if (!sanity_check_extent_cache(inode, node_page)) { - f2fs_put_page(node_page, 1); + if (!sanity_check_extent_cache(inode, &node_folio->page)) { + f2fs_folio_put(node_folio, true); f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE); return -EFSCORRUPTED; } /* Need all the flag bits */ - f2fs_init_read_extent_tree(inode, node_page); + f2fs_init_read_extent_tree(inode, node_folio); f2fs_init_age_extent_tree(inode); - f2fs_put_page(node_page, 1); + f2fs_folio_put(node_folio, true); stat_inc_inline_xattr(inode); stat_inc_inline_inode(inode); @@ -639,18 +658,18 @@ retry: return inode; } -void f2fs_update_inode(struct inode *inode, struct page *node_page) +void f2fs_update_inode(struct inode *inode, struct folio *node_folio) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_inode *ri; struct extent_tree *et = fi->extent_tree[EX_READ]; - f2fs_wait_on_page_writeback(node_page, NODE, true, true); - set_page_dirty(node_page); + f2fs_folio_wait_writeback(node_folio, NODE, true, true); + folio_mark_dirty(node_folio); f2fs_inode_synced(inode); - ri = F2FS_INODE(node_page); + ri = F2FS_INODE(&node_folio->page); ri->i_mode = cpu_to_le16(inode->i_mode); ri->i_advise = fi->i_advise; @@ -725,39 +744,43 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) } } - __set_inode_rdev(inode, node_page); + __set_inode_rdev(inode, node_folio); /* deleted inode */ if (inode->i_nlink == 0) - clear_page_private_inline(node_page); + clear_page_private_inline(&node_folio->page); init_idisk_time(inode); #ifdef CONFIG_F2FS_CHECK_FS - f2fs_inode_chksum_set(F2FS_I_SB(inode), node_page); + f2fs_inode_chksum_set(F2FS_I_SB(inode), &node_folio->page); #endif } void f2fs_update_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *node_page; + struct folio *node_folio; int count = 0; retry: - node_page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(node_page)) { - int err = PTR_ERR(node_page); + node_folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(node_folio)) { + int err = PTR_ERR(node_folio); /* The node block was truncated. */ if (err == -ENOENT) return; + if (err == -EFSCORRUPTED) + goto stop_checkpoint; + if (err == -ENOMEM || ++count <= DEFAULT_RETRY_IO_COUNT) goto retry; +stop_checkpoint: f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_UPDATE_INODE); return; } - f2fs_update_inode(inode, node_page); - f2fs_put_page(node_page, 1); + f2fs_update_inode(inode, node_folio); + f2fs_folio_put(node_folio, true); } int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -775,8 +798,17 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; - if (!f2fs_is_checkpoint_ready(sbi)) + /* + * no need to update inode page, ultimately f2fs_evict_inode() will + * clear dirty status of inode. + */ + if (f2fs_cp_error(sbi)) + return -EIO; + + if (!f2fs_is_checkpoint_ready(sbi)) { + f2fs_mark_inode_dirty_sync(inode, true); return -ENOSPC; + } /* * We need to balance fs here to prevent from producing dirty node pages @@ -788,6 +820,19 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) return 0; } +static void f2fs_remove_donate_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (list_empty(&F2FS_I(inode)->gdonate_list)) + return; + + spin_lock(&sbi->inode_lock[DONATE_INODE]); + list_del_init(&F2FS_I(inode)->gdonate_list); + sbi->donate_files--; + spin_unlock(&sbi->inode_lock[DONATE_INODE]); +} + /* * Called at the last iput() if i_nlink is zero */ @@ -822,8 +867,10 @@ void f2fs_evict_inode(struct inode *inode) f2fs_bug_on(sbi, get_dirty_pages(inode)); f2fs_remove_dirty_inode(inode); + f2fs_remove_donate_inode(inode); - f2fs_destroy_extent_tree(inode); + if (!IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; @@ -879,6 +926,9 @@ retry: goto retry; } + if (IS_DEVICE_ALIASING(inode)) + f2fs_destroy_extent_tree(inode); + if (err) { f2fs_update_inode_page(inode); if (dquot_initialize_needed(inode)) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 57d46e1439de..07e333ee21b7 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -341,6 +341,7 @@ fail_drop: trace_f2fs_new_inode(inode, err); dquot_drop(inode); inode->i_flags |= S_NOQUOTA; + make_bad_inode(inode); if (nid_free) set_inode_flag(inode, FI_FREE_NID); clear_nlink(inode); @@ -413,7 +414,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (is_inode_flag_set(dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid))) + F2FS_I(inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(dir); @@ -446,12 +447,12 @@ out: struct dentry *f2fs_get_parent(struct dentry *child) { - struct page *page; - unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot_name, &page); + struct folio *folio; + unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot_name, &folio); if (!ino) { - if (IS_ERR(page)) - return ERR_CAST(page); + if (IS_ERR(folio)) + return ERR_CAST(folio); return ERR_PTR(-ENOENT); } return d_obtain_alias(f2fs_iget(child->d_sb, ino)); @@ -462,7 +463,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, { struct inode *inode = NULL; struct f2fs_dir_entry *de; - struct page *page; + struct folio *folio; struct dentry *new; nid_t ino = -1; int err = 0; @@ -480,12 +481,12 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out_splice; if (err) goto out; - de = __f2fs_find_entry(dir, &fname, &page); + de = __f2fs_find_entry(dir, &fname, &folio); f2fs_free_filename(&fname); if (!de) { - if (IS_ERR(page)) { - err = PTR_ERR(page); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); goto out; } err = -ENOENT; @@ -493,7 +494,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } ino = le32_to_cpu(de->ino); - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) { @@ -501,6 +502,14 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out; } + if (inode->i_nlink == 0) { + f2fs_warn(F2FS_I_SB(inode), "%s: inode (ino=%lx) has zero i_nlink", + __func__, inode->i_ino); + err = -EFSCORRUPTED; + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + goto out_iput; + } + if (IS_ENCRYPTED(dir) && (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) && !fscrypt_has_permitted_context(dir, inode)) { @@ -536,7 +545,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode = d_inode(dentry); struct f2fs_dir_entry *de; - struct page *page; + struct folio *folio; int err; trace_f2fs_unlink_enter(dir, dentry); @@ -553,10 +562,19 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) if (err) goto fail; - de = f2fs_find_entry(dir, &dentry->d_name, &page); + de = f2fs_find_entry(dir, &dentry->d_name, &folio); if (!de) { - if (IS_ERR(page)) - err = PTR_ERR(page); + if (IS_ERR(folio)) + err = PTR_ERR(folio); + goto fail; + } + + if (unlikely(inode->i_nlink == 0)) { + f2fs_warn(F2FS_I_SB(inode), "%s: inode (ino=%lx) has zero i_nlink", + __func__, inode->i_ino); + err = -EFSCORRUPTED; + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + f2fs_folio_put(folio, false); goto fail; } @@ -566,10 +584,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = f2fs_acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); goto fail; } - f2fs_delete_entry(de, page, dir, inode); + f2fs_delete_entry(de, folio, dir, inode); f2fs_unlock_op(sbi); /* VFS negative dentries are incompatible with Encoding and @@ -683,23 +701,23 @@ out_free_encrypted_link: return err; } -static int f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) +static struct dentry *f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *dentry, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct inode *inode; int err; if (unlikely(f2fs_cp_error(sbi))) - return -EIO; + return ERR_PTR(-EIO); err = f2fs_dquot_initialize(dir); if (err) - return err; + return ERR_PTR(err); inode = f2fs_new_inode(idmap, dir, S_IFDIR | mode, NULL); if (IS_ERR(inode)) - return PTR_ERR(inode); + return ERR_CAST(inode); inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; @@ -721,12 +739,12 @@ static int f2fs_mkdir(struct mnt_idmap *idmap, struct inode *dir, f2fs_sync_fs(sbi->sb, 1); f2fs_balance_fs(sbi, true); - return 0; + return NULL; out_fail: clear_inode_flag(inode, FI_INC_LINK); f2fs_handle_failed_inode(inode); - return err; + return ERR_PTR(err); } static int f2fs_rmdir(struct inode *dir, struct dentry *dentry) @@ -890,8 +908,8 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct inode *whiteout = NULL; - struct page *old_dir_page = NULL; - struct page *old_page, *new_page = NULL; + struct folio *old_dir_folio = NULL; + struct folio *old_folio, *new_folio = NULL; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; @@ -905,7 +923,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && (!projid_eq(F2FS_I(new_dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid))) + F2FS_I(old_inode)->i_projid))) return -EXDEV; /* @@ -950,18 +968,18 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } err = -ENOENT; - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_folio); if (!old_entry) { - if (IS_ERR(old_page)) - err = PTR_ERR(old_page); + if (IS_ERR(old_folio)) + err = PTR_ERR(old_folio); goto out; } if (old_is_dir && old_dir != new_dir) { - old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); + old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_folio); if (!old_dir_entry) { - if (IS_ERR(old_dir_page)) - err = PTR_ERR(old_dir_page); + if (IS_ERR(old_dir_folio)) + err = PTR_ERR(old_dir_folio); goto out_old; } } @@ -974,10 +992,10 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, - &new_page); + &new_folio); if (!new_entry) { - if (IS_ERR(new_page)) - err = PTR_ERR(new_page); + if (IS_ERR(new_folio)) + err = PTR_ERR(new_folio); goto out_dir; } @@ -989,8 +1007,8 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (err) goto put_out_dir; - f2fs_set_link(new_dir, new_entry, new_page, old_inode); - new_page = NULL; + f2fs_set_link(new_dir, new_entry, new_folio, old_inode); + new_folio = NULL; inode_set_ctime_current(new_inode); f2fs_down_write(&F2FS_I(new_inode)->i_sem); @@ -1029,8 +1047,8 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, inode_set_ctime_current(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); - f2fs_delete_entry(old_entry, old_page, old_dir, NULL); - old_page = NULL; + f2fs_delete_entry(old_entry, old_folio, old_dir, NULL); + old_folio = NULL; if (whiteout) { set_inode_flag(whiteout, FI_INC_LINK); @@ -1046,7 +1064,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, } if (old_dir_entry) - f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); + f2fs_set_link(old_inode, old_dir_entry, old_dir_folio, new_dir); if (old_is_dir) f2fs_i_links_write(old_dir, false); @@ -1067,12 +1085,12 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir, put_out_dir: f2fs_unlock_op(sbi); - f2fs_put_page(new_page, 0); + f2fs_folio_put(new_folio, false); out_dir: if (old_dir_entry) - f2fs_put_page(old_dir_page, 0); + f2fs_folio_put(old_dir_folio, false); out_old: - f2fs_put_page(old_page, 0); + f2fs_folio_put(old_folio, false); out: iput(whiteout); return err; @@ -1084,8 +1102,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, struct f2fs_sb_info *sbi = F2FS_I_SB(old_dir); struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct page *old_dir_page, *new_dir_page; - struct page *old_page, *new_page; + struct folio *old_dir_folio, *new_dir_folio; + struct folio *old_folio, *new_folio; struct f2fs_dir_entry *old_dir_entry = NULL, *new_dir_entry = NULL; struct f2fs_dir_entry *old_entry, *new_entry; int old_nlink = 0, new_nlink = 0; @@ -1098,10 +1116,10 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(new_dir)->i_projid, - F2FS_I(old_dentry->d_inode)->i_projid)) || - (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) && + F2FS_I(old_inode)->i_projid)) || + (is_inode_flag_set(old_dir, FI_PROJ_INHERIT) && !projid_eq(F2FS_I(old_dir)->i_projid, - F2FS_I(new_dentry->d_inode)->i_projid))) + F2FS_I(new_inode)->i_projid))) return -EXDEV; err = f2fs_dquot_initialize(old_dir); @@ -1113,17 +1131,17 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; err = -ENOENT; - old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); + old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_folio); if (!old_entry) { - if (IS_ERR(old_page)) - err = PTR_ERR(old_page); + if (IS_ERR(old_folio)) + err = PTR_ERR(old_folio); goto out; } - new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); + new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_folio); if (!new_entry) { - if (IS_ERR(new_page)) - err = PTR_ERR(new_page); + if (IS_ERR(new_folio)) + err = PTR_ERR(new_folio); goto out_old; } @@ -1131,20 +1149,20 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir != new_dir) { if (S_ISDIR(old_inode->i_mode)) { old_dir_entry = f2fs_parent_dir(old_inode, - &old_dir_page); + &old_dir_folio); if (!old_dir_entry) { - if (IS_ERR(old_dir_page)) - err = PTR_ERR(old_dir_page); + if (IS_ERR(old_dir_folio)) + err = PTR_ERR(old_dir_folio); goto out_new; } } if (S_ISDIR(new_inode->i_mode)) { new_dir_entry = f2fs_parent_dir(new_inode, - &new_dir_page); + &new_dir_folio); if (!new_dir_entry) { - if (IS_ERR(new_dir_page)) - err = PTR_ERR(new_dir_page); + if (IS_ERR(new_dir_folio)) + err = PTR_ERR(new_dir_folio); goto out_old_dir; } } @@ -1171,14 +1189,14 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, /* update ".." directory entry info of old dentry */ if (old_dir_entry) - f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); + f2fs_set_link(old_inode, old_dir_entry, old_dir_folio, new_dir); /* update ".." directory entry info of new dentry */ if (new_dir_entry) - f2fs_set_link(new_inode, new_dir_entry, new_dir_page, old_dir); + f2fs_set_link(new_inode, new_dir_entry, new_dir_folio, old_dir); /* update directory entry info of old dir inode */ - f2fs_set_link(old_dir, old_entry, old_page, new_inode); + f2fs_set_link(old_dir, old_entry, old_folio, new_inode); f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry) @@ -1197,7 +1215,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_mark_inode_dirty_sync(old_dir, false); /* update directory entry info of new dir inode */ - f2fs_set_link(new_dir, new_entry, new_page, old_inode); + f2fs_set_link(new_dir, new_entry, new_folio, old_inode); f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (!new_dir_entry) @@ -1229,16 +1247,16 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, return 0; out_new_dir: if (new_dir_entry) { - f2fs_put_page(new_dir_page, 0); + f2fs_folio_put(new_dir_folio, 0); } out_old_dir: if (old_dir_entry) { - f2fs_put_page(old_dir_page, 0); + f2fs_folio_put(old_dir_folio, 0); } out_new: - f2fs_put_page(new_page, 0); + f2fs_folio_put(new_folio, false); out_old: - f2fs_put_page(old_page, 0); + f2fs_folio_put(old_folio, false); out: return err; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 59b13ff243fa..bfe104db284e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -120,25 +120,25 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type) return res; } -static void clear_node_page_dirty(struct page *page) +static void clear_node_folio_dirty(struct folio *folio) { - if (PageDirty(page)) { - f2fs_clear_page_cache_dirty_tag(page_folio(page)); - clear_page_dirty_for_io(page); - dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); + if (folio_test_dirty(folio)) { + f2fs_clear_page_cache_dirty_tag(folio); + folio_clear_dirty_for_io(folio); + dec_page_count(F2FS_F_SB(folio), F2FS_DIRTY_NODES); } - ClearPageUptodate(page); + folio_clear_uptodate(folio); } -static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid) +static struct folio *get_current_nat_folio(struct f2fs_sb_info *sbi, nid_t nid) { - return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid)); + return f2fs_get_meta_folio_retry(sbi, current_nat_addr(sbi, nid)); } static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) { - struct page *src_page; - struct page *dst_page; + struct folio *src_folio; + struct folio *dst_folio; pgoff_t dst_off; void *src_addr; void *dst_addr; @@ -147,21 +147,21 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid)); /* get current nat block page with lock */ - src_page = get_current_nat_page(sbi, nid); - if (IS_ERR(src_page)) - return src_page; - dst_page = f2fs_grab_meta_page(sbi, dst_off); - f2fs_bug_on(sbi, PageDirty(src_page)); - - src_addr = page_address(src_page); - dst_addr = page_address(dst_page); + src_folio = get_current_nat_folio(sbi, nid); + if (IS_ERR(src_folio)) + return &src_folio->page; + dst_folio = f2fs_grab_meta_folio(sbi, dst_off); + f2fs_bug_on(sbi, folio_test_dirty(src_folio)); + + src_addr = folio_address(src_folio); + dst_addr = folio_address(dst_folio); memcpy(dst_addr, src_addr, PAGE_SIZE); - set_page_dirty(dst_page); - f2fs_put_page(src_page, 1); + folio_mark_dirty(dst_folio); + f2fs_folio_put(src_folio, true); set_to_next_nat(nm_i, nid); - return dst_page; + return &dst_folio->page; } static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi, @@ -310,10 +310,10 @@ static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, start, nr); } -bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page) +bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio) { - return NODE_MAPPING(sbi) == page->mapping && - IS_DNODE(page) && is_cold_node(page); + return is_node_folio(folio) && IS_DNODE(&folio->page) && + is_cold_node(&folio->page); } void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) @@ -325,7 +325,7 @@ void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi) } static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, - struct page *page) + struct folio *folio) { struct fsync_node_entry *fn; unsigned long flags; @@ -334,8 +334,8 @@ static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS, true, NULL); - get_page(page); - fn->page = page; + folio_get(folio); + fn->folio = folio; INIT_LIST_HEAD(&fn->list); spin_lock_irqsave(&sbi->fsync_node_lock, flags); @@ -348,19 +348,19 @@ static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi, return seq_id; } -void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page) +void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio) { struct fsync_node_entry *fn; unsigned long flags; spin_lock_irqsave(&sbi->fsync_node_lock, flags); list_for_each_entry(fn, &sbi->fsync_node_list, list) { - if (fn->page == page) { + if (fn->folio == folio) { list_del(&fn->list); sbi->fsync_node_num--; spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); kmem_cache_free(fsync_node_entry_slab, fn); - put_page(page); + folio_put(folio); return; } } @@ -551,13 +551,14 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_journal *journal = curseg->journal; nid_t start_nid = START_NID(nid); struct f2fs_nat_block *nat_blk; - struct page *page = NULL; + struct folio *folio = NULL; struct f2fs_nat_entry ne; struct nat_entry *e; pgoff_t index; block_t blkaddr; int i; + ni->flag = 0; ni->nid = nid; retry: /* Check nat cache */ @@ -600,14 +601,14 @@ retry: index = current_nat_addr(sbi, nid); f2fs_up_read(&nm_i->nat_tree_lock); - page = f2fs_get_meta_page(sbi, index); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_meta_folio(sbi, index); + if (IS_ERR(folio)) + return PTR_ERR(folio); - nat_blk = (struct f2fs_nat_block *)page_address(page); + nat_blk = folio_address(folio); ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); cache: blkaddr = le32_to_cpu(ne.block_addr); if (__is_valid_data_blkaddr(blkaddr) && @@ -622,9 +623,9 @@ cache: /* * readahead MAX_RA_NODE number of node pages. */ -static void f2fs_ra_node_pages(struct page *parent, int start, int n) +static void f2fs_ra_node_pages(struct folio *parent, int start, int n) { - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); + struct f2fs_sb_info *sbi = F2FS_F_SB(parent); struct blk_plug plug; int i, end; nid_t nid; @@ -635,7 +636,7 @@ static void f2fs_ra_node_pages(struct page *parent, int start, int n) end = start + n; end = min(end, (int)NIDS_PER_BLOCK); for (i = start; i < end; i++) { - nid = get_nid(parent, i, false); + nid = get_nid(&parent->page, i, false); f2fs_ra_node_page(sbi, nid); } @@ -753,6 +754,8 @@ got: return level; } +static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start); + /* * Caller should call f2fs_put_dnode(dn). * Also, it should grab and release a rwsem by calling f2fs_lock_op() and @@ -761,8 +764,8 @@ got: int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct page *npage[4]; - struct page *parent = NULL; + struct folio *nfolio[4]; + struct folio *parent = NULL; int offset[4]; unsigned int noffset[4]; nid_t nids[4]; @@ -774,26 +777,27 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) return level; nids[0] = dn->inode->i_ino; - npage[0] = dn->inode_page; - if (!npage[0]) { - npage[0] = f2fs_get_node_page(sbi, nids[0]); - if (IS_ERR(npage[0])) - return PTR_ERR(npage[0]); + if (!dn->inode_folio) { + nfolio[0] = f2fs_get_inode_folio(sbi, nids[0]); + if (IS_ERR(nfolio[0])) + return PTR_ERR(nfolio[0]); + } else { + nfolio[0] = dn->inode_folio; } /* if inline_data is set, should not report any block indices */ if (f2fs_has_inline_data(dn->inode) && index) { err = -ENOENT; - f2fs_put_page(npage[0], 1); + f2fs_folio_put(nfolio[0], true); goto release_out; } - parent = npage[0]; + parent = nfolio[0]; if (level != 0) - nids[1] = get_nid(parent, offset[0], true); - dn->inode_page = npage[0]; - dn->inode_page_locked = true; + nids[1] = get_nid(&parent->page, offset[0], true); + dn->inode_folio = nfolio[0]; + dn->inode_folio_locked = true; /* get indirect or direct nodes */ for (i = 1; i <= level; i++) { @@ -807,10 +811,10 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } dn->nid = nids[i]; - npage[i] = f2fs_new_node_page(dn, noffset[i]); - if (IS_ERR(npage[i])) { + nfolio[i] = f2fs_new_node_folio(dn, noffset[i]); + if (IS_ERR(nfolio[i])) { f2fs_alloc_nid_failed(sbi, nids[i]); - err = PTR_ERR(npage[i]); + err = PTR_ERR(nfolio[i]); goto release_pages; } @@ -818,36 +822,36 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) f2fs_alloc_nid_done(sbi, nids[i]); done = true; } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { - npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]); - if (IS_ERR(npage[i])) { - err = PTR_ERR(npage[i]); + nfolio[i] = f2fs_get_node_folio_ra(parent, offset[i - 1]); + if (IS_ERR(nfolio[i])) { + err = PTR_ERR(nfolio[i]); goto release_pages; } done = true; } if (i == 1) { - dn->inode_page_locked = false; - unlock_page(parent); + dn->inode_folio_locked = false; + folio_unlock(parent); } else { - f2fs_put_page(parent, 1); + f2fs_folio_put(parent, true); } if (!done) { - npage[i] = f2fs_get_node_page(sbi, nids[i]); - if (IS_ERR(npage[i])) { - err = PTR_ERR(npage[i]); - f2fs_put_page(npage[0], 0); + nfolio[i] = f2fs_get_node_folio(sbi, nids[i]); + if (IS_ERR(nfolio[i])) { + err = PTR_ERR(nfolio[i]); + f2fs_folio_put(nfolio[0], false); goto release_out; } } if (i < level) { - parent = npage[i]; - nids[i + 1] = get_nid(parent, offset[i], false); + parent = nfolio[i]; + nids[i + 1] = get_nid(&parent->page, offset[i], false); } } dn->nid = nids[level]; dn->ofs_in_node = offset[level]; - dn->node_page = npage[level]; + dn->node_folio = nfolio[level]; dn->data_blkaddr = f2fs_data_blkaddr(dn); if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) && @@ -868,9 +872,9 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) if (!c_len) goto out; - blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node); + blkaddr = data_blkaddr(dn->inode, dn->node_folio, ofs_in_node); if (blkaddr == COMPRESS_ADDR) - blkaddr = data_blkaddr(dn->inode, dn->node_page, + blkaddr = data_blkaddr(dn->inode, dn->node_folio, ofs_in_node + 1); f2fs_update_read_extent_tree_range_compressed(dn->inode, @@ -880,12 +884,12 @@ out: return 0; release_pages: - f2fs_put_page(parent, 1); + f2fs_folio_put(parent, true); if (i > 1) - f2fs_put_page(npage[0], 0); + f2fs_folio_put(nfolio[0], false); release_out: - dn->inode_page = NULL; - dn->node_page = NULL; + dn->inode_folio = NULL; + dn->node_folio = NULL; if (err == -ENOENT) { dn->cur_level = i; dn->max_level = level; @@ -905,8 +909,18 @@ static int truncate_node(struct dnode_of_data *dn) if (err) return err; + if (ni.blk_addr != NEW_ADDR && + !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { + f2fs_err_ratelimited(sbi, + "nat entry is corrupted, run fsck to fix it, ino:%u, " + "nid:%u, blkaddr:%u", ni.ino, ni.nid, ni.blk_addr); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); + return -EFSCORRUPTED; + } + /* Deallocate node address */ - f2fs_invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -916,16 +930,16 @@ static int truncate_node(struct dnode_of_data *dn) f2fs_inode_synced(dn->inode); } - clear_node_page_dirty(dn->node_page); + clear_node_folio_dirty(dn->node_folio); set_sbi_flag(sbi, SBI_IS_DIRTY); - index = page_folio(dn->node_page)->index; - f2fs_put_page(dn->node_page, 1); + index = dn->node_folio->index; + f2fs_folio_put(dn->node_folio, true); invalidate_mapping_pages(NODE_MAPPING(sbi), index, index); - dn->node_page = NULL; + dn->node_folio = NULL; trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); return 0; @@ -934,35 +948,35 @@ static int truncate_node(struct dnode_of_data *dn) static int truncate_dnode(struct dnode_of_data *dn) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); - struct page *page; + struct folio *folio; int err; if (dn->nid == 0) return 1; /* get direct node */ - page = f2fs_get_node_page(sbi, dn->nid); - if (PTR_ERR(page) == -ENOENT) + folio = f2fs_get_node_folio(sbi, dn->nid); + if (PTR_ERR(folio) == -ENOENT) return 1; - else if (IS_ERR(page)) - return PTR_ERR(page); + else if (IS_ERR(folio)) + return PTR_ERR(folio); - if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) { + if (IS_INODE(&folio->page) || ino_of_node(&folio->page) != dn->inode->i_ino) { f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u", - dn->inode->i_ino, dn->nid, ino_of_node(page)); + dn->inode->i_ino, dn->nid, ino_of_node(&folio->page)); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return -EFSCORRUPTED; } /* Make dnode_of_data for parameter */ - dn->node_page = page; + dn->node_folio = folio; dn->ofs_in_node = 0; f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode)); err = truncate_node(dn); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return err; } @@ -973,7 +987,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, int ofs, int depth) { struct dnode_of_data rdn = *dn; - struct page *page; + struct folio *folio; struct f2fs_node *rn; nid_t child_nid; unsigned int child_nofs; @@ -985,15 +999,15 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); - page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid); - if (IS_ERR(page)) { - trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); - return PTR_ERR(page); + folio = f2fs_get_node_folio(F2FS_I_SB(dn->inode), dn->nid); + if (IS_ERR(folio)) { + trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(folio)); + return PTR_ERR(folio); } - f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK); + f2fs_ra_node_pages(folio, ofs, NIDS_PER_BLOCK); - rn = F2FS_NODE(page); + rn = F2FS_NODE(&folio->page); if (depth < 3) { for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) { child_nid = le32_to_cpu(rn->in.nid[i]); @@ -1003,7 +1017,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, ret = truncate_dnode(&rdn); if (ret < 0) goto out_err; - if (set_nid(page, i, 0, false)) + if (set_nid(folio, i, 0, false)) dn->node_changed = true; } } else { @@ -1017,7 +1031,7 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, rdn.nid = child_nid; ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1); if (ret == (NIDS_PER_BLOCK + 1)) { - if (set_nid(page, i, 0, false)) + if (set_nid(folio, i, 0, false)) dn->node_changed = true; child_nofs += ret; } else if (ret < 0 && ret != -ENOENT) { @@ -1029,19 +1043,19 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (!ofs) { /* remove current indirect node */ - dn->node_page = page; + dn->node_folio = folio; ret = truncate_node(dn); if (ret) goto out_err; freed++; } else { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } trace_f2fs_truncate_nodes_exit(dn->inode, freed); return freed; out_err: - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); trace_f2fs_truncate_nodes_exit(dn->inode, ret); return ret; } @@ -1049,59 +1063,59 @@ out_err: static int truncate_partial_nodes(struct dnode_of_data *dn, struct f2fs_inode *ri, int *offset, int depth) { - struct page *pages[2]; + struct folio *folios[2]; nid_t nid[3]; nid_t child_nid; int err = 0; int i; int idx = depth - 2; - nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + nid[0] = get_nid(&dn->inode_folio->page, offset[0], true); if (!nid[0]) return 0; /* get indirect nodes in the path */ for (i = 0; i < idx + 1; i++) { /* reference count'll be increased */ - pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]); - if (IS_ERR(pages[i])) { - err = PTR_ERR(pages[i]); + folios[i] = f2fs_get_node_folio(F2FS_I_SB(dn->inode), nid[i]); + if (IS_ERR(folios[i])) { + err = PTR_ERR(folios[i]); idx = i - 1; goto fail; } - nid[i + 1] = get_nid(pages[i], offset[i + 1], false); + nid[i + 1] = get_nid(&folios[i]->page, offset[i + 1], false); } - f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK); + f2fs_ra_node_pages(folios[idx], offset[idx + 1], NIDS_PER_BLOCK); /* free direct nodes linked to a partial indirect node */ for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) { - child_nid = get_nid(pages[idx], i, false); + child_nid = get_nid(&folios[idx]->page, i, false); if (!child_nid) continue; dn->nid = child_nid; err = truncate_dnode(dn); if (err < 0) goto fail; - if (set_nid(pages[idx], i, 0, false)) + if (set_nid(folios[idx], i, 0, false)) dn->node_changed = true; } if (offset[idx + 1] == 0) { - dn->node_page = pages[idx]; + dn->node_folio = folios[idx]; dn->nid = nid[idx]; err = truncate_node(dn); if (err) goto fail; } else { - f2fs_put_page(pages[idx], 1); + f2fs_folio_put(folios[idx], true); } offset[idx]++; offset[idx + 1] = 0; idx--; fail: for (i = idx; i >= 0; i--) - f2fs_put_page(pages[i], 1); + f2fs_folio_put(folios[i], true); trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); @@ -1119,26 +1133,33 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) unsigned int nofs = 0; struct f2fs_inode *ri; struct dnode_of_data dn; - struct page *page; + struct folio *folio; trace_f2fs_truncate_inode_blocks_enter(inode, from); level = get_node_path(inode, from, offset, noffset); - if (level < 0) { + if (level <= 0) { + if (!level) { + level = -EFSCORRUPTED; + f2fs_err(sbi, "%s: inode ino=%lx has corrupted node block, from:%lu addrs:%u", + __func__, inode->i_ino, + from, ADDRS_PER_INODE(inode)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + } trace_f2fs_truncate_inode_blocks_exit(inode, level); return level; } - page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) { - trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); - return PTR_ERR(page); + folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(folio)) { + trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(folio)); + return PTR_ERR(folio); } - set_new_dnode(&dn, inode, page, NULL, 0); - unlock_page(page); + set_new_dnode(&dn, inode, folio, NULL, 0); + folio_unlock(folio); - ri = F2FS_INODE(page); + ri = F2FS_INODE(&folio->page); switch (level) { case 0: case 1: @@ -1167,7 +1188,7 @@ int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from) skip_partial: while (cont) { - dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]); + dn.nid = get_nid(&folio->page, offset[0], true); switch (offset[0]) { case NODE_DIR1_BLOCK: case NODE_DIR2_BLOCK: @@ -1188,7 +1209,7 @@ skip_partial: BUG(); } if (err == -ENOENT) { - set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); + set_sbi_flag(F2FS_F_SB(folio), SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); f2fs_err_ratelimited(sbi, "truncate node fail, ino:%lu, nid:%u, " @@ -1199,21 +1220,18 @@ skip_partial: } if (err < 0) goto fail; - if (offset[1] == 0 && - ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { - lock_page(page); - BUG_ON(page->mapping != NODE_MAPPING(sbi)); - f2fs_wait_on_page_writeback(page, NODE, true, true); - ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; - set_page_dirty(page); - unlock_page(page); + if (offset[1] == 0 && get_nid(&folio->page, offset[0], true)) { + folio_lock(folio); + BUG_ON(!is_node_folio(folio)); + set_nid(folio, offset[0], 0, true); + folio_unlock(folio); } offset[1] = 0; offset[0]++; nofs += err; } fail: - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); trace_f2fs_truncate_inode_blocks_exit(inode, err); return err > 0 ? 0 : err; } @@ -1224,20 +1242,20 @@ int f2fs_truncate_xattr_node(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t nid = F2FS_I(inode)->i_xattr_nid; struct dnode_of_data dn; - struct page *npage; + struct folio *nfolio; int err; if (!nid) return 0; - npage = f2fs_get_node_page(sbi, nid); - if (IS_ERR(npage)) - return PTR_ERR(npage); + nfolio = f2fs_get_xnode_folio(sbi, nid); + if (IS_ERR(nfolio)) + return PTR_ERR(nfolio); - set_new_dnode(&dn, inode, NULL, npage, nid); + set_new_dnode(&dn, inode, NULL, nfolio, nid); err = truncate_node(&dn); if (err) { - f2fs_put_page(npage, 1); + f2fs_folio_put(nfolio, true); return err; } @@ -1267,8 +1285,9 @@ int f2fs_remove_inode_page(struct inode *inode) } /* remove potential inline_data blocks */ - if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) + if (!IS_DEVICE_ALIASING(inode) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) f2fs_truncate_data_blocks_range(&dn, 1); /* 0 is possible, after f2fs_new_inode() has failed */ @@ -1293,30 +1312,30 @@ int f2fs_remove_inode_page(struct inode *inode) return 0; } -struct page *f2fs_new_inode_page(struct inode *inode) +struct folio *f2fs_new_inode_folio(struct inode *inode) { struct dnode_of_data dn; /* allocate inode page for new inode */ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - /* caller should f2fs_put_page(page, 1); */ - return f2fs_new_node_page(&dn, 0); + /* caller should f2fs_folio_put(folio, true); */ + return f2fs_new_node_folio(&dn, 0); } -struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) +struct folio *f2fs_new_node_folio(struct dnode_of_data *dn, unsigned int ofs) { struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode); struct node_info new_ni; - struct page *page; + struct folio *folio; int err; if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return ERR_PTR(-EPERM); - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); - if (!page) - return ERR_PTR(-ENOMEM); + folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), dn->nid, false); + if (IS_ERR(folio)) + return folio; if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs)))) goto fail; @@ -1331,7 +1350,12 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) err = -EFSCORRUPTED; dec_valid_node_count(sbi, dn->inode, !ofs); set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + f2fs_warn_ratelimited(sbi, + "f2fs_new_node_folio: inconsistent nat entry, " + "ino:%u, nid:%u, blkaddr:%u, ver:%u, flag:%u", + new_ni.ino, new_ni.nid, new_ni.blk_addr, + new_ni.version, new_ni.flag); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_NAT); goto fail; } #endif @@ -1342,12 +1366,12 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) new_ni.version = 0; set_node_addr(sbi, &new_ni, NEW_ADDR, false); - f2fs_wait_on_page_writeback(page, NODE, true, true); - fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); - set_cold_node(page, S_ISDIR(dn->inode->i_mode)); - if (!PageUptodate(page)) - SetPageUptodate(page); - if (set_page_dirty(page)) + f2fs_folio_wait_writeback(folio, NODE, true, true); + fill_node_footer(&folio->page, dn->nid, dn->inode->i_ino, ofs, true); + set_cold_node(&folio->page, S_ISDIR(dn->inode->i_mode)); + if (!folio_test_uptodate(folio)) + folio_mark_uptodate(folio); + if (folio_mark_dirty(folio)) dn->node_changed = true; if (f2fs_has_xattr_block(ofs)) @@ -1355,35 +1379,34 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) if (ofs == 0) inc_valid_inode_count(sbi); - return page; + return folio; fail: - clear_node_page_dirty(page); - f2fs_put_page(page, 1); + clear_node_folio_dirty(folio); + f2fs_folio_put(folio, true); return ERR_PTR(err); } /* * Caller should do after getting the following values. - * 0: f2fs_put_page(page, 0) - * LOCKED_PAGE or error: f2fs_put_page(page, 1) + * 0: f2fs_folio_put(folio, false) + * LOCKED_PAGE or error: f2fs_folio_put(folio, true) */ -static int read_node_page(struct page *page, blk_opf_t op_flags) +static int read_node_folio(struct folio *folio, blk_opf_t op_flags) { - struct folio *folio = page_folio(page); - struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, .type = NODE, .op = REQ_OP_READ, .op_flags = op_flags, - .page = page, + .page = &folio->page, .encrypted_page = NULL, }; int err; if (folio_test_uptodate(folio)) { - if (!f2fs_inode_chksum_verify(sbi, page)) { + if (!f2fs_inode_chksum_verify(sbi, folio)) { folio_clear_uptodate(folio); return -EFSBADCRC; } @@ -1415,7 +1438,7 @@ static int read_node_page(struct page *page, blk_opf_t op_flags) */ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) { - struct page *apage; + struct folio *afolio; int err; if (!nid) @@ -1423,22 +1446,45 @@ void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) if (f2fs_check_nid_range(sbi, nid)) return; - apage = xa_load(&NODE_MAPPING(sbi)->i_pages, nid); - if (apage) + afolio = xa_load(&NODE_MAPPING(sbi)->i_pages, nid); + if (afolio) return; - apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); - if (!apage) + afolio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), nid, false); + if (IS_ERR(afolio)) return; - err = read_node_page(apage, REQ_RAHEAD); - f2fs_put_page(apage, err ? 1 : 0); + err = read_node_folio(afolio, REQ_RAHEAD); + f2fs_folio_put(afolio, err ? true : false); } -static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, - struct page *parent, int start) +static int sanity_check_node_footer(struct f2fs_sb_info *sbi, + struct folio *folio, pgoff_t nid, + enum node_type ntype) { - struct page *page; + struct page *page = &folio->page; + + if (unlikely(nid != nid_of_node(page) || + (ntype == NODE_TYPE_INODE && !IS_INODE(page)) || + (ntype == NODE_TYPE_XATTR && + !f2fs_has_xattr_block(ofs_of_node(page))) || + time_to_inject(sbi, FAULT_INCONSISTENT_FOOTER))) { + f2fs_warn(sbi, "inconsistent node block, node_type:%d, nid:%lu, " + "node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", + ntype, nid, nid_of_node(page), ino_of_node(page), + ofs_of_node(page), cpver_of_node(page), + next_blkaddr_of_node(folio)); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); + return -EFSCORRUPTED; + } + return 0; +} + +static struct folio *__get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid, + struct folio *parent, int start, enum node_type ntype) +{ + struct folio *folio; int err; if (!nid) @@ -1446,75 +1492,76 @@ static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid, if (f2fs_check_nid_range(sbi, nid)) return ERR_PTR(-EINVAL); repeat: - page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false); - if (!page) - return ERR_PTR(-ENOMEM); + folio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), nid, false); + if (IS_ERR(folio)) + return folio; - err = read_node_page(page, 0); - if (err < 0) { + err = read_node_folio(folio, 0); + if (err < 0) goto out_put_err; - } else if (err == LOCKED_PAGE) { - err = 0; + if (err == LOCKED_PAGE) goto page_hit; - } if (parent) f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE); - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { - f2fs_put_page(page, 1); + if (unlikely(!is_node_folio(folio))) { + f2fs_folio_put(folio, true); goto repeat; } - if (unlikely(!PageUptodate(page))) { + if (unlikely(!folio_test_uptodate(folio))) { err = -EIO; goto out_err; } - if (!f2fs_inode_chksum_verify(sbi, page)) { + if (!f2fs_inode_chksum_verify(sbi, folio)) { err = -EFSBADCRC; goto out_err; } page_hit: - if (likely(nid == nid_of_node(page))) - return page; - - f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]", - nid, nid_of_node(page), ino_of_node(page), - ofs_of_node(page), cpver_of_node(page), - next_blkaddr_of_node(page)); - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); - err = -EFSCORRUPTED; + err = sanity_check_node_footer(sbi, folio, nid, ntype); + if (!err) + return folio; out_err: - ClearPageUptodate(page); + folio_clear_uptodate(folio); out_put_err: - /* ENOENT comes from read_node_page which is not an error. */ + /* ENOENT comes from read_node_folio which is not an error. */ if (err != -ENOENT) - f2fs_handle_page_eio(sbi, page_folio(page), NODE); - f2fs_put_page(page, 1); + f2fs_handle_page_eio(sbi, folio, NODE); + f2fs_folio_put(folio, true); return ERR_PTR(err); } -struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) +struct folio *f2fs_get_node_folio(struct f2fs_sb_info *sbi, pgoff_t nid) +{ + return __get_node_folio(sbi, nid, NULL, 0, NODE_TYPE_REGULAR); +} + +struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino) +{ + return __get_node_folio(sbi, ino, NULL, 0, NODE_TYPE_INODE); +} + +struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid) { - return __get_node_page(sbi, nid, NULL, 0); + return __get_node_folio(sbi, xnid, NULL, 0, NODE_TYPE_XATTR); } -struct page *f2fs_get_node_page_ra(struct page *parent, int start) +static struct folio *f2fs_get_node_folio_ra(struct folio *parent, int start) { - struct f2fs_sb_info *sbi = F2FS_P_SB(parent); - nid_t nid = get_nid(parent, start, false); + struct f2fs_sb_info *sbi = F2FS_F_SB(parent); + nid_t nid = get_nid(&parent->page, start, false); - return __get_node_page(sbi, nid, parent, start); + return __get_node_folio(sbi, nid, parent, start, NODE_TYPE_REGULAR); } static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; - struct page *page; + struct folio *folio; int ret; /* should flush inline_data before evict_inode */ @@ -1522,36 +1569,36 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) if (!inode) return; - page = f2fs_pagecache_get_page(inode->i_mapping, 0, + folio = f2fs_filemap_get_folio(inode->i_mapping, 0, FGP_LOCK|FGP_NOWAIT, 0); - if (!page) + if (IS_ERR(folio)) goto iput_out; - if (!PageUptodate(page)) - goto page_out; + if (!folio_test_uptodate(folio)) + goto folio_out; - if (!PageDirty(page)) - goto page_out; + if (!folio_test_dirty(folio)) + goto folio_out; - if (!clear_page_dirty_for_io(page)) - goto page_out; + if (!folio_clear_dirty_for_io(folio)) + goto folio_out; - ret = f2fs_write_inline_data(inode, page_folio(page)); + ret = f2fs_write_inline_data(inode, folio); inode_dec_dirty_pages(inode); f2fs_remove_dirty_inode(inode); if (ret) - set_page_dirty(page); -page_out: - f2fs_put_page(page, 1); + folio_mark_dirty(folio); +folio_out: + f2fs_folio_put(folio, true); iput_out: iput(inode); } -static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) +static struct folio *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) { pgoff_t index; struct folio_batch fbatch; - struct page *last_page = NULL; + struct folio *last_folio = NULL; int nr_folios; folio_batch_init(&fbatch); @@ -1563,62 +1610,61 @@ static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino) int i; for (i = 0; i < nr_folios; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; if (unlikely(f2fs_cp_error(sbi))) { - f2fs_put_page(last_page, 0); + f2fs_folio_put(last_folio, false); folio_batch_release(&fbatch); return ERR_PTR(-EIO); } - if (!IS_DNODE(page) || !is_cold_node(page)) + if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) continue; - if (ino_of_node(page) != ino) + if (ino_of_node(&folio->page) != ino) continue; - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { + if (unlikely(!is_node_folio(folio))) { continue_unlock: - unlock_page(page); + folio_unlock(folio); continue; } - if (ino_of_node(page) != ino) + if (ino_of_node(&folio->page) != ino) goto continue_unlock; - if (!PageDirty(page)) { + if (!folio_test_dirty(folio)) { /* someone wrote it for us */ goto continue_unlock; } - if (last_page) - f2fs_put_page(last_page, 0); + if (last_folio) + f2fs_folio_put(last_folio, false); - get_page(page); - last_page = page; - unlock_page(page); + folio_get(folio); + last_folio = folio; + folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); } - return last_page; + return last_folio; } -static int __write_node_page(struct page *page, bool atomic, bool *submitted, +static bool __write_node_folio(struct folio *folio, bool atomic, bool *submitted, struct writeback_control *wbc, bool do_balance, enum iostat_type io_type, unsigned int *seq_id) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); - struct folio *folio = page_folio(page); + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); nid_t nid; struct node_info ni; struct f2fs_io_info fio = { .sbi = sbi, - .ino = ino_of_node(page), + .ino = ino_of_node(&folio->page), .type = NODE, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), - .page = page, + .page = &folio->page, .encrypted_page = NULL, .submitted = 0, .io_type = io_type, @@ -1635,7 +1681,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, folio_clear_uptodate(folio); dec_page_count(sbi, F2FS_DIRTY_NODES); folio_unlock(folio); - return 0; + return true; } if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) @@ -1643,22 +1689,17 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) && wbc->sync_mode == WB_SYNC_NONE && - IS_DNODE(page) && is_cold_node(page)) + IS_DNODE(&folio->page) && is_cold_node(&folio->page)) goto redirty_out; /* get old block addr of this node page */ - nid = nid_of_node(page); + nid = nid_of_node(&folio->page); f2fs_bug_on(sbi, folio->index != nid); if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; - if (wbc->for_reclaim) { - if (!f2fs_down_read_trylock(&sbi->node_write)) - goto redirty_out; - } else { - f2fs_down_read(&sbi->node_write); - } + f2fs_down_read(&sbi->node_write); /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { @@ -1666,7 +1707,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); folio_unlock(folio); - return 0; + return true; } if (__is_valid_data_blkaddr(ni.blk_addr) && @@ -1680,8 +1721,8 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, fio.op_flags |= REQ_PREFLUSH | REQ_FUA; /* should add to global list before clearing PAGECACHE status */ - if (f2fs_in_warm_node_list(sbi, page)) { - seq = f2fs_add_fsync_node_entry(sbi, page); + if (f2fs_in_warm_node_list(sbi, folio)) { + seq = f2fs_add_fsync_node_entry(sbi, folio); if (seq_id) *seq_id = seq; } @@ -1690,15 +1731,10 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, fio.old_blkaddr = ni.blk_addr; f2fs_do_write_node_page(nid, &fio); - set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); + set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(&folio->page)); dec_page_count(sbi, F2FS_DIRTY_NODES); f2fs_up_read(&sbi->node_write); - if (wbc->for_reclaim) { - f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); - submitted = NULL; - } - folio_unlock(folio); if (unlikely(f2fs_cp_error(sbi))) { @@ -1710,14 +1746,15 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (do_balance) f2fs_balance_fs(sbi, false); - return 0; + return true; redirty_out: folio_redirty_for_writepage(wbc, folio); - return AOP_WRITEPAGE_ACTIVATE; + folio_unlock(folio); + return false; } -int f2fs_move_node_page(struct page *node_page, int gc_type) +int f2fs_move_node_folio(struct folio *node_folio, int gc_type) { int err = 0; @@ -1725,43 +1762,33 @@ int f2fs_move_node_page(struct page *node_page, int gc_type) struct writeback_control wbc = { .sync_mode = WB_SYNC_ALL, .nr_to_write = 1, - .for_reclaim = 0, }; - f2fs_wait_on_page_writeback(node_page, NODE, true, true); + f2fs_folio_wait_writeback(node_folio, NODE, true, true); - set_page_dirty(node_page); + folio_mark_dirty(node_folio); - if (!clear_page_dirty_for_io(node_page)) { + if (!folio_clear_dirty_for_io(node_folio)) { err = -EAGAIN; goto out_page; } - if (__write_node_page(node_page, false, NULL, - &wbc, false, FS_GC_NODE_IO, NULL)) { + if (!__write_node_folio(node_folio, false, NULL, + &wbc, false, FS_GC_NODE_IO, NULL)) err = -EAGAIN; - unlock_page(node_page); - } goto release_page; } else { /* set page dirty and write it */ - if (!folio_test_writeback(page_folio(node_page))) - set_page_dirty(node_page); + if (!folio_test_writeback(node_folio)) + folio_mark_dirty(node_folio); } out_page: - unlock_page(node_page); + folio_unlock(node_folio); release_page: - f2fs_put_page(node_page, 0); + f2fs_folio_put(node_folio, false); return err; } -static int f2fs_write_node_page(struct page *page, - struct writeback_control *wbc) -{ - return __write_node_page(page, false, NULL, wbc, false, - FS_NODE_IO, NULL); -} - int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic, unsigned int *seq_id) @@ -1769,16 +1796,16 @@ int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, pgoff_t index; struct folio_batch fbatch; int ret = 0; - struct page *last_page = NULL; + struct folio *last_folio = NULL; bool marked = false; nid_t ino = inode->i_ino; int nr_folios; int nwritten = 0; if (atomic) { - last_page = last_fsync_dnode(sbi, ino); - if (IS_ERR_OR_NULL(last_page)) - return PTR_ERR_OR_ZERO(last_page); + last_folio = last_fsync_dnode(sbi, ino); + if (IS_ERR_OR_NULL(last_folio)) + return PTR_ERR_OR_ZERO(last_folio); } retry: folio_batch_init(&fbatch); @@ -1790,96 +1817,94 @@ retry: int i; for (i = 0; i < nr_folios; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; bool submitted = false; if (unlikely(f2fs_cp_error(sbi))) { - f2fs_put_page(last_page, 0); + f2fs_folio_put(last_folio, false); folio_batch_release(&fbatch); ret = -EIO; goto out; } - if (!IS_DNODE(page) || !is_cold_node(page)) + if (!IS_DNODE(&folio->page) || !is_cold_node(&folio->page)) continue; - if (ino_of_node(page) != ino) + if (ino_of_node(&folio->page) != ino) continue; - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { + if (unlikely(!is_node_folio(folio))) { continue_unlock: - unlock_page(page); + folio_unlock(folio); continue; } - if (ino_of_node(page) != ino) + if (ino_of_node(&folio->page) != ino) goto continue_unlock; - if (!PageDirty(page) && page != last_page) { + if (!folio_test_dirty(folio) && folio != last_folio) { /* someone wrote it for us */ goto continue_unlock; } - f2fs_wait_on_page_writeback(page, NODE, true, true); + f2fs_folio_wait_writeback(folio, NODE, true, true); - set_fsync_mark(page, 0); - set_dentry_mark(page, 0); + set_fsync_mark(&folio->page, 0); + set_dentry_mark(&folio->page, 0); - if (!atomic || page == last_page) { - set_fsync_mark(page, 1); + if (!atomic || folio == last_folio) { + set_fsync_mark(&folio->page, 1); percpu_counter_inc(&sbi->rf_node_block_count); - if (IS_INODE(page)) { + if (IS_INODE(&folio->page)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) - f2fs_update_inode(inode, page); - set_dentry_mark(page, + f2fs_update_inode(inode, folio); + set_dentry_mark(&folio->page, f2fs_need_dentry_mark(sbi, ino)); } /* may be written by other thread */ - if (!PageDirty(page)) - set_page_dirty(page); + if (!folio_test_dirty(folio)) + folio_mark_dirty(folio); } - if (!clear_page_dirty_for_io(page)) + if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - ret = __write_node_page(page, atomic && - page == last_page, + if (!__write_node_folio(folio, atomic && + folio == last_folio, &submitted, wbc, true, - FS_NODE_IO, seq_id); - if (ret) { - unlock_page(page); - f2fs_put_page(last_page, 0); - break; - } else if (submitted) { - nwritten++; + FS_NODE_IO, seq_id)) { + f2fs_folio_put(last_folio, false); + folio_batch_release(&fbatch); + ret = -EIO; + goto out; } + if (submitted) + nwritten++; - if (page == last_page) { - f2fs_put_page(page, 0); + if (folio == last_folio) { + f2fs_folio_put(folio, false); + folio_batch_release(&fbatch); marked = true; - break; + goto out; } } folio_batch_release(&fbatch); cond_resched(); - - if (ret || marked) - break; } - if (!ret && atomic && !marked) { + if (atomic && !marked) { f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx", - ino, page_folio(last_page)->index); - lock_page(last_page); - f2fs_wait_on_page_writeback(last_page, NODE, true, true); - set_page_dirty(last_page); - unlock_page(last_page); + ino, last_folio->index); + folio_lock(last_folio); + f2fs_folio_wait_writeback(last_folio, NODE, true, true); + folio_mark_dirty(last_folio); + folio_unlock(last_folio); goto retry; } out: if (nwritten) f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE); - return ret ? -EIO : 0; + return ret; } static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data) @@ -1906,18 +1931,18 @@ static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data) return 1; } -static bool flush_dirty_inode(struct page *page) +static bool flush_dirty_inode(struct folio *folio) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); struct inode *inode; - nid_t ino = ino_of_node(page); + nid_t ino = ino_of_node(&folio->page); inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL); if (!inode) return false; - f2fs_update_inode(inode, page); - unlock_page(page); + f2fs_update_inode(inode, folio); + folio_unlock(folio); iput(inode); return true; @@ -1937,32 +1962,27 @@ void f2fs_flush_inline_data(struct f2fs_sb_info *sbi) int i; for (i = 0; i < nr_folios; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; - if (!IS_INODE(page)) + if (!IS_INODE(&folio->page)) continue; - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { -continue_unlock: - unlock_page(page); - continue; - } - - if (!PageDirty(page)) { - /* someone wrote it for us */ - goto continue_unlock; - } + if (unlikely(!is_node_folio(folio))) + goto unlock; + if (!folio_test_dirty(folio)) + goto unlock; /* flush inline_data, if it's async context. */ - if (page_private_inline(page)) { - clear_page_private_inline(page); - unlock_page(page); - flush_inline_data(sbi, ino_of_node(page)); + if (page_private_inline(&folio->page)) { + clear_page_private_inline(&folio->page); + folio_unlock(folio); + flush_inline_data(sbi, ino_of_node(&folio->page)); continue; } - unlock_page(page); +unlock: + folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); @@ -1991,7 +2011,7 @@ next_step: int i; for (i = 0; i < nr_folios; i++) { - struct page *page = &fbatch.folios[i]->page; + struct folio *folio = fbatch.folios[i]; bool submitted = false; /* give a priority to WB_SYNC threads */ @@ -2007,27 +2027,27 @@ next_step: * 1. dentry dnodes * 2. file dnodes */ - if (step == 0 && IS_DNODE(page)) + if (step == 0 && IS_DNODE(&folio->page)) continue; - if (step == 1 && (!IS_DNODE(page) || - is_cold_node(page))) + if (step == 1 && (!IS_DNODE(&folio->page) || + is_cold_node(&folio->page))) continue; - if (step == 2 && (!IS_DNODE(page) || - !is_cold_node(page))) + if (step == 2 && (!IS_DNODE(&folio->page) || + !is_cold_node(&folio->page))) continue; lock_node: if (wbc->sync_mode == WB_SYNC_ALL) - lock_page(page); - else if (!trylock_page(page)) + folio_lock(folio); + else if (!folio_trylock(folio)) continue; - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { + if (unlikely(!is_node_folio(folio))) { continue_unlock: - unlock_page(page); + folio_unlock(folio); continue; } - if (!PageDirty(page)) { + if (!folio_test_dirty(folio)) { /* someone wrote it for us */ goto continue_unlock; } @@ -2037,30 +2057,32 @@ continue_unlock: goto write_node; /* flush inline_data */ - if (page_private_inline(page)) { - clear_page_private_inline(page); - unlock_page(page); - flush_inline_data(sbi, ino_of_node(page)); + if (page_private_inline(&folio->page)) { + clear_page_private_inline(&folio->page); + folio_unlock(folio); + flush_inline_data(sbi, ino_of_node(&folio->page)); goto lock_node; } /* flush dirty inode */ - if (IS_INODE(page) && flush_dirty_inode(page)) + if (IS_INODE(&folio->page) && flush_dirty_inode(folio)) goto lock_node; write_node: - f2fs_wait_on_page_writeback(page, NODE, true, true); + f2fs_folio_wait_writeback(folio, NODE, true, true); - if (!clear_page_dirty_for_io(page)) + if (!folio_clear_dirty_for_io(folio)) goto continue_unlock; - set_fsync_mark(page, 0); - set_dentry_mark(page, 0); + set_fsync_mark(&folio->page, 0); + set_dentry_mark(&folio->page, 0); - ret = __write_node_page(page, false, &submitted, - wbc, do_balance, io_type, NULL); - if (ret) - unlock_page(page); - else if (submitted) + if (!__write_node_folio(folio, false, &submitted, + wbc, do_balance, io_type, NULL)) { + folio_batch_release(&fbatch); + ret = -EIO; + goto out; + } + if (submitted) nwritten++; if (--wbc->nr_to_write == 0) @@ -2095,12 +2117,13 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, unsigned int seq_id) { struct fsync_node_entry *fn; - struct page *page; struct list_head *head = &sbi->fsync_node_list; unsigned long flags; unsigned int cur_seq_id = 0; while (seq_id && cur_seq_id < seq_id) { + struct folio *folio; + spin_lock_irqsave(&sbi->fsync_node_lock, flags); if (list_empty(head)) { spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); @@ -2112,13 +2135,13 @@ int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, break; } cur_seq_id = fn->seq_id; - page = fn->page; - get_page(page); + folio = fn->folio; + folio_get(folio); spin_unlock_irqrestore(&sbi->fsync_node_lock, flags); - f2fs_wait_on_page_writeback(page, NODE, true, false); + f2fs_folio_wait_writeback(folio, NODE, true, false); - put_page(page); + folio_put(folio); } return filemap_check_errors(NODE_MAPPING(sbi)); @@ -2193,7 +2216,6 @@ static bool f2fs_dirty_node_folio(struct address_space *mapping, * Structure of the f2fs node operations */ const struct address_space_operations f2fs_node_aops = { - .writepage = f2fs_write_node_page, .writepages = f2fs_write_node_pages, .dirty_folio = f2fs_dirty_node_folio, .invalidate_folio = f2fs_invalidate_folio, @@ -2255,24 +2277,6 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i, } } -bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned int i; - bool ret = true; - - f2fs_down_read(&nm_i->nat_tree_lock); - for (i = 0; i < nm_i->nat_blocks; i++) { - if (!test_bit_le(i, nm_i->nat_block_bitmap)) { - ret = false; - break; - } - } - f2fs_up_read(&nm_i->nat_tree_lock); - - return ret; -} - static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, bool set, bool build) { @@ -2304,7 +2308,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i, *e; struct nat_entry *ne; - int err = -EINVAL; + int err; bool ret = false; /* 0 nid should not be used */ @@ -2318,7 +2322,10 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, i->nid = nid; i->state = FREE_NID; - radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); + err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL); + f2fs_bug_on(sbi, err); + + err = -EINVAL; spin_lock(&nm_i->nid_list_lock); @@ -2337,8 +2344,8 @@ static bool add_free_nid(struct f2fs_sb_info *sbi, * - __lookup_nat_cache * - f2fs_add_link * - f2fs_init_inode_metadata - * - f2fs_new_inode_page - * - f2fs_new_node_page + * - f2fs_new_inode_folio + * - f2fs_new_node_folio * - set_node_addr * - f2fs_alloc_nid_done * - __remove_nid_from_list(PREALLOC_NID) @@ -2391,10 +2398,9 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static int scan_nat_page(struct f2fs_sb_info *sbi, - struct page *nat_page, nid_t start_nid) + struct f2fs_nat_block *nat_blk, nid_t start_nid) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid); int i; @@ -2514,13 +2520,14 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, while (1) { if (!test_bit_le(NAT_BLOCK_OFFSET(nid), nm_i->nat_block_bitmap)) { - struct page *page = get_current_nat_page(sbi, nid); + struct folio *folio = get_current_nat_folio(sbi, nid); - if (IS_ERR(page)) { - ret = PTR_ERR(page); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); } else { - ret = scan_nat_page(sbi, page, nid); - f2fs_put_page(page, 1); + ret = scan_nat_page(sbi, folio_address(folio), + nid); + f2fs_folio_put(folio, true); } if (ret) { @@ -2696,18 +2703,18 @@ int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) return nr - nr_shrink; } -int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) +int f2fs_recover_inline_xattr(struct inode *inode, struct folio *folio) { void *src_addr, *dst_addr; size_t inline_size; - struct page *ipage; + struct folio *ifolio; struct f2fs_inode *ri; - ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + ifolio = f2fs_get_inode_folio(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ifolio)) + return PTR_ERR(ifolio); - ri = F2FS_INODE(page); + ri = F2FS_INODE(&folio->page); if (ri->i_inline & F2FS_INLINE_XATTR) { if (!f2fs_has_inline_xattr(inode)) { set_inode_flag(inode, FI_INLINE_XATTR); @@ -2721,15 +2728,15 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct page *page) goto update_inode; } - dst_addr = inline_xattr_addr(inode, ipage); - src_addr = inline_xattr_addr(inode, page); + dst_addr = inline_xattr_addr(inode, ifolio); + src_addr = inline_xattr_addr(inode, folio); inline_size = inline_xattr_size(inode); - f2fs_wait_on_page_writeback(ipage, NODE, true, true); + f2fs_folio_wait_writeback(ifolio, NODE, true, true); memcpy(dst_addr, src_addr, inline_size); update_inode: - f2fs_update_inode(inode, ipage); - f2fs_put_page(ipage, 1); + f2fs_update_inode(inode, ifolio); + f2fs_folio_put(ifolio, true); return 0; } @@ -2740,7 +2747,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) nid_t new_xnid; struct dnode_of_data dn; struct node_info ni; - struct page *xpage; + struct folio *xfolio; int err; if (!prev_xnid) @@ -2751,7 +2758,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) if (err) return err; - f2fs_invalidate_blocks(sbi, ni.blk_addr); + f2fs_invalidate_blocks(sbi, ni.blk_addr, 1); dec_valid_node_count(sbi, inode, false); set_node_addr(sbi, &ni, NULL_ADDR, false); @@ -2761,10 +2768,10 @@ recover_xnid: return -ENOSPC; set_new_dnode(&dn, inode, NULL, NULL, new_xnid); - xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); - if (IS_ERR(xpage)) { + xfolio = f2fs_new_node_folio(&dn, XATTR_NODE_OFFSET); + if (IS_ERR(xfolio)) { f2fs_alloc_nid_failed(sbi, new_xnid); - return PTR_ERR(xpage); + return PTR_ERR(xfolio); } f2fs_alloc_nid_done(sbi, new_xnid); @@ -2772,11 +2779,11 @@ recover_xnid: /* 3: update and set xattr node page dirty */ if (page) { - memcpy(F2FS_NODE(xpage), F2FS_NODE(page), + memcpy(F2FS_NODE(&xfolio->page), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE); - set_page_dirty(xpage); + folio_mark_dirty(xfolio); } - f2fs_put_page(xpage, 1); + f2fs_folio_put(xfolio, true); return 0; } @@ -2786,7 +2793,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) struct f2fs_inode *src, *dst; nid_t ino = ino_of_node(page); struct node_info old_ni, new_ni; - struct page *ipage; + struct folio *ifolio; int err; err = f2fs_get_node_info(sbi, ino, &old_ni, false); @@ -2796,8 +2803,8 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (unlikely(old_ni.blk_addr != NULL_ADDR)) return -EINVAL; retry: - ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); - if (!ipage) { + ifolio = f2fs_grab_cache_folio(NODE_MAPPING(sbi), ino, false); + if (IS_ERR(ifolio)) { memalloc_retry_wait(GFP_NOFS); goto retry; } @@ -2805,13 +2812,13 @@ retry: /* Should not use this inode from free nid list */ remove_free_nid(sbi, ino); - if (!PageUptodate(ipage)) - SetPageUptodate(ipage); - fill_node_footer(ipage, ino, ino, 0, true); - set_cold_node(ipage, false); + if (!folio_test_uptodate(ifolio)) + folio_mark_uptodate(ifolio); + fill_node_footer(&ifolio->page, ino, ino, 0, true); + set_cold_node(&ifolio->page, false); src = F2FS_INODE(page); - dst = F2FS_INODE(ipage); + dst = F2FS_INODE(&ifolio->page); memcpy(dst, src, offsetof(struct f2fs_inode, i_ext)); dst->i_size = 0; @@ -2847,8 +2854,8 @@ retry: WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR, false); inc_valid_inode_count(sbi); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + folio_mark_dirty(ifolio); + f2fs_folio_put(ifolio, true); return 0; } @@ -2872,17 +2879,17 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true); for (idx = addr; idx < addr + nrpages; idx++) { - struct page *page = f2fs_get_tmp_page(sbi, idx); + struct folio *folio = f2fs_get_tmp_folio(sbi, idx); - if (IS_ERR(page)) - return PTR_ERR(page); + if (IS_ERR(folio)) + return PTR_ERR(folio); - rn = F2FS_NODE(page); + rn = F2FS_NODE(&folio->page); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; sum_entry++; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); } invalidate_mapping_pages(META_MAPPING(sbi), addr, @@ -2951,23 +2958,7 @@ add_out: list_add_tail(&nes->set_list, head); } -static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs, - unsigned int valid) -{ - if (valid == 0) { - __set_bit_le(nat_ofs, nm_i->empty_nat_bits); - __clear_bit_le(nat_ofs, nm_i->full_nat_bits); - return; - } - - __clear_bit_le(nat_ofs, nm_i->empty_nat_bits); - if (valid == NAT_ENTRY_PER_BLOCK) - __set_bit_le(nat_ofs, nm_i->full_nat_bits); - else - __clear_bit_le(nat_ofs, nm_i->full_nat_bits); -} - -static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, +static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, struct page *page) { struct f2fs_nm_info *nm_i = NM_I(sbi); @@ -2976,7 +2967,7 @@ static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, int valid = 0; int i = 0; - if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) + if (!enabled_nat_bits(sbi, NULL)) return; if (nat_index == 0) { @@ -2987,36 +2978,17 @@ static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid, if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR) valid++; } - - __update_nat_bits(nm_i, nat_index, valid); -} - -void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) -{ - struct f2fs_nm_info *nm_i = NM_I(sbi); - unsigned int nat_ofs; - - f2fs_down_read(&nm_i->nat_tree_lock); - - for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) { - unsigned int valid = 0, nid_ofs = 0; - - /* handle nid zero due to it should never be used */ - if (unlikely(nat_ofs == 0)) { - valid = 1; - nid_ofs = 1; - } - - for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) { - if (!test_bit_le(nid_ofs, - nm_i->free_nid_bitmap[nat_ofs])) - valid++; - } - - __update_nat_bits(nm_i, nat_ofs, valid); + if (valid == 0) { + __set_bit_le(nat_index, nm_i->empty_nat_bits); + __clear_bit_le(nat_index, nm_i->full_nat_bits); + return; } - f2fs_up_read(&nm_i->nat_tree_lock); + __clear_bit_le(nat_index, nm_i->empty_nat_bits); + if (valid == NAT_ENTRY_PER_BLOCK) + __set_bit_le(nat_index, nm_i->full_nat_bits); + else + __clear_bit_le(nat_index, nm_i->full_nat_bits); } static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, @@ -3035,7 +3007,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, * #1, flush nat entries to journal in current hot data summary block. * #2, flush nat entries to nat page. */ - if ((cpc->reason & CP_UMOUNT) || + if (enabled_nat_bits(sbi, cpc) || !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL)) to_journal = false; @@ -3082,7 +3054,7 @@ static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, if (to_journal) { up_write(&curseg->journal_rwsem); } else { - update_nat_bits(sbi, start_nid, page); + __update_nat_bits(sbi, start_nid, page); f2fs_put_page(page, 1); } @@ -3113,7 +3085,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * during unmount, let's flush nat_bits before checking * nat_cnt[DIRTY_NAT]. */ - if (cpc->reason & CP_UMOUNT) { + if (enabled_nat_bits(sbi, cpc)) { f2fs_down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); f2fs_up_write(&nm_i->nat_tree_lock); @@ -3129,7 +3101,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * entries, remove all entries from journal and merge them * into nat entry set. */ - if (cpc->reason & CP_UMOUNT || + if (enabled_nat_bits(sbi, cpc) || !__has_cursum_space(journal, nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL)) remove_nats_in_journal(sbi); @@ -3166,40 +3138,38 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) __u64 cp_ver = cur_cp_version(ckpt); block_t nat_bits_addr; + if (!enabled_nat_bits(sbi, NULL)) + return 0; + nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8); nm_i->nat_bits = f2fs_kvzalloc(sbi, F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL); if (!nm_i->nat_bits) return -ENOMEM; - nm_i->full_nat_bits = nm_i->nat_bits + 8; - nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; - - if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) - return 0; - nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) - nm_i->nat_bits_blocks; for (i = 0; i < nm_i->nat_bits_blocks; i++) { - struct page *page; + struct folio *folio; - page = f2fs_get_meta_page(sbi, nat_bits_addr++); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_meta_folio(sbi, nat_bits_addr++); + if (IS_ERR(folio)) + return PTR_ERR(folio); memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i), - page_address(page), F2FS_BLKSIZE); - f2fs_put_page(page, 1); + folio_address(folio), F2FS_BLKSIZE); + f2fs_folio_put(folio, true); } cp_ver |= (cur_cp_crc(ckpt) << 32); if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) { - clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG); - f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)", - cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits)); + disable_nat_bits(sbi, true); return 0; } + nm_i->full_nat_bits = nm_i->nat_bits + 8; + nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes; + f2fs_notice(sbi, "Found nat_bits in checkpoint"); return 0; } @@ -3210,7 +3180,7 @@ static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) unsigned int i = 0; nid_t nid, last_nid; - if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG)) + if (!enabled_nat_bits(sbi, NULL)) return; for (i = 0; i < nm_i->nat_blocks; i++) { @@ -3282,6 +3252,9 @@ static int init_node_manager(struct f2fs_sb_info *sbi) if (!nm_i->nat_bitmap) return -ENOMEM; + if (!test_opt(sbi, NAT_BITS)) + disable_nat_bits(sbi, true); + err = __get_nat_bitmaps(sbi); if (err) return err; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 6aea13024ac1..1446c433b3ec 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -52,6 +52,13 @@ enum { IS_PREALLOC, /* nat entry is preallocated */ }; +/* For node type in __get_node_folio() */ +enum node_type { + NODE_TYPE_REGULAR, + NODE_TYPE_INODE, + NODE_TYPE_XATTR, +}; + /* * For node information */ @@ -248,7 +255,7 @@ static inline nid_t nid_of_node(struct page *node_page) return le32_to_cpu(rn->footer.nid); } -static inline unsigned int ofs_of_node(struct page *node_page) +static inline unsigned int ofs_of_node(const struct page *node_page) { struct f2fs_node *rn = F2FS_NODE(node_page); unsigned flag = le32_to_cpu(rn->footer.flag); @@ -261,9 +268,9 @@ static inline __u64 cpver_of_node(struct page *node_page) return le64_to_cpu(rn->footer.cp_ver); } -static inline block_t next_blkaddr_of_node(struct page *node_page) +static inline block_t next_blkaddr_of_node(struct folio *node_folio) { - struct f2fs_node *rn = F2FS_NODE(node_page); + struct f2fs_node *rn = F2FS_NODE(&node_folio->page); return le32_to_cpu(rn->footer.next_blkaddr); } @@ -342,7 +349,7 @@ static inline bool is_recoverable_dnode(struct page *page) * `- indirect node ((6 + 2N) + (N - 1)(N + 1)) * `- direct node */ -static inline bool IS_DNODE(struct page *node_page) +static inline bool IS_DNODE(const struct page *node_page) { unsigned int ofs = ofs_of_node(node_page); @@ -360,17 +367,17 @@ static inline bool IS_DNODE(struct page *node_page) return true; } -static inline int set_nid(struct page *p, int off, nid_t nid, bool i) +static inline int set_nid(struct folio *folio, int off, nid_t nid, bool i) { - struct f2fs_node *rn = F2FS_NODE(p); + struct f2fs_node *rn = F2FS_NODE(&folio->page); - f2fs_wait_on_page_writeback(p, NODE, true, true); + f2fs_folio_wait_writeback(folio, NODE, true, true); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); else rn->in.nid[off] = cpu_to_le32(nid); - return set_page_dirty(p); + return folio_mark_dirty(folio); } static inline nid_t get_nid(struct page *p, int off, bool i) @@ -389,7 +396,7 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold data pages in page cache */ -static inline int is_node(struct page *page, int type) +static inline int is_node(const struct page *page, int type) { struct f2fs_node *rn = F2FS_NODE(page); return le32_to_cpu(rn->footer.flag) & BIT(type); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e4d81b8705d1..51ebed4e1521 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -165,7 +165,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage, struct f2fs_dir_entry *de; struct f2fs_filename fname; struct qstr usr_fname; - struct page *page; + struct folio *folio; struct inode *dir, *einode; struct fsync_inode_entry *entry; int err = 0; @@ -187,7 +187,7 @@ static int recover_dentry(struct inode *inode, struct page *ipage, if (err) goto out; retry: - de = __f2fs_find_entry(dir, &fname, &page); + de = __f2fs_find_entry(dir, &fname, &folio); if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out_put; @@ -212,11 +212,11 @@ retry: iput(einode); goto out_put; } - f2fs_delete_entry(de, page, dir, einode); + f2fs_delete_entry(de, folio, dir, einode); iput(einode); goto retry; - } else if (IS_ERR(page)) { - err = PTR_ERR(page); + } else if (IS_ERR(folio)) { + err = PTR_ERR(folio); } else { err = f2fs_add_dentry(dir, &fname, inode, inode->i_ino, inode->i_mode); @@ -226,7 +226,7 @@ retry: goto out; out_put: - f2fs_put_page(page, 0); + f2fs_folio_put(folio, false); out: if (file_enc_name(inode)) name = "<encrypted>"; @@ -358,33 +358,34 @@ static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr, block_t *blkaddr_fast, bool *is_detecting) { unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; - struct page *page = NULL; int i; if (!*is_detecting) return 0; for (i = 0; i < 2; i++) { + struct folio *folio; + if (!f2fs_is_valid_blkaddr(sbi, *blkaddr_fast, META_POR)) { *is_detecting = false; return 0; } - page = f2fs_get_tmp_page(sbi, *blkaddr_fast); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_tmp_folio(sbi, *blkaddr_fast); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!is_recoverable_dnode(page)) { - f2fs_put_page(page, 1); + if (!is_recoverable_dnode(&folio->page)) { + f2fs_folio_put(folio, true); *is_detecting = false; return 0; } ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, *blkaddr_fast, - next_blkaddr_of_node(page)); + next_blkaddr_of_node(folio)); - *blkaddr_fast = next_blkaddr_of_node(page); - f2fs_put_page(page, 1); + *blkaddr_fast = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); f2fs_ra_meta_pages_cond(sbi, *blkaddr_fast, ra_blocks); } @@ -401,7 +402,6 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, bool check_only) { struct curseg_info *curseg; - struct page *page = NULL; block_t blkaddr, blkaddr_fast; bool is_detecting = true; int err = 0; @@ -413,33 +413,35 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, while (1) { struct fsync_inode_entry *entry; + struct folio *folio; if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) return 0; - page = f2fs_get_tmp_page(sbi, blkaddr); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_tmp_folio(sbi, blkaddr); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); break; } - if (!is_recoverable_dnode(page)) { - f2fs_put_page(page, 1); + if (!is_recoverable_dnode(&folio->page)) { + f2fs_folio_put(folio, true); break; } - if (!is_fsync_dnode(page)) + if (!is_fsync_dnode(&folio->page)) goto next; - entry = get_fsync_inode(head, ino_of_node(page)); + entry = get_fsync_inode(head, ino_of_node(&folio->page)); if (!entry) { bool quota_inode = false; if (!check_only && - IS_INODE(page) && is_dent_dnode(page)) { - err = f2fs_recover_inode_page(sbi, page); + IS_INODE(&folio->page) && + is_dent_dnode(&folio->page)) { + err = f2fs_recover_inode_page(sbi, &folio->page); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } quota_inode = true; @@ -449,24 +451,24 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, * CP | dnode(F) | inode(DF) * For this case, we should not give up now. */ - entry = add_fsync_inode(sbi, head, ino_of_node(page), + entry = add_fsync_inode(sbi, head, ino_of_node(&folio->page), quota_inode); if (IS_ERR(entry)) { err = PTR_ERR(entry); if (err == -ENOENT) goto next; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } } entry->blkaddr = blkaddr; - if (IS_INODE(page) && is_dent_dnode(page)) + if (IS_INODE(&folio->page) && is_dent_dnode(&folio->page)) entry->last_dentry = blkaddr; next: /* check next segment */ - blkaddr = next_blkaddr_of_node(page); - f2fs_put_page(page, 1); + blkaddr = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); err = sanity_check_node_chain(sbi, blkaddr, &blkaddr_fast, &is_detecting); @@ -492,7 +494,7 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); struct f2fs_summary_block *sum_node; struct f2fs_summary sum; - struct page *sum_page, *node_page; + struct folio *sum_folio, *node_folio; struct dnode_of_data tdn = *dn; nid_t ino, nid; struct inode *inode; @@ -514,18 +516,18 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, } } - sum_page = f2fs_get_sum_page(sbi, segno); - if (IS_ERR(sum_page)) - return PTR_ERR(sum_page); - sum_node = (struct f2fs_summary_block *)page_address(sum_page); + sum_folio = f2fs_get_sum_folio(sbi, segno); + if (IS_ERR(sum_folio)) + return PTR_ERR(sum_folio); + sum_node = folio_address(sum_folio); sum = sum_node->entries[blkoff]; - f2fs_put_page(sum_page, 1); + f2fs_folio_put(sum_folio, true); got_it: /* Use the locked dnode page and inode */ nid = le32_to_cpu(sum.nid); ofs_in_node = le16_to_cpu(sum.ofs_in_node); - max_addrs = ADDRS_PER_PAGE(dn->node_page, dn->inode); + max_addrs = ADDRS_PER_PAGE(&dn->node_folio->page, dn->inode); if (ofs_in_node >= max_addrs) { f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u", ofs_in_node, dn->inode->i_ino, nid, max_addrs); @@ -535,9 +537,9 @@ got_it: if (dn->inode->i_ino == nid) { tdn.nid = nid; - if (!dn->inode_page_locked) - lock_page(dn->inode_page); - tdn.node_page = dn->inode_page; + if (!dn->inode_folio_locked) + folio_lock(dn->inode_folio); + tdn.node_folio = dn->inode_folio; tdn.ofs_in_node = ofs_in_node; goto truncate_out; } else if (dn->nid == nid) { @@ -546,13 +548,13 @@ got_it: } /* Get the node page */ - node_page = f2fs_get_node_page(sbi, nid); - if (IS_ERR(node_page)) - return PTR_ERR(node_page); + node_folio = f2fs_get_node_folio(sbi, nid); + if (IS_ERR(node_folio)) + return PTR_ERR(node_folio); - offset = ofs_of_node(node_page); - ino = ino_of_node(node_page); - f2fs_put_page(node_page, 1); + offset = ofs_of_node(&node_folio->page); + ino = ino_of_node(&node_folio->page); + f2fs_folio_put(node_folio, true); if (ino != dn->inode->i_ino) { int ret; @@ -578,8 +580,8 @@ got_it: * if inode page is locked, unlock temporarily, but its reference * count keeps alive. */ - if (ino == dn->inode->i_ino && dn->inode_page_locked) - unlock_page(dn->inode_page); + if (ino == dn->inode->i_ino && dn->inode_folio_locked) + folio_unlock(dn->inode_folio); set_new_dnode(&tdn, inode, NULL, NULL, 0); if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE)) @@ -592,15 +594,15 @@ got_it: out: if (ino != dn->inode->i_ino) iput(inode); - else if (dn->inode_page_locked) - lock_page(dn->inode_page); + else if (dn->inode_folio_locked) + folio_lock(dn->inode_folio); return 0; truncate_out: if (f2fs_data_blkaddr(&tdn) == blkaddr) f2fs_truncate_data_blocks_range(&tdn, 1); - if (dn->inode->i_ino == nid && !dn->inode_page_locked) - unlock_page(dn->inode_page); + if (dn->inode->i_ino == nid && !dn->inode_folio_locked) + folio_unlock(dn->inode_folio); return 0; } @@ -618,7 +620,7 @@ static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn) } static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, - struct page *page) + struct folio *folio) { struct dnode_of_data dn; struct node_info ni; @@ -626,19 +628,19 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, int err = 0, recovered = 0; /* step 1: recover xattr */ - if (IS_INODE(page)) { - err = f2fs_recover_inline_xattr(inode, page); + if (IS_INODE(&folio->page)) { + err = f2fs_recover_inline_xattr(inode, folio); if (err) goto out; - } else if (f2fs_has_xattr_block(ofs_of_node(page))) { - err = f2fs_recover_xattr_data(inode, page); + } else if (f2fs_has_xattr_block(ofs_of_node(&folio->page))) { + err = f2fs_recover_xattr_data(inode, &folio->page); if (!err) recovered++; goto out; } /* step 2: recover inline data */ - err = f2fs_recover_inline_data(inode, page); + err = f2fs_recover_inline_data(inode, folio); if (err) { if (err == 1) err = 0; @@ -646,8 +648,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* step 3: recover data indices */ - start = f2fs_start_bidx_of_node(ofs_of_node(page), inode); - end = start + ADDRS_PER_PAGE(page, inode); + start = f2fs_start_bidx_of_node(ofs_of_node(&folio->page), inode); + end = start + ADDRS_PER_PAGE(&folio->page, inode); set_new_dnode(&dn, inode, NULL, NULL, 0); retry_dn: @@ -660,18 +662,18 @@ retry_dn: goto out; } - f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); + f2fs_folio_wait_writeback(dn.node_folio, NODE, true, true); err = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (err) goto err; - f2fs_bug_on(sbi, ni.ino != ino_of_node(page)); + f2fs_bug_on(sbi, ni.ino != ino_of_node(&folio->page)); - if (ofs_of_node(dn.node_page) != ofs_of_node(page)) { + if (ofs_of_node(&dn.node_folio->page) != ofs_of_node(&folio->page)) { f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u", - inode->i_ino, ofs_of_node(dn.node_page), - ofs_of_node(page)); + inode->i_ino, ofs_of_node(&dn.node_folio->page), + ofs_of_node(&folio->page)); err = -EFSCORRUPTED; f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER); goto err; @@ -681,7 +683,7 @@ retry_dn: block_t src, dest; src = f2fs_data_blkaddr(&dn); - dest = data_blkaddr(dn.inode, page, dn.ofs_in_node); + dest = data_blkaddr(dn.inode, folio, dn.ofs_in_node); if (__is_valid_data_blkaddr(src) && !f2fs_is_valid_blkaddr(sbi, src, META_POR)) { @@ -756,10 +758,10 @@ retry_prev: } } - copy_node_footer(dn.node_page, page); - fill_node_footer(dn.node_page, dn.nid, ni.ino, - ofs_of_node(page), false); - set_page_dirty(dn.node_page); + copy_node_footer(&dn.node_folio->page, &folio->page); + fill_node_footer(&dn.node_folio->page, dn.nid, ni.ino, + ofs_of_node(&folio->page), false); + folio_mark_dirty(dn.node_folio); err: f2fs_put_dnode(&dn); out: @@ -773,7 +775,6 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, struct list_head *tmp_inode_list, struct list_head *dir_list) { struct curseg_info *curseg; - struct page *page = NULL; int err = 0; block_t blkaddr; unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; @@ -784,22 +785,23 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, while (1) { struct fsync_inode_entry *entry; + struct folio *folio; if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - page = f2fs_get_tmp_page(sbi, blkaddr); - if (IS_ERR(page)) { - err = PTR_ERR(page); + folio = f2fs_get_tmp_folio(sbi, blkaddr); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); break; } - if (!is_recoverable_dnode(page)) { - f2fs_put_page(page, 1); + if (!is_recoverable_dnode(&folio->page)) { + f2fs_folio_put(folio, true); break; } - entry = get_fsync_inode(inode_list, ino_of_node(page)); + entry = get_fsync_inode(inode_list, ino_of_node(&folio->page)); if (!entry) goto next; /* @@ -807,23 +809,23 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, * In this case, we can lose the latest inode(x). * So, call recover_inode for the inode update. */ - if (IS_INODE(page)) { - err = recover_inode(entry->inode, page); + if (IS_INODE(&folio->page)) { + err = recover_inode(entry->inode, &folio->page); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } } if (entry->last_dentry == blkaddr) { - err = recover_dentry(entry->inode, page, dir_list); + err = recover_dentry(entry->inode, &folio->page, dir_list); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } } - err = do_recover_data(sbi, entry->inode, page); + err = do_recover_data(sbi, entry->inode, folio); if (err) { - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); break; } @@ -831,11 +833,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, list_move_tail(&entry->list, tmp_inode_list); next: ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, - next_blkaddr_of_node(page)); + next_blkaddr_of_node(folio)); /* check next segment */ - blkaddr = next_blkaddr_of_node(page); - f2fs_put_page(page, 1); + blkaddr = next_blkaddr_of_node(folio); + f2fs_folio_put(folio, true); f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } @@ -899,15 +901,8 @@ skip: * and the f2fs is not read only, check and fix zoned block devices' * write pointer consistency. */ - if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sbi->sb)) { - int err2 = f2fs_fix_curseg_write_pointer(sbi); - - if (!err2) - err2 = f2fs_check_write_pointer(sbi); - if (err2) - err = err2; - ret = err; - } + if (!err) + err = f2fs_check_and_fix_write_pointer(sbi); if (!err) clear_sbi_flag(sbi, SBI_POR_DOING); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1766254279d2..ae1223ef648f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -201,6 +201,12 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean) clear_inode_flag(inode, FI_ATOMIC_FILE); if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { clear_inode_flag(inode, FI_ATOMIC_DIRTIED); + /* + * The vfs inode keeps clean during commit, but the f2fs inode + * doesn't. So clear the dirty state after commit and let + * f2fs_mark_inode_dirty_sync ensure a consistent dirty state. + */ + f2fs_inode_synced(inode); f2fs_mark_inode_dirty_sync(inode, true); } stat_dec_atomic_inode(inode); @@ -245,7 +251,7 @@ retry: if (!__is_valid_data_blkaddr(new_addr)) { if (new_addr == NULL_ADDR) dec_valid_block_count(sbi, inode, 1); - f2fs_invalidate_blocks(sbi, dn.data_blkaddr); + f2fs_invalidate_blocks(sbi, dn.data_blkaddr, 1); f2fs_update_data_blkaddr(&dn, new_addr); } else { f2fs_replace_block(sbi, &dn, dn.data_blkaddr, @@ -328,7 +334,7 @@ static int __f2fs_commit_atomic_write(struct inode *inode) goto next; } - blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode), + blen = min((pgoff_t)ADDRS_PER_PAGE(&dn.node_folio->page, cow_inode), len); index = off; for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) { @@ -365,12 +371,21 @@ next: } out: + if (time_to_inject(sbi, FAULT_TIMEOUT)) + f2fs_io_schedule_timeout_killable(DEFAULT_FAULT_TIMEOUT); + if (ret) { sbi->revoked_atomic_block += fi->atomic_write_cnt; } else { sbi->committed_atomic_block += fi->atomic_write_cnt; set_inode_flag(inode, FI_ATOMIC_COMMITTED); + + /* + * inode may has no FI_ATOMIC_DIRTIED flag due to no write + * before commit. + */ if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) { + /* clear atomic dirty status and set vfs dirty status */ clear_inode_flag(inode, FI_ATOMIC_DIRTIED); f2fs_mark_inode_dirty_sync(inode, true); } @@ -418,7 +433,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi, false); - if (!f2fs_is_checkpoint_ready(sbi)) + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return; /* @@ -1290,16 +1305,18 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, wait_list, issued); return 0; } - - /* - * Issue discard for conventional zones only if the device - * supports discard. - */ - if (!bdev_max_discard_sectors(bdev)) - return -EOPNOTSUPP; } #endif + /* + * stop issuing discard for any of below cases: + * 1. device is conventional zone, but it doesn't support discard. + * 2. device is regulare device, after snapshot it doesn't support + * discard. + */ + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; + trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len); lstart = dc->di.lstart; @@ -2088,7 +2105,9 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, return false; if (!force) { - if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks || + if (!f2fs_realtime_discard_enable(sbi) || + (!se->valid_blocks && + !IS_CURSEG(sbi, cpc->trim_start)) || SM_I(sbi)->dcc_info->nr_discards >= SM_I(sbi)->dcc_info->max_discards) return false; @@ -2312,10 +2331,9 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY; dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY; dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE; - if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT) + if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT || + F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) dcc->discard_granularity = BLKS_PER_SEG(sbi); - else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION) - dcc->discard_granularity = BLKS_PER_SEC(sbi); INIT_LIST_HEAD(&dcc->entry_list); for (i = 0; i < MAX_PLIST_NUM; i++) @@ -2424,78 +2442,38 @@ static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr, SIT_I(sbi)->max_mtime = ctime; } -static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +/* + * NOTE: when updating multiple blocks at the same time, please ensure + * that the consecutive input blocks belong to the same segment. + */ +static int update_sit_entry_for_release(struct f2fs_sb_info *sbi, struct seg_entry *se, + unsigned int segno, block_t blkaddr, unsigned int offset, int del) { - struct seg_entry *se; - unsigned int segno, offset; - long int new_vblocks; bool exist; #ifdef CONFIG_F2FS_CHECK_FS bool mir_exist; #endif + int i; + int del_count = -del; - segno = GET_SEGNO(sbi, blkaddr); - if (segno == NULL_SEGNO) - return; - - se = get_seg_entry(sbi, segno); - new_vblocks = se->valid_blocks + del; - offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - - f2fs_bug_on(sbi, (new_vblocks < 0 || - (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); - - se->valid_blocks = new_vblocks; - - /* Update valid block bitmap */ - if (del > 0) { - exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); -#ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_set_bit(offset, - se->cur_valid_map_mir); - if (unlikely(exist != mir_exist)) { - f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", - blkaddr, exist); - f2fs_bug_on(sbi, 1); - } -#endif - if (unlikely(exist)) { - f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", - blkaddr); - f2fs_bug_on(sbi, 1); - se->valid_blocks--; - del = 0; - } - - if (f2fs_block_unit_discard(sbi) && - !f2fs_test_and_set_bit(offset, se->discard_map)) - sbi->discard_blks--; + f2fs_bug_on(sbi, GET_SEGNO(sbi, blkaddr) != GET_SEGNO(sbi, blkaddr + del_count - 1)); - /* - * SSR should never reuse block which is checkpointed - * or newly invalidated. - */ - if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { - if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) - se->ckpt_valid_blocks++; - } - } else { - exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map); + for (i = 0; i < del_count; i++) { + exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map); #ifdef CONFIG_F2FS_CHECK_FS - mir_exist = f2fs_test_and_clear_bit(offset, + mir_exist = f2fs_test_and_clear_bit(offset + i, se->cur_valid_map_mir); if (unlikely(exist != mir_exist)) { f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d", - blkaddr, exist); + blkaddr + i, exist); f2fs_bug_on(sbi, 1); } #endif if (unlikely(!exist)) { - f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", - blkaddr); + f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u", blkaddr + i); f2fs_bug_on(sbi, 1); se->valid_blocks++; - del = 0; + del += 1; } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { /* * If checkpoints are off, we must not reuse data that @@ -2503,7 +2481,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) * before, we must track that to know how much space we * really have. */ - if (f2fs_test_bit(offset, se->ckpt_valid_map)) { + if (f2fs_test_bit(offset + i, se->ckpt_valid_map)) { spin_lock(&sbi->stat_lock); sbi->unusable_block_count++; spin_unlock(&sbi->stat_lock); @@ -2511,11 +2489,105 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) } if (f2fs_block_unit_discard(sbi) && - f2fs_test_and_clear_bit(offset, se->discard_map)) + f2fs_test_and_clear_bit(offset + i, se->discard_map)) sbi->discard_blks++; + + if (!f2fs_test_bit(offset + i, se->ckpt_valid_map)) { + se->ckpt_valid_blocks -= 1; + if (__is_large_section(sbi)) + get_sec_entry(sbi, segno)->ckpt_valid_blocks -= 1; + } + } + + if (__is_large_section(sbi)) + sanity_check_valid_blocks(sbi, segno); + + return del; +} + +static int update_sit_entry_for_alloc(struct f2fs_sb_info *sbi, struct seg_entry *se, + unsigned int segno, block_t blkaddr, unsigned int offset, int del) +{ + bool exist; +#ifdef CONFIG_F2FS_CHECK_FS + bool mir_exist; +#endif + + exist = f2fs_test_and_set_bit(offset, se->cur_valid_map); +#ifdef CONFIG_F2FS_CHECK_FS + mir_exist = f2fs_test_and_set_bit(offset, + se->cur_valid_map_mir); + if (unlikely(exist != mir_exist)) { + f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d", + blkaddr, exist); + f2fs_bug_on(sbi, 1); + } +#endif + if (unlikely(exist)) { + f2fs_err(sbi, "Bitmap was wrongly set, blk:%u", blkaddr); + f2fs_bug_on(sbi, 1); + se->valid_blocks--; + del = 0; } - if (!f2fs_test_bit(offset, se->ckpt_valid_map)) + + if (f2fs_block_unit_discard(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) + sbi->discard_blks--; + + /* + * SSR should never reuse block which is checkpointed + * or newly invalidated. + */ + if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) { + if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map)) { + se->ckpt_valid_blocks++; + if (__is_large_section(sbi)) + get_sec_entry(sbi, segno)->ckpt_valid_blocks++; + } + } + + if (!f2fs_test_bit(offset, se->ckpt_valid_map)) { se->ckpt_valid_blocks += del; + if (__is_large_section(sbi)) + get_sec_entry(sbi, segno)->ckpt_valid_blocks += del; + } + + if (__is_large_section(sbi)) + sanity_check_valid_blocks(sbi, segno); + + return del; +} + +/* + * If releasing blocks, this function supports updating multiple consecutive blocks + * at one time, but please note that these consecutive blocks need to belong to the + * same segment. + */ +static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) +{ + struct seg_entry *se; + unsigned int segno, offset; + long int new_vblocks; + + segno = GET_SEGNO(sbi, blkaddr); + if (segno == NULL_SEGNO) + return; + + se = get_seg_entry(sbi, segno); + new_vblocks = se->valid_blocks + del; + offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); + + f2fs_bug_on(sbi, (new_vblocks < 0 || + (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno)))); + + se->valid_blocks = new_vblocks; + + /* Update valid block bitmap */ + if (del > 0) { + del = update_sit_entry_for_alloc(sbi, se, segno, blkaddr, offset, del); + } else { + del = update_sit_entry_for_release(sbi, se, segno, blkaddr, offset, del); + } __mark_sit_entry_dirty(sbi, segno); @@ -2526,25 +2598,43 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) get_sec_entry(sbi, segno)->valid_blocks += del; } -void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) +void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr, + unsigned int len) { unsigned int segno = GET_SEGNO(sbi, addr); struct sit_info *sit_i = SIT_I(sbi); + block_t addr_start = addr, addr_end = addr + len - 1; + unsigned int seg_num = GET_SEGNO(sbi, addr_end) - segno + 1; + unsigned int i = 1, max_blocks = sbi->blocks_per_seg, cnt; f2fs_bug_on(sbi, addr == NULL_ADDR); if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; - f2fs_invalidate_internal_cache(sbi, addr); + f2fs_invalidate_internal_cache(sbi, addr, len); /* add it into sit main buffer */ down_write(&sit_i->sentry_lock); - update_segment_mtime(sbi, addr, 0); - update_sit_entry(sbi, addr, -1); + if (seg_num == 1) + cnt = len; + else + cnt = max_blocks - GET_BLKOFF_FROM_SEG0(sbi, addr); + + do { + update_segment_mtime(sbi, addr_start, 0); + update_sit_entry(sbi, addr_start, -cnt); - /* add it into dirty seglist */ - locate_dirty_segment(sbi, segno); + /* add it into dirty seglist */ + locate_dirty_segment(sbi, segno); + + /* update @addr_start and @cnt and @segno */ + addr_start = START_BLOCK(sbi, ++segno); + if (++i == seg_num) + cnt = GET_BLKOFF_FROM_SEG0(sbi, addr_end) + 1; + else + cnt = max_blocks; + } while (i <= seg_num); up_write(&sit_i->sentry_lock); } @@ -2609,23 +2699,23 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra) } /* - * Caller should put this summary page + * Caller should put this summary folio */ -struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno) +struct folio *f2fs_get_sum_folio(struct f2fs_sb_info *sbi, unsigned int segno) { if (unlikely(f2fs_cp_error(sbi))) return ERR_PTR(-EIO); - return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno)); + return f2fs_get_meta_folio_retry(sbi, GET_SUM_BLOCK(sbi, segno)); } void f2fs_update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr) { - struct page *page = f2fs_grab_meta_page(sbi, blk_addr); + struct folio *folio = f2fs_grab_meta_folio(sbi, blk_addr); - memcpy(page_address(page), src, PAGE_SIZE); - set_page_dirty(page); - f2fs_put_page(page, 1); + memcpy(folio_address(folio), src, PAGE_SIZE); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } static void write_sum_page(struct f2fs_sb_info *sbi, @@ -2638,11 +2728,11 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, int type, block_t blk_addr) { struct curseg_info *curseg = CURSEG_I(sbi, type); - struct page *page = f2fs_grab_meta_page(sbi, blk_addr); + struct folio *folio = f2fs_grab_meta_folio(sbi, blk_addr); struct f2fs_summary_block *src = curseg->sum_blk; struct f2fs_summary_block *dst; - dst = (struct f2fs_summary_block *)page_address(page); + dst = folio_address(folio); memset(dst, 0, PAGE_SIZE); mutex_lock(&curseg->curseg_mutex); @@ -2656,8 +2746,8 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, mutex_unlock(&curseg->curseg_mutex); - set_page_dirty(page); - f2fs_put_page(page, 1); + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } static int is_next_segment_free(struct f2fs_sb_info *sbi, @@ -2711,7 +2801,7 @@ static int get_new_segment(struct f2fs_sb_info *sbi, if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning) segno = 0; else - segno = max(first_zoned_segno(sbi), *newseg); + segno = max(sbi->first_seq_zone_segno, *newseg); hint = GET_SEC_FROM_SEG(sbi, segno); } #endif @@ -2723,7 +2813,7 @@ find_other_zone: if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) { /* Write only to sequential zones */ if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) { - hint = GET_SEC_FROM_SEG(sbi, first_zoned_segno(sbi)); + hint = GET_SEC_FROM_SEG(sbi, sbi->first_seq_zone_segno); secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); } else secno = find_first_zero_bit(free_i->free_secmap, @@ -2741,7 +2831,7 @@ find_other_zone: MAIN_SECS(sbi)); if (secno >= MAIN_SECS(sbi)) { ret = -ENOSPC; - f2fs_bug_on(sbi, 1); + f2fs_bug_on(sbi, !pinning); goto out_unlock; } } @@ -2770,11 +2860,15 @@ find_other_zone: } got_it: /* set it as dirty segment in free segmap */ - f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap)); + if (test_bit(segno, free_i->free_segmap)) { + ret = -EFSCORRUPTED; + f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_CORRUPTED_FREE_BITMAP); + goto out_unlock; + } - /* no free section in conventional zone */ + /* no free section in conventional device or conventional zone */ if (new_sec && pinning && - !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { + f2fs_is_sequential_zone_area(sbi, START_BLOCK(sbi, segno))) { ret = -EAGAIN; goto out_unlock; } @@ -2783,7 +2877,7 @@ got_it: out_unlock: spin_unlock(&free_i->segmap_lock); - if (ret == -ENOSPC) + if (ret == -ENOSPC && !pinning) f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT); return ret; } @@ -2856,6 +2950,13 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) return curseg->segno; } +static void reset_curseg_fields(struct curseg_info *curseg) +{ + curseg->inited = false; + curseg->segno = NULL_SEGNO; + curseg->next_segno = 0; +} + /* * Allocate a current working segment. * This function always allocates a free segment in LFS manner. @@ -2874,7 +2975,7 @@ static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) ret = get_new_segment(sbi, &segno, new_sec, pinning); if (ret) { if (ret == -ENOSPC) - curseg->segno = NULL_SEGNO; + reset_curseg_fields(curseg); return ret; } @@ -2924,9 +3025,10 @@ static int change_curseg(struct f2fs_sb_info *sbi, int type) struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int new_segno = curseg->next_segno; struct f2fs_summary_block *sum_node; - struct page *sum_page; + struct folio *sum_folio; - write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); + if (curseg->inited) + write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno)); __set_test_and_inuse(sbi, new_segno); @@ -2939,15 +3041,15 @@ static int change_curseg(struct f2fs_sb_info *sbi, int type) curseg->alloc_type = SSR; curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0); - sum_page = f2fs_get_sum_page(sbi, new_segno); - if (IS_ERR(sum_page)) { + sum_folio = f2fs_get_sum_folio(sbi, new_segno); + if (IS_ERR(sum_folio)) { /* GC won't be able to use stale summary pages by cp_error */ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE); - return PTR_ERR(sum_page); + return PTR_ERR(sum_folio); } - sum_node = (struct f2fs_summary_block *)page_address(sum_page); + sum_node = folio_address(sum_folio); memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE); - f2fs_put_page(sum_page, 1); + f2fs_folio_put(sum_folio, true); return 0; } @@ -3237,7 +3339,8 @@ retry: if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) { f2fs_down_write(&sbi->gc_lock); - err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); + err = f2fs_gc_range(sbi, 0, sbi->first_seq_zone_segno - 1, + true, ZONED_PIN_SEC_REQUIRED_COUNT); f2fs_up_write(&sbi->gc_lock); gc_required = false; @@ -3509,7 +3612,7 @@ static int __get_segment_type_2(struct f2fs_io_info *fio) static int __get_segment_type_4(struct f2fs_io_info *fio) { if (fio->type == DATA) { - struct inode *inode = fio->page->mapping->host; + struct inode *inode = fio_inode(fio); if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; @@ -3543,7 +3646,7 @@ static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs) static int __get_segment_type_6(struct f2fs_io_info *fio) { if (fio->type == DATA) { - struct inode *inode = fio->page->mapping->host; + struct inode *inode = fio_inode(fio); int type; if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) @@ -3581,18 +3684,35 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) } } -int f2fs_get_segment_temp(int seg_type) +enum temp_type f2fs_get_segment_temp(struct f2fs_sb_info *sbi, + enum log_type type) { - if (IS_HOT(seg_type)) - return HOT; - else if (IS_WARM(seg_type)) - return WARM; - return COLD; + struct curseg_info *curseg = CURSEG_I(sbi, type); + enum temp_type temp = COLD; + + switch (curseg->seg_type) { + case CURSEG_HOT_NODE: + case CURSEG_HOT_DATA: + temp = HOT; + break; + case CURSEG_WARM_NODE: + case CURSEG_WARM_DATA: + temp = WARM; + break; + case CURSEG_COLD_NODE: + case CURSEG_COLD_DATA: + temp = COLD; + break; + default: + f2fs_bug_on(sbi, 1); + } + + return temp; } static int __get_segment_type(struct f2fs_io_info *fio) { - int type = 0; + enum log_type type = CURSEG_HOT_DATA; switch (F2FS_OPTION(fio->sbi).active_logs) { case 2: @@ -3608,7 +3728,7 @@ static int __get_segment_type(struct f2fs_io_info *fio) f2fs_bug_on(fio->sbi, true); } - fio->temp = f2fs_get_segment_temp(type); + fio->temp = f2fs_get_segment_temp(fio->sbi, type); return type; } @@ -3626,13 +3746,6 @@ static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi, get_random_u32_inclusive(1, sbi->max_fragment_hole); } -static void reset_curseg_fields(struct curseg_info *curseg) -{ - curseg->inited = false; - curseg->segno = NULL_SEGNO; - curseg->next_segno = 0; -} - int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, @@ -3793,25 +3906,51 @@ void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, } } +static int log_type_to_seg_type(enum log_type type) +{ + int seg_type = CURSEG_COLD_DATA; + + switch (type) { + case CURSEG_HOT_DATA: + case CURSEG_WARM_DATA: + case CURSEG_COLD_DATA: + case CURSEG_HOT_NODE: + case CURSEG_WARM_NODE: + case CURSEG_COLD_NODE: + seg_type = (int)type; + break; + case CURSEG_COLD_DATA_PINNED: + case CURSEG_ALL_DATA_ATGC: + seg_type = CURSEG_COLD_DATA; + break; + default: + break; + } + return seg_type; +} + static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { - int type = __get_segment_type(fio); - bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); + struct folio *folio = page_folio(fio->page); + enum log_type type = __get_segment_type(fio); + int seg_type = log_type_to_seg_type(type); + bool keep_order = (f2fs_lfs_mode(fio->sbi) && + seg_type == CURSEG_COLD_DATA); if (keep_order) f2fs_down_read(&fio->sbi->io_order_lock); if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio)) { - if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host)) + if (fscrypt_inode_uses_fs_layer_crypto(folio->mapping->host)) fscrypt_finalize_bounce_page(&fio->encrypted_page); - end_page_writeback(fio->page); - if (f2fs_in_warm_node_list(fio->sbi, fio->page)) - f2fs_del_fsync_node_entry(fio->sbi, fio->page); + folio_end_writeback(folio); + if (f2fs_in_warm_node_list(fio->sbi, folio)) + f2fs_del_fsync_node_entry(fio->sbi, folio); goto out; } if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO) - f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr); + f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr, 1); /* writeout dirty page into bdev */ f2fs_submit_page_write(fio); @@ -3912,7 +4051,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) if (!err) { f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); - f2fs_update_iostat(fio->sbi, fio->page->mapping->host, + f2fs_update_iostat(fio->sbi, fio_inode(fio), fio->io_type, F2FS_BLKSIZE); } @@ -3977,8 +4116,8 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, } } - f2fs_bug_on(sbi, !IS_DATASEG(type)); curseg = CURSEG_I(sbi, type); + f2fs_bug_on(sbi, !IS_DATASEG(curseg->seg_type)); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); @@ -4003,7 +4142,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, update_sit_entry(sbi, new_blkaddr, 1); } if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) { - f2fs_invalidate_internal_cache(sbi, old_blkaddr); + f2fs_invalidate_internal_cache(sbi, old_blkaddr, 1); if (!from_gc) update_segment_mtime(sbi, old_blkaddr, 0); update_sit_entry(sbi, old_blkaddr, -1); @@ -4045,22 +4184,21 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, f2fs_update_data_blkaddr(dn, new_addr); } -void f2fs_wait_on_page_writeback(struct page *page, - enum page_type type, bool ordered, bool locked) +void f2fs_folio_wait_writeback(struct folio *folio, enum page_type type, + bool ordered, bool locked) { - if (folio_test_writeback(page_folio(page))) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); + if (folio_test_writeback(folio)) { + struct f2fs_sb_info *sbi = F2FS_F_SB(folio); /* submit cached LFS IO */ - f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); + f2fs_submit_merged_write_cond(sbi, NULL, &folio->page, 0, type); /* submit cached IPU IO */ - f2fs_submit_merged_ipu_write(sbi, NULL, page); + f2fs_submit_merged_ipu_write(sbi, NULL, folio); if (ordered) { - wait_on_page_writeback(page); - f2fs_bug_on(sbi, locked && - folio_test_writeback(page_folio(page))); + folio_wait_writeback(folio); + f2fs_bug_on(sbi, locked && folio_test_writeback(folio)); } else { - wait_for_stable_page(page); + folio_wait_stable(folio); } } } @@ -4068,7 +4206,7 @@ void f2fs_wait_on_page_writeback(struct page *page, void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *cpage; + struct folio *cfolio; if (!f2fs_meta_inode_gc_required(inode)) return; @@ -4076,10 +4214,10 @@ void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr) if (!__is_valid_data_blkaddr(blkaddr)) return; - cpage = find_lock_page(META_MAPPING(sbi), blkaddr); - if (cpage) { - f2fs_wait_on_page_writeback(cpage, DATA, true, true); - f2fs_put_page(cpage, 1); + cfolio = filemap_lock_folio(META_MAPPING(sbi), blkaddr); + if (!IS_ERR(cfolio)) { + f2fs_folio_wait_writeback(cfolio, DATA, true, true); + f2fs_folio_put(cfolio, true); } } @@ -4103,16 +4241,16 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *seg_i; unsigned char *kaddr; - struct page *page; + struct folio *folio; block_t start; int i, j, offset; start = start_sum_block(sbi); - page = f2fs_get_meta_page(sbi, start++); - if (IS_ERR(page)) - return PTR_ERR(page); - kaddr = (unsigned char *)page_address(page); + folio = f2fs_get_meta_folio(sbi, start++); + if (IS_ERR(folio)) + return PTR_ERR(folio); + kaddr = folio_address(folio); /* Step 1: restore nat cache */ seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -4149,17 +4287,16 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi) SUM_FOOTER_SIZE) continue; - f2fs_put_page(page, 1); - page = NULL; + f2fs_folio_put(folio, true); - page = f2fs_get_meta_page(sbi, start++); - if (IS_ERR(page)) - return PTR_ERR(page); - kaddr = (unsigned char *)page_address(page); + folio = f2fs_get_meta_folio(sbi, start++); + if (IS_ERR(folio)) + return PTR_ERR(folio); + kaddr = folio_address(folio); offset = 0; } } - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return 0; } @@ -4168,7 +4305,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_summary_block *sum; struct curseg_info *curseg; - struct page *new; + struct folio *new; unsigned short blk_off; unsigned int segno = 0; block_t blk_addr = 0; @@ -4195,10 +4332,10 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) blk_addr = GET_SUM_BLOCK(sbi, segno); } - new = f2fs_get_meta_page(sbi, blk_addr); + new = f2fs_get_meta_folio(sbi, blk_addr); if (IS_ERR(new)) return PTR_ERR(new); - sum = (struct f2fs_summary_block *)page_address(new); + sum = folio_address(new); if (IS_NODESEG(type)) { if (__exist_node_summaries(sbi)) { @@ -4233,7 +4370,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) curseg->next_blkoff = blk_off; mutex_unlock(&curseg->curseg_mutex); out: - f2fs_put_page(new, 1); + f2fs_folio_put(new, true); return err; } @@ -4282,15 +4419,15 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) { - struct page *page; + struct folio *folio; unsigned char *kaddr; struct f2fs_summary *summary; struct curseg_info *seg_i; int written_size = 0; int i, j; - page = f2fs_grab_meta_page(sbi, blkaddr++); - kaddr = (unsigned char *)page_address(page); + folio = f2fs_grab_meta_folio(sbi, blkaddr++); + kaddr = folio_address(folio); memset(kaddr, 0, PAGE_SIZE); /* Step 1: write nat cache */ @@ -4307,9 +4444,9 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { seg_i = CURSEG_I(sbi, i); for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) { - if (!page) { - page = f2fs_grab_meta_page(sbi, blkaddr++); - kaddr = (unsigned char *)page_address(page); + if (!folio) { + folio = f2fs_grab_meta_folio(sbi, blkaddr++); + kaddr = folio_address(folio); memset(kaddr, 0, PAGE_SIZE); written_size = 0; } @@ -4321,14 +4458,14 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr) SUM_FOOTER_SIZE) continue; - set_page_dirty(page); - f2fs_put_page(page, 1); - page = NULL; + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); + folio = NULL; } } - if (page) { - set_page_dirty(page); - f2fs_put_page(page, 1); + if (folio) { + folio_mark_dirty(folio); + f2fs_folio_put(folio, true); } } @@ -4381,29 +4518,29 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type, return -1; } -static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, +static struct folio *get_current_sit_folio(struct f2fs_sb_info *sbi, unsigned int segno) { - return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno)); + return f2fs_get_meta_folio(sbi, current_sit_addr(sbi, segno)); } -static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, +static struct folio *get_next_sit_folio(struct f2fs_sb_info *sbi, unsigned int start) { struct sit_info *sit_i = SIT_I(sbi); - struct page *page; + struct folio *folio; pgoff_t src_off, dst_off; src_off = current_sit_addr(sbi, start); dst_off = next_sit_addr(sbi, src_off); - page = f2fs_grab_meta_page(sbi, dst_off); - seg_info_to_sit_page(sbi, page, start); + folio = f2fs_grab_meta_folio(sbi, dst_off); + seg_info_to_sit_folio(sbi, folio, start); - set_page_dirty(page); + folio_mark_dirty(folio); set_to_next_sit(sit_i, start); - return page; + return folio; } static struct sit_entry_set *grab_sit_entry_set(void) @@ -4533,7 +4670,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * #2, flush sit entries to sit page. */ list_for_each_entry_safe(ses, tmp, head, set_list) { - struct page *page = NULL; + struct folio *folio = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, @@ -4547,8 +4684,8 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (to_journal) { down_write(&curseg->journal_rwsem); } else { - page = get_next_sit_page(sbi, start_segno); - raw_sit = page_address(page); + folio = get_next_sit_folio(sbi, start_segno); + raw_sit = folio_address(folio); } /* flush dirty sit entries in region of current sit set */ @@ -4586,6 +4723,12 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) &raw_sit->entries[sit_offset]); } + /* update ckpt_valid_block */ + if (__is_large_section(sbi)) { + set_ckpt_valid_blocks(sbi, segno); + sanity_check_valid_blocks(sbi, segno); + } + __clear_bit(segno, bitmap); sit_i->dirty_sentries--; ses->entry_cnt--; @@ -4594,7 +4737,7 @@ void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (to_journal) up_write(&curseg->journal_rwsem); else - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); f2fs_bug_on(sbi, ses->entry_cnt); release_sit_entry_set(ses); @@ -4778,12 +4921,7 @@ static int build_curseg(struct f2fs_sb_info *sbi) sizeof(struct f2fs_journal), GFP_KERNEL); if (!array[i].journal) return -ENOMEM; - if (i < NR_PERSISTENT_LOG) - array[i].seg_type = CURSEG_HOT_DATA + i; - else if (i == CURSEG_COLD_DATA_PINNED) - array[i].seg_type = CURSEG_COLD_DATA; - else if (i == CURSEG_ALL_DATA_ATGC) - array[i].seg_type = CURSEG_COLD_DATA; + array[i].seg_type = log_type_to_seg_type(i); reset_curseg_fields(&array[i]); } return restore_curseg_summaries(sbi); @@ -4811,15 +4949,15 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) for (; start < end && start < MAIN_SEGS(sbi); start++) { struct f2fs_sit_block *sit_blk; - struct page *page; + struct folio *folio; se = &sit_i->sentries[start]; - page = get_current_sit_page(sbi, start); - if (IS_ERR(page)) - return PTR_ERR(page); - sit_blk = (struct f2fs_sit_block *)page_address(page); + folio = get_current_sit_folio(sbi, start); + if (IS_ERR(folio)) + return PTR_ERR(folio); + sit_blk = folio_address(folio); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); err = check_block_count(sbi, start, &sit); if (err) @@ -4912,6 +5050,16 @@ init_discard_map_done: } up_read(&curseg->journal_rwsem); + /* update ckpt_valid_block */ + if (__is_large_section(sbi)) { + unsigned int segno; + + for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { + set_ckpt_valid_blocks(sbi, segno); + sanity_check_valid_blocks(sbi, segno); + } + } + if (err) return err; @@ -5207,7 +5355,7 @@ static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx, return 0; } -static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) +static int do_fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) { struct curseg_info *cs = CURSEG_I(sbi, type); struct f2fs_dev_info *zbd; @@ -5312,12 +5460,12 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type) return 0; } -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) +static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi) { int i, ret; for (i = 0; i < NR_PERSISTENT_LOG; i++) { - ret = fix_curseg_write_pointer(sbi, i); + ret = do_fix_curseg_write_pointer(sbi, i); if (ret) return ret; } @@ -5340,7 +5488,7 @@ static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx, return check_zone_write_pointer(args->sbi, args->fdev, zone); } -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) +static int check_write_pointer(struct f2fs_sb_info *sbi) { int i, ret; struct check_zone_write_pointer_args args; @@ -5360,6 +5508,21 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) return 0; } +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) +{ + int ret; + + if (!f2fs_sb_has_blkzoned(sbi) || f2fs_readonly(sbi->sb) || + f2fs_hw_is_readonly(sbi)) + return 0; + + f2fs_notice(sbi, "Checking entire write pointers"); + ret = fix_curseg_write_pointer(sbi); + if (!ret) + ret = check_write_pointer(sbi); + return ret; +} + /* * Return the number of usable blocks in a segment. The number of blocks * returned is always equal to the number of blocks in a segment for @@ -5396,12 +5559,7 @@ static inline unsigned int f2fs_usable_zone_blks_in_seg( return BLKS_PER_SEG(sbi); } #else -int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi) -{ - return 0; -} - -int f2fs_check_write_pointer(struct f2fs_sb_info *sbi) +int f2fs_check_and_fix_write_pointer(struct f2fs_sb_info *sbi) { return 0; } @@ -5430,6 +5588,41 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi) return SEGS_PER_SEC(sbi); } +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); + unsigned int secno = 0, start = 0; + unsigned int total_valid_blocks = 0; + unsigned long long mtime = 0; + unsigned int i = 0; + + secno = GET_SEC_FROM_SEG(sbi, segno); + start = GET_SEG_FROM_SEC(sbi, secno); + + if (!__is_large_section(sbi)) { + mtime = get_seg_entry(sbi, start + i)->mtime; + goto out; + } + + for (i = 0; i < usable_segs_per_sec; i++) { + /* for large section, only check the mtime of valid segments */ + struct seg_entry *se = get_seg_entry(sbi, start+i); + + mtime += se->mtime * se->valid_blocks; + total_valid_blocks += se->valid_blocks; + } + + if (total_valid_blocks == 0) + return INVALID_MTIME; + + mtime = div_u64(mtime, total_valid_blocks); +out: + if (unlikely(mtime == INVALID_MTIME)) + mtime -= 1; + return mtime; +} + /* * Update min, max modified time for cost-benefit GC algorithm */ @@ -5443,13 +5636,9 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = ULLONG_MAX; for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { - unsigned int i; unsigned long long mtime = 0; - for (i = 0; i < SEGS_PER_SEC(sbi); i++) - mtime += get_seg_entry(sbi, segno + i)->mtime; - - mtime = div_u64(mtime, SEGS_PER_SEC(sbi)); + mtime = f2fs_get_section_mtime(sbi, segno); if (sit_i->min_mtime > mtime) sit_i->min_mtime = mtime; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 71adb4a43bec..db619fd2f51a 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -18,6 +18,8 @@ #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ #define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */ +#define INVALID_MTIME ULLONG_MAX /* no valid blocks in a segment/section */ + /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno) #define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno) @@ -32,10 +34,6 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG); } -#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA) -#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA) -#define IS_COLD(t) ((t) == CURSEG_COLD_NODE || (t) == CURSEG_COLD_DATA) - #define IS_CURSEG(sbi, seg) \ (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ @@ -104,6 +102,8 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define CAP_SEGS_PER_SEC(sbi) \ (SEGS_PER_SEC(sbi) - \ BLKS_TO_SEGS(sbi, (sbi)->unusable_blocks_per_sec)) +#define GET_START_SEG_FROM_SEC(sbi, segno) \ + (rounddown(segno, SEGS_PER_SEC(sbi))) #define GET_SEC_FROM_SEG(sbi, segno) \ (((segno) == -1) ? -1 : (segno) / SEGS_PER_SEC(sbi)) #define GET_SEG_FROM_SEC(sbi, secno) \ @@ -211,6 +211,7 @@ struct seg_entry { struct sec_entry { unsigned int valid_blocks; /* # of valid blocks in a section */ + unsigned int ckpt_valid_blocks; /* # of valid blocks last cp in a section */ }; #define MAX_SKIP_GC_COUNT 16 @@ -347,22 +348,57 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi, unsigned int segno, bool use_section) { - if (use_section && __is_large_section(sbi)) { - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); - unsigned int blocks = 0; - int i; + if (use_section && __is_large_section(sbi)) + return get_sec_entry(sbi, segno)->ckpt_valid_blocks; + else + return get_seg_entry(sbi, segno)->ckpt_valid_blocks; +} + +static inline void set_ckpt_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); + unsigned int blocks = 0; + int i; - for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { - struct seg_entry *se = get_seg_entry(sbi, start_segno); + for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { + struct seg_entry *se = get_seg_entry(sbi, start_segno); - blocks += se->ckpt_valid_blocks; - } - return blocks; + blocks += se->ckpt_valid_blocks; } - return get_seg_entry(sbi, segno)->ckpt_valid_blocks; + get_sec_entry(sbi, segno)->ckpt_valid_blocks = blocks; } +#ifdef CONFIG_F2FS_CHECK_FS +static inline void sanity_check_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); + unsigned int blocks = 0; + int i; + + for (i = 0; i < SEGS_PER_SEC(sbi); i++, start_segno++) { + struct seg_entry *se = get_seg_entry(sbi, start_segno); + + blocks += se->ckpt_valid_blocks; + } + + if (blocks != get_sec_entry(sbi, segno)->ckpt_valid_blocks) { + f2fs_err(sbi, + "Inconsistent ckpt valid blocks: " + "seg entry(%d) vs sec entry(%d) at secno %d", + blocks, get_sec_entry(sbi, segno)->ckpt_valid_blocks, secno); + f2fs_bug_on(sbi, 1); + } +} +#else +static inline void sanity_check_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno) +{ +} +#endif static inline void seg_info_from_raw_sit(struct seg_entry *se, struct f2fs_sit_entry *rs) { @@ -387,8 +423,8 @@ static inline void __seg_info_to_raw_sit(struct seg_entry *se, rs->mtime = cpu_to_le64(se->mtime); } -static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi, - struct page *page, unsigned int start) +static inline void seg_info_to_sit_folio(struct f2fs_sb_info *sbi, + struct folio *folio, unsigned int start) { struct f2fs_sit_block *raw_sit; struct seg_entry *se; @@ -397,7 +433,7 @@ static inline void seg_info_to_sit_page(struct f2fs_sb_info *sbi, (unsigned long)MAIN_SEGS(sbi)); int i; - raw_sit = (struct f2fs_sit_block *)page_address(page); + raw_sit = folio_address(folio); memset(raw_sit, 0, PAGE_SIZE); for (i = 0; i < end - start; i++) { rs = &raw_sit->entries[i]; @@ -431,7 +467,6 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); spin_lock(&free_i->segmap_lock); clear_bit(segno, free_i->free_segmap); @@ -439,7 +474,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) next = find_next_bit(free_i->free_segmap, start_segno + SEGS_PER_SEC(sbi), start_segno); - if (next >= start_segno + usable_segs) { + if (next >= start_segno + f2fs_usable_segs_in_sec(sbi)) { clear_bit(secno, free_i->free_secmap); free_i->free_sections++; } @@ -465,22 +500,36 @@ static inline void __set_test_and_free(struct f2fs_sb_info *sbi, unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; - unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi); + bool ret; spin_lock(&free_i->segmap_lock); - if (test_and_clear_bit(segno, free_i->free_segmap)) { - free_i->free_segments++; - - if (!inmem && IS_CURSEC(sbi, secno)) - goto skip_free; - next = find_next_bit(free_i->free_segmap, - start_segno + SEGS_PER_SEC(sbi), start_segno); - if (next >= start_segno + usable_segs) { - if (test_and_clear_bit(secno, free_i->free_secmap)) - free_i->free_sections++; - } - } -skip_free: + ret = test_and_clear_bit(segno, free_i->free_segmap); + if (!ret) + goto unlock_out; + + free_i->free_segments++; + + if (!inmem && IS_CURSEC(sbi, secno)) + goto unlock_out; + + /* check large section */ + next = find_next_bit(free_i->free_segmap, + start_segno + SEGS_PER_SEC(sbi), start_segno); + if (next < start_segno + f2fs_usable_segs_in_sec(sbi)) + goto unlock_out; + + ret = test_and_clear_bit(secno, free_i->free_secmap); + if (!ret) + goto unlock_out; + + free_i->free_sections++; + + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[BG_GC]) == secno) + sbi->next_victim_seg[BG_GC] = NULL_SEGNO; + if (GET_SEC_FROM_SEG(sbi, sbi->next_victim_seg[FG_GC]) == secno) + sbi->next_victim_seg[FG_GC] = NULL_SEGNO; + +unlock_out: spin_unlock(&free_i->segmap_lock); } @@ -524,8 +573,7 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi) static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi) { - return SM_I(sbi)->reserved_segments + - SM_I(sbi)->additional_reserved_segments; + return SM_I(sbi)->reserved_segments; } static inline unsigned int free_sections(struct f2fs_sb_info *sbi) @@ -559,33 +607,57 @@ static inline int reserved_sections(struct f2fs_sb_info *sbi) } static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi, - unsigned int node_blocks, unsigned int dent_blocks) + unsigned int node_blocks, unsigned int data_blocks, + unsigned int dent_blocks) { - - unsigned segno, left_blocks; + unsigned int segno, left_blocks, blocks; int i; - /* check current node sections in the worst case. */ - for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) { + /* check current data/node sections in the worst case. */ + for (i = CURSEG_HOT_DATA; i < NR_PERSISTENT_LOG; i++) { segno = CURSEG_I(sbi, i)->segno; - left_blocks = CAP_BLKS_PER_SEC(sbi) - - get_ckpt_valid_blocks(sbi, segno, true); - if (node_blocks > left_blocks) + + if (unlikely(segno == NULL_SEGNO)) + return false; + + if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) { + left_blocks = CAP_BLKS_PER_SEC(sbi) - + SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) - + CURSEG_I(sbi, i)->next_blkoff; + } else { + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); + } + + blocks = i <= CURSEG_COLD_DATA ? data_blocks : node_blocks; + if (blocks > left_blocks) return false; } /* check current data section for dentry blocks. */ segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno; - left_blocks = CAP_BLKS_PER_SEC(sbi) - - get_ckpt_valid_blocks(sbi, segno, true); + + if (unlikely(segno == NULL_SEGNO)) + return false; + + if (f2fs_lfs_mode(sbi) && __is_large_section(sbi)) { + left_blocks = CAP_BLKS_PER_SEC(sbi) - + SEGS_TO_BLKS(sbi, (segno - GET_START_SEG_FROM_SEC(sbi, segno))) - + CURSEG_I(sbi, CURSEG_HOT_DATA)->next_blkoff; + } else { + left_blocks = CAP_BLKS_PER_SEC(sbi) - + get_ckpt_valid_blocks(sbi, segno, true); + } + if (dent_blocks > left_blocks) return false; return true; } /* - * calculate needed sections for dirty node/dentry - * and call has_curseg_enough_space + * calculate needed sections for dirty node/dentry and call + * has_curseg_enough_space, please note that, it needs to account + * dirty data as well in lfs mode when checkpoint is disabled. */ static inline void __get_secs_required(struct f2fs_sb_info *sbi, unsigned int *lower_p, unsigned int *upper_p, bool *curseg_p) @@ -594,19 +666,30 @@ static inline void __get_secs_required(struct f2fs_sb_info *sbi, get_pages(sbi, F2FS_DIRTY_DENTS) + get_pages(sbi, F2FS_DIRTY_IMETA); unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int total_data_blocks = 0; unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi); unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi); + unsigned int data_secs = 0; unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi); unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi); + unsigned int data_blocks = 0; + + if (f2fs_lfs_mode(sbi) && + unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { + total_data_blocks = get_pages(sbi, F2FS_DIRTY_DATA); + data_secs = total_data_blocks / CAP_BLKS_PER_SEC(sbi); + data_blocks = total_data_blocks % CAP_BLKS_PER_SEC(sbi); + } if (lower_p) - *lower_p = node_secs + dent_secs; + *lower_p = node_secs + dent_secs + data_secs; if (upper_p) *upper_p = node_secs + dent_secs + - (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0); + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0) + + (data_blocks ? 1 : 0); if (curseg_p) *curseg_p = has_curseg_enough_space(sbi, - node_blocks, dent_blocks); + node_blocks, data_blocks, dent_blocks); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, @@ -637,12 +720,30 @@ static inline bool has_enough_free_secs(struct f2fs_sb_info *sbi, return !has_not_enough_free_secs(sbi, freed, needed); } +static inline bool has_enough_free_blks(struct f2fs_sb_info *sbi) +{ + unsigned int total_free_blocks = 0; + unsigned int avail_user_block_count; + + spin_lock(&sbi->stat_lock); + + avail_user_block_count = get_available_block_count(sbi, NULL, true); + total_free_blocks = avail_user_block_count - (unsigned int)valid_user_blocks(sbi); + + spin_unlock(&sbi->stat_lock); + + return total_free_blocks > 0; +} + static inline bool f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi) { if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED))) return true; if (likely(has_enough_free_secs(sbi, 0, 0))) return true; + if (!f2fs_lfs_mode(sbi) && + likely(has_enough_free_blks(sbi))) + return true; return false; } @@ -957,13 +1058,3 @@ wake_up: dcc->discard_wake = true; wake_up_interruptible_all(&dcc->discard_wait_queue); } - -static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi) -{ - int devi; - - for (devi = 0; devi < sbi->s_ndevs; devi++) - if (bdev_is_zoned(FDEV(devi).bdev)) - return GET_SEGNO(sbi, FDEV(devi).start_blk); - return 0; -} diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 83d6fb97dcae..b88babcf6ab4 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -73,7 +73,7 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink, mutex_unlock(&sbi->umount_mutex); } spin_unlock(&f2fs_list_lock); - return count; + return count ?: SHRINK_EMPTY; } unsigned long f2fs_shrink_scan(struct shrinker *shrink, @@ -130,6 +130,103 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink, return freed; } +unsigned int f2fs_donate_files(void) +{ + struct f2fs_sb_info *sbi; + struct list_head *p; + unsigned int donate_files = 0; + + spin_lock(&f2fs_list_lock); + p = f2fs_list.next; + while (p != &f2fs_list) { + sbi = list_entry(p, struct f2fs_sb_info, s_list); + + /* stop f2fs_put_super */ + if (!mutex_trylock(&sbi->umount_mutex)) { + p = p->next; + continue; + } + spin_unlock(&f2fs_list_lock); + + donate_files += sbi->donate_files; + + spin_lock(&f2fs_list_lock); + p = p->next; + mutex_unlock(&sbi->umount_mutex); + } + spin_unlock(&f2fs_list_lock); + + return donate_files; +} + +static unsigned int do_reclaim_caches(struct f2fs_sb_info *sbi, + unsigned int reclaim_caches_kb) +{ + struct inode *inode; + struct f2fs_inode_info *fi; + unsigned int nfiles = sbi->donate_files; + pgoff_t npages = reclaim_caches_kb >> (PAGE_SHIFT - 10); + + while (npages && nfiles--) { + pgoff_t len; + + spin_lock(&sbi->inode_lock[DONATE_INODE]); + if (list_empty(&sbi->inode_list[DONATE_INODE])) { + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + break; + } + fi = list_first_entry(&sbi->inode_list[DONATE_INODE], + struct f2fs_inode_info, gdonate_list); + list_move_tail(&fi->gdonate_list, &sbi->inode_list[DONATE_INODE]); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[DONATE_INODE]); + + if (!inode) + continue; + + inode_lock(inode); + if (!is_inode_flag_set(inode, FI_DONATE_FINISHED)) { + len = fi->donate_end - fi->donate_start + 1; + npages = npages < len ? 0 : npages - len; + + invalidate_inode_pages2_range(inode->i_mapping, + fi->donate_start, fi->donate_end); + set_inode_flag(inode, FI_DONATE_FINISHED); + } + inode_unlock(inode); + + iput(inode); + cond_resched(); + } + return npages << (PAGE_SHIFT - 10); +} + +void f2fs_reclaim_caches(unsigned int reclaim_caches_kb) +{ + struct f2fs_sb_info *sbi; + struct list_head *p; + + spin_lock(&f2fs_list_lock); + p = f2fs_list.next; + while (p != &f2fs_list && reclaim_caches_kb) { + sbi = list_entry(p, struct f2fs_sb_info, s_list); + + /* stop f2fs_put_super */ + if (!mutex_trylock(&sbi->umount_mutex)) { + p = p->next; + continue; + } + spin_unlock(&f2fs_list_lock); + + reclaim_caches_kb = do_reclaim_caches(sbi, reclaim_caches_kb); + + spin_lock(&f2fs_list_lock); + p = p->next; + mutex_unlock(&sbi->umount_mutex); + } + spin_unlock(&f2fs_list_lock); +} + void f2fs_join_shrinker(struct f2fs_sb_info *sbi) { spin_lock(&f2fs_list_lock); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 87ab5696bd48..bbf1dad6843f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -47,6 +47,7 @@ const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_KVMALLOC] = "kvmalloc", [FAULT_PAGE_ALLOC] = "page alloc", [FAULT_PAGE_GET] = "page get", + [FAULT_ALLOC_BIO] = "alloc bio(obsolete)", [FAULT_ALLOC_NID] = "alloc nid", [FAULT_ORPHAN] = "orphan", [FAULT_BLOCK] = "no more block", @@ -63,32 +64,36 @@ const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_BLKADDR_VALIDITY] = "invalid blkaddr", [FAULT_BLKADDR_CONSISTENCE] = "inconsistent blkaddr", [FAULT_NO_SEGMENT] = "no free segment", + [FAULT_INCONSISTENT_FOOTER] = "inconsistent footer", + [FAULT_TIMEOUT] = "timeout", + [FAULT_VMALLOC] = "vmalloc", }; int f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned long rate, - unsigned long type) + unsigned long type, enum fault_option fo) { struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; - if (rate) { + if (fo & FAULT_ALL) { + memset(ffi, 0, sizeof(struct f2fs_fault_info)); + return 0; + } + + if (fo & FAULT_RATE) { if (rate > INT_MAX) return -EINVAL; atomic_set(&ffi->inject_ops, 0); ffi->inject_rate = (int)rate; + f2fs_info(sbi, "build fault injection rate: %lu", rate); } - if (type) { + if (fo & FAULT_TYPE) { if (type >= BIT(FAULT_MAX)) return -EINVAL; ffi->inject_type = (unsigned int)type; + f2fs_info(sbi, "build fault injection type: 0x%lx", type); } - if (!rate && !type) - memset(ffi, 0, sizeof(struct f2fs_fault_info)); - else - f2fs_info(sbi, - "build fault injection attr: rate: %lu, type: 0x%lx", - rate, type); return 0; } #endif @@ -150,6 +155,8 @@ enum { Opt_mode, Opt_fault_injection, Opt_fault_type, + Opt_lazytime, + Opt_nolazytime, Opt_quota, Opt_noquota, Opt_usrquota, @@ -188,6 +195,7 @@ enum { Opt_memory_mode, Opt_age_extent_cache, Opt_errors, + Opt_nat_bits, Opt_err, }; @@ -226,6 +234,8 @@ static match_table_t f2fs_tokens = { {Opt_mode, "mode=%s"}, {Opt_fault_injection, "fault_injection=%u"}, {Opt_fault_type, "fault_type=%u"}, + {Opt_lazytime, "lazytime"}, + {Opt_nolazytime, "nolazytime"}, {Opt_quota, "quota"}, {Opt_noquota, "noquota"}, {Opt_usrquota, "usrquota"}, @@ -265,6 +275,7 @@ static match_table_t f2fs_tokens = { {Opt_memory_mode, "memory=%s"}, {Opt_age_extent_cache, "age_extent_cache"}, {Opt_errors, "errors=%s"}, + {Opt_nat_bits, "nat_bits"}, {Opt_err, NULL}, }; @@ -379,10 +390,10 @@ static void init_once(void *foo) #ifdef CONFIG_QUOTA static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) -static int f2fs_set_qf_name(struct super_block *sb, int qtype, +static int f2fs_set_qf_name(struct f2fs_sb_info *sbi, int qtype, substring_t *args) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct super_block *sb = sbi->sb; char *qname; int ret = -EINVAL; @@ -420,9 +431,9 @@ errout: return ret; } -static int f2fs_clear_qf_name(struct super_block *sb, int qtype) +static int f2fs_clear_qf_name(struct f2fs_sb_info *sbi, int qtype) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct super_block *sb = sbi->sb; if (sb_any_quota_loaded(sb) && F2FS_OPTION(sbi).s_qf_names[qtype]) { f2fs_err(sbi, "Cannot change journaled quota options when quota turned on"); @@ -479,12 +490,11 @@ static int f2fs_check_quota_options(struct f2fs_sb_info *sbi) } #endif -static int f2fs_set_test_dummy_encryption(struct super_block *sb, +static int f2fs_set_test_dummy_encryption(struct f2fs_sb_info *sbi, const char *opt, const substring_t *arg, bool is_remount) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); struct fs_parameter param = { .type = fs_value_is_string, .string = arg->from ? arg->from : "", @@ -667,9 +677,8 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str) #endif #endif -static int parse_options(struct super_block *sb, char *options, bool is_remount) +static int parse_options(struct f2fs_sb_info *sbi, char *options, bool is_remount) { - struct f2fs_sb_info *sbi = F2FS_SB(sb); substring_t args[MAX_OPT_ARGS]; #ifdef CONFIG_F2FS_FS_COMPRESSION unsigned char (*ext)[F2FS_EXTENSION_LEN]; @@ -683,7 +692,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) int ret; if (!options) - goto default_check; + return 0; while ((p = strsep(&options, ",")) != NULL) { int token; @@ -724,10 +733,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) set_opt(sbi, DISABLE_ROLL_FORWARD); break; case Opt_norecovery: - /* this option mounts f2fs with ro */ + /* requires ro mount, checked in f2fs_default_check */ set_opt(sbi, NORECOVERY); - if (!f2fs_readonly(sb)) - return -EINVAL; break; case Opt_discard: if (!f2fs_hw_support_discard(sbi)) { @@ -768,16 +775,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) break; #else case Opt_user_xattr: - f2fs_info(sbi, "user_xattr options not supported"); - break; case Opt_nouser_xattr: - f2fs_info(sbi, "nouser_xattr options not supported"); - break; case Opt_inline_xattr: - f2fs_info(sbi, "inline_xattr options not supported"); - break; case Opt_noinline_xattr: - f2fs_info(sbi, "noinline_xattr options not supported"); + case Opt_inline_xattr_size: + f2fs_info(sbi, "xattr options not supported"); break; #endif #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -789,10 +791,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) break; #else case Opt_acl: - f2fs_info(sbi, "acl options not supported"); - break; case Opt_noacl: - f2fs_info(sbi, "noacl options not supported"); + f2fs_info(sbi, "acl options not supported"); break; #endif case Opt_active_logs: @@ -834,6 +834,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) set_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noextent_cache: + if (f2fs_sb_has_device_alias(sbi)) { + f2fs_err(sbi, "device aliasing requires extent cache"); + return -EINVAL; + } clear_opt(sbi, READ_EXTENT_CACHE); break; case Opt_noinline_data: @@ -896,8 +900,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; - if (f2fs_build_fault_attr(sbi, arg, - F2FS_ALL_FAULT_TYPE)) + if (f2fs_build_fault_attr(sbi, arg, 0, FAULT_RATE)) return -EINVAL; set_opt(sbi, FAULT_INJECTION); break; @@ -905,19 +908,22 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_fault_type: if (args->from && match_int(args, &arg)) return -EINVAL; - if (f2fs_build_fault_attr(sbi, 0, arg)) + if (f2fs_build_fault_attr(sbi, 0, arg, FAULT_TYPE)) return -EINVAL; set_opt(sbi, FAULT_INJECTION); break; #else case Opt_fault_injection: - f2fs_info(sbi, "fault_injection options not supported"); - break; - case Opt_fault_type: - f2fs_info(sbi, "fault_type options not supported"); + f2fs_info(sbi, "fault injection options not supported"); break; #endif + case Opt_lazytime: + set_opt(sbi, LAZYTIME); + break; + case Opt_nolazytime: + clear_opt(sbi, LAZYTIME); + break; #ifdef CONFIG_QUOTA case Opt_quota: case Opt_usrquota: @@ -930,32 +936,32 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) set_opt(sbi, PRJQUOTA); break; case Opt_usrjquota: - ret = f2fs_set_qf_name(sb, USRQUOTA, &args[0]); + ret = f2fs_set_qf_name(sbi, USRQUOTA, &args[0]); if (ret) return ret; break; case Opt_grpjquota: - ret = f2fs_set_qf_name(sb, GRPQUOTA, &args[0]); + ret = f2fs_set_qf_name(sbi, GRPQUOTA, &args[0]); if (ret) return ret; break; case Opt_prjjquota: - ret = f2fs_set_qf_name(sb, PRJQUOTA, &args[0]); + ret = f2fs_set_qf_name(sbi, PRJQUOTA, &args[0]); if (ret) return ret; break; case Opt_offusrjquota: - ret = f2fs_clear_qf_name(sb, USRQUOTA); + ret = f2fs_clear_qf_name(sbi, USRQUOTA); if (ret) return ret; break; case Opt_offgrpjquota: - ret = f2fs_clear_qf_name(sb, GRPQUOTA); + ret = f2fs_clear_qf_name(sbi, GRPQUOTA); if (ret) return ret; break; case Opt_offprjjquota: - ret = f2fs_clear_qf_name(sb, PRJQUOTA); + ret = f2fs_clear_qf_name(sbi, PRJQUOTA); if (ret) return ret; break; @@ -1025,14 +1031,14 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) kfree(name); break; case Opt_test_dummy_encryption: - ret = f2fs_set_test_dummy_encryption(sb, p, &args[0], + ret = f2fs_set_test_dummy_encryption(sbi, p, &args[0], is_remount); if (ret) return ret; break; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT - sb->s_flags |= SB_INLINECRYPT; + set_opt(sbi, INLINECRYPT); #else f2fs_info(sbi, "inline encryption not supported"); #endif @@ -1158,7 +1164,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) break; } - strcpy(ext[ext_cnt], name); + ret = strscpy(ext[ext_cnt], name); + if (ret < 0) { + kfree(name); + return ret; + } F2FS_OPTION(sbi).compress_ext_cnt++; kfree(name); break; @@ -1187,7 +1197,11 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) break; } - strcpy(noext[noext_cnt], name); + ret = strscpy(noext[noext_cnt], name); + if (ret < 0) { + kfree(name); + return ret; + } F2FS_OPTION(sbi).nocompress_ext_cnt++; kfree(name); break; @@ -1300,13 +1314,20 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) } kfree(name); break; + case Opt_nat_bits: + set_opt(sbi, NAT_BITS); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); return -EINVAL; } } -default_check: + return 0; +} + +static int f2fs_default_check(struct f2fs_sb_info *sbi) +{ #ifdef CONFIG_QUOTA if (f2fs_check_quota_options(sbi)) return -EINVAL; @@ -1396,6 +1417,12 @@ default_check: f2fs_err(sbi, "Allow to mount readonly mode only"); return -EROFS; } + + if (test_opt(sbi, NORECOVERY) && !f2fs_readonly(sbi->sb)) { + f2fs_err(sbi, "norecovery requires readonly mount"); + return -EINVAL; + } + return 0; } @@ -1419,6 +1446,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); + INIT_LIST_HEAD(&fi->gdonate_list); init_f2fs_rwsem(&fi->i_gc_rwsem[READ]); init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]); init_f2fs_rwsem(&fi->i_xattr_sem); @@ -1505,6 +1533,12 @@ int f2fs_inode_dirtied(struct inode *inode, bool sync) inc_page_count(sbi, F2FS_DIRTY_IMETA); } spin_unlock(&sbi->inode_lock[DIRTY_META]); + + /* if atomic write is not committed, set inode w/ atomic dirty */ + if (!ret && f2fs_is_atomic_file(inode) && + !is_inode_flag_set(inode, FI_ATOMIC_COMMITTED)) + set_inode_flag(inode, FI_ATOMIC_DIRTIED); + return ret; } @@ -1672,8 +1706,6 @@ static void f2fs_put_super(struct super_block *sb) kvfree(sbi->ckpt); - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->raw_super); f2fs_destroy_page_array_cache(sbi); @@ -1717,27 +1749,45 @@ int f2fs_sync_fs(struct super_block *sb, int sync) static int f2fs_freeze(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); + if (f2fs_readonly(sb)) return 0; /* IO error happened before */ - if (unlikely(f2fs_cp_error(F2FS_SB(sb)))) + if (unlikely(f2fs_cp_error(sbi))) return -EIO; /* must be clean, since sync_filesystem() was already called */ - if (is_sbi_flag_set(F2FS_SB(sb), SBI_IS_DIRTY)) + if (is_sbi_flag_set(sbi, SBI_IS_DIRTY)) return -EINVAL; + sbi->umount_lock_holder = current; + /* Let's flush checkpoints and stop the thread. */ - f2fs_flush_ckpt_thread(F2FS_SB(sb)); + f2fs_flush_ckpt_thread(sbi); + + sbi->umount_lock_holder = NULL; /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ - set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); + set_sbi_flag(sbi, SBI_IS_FREEZING); return 0; } static int f2fs_unfreeze(struct super_block *sb) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + /* + * It will update discard_max_bytes of mounted lvm device to zero + * after creating snapshot on this lvm device, let's drop all + * remained discards. + * We don't need to disable real-time discard because discard_max_bytes + * will recover after removal of snapshot. + */ + if (test_opt(sbi, DISCARD) && !f2fs_hw_support_discard(sbi)) + f2fs_issue_discard_timeout(sbi); + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } @@ -1759,26 +1809,32 @@ static int f2fs_statfs_project(struct super_block *sb, limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, dquot->dq_dqb.dqb_bhardlimit); - if (limit) - limit >>= sb->s_blocksize_bits; + limit >>= sb->s_blocksize_bits; + + if (limit) { + uint64_t remaining = 0; - if (limit && buf->f_blocks > limit) { curblock = (dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits; - buf->f_blocks = limit; - buf->f_bfree = buf->f_bavail = - (buf->f_blocks > curblock) ? - (buf->f_blocks - curblock) : 0; + if (limit > curblock) + remaining = limit - curblock; + + buf->f_blocks = min(buf->f_blocks, limit); + buf->f_bfree = min(buf->f_bfree, remaining); + buf->f_bavail = min(buf->f_bavail, remaining); } limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, dquot->dq_dqb.dqb_ihardlimit); - if (limit && buf->f_files > limit) { - buf->f_files = limit; - buf->f_ffree = - (buf->f_files > dquot->dq_dqb.dqb_curinodes) ? - (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0; + if (limit) { + uint64_t remaining = 0; + + if (limit > dquot->dq_dqb.dqb_curinodes) + remaining = limit - dquot->dq_dqb.dqb_curinodes; + + buf->f_files = min(buf->f_files, limit); + buf->f_ffree = min(buf->f_ffree, remaining); } spin_unlock(&dquot->dq_dqb_lock); @@ -1804,7 +1860,8 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_blocks = total_count - start_count; spin_lock(&sbi->stat_lock); - + if (sbi->carve_out) + buf->f_blocks -= sbi->current_reserved_blocks; user_block_count = sbi->user_block_count; total_valid_node_count = valid_node_count(sbi); avail_node_count = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; @@ -1836,9 +1893,9 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_fsid = u64_to_fsid(id); #ifdef CONFIG_QUOTA - if (is_inode_flag_set(dentry->d_inode, FI_PROJ_INHERIT) && + if (is_inode_flag_set(d_inode(dentry), FI_PROJ_INHERIT) && sb_has_quota_limits_enabled(sb, PRJQUOTA)) { - f2fs_statfs_project(sb, F2FS_I(dentry->d_inode)->i_projid, buf); + f2fs_statfs_project(sb, F2FS_I(d_inode(dentry))->i_projid, buf); } #endif return 0; @@ -2096,6 +2153,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC) seq_printf(seq, ",errors=%s", "panic"); + if (test_opt(sbi, NAT_BITS)) + seq_puts(seq, ",nat_bits"); + return 0; } @@ -2143,8 +2203,8 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount) set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); set_opt(sbi, MERGE_CHECKPOINT); + set_opt(sbi, LAZYTIME); F2FS_OPTION(sbi).unusable_cap = 0; - sbi->sb->s_flags |= SB_LAZYTIME; if (!f2fs_is_readonly(sbi)) set_opt(sbi, FLUSH_MERGE); if (f2fs_sb_has_blkzoned(sbi)) @@ -2159,7 +2219,7 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount) set_opt(sbi, POSIX_ACL); #endif - f2fs_build_fault_attr(sbi, 0, 0); + f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL); } #ifdef CONFIG_QUOTA @@ -2286,6 +2346,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool no_discard = !test_opt(sbi, DISCARD); bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE); bool block_unit_discard = f2fs_block_unit_discard(sbi); + bool no_nat_bits = !test_opt(sbi, NAT_BITS); #ifdef CONFIG_QUOTA int i, j; #endif @@ -2297,6 +2358,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) org_mount_opt = sbi->mount_opt; old_sb_flags = sb->s_flags; + sbi->umount_lock_holder = current; + #ifdef CONFIG_QUOTA org_mount_opt.s_jquota_fmt = F2FS_OPTION(sbi).s_jquota_fmt; for (i = 0; i < MAXQUOTAS; i++) { @@ -2327,7 +2390,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) default_options(sbi, true); /* parse mount options */ - err = parse_options(sb, data, true); + err = parse_options(sbi, data, true); if (err) goto restore_opts; @@ -2342,6 +2405,10 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } #endif + err = f2fs_default_check(sbi); + if (err) + goto restore_opts; + /* flush outstanding errors before changing fs state */ flush_work(&sbi->s_error_work); @@ -2412,6 +2479,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (no_nat_bits == !!test_opt(sbi, NAT_BITS)) { + err = -EINVAL; + f2fs_warn(sbi, "switch nat_bits option is not allowed"); + goto restore_opts; + } + if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) { err = -EINVAL; f2fs_warn(sbi, "disabling checkpoint not compatible with read-only"); @@ -2474,6 +2547,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) } } + adjust_unusable_cap_perc(sbi); if (enable_checkpoint == !!test_opt(sbi, DISABLE_CHECKPOINT)) { if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); @@ -2518,8 +2592,9 @@ skip: (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); limit_reserve_root(sbi); - adjust_unusable_cap_perc(sbi); *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME); + + sbi->umount_lock_holder = NULL; return 0; restore_checkpoint: if (need_enable_checkpoint) { @@ -2560,6 +2635,8 @@ restore_opts: #endif sbi->mount_opt = org_mount_opt; sb->s_flags = old_sb_flags; + + sbi->umount_lock_holder = NULL; return err; } @@ -2623,12 +2700,9 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, { struct inode *inode = sb_dqopt(sb)->files[type]; struct address_space *mapping = inode->i_mapping; - block_t blkidx = F2FS_BYTES_TO_BLK(off); - int offset = off & (sb->s_blocksize - 1); int tocopy; size_t toread; loff_t i_size = i_size_read(inode); - struct page *page; if (off > i_size) return 0; @@ -2637,37 +2711,42 @@ static ssize_t f2fs_quota_read(struct super_block *sb, int type, char *data, len = i_size - off; toread = len; while (toread > 0) { - tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread); + struct folio *folio; + size_t offset; + repeat: - page = read_cache_page_gfp(mapping, blkidx, GFP_NOFS); - if (IS_ERR(page)) { - if (PTR_ERR(page) == -ENOMEM) { + folio = mapping_read_folio_gfp(mapping, off >> PAGE_SHIFT, + GFP_NOFS); + if (IS_ERR(folio)) { + if (PTR_ERR(folio) == -ENOMEM) { memalloc_retry_wait(GFP_NOFS); goto repeat; } set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); - return PTR_ERR(page); + return PTR_ERR(folio); } + offset = offset_in_folio(folio, off); + tocopy = min(folio_size(folio) - offset, toread); - lock_page(page); + folio_lock(folio); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); + if (unlikely(folio->mapping != mapping)) { + f2fs_folio_put(folio, true); goto repeat; } - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR); - return -EIO; - } - memcpy_from_page(data, page, offset, tocopy); - f2fs_put_page(page, 1); + /* + * should never happen, just leave f2fs_bug_on() here to catch + * any potential bug. + */ + f2fs_bug_on(F2FS_SB(sb), !folio_test_uptodate(folio)); + + memcpy_from_folio(data, folio, offset, tocopy); + f2fs_folio_put(folio, true); - offset = 0; toread -= tocopy; data += tocopy; - blkidx++; + off += tocopy; } return len; } @@ -2876,7 +2955,7 @@ out: return ret; } -int f2fs_quota_sync(struct super_block *sb, int type) +int f2fs_do_quota_sync(struct super_block *sb, int type) { struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); @@ -2924,11 +3003,21 @@ int f2fs_quota_sync(struct super_block *sb, int type) return ret; } +static int f2fs_quota_sync(struct super_block *sb, int type) +{ + int ret; + + F2FS_SB(sb)->umount_lock_holder = current; + ret = f2fs_do_quota_sync(sb, type); + F2FS_SB(sb)->umount_lock_holder = NULL; + return ret; +} + static int f2fs_quota_on(struct super_block *sb, int type, int format_id, const struct path *path) { struct inode *inode; - int err; + int err = 0; /* if quota sysfile exists, deny enabling quota with specific file */ if (f2fs_sb_has_quota_ino(F2FS_SB(sb))) { @@ -2939,31 +3028,34 @@ static int f2fs_quota_on(struct super_block *sb, int type, int format_id, if (path->dentry->d_sb != sb) return -EXDEV; - err = f2fs_quota_sync(sb, type); + F2FS_SB(sb)->umount_lock_holder = current; + + err = f2fs_do_quota_sync(sb, type); if (err) - return err; + goto out; inode = d_inode(path->dentry); err = filemap_fdatawrite(inode->i_mapping); if (err) - return err; + goto out; err = filemap_fdatawait(inode->i_mapping); if (err) - return err; + goto out; err = dquot_quota_on(sb, type, format_id, path); if (err) - return err; + goto out; inode_lock(inode); F2FS_I(inode)->i_flags |= F2FS_QUOTA_DEFAULT_FL; f2fs_set_inode_flags(inode); inode_unlock(inode); f2fs_mark_inode_dirty_sync(inode, false); - - return 0; +out: + F2FS_SB(sb)->umount_lock_holder = NULL; + return err; } static int __f2fs_quota_off(struct super_block *sb, int type) @@ -2974,7 +3066,7 @@ static int __f2fs_quota_off(struct super_block *sb, int type) if (!inode || !igrab(inode)) return dquot_quota_off(sb, type); - err = f2fs_quota_sync(sb, type); + err = f2fs_do_quota_sync(sb, type); if (err) goto out_put; @@ -2997,6 +3089,8 @@ static int f2fs_quota_off(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); int err; + F2FS_SB(sb)->umount_lock_holder = current; + err = __f2fs_quota_off(sb, type); /* @@ -3006,6 +3100,9 @@ static int f2fs_quota_off(struct super_block *sb, int type) */ if (is_journalled_quota(sbi)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); + + F2FS_SB(sb)->umount_lock_holder = NULL; + return err; } @@ -3138,7 +3235,7 @@ int f2fs_dquot_initialize(struct inode *inode) return 0; } -int f2fs_quota_sync(struct super_block *sb, int type) +int f2fs_do_quota_sync(struct super_block *sb, int type) { return 0; } @@ -3322,7 +3419,7 @@ loff_t max_file_blocks(struct inode *inode) * fit within U32_MAX + 1 data units. */ - result = min(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096)); + result = umin(result, F2FS_BYTES_TO_BLK(((loff_t)U32_MAX + 1) * 4096)); return result; } @@ -3348,7 +3445,7 @@ static int __f2fs_commit_super(struct f2fs_sb_info *sbi, struct folio *folio, bio = bio_alloc(sbi->sb->s_bdev, 1, opf, GFP_NOFS); /* it doesn't need to set crypto context for superblock update */ - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio_index(folio)); + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(folio->index); if (!bio_add_folio(bio, folio, folio_size(folio), 0)) f2fs_bug_on(sbi, 1); @@ -3474,7 +3571,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } crc = le32_to_cpu(raw_super->crc); - if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { + if (crc != f2fs_crc32(raw_super, crc_offset)) { f2fs_info(sbi, "Invalid SB checksum value: %u", crc); return -EFSCORRUPTED; } @@ -3633,6 +3730,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi) block_t user_block_count, valid_user_blocks; block_t avail_node_count, valid_node_count; unsigned int nat_blocks, nat_bits_bytes, nat_bits_blocks; + unsigned int sit_blk_cnt; int i, j; total = le32_to_cpu(raw_super->segment_count); @@ -3744,6 +3842,13 @@ skip_cross: return 1; } + sit_blk_cnt = DIV_ROUND_UP(main_segs, SIT_ENTRY_PER_BLOCK); + if (sit_bitmap_size * 8 < sit_blk_cnt) { + f2fs_err(sbi, "Wrong bitmap size: sit: %u, sit_blk_cnt:%u", + sit_bitmap_size, sit_blk_cnt); + return 1; + } + cp_pack_start_sum = __start_sum_addr(sbi); cp_payload = __cp_payload(sbi); if (cp_pack_start_sum < cp_payload + 1 || @@ -4022,7 +4127,7 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover) /* we should update superblock crc here */ if (!recover && f2fs_sb_has_sb_chksum(sbi)) { - crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi), + crc = f2fs_crc32(F2FS_RAW_SUPER(sbi), offsetof(struct f2fs_super_block, crc)); F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc); } @@ -4155,8 +4260,7 @@ static bool system_going_down(void) || system_state == SYSTEM_RESTART; } -void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, - bool irq_context) +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason) { struct super_block *sb = sbi->sb; bool shutdown = reason == STOP_CP_REASON_SHUTDOWN; @@ -4168,10 +4272,12 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, if (!f2fs_hw_is_readonly(sbi)) { save_stop_reason(sbi, reason); - if (irq_context && !shutdown) - schedule_work(&sbi->s_error_work); - else - f2fs_record_stop_reason(sbi); + /* + * always create an asynchronous task to record stop_reason + * in order to avoid potential deadlock when running into + * f2fs_record_stop_reason() synchronously. + */ + schedule_work(&sbi->s_error_work); } /* @@ -4187,6 +4293,8 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, if (shutdown) set_sbi_flag(sbi, SBI_IS_SHUTDOWN); + else + dump_stack(); /* * Continue filesystem operators if errors=continue. Should not set @@ -4217,6 +4325,37 @@ static void f2fs_record_error_work(struct work_struct *work) f2fs_record_stop_reason(sbi); } +static inline unsigned int get_first_seq_zone_segno(struct f2fs_sb_info *sbi) +{ +#ifdef CONFIG_BLK_DEV_ZONED + unsigned int zoneno, total_zones; + int devi; + + if (!f2fs_sb_has_blkzoned(sbi)) + return NULL_SEGNO; + + for (devi = 0; devi < sbi->s_ndevs; devi++) { + if (!bdev_is_zoned(FDEV(devi).bdev)) + continue; + + total_zones = GET_ZONE_FROM_SEG(sbi, FDEV(devi).total_segments); + + for (zoneno = 0; zoneno < total_zones; zoneno++) { + unsigned int segs, blks; + + if (!f2fs_zone_is_seq(sbi, devi, zoneno)) + continue; + + segs = GET_SEG_FROM_SEC(sbi, + zoneno * sbi->secs_per_zone); + blks = SEGS_TO_BLKS(sbi, segs); + return GET_SEGNO(sbi, FDEV(devi).start_blk + blks); + } + } +#endif + return NULL_SEGNO; +} + static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); @@ -4251,6 +4390,14 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) #endif for (i = 0; i < max_devices; i++) { + if (max_devices == 1) { + FDEV(i).total_segments = + le32_to_cpu(raw_super->segment_count_main); + FDEV(i).start_blk = 0; + FDEV(i).end_blk = FDEV(i).total_segments * + BLKS_PER_SEG(sbi); + } + if (i == 0) FDEV(0).bdev_file = sbi->sb->s_bdev_file; else if (!RDEV(i).path[0]) @@ -4421,15 +4568,6 @@ try_onemore: } mutex_init(&sbi->flush_lock); - /* Load the checksum driver */ - sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_err(sbi, "Cannot load crc32 driver."); - err = PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver = NULL; - goto free_sbi; - } - /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { f2fs_err(sbi, "unable to set blocksize"); @@ -4450,8 +4588,8 @@ try_onemore: /* precompute checksum seed for metadata */ if (f2fs_sb_has_inode_chksum(sbi)) - sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid, - sizeof(raw_super->uuid)); + sbi->s_chksum_seed = f2fs_chksum(~0, raw_super->uuid, + sizeof(raw_super->uuid)); default_options(sbi, false); /* parse mount options */ @@ -4461,7 +4599,11 @@ try_onemore: goto free_sb_buf; } - err = parse_options(sb, options, false); + err = parse_options(sbi, options, false); + if (err) + goto free_options; + + err = f2fs_default_check(sbi); if (err) goto free_options; @@ -4499,6 +4641,14 @@ try_onemore: sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? SB_POSIXACL : 0); + if (test_opt(sbi, INLINECRYPT)) + sb->s_flags |= SB_INLINECRYPT; + + if (test_opt(sbi, LAZYTIME)) + sb->s_flags |= SB_LAZYTIME; + else + sb->s_flags &= ~SB_LAZYTIME; + super_set_uuid(sb, (void *) raw_super->uuid, sizeof(raw_super->uuid)); super_set_sysfs_name_bdev(sb); sb->s_iflags |= SB_I_CGROUPWB; @@ -4617,6 +4767,9 @@ try_onemore: /* For write statistics */ sbi->sectors_written_start = f2fs_get_sectors_written(sbi); + /* get segno of first zoned block device */ + sbi->first_seq_zone_segno = get_first_seq_zone_segno(sbi); + /* Read accumulated write IO statistics if exists */ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); if (__exist_node_summaries(sbi)) @@ -4666,6 +4819,7 @@ try_onemore: if (err) goto free_compress_inode; + sbi->umount_lock_holder = current; #ifdef CONFIG_QUOTA /* Enable quota usage during mount */ if (f2fs_sb_has_quota_ino(sbi) && !f2fs_readonly(sb)) { @@ -4681,8 +4835,10 @@ try_onemore: if (err) goto free_meta; - if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) + if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG))) { + skip_recovery = true; goto reset_checkpoint; + } /* recover fsynced data */ if (!test_opt(sbi, DISABLE_ROLL_FORWARD) && @@ -4732,32 +4888,29 @@ try_onemore: } } +reset_checkpoint: #ifdef CONFIG_QUOTA f2fs_recover_quota_end(sbi, quota_enabled); #endif -reset_checkpoint: /* * If the f2fs is not readonly and fsync data recovery succeeds, - * check zoned block devices' write pointer consistency. + * write pointer consistency of cursegs and other zones are already + * checked and fixed during recovery. However, if recovery fails, + * write pointers are left untouched, and retry-mount should check + * them here. */ - if (f2fs_sb_has_blkzoned(sbi) && !f2fs_readonly(sb)) { - int err2; - - f2fs_notice(sbi, "Checking entire write pointers"); - err2 = f2fs_check_write_pointer(sbi); - if (err2) - err = err2; - } + if (skip_recovery) + err = f2fs_check_and_fix_write_pointer(sbi); if (err) goto free_meta; + /* f2fs_recover_fsync_data() cleared this already */ + clear_sbi_flag(sbi, SBI_POR_DOING); + err = f2fs_init_inmem_curseg(sbi); if (err) goto sync_free_meta; - /* f2fs_recover_fsync_data() cleared this already */ - clear_sbi_flag(sbi, SBI_POR_DOING); - if (test_opt(sbi, DISABLE_CHECKPOINT)) { err = f2fs_disable_checkpoint(sbi); if (err) @@ -4795,6 +4948,8 @@ reset_checkpoint: f2fs_update_time(sbi, CP_TIME); f2fs_update_time(sbi, REQ_TIME); clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK); + + sbi->umount_lock_holder = NULL; return 0; sync_free_meta: @@ -4874,8 +5029,6 @@ free_options: free_sb_buf: kfree(raw_super); free_sbi: - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi); sb->s_fs_info = NULL; @@ -4899,6 +5052,8 @@ static void kill_f2fs_super(struct super_block *sb) struct f2fs_sb_info *sbi = F2FS_SB(sb); if (sb->s_root) { + sbi->umount_lock_holder = current; + set_sbi_flag(sbi, SBI_IS_CLOSE); f2fs_stop_gc_thread(sbi); f2fs_stop_discard_thread(sbi); @@ -4991,9 +5146,6 @@ static int __init init_f2fs_fs(void) err = f2fs_init_shrinker(); if (err) goto free_sysfs; - err = register_filesystem(&f2fs_fs_type); - if (err) - goto free_shrinker; f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) @@ -5016,7 +5168,12 @@ static int __init init_f2fs_fs(void) err = f2fs_create_casefold_cache(); if (err) goto free_compress_cache; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_casefold_cache; return 0; +free_casefold_cache: + f2fs_destroy_casefold_cache(); free_compress_cache: f2fs_destroy_compress_cache(); free_compress_mempool: @@ -5031,8 +5188,6 @@ free_post_read: f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); -free_shrinker: f2fs_exit_shrinker(); free_sysfs: f2fs_exit_sysfs(); @@ -5056,6 +5211,7 @@ fail: static void __exit exit_f2fs_fs(void) { + unregister_filesystem(&f2fs_fs_type); f2fs_destroy_casefold_cache(); f2fs_destroy_compress_cache(); f2fs_destroy_compress_mempool(); @@ -5064,7 +5220,6 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_iostat_processing(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); f2fs_exit_shrinker(); f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); @@ -5082,5 +5237,3 @@ module_exit(exit_f2fs_fs) MODULE_AUTHOR("Samsung Electronics's Praesto Team"); MODULE_DESCRIPTION("Flash Friendly File System"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32"); - diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index c56e8c873935..75134d69a0bd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -61,6 +61,12 @@ struct f2fs_attr { int id; }; +struct f2fs_base_attr { + struct attribute attr; + ssize_t (*show)(struct f2fs_base_attr *a, char *buf); + ssize_t (*store)(struct f2fs_base_attr *a, const char *buf, size_t len); +}; + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf); @@ -268,6 +274,13 @@ static ssize_t encoding_show(struct f2fs_attr *a, return sysfs_emit(buf, "(none)\n"); } +static ssize_t encoding_flags_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%x\n", + le16_to_cpu(F2FS_RAW_SUPER(sbi)->s_encoding_flags)); +} + static ssize_t mounted_time_sec_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -488,12 +501,12 @@ out: return ret; #ifdef CONFIG_F2FS_FAULT_INJECTION if (a->struct_type == FAULT_INFO_TYPE) { - if (f2fs_build_fault_attr(sbi, 0, t)) + if (f2fs_build_fault_attr(sbi, 0, t, FAULT_TYPE)) return -EINVAL; return count; } if (a->struct_type == FAULT_INFO_RATE) { - if (f2fs_build_fault_attr(sbi, t, 0)) + if (f2fs_build_fault_attr(sbi, t, 0, FAULT_RATE)) return -EINVAL; return count; } @@ -501,9 +514,7 @@ out: if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - F2FS_OPTION(sbi).root_reserved_blocks - - SEGS_TO_BLKS(sbi, - SM_I(sbi)->additional_reserved_segments))) { + F2FS_OPTION(sbi).root_reserved_blocks)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } @@ -789,6 +800,13 @@ out: return count; } + if (!strcmp(a->attr.name, "max_read_extent_count")) { + if (t > UINT_MAX) + return -EINVAL; + *ui = (unsigned int)t; + return count; + } + if (!strcmp(a->attr.name, "ipu_policy")) { if (t >= BIT(F2FS_IPU_MAX)) return -EINVAL; @@ -857,6 +875,25 @@ static void f2fs_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } +static ssize_t f2fs_base_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct f2fs_base_attr *a = container_of(attr, + struct f2fs_base_attr, attr); + + return a->show ? a->show(a, buf) : 0; +} + +static ssize_t f2fs_base_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct f2fs_base_attr *a = container_of(attr, + struct f2fs_base_attr, attr); + + return a->store ? a->store(a, buf, len) : 0; +} + /* * Note that there are three feature list entries: * 1) /sys/fs/f2fs/features @@ -875,18 +912,50 @@ static void f2fs_sb_release(struct kobject *kobj) * please add new on-disk feature in this list only. * - ref. F2FS_SB_FEATURE_RO_ATTR() */ -static ssize_t f2fs_feature_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) +static ssize_t f2fs_feature_show(struct f2fs_base_attr *a, char *buf) { return sysfs_emit(buf, "supported\n"); } #define F2FS_FEATURE_RO_ATTR(_name) \ -static struct f2fs_attr f2fs_attr_##_name = { \ +static struct f2fs_base_attr f2fs_base_attr_##_name = { \ .attr = {.name = __stringify(_name), .mode = 0444 }, \ .show = f2fs_feature_show, \ } +static ssize_t f2fs_tune_show(struct f2fs_base_attr *a, char *buf) +{ + unsigned int res = 0; + + if (!strcmp(a->attr.name, "reclaim_caches_kb")) + res = f2fs_donate_files(); + + return sysfs_emit(buf, "%u\n", res); +} + +static ssize_t f2fs_tune_store(struct f2fs_base_attr *a, + const char *buf, size_t count) +{ + unsigned long t; + int ret; + + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + + if (!strcmp(a->attr.name, "reclaim_caches_kb")) + f2fs_reclaim_caches(t); + + return count; +} + +#define F2FS_TUNE_RW_ATTR(_name) \ +static struct f2fs_base_attr f2fs_base_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0644 }, \ + .show = f2fs_tune_show, \ + .store = f2fs_tune_store, \ +} + static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -1054,10 +1123,13 @@ F2FS_SBI_GENERAL_RW_ATTR(revoked_atomic_block); F2FS_SBI_GENERAL_RW_ATTR(hot_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(warm_data_age_threshold); F2FS_SBI_GENERAL_RW_ATTR(last_age_weight); +/* read extent cache */ +F2FS_SBI_GENERAL_RW_ATTR(max_read_extent_count); #ifdef CONFIG_BLK_DEV_ZONED F2FS_SBI_GENERAL_RO_ATTR(unusable_blocks_per_sec); F2FS_SBI_GENERAL_RW_ATTR(blkzone_alloc_policy); #endif +F2FS_SBI_GENERAL_RW_ATTR(carve_out); /* STAT_INFO ATTR */ #ifdef CONFIG_F2FS_STAT_FS @@ -1093,6 +1165,7 @@ F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); +F2FS_GENERAL_RO_ATTR(encoding_flags); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); F2FS_GENERAL_RO_ATTR(pending_discard); @@ -1134,6 +1207,9 @@ F2FS_FEATURE_RO_ATTR(readonly); F2FS_FEATURE_RO_ATTR(compression); #endif F2FS_FEATURE_RO_ATTR(pin_file); +#ifdef CONFIG_UNICODE +F2FS_FEATURE_RO_ATTR(linear_lookup); +#endif #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -1205,6 +1281,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), ATTR_LIST(encoding), + ATTR_LIST(encoding_flags), ATTR_LIST(mounted_time_sec), #ifdef CONFIG_F2FS_STAT_FS ATTR_LIST(cp_foreground_calls), @@ -1244,41 +1321,47 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(hot_data_age_threshold), ATTR_LIST(warm_data_age_threshold), ATTR_LIST(last_age_weight), + ATTR_LIST(max_read_extent_count), + ATTR_LIST(carve_out), NULL, }; ATTRIBUTE_GROUPS(f2fs); +#define BASE_ATTR_LIST(name) (&f2fs_base_attr_##name.attr) static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_FS_ENCRYPTION - ATTR_LIST(encryption), - ATTR_LIST(test_dummy_encryption_v2), + BASE_ATTR_LIST(encryption), + BASE_ATTR_LIST(test_dummy_encryption_v2), #if IS_ENABLED(CONFIG_UNICODE) - ATTR_LIST(encrypted_casefold), + BASE_ATTR_LIST(encrypted_casefold), #endif #endif /* CONFIG_FS_ENCRYPTION */ #ifdef CONFIG_BLK_DEV_ZONED - ATTR_LIST(block_zoned), + BASE_ATTR_LIST(block_zoned), #endif - ATTR_LIST(atomic_write), - ATTR_LIST(extra_attr), - ATTR_LIST(project_quota), - ATTR_LIST(inode_checksum), - ATTR_LIST(flexible_inline_xattr), - ATTR_LIST(quota_ino), - ATTR_LIST(inode_crtime), - ATTR_LIST(lost_found), + BASE_ATTR_LIST(atomic_write), + BASE_ATTR_LIST(extra_attr), + BASE_ATTR_LIST(project_quota), + BASE_ATTR_LIST(inode_checksum), + BASE_ATTR_LIST(flexible_inline_xattr), + BASE_ATTR_LIST(quota_ino), + BASE_ATTR_LIST(inode_crtime), + BASE_ATTR_LIST(lost_found), #ifdef CONFIG_FS_VERITY - ATTR_LIST(verity), + BASE_ATTR_LIST(verity), #endif - ATTR_LIST(sb_checksum), + BASE_ATTR_LIST(sb_checksum), #if IS_ENABLED(CONFIG_UNICODE) - ATTR_LIST(casefold), + BASE_ATTR_LIST(casefold), #endif - ATTR_LIST(readonly), + BASE_ATTR_LIST(readonly), #ifdef CONFIG_F2FS_FS_COMPRESSION - ATTR_LIST(compression), + BASE_ATTR_LIST(compression), +#endif + BASE_ATTR_LIST(pin_file), +#ifdef CONFIG_UNICODE + BASE_ATTR_LIST(linear_lookup), #endif - ATTR_LIST(pin_file), NULL, }; ATTRIBUTE_GROUPS(f2fs_feat); @@ -1313,6 +1396,7 @@ F2FS_SB_FEATURE_RO_ATTR(sb_checksum, SB_CHKSUM); F2FS_SB_FEATURE_RO_ATTR(casefold, CASEFOLD); F2FS_SB_FEATURE_RO_ATTR(compression, COMPRESSION); F2FS_SB_FEATURE_RO_ATTR(readonly, RO); +F2FS_SB_FEATURE_RO_ATTR(device_alias, DEVICE_ALIAS); static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_encryption), @@ -1329,10 +1413,19 @@ static struct attribute *f2fs_sb_feat_attrs[] = { ATTR_LIST(sb_casefold), ATTR_LIST(sb_compression), ATTR_LIST(sb_readonly), + ATTR_LIST(sb_device_alias), NULL, }; ATTRIBUTE_GROUPS(f2fs_sb_feat); +F2FS_TUNE_RW_ATTR(reclaim_caches_kb); + +static struct attribute *f2fs_tune_attrs[] = { + BASE_ATTR_LIST(reclaim_caches_kb), + NULL, +}; +ATTRIBUTE_GROUPS(f2fs_tune); + static const struct sysfs_ops f2fs_attr_ops = { .show = f2fs_attr_show, .store = f2fs_attr_store, @@ -1352,15 +1445,34 @@ static struct kset f2fs_kset = { .kobj = {.ktype = &f2fs_ktype}, }; +static const struct sysfs_ops f2fs_feat_attr_ops = { + .show = f2fs_base_attr_show, + .store = f2fs_base_attr_store, +}; + static const struct kobj_type f2fs_feat_ktype = { .default_groups = f2fs_feat_groups, - .sysfs_ops = &f2fs_attr_ops, + .sysfs_ops = &f2fs_feat_attr_ops, }; static struct kobject f2fs_feat = { .kset = &f2fs_kset, }; +static const struct sysfs_ops f2fs_tune_attr_ops = { + .show = f2fs_base_attr_show, + .store = f2fs_base_attr_store, +}; + +static const struct kobj_type f2fs_tune_ktype = { + .default_groups = f2fs_tune_groups, + .sysfs_ops = &f2fs_tune_attr_ops, +}; + +static struct kobject f2fs_tune = { + .kset = &f2fs_kset, +}; + static ssize_t f2fs_stat_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { @@ -1462,7 +1574,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, le32_to_cpu(sbi->raw_super->segment_count_main); int i, j; - seq_puts(seq, "format: segment_type|valid_blocks|bitmaps\n" + seq_puts(seq, "format: segment_type|valid_blocks|bitmaps|mtime\n" "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); for (i = 0; i < total_segs; i++) { @@ -1472,6 +1584,7 @@ static int __maybe_unused segment_bits_seq_show(struct seq_file *seq, seq_printf(seq, "%d|%-3u|", se->type, se->valid_blocks); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); + seq_printf(seq, "| %llx", se->mtime); seq_putc(seq, '\n'); } return 0; @@ -1581,6 +1694,24 @@ static int __maybe_unused disk_map_seq_show(struct seq_file *seq, return 0; } +#ifdef CONFIG_F2FS_FAULT_INJECTION +static int __maybe_unused inject_stats_seq_show(struct seq_file *seq, + void *offset) +{ + struct super_block *sb = seq->private; + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_fault_info *ffi = &F2FS_OPTION(sbi).fault_info; + int i; + + seq_puts(seq, "fault_type injected_count\n"); + + for (i = 0; i < FAULT_MAX; i++) + seq_printf(seq, "%-24s%-10u\n", f2fs_fault_name[i], + ffi->inject_count[i]); + return 0; +} +#endif + int __init f2fs_init_sysfs(void) { int ret; @@ -1596,6 +1727,11 @@ int __init f2fs_init_sysfs(void) if (ret) goto put_kobject; + ret = kobject_init_and_add(&f2fs_tune, &f2fs_tune_ktype, + NULL, "tuning"); + if (ret) + goto put_kobject; + f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); if (!f2fs_proc_root) { ret = -ENOMEM; @@ -1603,7 +1739,9 @@ int __init f2fs_init_sysfs(void) } return 0; + put_kobject: + kobject_put(&f2fs_tune); kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); return ret; @@ -1611,6 +1749,7 @@ put_kobject: void f2fs_exit_sysfs(void) { + kobject_put(&f2fs_tune); kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); remove_proc_entry("fs/f2fs", NULL); @@ -1664,6 +1803,10 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) discard_plist_seq_show, sb); proc_create_single_data("disk_map", 0444, sbi->s_proc, disk_map_seq_show, sb); +#ifdef CONFIG_F2FS_FAULT_INJECTION + proc_create_single_data("inject_stats", 0444, sbi->s_proc, + inject_stats_seq_show, sb); +#endif return 0; put_feature_list_kobj: kobject_put(&sbi->s_feature_list_kobj); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 3f3874943679..58632a2b6613 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -136,7 +136,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, #ifdef CONFIG_F2FS_FS_SECURITY static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, - void *page) + void *folio) { const struct xattr *xattr; int err = 0; @@ -144,7 +144,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, for (xattr = xattr_array; xattr->name != NULL; xattr++) { err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, xattr->name, xattr->value, - xattr->value_len, (struct page *)page, 0); + xattr->value_len, folio, 0); if (err < 0) break; } @@ -152,10 +152,10 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, } int f2fs_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, struct page *ipage) + const struct qstr *qstr, struct folio *ifolio) { return security_inode_init_security(inode, dir, qstr, - &f2fs_initxattrs, ipage); + f2fs_initxattrs, ifolio); } #endif @@ -271,25 +271,25 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, return entry; } -static int read_inline_xattr(struct inode *inode, struct page *ipage, +static int read_inline_xattr(struct inode *inode, struct folio *ifolio, void *txattr_addr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int inline_size = inline_xattr_size(inode); - struct page *page = NULL; + struct folio *folio = NULL; void *inline_addr; - if (ipage) { - inline_addr = inline_xattr_addr(inode, ipage); + if (ifolio) { + inline_addr = inline_xattr_addr(inode, ifolio); } else { - page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) - return PTR_ERR(page); + folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(folio)) + return PTR_ERR(folio); - inline_addr = inline_xattr_addr(inode, page); + inline_addr = inline_xattr_addr(inode, folio); } memcpy(txattr_addr, inline_addr, inline_size); - f2fs_put_page(page, 1); + f2fs_folio_put(folio, true); return 0; } @@ -299,22 +299,22 @@ static int read_xattr_block(struct inode *inode, void *txattr_addr) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int inline_size = inline_xattr_size(inode); - struct page *xpage; + struct folio *xfolio; void *xattr_addr; /* The inode already has an extended attribute block. */ - xpage = f2fs_get_node_page(sbi, xnid); - if (IS_ERR(xpage)) - return PTR_ERR(xpage); + xfolio = f2fs_get_xnode_folio(sbi, xnid); + if (IS_ERR(xfolio)) + return PTR_ERR(xfolio); - xattr_addr = page_address(xpage); + xattr_addr = folio_address(xfolio); memcpy(txattr_addr + inline_size, xattr_addr, VALID_XATTR_BLOCK_SIZE); - f2fs_put_page(xpage, 1); + f2fs_folio_put(xfolio, true); return 0; } -static int lookup_all_xattrs(struct inode *inode, struct page *ipage, +static int lookup_all_xattrs(struct inode *inode, struct folio *ifolio, unsigned int index, unsigned int len, const char *name, struct f2fs_xattr_entry **xe, void **base_addr, int *base_size, @@ -338,7 +338,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, /* read from inline xattr */ if (inline_size) { - err = read_inline_xattr(inode, ipage, txattr_addr); + err = read_inline_xattr(inode, ifolio, txattr_addr); if (err) goto out; @@ -385,7 +385,7 @@ out: return err; } -static int read_all_xattrs(struct inode *inode, struct page *ipage, +static int read_all_xattrs(struct inode *inode, struct folio *ifolio, void **base_addr) { struct f2fs_xattr_header *header; @@ -402,7 +402,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, /* read from inline xattr */ if (inline_size) { - err = read_inline_xattr(inode, ipage, txattr_addr); + err = read_inline_xattr(inode, ifolio, txattr_addr); if (err) goto fail; } @@ -429,14 +429,14 @@ fail: } static inline int write_all_xattrs(struct inode *inode, __u32 hsize, - void *txattr_addr, struct page *ipage) + void *txattr_addr, struct folio *ifolio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); size_t inline_size = inline_xattr_size(inode); - struct page *in_page = NULL; + struct folio *in_folio = NULL; void *xattr_addr; void *inline_addr = NULL; - struct page *xpage; + struct folio *xfolio; nid_t new_nid = 0; int err = 0; @@ -446,73 +446,73 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, /* write to inline xattr */ if (inline_size) { - if (ipage) { - inline_addr = inline_xattr_addr(inode, ipage); + if (ifolio) { + inline_addr = inline_xattr_addr(inode, ifolio); } else { - in_page = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(in_page)) { + in_folio = f2fs_get_inode_folio(sbi, inode->i_ino); + if (IS_ERR(in_folio)) { f2fs_alloc_nid_failed(sbi, new_nid); - return PTR_ERR(in_page); + return PTR_ERR(in_folio); } - inline_addr = inline_xattr_addr(inode, in_page); + inline_addr = inline_xattr_addr(inode, in_folio); } - f2fs_wait_on_page_writeback(ipage ? ipage : in_page, + f2fs_folio_wait_writeback(ifolio ? ifolio : in_folio, NODE, true, true); /* no need to use xattr node block */ if (hsize <= inline_size) { err = f2fs_truncate_xattr_node(inode); f2fs_alloc_nid_failed(sbi, new_nid); if (err) { - f2fs_put_page(in_page, 1); + f2fs_folio_put(in_folio, true); return err; } memcpy(inline_addr, txattr_addr, inline_size); - set_page_dirty(ipage ? ipage : in_page); + folio_mark_dirty(ifolio ? ifolio : in_folio); goto in_page_out; } } /* write to xattr node block */ if (F2FS_I(inode)->i_xattr_nid) { - xpage = f2fs_get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + xfolio = f2fs_get_xnode_folio(sbi, F2FS_I(inode)->i_xattr_nid); + if (IS_ERR(xfolio)) { + err = PTR_ERR(xfolio); f2fs_alloc_nid_failed(sbi, new_nid); goto in_page_out; } f2fs_bug_on(sbi, new_nid); - f2fs_wait_on_page_writeback(xpage, NODE, true, true); + f2fs_folio_wait_writeback(xfolio, NODE, true, true); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); - xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET); - if (IS_ERR(xpage)) { - err = PTR_ERR(xpage); + xfolio = f2fs_new_node_folio(&dn, XATTR_NODE_OFFSET); + if (IS_ERR(xfolio)) { + err = PTR_ERR(xfolio); f2fs_alloc_nid_failed(sbi, new_nid); goto in_page_out; } f2fs_alloc_nid_done(sbi, new_nid); } - xattr_addr = page_address(xpage); + xattr_addr = folio_address(xfolio); if (inline_size) memcpy(inline_addr, txattr_addr, inline_size); memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); if (inline_size) - set_page_dirty(ipage ? ipage : in_page); - set_page_dirty(xpage); + folio_mark_dirty(ifolio ? ifolio : in_folio); + folio_mark_dirty(xfolio); - f2fs_put_page(xpage, 1); + f2fs_folio_put(xfolio, true); in_page_out: - f2fs_put_page(in_page, 1); + f2fs_folio_put(in_folio, true); return err; } int f2fs_getxattr(struct inode *inode, int index, const char *name, - void *buffer, size_t buffer_size, struct page *ipage) + void *buffer, size_t buffer_size, struct folio *ifolio) { struct f2fs_xattr_entry *entry = NULL; int error; @@ -528,11 +528,11 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - if (!ipage) + if (!ifolio) f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); - error = lookup_all_xattrs(inode, ipage, index, len, name, + error = lookup_all_xattrs(inode, ifolio, index, len, name, &entry, &base_addr, &base_size, &is_inline); - if (!ipage) + if (!ifolio) f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -627,7 +627,7 @@ static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry, static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *ipage, int flags) + struct folio *ifolio, int flags) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_entry *here, *last; @@ -651,7 +651,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (size > MAX_VALUE_LEN(inode)) return -E2BIG; retry: - error = read_all_xattrs(inode, ipage, &base_addr); + error = read_all_xattrs(inode, ifolio, &base_addr); if (error) return error; @@ -766,7 +766,7 @@ retry: *(u32 *)((u8 *)last + newsize) = 0; } - error = write_all_xattrs(inode, new_hsize, base_addr, ipage); + error = write_all_xattrs(inode, new_hsize, base_addr, ifolio); if (error) goto exit; @@ -800,7 +800,7 @@ exit: int f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *ipage, int flags) + struct folio *ifolio, int flags) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int err; @@ -815,14 +815,14 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, return err; /* this case is only from f2fs_init_inode_metadata */ - if (ipage) + if (ifolio) return __f2fs_setxattr(inode, index, name, value, - size, ipage, flags); + size, ifolio, flags); f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); f2fs_down_write(&F2FS_I(inode)->i_xattr_sem); - err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); + err = __f2fs_setxattr(inode, index, name, value, size, NULL, flags); f2fs_up_write(&F2FS_I(inode)->i_xattr_sem); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index a005ffdcf717..4fc0b2305fbd 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -127,26 +127,26 @@ extern const struct xattr_handler f2fs_xattr_security_handler; extern const struct xattr_handler * const f2fs_xattr_handlers[]; -extern int f2fs_setxattr(struct inode *, int, const char *, - const void *, size_t, struct page *, int); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, - size_t, struct page *); -extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); -extern int f2fs_init_xattr_caches(struct f2fs_sb_info *); -extern void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); +int f2fs_setxattr(struct inode *, int, const char *, const void *, + size_t, struct folio *, int); +int f2fs_getxattr(struct inode *, int, const char *, void *, + size_t, struct folio *); +ssize_t f2fs_listxattr(struct dentry *, char *, size_t); +int f2fs_init_xattr_caches(struct f2fs_sb_info *); +void f2fs_destroy_xattr_caches(struct f2fs_sb_info *); #else #define f2fs_xattr_handlers NULL #define f2fs_listxattr NULL static inline int f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *page, int flags) + struct folio *folio, int flags) { return -EOPNOTSUPP; } static inline int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, - size_t buffer_size, struct page *dpage) + size_t buffer_size, struct folio *dfolio) { return -EOPNOTSUPP; } @@ -155,11 +155,11 @@ static inline void f2fs_destroy_xattr_caches(struct f2fs_sb_info *sbi) { } #endif #ifdef CONFIG_F2FS_FS_SECURITY -extern int f2fs_init_security(struct inode *, struct inode *, - const struct qstr *, struct page *); +int f2fs_init_security(struct inode *, struct inode *, + const struct qstr *, struct folio *); #else static inline int f2fs_init_security(struct inode *inode, struct inode *dir, - const struct qstr *qstr, struct page *ipage) + const struct qstr *qstr, struct folio *ifolio) { return 0; } |