diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 20:36:31 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 20:36:31 +0300 |
commit | 4fc29c1aa375353ffe7c8fa171bf941b71ce29ef (patch) | |
tree | 5d4fa5f8c95831655cb89f1a37c0ea5daf847fbd | |
parent | 0e6acf0204da5b8705722a5f6806a4f55ed379d6 (diff) | |
parent | 5302fb000def84100740a84d7f176c0e167b2141 (diff) | |
download | linux-4fc29c1aa375353ffe7c8fa171bf941b71ce29ef.tar.xz |
Merge tag 'for-f2fs-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"The major change in this version is mitigating cpu overheads on write
paths by replacing redundant inode page updates with mark_inode_dirty
calls. And we tried to reduce lock contentions as well to improve
filesystem scalability. Other feature is setting F2FS automatically
when detecting host-managed SMR.
Enhancements:
- ioctl to move a range of data between files
- inject orphan inode errors
- avoid flush commands congestion
- support lazytime
Bug fixes:
- return proper results for some dentry operations
- fix deadlock in add_link failure
- disable extent_cache for fcollapse/finsert"
* tag 'for-f2fs-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (68 commits)
f2fs: clean up coding style and redundancy
f2fs: get victim segment again after new cp
f2fs: handle error case with f2fs_bug_on
f2fs: avoid data race when deciding checkpoin in f2fs_sync_file
f2fs: support an ioctl to move a range of data blocks
f2fs: fix to report error number of f2fs_find_entry
f2fs: avoid memory allocation failure due to a long length
f2fs: reset default idle interval value
f2fs: use blk_plug in all the possible paths
f2fs: fix to avoid data update racing between GC and DIO
f2fs: add maximum prefree segments
f2fs: disable extent_cache for fcollapse/finsert inodes
f2fs: refactor __exchange_data_block for speed up
f2fs: fix ERR_PTR returned by bio
f2fs: avoid mark_inode_dirty
f2fs: move i_size_write in f2fs_write_end
f2fs: fix to avoid redundant discard during fstrim
f2fs: avoid mismatching block range for discard
f2fs: fix incorrect f_bfree calculation in ->statfs
f2fs: use percpu_rw_semaphore
...
-rw-r--r-- | Documentation/filesystems/f2fs.txt | 7 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 9 | ||||
-rw-r--r-- | fs/f2fs/acl.h | 2 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 80 | ||||
-rw-r--r-- | fs/f2fs/data.c | 311 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 5 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 123 | ||||
-rw-r--r-- | fs/f2fs/extent_cache.c | 50 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 286 | ||||
-rw-r--r-- | fs/f2fs/file.c | 499 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 52 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 92 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 53 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 147 | ||||
-rw-r--r-- | fs/f2fs/node.c | 144 | ||||
-rw-r--r-- | fs/f2fs/node.h | 14 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 23 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 59 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 22 | ||||
-rw-r--r-- | fs/f2fs/shrinker.c | 5 | ||||
-rw-r--r-- | fs/f2fs/super.c | 134 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 20 |
22 files changed, 1387 insertions, 750 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index e1c9f0849da6..ecd808088362 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -109,7 +109,9 @@ background_gc=%s Turn on/off cleaning operations, namely garbage disable_roll_forward Disable the roll-forward recovery routine norecovery Disable the roll-forward recovery routine, mounted read- only (i.e., -o ro,disable_roll_forward) -discard Issue discard/TRIM commands when a segment is cleaned. +discard/nodiscard Enable/disable real-time discard in f2fs, if discard is + enabled, f2fs will issue discard/TRIM commands when a + segment is cleaned. no_heap Disable heap-style segment allocation which finds free segments for data from the beginning of main area, while for node from the end of main area. @@ -151,6 +153,9 @@ noinline_data Disable the inline data feature, inline data feature is enabled by default. data_flush Enable data flushing before checkpoint in order to persist data of regular and symlink. +mode=%s Control block allocation mode which supports "adaptive" + and "lfs". In "lfs" mode, there should be no random + writes towards main area. ================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index a31c7e859af6..4dcc9e28dc5c 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -201,7 +201,6 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) static int __f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { - struct f2fs_inode_info *fi = F2FS_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -214,7 +213,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, error = posix_acl_equiv_mode(acl, &inode->i_mode); if (error < 0) return error; - set_acl_inode(fi, inode->i_mode); + set_acl_inode(inode, inode->i_mode); if (error == 0) acl = NULL; } @@ -233,7 +232,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(inode, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -244,7 +243,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(inode, FI_ACL_MODE); return error; } @@ -385,6 +384,8 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, if (error) return error; + f2fs_mark_inode_dirty_sync(inode); + if (default_acl) { error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index 997ca8edb6cb..b2334d11dae8 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -37,7 +37,7 @@ struct f2fs_acl_header { #ifdef CONFIG_F2FS_FS_POSIX_ACL extern struct posix_acl *f2fs_get_acl(struct inode *, int); -extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int f2fs_set_acl(struct inode *, struct posix_acl *, int); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, struct page *); #else diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 124b4a3017b5..f94d01e7d001 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -48,7 +48,8 @@ repeat: goto repeat; } f2fs_wait_on_page_writeback(page, META, true); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); return page; } @@ -266,6 +267,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct blk_plug plug; long diff, written; /* collect a number of dirty meta pages and write together */ @@ -278,7 +280,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, /* if mounting is failed, skip writing node pages */ mutex_lock(&sbi->cp_mutex); diff = nr_pages_to_write(sbi, META, wbc); + blk_start_plug(&plug); written = sync_meta_pages(sbi, META, wbc->nr_to_write); + blk_finish_plug(&plug); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -366,9 +370,10 @@ static int f2fs_set_meta_page_dirty(struct page *page) { trace_f2fs_set_page_dirty(page, META); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); SetPagePrivate(page); f2fs_trace_pid(page); @@ -510,10 +515,11 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) spin_unlock(&im->ino_lock); } -void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) +void add_orphan_inode(struct inode *inode) { /* add new orphan ino entry into list */ - __add_ino_entry(sbi, ino, ORPHAN_INO); + __add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO); + update_inode_page(inode); } void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -761,28 +767,25 @@ fail_no_cp: static void __add_dirty_inode(struct inode *inode, enum inode_type type) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; - if (is_inode_flag_set(fi, flag)) + if (is_inode_flag_set(inode, flag)) return; - set_inode_flag(fi, flag); - list_add_tail(&fi->dirty_list, &sbi->inode_list[type]); + set_inode_flag(inode, flag); + list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]); stat_inc_dirty_inode(sbi, type); } static void __remove_dirty_inode(struct inode *inode, enum inode_type type) { - struct f2fs_inode_info *fi = F2FS_I(inode); int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE; - if (get_dirty_pages(inode) || - !is_inode_flag_set(F2FS_I(inode), flag)) + if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag)) return; - list_del_init(&fi->dirty_list); - clear_inode_flag(fi, flag); + list_del_init(&F2FS_I(inode)->dirty_list); + clear_inode_flag(inode, flag); stat_dec_dirty_inode(F2FS_I_SB(inode), type); } @@ -795,13 +798,12 @@ void update_dirty_page(struct inode *inode, struct page *page) !S_ISLNK(inode->i_mode)) return; - if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) { - spin_lock(&sbi->inode_lock[type]); + spin_lock(&sbi->inode_lock[type]); + if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH)) __add_dirty_inode(inode, type); - spin_unlock(&sbi->inode_lock[type]); - } - inode_inc_dirty_pages(inode); + spin_unlock(&sbi->inode_lock[type]); + SetPagePrivate(page); f2fs_trace_pid(page); } @@ -864,6 +866,34 @@ retry: goto retry; } +int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->inode_list[DIRTY_META]; + struct inode *inode; + struct f2fs_inode_info *fi; + s64 total = get_pages(sbi, F2FS_DIRTY_IMETA); + + while (total--) { + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return 0; + } + fi = list_entry(head->next, struct f2fs_inode_info, + gdirty_list); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + if (inode) { + update_inode_page(inode); + iput(inode); + } + }; + return 0; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -890,6 +920,14 @@ retry_flush_dents: goto retry_flush_dents; } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { + f2fs_unlock_all(sbi); + err = f2fs_sync_inode_meta(sbi); + if (err) + goto out; + goto retry_flush_dents; + } + /* * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. @@ -914,6 +952,8 @@ out: static void unblock_operations(struct f2fs_sb_info *sbi) { up_write(&sbi->node_write); + + build_free_nids(sbi); f2fs_unlock_all(sbi); } @@ -954,7 +994,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) * This avoids to conduct wrong roll-forward operations and uses * metapages, so should be called prior to sync_meta_pages below. */ - if (discard_next_dnode(sbi, discard_blk)) + if (!test_opt(sbi, LFS) && discard_next_dnode(sbi, discard_blk)) invalidate = true; /* Flush all the NAT/SIT pages */ diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ded224518978..e2624275d828 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -19,6 +19,8 @@ #include <linux/bio.h> #include <linux/prefetch.h> #include <linux/uio.h> +#include <linux/mm.h> +#include <linux/memcontrol.h> #include <linux/cleancache.h> #include "f2fs.h" @@ -45,7 +47,8 @@ static void f2fs_read_end_io(struct bio *bio) struct page *page = bvec->bv_page; if (!bio->bi_error) { - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } else { ClearPageUptodate(page); SetPageError(page); @@ -97,10 +100,15 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, return bio; } -static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio) +static inline void __submit_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) { - if (!is_read_io(bio_op(bio))) + if (!is_read_io(bio_op(bio))) { atomic_inc(&sbi->nr_wb_bios); + if (f2fs_sb_mounted_hmsmr(sbi->sb) && + current->plug && (type == DATA || type == NODE)) + blk_finish_plug(current->plug); + } submit_bio(bio); } @@ -118,7 +126,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) bio_set_op_attrs(io->bio, fio->op, fio->op_flags); - __submit_bio(io->sbi, io->bio); + __submit_bio(io->sbi, io->bio, fio->type); io->bio = NULL; } @@ -240,7 +248,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio->bi_rw = fio->op_flags; bio_set_op_attrs(bio, fio->op, fio->op_flags); - __submit_bio(fio->sbi, bio); + __submit_bio(fio->sbi, bio, fio->type); return 0; } @@ -326,7 +334,7 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (!count) return 0; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count))) return -ENOSPC; @@ -348,9 +356,6 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (set_page_dirty(dn->node_page)) dn->node_changed = true; - - mark_inode_dirty(dn->inode); - sync_inode_page(dn); return 0; } @@ -446,7 +451,8 @@ got_it: */ if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); unlock_page(page); return page; } @@ -505,14 +511,14 @@ repeat: /* wait for read completion */ lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } return page; } @@ -557,7 +563,8 @@ struct page *get_new_data_page(struct inode *inode, if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } else { f2fs_put_page(page, 1); @@ -569,11 +576,8 @@ struct page *get_new_data_page(struct inode *inode, } got_it: if (new_i_size && i_size_read(inode) < - ((loff_t)(index + 1) << PAGE_SHIFT)) { - i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); - /* Only the directory inode sets new_i_size */ - set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); - } + ((loff_t)(index + 1) << PAGE_SHIFT)) + f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); return page; } @@ -586,7 +590,7 @@ static int __allocate_data_block(struct dnode_of_data *dn) pgoff_t fofs; blkcnt_t count = 1; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node); @@ -611,7 +615,7 @@ alloc: fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT)) - i_size_write(dn->inode, + f2fs_i_size_write(dn->inode, ((loff_t)(fofs + 1) << PAGE_SHIFT)); return 0; } @@ -660,7 +664,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, unsigned int maxblocks = map->m_len; struct dnode_of_data dn; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA; + int mode = create ? ALLOC_NODE : LOOKUP_NODE; pgoff_t pgofs, end_offset, end; int err = 0, ofs = 1; unsigned int ofs_in_node, last_ofs_in_node; @@ -723,8 +727,7 @@ next_block: } else { err = __allocate_data_block(&dn); if (!err) { - set_inode_flag(F2FS_I(inode), - FI_APPEND_WRITE); + set_inode_flag(inode, FI_APPEND_WRITE); allocated = true; } } @@ -795,8 +798,6 @@ skip: else if (dn.ofs_in_node < end_offset) goto next_block; - if (allocated) - sync_inode_page(&dn); f2fs_put_dnode(&dn); if (create) { @@ -807,8 +808,6 @@ skip: goto next_dnode; sync_out: - if (allocated) - sync_inode_page(&dn); f2fs_put_dnode(&dn); unlock_out: if (create) { @@ -968,6 +967,37 @@ out: return ret; } +struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, + unsigned nr_pages) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct fscrypt_ctx *ctx = NULL; + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio; + + if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + ctx = fscrypt_get_ctx(inode, GFP_NOFS); + if (IS_ERR(ctx)) + return ERR_CAST(ctx); + + /* wait the page to be moved by cleaning */ + f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); + } + + bio = bio_alloc(GFP_KERNEL, min_t(int, nr_pages, BIO_MAX_PAGES)); + if (!bio) { + if (ctx) + fscrypt_release_ctx(ctx); + return ERR_PTR(-ENOMEM); + } + bio->bi_bdev = bdev; + bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr); + bio->bi_end_io = f2fs_read_end_io; + bio->bi_private = ctx; + + return bio; +} + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -986,7 +1016,6 @@ static int f2fs_mpage_readpages(struct address_space *mapping, sector_t last_block; sector_t last_block_in_file; sector_t block_nr; - struct block_device *bdev = inode->i_sb->s_bdev; struct f2fs_map_blocks map; map.m_pblk = 0; @@ -1047,7 +1076,8 @@ got_it: } } else { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); unlock_page(page); goto next_page; } @@ -1058,35 +1088,15 @@ got_it: */ if (bio && (last_block_in_bio != block_nr - 1)) { submit_and_realloc: - __submit_bio(F2FS_I_SB(inode), bio); + __submit_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } if (bio == NULL) { - struct fscrypt_ctx *ctx = NULL; - - if (f2fs_encrypted_inode(inode) && - S_ISREG(inode->i_mode)) { - - ctx = fscrypt_get_ctx(inode, GFP_NOFS); - if (IS_ERR(ctx)) - goto set_error_page; - - /* wait the page to be moved by cleaning */ - f2fs_wait_on_encrypted_page_writeback( - F2FS_I_SB(inode), block_nr); - } - - bio = bio_alloc(GFP_KERNEL, - min_t(int, nr_pages, BIO_MAX_PAGES)); - if (!bio) { - if (ctx) - fscrypt_release_ctx(ctx); + bio = f2fs_grab_bio(inode, block_nr, nr_pages); + if (IS_ERR(bio)) { + bio = NULL; goto set_error_page; } - bio->bi_bdev = bdev; - bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr); - bio->bi_end_io = f2fs_read_end_io; - bio->bi_private = ctx; bio_set_op_attrs(bio, REQ_OP_READ, 0); } @@ -1102,7 +1112,7 @@ set_error_page: goto next_page; confused: if (bio) { - __submit_bio(F2FS_I_SB(inode), bio); + __submit_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; } unlock_page(page); @@ -1112,7 +1122,7 @@ next_page: } BUG_ON(pages && !list_empty(pages)); if (bio) - __submit_bio(F2FS_I_SB(inode), bio); + __submit_bio(F2FS_I_SB(inode), bio, DATA); return 0; } @@ -1201,14 +1211,14 @@ retry_encrypt: !IS_ATOMIC_WRITTEN_PAGE(page) && need_inplace_update(inode))) { rewrite_data_page(fio); - set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + set_inode_flag(inode, FI_UPDATE_WRITE); trace_f2fs_do_write_data_page(page, IPU); } else { write_data_page(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) - set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); } out_writepage: f2fs_put_dnode(&dn); @@ -1223,6 +1233,7 @@ static int f2fs_write_data_page(struct page *page, loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_SHIFT; + loff_t psize = (page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; @@ -1260,20 +1271,18 @@ write: available_free_memory(sbi, BASE_CHECK)))) goto redirty_out; + /* we should bypass data pages to proceed the kworkder jobs */ + if (unlikely(f2fs_cp_error(sbi))) { + mapping_set_error(page->mapping, -EIO); + goto out; + } + /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { - if (unlikely(f2fs_cp_error(sbi))) - goto redirty_out; err = do_write_data_page(&fio); goto done; } - /* we should bypass data pages to proceed the kworkder jobs */ - if (unlikely(f2fs_cp_error(sbi))) { - SetPageError(page); - goto out; - } - if (!wbc->for_reclaim) need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0)) @@ -1285,6 +1294,8 @@ write: err = f2fs_write_inline_data(inode, page); if (err == -EAGAIN) err = do_write_data_page(&fio); + if (F2FS_I(inode)->last_disk_size < psize) + F2FS_I(inode)->last_disk_size = psize; f2fs_unlock_op(sbi); done: if (err && err != -ENOENT) @@ -1311,16 +1322,8 @@ out: redirty_out: redirty_page_for_writepage(wbc, page); - return AOP_WRITEPAGE_ACTIVATE; -} - -static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, - void *data) -{ - struct address_space *mapping = data; - int ret = mapping->a_ops->writepage(page, wbc); - mapping_set_error(mapping, ret); - return ret; + unlock_page(page); + return err; } /* @@ -1329,8 +1332,7 @@ static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, * warm/hot data page. */ static int f2fs_write_cache_pages(struct address_space *mapping, - struct writeback_control *wbc, writepage_t writepage, - void *data) + struct writeback_control *wbc) { int ret = 0; int done = 0; @@ -1343,10 +1345,9 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int cycled; int range_whole = 0; int tag; - int step = 0; pagevec_init(&pvec, 0); -next: + if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; @@ -1401,9 +1402,6 @@ continue_unlock: goto continue_unlock; } - if (step == is_cold_data(page)) - goto continue_unlock; - if (PageWriteback(page)) { if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, @@ -1416,16 +1414,11 @@ continue_unlock: if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = (*writepage)(page, wbc, data); + ret = mapping->a_ops->writepage(page, wbc); if (unlikely(ret)) { - if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(page); - ret = 0; - } else { - done_index = page->index + 1; - done = 1; - break; - } + done_index = page->index + 1; + done = 1; + break; } if (--wbc->nr_to_write <= 0 && @@ -1438,11 +1431,6 @@ continue_unlock: cond_resched(); } - if (step < 1) { - step++; - goto next; - } - if (!cycled && !done) { cycled = 1; index = 0; @@ -1460,9 +1448,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - bool locked = false; + struct blk_plug plug; int ret; - long diff; /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) @@ -1478,7 +1465,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, goto skip_write; /* skip writing during file defragment */ - if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG)) + if (is_inode_flag_set(inode, FI_DO_DEFRAG)) goto skip_write; /* during POR, we don't need to trigger writepage at all. */ @@ -1487,20 +1474,16 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); - diff = nr_pages_to_write(sbi, DATA, wbc); - - if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) { - mutex_lock(&sbi->writepages); - locked = true; - } - ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); - f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE); - if (locked) - mutex_unlock(&sbi->writepages); + blk_start_plug(&plug); + ret = f2fs_write_cache_pages(mapping, wbc); + blk_finish_plug(&plug); + /* + * if some pages were truncated, we cannot guarantee its mapping->host + * to detect pending bios. + */ + f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_inode(inode); - - wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return ret; skip_write: @@ -1558,7 +1541,7 @@ restart: if (f2fs_has_inline_data(inode)) { if (pos + len <= MAX_INLINE_DATA) { read_inline_data(page, ipage); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_inode_flag(inode, FI_DATA_EXIST); if (inode->i_nlink) set_inline_node(ipage); } else { @@ -1668,39 +1651,35 @@ repeat: if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); } else { - struct f2fs_io_info fio = { - .sbi = sbi, - .type = DATA, - .op = REQ_OP_READ, - .op_flags = READ_SYNC, - .old_blkaddr = blkaddr, - .new_blkaddr = blkaddr, - .page = page, - .encrypted_page = NULL, - }; - err = f2fs_submit_page_bio(&fio); - if (err) - goto fail; + struct bio *bio; - lock_page(page); - if (unlikely(!PageUptodate(page))) { - err = -EIO; + bio = f2fs_grab_bio(inode, blkaddr, 1); + if (IS_ERR(bio)) { + err = PTR_ERR(bio); goto fail; } + bio_set_op_attrs(bio, REQ_OP_READ, READ_SYNC); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { + bio_put(bio); + err = -EFAULT; + goto fail; + } + + __submit_bio(sbi, bio, DATA); + + lock_page(page); if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } - - /* avoid symlink page */ - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - err = fscrypt_decrypt_page(page); - if (err) - goto fail; + if (unlikely(!PageUptodate(page))) { + err = -EIO; + goto fail; } } out_update: - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); out_clear: clear_cold_data(page); return 0; @@ -1721,13 +1700,11 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); set_page_dirty(page); + f2fs_put_page(page, 1); - if (pos + copied > i_size_read(inode)) { - i_size_write(inode, pos + copied); - mark_inode_dirty(inode); - } + if (pos + copied > i_size_read(inode)) + f2fs_i_size_write(inode, pos + copied); - f2fs_put_page(page, 1); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } @@ -1752,6 +1729,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); loff_t offset = iocb->ki_pos; + int rw = iov_iter_rw(iter); int err; err = check_direct_IO(inode, iter, offset); @@ -1760,18 +1738,23 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) return 0; + if (test_opt(F2FS_I_SB(inode), LFS)) + return 0; - trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter)); + trace_f2fs_direct_IO_enter(inode, offset, count, rw); + down_read(&F2FS_I(inode)->dio_rwsem[rw]); err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio); - if (iov_iter_rw(iter) == WRITE) { + up_read(&F2FS_I(inode)->dio_rwsem[rw]); + + if (rw == WRITE) { if (err > 0) - set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE); + set_inode_flag(inode, FI_UPDATE_WRITE); else if (err < 0) f2fs_write_failed(mapping, offset + count); } - trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err); + trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); return err; } @@ -1818,6 +1801,35 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 1; } +/* + * This was copied from __set_page_dirty_buffers which gives higher performance + * in very high speed storages. (e.g., pmem) + */ +void f2fs_set_page_dirty_nobuffers(struct page *page) +{ + struct address_space *mapping = page->mapping; + unsigned long flags; + + if (unlikely(!mapping)) + return; + + spin_lock(&mapping->private_lock); + lock_page_memcg(page); + SetPageDirty(page); + spin_unlock(&mapping->private_lock); + + spin_lock_irqsave(&mapping->tree_lock, flags); + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + unlock_page_memcg(page); + + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return; +} + static int f2fs_set_data_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; @@ -1825,7 +1837,8 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (f2fs_is_atomic_file(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { @@ -1840,7 +1853,7 @@ static int f2fs_set_data_page_dirty(struct page *page) } if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); update_dirty_page(inode, page); return 1; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index d89a425055d0..badd407bb622 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -47,6 +47,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->wb_bios = atomic_read(&sbi->nr_wb_bios); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; @@ -304,8 +305,8 @@ static int stat_show(struct seq_file *s, void *v) si->inmem_pages, si->wb_bios); seq_printf(s, " - nodes: %4lld in %4d\n", si->ndirty_node, si->node_pages); - seq_printf(s, " - dents: %4lld in dirs:%4d\n", - si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - dents: %4lld in dirs:%4d (%4d)\n", + si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); seq_printf(s, " - datas: %4lld in files:%4d\n", si->ndirty_data, si->ndirty_files); seq_printf(s, " - meta: %4lld in %4d\n", diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f9313f684540..a485f68a76b1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -185,8 +185,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, /* no need to allocate new dentry pages to all the indices */ dentry_page = find_data_page(dir, bidx); if (IS_ERR(dentry_page)) { - room = true; - continue; + if (PTR_ERR(dentry_page) == -ENOENT) { + room = true; + continue; + } else { + *res_page = dentry_page; + break; + } } de = find_in_block(dentry_page, fname, namehash, &max_slots, @@ -223,19 +228,22 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct fscrypt_name fname; int err; - *res_page = NULL; - err = fscrypt_setup_filename(dir, child, 1, &fname); - if (err) + if (err) { + *res_page = ERR_PTR(err); return NULL; + } if (f2fs_has_inline_dentry(dir)) { + *res_page = NULL; de = find_in_inline_dir(dir, &fname, res_page); goto out; } - if (npages == 0) + if (npages == 0) { + *res_page = NULL; goto out; + } max_depth = F2FS_I(dir)->i_current_depth; if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) { @@ -243,13 +251,13 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, "Corrupted max_depth of %lu: %u", dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; - F2FS_I(dir)->i_current_depth = max_depth; - mark_inode_dirty(dir); + f2fs_i_depth_write(dir, max_depth); } for (level = 0; level < max_depth; level++) { + *res_page = NULL; de = find_in_level(dir, level, &fname, res_page); - if (de) + if (de || IS_ERR(*res_page)) break; } out: @@ -259,35 +267,22 @@ out: struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) { - struct page *page; - struct f2fs_dir_entry *de; - struct f2fs_dentry_block *dentry_blk; - - if (f2fs_has_inline_dentry(dir)) - return f2fs_parent_inline_dir(dir, p); - - page = get_lock_data_page(dir, 0, false); - if (IS_ERR(page)) - return NULL; + struct qstr dotdot = QSTR_INIT("..", 2); - dentry_blk = kmap(page); - de = &dentry_blk->dentry[1]; - *p = page; - unlock_page(page); - return de; + return f2fs_find_entry(dir, &dotdot, p); } -ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) +ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr, + struct page **page) { ino_t res = 0; struct f2fs_dir_entry *de; - struct page *page; - de = f2fs_find_entry(dir, qstr, &page); + de = f2fs_find_entry(dir, qstr, page); if (de) { res = le32_to_cpu(de->ino); - f2fs_dentry_kunmap(dir, page); - f2fs_put_page(page, 0); + f2fs_dentry_kunmap(dir, *page); + f2fs_put_page(*page, 0); } return res; @@ -303,9 +298,9 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, set_de_type(de, inode->i_mode); f2fs_dentry_kunmap(dir, page); set_page_dirty(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty(dir); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + f2fs_mark_inode_dirty_sync(dir); f2fs_put_page(page, 1); } @@ -385,7 +380,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, struct page *page; int err; - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { + if (is_inode_flag_set(inode, FI_NEW_INODE)) { page = new_inode_page(inode); if (IS_ERR(page)) return page; @@ -429,7 +424,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, * This file should be checkpointed during fsync. * We lost i_pino from now on. */ - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + if (is_inode_flag_set(inode, FI_INC_LINK)) { file_lost_pino(inode); /* * If link the tmpfile to alias through linkat path, @@ -437,14 +432,11 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, */ if (inode->i_nlink == 0) remove_orphan_inode(F2FS_I_SB(dir), inode->i_ino); - inc_nlink(inode); + f2fs_i_links_write(inode, true); } return page; put_error: - /* truncate empty dir pages */ - truncate_inode_pages(&inode->i_data, 0); - clear_nlink(inode); update_inode(inode, page); f2fs_put_page(page, 1); @@ -454,23 +446,19 @@ put_error: void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - if (inode && is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { - if (S_ISDIR(inode->i_mode)) { - inc_nlink(dir); - set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); - } - clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); + if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { + if (S_ISDIR(inode->i_mode)) + f2fs_i_links_write(dir, true); + clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty(dir); + f2fs_mark_inode_dirty_sync(dir); - if (F2FS_I(dir)->i_current_depth != current_depth) { - F2FS_I(dir)->i_current_depth = current_depth; - set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); - } + if (F2FS_I(dir)->i_current_depth != current_depth) + f2fs_i_depth_write(dir, current_depth); - if (inode && is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + if (inode && is_inode_flag_set(inode, FI_INC_LINK)) + clear_inode_flag(inode, FI_INC_LINK); } int room_for_filename(const void *bitmap, int slots, int max_slots) @@ -596,9 +584,7 @@ add_dentry: set_page_dirty(dentry_page); if (inode) { - /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); + f2fs_i_pino_write(inode, dir->i_ino); f2fs_put_page(page, 1); } @@ -607,10 +593,6 @@ fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { - update_inode_page(dir); - clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); - } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); @@ -657,42 +639,34 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) err = PTR_ERR(page); goto fail; } - /* we don't need to mark_inode_dirty now */ - update_inode(inode, page); f2fs_put_page(page, 1); - clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); + clear_inode_flag(inode, FI_NEW_INODE); fail: up_write(&F2FS_I(inode)->i_sem); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return err; } -void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) +void f2fs_drop_nlink(struct inode *dir, struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); down_write(&F2FS_I(inode)->i_sem); - if (S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - if (page) - update_inode(dir, page); - else - update_inode_page(dir); - } + if (S_ISDIR(inode->i_mode)) + f2fs_i_links_write(dir, false); inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); + f2fs_i_links_write(inode, false); if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); - i_size_write(inode, 0); + f2fs_i_links_write(inode, false); + f2fs_i_size_write(inode, 0); } up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); + add_orphan_inode(inode); else release_orphan_inode(sbi); } @@ -730,9 +704,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); dir->i_ctime = dir->i_mtime = CURRENT_TIME; + f2fs_mark_inode_dirty_sync(dir); if (inode) - f2fs_drop_nlink(dir, inode, NULL); + f2fs_drop_nlink(dir, inode); if (bit_pos == NR_DENTRY_IN_BLOCK && !truncate_hole(dir, page->index, page->index + 1)) { diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 5bfcdb9b69f2..2b06d4fcd954 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -170,8 +170,10 @@ static void __drop_largest_extent(struct inode *inode, { struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; - if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) + if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { largest->len = 0; + f2fs_mark_inode_dirty_sync(inode); + } } /* return true, if inode page is changed */ @@ -335,11 +337,12 @@ lookup_neighbors: return en; } -static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, +static struct extent_node *__try_merge_extent_node(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { @@ -360,7 +363,7 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, if (!en) return NULL; - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); spin_lock(&sbi->extent_lock); if (!list_empty(&en->list)) { @@ -371,11 +374,12 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, return en; } -static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, +static struct extent_node *__insert_extent_tree(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, struct rb_node *insert_parent) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct rb_node **p = &et->root.rb_node; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -402,7 +406,7 @@ do_insert: if (!en) return NULL; - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); /* update in global extent list */ spin_lock(&sbi->extent_lock); @@ -431,7 +435,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, write_lock(&et->lock); - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) { + if (is_inode_flag_set(inode, FI_NO_EXTENT)) { write_unlock(&et->lock); return false; } @@ -473,7 +477,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, end, end - dei.fofs + dei.blk, org_end - end); - en1 = __insert_extent_tree(sbi, et, &ei, + en1 = __insert_extent_tree(inode, et, &ei, NULL, NULL); next_en = en1; } else { @@ -494,7 +498,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, } if (parts) - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); else __release_extent_node(sbi, et, en); @@ -514,20 +518,20 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, if (blkaddr) { set_extent_info(&ei, fofs, blkaddr, len); - if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) - __insert_extent_tree(sbi, et, &ei, + if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en)) + __insert_extent_tree(inode, et, &ei, insert_p, insert_parent); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { - et->largest.len = 0; - set_inode_flag(F2FS_I(inode), FI_NO_EXTENT); + __drop_largest_extent(inode, 0, UINT_MAX); + set_inode_flag(inode, FI_NO_EXTENT); } } - if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + if (is_inode_flag_set(inode, FI_NO_EXTENT)) __free_extent_tree(sbi, et); write_unlock(&et->lock); @@ -627,6 +631,19 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode) return node_cnt; } +void f2fs_drop_extent_tree(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct extent_tree *et = F2FS_I(inode)->extent_tree; + + set_inode_flag(inode, FI_NO_EXTENT); + + write_lock(&et->lock); + __free_extent_tree(sbi, et); + __drop_largest_extent(inode, 0, UINT_MAX); + write_unlock(&et->lock); +} + void f2fs_destroy_extent_tree(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -685,9 +702,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; - - if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1)) - sync_inode_page(dn); + f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1); } void f2fs_update_extent_cache_range(struct dnode_of_data *dn, @@ -697,8 +712,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn, if (!f2fs_may_extent_tree(dn->inode)) return; - if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len)) - sync_inode_page(dn); + f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len); } void init_extent_cache_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 23ae6a81ccd6..7890e9071499 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -45,6 +45,7 @@ enum { FAULT_ORPHAN, FAULT_BLOCK, FAULT_DIR_DEPTH, + FAULT_EVICT_INODE, FAULT_MAX, }; @@ -74,6 +75,8 @@ static inline bool time_to_inject(int type) return false; else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type)) return false; + else if (type == FAULT_EVICT_INODE && !IS_FAULT_SET(type)) + return false; atomic_inc(&f2fs_fault.inject_ops); if (atomic_read(&f2fs_fault.inject_ops) >= f2fs_fault.inject_rate) { @@ -108,6 +111,8 @@ static inline bool time_to_inject(int type) #define F2FS_MOUNT_FORCE_FG_GC 0x00004000 #define F2FS_MOUNT_DATA_FLUSH 0x00008000 #define F2FS_MOUNT_FAULT_INJECTION 0x00010000 +#define F2FS_MOUNT_ADAPTIVE 0x00020000 +#define F2FS_MOUNT_LFS 0x00040000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -128,6 +133,7 @@ struct f2fs_mount_info { }; #define F2FS_FEATURE_ENCRYPT 0x0001 +#define F2FS_FEATURE_HMSMR 0x0002 #define F2FS_HAS_FEATURE(sb, mask) \ ((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0) @@ -158,7 +164,7 @@ enum { #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) #define DEF_CP_INTERVAL 60 /* 60 secs */ -#define DEF_IDLE_INTERVAL 120 /* 2 mins */ +#define DEF_IDLE_INTERVAL 5 /* 5 secs */ struct cp_control { int reason; @@ -262,6 +268,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) #define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) +#define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ + struct f2fs_move_range) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -291,6 +299,13 @@ struct f2fs_defragment { u64 len; }; +struct f2fs_move_range { + u32 dst_fd; /* destination fd */ + u64 pos_in; /* start position in src_fd */ + u64 pos_out; /* start position in dst_fd */ + u64 len; /* size to move */ +}; + /* * For INODE and NODE manager */ @@ -441,11 +456,14 @@ struct f2fs_inode_info { unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ + loff_t last_disk_size; /* lastly written file size */ - struct list_head dirty_list; /* linked in global dirty list */ + struct list_head dirty_list; /* dirty list for dirs and files */ + struct list_head gdirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ + struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ }; static inline void get_extent_info(struct extent_info *ext, @@ -498,11 +516,14 @@ static inline bool __is_front_mergeable(struct extent_info *cur, return __is_extent_mergeable(cur, front); } -static inline void __try_update_largest_extent(struct extent_tree *et, - struct extent_node *en) +extern void f2fs_mark_inode_dirty_sync(struct inode *); +static inline void __try_update_largest_extent(struct inode *inode, + struct extent_tree *et, struct extent_node *en) { - if (en->ei.len > et->largest.len) + if (en->ei.len > et->largest.len) { et->largest = en->ei; + f2fs_mark_inode_dirty_sync(inode); + } } struct f2fs_nm_info { @@ -517,7 +538,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -599,6 +620,7 @@ struct flush_cmd { struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ + atomic_t submit_flush; /* # of issued flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -655,6 +677,7 @@ enum count_type { F2FS_DIRTY_NODES, F2FS_DIRTY_META, F2FS_INMEM_PAGES, + F2FS_DIRTY_IMETA, NR_COUNT_TYPE, }; @@ -706,6 +729,7 @@ struct f2fs_bio_info { enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ + DIRTY_META, /* for all dirtied inode metadata */ NR_INODE_TYPE, }; @@ -757,14 +781,14 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ + struct mutex wio_mutex[NODE + 1]; /* bio ordering for NODE/DATA */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ + struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ - struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ @@ -1050,22 +1074,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + percpu_down_read(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + percpu_up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write(&sbi->cp_rwsem); + percpu_down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - up_write(&sbi->cp_rwsem); + percpu_up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) @@ -1120,34 +1144,37 @@ static inline bool f2fs_has_xattr_block(unsigned int ofs) return ofs == XATTR_NODE_OFFSET; } +static inline void f2fs_i_blocks_write(struct inode *, blkcnt_t, bool); static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count) { - block_t valid_block_count; + blkcnt_t diff; - spin_lock(&sbi->stat_lock); #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_BLOCK)) { - spin_unlock(&sbi->stat_lock); + if (time_to_inject(FAULT_BLOCK)) return false; - } #endif - valid_block_count = - sbi->total_valid_block_count + (block_t)(*count); - if (unlikely(valid_block_count > sbi->user_block_count)) { - *count = sbi->user_block_count - sbi->total_valid_block_count; + /* + * let's increase this in prior to actual block count change in order + * for f2fs_sync_file to avoid data races when deciding checkpoint. + */ + percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); + + spin_lock(&sbi->stat_lock); + sbi->total_valid_block_count += (block_t)(*count); + if (unlikely(sbi->total_valid_block_count > sbi->user_block_count)) { + diff = sbi->total_valid_block_count - sbi->user_block_count; + *count -= diff; + sbi->total_valid_block_count = sbi->user_block_count; if (!*count) { spin_unlock(&sbi->stat_lock); + percpu_counter_sub(&sbi->alloc_valid_block_count, diff); return false; } } - /* *count can be recalculated */ - inode->i_blocks += *count; - sbi->total_valid_block_count = - sbi->total_valid_block_count + (block_t)(*count); spin_unlock(&sbi->stat_lock); - percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); + f2fs_i_blocks_write(inode, *count, true); return true; } @@ -1158,9 +1185,9 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, spin_lock(&sbi->stat_lock); f2fs_bug_on(sbi, sbi->total_valid_block_count < (block_t) count); f2fs_bug_on(sbi, inode->i_blocks < count); - inode->i_blocks -= count; sbi->total_valid_block_count -= (block_t)count; spin_unlock(&sbi->stat_lock); + f2fs_i_blocks_write(inode, count, false); } static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) @@ -1295,7 +1322,7 @@ static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, } if (inode) - inode->i_blocks++; + f2fs_i_blocks_write(inode, 1, true); sbi->total_valid_node_count++; sbi->total_valid_block_count++; @@ -1314,7 +1341,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, !sbi->total_valid_node_count); f2fs_bug_on(sbi, !inode->i_blocks); - inode->i_blocks--; + f2fs_i_blocks_write(inode, 1, false); sbi->total_valid_node_count--; sbi->total_valid_block_count--; @@ -1511,12 +1538,12 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) enum { FI_NEW_INODE, /* indicate newly allocated inode */ FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_AUTO_RECOVER, /* indicate inode is recoverable */ FI_DIRTY_DIR, /* indicate directory has dirty pages */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ FI_FREE_NID, /* free allocated nide */ - FI_UPDATE_DIR, /* should update inode block for consistency */ FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ @@ -1534,64 +1561,143 @@ enum { FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ }; -static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) +static inline void __mark_inode_dirty_flag(struct inode *inode, + int flag, bool set) +{ + switch (flag) { + case FI_INLINE_XATTR: + case FI_INLINE_DATA: + case FI_INLINE_DENTRY: + if (set) + return; + case FI_DATA_EXIST: + case FI_INLINE_DOTS: + f2fs_mark_inode_dirty_sync(inode); + } +} + +static inline void set_inode_flag(struct inode *inode, int flag) +{ + if (!test_bit(flag, &F2FS_I(inode)->flags)) + set_bit(flag, &F2FS_I(inode)->flags); + __mark_inode_dirty_flag(inode, flag, true); +} + +static inline int is_inode_flag_set(struct inode *inode, int flag) +{ + return test_bit(flag, &F2FS_I(inode)->flags); +} + +static inline void clear_inode_flag(struct inode *inode, int flag) +{ + if (test_bit(flag, &F2FS_I(inode)->flags)) + clear_bit(flag, &F2FS_I(inode)->flags); + __mark_inode_dirty_flag(inode, flag, false); +} + +static inline void set_acl_inode(struct inode *inode, umode_t mode) { - if (!test_bit(flag, &fi->flags)) - set_bit(flag, &fi->flags); + F2FS_I(inode)->i_acl_mode = mode; + set_inode_flag(inode, FI_ACL_MODE); + f2fs_mark_inode_dirty_sync(inode); } -static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) +static inline void f2fs_i_links_write(struct inode *inode, bool inc) { - return test_bit(flag, &fi->flags); + if (inc) + inc_nlink(inode); + else + drop_nlink(inode); + f2fs_mark_inode_dirty_sync(inode); +} + +static inline void f2fs_i_blocks_write(struct inode *inode, + blkcnt_t diff, bool add) +{ + bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); + bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); + + inode->i_blocks = add ? inode->i_blocks + diff : + inode->i_blocks - diff; + f2fs_mark_inode_dirty_sync(inode); + if (clean || recover) + set_inode_flag(inode, FI_AUTO_RECOVER); +} + +static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) +{ + bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); + bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); + + if (i_size_read(inode) == i_size) + return; + + i_size_write(inode, i_size); + f2fs_mark_inode_dirty_sync(inode); + if (clean || recover) + set_inode_flag(inode, FI_AUTO_RECOVER); } -static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) +static inline bool f2fs_skip_inode_update(struct inode *inode) { - if (test_bit(flag, &fi->flags)) - clear_bit(flag, &fi->flags); + if (!is_inode_flag_set(inode, FI_AUTO_RECOVER)) + return false; + return F2FS_I(inode)->last_disk_size == i_size_read(inode); } -static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) +static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) { - fi->i_acl_mode = mode; - set_inode_flag(fi, FI_ACL_MODE); + F2FS_I(inode)->i_current_depth = depth; + f2fs_mark_inode_dirty_sync(inode); } -static inline void get_inline_info(struct f2fs_inode_info *fi, - struct f2fs_inode *ri) +static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) { + F2FS_I(inode)->i_xattr_nid = xnid; + f2fs_mark_inode_dirty_sync(inode); +} + +static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino) +{ + F2FS_I(inode)->i_pino = pino; + f2fs_mark_inode_dirty_sync(inode); +} + +static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + if (ri->i_inline & F2FS_INLINE_XATTR) - set_inode_flag(fi, FI_INLINE_XATTR); + set_bit(FI_INLINE_XATTR, &fi->flags); if (ri->i_inline & F2FS_INLINE_DATA) - set_inode_flag(fi, FI_INLINE_DATA); + set_bit(FI_INLINE_DATA, &fi->flags); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_inode_flag(fi, FI_INLINE_DENTRY); + set_bit(FI_INLINE_DENTRY, &fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) - set_inode_flag(fi, FI_DATA_EXIST); + set_bit(FI_DATA_EXIST, &fi->flags); if (ri->i_inline & F2FS_INLINE_DOTS) - set_inode_flag(fi, FI_INLINE_DOTS); + set_bit(FI_INLINE_DOTS, &fi->flags); } -static inline void set_raw_inline(struct f2fs_inode_info *fi, - struct f2fs_inode *ri) +static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) { ri->i_inline = 0; - if (is_inode_flag_set(fi, FI_INLINE_XATTR)) + if (is_inode_flag_set(inode, FI_INLINE_XATTR)) ri->i_inline |= F2FS_INLINE_XATTR; - if (is_inode_flag_set(fi, FI_INLINE_DATA)) + if (is_inode_flag_set(inode, FI_INLINE_DATA)) ri->i_inline |= F2FS_INLINE_DATA; - if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) + if (is_inode_flag_set(inode, FI_INLINE_DENTRY)) ri->i_inline |= F2FS_INLINE_DENTRY; - if (is_inode_flag_set(fi, FI_DATA_EXIST)) + if (is_inode_flag_set(inode, FI_DATA_EXIST)) ri->i_inline |= F2FS_DATA_EXIST; - if (is_inode_flag_set(fi, FI_INLINE_DOTS)) + if (is_inode_flag_set(inode, FI_INLINE_DOTS)) ri->i_inline |= F2FS_INLINE_DOTS; } static inline int f2fs_has_inline_xattr(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR); + return is_inode_flag_set(inode, FI_INLINE_XATTR); } static inline unsigned int addrs_per_inode(struct inode *inode) @@ -1618,43 +1724,43 @@ static inline int inline_xattr_size(struct inode *inode) static inline int f2fs_has_inline_data(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); + return is_inode_flag_set(inode, FI_INLINE_DATA); } static inline void f2fs_clear_inline_inode(struct inode *inode) { - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + clear_inode_flag(inode, FI_INLINE_DATA); + clear_inode_flag(inode, FI_DATA_EXIST); } static inline int f2fs_exist_data(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); + return is_inode_flag_set(inode, FI_DATA_EXIST); } static inline int f2fs_has_inline_dots(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DOTS); + return is_inode_flag_set(inode, FI_INLINE_DOTS); } static inline bool f2fs_is_atomic_file(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); + return is_inode_flag_set(inode, FI_ATOMIC_FILE); } static inline bool f2fs_is_volatile_file(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_VOLATILE_FILE); + return is_inode_flag_set(inode, FI_VOLATILE_FILE); } static inline bool f2fs_is_first_block_written(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + return is_inode_flag_set(inode, FI_FIRST_BLOCK_WRITTEN); } static inline bool f2fs_is_drop_cache(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_DROP_CACHE); + return is_inode_flag_set(inode, FI_DROP_CACHE); } static inline void *inline_data_addr(struct page *page) @@ -1665,7 +1771,7 @@ static inline void *inline_data_addr(struct page *page) static inline int f2fs_has_inline_dentry(struct inode *inode) { - return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); + return is_inode_flag_set(inode, FI_INLINE_DENTRY); } static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) @@ -1682,11 +1788,13 @@ static inline int is_file(struct inode *inode, int type) static inline void set_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise |= type; + f2fs_mark_inode_dirty_sync(inode); } static inline void clear_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise &= ~type; + f2fs_mark_inode_dirty_sync(inode); } static inline int f2fs_readonly(struct super_block *sb) @@ -1713,7 +1821,7 @@ static inline bool is_dot_dotdot(const struct qstr *str) static inline bool f2fs_may_extent_tree(struct inode *inode) { if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE) || - is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) + is_inode_flag_set(inode, FI_NO_EXTENT)) return false; return S_ISREG(inode->i_mode); @@ -1749,7 +1857,7 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) } #define get_inode_mode(i) \ - ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ + ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) /* get offset of first page in next direct node */ @@ -1764,7 +1872,7 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags) int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); int truncate_blocks(struct inode *, u64, bool); -int f2fs_truncate(struct inode *, bool); +int f2fs_truncate(struct inode *); int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); int truncate_hole(struct inode *, pgoff_t, pgoff_t); @@ -1805,11 +1913,11 @@ struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); int room_for_filename(const void *, int, int); -void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); +void f2fs_drop_nlink(struct inode *, struct inode *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); -ino_t f2fs_inode_by_name(struct inode *, struct qstr *); +ino_t f2fs_inode_by_name(struct inode *, struct qstr *, struct page **); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, struct inode *, const struct qstr *); @@ -1833,6 +1941,8 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ +int f2fs_inode_dirtied(struct inode *); +void f2fs_inode_synced(struct inode *); int f2fs_commit_super(struct f2fs_sb_info *, bool); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) @@ -1866,11 +1976,11 @@ struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); -void sync_inode_page(struct dnode_of_data *); void move_node_page(struct page *, int); -int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, - bool); +int fsync_node_pages(struct f2fs_sb_info *, struct inode *, + struct writeback_control *, bool); int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); +void build_free_nids(struct f2fs_sb_info *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t); @@ -1944,9 +2054,10 @@ void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); void release_ino_entry(struct f2fs_sb_info *, bool); bool exist_written_data(struct f2fs_sb_info *, nid_t, int); +int f2fs_sync_inode_meta(struct f2fs_sb_info *); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); -void add_orphan_inode(struct f2fs_sb_info *, nid_t); +void add_orphan_inode(struct inode *); void remove_orphan_inode(struct f2fs_sb_info *, nid_t); int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); @@ -1981,6 +2092,7 @@ struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct f2fs_io_info *); int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); +void f2fs_set_page_dirty_nobuffers(struct page *); void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); int f2fs_release_page(struct page *, gfp_t); @@ -2012,7 +2124,7 @@ struct f2fs_stat_info { unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages; - unsigned int ndirty_dirs, ndirty_files; + unsigned int ndirty_dirs, ndirty_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; int bg_gc, wb_bios; @@ -2181,7 +2293,6 @@ int f2fs_write_inline_data(struct inode *, struct page *); bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct fscrypt_name *, struct page **); -struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, nid_t, umode_t); @@ -2206,6 +2317,7 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *); */ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *, int); bool f2fs_init_extent_tree(struct inode *, struct f2fs_extent *); +void f2fs_drop_extent_tree(struct inode *); unsigned int f2fs_destroy_extent_node(struct inode *); void f2fs_destroy_extent_tree(struct inode *); bool f2fs_lookup_extent_cache(struct inode *, pgoff_t, struct extent_info *); @@ -2241,6 +2353,26 @@ static inline int f2fs_sb_has_crypto(struct super_block *sb) return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT); } +static inline int f2fs_sb_mounted_hmsmr(struct super_block *sb) +{ + return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_HMSMR); +} + +static inline void set_opt_mode(struct f2fs_sb_info *sbi, unsigned int mt) +{ + clear_opt(sbi, ADAPTIVE); + clear_opt(sbi, LFS); + + switch (mt) { + case F2FS_MOUNT_ADAPTIVE: + set_opt(sbi, ADAPTIVE); + break; + case F2FS_MOUNT_LFS: + set_opt(sbi, LFS); + break; + } +} + static inline bool f2fs_may_encrypt(struct inode *inode) { #ifdef CONFIG_F2FS_FS_ENCRYPTION diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f4c0086655c4..0e493f63ea41 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -21,6 +21,7 @@ #include <linux/mount.h> #include <linux/pagevec.h> #include <linux/uuid.h> +#include <linux/file.h> #include "f2fs.h" #include "node.h" @@ -81,7 +82,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, zero_user_segment(page, offset, PAGE_SIZE); } set_page_dirty(page); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); trace_f2fs_vm_page_mkwrite(page, DATA); mapped: @@ -171,22 +173,16 @@ static void try_to_fix_pino(struct inode *inode) fi->xattr_ver = 0; if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { - fi->i_pino = pino; + f2fs_i_pino_write(inode, pino); file_got_pino(inode); - up_write(&fi->i_sem); - - mark_inode_dirty_sync(inode); - f2fs_write_inode(inode, NULL); - } else { - up_write(&fi->i_sem); } + up_write(&fi->i_sem); } static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, int datasync, bool atomic) { struct inode *inode = file->f_mapping->host; - struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); nid_t ino = inode->i_ino; int ret = 0; @@ -204,9 +200,9 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, /* if fdatasync is triggered, let's do in-place-update */ if (datasync || get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks) - set_inode_flag(fi, FI_NEED_IPU); + set_inode_flag(inode, FI_NEED_IPU); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); - clear_inode_flag(fi, FI_NEED_IPU); + clear_inode_flag(inode, FI_NEED_IPU); if (ret) { trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); @@ -214,7 +210,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, } /* if the inode is dirty, let's recover all the time */ - if (!datasync) { + if (!datasync && !f2fs_skip_inode_update(inode)) { f2fs_write_inode(inode, NULL); goto go_write; } @@ -222,14 +218,14 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, /* * if there is no written data, don't waste time to write recovery info. */ - if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && + if (!is_inode_flag_set(inode, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { /* it may call write_inode just prior to fsync */ if (need_inode_page_update(sbi, ino)) goto go_write; - if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || + if (is_inode_flag_set(inode, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) goto flush_out; goto out; @@ -239,9 +235,9 @@ go_write: * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ - down_read(&fi->i_sem); + down_read(&F2FS_I(inode)->i_sem); need_cp = need_do_checkpoint(inode); - up_read(&fi->i_sem); + up_read(&F2FS_I(inode)->i_sem); if (need_cp) { /* all the dirty node pages should be flushed for POR */ @@ -252,12 +248,12 @@ go_write: * will be used only for fsynced inodes after checkpoint. */ try_to_fix_pino(inode); - clear_inode_flag(fi, FI_APPEND_WRITE); - clear_inode_flag(fi, FI_UPDATE_WRITE); + clear_inode_flag(inode, FI_APPEND_WRITE); + clear_inode_flag(inode, FI_UPDATE_WRITE); goto out; } sync_nodes: - ret = fsync_node_pages(sbi, ino, &wbc, atomic); + ret = fsync_node_pages(sbi, inode, &wbc, atomic); if (ret) goto out; @@ -268,7 +264,7 @@ sync_nodes: } if (need_inode_block_update(sbi, ino)) { - mark_inode_dirty_sync(inode); + f2fs_mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); goto sync_nodes; } @@ -279,10 +275,10 @@ sync_nodes: /* once recovery info is written, don't need to tack this */ remove_ino_entry(sbi, ino, APPEND_INO); - clear_inode_flag(fi, FI_APPEND_WRITE); + clear_inode_flag(inode, FI_APPEND_WRITE); flush_out: remove_ino_entry(sbi, ino, UPDATE_INO); - clear_inode_flag(fi, FI_UPDATE_WRITE); + clear_inode_flag(inode, FI_UPDATE_WRITE); ret = f2fs_issue_flush(sbi); f2fs_update_time(sbi, REQ_TIME); out: @@ -360,7 +356,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) { set_new_dnode(&dn, inode, NULL, NULL, 0); - err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE); if (err && err != -ENOENT) { goto fail; } else if (err == -ENOENT) { @@ -487,8 +483,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) set_data_blkaddr(dn); invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) - clear_inode_flag(F2FS_I(dn->inode), - FI_FIRST_BLOCK_WRITTEN); + clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); nr_free++; } @@ -502,7 +497,6 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->inode) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); - sync_inode_page(dn); } dn->ofs_in_node = ofs; @@ -616,7 +610,7 @@ free_partial: return err; } -int f2fs_truncate(struct inode *inode, bool lock) +int f2fs_truncate(struct inode *inode) { int err; @@ -633,12 +627,12 @@ int f2fs_truncate(struct inode *inode, bool lock) return err; } - err = truncate_blocks(inode, i_size_read(inode), lock); + err = truncate_blocks(inode, i_size_read(inode), true); if (err) return err; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + f2fs_mark_inode_dirty_sync(inode); return 0; } @@ -654,7 +648,6 @@ int f2fs_getattr(struct vfsmount *mnt, #ifdef CONFIG_F2FS_FS_POSIX_ACL static void __setattr_copy(struct inode *inode, const struct iattr *attr) { - struct f2fs_inode_info *fi = F2FS_I(inode); unsigned int ia_valid = attr->ia_valid; if (ia_valid & ATTR_UID) @@ -675,7 +668,7 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) mode &= ~S_ISGID; - set_acl_inode(fi, mode); + set_acl_inode(inode, mode); } } #else @@ -685,7 +678,6 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) int f2fs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); - struct f2fs_inode_info *fi = F2FS_I(inode); int err; err = inode_change_ok(inode, attr); @@ -699,7 +691,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size <= i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); - err = f2fs_truncate(inode, true); + err = f2fs_truncate(inode); if (err) return err; f2fs_balance_fs(F2FS_I_SB(inode), true); @@ -724,13 +716,13 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid & ATTR_MODE) { err = posix_acl_chmod(inode, get_inode_mode(inode)); - if (err || is_inode_flag_set(fi, FI_ACL_MODE)) { - inode->i_mode = fi->i_acl_mode; - clear_inode_flag(fi, FI_ACL_MODE); + if (err || is_inode_flag_set(inode, FI_ACL_MODE)) { + inode->i_mode = F2FS_I(inode)->i_acl_mode; + clear_inode_flag(inode, FI_ACL_MODE); } } - mark_inode_dirty(inode); + f2fs_mark_inode_dirty_sync(inode); return err; } @@ -859,79 +851,199 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) return ret; } -static int __exchange_data_block(struct inode *inode, pgoff_t src, - pgoff_t dst, bool full) +static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, + int *do_replace, pgoff_t off, pgoff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - block_t new_addr; - bool do_replace = false; - int ret; + int ret, done, i; +next_dnode: set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, src, LOOKUP_NODE_RA); + ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); if (ret && ret != -ENOENT) { return ret; } else if (ret == -ENOENT) { - new_addr = NULL_ADDR; - } else { - new_addr = dn.data_blkaddr; - if (!is_checkpointed_data(sbi, new_addr)) { + if (dn.max_level == 0) + return -ENOENT; + done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len); + blkaddr += done; + do_replace += done; + goto next; + } + + done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - + dn.ofs_in_node, len); + for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { + *blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + if (!is_checkpointed_data(sbi, *blkaddr)) { + + if (test_opt(sbi, LFS)) { + f2fs_put_dnode(&dn); + return -ENOTSUPP; + } + /* do not invalidate this block address */ f2fs_update_data_blkaddr(&dn, NULL_ADDR); - do_replace = true; + *do_replace = 1; } - f2fs_put_dnode(&dn); } + f2fs_put_dnode(&dn); +next: + len -= done; + off += done; + if (len) + goto next_dnode; + return 0; +} - if (new_addr == NULL_ADDR) - return full ? truncate_hole(inode, dst, dst + 1) : 0; +static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, + int *do_replace, pgoff_t off, int len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + int ret, i; - if (do_replace) { - struct page *ipage = get_node_page(sbi, inode->i_ino); - struct node_info ni; + for (i = 0; i < len; i++, do_replace++, blkaddr++) { + if (*do_replace == 0) + continue; - if (IS_ERR(ipage)) { - ret = PTR_ERR(ipage); - goto err_out; + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); + if (ret) { + dec_valid_block_count(sbi, inode, 1); + invalidate_blocks(sbi, *blkaddr); + } else { + f2fs_update_data_blkaddr(&dn, *blkaddr); } + f2fs_put_dnode(&dn); + } + return 0; +} - set_new_dnode(&dn, inode, ipage, NULL, 0); - ret = f2fs_reserve_block(&dn, dst); - if (ret) - goto err_out; +static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, + block_t *blkaddr, int *do_replace, + pgoff_t src, pgoff_t dst, pgoff_t len, bool full) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); + pgoff_t i = 0; + int ret; + + while (i < len) { + if (blkaddr[i] == NULL_ADDR && !full) { + i++; + continue; + } - truncate_data_blocks_range(&dn, 1); + if (do_replace[i] || blkaddr[i] == NULL_ADDR) { + struct dnode_of_data dn; + struct node_info ni; + size_t new_size; + pgoff_t ilen; - get_node_info(sbi, dn.nid, &ni); - f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr, - ni.version, true, false); - f2fs_put_dnode(&dn); - } else { - struct page *psrc, *pdst; + set_new_dnode(&dn, dst_inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE); + if (ret) + return ret; + + get_node_info(sbi, dn.nid, &ni); + ilen = min((pgoff_t) + ADDRS_PER_PAGE(dn.node_page, dst_inode) - + dn.ofs_in_node, len - i); + do { + dn.data_blkaddr = datablock_addr(dn.node_page, + dn.ofs_in_node); + truncate_data_blocks_range(&dn, 1); + + if (do_replace[i]) { + f2fs_i_blocks_write(src_inode, + 1, false); + f2fs_i_blocks_write(dst_inode, + 1, true); + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, + blkaddr[i], ni.version, true, false); + + do_replace[i] = 0; + } + dn.ofs_in_node++; + i++; + new_size = (dst + i) << PAGE_SHIFT; + if (dst_inode->i_size < new_size) + f2fs_i_size_write(dst_inode, new_size); + } while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen); - psrc = get_lock_data_page(inode, src, true); - if (IS_ERR(psrc)) - return PTR_ERR(psrc); - pdst = get_new_data_page(inode, NULL, dst, true); - if (IS_ERR(pdst)) { + f2fs_put_dnode(&dn); + } else { + struct page *psrc, *pdst; + + psrc = get_lock_data_page(src_inode, src + i, true); + if (IS_ERR(psrc)) + return PTR_ERR(psrc); + pdst = get_new_data_page(dst_inode, NULL, dst + i, + true); + if (IS_ERR(pdst)) { + f2fs_put_page(psrc, 1); + return PTR_ERR(pdst); + } + f2fs_copy_page(psrc, pdst); + set_page_dirty(pdst); + f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); - return PTR_ERR(pdst); - } - f2fs_copy_page(psrc, pdst); - set_page_dirty(pdst); - f2fs_put_page(pdst, 1); - f2fs_put_page(psrc, 1); - return truncate_hole(inode, src, src + 1); + ret = truncate_hole(src_inode, src + i, src + i + 1); + if (ret) + return ret; + i++; + } } return 0; +} -err_out: - if (!get_dnode_of_data(&dn, src, LOOKUP_NODE)) { - f2fs_update_data_blkaddr(&dn, new_addr); - f2fs_put_dnode(&dn); +static int __exchange_data_block(struct inode *src_inode, + struct inode *dst_inode, pgoff_t src, pgoff_t dst, + pgoff_t len, bool full) +{ + block_t *src_blkaddr; + int *do_replace; + pgoff_t olen; + int ret; + + while (len) { + olen = min((pgoff_t)4 * ADDRS_PER_BLOCK, len); + + src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * olen, GFP_KERNEL); + if (!src_blkaddr) + return -ENOMEM; + + do_replace = f2fs_kvzalloc(sizeof(int) * olen, GFP_KERNEL); + if (!do_replace) { + kvfree(src_blkaddr); + return -ENOMEM; + } + + ret = __read_out_blkaddrs(src_inode, src_blkaddr, + do_replace, src, olen); + if (ret) + goto roll_back; + + ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, + do_replace, src, dst, olen, full); + if (ret) + goto roll_back; + + src += olen; + dst += olen; + len -= olen; + + kvfree(src_blkaddr); + kvfree(do_replace); } + return 0; + +roll_back: + __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); + kvfree(src_blkaddr); + kvfree(do_replace); return ret; } @@ -939,16 +1051,15 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; - int ret = 0; + int ret; - for (; end < nrpages; start++, end++) { - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); - ret = __exchange_data_block(inode, end, start, true); - f2fs_unlock_op(sbi); - if (ret) - break; - } + f2fs_balance_fs(sbi, true); + f2fs_lock_op(sbi); + + f2fs_drop_extent_tree(inode); + + ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); + f2fs_unlock_op(sbi); return ret; } @@ -992,7 +1103,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = truncate_blocks(inode, new_size, true); if (!ret) - i_size_write(inode, new_size); + f2fs_i_size_write(inode, new_size); return ret; } @@ -1128,11 +1239,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { - i_size_write(inode, new_size); - mark_inode_dirty(inode); - update_inode_page(inode); - } + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) + f2fs_i_size_write(inode, new_size); return ret; } @@ -1140,7 +1248,7 @@ out: static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t pg_start, pg_end, delta, nrpages, idx; + pgoff_t nr, pg_start, pg_end, delta, idx; loff_t new_size; int ret = 0; @@ -1175,14 +1283,20 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; - nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + + while (!ret && idx > pg_start) { + nr = idx - pg_start; + if (nr > delta) + nr = delta; + idx -= nr; - for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) { f2fs_lock_op(sbi); - ret = __exchange_data_block(inode, idx, idx + delta, false); + f2fs_drop_extent_tree(inode); + + ret = __exchange_data_block(inode, inode, idx, + idx + delta, nr, false); f2fs_unlock_op(sbi); - if (ret) - break; } /* write out all moved pages, if possible */ @@ -1190,7 +1304,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) truncate_pagecache(inode, offset); if (!ret) - i_size_write(inode, new_size); + f2fs_i_size_write(inode, new_size); return ret; } @@ -1238,11 +1352,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { - i_size_write(inode, new_size); - mark_inode_dirty(inode); - update_inode_page(inode); - } + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) + f2fs_i_size_write(inode, new_size); return ret; } @@ -1285,7 +1396,7 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + f2fs_mark_inode_dirty_sync(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1310,10 +1421,10 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) if (f2fs_is_atomic_file(inode)) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { - clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - set_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + clear_inode_flag(inode, FI_VOLATILE_FILE); + set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); - clear_inode_flag(F2FS_I(inode), FI_DROP_CACHE); + clear_inode_flag(inode, FI_DROP_CACHE); } return 0; } @@ -1376,9 +1487,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) fi->i_flags = flags; inode_unlock(inode); - f2fs_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + f2fs_set_inode_flags(inode); out: mnt_drop_write_file(filp); return ret; @@ -1412,7 +1522,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (ret) goto out; - set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + set_inode_flag(inode, FI_ATOMIC_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (!get_dirty_pages(inode)) @@ -1423,7 +1533,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_FILE); out: inode_unlock(inode); mnt_drop_write_file(filp); @@ -1448,10 +1558,10 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp) goto err_out; if (f2fs_is_atomic_file(inode)) { - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_FILE); ret = commit_inmem_pages(inode); if (ret) { - set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + set_inode_flag(inode, FI_ATOMIC_FILE); goto err_out; } } @@ -1484,7 +1594,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (ret) goto out; - set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + set_inode_flag(inode, FI_VOLATILE_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); out: inode_unlock(inode); @@ -1538,7 +1648,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) if (f2fs_is_atomic_file(inode)) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { - clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); + clear_inode_flag(inode, FI_VOLATILE_FILE); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } @@ -1871,7 +1981,7 @@ do_map: continue; } - set_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + set_inode_flag(inode, FI_DO_DEFRAG); idx = map.m_lblk; while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { @@ -1896,14 +2006,14 @@ do_map: if (idx < pg_end && cnt < blk_per_seg) goto do_map; - clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + clear_inode_flag(inode, FI_DO_DEFRAG); err = filemap_fdatawrite(inode->i_mapping); if (err) goto out; } clear_out: - clear_inode_flag(F2FS_I(inode), FI_DO_DEFRAG); + clear_inode_flag(inode, FI_DO_DEFRAG); out: inode_unlock(inode); if (!err) @@ -1959,6 +2069,133 @@ out: return err; } +static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, size_t len) +{ + struct inode *src = file_inode(file_in); + struct inode *dst = file_inode(file_out); + struct f2fs_sb_info *sbi = F2FS_I_SB(src); + size_t olen = len, dst_max_i_size = 0; + size_t dst_osize; + int ret; + + if (file_in->f_path.mnt != file_out->f_path.mnt || + src->i_sb != dst->i_sb) + return -EXDEV; + + if (unlikely(f2fs_readonly(src->i_sb))) + return -EROFS; + + if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode)) + return -EISDIR; + + if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst)) + return -EOPNOTSUPP; + + inode_lock(src); + if (src != dst) + inode_lock(dst); + + ret = -EINVAL; + if (pos_in + len > src->i_size || pos_in + len < pos_in) + goto out_unlock; + if (len == 0) + olen = len = src->i_size - pos_in; + if (pos_in + len == src->i_size) + len = ALIGN(src->i_size, F2FS_BLKSIZE) - pos_in; + if (len == 0) { + ret = 0; + goto out_unlock; + } + + dst_osize = dst->i_size; + if (pos_out + olen > dst->i_size) + dst_max_i_size = pos_out + olen; + + /* verify the end result is block aligned */ + if (!IS_ALIGNED(pos_in, F2FS_BLKSIZE) || + !IS_ALIGNED(pos_in + len, F2FS_BLKSIZE) || + !IS_ALIGNED(pos_out, F2FS_BLKSIZE)) + goto out_unlock; + + ret = f2fs_convert_inline_inode(src); + if (ret) + goto out_unlock; + + ret = f2fs_convert_inline_inode(dst); + if (ret) + goto out_unlock; + + /* write out all dirty pages from offset */ + ret = filemap_write_and_wait_range(src->i_mapping, + pos_in, pos_in + len); + if (ret) + goto out_unlock; + + ret = filemap_write_and_wait_range(dst->i_mapping, + pos_out, pos_out + len); + if (ret) + goto out_unlock; + + f2fs_balance_fs(sbi, true); + f2fs_lock_op(sbi); + ret = __exchange_data_block(src, dst, pos_in, + pos_out, len >> F2FS_BLKSIZE_BITS, false); + + if (!ret) { + if (dst_max_i_size) + f2fs_i_size_write(dst, dst_max_i_size); + else if (dst_osize != dst->i_size) + f2fs_i_size_write(dst, dst_osize); + } + f2fs_unlock_op(sbi); +out_unlock: + if (src != dst) + inode_unlock(dst); + inode_unlock(src); + return ret; +} + +static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) +{ + struct f2fs_move_range range; + struct fd dst; + int err; + + if (!(filp->f_mode & FMODE_READ) || + !(filp->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(&range, (struct f2fs_move_range __user *)arg, + sizeof(range))) + return -EFAULT; + + dst = fdget(range.dst_fd); + if (!dst.file) + return -EBADF; + + if (!(dst.file->f_mode & FMODE_WRITE)) { + err = -EBADF; + goto err_out; + } + + err = mnt_want_write_file(filp); + if (err) + goto err_out; + + err = f2fs_move_file_range(filp, range.pos_in, dst.file, + range.pos_out, range.len); + + mnt_drop_write_file(filp); + + if (copy_to_user((struct f2fs_move_range __user *)arg, + &range, sizeof(range))) + err = -EFAULT; +err_out: + fdput(dst); + return err; +} + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -1994,6 +2231,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_write_checkpoint(filp, arg); case F2FS_IOC_DEFRAGMENT: return f2fs_ioc_defragment(filp, arg); + case F2FS_IOC_MOVE_RANGE: + return f2fs_ioc_move_range(filp, arg); default: return -ENOTTY; } @@ -2003,6 +2242,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct blk_plug plug; ssize_t ret; if (f2fs_encrypted_inode(inode) && @@ -2014,8 +2254,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = generic_write_checks(iocb, from); if (ret > 0) { ret = f2fs_preallocate_blocks(iocb, from); - if (!ret) + if (!ret) { + blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); + blk_finish_plug(&plug); + } } inode_unlock(inode); @@ -2050,6 +2293,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: break; + case F2FS_IOC_MOVE_RANGE: + break; default: return -ENOIOCTLCMD; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index f06ed73adf99..8f7fa326ce95 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -594,11 +594,11 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) /* write page */ lock_page(fio.encrypted_page); - if (unlikely(!PageUptodate(fio.encrypted_page))) { + if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { err = -EIO; goto put_page_out; } - if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { + if (unlikely(!PageUptodate(fio.encrypted_page))) { err = -EIO; goto put_page_out; } @@ -619,9 +619,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) f2fs_submit_page_mbio(&fio); f2fs_update_data_blkaddr(&dn, newaddr); - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(inode, FI_APPEND_WRITE); if (page->index == 0) - set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); put_page_out: f2fs_put_page(fio.encrypted_page, 1); recover_block: @@ -656,12 +656,23 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type) .page = page, .encrypted_page = NULL, }; + bool is_dirty = PageDirty(page); + int err; + +retry: set_page_dirty(page); f2fs_wait_on_page_writeback(page, DATA, true); if (clear_page_dirty_for_io(page)) inode_dec_dirty_pages(inode); + set_cold_data(page); - do_write_data_page(&fio); + + err = do_write_data_page(&fio); + if (err == -ENOMEM && is_dirty) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + clear_cold_data(page); } out: @@ -748,12 +759,32 @@ next_step: /* phase 3 */ inode = find_gc_inode(gc_list, dni.ino); if (inode) { + struct f2fs_inode_info *fi = F2FS_I(inode); + bool locked = false; + + if (S_ISREG(inode->i_mode)) { + if (!down_write_trylock(&fi->dio_rwsem[READ])) + continue; + if (!down_write_trylock( + &fi->dio_rwsem[WRITE])) { + up_write(&fi->dio_rwsem[READ]); + continue; + } + locked = true; + } + start_bidx = start_bidx_of_node(nofs, inode) + ofs_in_node; if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) move_encrypted_block(inode, start_bidx); else move_data_page(inode, start_bidx, gc_type); + + if (locked) { + up_write(&fi->dio_rwsem[WRITE]); + up_write(&fi->dio_rwsem[READ]); + } + stat_inc_data_blk_count(sbi, 1, gc_type); } } @@ -802,6 +833,10 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, blk_start_plug(&plug); for (segno = start_segno; segno < end_segno; segno++) { + + if (get_valid_blocks(sbi, segno, 1) == 0) + continue; + /* find segment summary of victim */ sum_page = find_get_page(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); @@ -877,10 +912,13 @@ gc_more: * enough free sections, we should flush dent/node blocks and do * garbage collections. */ - if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi)) + if (__get_victim(sbi, &segno, gc_type) || + prefree_segments(sbi)) { write_checkpoint(sbi, &cpc); - else if (has_not_enough_free_secs(sbi, 0)) + segno = NULL_SEGNO; + } else if (has_not_enough_free_secs(sbi, 0)) { write_checkpoint(sbi, &cpc); + } } if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type)) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index c15e53c1d794..ccea8735de59 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -59,7 +59,8 @@ void read_inline_data(struct page *page, struct page *ipage) memcpy(dst_addr, src_addr, MAX_INLINE_DATA); flush_dcache_page(page); kunmap_atomic(dst_addr); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } bool truncate_inline_inode(struct page *ipage, u64 from) @@ -73,7 +74,7 @@ bool truncate_inline_inode(struct page *ipage, u64 from) f2fs_wait_on_page_writeback(ipage, NODE, true); memset(addr + from, 0, MAX_INLINE_DATA - from); - + set_page_dirty(ipage); return true; } @@ -97,7 +98,8 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) else read_inline_data(page, ipage); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); f2fs_put_page(ipage, 1); unlock_page(page); return 0; @@ -139,7 +141,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) inode_dec_dirty_pages(dn->inode); /* this converted inline_data should be recovered. */ - set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); + set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ truncate_inline_inode(dn->inode_page, 0); @@ -147,7 +149,6 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) clear_out: stat_dec_inline_inode(dn->inode); f2fs_clear_inline_inode(dn->inode); - sync_inode_page(dn); f2fs_put_dnode(dn); return 0; } @@ -213,11 +214,11 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) dst_addr = inline_data_addr(dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(src_addr); + set_page_dirty(dn.inode_page); - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_inode_flag(inode, FI_APPEND_WRITE); + set_inode_flag(inode, FI_DATA_EXIST); - sync_inode_page(&dn); clear_inline_node(dn.inode_page); f2fs_put_dnode(&dn); return 0; @@ -253,10 +254,10 @@ process_inline: dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + set_inode_flag(inode, FI_INLINE_DATA); + set_inode_flag(inode, FI_DATA_EXIST); - update_inode(inode, ipage); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); return true; } @@ -267,7 +268,6 @@ process_inline: if (!truncate_inline_inode(ipage, 0)) return false; f2fs_clear_inline_inode(inode); - update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { if (truncate_blocks(inode, 0, false)) @@ -289,8 +289,10 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, f2fs_hash_t namehash; ipage = get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) + if (IS_ERR(ipage)) { + *res_page = ipage; return NULL; + } namehash = f2fs_dentry_hash(&name); @@ -307,25 +309,6 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, return de; } -struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir, - struct page **p) -{ - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - struct page *ipage; - struct f2fs_dir_entry *de; - struct f2fs_inline_dentry *dentry_blk; - - ipage = get_node_page(sbi, dir->i_ino); - if (IS_ERR(ipage)) - return NULL; - - dentry_blk = inline_data_addr(ipage); - de = &dentry_blk->dentry[1]; - *p = ipage; - unlock_page(ipage); - return de; -} - int make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { @@ -340,10 +323,8 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, set_page_dirty(ipage); /* update i_size to MAX_INLINE_DATA */ - if (i_size_read(inode) < MAX_INLINE_DATA) { - i_size_write(inode, MAX_INLINE_DATA); - set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); - } + if (i_size_read(inode) < MAX_INLINE_DATA) + f2fs_i_size_write(inode, MAX_INLINE_DATA); return 0; } @@ -392,22 +373,19 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, NR_INLINE_DENTRY * F2FS_SLOT_LEN); kunmap_atomic(dentry_blk); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); set_page_dirty(page); /* clear inline dir and flag after data writeback */ truncate_inline_inode(ipage, 0); stat_dec_inline_dir(dir); - clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); + clear_inode_flag(dir, FI_INLINE_DENTRY); - F2FS_I(dir)->i_current_depth = 1; - if (i_size_read(dir) < PAGE_SIZE) { - i_size_write(dir, PAGE_SIZE); - set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); - } - - sync_inode_page(&dn); + f2fs_i_depth_write(dir, 1); + if (i_size_read(dir) < PAGE_SIZE) + f2fs_i_size_write(dir, PAGE_SIZE); out: f2fs_put_page(page, 1); return err; @@ -465,7 +443,6 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, struct f2fs_inline_dentry *inline_dentry) { struct f2fs_inline_dentry *backup_dentry; - struct f2fs_inode_info *fi = F2FS_I(dir); int err; backup_dentry = f2fs_kmalloc(sizeof(struct f2fs_inline_dentry), @@ -487,16 +464,15 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, lock_page(ipage); stat_dec_inline_dir(dir); - clear_inode_flag(fi, FI_INLINE_DENTRY); - update_inode(dir, ipage); + clear_inode_flag(dir, FI_INLINE_DENTRY); kfree(backup_dentry); return 0; recover: lock_page(ipage); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); - fi->i_current_depth = 0; - i_size_write(dir, MAX_INLINE_DATA); - update_inode(dir, ipage); + f2fs_i_depth_write(dir, 0); + f2fs_i_size_write(dir, MAX_INLINE_DATA); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); kfree(backup_dentry); @@ -560,8 +536,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, /* we don't need to mark_inode_dirty now */ if (inode) { - F2FS_I(inode)->i_pino = dir->i_ino; - update_inode(inode, page); + f2fs_i_pino_write(inode, dir->i_ino); f2fs_put_page(page, 1); } @@ -569,11 +544,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - - if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { - update_inode(dir, ipage); - clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); - } out: f2fs_put_page(ipage, 1); return err; @@ -597,13 +567,13 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, &inline_dentry->dentry_bitmap); set_page_dirty(page); + f2fs_put_page(page, 1); dir->i_ctime = dir->i_mtime = CURRENT_TIME; + f2fs_mark_inode_dirty_sync(dir); if (inode) - f2fs_drop_nlink(dir, inode, page); - - f2fs_put_page(page, 1); + f2fs_drop_nlink(dir, inode); } bool f2fs_empty_inline_dir(struct inode *dir) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2e68adab0d64..9ac5efc15347 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -18,6 +18,13 @@ #include <trace/events/f2fs.h> +void f2fs_mark_inode_dirty_sync(struct inode *inode) +{ + if (f2fs_inode_dirtied(inode)) + return; + mark_inode_dirty_sync(inode); +} + void f2fs_set_inode_flags(struct inode *inode) { unsigned int flags = F2FS_I(inode)->i_flags; @@ -35,6 +42,7 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + f2fs_mark_inode_dirty_sync(inode); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -85,8 +93,8 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage) if (*start++) { f2fs_wait_on_page_writeback(ipage, NODE, true); - set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); - set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage)); + set_inode_flag(inode, FI_DATA_EXIST); + set_raw_inline(inode, F2FS_INODE(ipage)); set_page_dirty(ipage); return; } @@ -141,7 +149,7 @@ static int do_read_inode(struct inode *inode) if (f2fs_init_extent_tree(inode, &ri->i_ext)) set_page_dirty(node_page); - get_inline_info(fi, ri); + get_inline_info(inode, ri); /* check data exist */ if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) @@ -151,7 +159,10 @@ static int do_read_inode(struct inode *inode) __get_inode_rdev(inode, ri); if (__written_first_block(ri)) - set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN); + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + + if (!need_inode_block_update(sbi, inode->i_ino)) + fi->last_disk_size = inode->i_size; f2fs_put_page(node_page, 1); @@ -227,6 +238,8 @@ int update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; + f2fs_inode_synced(inode); + f2fs_wait_on_page_writeback(node_page, NODE, true); ri = F2FS_INODE(node_page); @@ -244,7 +257,7 @@ int update_inode(struct inode *inode, struct page *node_page) &ri->i_ext); else memset(&ri->i_ext, 0, sizeof(ri->i_ext)); - set_raw_inline(F2FS_I(inode), ri); + set_raw_inline(inode, ri); ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec); ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); @@ -261,7 +274,6 @@ int update_inode(struct inode *inode, struct page *node_page) __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); - clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); /* deleted inode */ if (inode->i_nlink == 0) @@ -285,6 +297,7 @@ retry: } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } + f2fs_inode_synced(inode); return 0; } ret = update_inode(inode, node_page); @@ -300,7 +313,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; - if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) + if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; /* @@ -318,8 +331,7 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) void f2fs_evict_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); - nid_t xnid = fi->i_xattr_nid; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; int err = 0; /* some remained atomic pages should discarded */ @@ -341,12 +353,17 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(FAULT_EVICT_INODE)) + goto no_delete; +#endif + sb_start_intwrite(inode->i_sb); - set_inode_flag(fi, FI_NO_ALLOC); + set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: if (F2FS_HAS_BLOCKS(inode)) - err = f2fs_truncate(inode, true); + err = f2fs_truncate(inode); if (!err) { f2fs_lock_op(sbi); @@ -360,6 +377,8 @@ retry: goto retry; } + if (err) + update_inode_page(inode); sb_end_intwrite(inode->i_sb); no_delete: stat_dec_inline_xattr(inode); @@ -369,13 +388,13 @@ no_delete: invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); - if (is_inode_flag_set(fi, FI_APPEND_WRITE)) + if (is_inode_flag_set(inode, FI_APPEND_WRITE)) add_ino_entry(sbi, inode->i_ino, APPEND_INO); - if (is_inode_flag_set(fi, FI_UPDATE_WRITE)) + if (is_inode_flag_set(inode, FI_UPDATE_WRITE)) add_ino_entry(sbi, inode->i_ino, UPDATE_INO); - if (is_inode_flag_set(fi, FI_FREE_NID)) { + if (is_inode_flag_set(inode, FI_FREE_NID)) { alloc_nid_failed(sbi, inode->i_ino); - clear_inode_flag(fi, FI_FREE_NID); + clear_inode_flag(inode, FI_FREE_NID); } f2fs_bug_on(sbi, err && !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); @@ -407,11 +426,11 @@ void handle_failed_inode(struct inode *inode) f2fs_msg(sbi->sb, KERN_WARNING, "Too many orphan inodes, run fsck to fix."); } else { - add_orphan_inode(sbi, inode->i_ino); + add_orphan_inode(inode); } alloc_nid_done(sbi, inode->i_ino); } else { - set_inode_flag(F2FS_I(inode), FI_FREE_NID); + set_inode_flag(inode, FI_FREE_NID); } f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 324ed3812f30..73fa356f8fbb 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -60,10 +60,14 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) f2fs_set_encrypted_inode(inode); + set_inode_flag(inode, FI_NEW_INODE); + + if (test_opt(sbi, INLINE_XATTR)) + set_inode_flag(inode, FI_INLINE_XATTR); if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode)) - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + set_inode_flag(inode, FI_INLINE_DATA); if (f2fs_may_inline_dentry(inode)) - set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + set_inode_flag(inode, FI_INLINE_DENTRY); f2fs_init_extent_tree(inode, NULL); @@ -72,14 +76,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) stat_inc_inline_dir(inode); trace_f2fs_new_inode(inode, 0); - mark_inode_dirty(inode); return inode; fail: trace_f2fs_new_inode(inode, err); make_bad_inode(inode); if (nid_free) - set_inode_flag(F2FS_I(inode), FI_FREE_NID); + set_inode_flag(inode, FI_FREE_NID); iput(inode); return ERR_PTR(err); } @@ -177,7 +180,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME; ihold(inode); - set_inode_flag(F2FS_I(inode), FI_INC_LINK); + set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -190,7 +193,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_sync_fs(sbi->sb, 1); return 0; out: - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + clear_inode_flag(inode, FI_INC_LINK); iput(inode); f2fs_unlock_op(sbi); return err; @@ -199,9 +202,13 @@ out: struct dentry *f2fs_get_parent(struct dentry *child) { struct qstr dotdot = QSTR_INIT("..", 2); - unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot); - if (!ino) + struct page *page; + unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot, &page); + if (!ino) { + if (IS_ERR(page)) + return ERR_CAST(page); return ERR_PTR(-ENOENT); + } return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } @@ -229,6 +236,9 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) if (de) { f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); + } else if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; } else { err = __f2fs_add_link(dir, &dot, NULL, dir->i_ino, S_IFDIR); if (err) @@ -239,14 +249,14 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) if (de) { f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); + } else if (IS_ERR(page)) { + err = PTR_ERR(page); } else { err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); } out: - if (!err) { - clear_inode_flag(F2FS_I(dir), FI_INLINE_DOTS); - mark_inode_dirty(dir); - } + if (!err) + clear_inode_flag(dir, FI_INLINE_DOTS); f2fs_unlock_op(sbi); return err; @@ -281,8 +291,11 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENAMETOOLONG); de = f2fs_find_entry(dir, &dentry->d_name, &page); - if (!de) + if (!de) { + if (IS_ERR(page)) + return (struct dentry *)page; return d_splice_alias(inode, dentry); + } ino = le32_to_cpu(de->ino); f2fs_dentry_kunmap(dir, page); @@ -329,8 +342,11 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) trace_f2fs_unlink_enter(dir, dentry); de = f2fs_find_entry(dir, &dentry->d_name, &page); - if (!de) + if (!de) { + if (IS_ERR(page)) + err = PTR_ERR(page); goto fail; + } f2fs_balance_fs(sbi, true); @@ -345,9 +361,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); - /* In order to evict this inode, we set it dirty */ - mark_inode_dirty(inode); - if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); fail: @@ -492,7 +505,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) f2fs_balance_fs(sbi, true); - set_inode_flag(F2FS_I(inode), FI_INC_LINK); + set_inode_flag(inode, FI_INC_LINK); f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -509,7 +522,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) return 0; out_fail: - clear_inode_flag(F2FS_I(inode), FI_INC_LINK); + clear_inode_flag(inode, FI_INC_LINK); handle_failed_inode(inode); return err; } @@ -592,17 +605,17 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, * add this non-linked tmpfile to orphan list, in this way we could * remove all unused data of tmpfile after abnormal power-off. */ - add_orphan_inode(sbi, inode->i_ino); - f2fs_unlock_op(sbi); - + add_orphan_inode(inode); alloc_nid_done(sbi, inode->i_ino); if (whiteout) { - inode_dec_link_count(inode); + f2fs_i_links_write(inode, false); *whiteout = inode; } else { d_tmpfile(dentry, inode); } + /* link_count was changed by d_tmpfile as well. */ + f2fs_unlock_op(sbi); unlock_new_inode(inode); return 0; @@ -652,14 +665,19 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_entry) + if (!old_entry) { + if (IS_ERR(old_page)) + err = PTR_ERR(old_page); goto out; + } if (S_ISDIR(old_inode->i_mode)) { - err = -EIO; old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); - if (!old_dir_entry) + if (!old_dir_entry) { + if (IS_ERR(old_dir_page)) + err = PTR_ERR(old_dir_page); goto out_old; + } } if (flags & RENAME_WHITEOUT) { @@ -677,8 +695,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); - if (!new_entry) + if (!new_entry) { + if (IS_ERR(new_page)) + err = PTR_ERR(new_page); goto out_whiteout; + } f2fs_balance_fs(sbi, true); @@ -700,19 +721,14 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, new_inode->i_ctime = CURRENT_TIME; down_write(&F2FS_I(new_inode)->i_sem); if (old_dir_entry) - drop_nlink(new_inode); - drop_nlink(new_inode); + f2fs_i_links_write(new_inode, false); + f2fs_i_links_write(new_inode, false); up_write(&F2FS_I(new_inode)->i_sem); - mark_inode_dirty(new_inode); - if (!new_inode->i_nlink) - add_orphan_inode(sbi, new_inode->i_ino); + add_orphan_inode(new_inode); else release_orphan_inode(sbi); - - update_inode_page(old_inode); - update_inode_page(new_inode); } else { f2fs_balance_fs(sbi, true); @@ -724,10 +740,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_whiteout; } - if (old_dir_entry) { - inc_nlink(new_dir); - update_inode_page(new_dir); - } + if (old_dir_entry) + f2fs_i_links_write(new_dir, true); /* * old entry and new entry can locate in the same inline @@ -743,7 +757,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_entry) { - err = -EIO; + err = -ENOENT; + if (IS_ERR(old_page)) + err = PTR_ERR(old_page); f2fs_unlock_op(sbi); goto out_whiteout; } @@ -757,13 +773,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(old_inode); + f2fs_mark_inode_dirty_sync(old_inode); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (whiteout) { whiteout->i_state |= I_LINKABLE; - set_inode_flag(F2FS_I(whiteout), FI_INC_LINK); + set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); if (err) goto put_out_dir; @@ -775,14 +791,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir != new_dir && !whiteout) { f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); - update_inode_page(old_inode); } else { f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } - drop_nlink(old_dir); - mark_inode_dirty(old_dir); - update_inode_page(old_dir); + f2fs_i_links_write(old_dir, false); } f2fs_unlock_op(sbi); @@ -832,29 +845,39 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, return -EPERM; old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page); - if (!old_entry) + if (!old_entry) { + if (IS_ERR(old_page)) + err = PTR_ERR(old_page); goto out; + } new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, &new_page); - if (!new_entry) + if (!new_entry) { + if (IS_ERR(new_page)) + err = PTR_ERR(new_page); goto out_old; + } /* prepare for updating ".." directory entry info later */ if (old_dir != new_dir) { if (S_ISDIR(old_inode->i_mode)) { - err = -EIO; old_dir_entry = f2fs_parent_dir(old_inode, &old_dir_page); - if (!old_dir_entry) + if (!old_dir_entry) { + if (IS_ERR(old_dir_page)) + err = PTR_ERR(old_dir_page); goto out_new; + } } if (S_ISDIR(new_inode->i_mode)) { - err = -EIO; new_dir_entry = f2fs_parent_dir(new_inode, &new_dir_page); - if (!new_dir_entry) + if (!new_dir_entry) { + if (IS_ERR(new_dir_page)) + err = PTR_ERR(new_dir_page); goto out_old_dir; + } } } @@ -904,19 +927,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, file_lost_pino(old_inode); up_write(&F2FS_I(old_inode)->i_sem); - update_inode_page(old_inode); - old_dir->i_ctime = CURRENT_TIME; if (old_nlink) { down_write(&F2FS_I(old_dir)->i_sem); - if (old_nlink < 0) - drop_nlink(old_dir); - else - inc_nlink(old_dir); + f2fs_i_links_write(old_dir, old_nlink > 0); up_write(&F2FS_I(old_dir)->i_sem); } - mark_inode_dirty(old_dir); - update_inode_page(old_dir); + f2fs_mark_inode_dirty_sync(old_dir); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); @@ -925,19 +942,13 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, file_lost_pino(new_inode); up_write(&F2FS_I(new_inode)->i_sem); - update_inode_page(new_inode); - new_dir->i_ctime = CURRENT_TIME; if (new_nlink) { down_write(&F2FS_I(new_dir)->i_sem); - if (new_nlink < 0) - drop_nlink(new_dir); - else - inc_nlink(new_dir); + f2fs_i_links_write(new_dir, new_nlink > 0); up_write(&F2FS_I(new_dir)->i_sem); } - mark_inode_dirty(new_dir); - update_inode_page(new_dir); + f2fs_mark_inode_dirty_sync(new_dir); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d1867698e601..b2fa4b615925 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -52,6 +52,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); + if (excess_cached_nats(sbi)) + res = false; + if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD) + res = false; } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->wb.dirty_exceeded) return false; @@ -202,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return need; } @@ -219,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -233,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return need_update; } @@ -280,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); @@ -330,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -338,8 +342,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - if (!down_write_trylock(&nm_i->nat_tree_lock)) - return 0; + percpu_down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; @@ -348,7 +351,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -370,13 +373,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return; } @@ -400,11 +403,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); /* cache nat entry */ - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); } /* @@ -646,6 +649,7 @@ release_out: if (err == -ENOENT) { dn->cur_level = i; dn->max_level = level; + dn->ofs_in_node = offset[level]; } return err; } @@ -670,8 +674,7 @@ static void truncate_node(struct dnode_of_data *dn) if (dn->nid == dn->inode->i_ino) { remove_orphan_inode(sbi, dn->nid); dec_valid_inode_count(sbi); - } else { - sync_inode_page(dn); + f2fs_inode_synced(dn->inode); } invalidate: clear_node_page_dirty(dn->node_page); @@ -953,7 +956,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page) if (IS_ERR(npage)) return PTR_ERR(npage); - F2FS_I(inode)->i_xattr_nid = 0; + f2fs_i_xnid_write(inode, 0); /* need to do checkpoint during fsync */ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); @@ -1019,7 +1022,7 @@ struct page *new_node_page(struct dnode_of_data *dn, struct page *page; int err; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return ERR_PTR(-EPERM); page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); @@ -1042,21 +1045,16 @@ struct page *new_node_page(struct dnode_of_data *dn, f2fs_wait_on_page_writeback(page, NODE, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (set_page_dirty(page)) dn->node_changed = true; if (f2fs_has_xattr_block(ofs)) - F2FS_I(dn->inode)->i_xattr_nid = dn->nid; + f2fs_i_xnid_write(dn->inode, dn->nid); - dn->node_page = page; - if (ipage) - update_inode(dn->inode, ipage); - else - sync_inode_page(dn); if (ofs == 0) inc_valid_inode_count(sbi); - return page; fail: @@ -1083,6 +1081,9 @@ static int read_node_page(struct page *page, int op_flags) .encrypted_page = NULL, }; + if (PageUptodate(page)) + return LOCKED_PAGE; + get_node_info(sbi, page->index, &ni); if (unlikely(ni.blk_addr == NULL_ADDR)) { @@ -1090,9 +1091,6 @@ static int read_node_page(struct page *page, int op_flags) return -ENOENT; } - if (PageUptodate(page)) - return LOCKED_PAGE; - fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; return f2fs_submit_page_bio(&fio); } @@ -1150,16 +1148,21 @@ repeat: lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } + + if (unlikely(!PageUptodate(page))) + goto out_err; page_hit: - f2fs_bug_on(sbi, nid != nid_of_node(page)); + if(unlikely(nid != nid_of_node(page))) { + f2fs_bug_on(sbi, 1); + ClearPageUptodate(page); +out_err: + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } return page; } @@ -1176,24 +1179,6 @@ struct page *get_node_page_ra(struct page *parent, int start) return __get_node_page(sbi, nid, parent, start); } -void sync_inode_page(struct dnode_of_data *dn) -{ - int ret = 0; - - if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { - ret = update_inode(dn->inode, dn->node_page); - } else if (dn->inode_page) { - if (!dn->inode_page_locked) - lock_page(dn->inode_page); - ret = update_inode(dn->inode, dn->inode_page); - if (!dn->inode_page_locked) - unlock_page(dn->inode_page); - } else { - ret = update_inode_page(dn->inode); - } - dn->node_changed = ret ? true: false; -} - static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; @@ -1319,7 +1304,7 @@ continue_unlock: return last_page; } -int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, +int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic) { pgoff_t index, end; @@ -1327,6 +1312,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, int ret = 0; struct page *last_page = NULL; bool marked = false; + nid_t ino = inode->i_ino; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1380,9 +1366,13 @@ continue_unlock: if (!atomic || page == last_page) { set_fsync_mark(page, 1); - if (IS_INODE(page)) + if (IS_INODE(page)) { + if (is_inode_flag_set(inode, + FI_DIRTY_INODE)) + update_inode(inode, page); set_dentry_mark(page, need_dentry_mark(sbi, ino)); + } /* may be written by other thread */ if (!PageDirty(page)) set_page_dirty(page); @@ -1630,6 +1620,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct blk_plug plug; long diff; /* balancing f2fs's metadata in background */ @@ -1643,7 +1634,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, NODE, wbc); wbc->sync_mode = WB_SYNC_NONE; + blk_start_plug(&plug); sync_node_pages(sbi, wbc); + blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return 0; @@ -1657,9 +1650,10 @@ static int f2fs_set_node_page_dirty(struct page *page) { trace_f2fs_set_page_dirty(page, NODE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); f2fs_trace_pid(page); @@ -1778,7 +1772,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, } } -static void build_free_nids(struct f2fs_sb_info *sbi) +void build_free_nids(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -1787,14 +1781,14 @@ static void build_free_nids(struct f2fs_sb_info *sbi) nid_t nid = nm_i->next_scan_nid; /* Enough entries */ - if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) + if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK) return; /* readahead nat pages to be scanned */ ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); while (1) { struct page *page = get_current_nat_page(sbi, nid); @@ -1826,7 +1820,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) remove_free_nid(nm_i, nid); } up_read(&curseg->journal_rwsem); - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -1925,12 +1919,15 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) struct free_nid *i, *next; int nr = nr_shrink; + if (nm_i->fcnt <= MAX_FREE_NIDS) + return 0; + if (!mutex_trylock(&nm_i->build_lock)) return 0; spin_lock(&nm_i->free_nid_list_lock); list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { - if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK) + if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS) break; if (i->state == NID_ALLOC) continue; @@ -1957,7 +1954,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page) ri = F2FS_INODE(page); if (!(ri->i_inline & F2FS_INLINE_XATTR)) { - clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR); + clear_inode_flag(inode, FI_INLINE_XATTR); goto update_inode; } @@ -1999,13 +1996,11 @@ recover_xnid: get_node_info(sbi, new_xnid, &ni); ni.ino = inode->i_ino; set_node_addr(sbi, &ni, NEW_ADDR, false); - F2FS_I(inode)->i_xattr_nid = new_xnid; + f2fs_i_xnid_write(inode, new_xnid); /* 3: update xattr blkaddr */ refresh_sit_entry(sbi, NEW_ADDR, blkaddr); set_node_addr(sbi, &ni, blkaddr, false); - - update_inode_page(inode); } int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) @@ -2027,7 +2022,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); - SetPageUptodate(ipage); + if (!PageUptodate(ipage)) + SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); src = F2FS_INODE(page); @@ -2213,7 +2209,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!nm_i->dirty_nat_cnt) return; - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -2236,7 +2232,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) list_for_each_entry_safe(set, tmp, &sets, set_list) __flush_nat_entry_set(sbi, set); - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } @@ -2272,7 +2268,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - init_rwsem(&nm_i->nat_tree_lock); + if (percpu_init_rwsem(&nm_i->nat_tree_lock)) + return -ENOMEM; nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2329,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2354,8 +2351,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); + percpu_free_rwsem(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 1f4f9d4569d9..fc7684554b1a 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -15,18 +15,21 @@ #define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) /* # of pages to perform synchronous readahead before building free nids */ -#define FREE_NID_PAGES 4 +#define FREE_NID_PAGES 8 +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) -#define DEF_RA_NID_PAGES 4 /* # of nid pages to be readaheaded */ +#define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */ /* maximum readahead size for node during getting data blocks */ #define MAX_RA_NODE 128 /* control the memory footprint threshold (10MB per 1GB ram) */ -#define DEF_RAM_THRESHOLD 10 +#define DEF_RAM_THRESHOLD 1 /* control dirty nats ratio threshold (default: 10% over max nid count) */ #define DEF_DIRTY_NAT_RATIO_THRESHOLD 10 +/* control total # of nats */ +#define DEF_NAT_CACHE_THRESHOLD 100000 /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 @@ -126,6 +129,11 @@ static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) NM_I(sbi)->dirty_nats_ratio / 100; } +static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) +{ + return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; +} + enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 3d7216d7a288..9e652d5a659b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -153,9 +153,12 @@ retry: f2fs_delete_entry(de, page, dir, einode); iput(einode); goto retry; + } else if (IS_ERR(page)) { + err = PTR_ERR(page); + } else { + err = __f2fs_add_link(dir, &name, inode, + inode->i_ino, inode->i_mode); } - err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode); - goto out; out_unmap_put: @@ -175,7 +178,7 @@ static void recover_inode(struct inode *inode, struct page *page) char *name; inode->i_mode = le16_to_cpu(raw->i_mode); - i_size_write(inode, le64_to_cpu(raw->i_size)); + f2fs_i_size_write(inode, le64_to_cpu(raw->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime); inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime); inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime); @@ -455,6 +458,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, continue; } + if ((start + 1) << PAGE_SHIFT > i_size_read(inode)) + f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT); + /* * dest is reserved block, invalidate src block * and then reserve one new block in dnode page. @@ -476,6 +482,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, #endif /* We should not get -ENOSPC */ f2fs_bug_on(sbi, err); + if (err) + goto err; } /* Check the previous node page having this index */ @@ -490,9 +498,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } } - if (IS_INODE(dn.node_page)) - sync_inode_page(&dn); - copy_node_footer(dn.node_page, page); fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page), false); @@ -624,8 +629,12 @@ out: if (err) { bool invalidate = false; - if (discard_next_dnode(sbi, blkaddr)) + if (test_opt(sbi, LFS)) { + update_meta_page(sbi, NULL, blkaddr); + invalidate = true; + } else if (discard_next_dnode(sbi, blkaddr)) { invalidate = true; + } /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4c2d1fa1e0e2..a46296f57b02 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -241,7 +241,7 @@ void drop_inmem_pages(struct inode *inode) { struct f2fs_inode_info *fi = F2FS_I(inode); - clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); + clear_inode_flag(inode, FI_ATOMIC_FILE); mutex_lock(&fi->inmem_lock); __revoke_inmem_pages(inode, &fi->inmem_pages, true, false); @@ -346,6 +346,11 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (!need) return; + + /* balance_fs_bg is able to be pending */ + if (excess_cached_nats(sbi)) + f2fs_balance_fs_bg(sbi); + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. @@ -367,7 +372,9 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); if (!available_free_memory(sbi, FREE_NIDS)) - try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES); + try_to_free_nids(sbi, MAX_FREE_NIDS); + else + build_free_nids(sbi); /* checkpoint is the only way to shrink partial cached entries */ if (!available_free_memory(sbi, NAT_ENTRIES) || @@ -435,25 +442,29 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE)) { + if (!test_opt(sbi, FLUSH_MERGE) || !atomic_read(&fcc->submit_flush)) { struct bio *bio = f2fs_bio_alloc(0); int ret; + atomic_inc(&fcc->submit_flush); bio->bi_bdev = sbi->sb->s_bdev; bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH); ret = submit_bio_wait(bio); + atomic_dec(&fcc->submit_flush); bio_put(bio); return ret; } init_completion(&cmd.wait); + atomic_inc(&fcc->submit_flush); llist_add(&cmd.llnode, &fcc->issue_list); if (!fcc->dispatch_list) wake_up(&fcc->flush_wait_queue); wait_for_completion(&cmd.wait); + atomic_dec(&fcc->submit_flush); return cmd.ret; } @@ -467,6 +478,7 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; + atomic_set(&fcc->submit_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->cmd_control_info = fcc; @@ -668,6 +680,10 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1); + if (force && start && end != max_blocks + && (end - start) < cpc->trim_minlen) + continue; + __add_discard_entry(sbi, cpc, se, start, end); } } @@ -705,6 +721,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; + unsigned int secno, start_segno; + bool force = (cpc->reason == CP_DISCARD); mutex_lock(&dirty_i->seglist_lock); @@ -721,17 +739,31 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) dirty_i->nr_dirty[PRE] -= end - start; - if (!test_opt(sbi, DISCARD)) + if (force || !test_opt(sbi, DISCARD)) continue; - f2fs_issue_discard(sbi, START_BLOCK(sbi, start), + if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) { + f2fs_issue_discard(sbi, START_BLOCK(sbi, start), (end - start) << sbi->log_blocks_per_seg); + continue; + } +next: + secno = GET_SECNO(sbi, start); + start_segno = secno * sbi->segs_per_sec; + if (!IS_CURSEC(sbi, secno) && + !get_valid_blocks(sbi, start, sbi->segs_per_sec)) + f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), + sbi->segs_per_sec << sbi->log_blocks_per_seg); + + start = start_segno + sbi->segs_per_sec; + if (start < end) + goto next; } mutex_unlock(&dirty_i->seglist_lock); /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { - if (cpc->reason == CP_DISCARD && entry->len < cpc->trim_minlen) + if (force && entry->len < cpc->trim_minlen) goto skip; f2fs_issue_discard(sbi, entry->blkaddr, entry->len); cpc->trimmed += entry->len; @@ -1219,6 +1251,9 @@ void allocate_new_segments(struct f2fs_sb_info *sbi) { int i; + if (test_opt(sbi, LFS)) + return; + for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) __allocate_new_segments(sbi, i); } @@ -1392,11 +1427,17 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) { int type = __get_segment_type(fio->page, fio->type); + if (fio->type == NODE || fio->type == DATA) + mutex_lock(&fio->sbi->wio_mutex[fio->type]); + allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type); /* writeout dirty page into bdev */ f2fs_submit_page_mbio(fio); + + if (fio->type == NODE || fio->type == DATA) + mutex_unlock(&fio->sbi->wio_mutex[fio->type]); } void write_meta_page(struct f2fs_sb_info *sbi, struct page *page) @@ -2377,7 +2418,11 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); sm_info->rec_prefree_segments = sm_info->main_segments * DEF_RECLAIM_PREFREE_SEGMENTS / 100; - sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; + if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS) + sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS; + + if (!test_opt(sbi, LFS)) + sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 7a756ff5a36d..b33f73ec60a4 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -16,6 +16,7 @@ #define NULL_SECNO ((unsigned int)(~0)) #define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */ +#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */ /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) @@ -470,6 +471,10 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi) { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + + if (test_opt(sbi, LFS)) + return false; + return free_sections(sbi) <= (node_secs + 2 * dent_secs + reserved_sections(sbi) + 1); } @@ -479,6 +484,8 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; @@ -531,6 +538,9 @@ static inline bool need_inplace_update(struct inode *inode) if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) return false; + if (test_opt(sbi, LFS)) + return false; + if (policy & (0x1 << F2FS_IPU_FORCE)) return true; if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi)) @@ -544,7 +554,7 @@ static inline bool need_inplace_update(struct inode *inode) /* this is only set during fdatasync */ if (policy & (0x1 << F2FS_IPU_FSYNC) && - is_inode_flag_set(F2FS_I(inode), FI_NEED_IPU)) + is_inode_flag_set(inode, FI_NEED_IPU)) return true; return false; @@ -706,9 +716,9 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) if (type == DATA) return sbi->blocks_per_seg; else if (type == NODE) - return 3 * sbi->blocks_per_seg; + return 8 * sbi->blocks_per_seg; else if (type == META) - return MAX_BIO_BLOCKS(sbi); + return 8 * MAX_BIO_BLOCKS(sbi); else return 0; } @@ -726,10 +736,8 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, nr_to_write = wbc->nr_to_write; - if (type == DATA) - desired = 4096; - else if (type == NODE) - desired = 3 * max_hw_blocks(sbi); + if (type == NODE) + desired = 2 * max_hw_blocks(sbi); else desired = MAX_BIO_BLOCKS(sbi); diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 93606f281bf9..46c915425923 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -13,6 +13,7 @@ #include <linux/f2fs_fs.h> #include "f2fs.h" +#include "node.h" static LIST_HEAD(f2fs_list); static DEFINE_SPINLOCK(f2fs_list_lock); @@ -25,8 +26,8 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) { - if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK) - return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK; + if (NM_I(sbi)->fcnt > MAX_FREE_NIDS) + return NM_I(sbi)->fcnt - MAX_FREE_NIDS; return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 74cc8520b8b1..b97c065cbe74 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_ORPHAN] = "orphan", [FAULT_BLOCK] = "no more block", [FAULT_DIR_DEPTH] = "too big dir depth", + [FAULT_EVICT_INODE] = "evict_inode fail", }; static void f2fs_build_fault_attr(unsigned int rate) @@ -75,6 +76,7 @@ enum { Opt_disable_roll_forward, Opt_norecovery, Opt_discard, + Opt_nodiscard, Opt_noheap, Opt_user_xattr, Opt_nouser_xattr, @@ -86,13 +88,17 @@ enum { Opt_inline_data, Opt_inline_dentry, Opt_flush_merge, + Opt_noflush_merge, Opt_nobarrier, Opt_fastboot, Opt_extent_cache, Opt_noextent_cache, Opt_noinline_data, Opt_data_flush, + Opt_mode, Opt_fault_injection, + Opt_lazytime, + Opt_nolazytime, Opt_err, }; @@ -101,6 +107,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_roll_forward, "disable_roll_forward"}, {Opt_norecovery, "norecovery"}, {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, {Opt_noheap, "no_heap"}, {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, @@ -112,13 +119,17 @@ static match_table_t f2fs_tokens = { {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, + {Opt_noflush_merge, "noflush_merge"}, {Opt_nobarrier, "nobarrier"}, {Opt_fastboot, "fastboot"}, {Opt_extent_cache, "extent_cache"}, {Opt_noextent_cache, "noextent_cache"}, {Opt_noinline_data, "noinline_data"}, {Opt_data_flush, "data_flush"}, + {Opt_mode, "mode=%s"}, {Opt_fault_injection, "fault_injection=%u"}, + {Opt_lazytime, "lazytime"}, + {Opt_nolazytime, "nolazytime"}, {Opt_err, NULL}, }; @@ -417,6 +428,8 @@ static int parse_options(struct super_block *sb, char *options) "the device does not support discard"); } break; + case Opt_nodiscard: + clear_opt(sbi, DISCARD); case Opt_noheap: set_opt(sbi, NOHEAP); break; @@ -478,6 +491,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; + case Opt_noflush_merge: + clear_opt(sbi, FLUSH_MERGE); + break; case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; @@ -496,6 +512,23 @@ static int parse_options(struct super_block *sb, char *options) case Opt_data_flush: set_opt(sbi, DATA_FLUSH); break; + case Opt_mode: + name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (strlen(name) == 8 && + !strncmp(name, "adaptive", 8)) { + set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + } else if (strlen(name) == 3 && + !strncmp(name, "lfs", 3)) { + set_opt_mode(sbi, F2FS_MOUNT_LFS); + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; case Opt_fault_injection: if (args->from && match_int(args, &arg)) return -EINVAL; @@ -506,6 +539,12 @@ static int parse_options(struct super_block *sb, char *options) "FAULT_INJECTION was not selected"); #endif break; + case Opt_lazytime: + sb->s_flags |= MS_LAZYTIME; + break; + case Opt_nolazytime: + sb->s_flags &= ~MS_LAZYTIME; + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -537,13 +576,11 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); + INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); - - set_inode_flag(fi, FI_NEW_INODE); - - if (test_opt(F2FS_SB(sb), INLINE_XATTR)) - set_inode_flag(fi, FI_INLINE_XATTR); + init_rwsem(&fi->dio_rwsem[READ]); + init_rwsem(&fi->dio_rwsem[WRITE]); /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -559,7 +596,7 @@ static int f2fs_drop_inode(struct inode *inode) * - f2fs_gc -> iput -> evict * - inode_wait_for_writeback(inode) */ - if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { + if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) { if (!inode->i_nlink && !is_bad_inode(inode)) { /* to avoid evict_inode call simultaneously */ atomic_inc(&inode->i_count); @@ -573,10 +610,10 @@ static int f2fs_drop_inode(struct inode *inode) f2fs_destroy_extent_node(inode); sb_start_intwrite(inode->i_sb); - i_size_write(inode, 0); + f2fs_i_size_write(inode, 0); if (F2FS_HAS_BLOCKS(inode)) - f2fs_truncate(inode, true); + f2fs_truncate(inode); sb_end_intwrite(inode->i_sb); @@ -586,9 +623,47 @@ static int f2fs_drop_inode(struct inode *inode) } return 0; } + return generic_drop_inode(inode); } +int f2fs_inode_dirtied(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return 1; + } + + set_inode_flag(inode, FI_DIRTY_INODE); + list_add_tail(&F2FS_I(inode)->gdirty_list, + &sbi->inode_list[DIRTY_META]); + inc_page_count(sbi, F2FS_DIRTY_IMETA); + stat_inc_dirty_inode(sbi, DIRTY_META); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + + return 0; +} + +void f2fs_inode_synced(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return; + } + list_del_init(&F2FS_I(inode)->gdirty_list); + clear_inode_flag(inode, FI_DIRTY_INODE); + clear_inode_flag(inode, FI_AUTO_RECOVER); + dec_page_count(sbi, F2FS_DIRTY_IMETA); + stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); + spin_unlock(&sbi->inode_lock[DIRTY_META]); +} + /* * f2fs_dirty_inode() is called from __mark_inode_dirty() * @@ -596,7 +671,19 @@ static int f2fs_drop_inode(struct inode *inode) */ static void f2fs_dirty_inode(struct inode *inode, int flags) { - set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + return; + + if (flags == I_DIRTY_TIME) + return; + + if (is_inode_flag_set(inode, FI_AUTO_RECOVER)) + clear_inode_flag(inode, FI_AUTO_RECOVER); + + f2fs_inode_dirtied(inode); } static void f2fs_i_callback(struct rcu_head *head) @@ -619,6 +706,8 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi) percpu_counter_destroy(&sbi->nr_pages[i]); percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); + + percpu_free_rwsem(&sbi->cp_rwsem); } static void f2fs_put_super(struct super_block *sb) @@ -738,7 +827,7 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bsize = sbi->blocksize; buf->f_blocks = total_count - start_count; - buf->f_bfree = buf->f_blocks - valid_user_blocks(sbi) - ovp_count; + buf->f_bfree = user_block_count - valid_user_blocks(sbi) + ovp_count; buf->f_bavail = user_block_count - valid_user_blocks(sbi); buf->f_files = sbi->total_node_count - F2FS_RESERVED_NODE_NUM; @@ -803,6 +892,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noextent_cache"); if (test_opt(sbi, DATA_FLUSH)) seq_puts(seq, ",data_flush"); + + seq_puts(seq, ",mode="); + if (test_opt(sbi, ADAPTIVE)) + seq_puts(seq, "adaptive"); + else if (test_opt(sbi, LFS)) + seq_puts(seq, "lfs"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; @@ -884,6 +979,14 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, BG_GC); set_opt(sbi, INLINE_DATA); set_opt(sbi, EXTENT_CACHE); + sbi->sb->s_flags |= MS_LAZYTIME; + set_opt(sbi, FLUSH_MERGE); + if (f2fs_sb_mounted_hmsmr(sbi->sb)) { + set_opt_mode(sbi, F2FS_MOUNT_LFS); + set_opt(sbi, DISCARD); + } else { + set_opt_mode(sbi, F2FS_MOUNT_ADAPTIVE); + } #ifdef CONFIG_F2FS_FS_XATTR set_opt(sbi, XATTR_USER); @@ -1367,6 +1470,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); + mutex_init(&sbi->wio_mutex[NODE]); + mutex_init(&sbi->wio_mutex[DATA]); #ifdef CONFIG_F2FS_FS_ENCRYPTION memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX, @@ -1379,6 +1484,9 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) { int i, err; + if (percpu_init_rwsem(&sbi->cp_rwsem)) + return -ENOMEM; + for (i = 0; i < NR_COUNT_TYPE; i++) { err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); if (err) @@ -1530,6 +1638,8 @@ try_onemore: goto free_sbi; sb->s_fs_info = sbi; + sbi->raw_super = raw_super; + default_options(sbi); /* parse mount options */ options = kstrdup((const char *)data, GFP_KERNEL); @@ -1559,10 +1669,8 @@ try_onemore: memcpy(sb->s_uuid, raw_super->uuid, sizeof(raw_super->uuid)); /* init f2fs-specific super block info */ - sbi->raw_super = raw_super; sbi->valid_super_block = valid_super_block; mutex_init(&sbi->gc_mutex); - mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); @@ -1579,7 +1687,6 @@ try_onemore: sbi->write_io[i].bio = NULL; } - init_rwsem(&sbi->cp_rwsem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); @@ -1762,6 +1869,7 @@ try_onemore: return 0; free_kobj: + f2fs_sync_inode_meta(sbi); kobject_del(&sbi->s_kobj); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index e3decae3acfb..c8898b5148eb 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, return -EINVAL; F2FS_I(inode)->i_advise |= *(char *)value; - mark_inode_dirty(inode); + f2fs_mark_inode_dirty_sync(inode); return 0; } @@ -299,6 +299,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, if (ipage) { inline_addr = inline_xattr_addr(ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); + set_page_dirty(ipage); } else { page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -441,13 +442,12 @@ static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, struct page *ipage, int flags) { - struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *here, *last; void *base_addr; int found, newsize; size_t len; __u32 new_hsize; - int error = -ENOMEM; + int error = 0; if (name == NULL) return -EINVAL; @@ -465,7 +465,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, base_addr = read_all_xattrs(inode, ipage); if (!base_addr) - goto exit; + return -ENOMEM; /* find entry with wanted name. */ here = __find_xattr(base_addr, index, len, name); @@ -539,19 +539,15 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (error) goto exit; - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - inode->i_mode = fi->i_acl_mode; + if (is_inode_flag_set(inode, FI_ACL_MODE)) { + inode->i_mode = F2FS_I(inode)->i_acl_mode; inode->i_ctime = CURRENT_TIME; - clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(inode, FI_ACL_MODE); } if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - - if (ipage) - update_inode(inode, ipage); - else - update_inode_page(inode); + f2fs_mark_inode_dirty_sync(inode); exit: kzfree(base_addr); return error; |