From 91942321e4c9f8460f260cdfcf0a7a48a73a84a4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 10:13:22 -0700 Subject: f2fs: use inode pointer for {set, clear}_inode_flag This patch refactors to use inode pointer for set_inode_flag and clear_inode_flag. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1f21aae80c40..8001020f7762 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1019,7 +1019,7 @@ struct page *new_node_page(struct dnode_of_data *dn, struct page *page; int err; - if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) + if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return ERR_PTR(-EPERM); page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false); @@ -1955,7 +1955,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page) ri = F2FS_INODE(page); if (!(ri->i_inline & F2FS_INLINE_XATTR)) { - clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR); + clear_inode_flag(inode, FI_INLINE_XATTR); goto update_inode; } -- cgit v1.2.3 From 205b98221cdf72b1cbdedf55f93d193999616e6e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 09:52:20 -0700 Subject: f2fs: call mark_inode_dirty_sync for i_field changes This patch calls mark_inode_dirty_sync() for the following on-disk inode changes. -> largest -> ctime/mtime/atime -> i_current_depth -> i_xattr_nid -> i_pino -> i_advise -> i_flags -> i_mode Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 2 ++ fs/f2fs/dir.c | 14 ++++++------ fs/f2fs/extent_cache.c | 24 ++++++++++++--------- fs/f2fs/f2fs.h | 58 +++++++++++++++++++++++++++++++++++++++++++------- fs/f2fs/file.c | 12 +++++------ fs/f2fs/inline.c | 7 +++--- fs/f2fs/inode.c | 1 + fs/f2fs/namei.c | 11 ++++------ fs/f2fs/node.c | 6 +++--- fs/f2fs/xattr.c | 3 ++- 10 files changed, 92 insertions(+), 46 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 1b2c20228300..6a414e75e705 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -384,6 +384,8 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, if (error) return error; + mark_inode_dirty_sync(inode); + if (default_acl) { error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl, ipage); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index f8ca0f31271f..384d51cb77bf 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -243,8 +243,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, "Corrupted max_depth of %lu: %u", dir->i_ino, max_depth); max_depth = MAX_DIR_HASH_DEPTH; - F2FS_I(dir)->i_current_depth = max_depth; - mark_inode_dirty(dir); + f2fs_i_depth_write(dir, max_depth); } for (level = 0; level < max_depth; level++) { @@ -303,9 +302,9 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, set_de_type(de, inode->i_mode); f2fs_dentry_kunmap(dir, page); set_page_dirty(page); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty(dir); + dir->i_mtime = dir->i_ctime = CURRENT_TIME; + mark_inode_dirty_sync(dir); f2fs_put_page(page, 1); } @@ -462,10 +461,10 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; - mark_inode_dirty(dir); + mark_inode_dirty_sync(dir); if (F2FS_I(dir)->i_current_depth != current_depth) { - F2FS_I(dir)->i_current_depth = current_depth; + f2fs_i_depth_write(dir, current_depth); set_inode_flag(dir, FI_UPDATE_DIR); } @@ -597,7 +596,7 @@ add_dentry: if (inode) { /* we don't need to mark_inode_dirty now */ - F2FS_I(inode)->i_pino = dir->i_ino; + f2fs_i_pino_write(inode, dir->i_ino); update_inode(inode, page); f2fs_put_page(page, 1); } @@ -730,6 +729,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); dir->i_ctime = dir->i_mtime = CURRENT_TIME; + mark_inode_dirty_sync(dir); if (inode) f2fs_drop_nlink(dir, inode, NULL); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 852a0b6f0600..d21dda607bf2 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -170,8 +170,10 @@ static void __drop_largest_extent(struct inode *inode, { struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest; - if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) + if (fofs < largest->fofs + largest->len && fofs + len > largest->fofs) { largest->len = 0; + mark_inode_dirty_sync(inode); + } } /* return true, if inode page is changed */ @@ -335,11 +337,12 @@ lookup_neighbors: return en; } -static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, +static struct extent_node *__try_merge_extent_node(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, struct extent_node *next_ex) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_node *en = NULL; if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) { @@ -360,7 +363,7 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, if (!en) return NULL; - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); spin_lock(&sbi->extent_lock); if (!list_empty(&en->list)) { @@ -371,11 +374,12 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi, return en; } -static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, +static struct extent_node *__insert_extent_tree(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct rb_node **insert_p, struct rb_node *insert_parent) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct rb_node **p = &et->root.rb_node; struct rb_node *parent = NULL; struct extent_node *en = NULL; @@ -402,7 +406,7 @@ do_insert: if (!en) return NULL; - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); /* update in global extent list */ spin_lock(&sbi->extent_lock); @@ -473,7 +477,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, set_extent_info(&ei, end, end - dei.fofs + dei.blk, org_end - end); - en1 = __insert_extent_tree(sbi, et, &ei, + en1 = __insert_extent_tree(inode, et, &ei, NULL, NULL); next_en = en1; } else { @@ -494,7 +498,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, } if (parts) - __try_update_largest_extent(et, en); + __try_update_largest_extent(inode, et, en); else __release_extent_node(sbi, et, en); @@ -514,15 +518,15 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode, if (blkaddr) { set_extent_info(&ei, fofs, blkaddr, len); - if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en)) - __insert_extent_tree(sbi, et, &ei, + if (!__try_merge_extent_node(inode, et, &ei, prev_en, next_en)) + __insert_extent_tree(inode, et, &ei, insert_p, insert_parent); /* give up extent_cache, if split and small updates happen */ if (dei.len >= 1 && prev.len < F2FS_MIN_EXTENT_LEN && et->largest.len < F2FS_MIN_EXTENT_LEN) { - et->largest.len = 0; + __drop_largest_extent(inode, 0, UINT_MAX); set_inode_flag(inode, FI_NO_EXTENT); } } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d98aaf3829a6..0534d7a18b35 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -498,11 +498,13 @@ static inline bool __is_front_mergeable(struct extent_info *cur, return __is_extent_mergeable(cur, front); } -static inline void __try_update_largest_extent(struct extent_tree *et, - struct extent_node *en) +static inline void __try_update_largest_extent(struct inode *inode, + struct extent_tree *et, struct extent_node *en) { - if (en->ei.len > et->largest.len) + if (en->ei.len > et->largest.len) { et->largest = en->ei; + mark_inode_dirty_sync(inode); + } } struct f2fs_nm_info { @@ -1534,10 +1536,26 @@ enum { FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ }; +static inline void __mark_inode_dirty_flag(struct inode *inode, + int flag, bool set) +{ + switch (flag) { + case FI_INLINE_XATTR: + case FI_INLINE_DATA: + case FI_INLINE_DENTRY: + if (set) + return; + case FI_DATA_EXIST: + case FI_INLINE_DOTS: + mark_inode_dirty_sync(inode); + } +} + static inline void set_inode_flag(struct inode *inode, int flag) { if (!test_bit(flag, &F2FS_I(inode)->flags)) set_bit(flag, &F2FS_I(inode)->flags); + __mark_inode_dirty_flag(inode, flag, true); } static inline int is_inode_flag_set(struct inode *inode, int flag) @@ -1549,12 +1567,14 @@ static inline void clear_inode_flag(struct inode *inode, int flag) { if (test_bit(flag, &F2FS_I(inode)->flags)) clear_bit(flag, &F2FS_I(inode)->flags); + __mark_inode_dirty_flag(inode, flag, false); } static inline void set_acl_inode(struct inode *inode, umode_t mode) { F2FS_I(inode)->i_acl_mode = mode; set_inode_flag(inode, FI_ACL_MODE); + mark_inode_dirty_sync(inode); } static inline void f2fs_i_links_write(struct inode *inode, bool inc) @@ -1583,18 +1603,38 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) mark_inode_dirty_sync(inode); } +static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) +{ + F2FS_I(inode)->i_current_depth = depth; + mark_inode_dirty_sync(inode); +} + +static inline void f2fs_i_xnid_write(struct inode *inode, nid_t xnid) +{ + F2FS_I(inode)->i_xattr_nid = xnid; + mark_inode_dirty_sync(inode); +} + +static inline void f2fs_i_pino_write(struct inode *inode, nid_t pino) +{ + F2FS_I(inode)->i_pino = pino; + mark_inode_dirty_sync(inode); +} + static inline void get_inline_info(struct inode *inode, struct f2fs_inode *ri) { + struct f2fs_inode_info *fi = F2FS_I(inode); + if (ri->i_inline & F2FS_INLINE_XATTR) - set_inode_flag(inode, FI_INLINE_XATTR); + set_bit(FI_INLINE_XATTR, &fi->flags); if (ri->i_inline & F2FS_INLINE_DATA) - set_inode_flag(inode, FI_INLINE_DATA); + set_bit(FI_INLINE_DATA, &fi->flags); if (ri->i_inline & F2FS_INLINE_DENTRY) - set_inode_flag(inode, FI_INLINE_DENTRY); + set_bit(FI_INLINE_DENTRY, &fi->flags); if (ri->i_inline & F2FS_DATA_EXIST) - set_inode_flag(inode, FI_DATA_EXIST); + set_bit(FI_DATA_EXIST, &fi->flags); if (ri->i_inline & F2FS_INLINE_DOTS) - set_inode_flag(inode, FI_INLINE_DOTS); + set_bit(FI_INLINE_DOTS, &fi->flags); } static inline void set_raw_inline(struct inode *inode, struct f2fs_inode *ri) @@ -1706,11 +1746,13 @@ static inline int is_file(struct inode *inode, int type) static inline void set_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise |= type; + mark_inode_dirty_sync(inode); } static inline void clear_file(struct inode *inode, int type) { F2FS_I(inode)->i_advise &= ~type; + mark_inode_dirty_sync(inode); } static inline int f2fs_readonly(struct super_block *sb) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d0f42587309f..c5606b1e1a89 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -171,11 +171,10 @@ static void try_to_fix_pino(struct inode *inode) fi->xattr_ver = 0; if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { - fi->i_pino = pino; + f2fs_i_pino_write(inode, pino); file_got_pino(inode); up_write(&fi->i_sem); - mark_inode_dirty_sync(inode); f2fs_write_inode(inode, NULL); } else { up_write(&fi->i_sem); @@ -636,7 +635,7 @@ int f2fs_truncate(struct inode *inode, bool lock) return err; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); return 0; } @@ -726,7 +725,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) } } - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); return err; } @@ -1279,7 +1278,7 @@ static long f2fs_fallocate(struct file *file, int mode, if (!ret) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); } @@ -1370,9 +1369,8 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) fi->i_flags = flags; inode_unlock(inode); - f2fs_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(inode); + f2fs_set_inode_flags(inode); out: mnt_drop_write_file(filp); return ret; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 7a9bc442dd77..4bc025c29f82 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -400,7 +400,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - F2FS_I(dir)->i_current_depth = 1; + f2fs_i_depth_write(dir, 1); if (i_size_read(dir) < PAGE_SIZE) { f2fs_i_size_write(dir, PAGE_SIZE); set_inode_flag(dir, FI_UPDATE_DIR); @@ -492,7 +492,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, recover: lock_page(ipage); memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); - F2FS_I(dir)->i_current_depth = 0; + f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA); update_inode(dir, ipage); f2fs_put_page(ipage, 1); @@ -558,7 +558,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, /* we don't need to mark_inode_dirty now */ if (inode) { - F2FS_I(inode)->i_pino = dir->i_ino; + f2fs_i_pino_write(inode, dir->i_ino); update_inode(inode, page); f2fs_put_page(page, 1); } @@ -597,6 +597,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, set_page_dirty(page); dir->i_ctime = dir->i_mtime = CURRENT_TIME; + mark_inode_dirty_sync(dir); if (inode) f2fs_drop_nlink(dir, inode, page); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 62d8c9052f9b..34aa0949e48c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -35,6 +35,7 @@ void f2fs_set_inode_flags(struct inode *inode) new_fl |= S_DIRSYNC; inode_set_flags(inode, new_fl, S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + mark_inode_dirty_sync(inode); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index af7c75ab9343..f2b2c4068648 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -76,7 +76,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) stat_inc_inline_dir(inode); trace_f2fs_new_inode(inode, 0); - mark_inode_dirty(inode); return inode; fail: @@ -247,10 +246,8 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) err = __f2fs_add_link(dir, &dotdot, NULL, pino, S_IFDIR); } out: - if (!err) { + if (!err) clear_inode_flag(dir, FI_INLINE_DOTS); - mark_inode_dirty(dir); - } f2fs_unlock_op(sbi); return err; @@ -756,7 +753,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = CURRENT_TIME; - mark_inode_dirty(old_inode); + mark_inode_dirty_sync(old_inode); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); @@ -910,7 +907,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(old_dir, old_nlink > 0); up_write(&F2FS_I(old_dir)->i_sem); } - mark_inode_dirty(old_dir); + mark_inode_dirty_sync(old_dir); update_inode_page(old_dir); /* update directory entry info of new dir inode */ @@ -928,7 +925,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(new_dir, new_nlink > 0); up_write(&F2FS_I(new_dir)->i_sem); } - mark_inode_dirty(new_dir); + mark_inode_dirty_sync(new_dir); update_inode_page(new_dir); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8001020f7762..0635304c50ac 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -953,7 +953,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page) if (IS_ERR(npage)) return PTR_ERR(npage); - F2FS_I(inode)->i_xattr_nid = 0; + f2fs_i_xnid_write(inode, 0); /* need to do checkpoint during fsync */ F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi)); @@ -1047,7 +1047,7 @@ struct page *new_node_page(struct dnode_of_data *dn, dn->node_changed = true; if (f2fs_has_xattr_block(ofs)) - F2FS_I(dn->inode)->i_xattr_nid = dn->nid; + f2fs_i_xnid_write(dn->inode, dn->nid); dn->node_page = page; if (ipage) @@ -1997,7 +1997,7 @@ recover_xnid: get_node_info(sbi, new_xnid, &ni); ni.ino = inode->i_ino; set_node_addr(sbi, &ni, NEW_ADDR, false); - F2FS_I(inode)->i_xattr_nid = new_xnid; + f2fs_i_xnid_write(inode, new_xnid); /* 3: update xattr blkaddr */ refresh_sit_entry(sbi, NEW_ADDR, blkaddr); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index ca12d4b051f7..1fe6366a60c1 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -106,7 +106,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler, return -EINVAL; F2FS_I(inode)->i_advise |= *(char *)value; - mark_inode_dirty(inode); + mark_inode_dirty_sync(inode); return 0; } @@ -551,6 +551,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, update_inode(inode, ipage); else update_inode_page(inode); + mark_inode_dirty_sync(inode); exit: kzfree(base_addr); return error; -- cgit v1.2.3 From 0f18b462b2e5aff64b8638e8a47284b907351ef3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 11:10:10 -0700 Subject: f2fs: flush inode metadata when checkpoint is doing This patch registers all the inodes which have dirty metadata to sync when checkpoint is doing. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 36 +++++++++++++++++++++++++++++++++++ fs/f2fs/debug.c | 5 +++-- fs/f2fs/f2fs.h | 9 +++++++-- fs/f2fs/inode.c | 7 ++++++- fs/f2fs/node.c | 1 + fs/f2fs/segment.h | 2 ++ fs/f2fs/super.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 107 insertions(+), 7 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 02e0522beccf..5ddd15cd6c6c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -859,6 +859,34 @@ retry: goto retry; } +int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) +{ + struct list_head *head = &sbi->inode_list[DIRTY_META]; + struct inode *inode; + struct f2fs_inode_info *fi; + s64 total = get_pages(sbi, F2FS_DIRTY_IMETA); + + while (total--) { + if (unlikely(f2fs_cp_error(sbi))) + return -EIO; + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (list_empty(head)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return 0; + } + fi = list_entry(head->next, struct f2fs_inode_info, + gdirty_list); + inode = igrab(&fi->vfs_inode); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + if (inode) { + update_inode_page(inode); + iput(inode); + } + }; + return 0; +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -885,6 +913,14 @@ retry_flush_dents: goto retry_flush_dents; } + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { + f2fs_unlock_all(sbi); + err = f2fs_sync_inode_meta(sbi); + if (err) + goto out; + goto retry_flush_dents; + } + /* * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index d89a425055d0..badd407bb622 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -47,6 +47,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; + si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->wb_bios = atomic_read(&sbi->nr_wb_bios); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; @@ -304,8 +305,8 @@ static int stat_show(struct seq_file *s, void *v) si->inmem_pages, si->wb_bios); seq_printf(s, " - nodes: %4lld in %4d\n", si->ndirty_node, si->node_pages); - seq_printf(s, " - dents: %4lld in dirs:%4d\n", - si->ndirty_dent, si->ndirty_dirs); + seq_printf(s, " - dents: %4lld in dirs:%4d (%4d)\n", + si->ndirty_dent, si->ndirty_dirs, si->ndirty_all); seq_printf(s, " - datas: %4lld in files:%4d\n", si->ndirty_data, si->ndirty_files); seq_printf(s, " - meta: %4lld in %4d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0534d7a18b35..b541164ce5af 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -442,7 +442,8 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ - struct list_head dirty_list; /* linked in global dirty list */ + struct list_head dirty_list; /* dirty list for dirs and files */ + struct list_head gdirty_list; /* linked in global dirty list */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ struct extent_tree *extent_tree; /* cached extent_tree entry */ @@ -657,6 +658,7 @@ enum count_type { F2FS_DIRTY_NODES, F2FS_DIRTY_META, F2FS_INMEM_PAGES, + F2FS_DIRTY_IMETA, NR_COUNT_TYPE, }; @@ -707,6 +709,7 @@ struct f2fs_bio_info { enum inode_type { DIR_INODE, /* for dirty dir inode */ FILE_INODE, /* for dirty regular/symlink inode */ + DIRTY_META, /* for all dirtied inode metadata */ NR_INODE_TYPE, }; @@ -1899,6 +1902,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) /* * super.c */ +void f2fs_inode_synced(struct inode *); int f2fs_commit_super(struct f2fs_sb_info *, bool); int f2fs_sync_fs(struct super_block *, int); extern __printf(3, 4) @@ -2010,6 +2014,7 @@ void add_ino_entry(struct f2fs_sb_info *, nid_t, int type); void remove_ino_entry(struct f2fs_sb_info *, nid_t, int type); void release_ino_entry(struct f2fs_sb_info *, bool); bool exist_written_data(struct f2fs_sb_info *, nid_t, int); +int f2fs_sync_inode_meta(struct f2fs_sb_info *); int acquire_orphan_inode(struct f2fs_sb_info *); void release_orphan_inode(struct f2fs_sb_info *); void add_orphan_inode(struct f2fs_sb_info *, nid_t); @@ -2078,7 +2083,7 @@ struct f2fs_stat_info { unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages; - unsigned int ndirty_dirs, ndirty_files; + unsigned int ndirty_dirs, ndirty_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; int bg_gc, wb_bios; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 34aa0949e48c..2d892b6d5632 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -262,7 +262,7 @@ int update_inode(struct inode *inode, struct page *node_page) __set_inode_rdev(inode, ri); set_cold_node(inode, node_page); - clear_inode_flag(inode, FI_DIRTY_INODE); + f2fs_inode_synced(inode); /* deleted inode */ if (inode->i_nlink == 0) @@ -286,6 +286,7 @@ retry: } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } + f2fs_inode_synced(inode); return 0; } ret = update_inode(inode, node_page); @@ -360,6 +361,8 @@ retry: goto retry; } + if (err) + update_inode_page(inode); sb_end_intwrite(inode->i_sb); no_delete: stat_dec_inline_xattr(inode); @@ -381,6 +384,8 @@ no_delete: !exist_written_data(sbi, inode->i_ino, ORPHAN_INO)); out_clear: fscrypt_put_encryption_info(inode, NULL); + + f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE)); clear_inode(inode); } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 0635304c50ac..1965351b644c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -670,6 +670,7 @@ static void truncate_node(struct dnode_of_data *dn) if (dn->nid == dn->inode->i_ino) { remove_orphan_inode(sbi, dn->nid); dec_valid_inode_count(sbi); + f2fs_inode_synced(dn->inode); } else { sync_inode_page(dn); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index fcdd7310b961..5d016a1edf21 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -479,6 +479,8 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); + node_secs += get_blocktype_secs(sbi, F2FS_DIRTY_IMETA); + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index d832bf4051b5..b5144b81e4c4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -537,6 +537,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_advise = 0; init_rwsem(&fi->i_sem); INIT_LIST_HEAD(&fi->dirty_list); + INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); @@ -547,6 +548,8 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) static int f2fs_drop_inode(struct inode *inode) { + int ret; + /* * This is to avoid a deadlock condition like below. * writeback_single_inode(inode) @@ -554,7 +557,7 @@ static int f2fs_drop_inode(struct inode *inode) * - f2fs_gc -> iput -> evict * - inode_wait_for_writeback(inode) */ - if (!inode_unhashed(inode) && inode->i_state & I_SYNC) { + if ((!inode_unhashed(inode) && inode->i_state & I_SYNC)) { if (!inode->i_nlink && !is_bad_inode(inode)) { /* to avoid evict_inode call simultaneously */ atomic_inc(&inode->i_count); @@ -581,7 +584,20 @@ static int f2fs_drop_inode(struct inode *inode) } return 0; } - return generic_drop_inode(inode); + + ret = generic_drop_inode(inode); + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + if (ret) + inode->i_state |= I_WILL_FREE; + spin_unlock(&inode->i_lock); + + update_inode_page(inode); + + spin_lock(&inode->i_lock); + if (ret) + inode->i_state &= ~I_WILL_FREE; + } + return ret; } /* @@ -591,7 +607,40 @@ static int f2fs_drop_inode(struct inode *inode) */ static void f2fs_dirty_inode(struct inode *inode, int flags) { + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + if (inode->i_ino == F2FS_NODE_INO(sbi) || + inode->i_ino == F2FS_META_INO(sbi)) + return; + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return; + } + set_inode_flag(inode, FI_DIRTY_INODE); + list_add_tail(&F2FS_I(inode)->gdirty_list, + &sbi->inode_list[DIRTY_META]); + inc_page_count(sbi, F2FS_DIRTY_IMETA); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + stat_inc_dirty_inode(sbi, DIRTY_META); +} + +void f2fs_inode_synced(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + spin_lock(&sbi->inode_lock[DIRTY_META]); + if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) { + spin_unlock(&sbi->inode_lock[DIRTY_META]); + return; + } + list_del_init(&F2FS_I(inode)->gdirty_list); + clear_inode_flag(inode, FI_DIRTY_INODE); + dec_page_count(sbi, F2FS_DIRTY_IMETA); + spin_unlock(&sbi->inode_lock[DIRTY_META]); + stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); } static void f2fs_i_callback(struct rcu_head *head) @@ -1757,6 +1806,7 @@ try_onemore: return 0; free_kobj: + f2fs_sync_inode_meta(sbi); kobject_del(&sbi->s_kobj); kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); -- cgit v1.2.3 From ee6d182f2a19d5d44607b5ae4bec523726d76a99 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 16:32:49 -0700 Subject: f2fs: remove syncing inode page in all the cases This patch reduces to call them across the whole tree. - sync_inode_page() - update_inode_page() - update_inode() - f2fs_write_inode() Instead, checkpoint will flush all the dirty inode metadata before syncing node pages. Note that, this is doable, since we call mark_inode_dirty_sync() for all inode's field change which needs to update on-disk inode as well. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 11 +---------- fs/f2fs/dir.c | 24 +++--------------------- fs/f2fs/extent_cache.c | 7 ++----- fs/f2fs/f2fs.h | 2 -- fs/f2fs/file.c | 15 +++------------ fs/f2fs/inline.c | 27 ++++++--------------------- fs/f2fs/namei.c | 15 +-------------- fs/f2fs/node.c | 28 ---------------------------- fs/f2fs/recovery.c | 3 --- fs/f2fs/xattr.c | 6 +----- 10 files changed, 17 insertions(+), 121 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6a4c60c2fd3a..a3dea51f4702 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -343,8 +343,6 @@ int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count) if (set_page_dirty(dn->node_page)) dn->node_changed = true; - - sync_inode_page(dn); return 0; } @@ -562,11 +560,8 @@ struct page *get_new_data_page(struct inode *inode, } got_it: if (new_i_size && i_size_read(inode) < - ((loff_t)(index + 1) << PAGE_SHIFT)) { + ((loff_t)(index + 1) << PAGE_SHIFT)) f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT)); - /* Only the directory inode sets new_i_size */ - set_inode_flag(inode, FI_UPDATE_DIR); - } return page; } @@ -787,8 +782,6 @@ skip: else if (dn.ofs_in_node < end_offset) goto next_block; - if (allocated) - sync_inode_page(&dn); f2fs_put_dnode(&dn); if (create) { @@ -799,8 +792,6 @@ skip: goto next_dnode; sync_out: - if (allocated) - sync_inode_page(&dn); f2fs_put_dnode(&dn); unlock_out: if (create) { diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 384d51cb77bf..24d1308838b5 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -454,19 +454,15 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (inode && is_inode_flag_set(inode, FI_NEW_INODE)) { - if (S_ISDIR(inode->i_mode)) { + if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, true); - set_inode_flag(dir, FI_UPDATE_DIR); - } clear_inode_flag(inode, FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty_sync(dir); - if (F2FS_I(dir)->i_current_depth != current_depth) { + if (F2FS_I(dir)->i_current_depth != current_depth) f2fs_i_depth_write(dir, current_depth); - set_inode_flag(dir, FI_UPDATE_DIR); - } if (inode && is_inode_flag_set(inode, FI_INC_LINK)) clear_inode_flag(inode, FI_INC_LINK); @@ -595,9 +591,7 @@ add_dentry: set_page_dirty(dentry_page); if (inode) { - /* we don't need to mark_inode_dirty now */ f2fs_i_pino_write(inode, dir->i_ino); - update_inode(inode, page); f2fs_put_page(page, 1); } @@ -606,10 +600,6 @@ fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - if (is_inode_flag_set(dir, FI_UPDATE_DIR)) { - update_inode_page(dir); - clear_inode_flag(dir, FI_UPDATE_DIR); - } kunmap(dentry_page); f2fs_put_page(dentry_page, 1); @@ -656,8 +646,6 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) err = PTR_ERR(page); goto fail; } - /* we don't need to mark_inode_dirty now */ - update_inode(inode, page); f2fs_put_page(page, 1); clear_inode_flag(inode, FI_NEW_INODE); @@ -673,13 +661,8 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) down_write(&F2FS_I(inode)->i_sem); - if (S_ISDIR(inode->i_mode)) { + if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, false); - if (page) - update_inode(dir, page); - else - update_inode_page(dir); - } inode->i_ctime = CURRENT_TIME; f2fs_i_links_write(inode, false); @@ -688,7 +671,6 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) f2fs_i_size_write(inode, 0); } up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); if (inode->i_nlink == 0) add_orphan_inode(sbi, inode->i_ino); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index d21dda607bf2..e858869d76cb 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -689,9 +689,7 @@ void f2fs_update_extent_cache(struct dnode_of_data *dn) fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) + dn->ofs_in_node; - - if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1)) - sync_inode_page(dn); + f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1); } void f2fs_update_extent_cache_range(struct dnode_of_data *dn, @@ -701,8 +699,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn, if (!f2fs_may_extent_tree(dn->inode)) return; - if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len)) - sync_inode_page(dn); + f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len); } void init_extent_cache_info(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b541164ce5af..2adef0e58461 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1521,7 +1521,6 @@ enum { FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ FI_FREE_NID, /* free allocated nide */ - FI_UPDATE_DIR, /* should update inode block for consistency */ FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ @@ -1936,7 +1935,6 @@ struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); -void sync_inode_page(struct dnode_of_data *); void move_node_page(struct page *, int); int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, bool); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c5606b1e1a89..73bc946974ad 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -173,12 +173,8 @@ static void try_to_fix_pino(struct inode *inode) get_parent_ino(inode, &pino)) { f2fs_i_pino_write(inode, pino); file_got_pino(inode); - up_write(&fi->i_sem); - - f2fs_write_inode(inode, NULL); - } else { - up_write(&fi->i_sem); } + up_write(&fi->i_sem); } static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, @@ -499,7 +495,6 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) dn->inode) + ofs; f2fs_update_extent_cache_range(dn, fofs, 0, len); dec_valid_block_count(sbi, dn->inode, nr_free); - sync_inode_page(dn); } dn->ofs_in_node = ofs; @@ -1123,10 +1118,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, } out: - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) f2fs_i_size_write(inode, new_size); - update_inode_page(inode); - } return ret; } @@ -1232,10 +1225,8 @@ static int expand_inode_data(struct inode *inode, loff_t offset, new_size = ((loff_t)pg_end << PAGE_SHIFT) + off_end; } - if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) { + if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) f2fs_i_size_write(inode, new_size); - update_inode_page(inode); - } return ret; } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4bc025c29f82..77c9c2439993 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -73,7 +73,7 @@ bool truncate_inline_inode(struct page *ipage, u64 from) f2fs_wait_on_page_writeback(ipage, NODE, true); memset(addr + from, 0, MAX_INLINE_DATA - from); - + set_page_dirty(ipage); return true; } @@ -146,7 +146,6 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) clear_out: stat_dec_inline_inode(dn->inode); f2fs_clear_inline_inode(dn->inode); - sync_inode_page(dn); f2fs_put_dnode(dn); return 0; } @@ -212,11 +211,11 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page) dst_addr = inline_data_addr(dn.inode_page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(src_addr); + set_page_dirty(dn.inode_page); set_inode_flag(inode, FI_APPEND_WRITE); set_inode_flag(inode, FI_DATA_EXIST); - sync_inode_page(&dn); clear_inline_node(dn.inode_page); f2fs_put_dnode(&dn); return 0; @@ -255,7 +254,7 @@ process_inline: set_inode_flag(inode, FI_INLINE_DATA); set_inode_flag(inode, FI_DATA_EXIST); - update_inode(inode, ipage); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); return true; } @@ -266,7 +265,6 @@ process_inline: if (!truncate_inline_inode(ipage, 0)) return false; f2fs_clear_inline_inode(inode); - update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { if (truncate_blocks(inode, 0, false)) @@ -339,10 +337,8 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, set_page_dirty(ipage); /* update i_size to MAX_INLINE_DATA */ - if (i_size_read(inode) < MAX_INLINE_DATA) { + if (i_size_read(inode) < MAX_INLINE_DATA) f2fs_i_size_write(inode, MAX_INLINE_DATA); - set_inode_flag(inode, FI_UPDATE_DIR); - } return 0; } @@ -401,12 +397,8 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, clear_inode_flag(dir, FI_INLINE_DENTRY); f2fs_i_depth_write(dir, 1); - if (i_size_read(dir) < PAGE_SIZE) { + if (i_size_read(dir) < PAGE_SIZE) f2fs_i_size_write(dir, PAGE_SIZE); - set_inode_flag(dir, FI_UPDATE_DIR); - } - - sync_inode_page(&dn); out: f2fs_put_page(page, 1); return err; @@ -486,7 +478,6 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); - update_inode(dir, ipage); kfree(backup_dentry); return 0; recover: @@ -494,7 +485,7 @@ recover: memcpy(inline_dentry, backup_dentry, MAX_INLINE_DATA); f2fs_i_depth_write(dir, 0); f2fs_i_size_write(dir, MAX_INLINE_DATA); - update_inode(dir, ipage); + set_page_dirty(ipage); f2fs_put_page(ipage, 1); kfree(backup_dentry); @@ -559,7 +550,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, /* we don't need to mark_inode_dirty now */ if (inode) { f2fs_i_pino_write(inode, dir->i_ino); - update_inode(inode, page); f2fs_put_page(page, 1); } @@ -567,11 +557,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, fail: if (inode) up_write(&F2FS_I(inode)->i_sem); - - if (is_inode_flag_set(dir, FI_UPDATE_DIR)) { - update_inode(dir, ipage); - clear_inode_flag(dir, FI_UPDATE_DIR); - } out: f2fs_put_page(ipage, 1); return err; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f2b2c4068648..496f4e3018b2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -706,9 +706,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, add_orphan_inode(sbi, new_inode->i_ino); else release_orphan_inode(sbi); - - update_inode_page(old_inode); - update_inode_page(new_inode); } else { f2fs_balance_fs(sbi, true); @@ -720,10 +717,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_whiteout; } - if (old_dir_entry) { + if (old_dir_entry) f2fs_i_links_write(new_dir, true); - update_inode_page(new_dir); - } /* * old entry and new entry can locate in the same inline @@ -771,13 +766,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (old_dir != new_dir && !whiteout) { f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); - update_inode_page(old_inode); } else { f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } f2fs_i_links_write(old_dir, false); - update_inode_page(old_dir); } f2fs_unlock_op(sbi); @@ -899,8 +892,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, file_lost_pino(old_inode); up_write(&F2FS_I(old_inode)->i_sem); - update_inode_page(old_inode); - old_dir->i_ctime = CURRENT_TIME; if (old_nlink) { down_write(&F2FS_I(old_dir)->i_sem); @@ -908,7 +899,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(old_dir)->i_sem); } mark_inode_dirty_sync(old_dir); - update_inode_page(old_dir); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); @@ -917,8 +907,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, file_lost_pino(new_inode); up_write(&F2FS_I(new_inode)->i_sem); - update_inode_page(new_inode); - new_dir->i_ctime = CURRENT_TIME; if (new_nlink) { down_write(&F2FS_I(new_dir)->i_sem); @@ -926,7 +914,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, up_write(&F2FS_I(new_dir)->i_sem); } mark_inode_dirty_sync(new_dir); - update_inode_page(new_dir); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 1965351b644c..82f0f833151e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -671,8 +671,6 @@ static void truncate_node(struct dnode_of_data *dn) remove_orphan_inode(sbi, dn->nid); dec_valid_inode_count(sbi); f2fs_inode_synced(dn->inode); - } else { - sync_inode_page(dn); } invalidate: clear_node_page_dirty(dn->node_page); @@ -1050,14 +1048,8 @@ struct page *new_node_page(struct dnode_of_data *dn, if (f2fs_has_xattr_block(ofs)) f2fs_i_xnid_write(dn->inode, dn->nid); - dn->node_page = page; - if (ipage) - update_inode(dn->inode, ipage); - else - sync_inode_page(dn); if (ofs == 0) inc_valid_inode_count(sbi); - return page; fail: @@ -1176,24 +1168,6 @@ struct page *get_node_page_ra(struct page *parent, int start) return __get_node_page(sbi, nid, parent, start); } -void sync_inode_page(struct dnode_of_data *dn) -{ - int ret = 0; - - if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) { - ret = update_inode(dn->inode, dn->node_page); - } else if (dn->inode_page) { - if (!dn->inode_page_locked) - lock_page(dn->inode_page); - ret = update_inode(dn->inode, dn->inode_page); - if (!dn->inode_page_locked) - unlock_page(dn->inode_page); - } else { - ret = update_inode_page(dn->inode); - } - dn->node_changed = ret ? true: false; -} - static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; @@ -2003,8 +1977,6 @@ recover_xnid: /* 3: update xattr blkaddr */ refresh_sit_entry(sbi, NEW_ADDR, blkaddr); set_node_addr(sbi, &ni, blkaddr, false); - - update_inode_page(inode); } int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2500b6a5daf0..68c433f17ab5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -490,9 +490,6 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } } - if (IS_INODE(dn.node_page)) - sync_inode_page(&dn); - copy_node_footer(dn.node_page, page); fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page), false); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 1fe6366a60c1..8c0a3b36a917 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -299,6 +299,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, if (ipage) { inline_addr = inline_xattr_addr(ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); + set_page_dirty(ipage); } else { page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -546,11 +547,6 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (index == F2FS_XATTR_INDEX_ENCRYPTION && !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); - - if (ipage) - update_inode(inode, ipage); - else - update_inode_page(inode); mark_inode_dirty_sync(inode); exit: kzfree(base_addr); -- cgit v1.2.3 From 26de9b11713057a16a9220423a2f137774763b0e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 20 May 2016 20:42:37 -0700 Subject: f2fs: avoid unnecessary updating inode during fsync If roll-forward recovery can recover i_size, we don't need to update inode's metadata during fsync. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/f2fs.h | 23 +++++++++++++++++++++-- fs/f2fs/file.c | 4 ++-- fs/f2fs/inode.c | 3 +++ fs/f2fs/node.c | 9 +++++++-- fs/f2fs/recovery.c | 3 +++ fs/f2fs/super.c | 4 ++++ 7 files changed, 43 insertions(+), 6 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a3dea51f4702..287582e12f8f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1204,6 +1204,7 @@ static int f2fs_write_data_page(struct page *page, loff_t i_size = i_size_read(inode); const pgoff_t end_index = ((unsigned long long) i_size) >> PAGE_SHIFT; + loff_t psize = (page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; @@ -1265,6 +1266,8 @@ write: err = f2fs_write_inline_data(inode, page); if (err == -EAGAIN) err = do_write_data_page(&fio); + if (F2FS_I(inode)->last_disk_size < psize) + F2FS_I(inode)->last_disk_size = psize; f2fs_unlock_op(sbi); done: if (err && err != -ENOENT) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2adef0e58461..bf1c8b0ef6c7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -441,6 +441,7 @@ struct f2fs_inode_info { unsigned int clevel; /* maximum level of given file name */ nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ + loff_t last_disk_size; /* lastly written file size */ struct list_head dirty_list; /* dirty list for dirs and files */ struct list_head gdirty_list; /* linked in global dirty list */ @@ -1516,6 +1517,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) enum { FI_NEW_INODE, /* indicate newly allocated inode */ FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_AUTO_RECOVER, /* indicate inode is recoverable */ FI_DIRTY_DIR, /* indicate directory has dirty pages */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ @@ -1591,18 +1593,35 @@ static inline void f2fs_i_links_write(struct inode *inode, bool inc) static inline void f2fs_i_blocks_write(struct inode *inode, blkcnt_t diff, bool add) { + bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); + bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); + inode->i_blocks = add ? inode->i_blocks + diff : inode->i_blocks - diff; mark_inode_dirty_sync(inode); + if (clean || recover) + set_inode_flag(inode, FI_AUTO_RECOVER); } static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size) { + bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE); + bool recover = is_inode_flag_set(inode, FI_AUTO_RECOVER); + if (i_size_read(inode) == i_size) return; i_size_write(inode, i_size); mark_inode_dirty_sync(inode); + if (clean || recover) + set_inode_flag(inode, FI_AUTO_RECOVER); +} + +static inline bool f2fs_skip_inode_update(struct inode *inode) +{ + if (!is_inode_flag_set(inode, FI_AUTO_RECOVER)) + return false; + return F2FS_I(inode)->last_disk_size == i_size_read(inode); } static inline void f2fs_i_depth_write(struct inode *inode, unsigned int depth) @@ -1936,8 +1955,8 @@ void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); void move_node_page(struct page *, int); -int fsync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *, - bool); +int fsync_node_pages(struct f2fs_sb_info *, struct inode *, + struct writeback_control *, bool); int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 73bc946974ad..23decf050236 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -208,7 +208,7 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, } /* if the inode is dirty, let's recover all the time */ - if (!datasync) { + if (!datasync && !f2fs_skip_inode_update(inode)) { f2fs_write_inode(inode, NULL); goto go_write; } @@ -251,7 +251,7 @@ go_write: goto out; } sync_nodes: - ret = fsync_node_pages(sbi, ino, &wbc, atomic); + ret = fsync_node_pages(sbi, inode, &wbc, atomic); if (ret) goto out; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2d892b6d5632..bdd814db883e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -154,6 +154,9 @@ static int do_read_inode(struct inode *inode) if (__written_first_block(ri)) set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + if (!need_inode_block_update(sbi, inode->i_ino)) + fi->last_disk_size = inode->i_size; + f2fs_put_page(node_page, 1); stat_inc_inline_xattr(inode); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 82f0f833151e..641d60392bff 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1293,7 +1293,7 @@ continue_unlock: return last_page; } -int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, +int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct writeback_control *wbc, bool atomic) { pgoff_t index, end; @@ -1301,6 +1301,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino, int ret = 0; struct page *last_page = NULL; bool marked = false; + nid_t ino = inode->i_ino; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1354,9 +1355,13 @@ continue_unlock: if (!atomic || page == last_page) { set_fsync_mark(page, 1); - if (IS_INODE(page)) + if (IS_INODE(page)) { + if (is_inode_flag_set(inode, + FI_DIRTY_INODE)) + update_inode(inode, page); set_dentry_mark(page, need_dentry_mark(sbi, ino)); + } /* may be written by other thread */ if (!PageDirty(page)) set_page_dirty(page); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 68c433f17ab5..b568b28c74f2 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -455,6 +455,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, continue; } + if ((start + 1) << PAGE_SHIFT > i_size_read(inode)) + f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT); + /* * dest is reserved block, invalidate src block * and then reserve one new block in dnode page. diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b5144b81e4c4..6fa4ec8ea1f7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -613,6 +613,9 @@ static void f2fs_dirty_inode(struct inode *inode, int flags) inode->i_ino == F2FS_META_INO(sbi)) return; + if (is_inode_flag_set(inode, FI_AUTO_RECOVER)) + clear_inode_flag(inode, FI_AUTO_RECOVER); + spin_lock(&sbi->inode_lock[DIRTY_META]); if (is_inode_flag_set(inode, FI_DIRTY_INODE)) { spin_unlock(&sbi->inode_lock[DIRTY_META]); @@ -638,6 +641,7 @@ void f2fs_inode_synced(struct inode *inode) } list_del_init(&F2FS_I(inode)->gdirty_list); clear_inode_flag(inode, FI_DIRTY_INODE); + clear_inode_flag(inode, FI_AUTO_RECOVER); dec_page_count(sbi, F2FS_DIRTY_IMETA); spin_unlock(&sbi->inode_lock[DIRTY_META]); stat_dec_dirty_inode(F2FS_I_SB(inode), DIRTY_META); -- cgit v1.2.3 From 0c9df7fb80360802f241428be7104f79d7c0f4ee Mon Sep 17 00:00:00 2001 From: Yunlong Song Date: Thu, 26 May 2016 19:40:29 +0800 Subject: f2fs: return the errno to the caller to avoid using a wrong page Commit aaf9607516ed38825268515ef4d773289a44f429 ("f2fs: check node page contents all the time") pointed out that "sometimes it was reported that its contents was missing", so it checks the page's mapping and contents. When "nid != nid_of_node(page)", ERR_PTR(-EIO) will be returned to the caller. However, commit e1c51b9f1df2f9efc2ec11488717e40cd12015f9 ("f2fs: clean up node page updating flow") moves "nid != nid_of_node(page)" test to "f2fs_bug_on(sbi, nid != nid_of_node(page))", this will return a wrong page to the caller when F2FS_CHECK_FS is off when "sometimes it was reported that its contents was missing" happens. This patch restores to check node page contents all the time, and returns the errno to make the caller known something is wrong and avoid to use the page. This patch also moves f2fs_bug_on to its proper location. Signed-off-by: Yunlong Song Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 641d60392bff..16532b31dcd6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1142,16 +1142,21 @@ repeat: lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } + if (unlikely(!PageUptodate(page))) + goto out_err; + if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } page_hit: - f2fs_bug_on(sbi, nid != nid_of_node(page)); + if(unlikely(nid != nid_of_node(page))) { + f2fs_bug_on(sbi, 1); + ClearPageUptodate(page); +out_err: + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } return page; } -- cgit v1.2.3 From e589c2c477b44e06754508a4e8b883e5ae7294aa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 2 Jun 2016 15:24:24 -0700 Subject: f2fs: control not to exceed # of cached nat entries This is to avoid cache entry management overhead including radix tree. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 4 ++++ fs/f2fs/node.h | 7 +++++++ fs/f2fs/segment.c | 5 +++++ 3 files changed, 16 insertions(+) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 16532b31dcd6..b448c8fec7fc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -52,6 +52,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); + if (excess_cached_nats(sbi)) + res = false; + if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD) + res = false; } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->wb.dirty_exceeded) return false; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 2c2a797e18a8..673ce926cf09 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -27,6 +27,8 @@ /* control dirty nats ratio threshold (default: 10% over max nid count) */ #define DEF_DIRTY_NAT_RATIO_THRESHOLD 10 +/* control total # of nats */ +#define DEF_NAT_CACHE_THRESHOLD 100000 /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 @@ -126,6 +128,11 @@ static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi) NM_I(sbi)->dirty_nats_ratio / 100; } +static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) +{ + return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD; +} + enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 34a9159cf5ac..9011bffd1dd0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -345,6 +345,11 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (!need) return; + + /* balance_fs_bg is able to be pending */ + if (excess_cached_nats(sbi)) + f2fs_balance_fs_bg(sbi); + /* * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. -- cgit v1.2.3 From ad4edb83143fdeef9e6fdd9daaa735b59476565b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Jun 2016 16:41:49 -0700 Subject: f2fs: produce more nids and reduce readahead nats The readahead nat pages are more likely to be reclaimed quickly, so it'd better to gather more free nids in advance. And, let's keep some free nids as much as possible. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 9 ++++++--- fs/f2fs/node.h | 5 +++-- fs/f2fs/segment.c | 4 +++- fs/f2fs/shrinker.c | 5 +++-- 6 files changed, 18 insertions(+), 8 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8534b98c0712..2b43d4013e92 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -941,6 +941,8 @@ out: static void unblock_operations(struct f2fs_sb_info *sbi) { up_write(&sbi->node_write); + + build_free_nids(sbi); f2fs_unlock_all(sbi); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b3aeb58a6285..32884a7bdcc4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1965,6 +1965,7 @@ void move_node_page(struct page *, int); int fsync_node_pages(struct f2fs_sb_info *, struct inode *, struct writeback_control *, bool); int sync_node_pages(struct f2fs_sb_info *, struct writeback_control *); +void build_free_nids(struct f2fs_sb_info *); bool alloc_nid(struct f2fs_sb_info *, nid_t *); void alloc_nid_done(struct f2fs_sb_info *, nid_t); void alloc_nid_failed(struct f2fs_sb_info *, nid_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b448c8fec7fc..729fb1eb86ce 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1765,7 +1765,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, } } -static void build_free_nids(struct f2fs_sb_info *sbi) +void build_free_nids(struct f2fs_sb_info *sbi) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -1774,7 +1774,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) nid_t nid = nm_i->next_scan_nid; /* Enough entries */ - if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) + if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK) return; /* readahead nat pages to be scanned */ @@ -1912,12 +1912,15 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) struct free_nid *i, *next; int nr = nr_shrink; + if (nm_i->fcnt <= MAX_FREE_NIDS) + return 0; + if (!mutex_trylock(&nm_i->build_lock)) return 0; spin_lock(&nm_i->free_nid_list_lock); list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { - if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK) + if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS) break; if (i->state == NID_ALLOC) continue; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 673ce926cf09..fc7684554b1a 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -15,9 +15,10 @@ #define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) /* # of pages to perform synchronous readahead before building free nids */ -#define FREE_NID_PAGES 4 +#define FREE_NID_PAGES 8 +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) -#define DEF_RA_NID_PAGES 4 /* # of nid pages to be readaheaded */ +#define DEF_RA_NID_PAGES 0 /* # of nid pages to be readaheaded */ /* maximum readahead size for node during getting data blocks */ #define MAX_RA_NODE 128 diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 782975e791f1..6d16ecf9d29e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -371,7 +371,9 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK); if (!available_free_memory(sbi, FREE_NIDS)) - try_to_free_nids(sbi, NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES); + try_to_free_nids(sbi, MAX_FREE_NIDS); + else + build_free_nids(sbi); /* checkpoint is the only way to shrink partial cached entries */ if (!available_free_memory(sbi, NAT_ENTRIES) || diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c index 93606f281bf9..46c915425923 100644 --- a/fs/f2fs/shrinker.c +++ b/fs/f2fs/shrinker.c @@ -13,6 +13,7 @@ #include #include "f2fs.h" +#include "node.h" static LIST_HEAD(f2fs_list); static DEFINE_SPINLOCK(f2fs_list_lock); @@ -25,8 +26,8 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi) static unsigned long __count_free_nids(struct f2fs_sb_info *sbi) { - if (NM_I(sbi)->fcnt > NAT_ENTRY_PER_BLOCK) - return NM_I(sbi)->fcnt - NAT_ENTRY_PER_BLOCK; + if (NM_I(sbi)->fcnt > MAX_FREE_NIDS) + return NM_I(sbi)->fcnt - MAX_FREE_NIDS; return 0; } -- cgit v1.2.3 From 1563ac75e7e45adcdc1271e6bb55fe27a23d4e4e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 3 Jul 2016 22:05:12 +0800 Subject: f2fs: fix to detect truncation prior rather than EIO during read In procedure of synchonized read, after sending out the read request, reader will try to lock the page for waiting device to finish the read jobs and unlock the page, but meanwhile, truncater will race with reader, so after reader get lock of the page, it should check page's mapping to detect whether someone has truncated the page in advance, then reader has the chance to do the retry if truncation was done, otherwise read can be failed due to previous condition check. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 ++++++++-------- fs/f2fs/gc.c | 4 ++-- fs/f2fs/node.c | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 53fae38009d3..3d93cf184114 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -500,14 +500,14 @@ repeat: /* wait for read completion */ lock_page(page); - if (unlikely(!PageUptodate(page))) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } + if (unlikely(!PageUptodate(page))) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } return page; } @@ -1647,14 +1647,14 @@ repeat: __submit_bio(sbi, READ_SYNC, bio, DATA); lock_page(page); - if (unlikely(!PageUptodate(page))) { - err = -EIO; - goto fail; - } if (unlikely(page->mapping != mapping)) { f2fs_put_page(page, 1); goto repeat; } + if (unlikely(!PageUptodate(page))) { + err = -EIO; + goto fail; + } } out_update: SetPageUptodate(page); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e1d274cdecb8..c9602d0dc57a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -593,11 +593,11 @@ static void move_encrypted_block(struct inode *inode, block_t bidx) /* write page */ lock_page(fio.encrypted_page); - if (unlikely(!PageUptodate(fio.encrypted_page))) { + if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { err = -EIO; goto put_page_out; } - if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) { + if (unlikely(!PageUptodate(fio.encrypted_page))) { err = -EIO; goto put_page_out; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 729fb1eb86ce..69171ce9b4b1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1146,13 +1146,13 @@ repeat: lock_page(page); - if (unlikely(!PageUptodate(page))) - goto out_err; - if (unlikely(page->mapping != NODE_MAPPING(sbi))) { f2fs_put_page(page, 1); goto repeat; } + + if (unlikely(!PageUptodate(page))) + goto out_err; page_hit: if(unlikely(nid != nid_of_node(page))) { f2fs_bug_on(sbi, 1); -- cgit v1.2.3 From fe76b796fc5194cc3d57265002e3a748566d073f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 18:40:10 -0700 Subject: f2fs: introduce f2fs_set_page_dirty_nobuffer This patch adds f2fs_set_page_dirty_nobuffer() copied from __set_page_dirty_buffer. When appending 4KB blocks in f2fs on pmem with multiple cores, this improves the overall performance. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 33 ++++++++++++++++++++++++++++++++- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 2 +- 4 files changed, 35 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2b43d4013e92..2755ef730a41 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -366,7 +366,7 @@ static int f2fs_set_meta_page_dirty(struct page *page) SetPageUptodate(page); if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); SetPagePrivate(page); f2fs_trace_pid(page); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3d93cf184114..4a2e97dad9e9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include "f2fs.h" @@ -1775,6 +1777,35 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 1; } +/* + * This was copied from __set_page_dirty_buffers which gives higher performance + * in very high speed storages. (e.g., pmem) + */ +void f2fs_set_page_dirty_nobuffers(struct page *page) +{ + struct address_space *mapping = page->mapping; + unsigned long flags; + + if (unlikely(!mapping)) + return; + + spin_lock(&mapping->private_lock); + lock_page_memcg(page); + SetPageDirty(page); + spin_unlock(&mapping->private_lock); + + spin_lock_irqsave(&mapping->tree_lock, flags); + WARN_ON_ONCE(!PageUptodate(page)); + account_page_dirtied(page, mapping); + radix_tree_tag_set(&mapping->page_tree, + page_index(page), PAGECACHE_TAG_DIRTY); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + unlock_page_memcg(page); + + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return; +} + static int f2fs_set_data_page_dirty(struct page *page) { struct address_space *mapping = page->mapping; @@ -1797,7 +1828,7 @@ static int f2fs_set_data_page_dirty(struct page *page) } if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); update_dirty_page(inode, page); return 1; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 32884a7bdcc4..096f16d343a8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2077,6 +2077,7 @@ struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct f2fs_io_info *); int f2fs_map_blocks(struct inode *, struct f2fs_map_blocks *, int, int); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); +void f2fs_set_page_dirty_nobuffers(struct page *); void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); int f2fs_release_page(struct page *, gfp_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 69171ce9b4b1..db73f3c823dc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1646,7 +1646,7 @@ static int f2fs_set_node_page_dirty(struct page *page) SetPageUptodate(page); if (!PageDirty(page)) { - __set_page_dirty_nobuffers(page); + f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); SetPagePrivate(page); f2fs_trace_pid(page); -- cgit v1.2.3 From 237c0790e54020d522b8fd23097e8dcafb4e331d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 18:49:15 -0700 Subject: f2fs: call SetPageUptodate if needed SetPageUptodate() issues memory barrier, resulting in performance degrdation. Let's avoid that. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 ++++-- fs/f2fs/data.c | 18 ++++++++++++------ fs/f2fs/file.c | 3 ++- fs/f2fs/inline.c | 9 ++++++--- fs/f2fs/node.c | 9 ++++++--- 5 files changed, 30 insertions(+), 15 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2755ef730a41..8ea895389ae4 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -48,7 +48,8 @@ repeat: goto repeat; } f2fs_wait_on_page_writeback(page, META, true); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); return page; } @@ -364,7 +365,8 @@ static int f2fs_set_meta_page_dirty(struct page *page) { trace_f2fs_set_page_dirty(page, META); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (!PageDirty(page)) { f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4a2e97dad9e9..b6fd5bd05e41 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -47,7 +47,8 @@ static void f2fs_read_end_io(struct bio *bio) struct page *page = bvec->bv_page; if (!bio->bi_error) { - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } else { ClearPageUptodate(page); SetPageError(page); @@ -443,7 +444,8 @@ got_it: */ if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); unlock_page(page); return page; } @@ -554,7 +556,8 @@ struct page *get_new_data_page(struct inode *inode, if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } else { f2fs_put_page(page, 1); @@ -1065,7 +1068,8 @@ got_it: } } else { zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); unlock_page(page); goto next_page; } @@ -1659,7 +1663,8 @@ repeat: } } out_update: - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); out_clear: clear_cold_data(page); return 0; @@ -1813,7 +1818,8 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (f2fs_is_atomic_file(inode)) { if (!IS_ATOMIC_WRITTEN_PAGE(page)) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d07e7759f970..2b777a42bf43 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -81,7 +81,8 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, zero_user_segment(page, offset, PAGE_SIZE); } set_page_dirty(page); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); trace_f2fs_vm_page_mkwrite(page, DATA); mapped: diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e10e958250ff..2cd0edcc4ebc 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -59,7 +59,8 @@ void read_inline_data(struct page *page, struct page *ipage) memcpy(dst_addr, src_addr, MAX_INLINE_DATA); flush_dcache_page(page); kunmap_atomic(dst_addr); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); } bool truncate_inline_inode(struct page *ipage, u64 from) @@ -97,7 +98,8 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) else read_inline_data(page, ipage); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); f2fs_put_page(ipage, 1); unlock_page(page); return 0; @@ -370,7 +372,8 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, NR_INLINE_DENTRY * F2FS_SLOT_LEN); kunmap_atomic(dentry_blk); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); set_page_dirty(page); /* clear inline dir and flag after data writeback */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index db73f3c823dc..ca1bb3cc6c32 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1045,7 +1045,8 @@ struct page *new_node_page(struct dnode_of_data *dn, f2fs_wait_on_page_writeback(page, NODE, true); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (set_page_dirty(page)) dn->node_changed = true; @@ -1644,7 +1645,8 @@ static int f2fs_set_node_page_dirty(struct page *page) { trace_f2fs_set_page_dirty(page, NODE); - SetPageUptodate(page); + if (!PageUptodate(page)) + SetPageUptodate(page); if (!PageDirty(page)) { f2fs_set_page_dirty_nobuffers(page); inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES); @@ -2015,7 +2017,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); - SetPageUptodate(ipage); + if (!PageUptodate(ipage)) + SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); src = F2FS_INODE(page); -- cgit v1.2.3 From 3bdad3c7ee72a76ec87be3477eb958ed7ca304fc Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 19:04:16 -0700 Subject: f2fs: skip to check the block address of node page If the node page is up-to-date, it should be alive. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ca1bb3cc6c32..9d994b97e61f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1080,6 +1080,9 @@ static int read_node_page(struct page *page, int rw) .encrypted_page = NULL, }; + if (PageUptodate(page)) + return LOCKED_PAGE; + get_node_info(sbi, page->index, &ni); if (unlikely(ni.blk_addr == NULL_ADDR)) { @@ -1087,9 +1090,6 @@ static int read_node_page(struct page *page, int rw) return -ENOENT; } - if (PageUptodate(page)) - return LOCKED_PAGE; - fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr; return f2fs_submit_page_bio(&fio); } -- cgit v1.2.3 From ec795418c41850056feb956534edf059dc1155d4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Jun 2016 19:59:11 -0700 Subject: f2fs: use percpu_rw_semaphore This patch replaces rw_semaphore with percpu_rw_semaphore for: sbi->cp_rwsem nm_i->nat_tree_lock Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 12 ++++++------ fs/f2fs/node.c | 47 ++++++++++++++++++++++++----------------------- fs/f2fs/super.c | 6 +++++- 3 files changed, 35 insertions(+), 30 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d5892f56c44d..88fa13909b9c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -527,7 +527,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -775,7 +775,7 @@ struct f2fs_sb_info { struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ + struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ @@ -1062,22 +1062,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + percpu_down_read(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + percpu_up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write(&sbi->cp_rwsem); + percpu_down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - up_write(&sbi->cp_rwsem); + percpu_up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9d994b97e61f..b841c439adb7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return need; } @@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return need_update; } @@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); @@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -342,8 +342,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - if (!down_write_trylock(&nm_i->nat_tree_lock)) - return 0; + percpu_down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; @@ -352,7 +351,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -374,13 +373,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); return; } @@ -404,11 +403,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); /* cache nat entry */ - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); } /* @@ -1783,7 +1782,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - down_read(&nm_i->nat_tree_lock); + percpu_down_read(&nm_i->nat_tree_lock); while (1) { struct page *page = get_current_nat_page(sbi, nid); @@ -1815,7 +1814,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) remove_free_nid(nm_i, nid); } up_read(&curseg->journal_rwsem); - up_read(&nm_i->nat_tree_lock); + percpu_up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2204,7 +2203,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!nm_i->dirty_nat_cnt) return; - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -2227,7 +2226,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) list_for_each_entry_safe(set, tmp, &sets, set_list) __flush_nat_entry_set(sbi, set); - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } @@ -2263,7 +2262,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - init_rwsem(&nm_i->nat_tree_lock); + if (percpu_init_rwsem(&nm_i->nat_tree_lock)) + return -ENOMEM; nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2320,7 +2320,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - down_write(&nm_i->nat_tree_lock); + percpu_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2345,8 +2345,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - up_write(&nm_i->nat_tree_lock); + percpu_up_write(&nm_i->nat_tree_lock); + percpu_free_rwsem(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e0a975dc6fc6..2bac9171d2c3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -695,6 +695,8 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi) percpu_counter_destroy(&sbi->nr_pages[i]); percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); + + percpu_free_rwsem(&sbi->cp_rwsem); } static void f2fs_put_super(struct super_block *sb) @@ -1471,6 +1473,9 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) { int i, err; + if (percpu_init_rwsem(&sbi->cp_rwsem)) + return -ENOMEM; + for (i = 0; i < NR_COUNT_TYPE; i++) { err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); if (err) @@ -1671,7 +1676,6 @@ try_onemore: sbi->write_io[i].bio = NULL; } - init_rwsem(&sbi->cp_rwsem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); -- cgit v1.2.3 From 0a2aa8fbb9693020b822ac7a23755591554eaea5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 8 Jul 2016 17:42:21 -0700 Subject: f2fs: refactor __exchange_data_block for speed up This reduces the elapsed time to do xfstests/generic/017. Before: 715 s After: 458 s Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 236 +++++++++++++++++++++++++++++++++++++++++---------------- fs/f2fs/node.c | 1 + 2 files changed, 171 insertions(+), 66 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1ec0197a3c9b..b1dc972407e5 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -850,85 +850,189 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) return ret; } -static int __exchange_data_block(struct inode *inode, pgoff_t src, - pgoff_t dst, bool full) +static int __read_out_blkaddrs(struct inode *inode, block_t *blkaddr, + int *do_replace, pgoff_t off, pgoff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - block_t new_addr; - bool do_replace = false; - int ret; + int ret, done, i; +next_dnode: set_new_dnode(&dn, inode, NULL, NULL, 0); - ret = get_dnode_of_data(&dn, src, LOOKUP_NODE_RA); + ret = get_dnode_of_data(&dn, off, LOOKUP_NODE_RA); if (ret && ret != -ENOENT) { return ret; } else if (ret == -ENOENT) { - new_addr = NULL_ADDR; - } else { - new_addr = dn.data_blkaddr; - if (!is_checkpointed_data(sbi, new_addr)) { + if (dn.max_level == 0) + return -ENOENT; + done = min((pgoff_t)ADDRS_PER_BLOCK - dn.ofs_in_node, len); + blkaddr += done; + do_replace += done; + goto next; + } + + done = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, inode) - + dn.ofs_in_node, len); + for (i = 0; i < done; i++, blkaddr++, do_replace++, dn.ofs_in_node++) { + *blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + if (!is_checkpointed_data(sbi, *blkaddr)) { + + if (test_opt(sbi, LFS)) { + f2fs_put_dnode(&dn); + return -ENOTSUPP; + } + /* do not invalidate this block address */ f2fs_update_data_blkaddr(&dn, NULL_ADDR); - do_replace = true; + *do_replace = 1; } - f2fs_put_dnode(&dn); } + f2fs_put_dnode(&dn); +next: + len -= done; + off += done; + if (len) + goto next_dnode; + return 0; +} - if (new_addr == NULL_ADDR) - return full ? truncate_hole(inode, dst, dst + 1) : 0; +static int __roll_back_blkaddrs(struct inode *inode, block_t *blkaddr, + int *do_replace, pgoff_t off, int len) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + int ret, i; - if (do_replace) { - struct page *ipage; - struct node_info ni; + for (i = 0; i < len; i++, do_replace++, blkaddr++) { + if (*do_replace == 0) + continue; - if (test_opt(sbi, LFS)) { - ret = -ENOTSUPP; - goto err_out; + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, off + i, LOOKUP_NODE_RA); + if (ret) { + dec_valid_block_count(sbi, inode, 1); + invalidate_blocks(sbi, *blkaddr); + } else { + f2fs_update_data_blkaddr(&dn, *blkaddr); } + f2fs_put_dnode(&dn); + } + return 0; +} - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - ret = PTR_ERR(ipage); - goto err_out; +static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, + block_t *blkaddr, int *do_replace, + pgoff_t src, pgoff_t dst, pgoff_t len, bool full) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(src_inode); + pgoff_t i = 0; + int ret; + + while (i < len) { + if (blkaddr[i] == NULL_ADDR && !full) { + i++; + continue; } - set_new_dnode(&dn, inode, ipage, NULL, 0); - ret = f2fs_reserve_block(&dn, dst); - if (ret) - goto err_out; + if (do_replace[i] || blkaddr[i] == NULL_ADDR) { + struct dnode_of_data dn; + struct node_info ni; + size_t new_size; + pgoff_t ilen; - truncate_data_blocks_range(&dn, 1); + set_new_dnode(&dn, dst_inode, NULL, NULL, 0); + ret = get_dnode_of_data(&dn, dst + i, ALLOC_NODE); + if (ret) + return ret; - get_node_info(sbi, dn.nid, &ni); - f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr, - ni.version, true, false); - f2fs_put_dnode(&dn); - } else { - struct page *psrc, *pdst; + get_node_info(sbi, dn.nid, &ni); + ilen = min((pgoff_t) + ADDRS_PER_PAGE(dn.node_page, dst_inode) - + dn.ofs_in_node, len - i); + do { + dn.data_blkaddr = datablock_addr(dn.node_page, + dn.ofs_in_node); + truncate_data_blocks_range(&dn, 1); + + if (do_replace[i]) { + f2fs_i_blocks_write(src_inode, + 1, false); + f2fs_i_blocks_write(dst_inode, + 1, true); + f2fs_replace_block(sbi, &dn, dn.data_blkaddr, + blkaddr[i], ni.version, true, false); + + do_replace[i] = 0; + } + dn.ofs_in_node++; + i++; + new_size = (dst + i) << PAGE_SHIFT; + if (dst_inode->i_size < new_size) + f2fs_i_size_write(dst_inode, new_size); + } while ((do_replace[i] || blkaddr[i] == NULL_ADDR) && --ilen); - psrc = get_lock_data_page(inode, src, true); - if (IS_ERR(psrc)) - return PTR_ERR(psrc); - pdst = get_new_data_page(inode, NULL, dst, true); - if (IS_ERR(pdst)) { + f2fs_put_dnode(&dn); + } else { + struct page *psrc, *pdst; + + psrc = get_lock_data_page(src_inode, src + i, true); + if (IS_ERR(psrc)) + return PTR_ERR(psrc); + pdst = get_new_data_page(dst_inode, NULL, dst + i, + true); + if (IS_ERR(pdst)) { + f2fs_put_page(psrc, 1); + return PTR_ERR(pdst); + } + f2fs_copy_page(psrc, pdst); + set_page_dirty(pdst); + f2fs_put_page(pdst, 1); f2fs_put_page(psrc, 1); - return PTR_ERR(pdst); - } - f2fs_copy_page(psrc, pdst); - set_page_dirty(pdst); - f2fs_put_page(pdst, 1); - f2fs_put_page(psrc, 1); - return truncate_hole(inode, src, src + 1); + ret = truncate_hole(src_inode, src + i, src + i + 1); + if (ret) + return ret; + i++; + } } return 0; +} -err_out: - if (!get_dnode_of_data(&dn, src, LOOKUP_NODE)) { - f2fs_update_data_blkaddr(&dn, new_addr); - f2fs_put_dnode(&dn); +static int __exchange_data_block(struct inode *src_inode, + struct inode *dst_inode, pgoff_t src, pgoff_t dst, + int len, bool full) +{ + block_t *src_blkaddr; + int *do_replace; + int ret; + + src_blkaddr = f2fs_kvzalloc(sizeof(block_t) * len, GFP_KERNEL); + if (!src_blkaddr) + return -ENOMEM; + + do_replace = f2fs_kvzalloc(sizeof(int) * len, GFP_KERNEL); + if (!do_replace) { + kvfree(src_blkaddr); + return -ENOMEM; } + + ret = __read_out_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); + if (ret) + goto roll_back; + + ret = __clone_blkaddrs(src_inode, dst_inode, src_blkaddr, + do_replace, src, dst, len, full); + if (ret) + goto roll_back; + + kvfree(src_blkaddr); + kvfree(do_replace); + return 0; + +roll_back: + __roll_back_blkaddrs(src_inode, src_blkaddr, do_replace, src, len); + kvfree(src_blkaddr); + kvfree(do_replace); return ret; } @@ -936,16 +1040,12 @@ static int f2fs_do_collapse(struct inode *inode, pgoff_t start, pgoff_t end) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); pgoff_t nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; - int ret = 0; + int ret; - for (; end < nrpages; start++, end++) { - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); - ret = __exchange_data_block(inode, end, start, true); - f2fs_unlock_op(sbi); - if (ret) - break; - } + f2fs_balance_fs(sbi, true); + f2fs_lock_op(sbi); + ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); + f2fs_unlock_op(sbi); return ret; } @@ -1134,7 +1234,7 @@ out: static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - pgoff_t pg_start, pg_end, delta, nrpages, idx; + pgoff_t nr, pg_start, pg_end, delta, idx; loff_t new_size; int ret = 0; @@ -1169,14 +1269,18 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) pg_start = offset >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT; delta = pg_end - pg_start; - nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + idx = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE; + + while (!ret && idx > pg_start) { + nr = idx - pg_start; + if (nr > delta) + nr = delta; + idx -= nr; - for (idx = nrpages - 1; idx >= pg_start && idx != -1; idx--) { f2fs_lock_op(sbi); - ret = __exchange_data_block(inode, idx, idx + delta, false); + ret = __exchange_data_block(inode, inode, idx, + idx + delta, nr, false); f2fs_unlock_op(sbi); - if (ret) - break; } /* write out all moved pages, if possible */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b841c439adb7..d78f61d647c0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -649,6 +649,7 @@ release_out: if (err == -ENOENT) { dn->cur_level = i; dn->max_level = level; + dn->ofs_in_node = offset[level]; } return err; } -- cgit v1.2.3 From 9dfa1baff76d08843aaf5e3c78f6da6950957702 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 13 Jul 2016 19:33:19 -0700 Subject: f2fs: use blk_plug in all the possible paths This patch reverts 19a5f5e2ef37 (f2fs: drop any block plugging), and adds blk_plug in write paths additionally. The main reason is that blk_start_plug can be used to wake up from low-power mode before submitting further bios. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 +++++++ fs/f2fs/data.c | 3 +++ fs/f2fs/file.c | 6 +++++- fs/f2fs/gc.c | 5 +++++ fs/f2fs/node.c | 3 +++ fs/f2fs/segment.c | 7 ++++++- 6 files changed, 29 insertions(+), 2 deletions(-) (limited to 'fs/f2fs/node.c') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 8ea895389ae4..be1c54b62388 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -265,6 +265,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct blk_plug plug; long diff, written; /* collect a number of dirty meta pages and write together */ @@ -277,7 +278,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping, /* if mounting is failed, skip writing node pages */ mutex_lock(&sbi->cp_mutex); diff = nr_pages_to_write(sbi, META, wbc); + blk_start_plug(&plug); written = sync_meta_pages(sbi, META, wbc->nr_to_write); + blk_finish_plug(&plug); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -899,8 +902,11 @@ static int block_operations(struct f2fs_sb_info *sbi) .nr_to_write = LONG_MAX, .for_reclaim = 0, }; + struct blk_plug plug; int err = 0; + blk_start_plug(&plug); + retry_flush_dents: f2fs_lock_all(sbi); /* write all the dirty dentry pages */ @@ -937,6 +943,7 @@ retry_flush_nodes: goto retry_flush_nodes; } out: + blk_finish_plug(&plug); return err; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index adfe47b21991..87a458fb8afe 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1438,6 +1438,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct blk_plug plug; int ret; /* deal with chardevs and other special file */ @@ -1463,7 +1464,9 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc); + blk_finish_plug(&plug); /* * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios. diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 72e52cd15b60..47f1f5e36d1c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2102,6 +2102,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); + struct blk_plug plug; ssize_t ret; if (f2fs_encrypted_inode(inode) && @@ -2113,8 +2114,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = generic_write_checks(iocb, from); if (ret > 0) { ret = f2fs_preallocate_blocks(iocb, from); - if (!ret) + if (!ret) { + blk_start_plug(&plug); ret = __generic_file_write_iter(iocb, from); + blk_finish_plug(&plug); + } } inode_unlock(inode); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 5c8acf754513..de6c41c32c62 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -808,6 +808,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, { struct page *sum_page; struct f2fs_summary_block *sum; + struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; int seg_freed = 0; @@ -825,6 +826,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, unlock_page(sum_page); } + blk_start_plug(&plug); + for (segno = start_segno; segno < end_segno; segno++) { if (get_valid_blocks(sbi, segno, 1) == 0) @@ -862,6 +865,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, f2fs_submit_merged_bio(sbi, (type == SUM_TYPE_NODE) ? NODE : DATA, WRITE); + blk_finish_plug(&plug); + if (gc_type == FG_GC) { while (start_segno < end_segno) if (get_valid_blocks(sbi, start_segno++, 1) == 0) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d78f61d647c0..79a93c6e632f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1618,6 +1618,7 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); + struct blk_plug plug; long diff; /* balancing f2fs's metadata in background */ @@ -1631,7 +1632,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, NODE, wbc); wbc->sync_mode = WB_SYNC_NONE; + blk_start_plug(&plug); sync_node_pages(sbi, wbc); + blk_finish_plug(&plug); wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); return 0; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e87aa058f57a..d45e6bbf8493 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -381,8 +381,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) excess_prefree_segs(sbi) || excess_dirty_nats(sbi) || (is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) { - if (test_opt(sbi, DATA_FLUSH)) + if (test_opt(sbi, DATA_FLUSH)) { + struct blk_plug plug; + + blk_start_plug(&plug); sync_dirty_inodes(sbi, FILE_INODE); + blk_finish_plug(&plug); + } f2fs_sync_fs(sbi->sb, true); stat_inc_bg_cp_count(sbi->stat_info); } -- cgit v1.2.3