From a4ed23f2f1a875f4b50d5f7e3dd5b0abbedda1ff Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 11 Apr 2014 17:49:35 +0800 Subject: f2fs: put the bio when issue_flush completed Put the bio when the flush cmd issued, it also can fix the following kmemleak: unreferenced object 0xffff8800270c73c0 (size 200): comm "f2fs_flush-7:0", pid 27161, jiffies 4312127988 (age 988.503s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 40 07 81 19 01 88 ff ff ........@....... 01 00 00 00 00 00 00 f0 11 14 00 00 00 00 00 00 ................ backtrace: [] kmemleak_alloc+0x72/0x96 [] slab_post_alloc_hook+0x28/0x2a [] kmem_cache_alloc+0xec/0x157 [] mempool_alloc_slab+0x15/0x17 [] mempool_alloc+0x71/0x138 [] bio_alloc_bioset+0x93/0x18c [] issue_flush_thread+0x8d/0x145 [f2fs] [] kthread+0xba/0xc2 [] ret_from_fork+0x7c/0xb0 [] 0xffffffffffffffff Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 085f548be7a3..524b7ed36e0a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -226,6 +226,7 @@ repeat: next = cmd->next; complete(&cmd->wait); } + bio_put(bio); sm_i->dispatch_list = NULL; } -- cgit v1.2.3 From 197d46476c3ba5f8595209025d20953a0c851748 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 11 Apr 2014 17:49:50 +0800 Subject: f2fs: use __GFP_ZERO to avoid appending set-NULL Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 524b7ed36e0a..ba067b1b8107 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -243,9 +243,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (!test_opt(sbi, FLUSH_MERGE)) return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); - cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC); - cmd->next = NULL; - cmd->ret = 0; + cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC | __GFP_ZERO); init_completion(&cmd->wait); spin_lock(&sm_i->issue_lock); -- cgit v1.2.3 From b270ad6f0aedd27bdc689fc15f26bc650a59b12b Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 11 Apr 2014 17:49:55 +0800 Subject: f2fs: enable flush_merge only in f2fs is not read-only Enable flush_merge only in f2fs is not read-only, so does the mount option show. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 +-- fs/f2fs/super.c | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ba067b1b8107..1e264e761f71 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1859,10 +1859,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->nr_discards = 0; sm_info->max_discards = 0; - if (test_opt(sbi, FLUSH_MERGE)) { + if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { spin_lock_init(&sm_info->issue_lock); init_waitqueue_head(&sm_info->flush_wait_queue); - sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(sm_info->f2fs_issue_flush)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c756923a7302..5e20d2a36eac 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -514,7 +514,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) seq_printf(seq, ",background_gc=%s", "on"); else seq_printf(seq, ",background_gc=%s", "off"); @@ -542,7 +542,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); - if (test_opt(sbi, FLUSH_MERGE)) + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); -- cgit v1.2.3 From 1e87a78d95ecea7a989349860feb42db3e4b7db5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 15 Apr 2014 13:57:55 +0900 Subject: f2fs: avoid to conduct roll-forward due to the remained garbage blocks The f2fs always scans the next chain of direct node blocks. But some garbage blocks are able to be remained due to no discard support or SSR triggers. This occasionally wreaks recovering wrong inodes that were used or BUG_ONs due to reallocating node ids as follows. When mount this f2fs image: http://linuxtesting.org/downloads/f2fs_fault_image.zip BUG_ON is triggered in f2fs driver (messages below are generated on kernel 3.13.2; for other kernels output is similar): kernel BUG at fs/f2fs/node.c:215! Call Trace: [] recover_inode_page+0x1fd/0x3e0 [f2fs] [] ? __lock_page+0x67/0x70 [] ? autoremove_wake_function+0x50/0x50 [] recover_fsync_data+0x1398/0x15d0 [f2fs] [] ? selinux_d_instantiate+0x1c/0x20 [] ? d_instantiate+0x5b/0x80 [] f2fs_fill_super+0xb04/0xbf0 [f2fs] [] ? mount_bdev+0x7e/0x210 [] mount_bdev+0x1c9/0x210 [] ? validate_superblock+0x210/0x210 [f2fs] [] f2fs_mount+0x1d/0x30 [f2fs] [] mount_fs+0x47/0x1c0 [] ? __alloc_percpu+0x10/0x20 [] vfs_kern_mount+0x72/0x110 [] do_mount+0x493/0x910 [] ? strndup_user+0x5b/0x80 [] SyS_mount+0x90/0xe0 [] system_call_fastpath+0x16/0x1b Found by Linux File System Verification project (linuxtesting.org). Reported-by: Andrey Tsyvarev Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 6 ++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 17 +++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4aa521aa9bc3..890e23d208a8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -762,6 +762,12 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) void *kaddr; int i; + /* + * This avoids to conduct wrong roll-forward operations and uses + * metapages, so should be called prior to sync_meta_pages below. + */ + discard_next_dnode(sbi); + /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) sync_meta_pages(sbi, META, LONG_MAX); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2ecac8312359..2c5a5dadae65 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1179,6 +1179,7 @@ int f2fs_issue_flush(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void clear_prefree_segments(struct f2fs_sb_info *); +void discard_next_dnode(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1e264e761f71..9993f94848fc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -335,13 +335,26 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static void f2fs_issue_discard(struct f2fs_sb_info *sbi, +static int f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); - blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); +} + +void discard_next_dnode(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); + block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + + if (f2fs_issue_discard(sbi, blkaddr, 1)) { + struct page *page = grab_meta_page(sbi, blkaddr); + /* zero-filled page */ + set_page_dirty(page); + f2fs_put_page(page, 1); + } } static void add_discard_addrs(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From 76f60268e70a700c04c85e1b0d520c94062a40a2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 15 Apr 2014 16:04:15 +0900 Subject: f2fs: call redirty_page_for_writepage This patch replace some general codes with redirty_page_for_writepage, which can be enabled after consideration on additional procedure like counting dirty pages appropriately. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 5 +---- fs/f2fs/data.c | 10 +++------- fs/f2fs/node.c | 5 +---- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 890e23d208a8..2902f7d50770 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -174,10 +174,7 @@ no_write: return 0; redirty_out: - dec_page_count(sbi, F2FS_DIRTY_META); - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 45abd60e2bff..b5cd6d1c9320 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -798,10 +798,8 @@ static int f2fs_write_data_page(struct page *page, * this page does not have to be written to disk. */ offset = i_size & (PAGE_CACHE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) { - inode_dec_dirty_dents(inode); + if ((page->index >= end_index + 1) || !offset) goto out; - } zero_user_segment(page, offset, PAGE_CACHE_SIZE); write: @@ -810,7 +808,6 @@ write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { - inode_dec_dirty_dents(inode); err = do_write_data_page(page, &fio); goto done; } @@ -832,15 +829,14 @@ done: clear_cold_data(page); out: + inode_dec_dirty_dents(inode); unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); return 0; redirty_out: - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index a161e955c4c8..f760793c1a64 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1227,10 +1227,7 @@ static int f2fs_write_node_page(struct page *page, return 0; redirty_out: - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } -- cgit v1.2.3 From 15c6e3aae68d6167cba063387a59968f811c8268 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 16 Apr 2014 14:22:50 +0900 Subject: f2fs: fix to unlock f2fs_lock at the omitted error case If it occurs an error, we should call f2fs_unlock_op. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inline.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 383db1fabcf4..3258c7cf00d5 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -81,8 +81,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) f2fs_lock_op(sbi); ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } /* * i_addr[0] is not used for inline data, @@ -90,10 +92,8 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) */ set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, 0); - if (err) { - f2fs_unlock_op(sbi); - return err; - } + if (err) + goto out; zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); @@ -118,6 +118,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) sync_inode_page(&dn); f2fs_put_dnode(&dn); +out: f2fs_unlock_op(sbi); return err; } -- cgit v1.2.3 From ed57c27f736f6d8a51e442610c800ee0c3d83977 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 15 Apr 2014 11:19:28 +0900 Subject: f2fs: remove costly dirty_dir_inode operations This patch removes list opeations in handling dirty dir inodes. Previously, F2FS traverses whole the list of dirty dir inodes to check whether there is an existing inode or not, resulting in heavy CPU overheads. So this patch removes such the traverse operations by adding FI_DIRTY_DIR to indicate the inode lies on the list or not. Through this simple flag, we can remove redundant operations gracefully. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 52 +++++++++++++--------------------------------------- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/recovery.c | 18 ++++++++++-------- 3 files changed, 25 insertions(+), 48 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2902f7d50770..744c68be2e15 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -552,14 +552,13 @@ fail_no_cp: static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head = &sbi->dir_inode_list; - struct dir_inode_entry *entry; - list_for_each_entry(entry, head, list) - if (unlikely(entry->inode == inode)) - return -EEXIST; + if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) + return -EEXIST; - list_add_tail(&new->list, head); + set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); + F2FS_I(inode)->dirty_dir = new; + list_add_tail(&new->list, &sbi->dir_inode_list); stat_inc_dirty_dir(sbi); return 0; } @@ -608,31 +607,26 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head; struct dir_inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; spin_lock(&sbi->dir_inode_lock); - if (get_dirty_dents(inode)) { + if (get_dirty_dents(inode) || + !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { spin_unlock(&sbi->dir_inode_lock); return; } - head = &sbi->dir_inode_list; - list_for_each_entry(entry, head, list) { - if (entry->inode == inode) { - list_del(&entry->list); - stat_dec_dirty_dir(sbi); - spin_unlock(&sbi->dir_inode_lock); - kmem_cache_free(inode_entry_slab, entry); - goto done; - } - } + entry = F2FS_I(inode)->dirty_dir; + list_del(&entry->list); + F2FS_I(inode)->dirty_dir = NULL; + clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); + stat_dec_dirty_dir(sbi); spin_unlock(&sbi->dir_inode_lock); + kmem_cache_free(inode_entry_slab, entry); -done: /* Only from the recovery routine */ if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); @@ -640,26 +634,6 @@ done: } } -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) -{ - - struct list_head *head; - struct inode *inode = NULL; - struct dir_inode_entry *entry; - - spin_lock(&sbi->dir_inode_lock); - - head = &sbi->dir_inode_list; - list_for_each_entry(entry, head, list) { - if (entry->inode->i_ino == ino) { - inode = entry->inode; - break; - } - } - spin_unlock(&sbi->dir_inode_lock); - return inode; -} - void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2c5a5dadae65..d3180f8c0f97 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -218,6 +218,7 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ + struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ }; static inline void get_extent_info(struct extent_info *ext, @@ -958,6 +959,7 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) enum { FI_NEW_INODE, /* indicate newly allocated inode */ FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_DIRTY_DIR, /* indicate directory has dirty pages */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ @@ -1222,7 +1224,6 @@ int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); void remove_dirty_dir_inode(struct inode *); -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); void sync_dirty_dir_inodes(struct f2fs_sb_info *); void write_checkpoint(struct f2fs_sb_info *, bool); void init_orphan_info(struct f2fs_sb_info *); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b1ae89f0f44e..9eb6487f383d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -46,15 +46,17 @@ static int recover_dentry(struct page *ipage, struct inode *inode) struct inode *dir, *einode; int err = 0; - dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); - if (!dir) { - dir = f2fs_iget(inode->i_sb, pino); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; - } - set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; + } + + if (is_inode_flag_set(F2FS_I(dir), FI_DIRTY_DIR)) { + iput(dir); + } else { add_dirty_dir_inode(dir); + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); } name.len = le32_to_cpu(raw_inode->i_namelen); -- cgit v1.2.3 From b156d542415d88c265a0a579448a93e66aa18e33 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Tue, 15 Apr 2014 17:51:05 +0900 Subject: f2fs: make recover_inline_xattr() static Make recover_inline_xattr() static, because this function is used only in this file. Signed-off-by: Jingoo Han Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f760793c1a64..38fe5f534e7d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1531,7 +1531,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, clear_node_page_dirty(page); } -void recover_inline_xattr(struct inode *inode, struct page *page) +static void recover_inline_xattr(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); void *src_addr, *dst_addr; -- cgit v1.2.3 From 8abfb36ab396377ea712cd640c525fd5535d1dc9 Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Tue, 15 Apr 2014 14:19:38 +0800 Subject: f2fs: atomically set inode->i_flags in f2fs_set_inode_flags() Use set_mask_bits() to atomically set i_flags instead of clearing out the S_IMMUTABLE, S_APPEND, etc. flags and then setting them from the FS_IMMUTABLE_FL, FS_APPEND_FL, etc. flags, since this opens up a race where an immutable file has the immutable flag cleared for a brief window of time. Signed-off-by: Zhang Zhen Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ee829d360468..f7a655373c46 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -21,20 +22,20 @@ void f2fs_set_inode_flags(struct inode *inode) { unsigned int flags = F2FS_I(inode)->i_flags; - - inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | - S_NOATIME | S_DIRSYNC); + unsigned int new_fl = 0; if (flags & FS_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & FS_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & FS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & FS_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & FS_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + set_mask_bits(&inode->i_flags, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) -- cgit v1.2.3 From 876dc59eb1f0131c092803d0d206d47dd0119dfe Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 11 Apr 2014 17:50:00 +0800 Subject: f2fs: add the flush_merge handle in the remount flow Add the *remount* handle of flush_merge option, so that the users can enable flush_merge in the runtime, such as the underlying device handles the cache_flush command relatively slowly. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 45 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d3180f8c0f97..55152de1118c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1177,6 +1177,7 @@ void destroy_node_manager_caches(void); */ void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); +int issue_flush_thread(void *); int f2fs_issue_flush(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9993f94848fc..f6816e18db98 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -197,7 +197,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) f2fs_sync_fs(sbi->sb, true); } -static int issue_flush_thread(void *data) +int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; struct f2fs_sm_info *sm_i = SM_I(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 5e20d2a36eac..bea642aec0fe 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -594,6 +594,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; int err, active_logs; + bool need_restart_gc = false; + bool need_stop_gc = false; sync_filesystem(sb); @@ -611,7 +613,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* * Previous and new state of filesystem is RO, - * so no point in checking GC conditions. + * so skip checking GC and FLUSH_MERGE conditions. */ if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) goto skip; @@ -625,18 +627,57 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (sbi->gc_thread) { stop_gc_thread(sbi); f2fs_sync_fs(sb, 1); + need_restart_gc = true; } } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; + need_stop_gc = true; + } + + /* + * We stop issue flush thread if FS is mounted as RO + * or if flush_merge is not passed in mount option. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + struct f2fs_sm_info *sm_info = sbi->sm_info; + + if (sm_info->f2fs_issue_flush) + kthread_stop(sm_info->f2fs_issue_flush); + sm_info->issue_list = sm_info->dispatch_list = NULL; + sm_info->f2fs_issue_flush = NULL; + } else if (test_opt(sbi, FLUSH_MERGE)) { + struct f2fs_sm_info *sm_info = sbi->sm_info; + + if (!sm_info->f2fs_issue_flush) { + dev_t dev = sbi->sb->s_bdev->bd_dev; + + spin_lock_init(&sm_info->issue_lock); + init_waitqueue_head(&sm_info->flush_wait_queue); + sm_info->f2fs_issue_flush = + kthread_run(issue_flush_thread, sbi, + "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(sm_info->f2fs_issue_flush)) { + err = PTR_ERR(sm_info->f2fs_issue_flush); + sm_info->f2fs_issue_flush = NULL; + goto restore_gc; + } + } } skip: /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); return 0; - +restore_gc: + if (need_restart_gc) { + if (start_gc_thread(sbi)) + f2fs_msg(sbi->sb, KERN_WARNING, + "background gc thread is stop"); + } else if (need_stop_gc) { + stop_gc_thread(sbi); + } restore_opts: sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; -- cgit v1.2.3 From 94dac22e72e891b16e8e8abbdb6df10f322d20e0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 17 Apr 2014 10:51:05 +0800 Subject: f2fs: introduce raw_nat_from_node_info() to simplfy codes This patch introduce raw_nat_from_node_info() to simplfy some codes, and also use exist function node_info_from_raw_nat() to do the same job. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 15 +++------------ fs/f2fs/node.h | 8 ++++++++ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 38fe5f534e7d..837f5fdabd19 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -179,9 +179,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); goto retry; } - nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); - nat_set_ino(e, le32_to_cpu(ne->ino)); - nat_set_version(e, ne->version); + node_info_from_raw_nat(&e->ni, ne); } write_unlock(&nm_i->nat_tree_lock); } @@ -1755,9 +1753,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); goto retry; } - nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); - nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); - nat_set_version(ne, raw_ne.version); + node_info_from_raw_nat(&ne->ni, &raw_ne); __set_nat_cache_dirty(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); } @@ -1790,7 +1786,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t nid; struct f2fs_nat_entry raw_ne; int offset = -1; - block_t new_blkaddr; if (nat_get_blkaddr(ne) == NEW_ADDR) continue; @@ -1826,11 +1821,7 @@ to_nat_page: f2fs_bug_on(!nat_blk); raw_ne = nat_blk->entries[nid - start_nid]; flush_now: - new_blkaddr = nat_get_blkaddr(ne); - - raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); - raw_ne.block_addr = cpu_to_le32(new_blkaddr); - raw_ne.version = nat_get_version(ne); + raw_nat_from_node_info(&raw_ne, &ne->ni); if (offset < 0) { nat_blk->entries[nid - start_nid] = raw_ne; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5decc1a375f0..41bb65b1c97d 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -75,6 +75,14 @@ static inline void node_info_from_raw_nat(struct node_info *ni, ni->version = raw_ne->version; } +static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, + struct node_info *ni) +{ + raw_ne->ino = cpu_to_le32(ni->ino); + raw_ne->block_addr = cpu_to_le32(ni->blk_addr); + raw_ne->version = ni->version; +} + enum nid_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES /* indicates the cached nat entry */ -- cgit v1.2.3 From b49ad51e6d4fa665a0ab587798d36bebfdfa54a9 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Thu, 17 Apr 2014 17:51:06 +0200 Subject: f2fs: add static to get_max_meta_blks inline get_max_meta_blks is only used in checkpoint.c Use standard static inline format. Cc: Jaegeuk Kim Cc: Andrew Morton Signed-off-by: Fabian Frederick Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 744c68be2e15..1346ce916b84 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -73,7 +73,7 @@ out: return page; } -inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) { switch (type) { case META_NAT: -- cgit v1.2.3 From 7ee0eeabcdbb1610d7bc75d1132a43e0c7f7ef28 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 18 Apr 2014 11:14:37 +0900 Subject: f2fs: add available_nids to fix handling max_nid correctly This patch introduces available_nids for alloc_nids() and fixes max_nid for build_free_nids() and scan_nat_pages(). Signed-off-by: Chao Yu Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 55152de1118c..556d06b67e6a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -244,6 +244,7 @@ static inline void set_raw_extent(struct extent_info *ext, struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ + nid_t available_nids; /* maximum available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ unsigned int ram_thresh; /* control the memory footprint */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 837f5fdabd19..6ebdba151f12 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1447,7 +1447,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i = NULL; retry: - if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) + if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) return false; spin_lock(&nm_i->free_nid_list_lock); @@ -1859,8 +1859,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3; + nm_i->available_nids = nm_i->max_nid - 3; nm_i->fcnt = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; -- cgit v1.2.3 From e8271fa3908de52937d298b339f9f7984c491cc6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 18 Apr 2014 15:21:04 +0900 Subject: f2fs: avoid BUG_ON when mouting corrupted image having garbage blocks If the disk has some garbage blocks, F2FS is able to face with BUG_ON when recovering direct node blocks. This patch detects the error case and avoids that prior to reaching BUG_ON. Alexey Khoroshilov addressed the potential security issues as follows. "An ability to trigger a BUG_ON assert by mounting a crafted image is usually considered as a local denial of service [1-3]. As far as I understand, the reason is that some kernel data may become inconsistent that can lead to further problems. [1] http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2011-3353 [2] http://www.openwall.com/lists/oss-security/2011/06/24/4 [3] http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2011-2928 etc." Reported-by: Andrey Tsyvarev Cc: Alexey Khoroshilov Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6ebdba151f12..64755f49d6e7 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1609,6 +1609,11 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) struct node_info old_ni, new_ni; struct page *ipage; + get_node_info(sbi, ino, &old_ni); + + if (unlikely(old_ni.blk_addr != NULL_ADDR)) + return -EINVAL; + ipage = grab_cache_page(NODE_MAPPING(sbi), ino); if (!ipage) return -ENOMEM; @@ -1616,7 +1621,6 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); - get_node_info(sbi, ino, &old_ni); SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); -- cgit v1.2.3 From 6fb03f3a40805a412c9b285010ffdc2e7563f81b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 16 Apr 2014 10:47:06 +0900 Subject: f2fs: adjust free mem size to flush dentry blocks If so many dirty dentry blocks are cached, not reached to the flush condition, we should fall into livelock in balance_dirty_pages. So, let's consider the mem size for the condition. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 44 ++++++++++++++++++++++++++------------------ fs/f2fs/node.h | 5 +++-- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b5cd6d1c9320..6b89b2517edf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -863,7 +863,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, return 0; if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && - get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA)) + get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && + available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; diff = nr_pages_to_write(sbi, DATA, wbc); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 556d06b67e6a..97da71d96ce3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1143,6 +1143,7 @@ f2fs_hash_t f2fs_dentry_hash(const char *, size_t); struct dnode_of_data; struct node_info; +bool available_free_memory(struct f2fs_sb_info *, int); int is_checkpointed_node(struct f2fs_sb_info *, nid_t); bool fsync_mark_done(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 64755f49d6e7..2803ef6cf533 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -26,20 +26,26 @@ static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; -static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type) +bool available_free_memory(struct f2fs_sb_info *sbi, int type) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; unsigned long mem_size = 0; + bool res = false; si_meminfo(&val); - if (type == FREE_NIDS) - mem_size = nm_i->fcnt * sizeof(struct free_nid); - else if (type == NAT_ENTRIES) - mem_size += nm_i->nat_cnt * sizeof(struct nat_entry); - mem_size >>= 12; - - /* give 50:50 memory for free nids and nat caches respectively */ - return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11)); + /* give 25%, 25%, 50% memory for each components respectively */ + if (type == FREE_NIDS) { + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + } else if (type == NAT_ENTRIES) { + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + } else if (type == DIRTY_DENTS) { + mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + } + return res; } static void clear_node_page_dirty(struct page *page) @@ -241,7 +247,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (available_free_memory(nm_i, NAT_ENTRIES)) + if (available_free_memory(sbi, NAT_ENTRIES)) return 0; write_lock(&nm_i->nat_tree_lock); @@ -1310,13 +1316,14 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, radix_tree_delete(&nm_i->free_nid_root, i->nid); } -static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) +static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; struct nat_entry *ne; bool allocated = false; - if (!available_free_memory(nm_i, FREE_NIDS)) + if (!available_free_memory(sbi, FREE_NIDS)) return -1; /* 0 nid should not be used */ @@ -1369,9 +1376,10 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) kmem_cache_free(free_nid_slab, i); } -static void scan_nat_page(struct f2fs_nm_info *nm_i, +static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; int i; @@ -1386,7 +1394,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); f2fs_bug_on(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) { - if (add_free_nid(nm_i, start_nid, true) < 0) + if (add_free_nid(sbi, start_nid, true) < 0) break; } } @@ -1410,7 +1418,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) while (1) { struct page *page = get_current_nat_page(sbi, nid); - scan_nat_page(nm_i, page, nid); + scan_nat_page(sbi, page, nid); f2fs_put_page(page, 1); nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); @@ -1430,7 +1438,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); nid = le32_to_cpu(nid_in_journal(sum, i)); if (addr == NULL_ADDR) - add_free_nid(nm_i, nid, true); + add_free_nid(sbi, nid, true); else remove_free_nid(nm_i, nid); } @@ -1507,7 +1515,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(!i || i->state != NID_ALLOC); - if (!available_free_memory(nm_i, FREE_NIDS)) { + if (!available_free_memory(sbi, FREE_NIDS)) { __del_from_free_nid_list(nm_i, i); need_free = true; } else { @@ -1835,7 +1843,7 @@ flush_now: } if (nat_get_blkaddr(ne) == NULL_ADDR && - add_free_nid(NM_I(sbi), nid, false) <= 0) { + add_free_nid(sbi, nid, false) <= 0) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 41bb65b1c97d..a076c88bdca5 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -83,9 +83,10 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, raw_ne->version = ni->version; } -enum nid_type { +enum mem_type { FREE_NIDS, /* indicates the free nid list */ - NAT_ENTRIES /* indicates the cached nat entry */ + NAT_ENTRIES, /* indicates the cached nat entry */ + DIRTY_DENTS /* indicates dirty dentry pages */ }; /* -- cgit v1.2.3 From 454ae7e519f90db11c0ae082df12a162d2e206ce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 22 Apr 2014 13:34:01 +0800 Subject: f2fs: handle inline data independently in f2fs_bmap We'd better handle inline data case independently in f2fs_bmap(). It can reduce our handling time in f2fs_bmap(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6b89b2517edf..150c12ace4af 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1058,6 +1058,11 @@ static int f2fs_set_data_page_dirty(struct page *page) static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { + struct inode *inode = mapping->host; + + if (f2fs_has_inline_data(inode)) + return 0; + return generic_block_bmap(mapping, block, get_data_block); } -- cgit v1.2.3 From e112326805a50ee2016faf0971660122471f7c04 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Apr 2014 12:17:25 +0900 Subject: f2fs: clean up long variable names This patch includes simple clean-ups to reduce unnecessary long variable names. Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 86 +++++++++++++++++++++++++++++---------------------------- fs/f2fs/xattr.h | 6 ++-- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 503c2451131e..1de057d9e7a7 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -26,7 +26,7 @@ #include "xattr.h" static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) + size_t list_size, const char *name, size_t len, int type) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); int total_len, prefix_len = 0; @@ -53,11 +53,11 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, return -EINVAL; } - total_len = prefix_len + name_len + 1; + total_len = prefix_len + len + 1; if (list && total_len <= list_size) { memcpy(list, prefix, prefix_len); - memcpy(list + prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; + memcpy(list + prefix_len, name, len); + list[prefix_len + len] = '\0'; } return total_len; } @@ -112,7 +112,7 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, } static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) + size_t list_size, const char *name, size_t len, int type) { const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; size_t size; @@ -155,9 +155,10 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, } #ifdef CONFIG_F2FS_FS_SECURITY -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, +static int __f2fs_setxattr(struct inode *inode, int index, + const char *name, const void *value, size_t size, struct page *ipage); + static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *page) { @@ -241,26 +242,26 @@ const struct xattr_handler *f2fs_xattr_handlers[] = { NULL, }; -static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) +static inline const struct xattr_handler *f2fs_xattr_handler(int index) { const struct xattr_handler *handler = NULL; - if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) - handler = f2fs_xattr_handler_map[name_index]; + if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map)) + handler = f2fs_xattr_handler_map[index]; return handler; } -static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, - size_t name_len, const char *name) +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; list_for_each_xattr(entry, base_addr) { - if (entry->e_name_index != name_index) + if (entry->e_name_index != index) continue; - if (entry->e_name_len != name_len) + if (entry->e_name_len != len) continue; - if (!memcmp(entry->e_name, name, name_len)) + if (!memcmp(entry->e_name, name, len)) break; } return entry; @@ -396,42 +397,43 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return 0; } -int f2fs_getxattr(struct inode *inode, int name_index, const char *name, +int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size) { struct f2fs_xattr_entry *entry; void *base_addr; int error = 0; - size_t value_len, name_len; + size_t size, len; if (name == NULL) return -EINVAL; - name_len = strlen(name); - if (name_len > F2FS_NAME_LEN) + + len = strlen(name); + if (len > F2FS_NAME_LEN) return -ERANGE; base_addr = read_all_xattrs(inode, NULL); if (!base_addr) return -ENOMEM; - entry = __find_xattr(base_addr, name_index, name_len, name); + entry = __find_xattr(base_addr, index, len, name); if (IS_XATTR_LAST_ENTRY(entry)) { error = -ENODATA; goto cleanup; } - value_len = le16_to_cpu(entry->e_value_size); + size = le16_to_cpu(entry->e_value_size); - if (buffer && value_len > buffer_size) { + if (buffer && size > buffer_size) { error = -ERANGE; goto cleanup; } if (buffer) { char *pval = entry->e_name + entry->e_name_len; - memcpy(buffer, pval, value_len); + memcpy(buffer, pval, size); } - error = value_len; + error = size; cleanup: kzfree(base_addr); @@ -475,15 +477,15 @@ cleanup: return error; } -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, +static int __f2fs_setxattr(struct inode *inode, int index, + const char *name, const void *value, size_t size, struct page *ipage) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *here, *last; void *base_addr; int found, newsize; - size_t name_len; + size_t len; __u32 new_hsize; int error = -ENOMEM; @@ -491,11 +493,11 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, return -EINVAL; if (value == NULL) - value_len = 0; + size = 0; - name_len = strlen(name); + len = strlen(name); - if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) + if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode)) return -ERANGE; base_addr = read_all_xattrs(inode, ipage); @@ -503,7 +505,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, goto exit; /* find entry with wanted name. */ - here = __find_xattr(base_addr, name_index, name_len, name); + here = __find_xattr(base_addr, index, len, name); found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; last = here; @@ -511,8 +513,7 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, while (!IS_XATTR_LAST_ENTRY(last)) last = XATTR_NEXT_ENTRY(last); - newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + - name_len + value_len); + newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size); /* 1. Check space */ if (value) { @@ -555,12 +556,12 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, * We just write new entry. */ memset(last, 0, newsize); - last->e_name_index = name_index; - last->e_name_len = name_len; - memcpy(last->e_name, name, name_len); - pval = last->e_name + name_len; - memcpy(pval, value, value_len); - last->e_value_size = cpu_to_le16(value_len); + last->e_name_index = index; + last->e_name_len = len; + memcpy(last->e_name, name, len); + pval = last->e_name + len; + memcpy(pval, value, size); + last->e_value_size = cpu_to_le16(size); new_hsize += newsize; } @@ -583,8 +584,9 @@ exit: return error; } -int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, struct page *ipage) +int f2fs_setxattr(struct inode *inode, int index, const char *name, + const void *value, size_t size, + struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int err; @@ -594,7 +596,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, f2fs_lock_op(sbi); /* protect xattr_ver */ down_write(&F2FS_I(inode)->i_sem); - err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); + err = __f2fs_setxattr(inode, index, name, value, size, ipage); up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index b21d9ebdeff3..79bc2bb32aeb 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -120,12 +120,12 @@ extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else #define f2fs_xattr_handlers NULL -static inline int f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len) +static inline int f2fs_setxattr(struct inode *inode, int index, + const char *name, const void *value, size_t size) { return -EOPNOTSUPP; } -static inline int f2fs_getxattr(struct inode *inode, int name_index, +static inline int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size) { return -EOPNOTSUPP; -- cgit v1.2.3 From c02745ef684f9a6f98f30388ed048ee460db5483 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Apr 2014 12:23:14 +0900 Subject: f2fs: pass flags field to setxattr functions This patch passes the "flags" field to the low level setxattr functions to use XATTR_REPLACE in the following patches. Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 2 +- fs/f2fs/xattr.c | 13 +++++++------ fs/f2fs/xattr.h | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index e93e4ec7d165..dbe2141d10ad 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, } } - error = f2fs_setxattr(inode, name_index, "", value, size, ipage); + error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); kfree(value); if (!error) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 1de057d9e7a7..bbe9c2badf5d 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -108,7 +108,8 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); + return f2fs_setxattr(dentry->d_inode, type, name, + value, size, NULL, flags); } static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, @@ -157,7 +158,7 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, #ifdef CONFIG_F2FS_FS_SECURITY static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *ipage); + struct page *ipage, int); static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *page) @@ -168,7 +169,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, for (xattr = xattr_array; xattr->name != NULL; xattr++) { err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, xattr->name, xattr->value, - xattr->value_len, (struct page *)page); + xattr->value_len, (struct page *)page, 0); if (err < 0) break; } @@ -479,7 +480,7 @@ cleanup: static int __f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *ipage) + struct page *ipage, int flags) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *here, *last; @@ -586,7 +587,7 @@ exit: int f2fs_setxattr(struct inode *inode, int index, const char *name, const void *value, size_t size, - struct page *ipage) + struct page *ipage, int flags) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int err; @@ -596,7 +597,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_lock_op(sbi); /* protect xattr_ver */ down_write(&F2FS_I(inode)->i_sem); - err = __f2fs_setxattr(inode, index, name, value, size, ipage); + err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 79bc2bb32aeb..34ab7dbcf5e3 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -114,14 +114,14 @@ extern const struct xattr_handler f2fs_xattr_security_handler; extern const struct xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, - const void *, size_t, struct page *); + const void *, size_t, struct page *, int); extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else #define f2fs_xattr_handlers NULL static inline int f2fs_setxattr(struct inode *inode, int index, - const char *name, const void *value, size_t size) + const char *name, const void *value, size_t size, int flags) { return -EOPNOTSUPP; } -- cgit v1.2.3 From 916decbf3913ccdc68f0228e001a6d270cf5682e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 23 Apr 2014 12:28:18 +0900 Subject: f2fs: return errors right after checking them This patch adds two error conditions early in the setxattr operations. Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index bbe9c2badf5d..6073f9f88416 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -509,8 +509,16 @@ static int __f2fs_setxattr(struct inode *inode, int index, here = __find_xattr(base_addr, index, len, name); found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; - last = here; + if ((flags & XATTR_REPLACE) && !found) { + error = -ENODATA; + goto exit; + } else if ((flags & XATTR_CREATE) && found) { + error = -EEXIST; + goto exit; + } + + last = here; while (!IS_XATTR_LAST_ENTRY(last)) last = XATTR_NEXT_ENTRY(last); -- cgit v1.2.3 From 2aea39eca6b68d6ae7eb545332df0695f56a3d3f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 Apr 2014 09:49:52 +0900 Subject: f2fs: submit bio at the reclaim path If f2fs_write_data_page is called through the reclaim path, we should submit the bio right away. This patch resolves the following issue that Marc Dietrich reported. "It took me a while to bisect a problem which causes my ARM (tegra2) netbook to frequently stall for 5-10 seconds when I enable EXA acceleration (opentegra experimental ddx)." And this patch fixes that. Reported-by: Marc Dietrich Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 150c12ace4af..0147de7e3973 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -833,6 +833,8 @@ out: unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, DATA, WRITE); return 0; redirty_out: -- cgit v1.2.3 From 6403eb1f646a49cc92f25c08f8716f8870a4a865 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 26 Apr 2014 19:59:52 +0800 Subject: f2fs: introduce help macro ADDRS_PER_PAGE() Introduce help macro ADDRS_PER_PAGE() to get the number of address pointers in direct node or inode. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 ++---- fs/f2fs/file.c | 5 +---- fs/f2fs/recovery.c | 5 +---- include/linux/f2fs_fs.h | 3 +++ 4 files changed, 7 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0147de7e3973..273fe1631af9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -652,8 +652,7 @@ static int get_data_block(struct inode *inode, sector_t iblock, goto put_out; } - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); bh_result->b_size = (((size_t)1) << blkbits); dn.ofs_in_node++; pgofs++; @@ -675,8 +674,7 @@ get_next: if (dn.data_blkaddr == NEW_ADDR) goto put_out; - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } if (maxblocks > (bh_result->b_size >> blkbits)) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 60e7d5448a1d..bb365c932555 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -288,10 +288,7 @@ int truncate_blocks(struct inode *inode, u64 from) return err; } - if (IS_INODE(dn.node_page)) - count = ADDRS_PER_INODE(F2FS_I(inode)); - else - count = ADDRS_PER_BLOCK; + count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); count -= dn.ofs_in_node; f2fs_bug_on(count < 0); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9eb6487f383d..be1e3e881725 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -301,10 +301,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, goto out; start = start_bidx_of_node(ofs_of_node(page), fi); - if (IS_INODE(page)) - end = start + ADDRS_PER_INODE(fi); - else - end = start + ADDRS_PER_BLOCK; + end = start + ADDRS_PER_PAGE(page, fi); f2fs_lock_op(sbi); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index df53e1753a76..8c03f71307c6 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -146,6 +146,9 @@ struct f2fs_extent { #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +#define ADDRS_PER_PAGE(page, fi) \ + (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK) + #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) #define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) -- cgit v1.2.3 From a688b9d9e5cbec76edab74e724297b5488c07829 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Sun, 27 Apr 2014 14:21:21 +0800 Subject: f2fs: introduce struct flush_cmd_control to wrap the flush_merge fields Split the flush_merge fields from sm_i, and use the new struct flush_cmd_control to wrap it, so that we can igonre these fileds if flush_merge is disable, and it alse can the structs more neat. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 17 +++++++++----- fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++---------------------- fs/f2fs/super.c | 46 ++++++++++++++++++++----------------- 3 files changed, 78 insertions(+), 54 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 97da71d96ce3..fa0ec8116f48 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -325,6 +325,15 @@ struct flush_cmd { int ret; }; +struct flush_cmd_control { + struct task_struct *f2fs_issue_flush; /* flush thread */ + wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ + struct flush_cmd *issue_list; /* list for command issue */ + struct flush_cmd *dispatch_list; /* list for command dispatch */ + spinlock_t issue_lock; /* for issue list lock */ + struct flush_cmd *issue_tail; /* list tail of issue list */ +}; + struct f2fs_sm_info { struct sit_info *sit_info; /* whole segment information */ struct free_segmap_info *free_info; /* free segment information */ @@ -355,12 +364,8 @@ struct f2fs_sm_info { unsigned int min_ipu_util; /* in-place-update threshold */ /* for flush command control */ - struct task_struct *f2fs_issue_flush; /* flush thread */ - wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ - struct flush_cmd *issue_list; /* list for command issue */ - struct flush_cmd *dispatch_list; /* list for command dispatch */ - spinlock_t issue_lock; /* for issue list lock */ - struct flush_cmd *issue_tail; /* list tail of issue list */ + struct flush_cmd_control *cmd_control_info; + }; /* diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f6816e18db98..9ac4f861c6f6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -200,20 +200,20 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; - struct f2fs_sm_info *sm_i = SM_I(sbi); - wait_queue_head_t *q = &sm_i->flush_wait_queue; + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + wait_queue_head_t *q = &fcc->flush_wait_queue; repeat: if (kthread_should_stop()) return 0; - spin_lock(&sm_i->issue_lock); - if (sm_i->issue_list) { - sm_i->dispatch_list = sm_i->issue_list; - sm_i->issue_list = sm_i->issue_tail = NULL; + spin_lock(&fcc->issue_lock); + if (fcc->issue_list) { + fcc->dispatch_list = fcc->issue_list; + fcc->issue_list = fcc->issue_tail = NULL; } - spin_unlock(&sm_i->issue_lock); + spin_unlock(&fcc->issue_lock); - if (sm_i->dispatch_list) { + if (fcc->dispatch_list) { struct bio *bio = bio_alloc(GFP_NOIO, 0); struct flush_cmd *cmd, *next; int ret; @@ -221,22 +221,23 @@ repeat: bio->bi_bdev = sbi->sb->s_bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); - for (cmd = sm_i->dispatch_list; cmd; cmd = next) { + for (cmd = fcc->dispatch_list; cmd; cmd = next) { cmd->ret = ret; next = cmd->next; complete(&cmd->wait); } bio_put(bio); - sm_i->dispatch_list = NULL; + fcc->dispatch_list = NULL; } - wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list); + wait_event_interruptible(*q, + kthread_should_stop() || fcc->issue_list); goto repeat; } int f2fs_issue_flush(struct f2fs_sb_info *sbi) { - struct f2fs_sm_info *sm_i = SM_I(sbi); + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; struct flush_cmd *cmd; int ret; @@ -246,16 +247,16 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC | __GFP_ZERO); init_completion(&cmd->wait); - spin_lock(&sm_i->issue_lock); - if (sm_i->issue_list) - sm_i->issue_tail->next = cmd; + spin_lock(&fcc->issue_lock); + if (fcc->issue_list) + fcc->issue_tail->next = cmd; else - sm_i->issue_list = cmd; - sm_i->issue_tail = cmd; - spin_unlock(&sm_i->issue_lock); + fcc->issue_list = cmd; + fcc->issue_tail = cmd; + spin_unlock(&fcc->issue_lock); - if (!sm_i->dispatch_list) - wake_up(&sm_i->flush_wait_queue); + if (!fcc->dispatch_list) + wake_up(&fcc->flush_wait_queue); wait_for_completion(&cmd->wait); ret = cmd->ret; @@ -1873,12 +1874,22 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->max_discards = 0; if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { - spin_lock_init(&sm_info->issue_lock); - init_waitqueue_head(&sm_info->flush_wait_queue); - sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, + struct flush_cmd_control *fcc = + kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + + if (!fcc) + return -ENOMEM; + spin_lock_init(&fcc->issue_lock); + init_waitqueue_head(&fcc->flush_wait_queue); + + fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(sm_info->f2fs_issue_flush)) - return PTR_ERR(sm_info->f2fs_issue_flush); + if (IS_ERR(fcc->f2fs_issue_flush)) { + err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); + return err; + } + sm_info->cmd_control_info = fcc; } err = build_sit_info(sbi); @@ -1987,10 +1998,14 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) void destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); + struct flush_cmd_control *fcc; + if (!sm_info) return; - if (sm_info->f2fs_issue_flush) - kthread_stop(sm_info->f2fs_issue_flush); + fcc = sm_info->cmd_control_info; + if (fcc && fcc->f2fs_issue_flush) + kthread_stop(fcc->f2fs_issue_flush); + kfree(fcc); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index bea642aec0fe..a7ed92e2948a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -641,29 +641,33 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if flush_merge is not passed in mount option. */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { - struct f2fs_sm_info *sm_info = sbi->sm_info; - - if (sm_info->f2fs_issue_flush) - kthread_stop(sm_info->f2fs_issue_flush); - sm_info->issue_list = sm_info->dispatch_list = NULL; - sm_info->f2fs_issue_flush = NULL; - } else if (test_opt(sbi, FLUSH_MERGE)) { - struct f2fs_sm_info *sm_info = sbi->sm_info; - - if (!sm_info->f2fs_issue_flush) { - dev_t dev = sbi->sb->s_bdev->bd_dev; - - spin_lock_init(&sm_info->issue_lock); - init_waitqueue_head(&sm_info->flush_wait_queue); - sm_info->f2fs_issue_flush = - kthread_run(issue_flush_thread, sbi, + struct flush_cmd_control *fcc = + sbi->sm_info->cmd_control_info; + + if (fcc && fcc->f2fs_issue_flush) + kthread_stop(fcc->f2fs_issue_flush); + kfree(fcc); + sbi->sm_info->cmd_control_info = NULL; + } else if (test_opt(sbi, FLUSH_MERGE) && + !sbi->sm_info->cmd_control_info) { + dev_t dev = sbi->sb->s_bdev->bd_dev; + struct flush_cmd_control *fcc = + kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + + if (!fcc) { + err = -ENOMEM; + goto restore_gc; + } + spin_lock_init(&fcc->issue_lock); + init_waitqueue_head(&fcc->flush_wait_queue); + fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(sm_info->f2fs_issue_flush)) { - err = PTR_ERR(sm_info->f2fs_issue_flush); - sm_info->f2fs_issue_flush = NULL; - goto restore_gc; - } + if (IS_ERR(fcc->f2fs_issue_flush)) { + err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); + goto restore_gc; } + sbi->sm_info->cmd_control_info = fcc; } skip: /* Update the POSIXACL Flag */ -- cgit v1.2.3 From 2163d19815b3dfdb243cee2de2478ae7efce1942 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Sun, 27 Apr 2014 14:21:33 +0800 Subject: f2fs: introduce help function {create,destroy}_flush_cmd_control Introduce help function {create,destroy}_flush_cmd_control to clean up the create/destory flush merge operation. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/segment.c | 60 +++++++++++++++++++++++++++++++++++-------------------- fs/f2fs/super.c | 27 +++---------------------- 3 files changed, 43 insertions(+), 47 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fa0ec8116f48..1ca958ab40e3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1184,8 +1184,9 @@ void destroy_node_manager_caches(void); */ void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); -int issue_flush_thread(void *); int f2fs_issue_flush(struct f2fs_sb_info *); +int create_flush_cmd_control(struct f2fs_sb_info *); +void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void clear_prefree_segments(struct f2fs_sb_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9ac4f861c6f6..2ecbffb91f37 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -197,7 +197,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) f2fs_sync_fs(sbi->sb, true); } -int issue_flush_thread(void *data) +static int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; @@ -264,6 +264,40 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) return ret; } +int create_flush_cmd_control(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + struct flush_cmd_control *fcc; + int err = 0; + + fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + if (!fcc) + return -ENOMEM; + spin_lock_init(&fcc->issue_lock); + init_waitqueue_head(&fcc->flush_wait_queue); + fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, + "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(fcc->f2fs_issue_flush)) { + err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); + return err; + } + sbi->sm_info->cmd_control_info = fcc; + + return err; +} + +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) +{ + struct flush_cmd_control *fcc = + sbi->sm_info->cmd_control_info; + + if (fcc && fcc->f2fs_issue_flush) + kthread_stop(fcc->f2fs_issue_flush); + kfree(fcc); + sbi->sm_info->cmd_control_info = NULL; +} + static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, enum dirty_type dirty_type) { @@ -1845,7 +1879,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - dev_t dev = sbi->sb->s_bdev->bd_dev; struct f2fs_sm_info *sm_info; int err; @@ -1874,22 +1907,9 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->max_discards = 0; if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { - struct flush_cmd_control *fcc = - kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); - - if (!fcc) - return -ENOMEM; - spin_lock_init(&fcc->issue_lock); - init_waitqueue_head(&fcc->flush_wait_queue); - - fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, - "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(fcc->f2fs_issue_flush)) { - err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); + err = create_flush_cmd_control(sbi); + if (err) return err; - } - sm_info->cmd_control_info = fcc; } err = build_sit_info(sbi); @@ -1998,14 +2018,10 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) void destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); - struct flush_cmd_control *fcc; if (!sm_info) return; - fcc = sm_info->cmd_control_info; - if (fcc && fcc->f2fs_issue_flush) - kthread_stop(fcc->f2fs_issue_flush); - kfree(fcc); + destroy_flush_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a7ed92e2948a..b2b18637cb9e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -641,33 +641,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) * or if flush_merge is not passed in mount option. */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { - struct flush_cmd_control *fcc = - sbi->sm_info->cmd_control_info; - - if (fcc && fcc->f2fs_issue_flush) - kthread_stop(fcc->f2fs_issue_flush); - kfree(fcc); - sbi->sm_info->cmd_control_info = NULL; + destroy_flush_cmd_control(sbi); } else if (test_opt(sbi, FLUSH_MERGE) && !sbi->sm_info->cmd_control_info) { - dev_t dev = sbi->sb->s_bdev->bd_dev; - struct flush_cmd_control *fcc = - kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); - - if (!fcc) { - err = -ENOMEM; - goto restore_gc; - } - spin_lock_init(&fcc->issue_lock); - init_waitqueue_head(&fcc->flush_wait_queue); - fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, - "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(fcc->f2fs_issue_flush)) { - err = PTR_ERR(fcc->f2fs_issue_flush); - kfree(fcc); + err = create_flush_cmd_control(sbi); + if (err) goto restore_gc; - } - sbi->sm_info->cmd_control_info = fcc; } skip: /* Update the POSIXACL Flag */ -- cgit v1.2.3 From 267378d4de696d4397cd611e62957d19b2a61357 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 23 Apr 2014 14:10:24 +0800 Subject: f2fs: introduce f2fs_seek_block to support SEEK_{DATA, HOLE} in llseek In This patch we introduce f2fs_seek_block to support SEEK_{DATA,HOLE} of lseek(2). change log from v1: o fix bug when lseek from middle of page and fix wrong calculation of PGOFS_OF_NEXT_DNODE macro. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ++++ fs/f2fs/file.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 1ca958ab40e3..e77be7cb1e60 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1079,6 +1079,12 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) +/* get offset of first page in next direct node */ +#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \ + ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \ + (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \ + ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi)) + /* * file.c */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bb365c932555..d99d17325046 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -194,6 +194,96 @@ out: return ret; } +static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxbytes = inode->i_sb->s_maxbytes; + struct dnode_of_data dn; + pgoff_t pgofs, end_offset; + loff_t data_ofs = offset, isize; + int err = 0; + + mutex_lock(&inode->i_mutex); + + isize = i_size_read(inode); + if (offset >= isize) + goto fail; + + /* handle inline data case */ + if (f2fs_has_inline_data(inode)) { + if (whence == SEEK_HOLE) + data_ofs = isize; + goto found; + } + + pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT); + + for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); + if (err && err != -ENOENT) { + goto fail; + } else if (err == -ENOENT) { + /* direct node is not exist */ + if (whence == SEEK_DATA) { + pgofs = PGOFS_OF_NEXT_DNODE(pgofs, + F2FS_I(inode)); + continue; + } else { + goto found; + } + } + + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + + /* find data/hole in dnode block */ + for (; dn.ofs_in_node < end_offset; + dn.ofs_in_node++, pgofs++, + data_ofs = pgofs << PAGE_CACHE_SHIFT) { + block_t blkaddr; + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + + if ((whence == SEEK_DATA && blkaddr != NULL_ADDR) || + (whence == SEEK_HOLE && blkaddr == NULL_ADDR)) { + f2fs_put_dnode(&dn); + goto found; + } + } + f2fs_put_dnode(&dn); + } + + if (whence == SEEK_DATA) + goto fail; + else + data_ofs = isize; +found: + mutex_unlock(&inode->i_mutex); + return vfs_setpos(file, data_ofs, maxbytes); +fail: + mutex_unlock(&inode->i_mutex); + return -ENXIO; +} + +static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxbytes = inode->i_sb->s_maxbytes; + + switch (whence) { + case SEEK_SET: + case SEEK_CUR: + case SEEK_END: + return generic_file_llseek_size(file, offset, whence, + maxbytes, i_size_read(inode)); + case SEEK_DATA: + case SEEK_HOLE: + return f2fs_seek_block(file, offset, whence); + } + + return -EINVAL; +} + static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); @@ -675,7 +765,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) #endif const struct file_operations f2fs_file_operations = { - .llseek = generic_file_llseek, + .llseek = f2fs_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, -- cgit v1.2.3 From fe369bc8ba205537864cb86ba08b390ad20201c4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 28 Apr 2014 17:02:48 +0900 Subject: f2fs: return i_size if the hole is outside of i_size When SEEK_HOLE is requeted, it should return i_size if the hole position is found outside of i_size. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d99d17325046..31128571e284 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -255,9 +255,9 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) if (whence == SEEK_DATA) goto fail; - else - data_ofs = isize; found: + if (whence == SEEK_HOLE && data_ofs > isize) + data_ofs = isize; mutex_unlock(&inode->i_mutex); return vfs_setpos(file, data_ofs, maxbytes); fail: -- cgit v1.2.3 From 7f7670fe9fe47e7e56db658eb8831febe47627f2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 28 Apr 2014 18:12:36 +0900 Subject: f2fs: consider fallocated space for SEEK_DATA If an amount of data are allocated though fallocate and user writes a couple of data among the space, f2fs should return the data offset made by user when SEEK_DATA is requested. For example, (N: NEW_ADDR by fallocate, X: NEW_ADDR by user) 1) fallocate 0 ~ 10MB f -> N N N N N N N N N N N N ... N 2) write 4KB at 5MB offset f -> N N N N N X N N N N N N ... N 3) SEEK_DATA from 0 should return 5MB offset So, this patch adds a routine to search the first dirty page to handle that. Then, the SEEK_DATA flow skips NEW_ADDR offsets until any dirty page is found. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 31128571e284..b9f4fbf5c07e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -194,13 +195,48 @@ out: return ret; } +static pgoff_t __get_first_dirty_index(struct address_space *mapping, + pgoff_t pgofs, int whence) +{ + struct pagevec pvec; + int nr_pages; + + if (whence != SEEK_DATA) + return 0; + + /* find first dirty page index */ + pagevec_init(&pvec, 0); + nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); + pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; + pagevec_release(&pvec); + return pgofs; +} + +static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, + int whence) +{ + switch (whence) { + case SEEK_DATA: + if ((blkaddr == NEW_ADDR && dirty == pgofs) || + (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) + return true; + break; + case SEEK_HOLE: + if (blkaddr == NULL_ADDR) + return true; + break; + } + return false; +} + static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; loff_t maxbytes = inode->i_sb->s_maxbytes; struct dnode_of_data dn; - pgoff_t pgofs, end_offset; - loff_t data_ofs = offset, isize; + pgoff_t pgofs, end_offset, dirty; + loff_t data_ofs = offset; + loff_t isize; int err = 0; mutex_lock(&inode->i_mutex); @@ -218,6 +254,8 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT); + dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence); + for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) { set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); @@ -244,8 +282,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) block_t blkaddr; blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); - if ((whence == SEEK_DATA && blkaddr != NULL_ADDR) || - (whence == SEEK_HOLE && blkaddr == NULL_ADDR)) { + if (__found_offset(blkaddr, dirty, pgofs, whence)) { f2fs_put_dnode(&dn); goto found; } -- cgit v1.2.3 From 5c1f9927ec1a4753e845193bde99ac3b03b08818 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 28 Apr 2014 17:58:34 +0800 Subject: f2fs: set errno when f2fs_iget failed in recover_dentry We should set the error number correctly when we fail in recover_dentry(), so the recover flow could stop for the reason as error number shows instead of continuing. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index be1e3e881725..e950a2f50ac1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -75,7 +75,8 @@ retry: einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { WARN_ON(1); - if (PTR_ERR(einode) == -ENOENT) + err = PTR_ERR(einode); + if (err == -ENOENT) err = -EEXIST; goto out_unmap_put; } -- cgit v1.2.3 From 817202d937e6cca7e60f42e6495aaa51d70d9d7e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 28 Apr 2014 17:59:43 +0800 Subject: f2fs: readahead multi pages of directory for performance We have no so such readahead mechanism in ->iterate() path as the one in ->read() path, it cause low performance when we read large directory. This patch add readahead in f2fs_readdir() for better performance. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 6 ++++++ fs/f2fs/f2fs.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 972fd0ef230f..3581c2bde21b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -637,11 +637,17 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; + struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); unsigned char d_type = DT_UNKNOWN; bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); + /* readahead for multi pages of dir */ + if (npages - n > 1 && !ra_has_index(ra, n)) + page_cache_sync_readahead(inode->i_mapping, ra, file, n, + min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); + for (; n < npages; n++) { dentry_page = get_lock_data_page(inode, n); if (IS_ERR(dentry_page)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index e77be7cb1e60..2b67679f88b6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -182,6 +182,8 @@ enum { #define F2FS_LINK_MAX 32000 /* maximum link count per file */ +#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ + /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ -- cgit v1.2.3 From 8aa6f1c5bd7043734fff1961a4795da9cc5d0f50 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 29 Apr 2014 09:03:03 +0800 Subject: f2fs: fix to truncate inline data in inode page when setattr Previous we do not truncate inline data in inode page when setattr, so following case could still read the inline data which has already truncated: 1.write inline data 2.ftruncate size to 0 3.ftruncate size to max inline data size 4.read from offset 0 This patch introduces truncate_inline_data() to fix this problem. change log from v1: o fix a bug and do not truncate first page data after truncate inline data. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 3 +++ fs/f2fs/inline.c | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2b67679f88b6..676a2c6ccec7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1410,5 +1410,6 @@ bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_data(struct inode *, pgoff_t); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); +void truncate_inline_data(struct inode *, u64); int recover_inline_data(struct inode *, struct page *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b9f4fbf5c07e..d97e5c458f36 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -369,6 +369,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; + if (f2fs_has_inline_data(inode)) + return truncate_inline_data(inode, from); + if (!offset) return; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 3258c7cf00d5..d215dbb09f07 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -176,6 +176,24 @@ int f2fs_write_inline_data(struct inode *inode, return 0; } +void truncate_inline_data(struct inode *inode, u64 from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *ipage; + + if (from >= MAX_INLINE_DATA) + return; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return; + + zero_user_segment(ipage, INLINE_DATA_OFFSET + from, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + set_page_dirty(ipage); + f2fs_put_page(ipage, 1); +} + int recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); -- cgit v1.2.3 From 54b591dfda1f5ab0bc2a9ce1bee5364110168777 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 29 Apr 2014 17:28:32 +0900 Subject: f2fs: split grab_cache_page and wait_on_page_writeback for node pages This patch splits grab_cache_page_write_begin into grab_cache_page and wait_on_page_writeback for node pages. This patch intends to enhance the latency to get node pages by alleviating unnecessary wait_on_page_writeback. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 ++ fs/f2fs/inline.c | 6 ++++++ fs/f2fs/node.c | 8 ++++---- fs/f2fs/node.h | 2 +- fs/f2fs/xattr.c | 3 +++ 5 files changed, 16 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3581c2bde21b..c3f148555c37 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -268,6 +268,8 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; + f2fs_wait_on_page_writeback(ipage, NODE); + /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); ri->i_namelen = cpu_to_le32(name->len); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d215dbb09f07..8bf34f052f67 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -156,6 +156,7 @@ int f2fs_write_inline_data(struct inode *inode, return err; ipage = dn.inode_page; + f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); src_addr = kmap(page); @@ -188,6 +189,8 @@ void truncate_inline_data(struct inode *inode, u64 from) if (IS_ERR(ipage)) return; + f2fs_wait_on_page_writeback(ipage, NODE); + zero_user_segment(ipage, INLINE_DATA_OFFSET + from, INLINE_DATA_OFFSET + MAX_INLINE_DATA); set_page_dirty(ipage); @@ -218,6 +221,8 @@ process_inline: ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + f2fs_wait_on_page_writeback(ipage, NODE); + src_addr = inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); @@ -229,6 +234,7 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2803ef6cf533..059aaf5dda2b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -853,8 +853,7 @@ struct page *new_node_page(struct dnode_of_data *dn, if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return ERR_PTR(-EPERM); - page = grab_cache_page_write_begin(NODE_MAPPING(sbi), - dn->nid, AOP_FLAG_NOFS); + page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); if (!page) return ERR_PTR(-ENOMEM); @@ -871,6 +870,7 @@ struct page *new_node_page(struct dnode_of_data *dn, new_ni.ino = dn->inode->i_ino; set_node_addr(sbi, &new_ni, NEW_ADDR, false); + f2fs_wait_on_page_writeback(page, NODE); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); SetPageUptodate(page); @@ -950,8 +950,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) struct page *page; int err; repeat: - page = grab_cache_page_write_begin(NODE_MAPPING(sbi), - nid, AOP_FLAG_NOFS); + page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) return ERR_PTR(-ENOMEM); @@ -1562,6 +1561,7 @@ static void recover_inline_xattr(struct inode *inode, struct page *page) src_addr = inline_xattr_addr(page); inline_size = inline_xattr_size(inode); + f2fs_wait_on_page_writeback(ipage, NODE); memcpy(dst_addr, src_addr, inline_size); update_inode(inode, ipage); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index a076c88bdca5..4ee29d506f8c 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -272,7 +272,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); - wait_on_page_writeback(p); + f2fs_wait_on_page_writeback(p, NODE); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 6073f9f88416..1f546b4b6b61 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -349,6 +349,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, if (ipage) { inline_addr = inline_xattr_addr(ipage); + f2fs_wait_on_page_writeback(ipage, NODE); } else { page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -356,6 +357,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return PTR_ERR(page); } inline_addr = inline_xattr_addr(page); + f2fs_wait_on_page_writeback(page, NODE); } memcpy(inline_addr, txattr_addr, inline_size); f2fs_put_page(page, 1); @@ -376,6 +378,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return PTR_ERR(xpage); } f2fs_bug_on(new_nid); + f2fs_wait_on_page_writeback(xpage, NODE); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); -- cgit v1.2.3 From 9ac1349ad7e57013d6aa171cb014ad4166a1b50c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 29 Apr 2014 17:35:10 +0900 Subject: f2fs: avoid grab_cache_page_write_begin for data pages We don't need to wait on page writeback for these cases. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/inline.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 273fe1631af9..91ff104b3849 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -417,7 +417,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (unlikely(dn.data_blkaddr == NEW_ADDR)) return ERR_PTR(-EINVAL); - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -455,7 +455,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) int err; repeat: - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8bf34f052f67..1bba5228c197 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -95,6 +95,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) if (err) goto out; + f2fs_wait_on_page_writeback(page, DATA); zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ @@ -133,7 +134,7 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) else if (to_size <= MAX_INLINE_DATA) return 0; - page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS); + page = grab_cache_page(inode->i_mapping, 0); if (!page) return -ENOMEM; -- cgit v1.2.3 From bde446866c9200fcfa27ccde213d83db9a6827db Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Apr 2014 09:18:53 +0900 Subject: f2fs: no need to wait on page writebck to meta pages This patch removes grab_cache_page_write_begin for meta pages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 1346ce916b84..4206f1664be5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -33,12 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) struct address_space *mapping = META_MAPPING(sbi); struct page *page = NULL; repeat: - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) { cond_resched(); goto repeat; } - + f2fs_wait_on_page_writeback(page, META); SetPageUptodate(page); return page; } -- cgit v1.2.3 From d5f66990bb928e7490ba4da94d585f618adcee5e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Apr 2014 09:22:45 +0900 Subject: f2fs: decrease the lock granularity during write_begin This patch reduces the lock granularity during write_begin. When the system is under memory pressure, it would be better to reduce the locking time for the data pages. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 91ff104b3849..a7bb98f5831a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -909,6 +909,10 @@ repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; + + /* to avoid latency during memory pressure */ + unlock_page(page); + *pagep = page; if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) @@ -920,10 +924,18 @@ repeat: f2fs_unlock_op(sbi); if (err) { - f2fs_put_page(page, 1); + f2fs_put_page(page, 0); return err; } inline_data: + lock_page(page); + if (unlikely(page->mapping != mapping)) { + f2fs_put_page(page, 1); + goto repeat; + } + + f2fs_wait_on_page_writeback(page, DATA); + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; -- cgit v1.2.3 From 8198899b94b74bb16c0b240483fd81269b199d2e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 30 Apr 2014 15:04:39 +0900 Subject: f2fs: deactivate inode page if the inode is evicted If the inode page is clean during its inode eviction, it'd better drop the page to reduce further memory pressure. Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index f7a655373c46..adc622c6bdce 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -295,4 +295,5 @@ void f2fs_evict_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); no_delete: clear_inode(inode); + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); } -- cgit v1.2.3 From 8b376249e7e659c41896e0d3e9b25d1230a4e7f6 Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Sun, 4 May 2014 16:37:06 +0800 Subject: f2fs: fix checkpatch warning fix the following checkpatch warning: WARNING: do {} while (0) macros should not be semicolon terminated Signed-off-by: Zhang Zhen Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 4ee29d506f8c..7281112cd1c8 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -59,12 +59,12 @@ struct nat_entry { do { \ ne->checkpointed = false; \ list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ - } while (0); + } while (0) #define __clear_nat_cache_dirty(nm_i, ne) \ do { \ ne->checkpointed = true; \ list_move_tail(&ne->list, &nm_i->nat_entries); \ - } while (0); + } while (0) #define inc_node_version(version) (++version) static inline void node_info_from_raw_nat(struct node_info *ni, -- cgit v1.2.3 From 62aed044ea4bef36ac3f6885611b013b11c9be2d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:46:04 +0800 Subject: f2fs: add a tracepoint for f2fs_write_begin This patch adds a tracepoint for f2fs_write_begin to trace write op of user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ include/trace/events/f2fs.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a7bb98f5831a..cde0d69969b9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -900,6 +900,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct dnode_of_data dn; int err = 0; + trace_f2fs_write_begin(inode, pos, len, flags); + f2fs_balance_fs(sbi); repeat: err = f2fs_convert_inline_data(inode, pos + len); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 67f38faac589..1e9375aa9340 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -659,6 +659,36 @@ DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, TP_CONDITION(bio) ); +TRACE_EVENT(f2fs_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, pos) + __field(unsigned int, len) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, flags = %u", + show_dev_ino(__entry), + (unsigned long long)__entry->pos, + __entry->len, + __entry->flags) +); + DECLARE_EVENT_CLASS(f2fs__page, TP_PROTO(struct page *page, int type), -- cgit v1.2.3 From dfb2bf38bf89e15a65f9996566b2945921388a2f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:47:23 +0800 Subject: f2fs: add a tracepoint for f2fs_write_end This patch adds a tracepoint for f2fs_write_end to trace write op of user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ include/trace/events/f2fs.h | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cde0d69969b9..ea58fb698ac6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -989,6 +989,8 @@ static int f2fs_write_end(struct file *file, { struct inode *inode = page->mapping->host; + trace_f2fs_write_end(inode, pos, len, copied); + SetPageUptodate(page); set_page_dirty(page); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 1e9375aa9340..483804b55742 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -689,6 +689,36 @@ TRACE_EVENT(f2fs_write_begin, __entry->flags) ); +TRACE_EVENT(f2fs_write_end, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, pos) + __field(unsigned int, len) + __field(unsigned int, copied) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->copied = copied; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, copied = %u", + show_dev_ino(__entry), + (unsigned long long)__entry->pos, + __entry->len, + __entry->copied) +); + DECLARE_EVENT_CLASS(f2fs__page, TP_PROTO(struct page *page, int type), -- cgit v1.2.3 From ecda0de3430455378f1c02523bf3ad71d91d613a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:48:26 +0800 Subject: f2fs: add a tracepoint for f2fs_write_{meta,node,data}_page This patch adds a tracepoint for f2fs_write_{meta,node,data}_page to trace when page is writting out. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/data.c | 2 ++ fs/f2fs/node.c | 2 ++ include/trace/events/f2fs.h | 7 +++++++ 4 files changed, 13 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 4206f1664be5..c95d62281d7e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -157,6 +157,8 @@ static int f2fs_write_meta_page(struct page *page, struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + trace_f2fs_writepage(page, META); + if (unlikely(sbi->por_doing)) goto redirty_out; if (wbc->for_reclaim) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ea58fb698ac6..b997d552880e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -788,6 +788,8 @@ static int f2fs_write_data_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, DATA); + if (page->index < end_index) goto write; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 059aaf5dda2b..49bdddbcadea 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1199,6 +1199,8 @@ static int f2fs_write_node_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, NODE); + if (unlikely(sbi->por_doing)) goto redirty_out; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 483804b55742..d70991e69e58 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -751,6 +751,13 @@ DECLARE_EVENT_CLASS(f2fs__page, __entry->dirty) ); +DEFINE_EVENT(f2fs__page, f2fs_writepage, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty, TP_PROTO(struct page *page, int type), -- cgit v1.2.3 From e57484343898094bb8f72a2aa1a50929d27aa027 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:51:24 +0800 Subject: f2fs: add a tracepoint for f2fs_write_{meta,node,data}_pages This patch adds a tracepoint for f2fs_write_{meta,node,data}_pages to trace when pages are fsyncing/flushing. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 ++ fs/f2fs/data.c | 2 ++ fs/f2fs/node.c | 2 ++ include/trace/events/f2fs.h | 64 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c95d62281d7e..fe968c7bfc90 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -186,6 +186,8 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff, written; + trace_f2fs_writepages(mapping->host, wbc, META); + /* collect a number of dirty meta pages and write together */ if (wbc->for_kupdate || get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b997d552880e..21bfafaafe83 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -860,6 +860,8 @@ static int f2fs_write_data_pages(struct address_space *mapping, int ret; long diff; + trace_f2fs_writepages(mapping->host, wbc, DATA); + /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) return 0; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 49bdddbcadea..3d60d3d34ed2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1242,6 +1242,8 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff; + trace_f2fs_writepages(mapping->host, wbc, NODE); + /* balancing f2fs's metadata in background */ f2fs_balance_fs_bg(sbi); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index d70991e69e58..91b1fcc5ec93 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -772,6 +772,70 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, TP_ARGS(page, type) ); +TRACE_EVENT(f2fs_writepages, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type), + + TP_ARGS(inode, wbc, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, type) + __field(int, dir) + __field(long, nr_to_write) + __field(long, pages_skipped) + __field(loff_t, range_start) + __field(loff_t, range_end) + __field(pgoff_t, writeback_index) + __field(int, sync_mode) + __field(char, for_kupdate) + __field(char, for_background) + __field(char, tagged_writepages) + __field(char, for_reclaim) + __field(char, range_cyclic) + __field(char, for_sync) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->type = type; + __entry->dir = S_ISDIR(inode->i_mode); + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->range_start = wbc->range_start; + __entry->range_end = wbc->range_end; + __entry->writeback_index = inode->i_mapping->writeback_index; + __entry->sync_mode = wbc->sync_mode; + __entry->for_kupdate = wbc->for_kupdate; + __entry->for_background = wbc->for_background; + __entry->tagged_writepages = wbc->tagged_writepages; + __entry->for_reclaim = wbc->for_reclaim; + __entry->range_cyclic = wbc->range_cyclic; + __entry->for_sync = wbc->for_sync; + ), + + TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, " + "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, " + "kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u", + show_dev_ino(__entry), + show_block_type(__entry->type), + show_file_type(__entry->dir), + __entry->nr_to_write, + __entry->pages_skipped, + __entry->range_start, + __entry->range_end, + (unsigned long)__entry->writeback_index, + __entry->sync_mode, + __entry->for_kupdate, + __entry->for_background, + __entry->tagged_writepages, + __entry->for_reclaim, + __entry->range_cyclic, + __entry->for_sync) +); + TRACE_EVENT(f2fs_submit_page_mbio, TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), -- cgit v1.2.3 From c20e89cde669799eff62bf8c00ca9a4819c4e11f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 May 2014 16:53:08 +0800 Subject: f2fs: add a tracepoint for f2fs_read_data_page This patch adds a tracepoint for f2fs_read_data_page to trace when page is readed by user. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ include/trace/events/f2fs.h | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 21bfafaafe83..8c250a5d6f26 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -713,6 +713,8 @@ static int f2fs_read_data_page(struct file *file, struct page *page) struct inode *inode = page->mapping->host; int ret; + trace_f2fs_readpage(page, DATA); + /* If the file has inline data, try to read it directlly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 91b1fcc5ec93..b983990b4a9f 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -732,6 +732,7 @@ DECLARE_EVENT_CLASS(f2fs__page, __field(int, dir) __field(pgoff_t, index) __field(int, dirty) + __field(int, uptodate) ), TP_fast_assign( @@ -741,14 +742,17 @@ DECLARE_EVENT_CLASS(f2fs__page, __entry->dir = S_ISDIR(page->mapping->host->i_mode); __entry->index = page->index; __entry->dirty = PageDirty(page); + __entry->uptodate = PageUptodate(page); ), - TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d", + TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, " + "dirty = %d, uptodate = %d", show_dev_ino(__entry), show_block_type(__entry->type), show_file_type(__entry->dir), (unsigned long)__entry->index, - __entry->dirty) + __entry->dirty, + __entry->uptodate) ); DEFINE_EVENT(f2fs__page, f2fs_writepage, @@ -758,6 +762,13 @@ DEFINE_EVENT(f2fs__page, f2fs_writepage, TP_ARGS(page, type) ); +DEFINE_EVENT(f2fs__page, f2fs_readpage, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty, TP_PROTO(struct page *page, int type), -- cgit v1.2.3 From adf8d90b6a949dc80e827263fccb31f8eb08a55d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 8 May 2014 17:00:35 +0800 Subject: f2fs: avoid to use slab memory in f2fs_issue_flush for efficiency If we use slab memory in f2fs_issue_flush(), we will face memory pressure and latency time caused by racing of kmem_cache_{alloc,free}. Let's alloc memory in stack instead of slab. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2ecbffb91f37..f25f0e07e26f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -25,7 +25,6 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; -static struct kmem_cache *flush_cmd_slab; /* * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since @@ -238,30 +237,28 @@ repeat: int f2fs_issue_flush(struct f2fs_sb_info *sbi) { struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; - struct flush_cmd *cmd; - int ret; + struct flush_cmd cmd; if (!test_opt(sbi, FLUSH_MERGE)) return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); - cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC | __GFP_ZERO); - init_completion(&cmd->wait); + init_completion(&cmd.wait); + cmd.next = NULL; spin_lock(&fcc->issue_lock); if (fcc->issue_list) - fcc->issue_tail->next = cmd; + fcc->issue_tail->next = &cmd; else - fcc->issue_list = cmd; - fcc->issue_tail = cmd; + fcc->issue_list = &cmd; + fcc->issue_tail = &cmd; spin_unlock(&fcc->issue_lock); if (!fcc->dispatch_list) wake_up(&fcc->flush_wait_queue); - wait_for_completion(&cmd->wait); - ret = cmd->ret; - kmem_cache_free(flush_cmd_slab, cmd); - return ret; + wait_for_completion(&cmd.wait); + + return cmd.ret; } int create_flush_cmd_control(struct f2fs_sb_info *sbi) @@ -2036,17 +2033,10 @@ int __init create_segment_manager_caches(void) sizeof(struct discard_entry)); if (!discard_entry_slab) return -ENOMEM; - flush_cmd_slab = f2fs_kmem_cache_create("flush_command", - sizeof(struct flush_cmd)); - if (!flush_cmd_slab) { - kmem_cache_destroy(discard_entry_slab); - return -ENOMEM; - } return 0; } void destroy_segment_manager_caches(void) { kmem_cache_destroy(discard_entry_slab); - kmem_cache_destroy(flush_cmd_slab); } -- cgit v1.2.3 From 70ff5dfeb6913cc434d42c7e0cbff16b99d9ae15 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 8 May 2014 17:09:30 +0800 Subject: f2fs: use inode_init_owner() to simplify codes This patch uses exported inode_init_owner() to simplify codes in f2fs_new_inode(). Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a9409d19dfd4..9138c32aa698 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -41,18 +41,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) } f2fs_unlock_op(sbi); - inode->i_uid = current_fsuid(); - - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else { - inode->i_gid = current_fsgid(); - } + inode_init_owner(inode, dir, mode); inode->i_ino = ino; - inode->i_mode = mode; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_generation = sbi->s_next_generation++; -- cgit v1.2.3 From 9b29d481b15a5ec7c4c6777d79be1b884c27bb06 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 30 May 2014 08:20:08 +0900 Subject: MAINTAINERS: change the email address for f2fs This patch changes the valid email address to maintain the f2fs file system. Signed-off-by: Jaegeuk Kim --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 51ebb779c5f3..38ffb57da0a6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3774,7 +3774,7 @@ F: fs/fscache/ F: include/linux/fscache*.h F2FS FILE SYSTEM -M: Jaegeuk Kim +M: Jaegeuk Kim L: linux-f2fs-devel@lists.sourceforge.net W: http://en.wikipedia.org/wiki/F2FS T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git -- cgit v1.2.3 From f6238a72092c7be20565509643b70276853e8f2b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 31 May 2014 01:00:49 +0900 Subject: MAINTAINERS: add a co-maintainer from samsung for F2FS This patch adds a samsung guy for an F2FS maintainer. Signed-off-by: Jaegeuk Kim --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 38ffb57da0a6..e15414124db6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3775,6 +3775,7 @@ F: include/linux/fscache*.h F2FS FILE SYSTEM M: Jaegeuk Kim +M: Changman Lee L: linux-f2fs-devel@lists.sourceforge.net W: http://en.wikipedia.org/wiki/F2FS T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git -- cgit v1.2.3 From d631abdac9d541d282a1461b5aeae70aa3866e9f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 1 Jun 2014 23:24:30 +0900 Subject: f2fs: fix recursive lock by f2fs_setxattr This patch should resolve the following recursive lock. [] call_rwsem_down_write_failed+0x13/0x20 [] f2fs_setxattr+0x5c/0xa0 [f2fs] [] __f2fs_set_acl+0x1b9/0x340 [f2fs] [] f2fs_init_acl+0x4a/0xcb [f2fs] [] __f2fs_add_link+0x26e/0x780 [f2fs] [] f2fs_mkdir+0xb8/0x150 [f2fs] [] vfs_mkdir+0xb7/0x160 [] SyS_mkdir+0xab/0xe0 [] tracesys+0xe1/0xe6 [] 0xffffffffffffffff The call path indicates: - f2fs_add_link : down_write(&fi->i_sem); - init_inode_metadata - f2fs_init_acl - __f2fs_set_acl - f2fs_setxattr : down_write(&fi->i_sem); Here we should not call f2fs_setxattr, but __f2fs_setxattr. But __f2fs_setxattr is a static function in xattr.c, so that I found the other generic approach to use f2fs_setxattr. In f2fs_setxattr, the page pointer is only given from init_inode_metadata. So, this patch adds this condition to avoid this in f2fs_setxattr. Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 1f546b4b6b61..8bea941ee309 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -156,10 +156,6 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, } #ifdef CONFIG_F2FS_FS_SECURITY -static int __f2fs_setxattr(struct inode *inode, int index, - const char *name, const void *value, size_t size, - struct page *ipage, int); - static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *page) { @@ -167,7 +163,7 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, int err = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, xattr->name, xattr->value, xattr->value_len, (struct page *)page, 0); if (err < 0) @@ -603,6 +599,10 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int err; + /* this case is only from init_inode_metadata */ + if (ipage) + return __f2fs_setxattr(inode, index, name, value, + size, ipage, flags); f2fs_balance_fs(sbi); f2fs_lock_op(sbi); -- cgit v1.2.3 From bfec07d0f8ed78b10df3ca3bc23e27de1166ea45 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 28 May 2014 08:56:09 +0800 Subject: f2fs: avoid overflow when large directory feathure is enabled When large directory feathure is enable, We have one case which could cause overflow in dir_buckets() as following: special case: level + dir_level >= 32 and level < MAX_DIR_HASH_DEPTH / 2. Here we define MAX_DIR_BUCKETS to limit the return value when the condition could trigger potential overflow. Changes from V1 o modify description of calculation in f2fs.txt suggested by Changman Lee. Suggested-by: Changman Lee Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 8 ++++---- fs/f2fs/dir.c | 4 ++-- include/linux/f2fs_fs.h | 3 +++ 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 25311e113e75..51afba17bbae 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -461,11 +461,11 @@ The number of blocks and buckets are determined by, # of blocks in level #n = | `- 4, Otherwise - ,- 2^ (n + dir_level), - | if n < MAX_DIR_HASH_DEPTH / 2, + ,- 2^(n + dir_level), + | if n + dir_level < MAX_DIR_HASH_DEPTH / 2, # of buckets in level #n = | - `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1), - Otherwise + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), + Otherwise When F2FS finds a file name in a directory, at first a hash value of the file name is calculated. Then, F2FS scans the hash table in level #0 to find the diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c3f148555c37..966acb039e3b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -23,10 +23,10 @@ static unsigned long dir_blocks(struct inode *inode) static unsigned int dir_buckets(unsigned int level, int dir_level) { - if (level < MAX_DIR_HASH_DEPTH / 2) + if (level + dir_level < MAX_DIR_HASH_DEPTH / 2) return 1 << (level + dir_level); else - return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1); + return MAX_DIR_BUCKETS; } static unsigned int bucket_blocks(unsigned int level) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 8c03f71307c6..ba6f3127738f 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -394,6 +394,9 @@ typedef __le32 f2fs_hash_t; /* MAX level for dir lookup */ #define MAX_DIR_HASH_DEPTH 63 +/* MAX buckets in one level of dir */ +#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) + #define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) -- cgit v1.2.3 From bac4eef6537a663585f3fb3d633a629c72e3b73d Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 27 May 2014 08:41:07 +0800 Subject: f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages Previously we allocate pages with no mapping in ra_sum_pages(), so we may encounter a crash in event trace of f2fs_submit_page_mbio where we access mapping data of the page. We'd better allocate pages in bd_inode mapping and invalidate these pages after we restore data from pages. It could avoid crash in above scenario. Changes from V1 o remove redundant code in ra_sum_pages() suggested by Jaegeuk Kim. Call Trace: [] ? ftrace_raw_event_f2fs_write_checkpoint+0x80/0x80 [f2fs] [] f2fs_submit_page_mbio+0x1cb/0x200 [f2fs] [] restore_node_summary+0x13a/0x280 [f2fs] [] build_curseg+0x2bd/0x620 [f2fs] [] build_segment_manager+0x1cb/0x920 [f2fs] [] f2fs_fill_super+0x535/0x8e0 [f2fs] [] mount_bdev+0x16a/0x1a0 [] f2fs_mount+0x1f/0x30 [f2fs] [] mount_fs+0x36/0x170 [] vfs_kern_mount+0x55/0xe0 [] do_mount+0x1e8/0x900 [] SyS_mount+0x82/0xc0 [] sysenter_do_call+0x12/0x22 Suggested-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 52 ++++++++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3d60d3d34ed2..02a59e9027b1 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1658,35 +1658,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are linked in pages list. + * these pre-readed pages are alloced in bd_inode's mapping tree. */ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) { - struct page *page; - int page_idx = start; + struct inode *inode = sbi->sb->s_bdev->bd_inode; + struct address_space *mapping = inode->i_mapping; + int i, page_idx = start; struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; - for (; page_idx < start + nrpages; page_idx++) { - /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) + for (i = 0; page_idx < start + nrpages; page_idx++, i++) { + /* alloc page in bd_inode for reading node summary info */ + pages[i] = grab_cache_page(mapping, page_idx); + if (!pages[i]) break; - - lock_page(page); - page->index = page_idx; - list_add_tail(&page->lru, pages); + f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); } - list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page->index, &fio); - f2fs_submit_merged_bio(sbi, META, READ); - - return page_idx - start; + return i; } int restore_node_summary(struct f2fs_sb_info *sbi, @@ -1694,11 +1688,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi, { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct page *page, *tmp; + struct inode *inode = sbi->sb->s_bdev->bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - int i, last_offset, nrpages, err = 0; - LIST_HEAD(page_list); + struct page *pages[bio_blocks]; + int i, idx, last_offset, nrpages, err = 0; /* scan the node segment */ last_offset = sbi->blocks_per_seg; @@ -1709,29 +1703,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi, nrpages = min(last_offset - i, bio_blocks); /* read ahead node pages */ - nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages); + nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; - list_for_each_entry_safe(page, tmp, &page_list, lru) { + for (idx = 0; idx < nrpages; idx++) { if (err) goto skip; - lock_page(page); - if (unlikely(!PageUptodate(page))) { + lock_page(pages[idx]); + if (unlikely(!PageUptodate(pages[idx]))) { err = -EIO; } else { - rn = F2FS_NODE(page); + rn = F2FS_NODE(pages[idx]); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; sum_entry++; } - unlock_page(page); + unlock_page(pages[idx]); skip: - list_del(&page->lru); - __free_pages(page, 0); + page_cache_release(pages[idx]); } + + invalidate_mapping_pages(inode->i_mapping, addr, + addr + nrpages); } return err; } -- cgit v1.2.3 From 1dbe4152168d44fa164edbdc9f1243de70b98f7a Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 12 May 2014 12:27:43 +0900 Subject: f2fs: large volume support f2fs's cp has one page which consists of struct f2fs_checkpoint and version bitmap of sit and nat. To support lots of segments, we need more blocks for sit bitmap. So let's arrange sit bitmap as following: +-----------------+------------+ | f2fs_checkpoint | sit bitmap | | + nat bitmap | | +-----------------+------------+ 0 4k N blocks Signed-off-by: Changman Lee [Jaegeuk Kim: simple code change for readability] Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 45 ++++++++++++++++++++++++++++++++++++++++----- fs/f2fs/f2fs.h | 13 +++++++++++-- include/linux/f2fs_fs.h | 2 ++ 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index fe968c7bfc90..ecba8da3308b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -371,7 +371,9 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) return; sbi->por_doing = true; - start_blk = __start_cp_addr(sbi) + 1; + + start_blk = __start_cp_addr(sbi) + 1 + + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); orphan_blkaddr = __start_sum_addr(sbi) - 1; ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); @@ -512,8 +514,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; + unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + block_t cp_blk_no; + int i; - sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); + sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* @@ -544,6 +549,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); + if (cp_blks <= 1) + goto done; + + cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); + if (cur_page == cp2) + cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + + for (i = 1; i < cp_blks; i++) { + void *sit_bitmap_ptr; + unsigned char *ckpt = (unsigned char *)sbi->ckpt; + + cur_page = get_meta_page(sbi, cp_blk_no + i); + sit_bitmap_ptr = page_address(cur_page); + memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); + f2fs_put_page(cur_page, 1); + } +done: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); return 0; @@ -736,6 +758,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) __u32 crc32 = 0; void *kaddr; int i; + int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); /* * This avoids to conduct wrong roll-forward operations and uses @@ -786,16 +809,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) / F2FS_ORPHANS_PER_BLOCK; - ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); + ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + + orphan_blocks); if (is_umount) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); + cp_payload_blks + data_sum_blocks + + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks); + cp_payload_blks + data_sum_blocks + + orphan_blocks); } if (sbi->n_orphans) @@ -821,6 +847,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); + for (i = 1; i < 1 + cp_payload_blks; i++) { + cp_page = grab_meta_page(sbi, start_blk++); + kaddr = page_address(cp_page); + memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, + (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + } + if (sbi->n_orphans) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 676a2c6ccec7..9684b1f77a7d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -764,9 +764,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - int offset = (flag == NAT_BITMAP) ? + int offset; + + if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { + if (flag == NAT_BITMAP) + return &ckpt->sit_nat_version_bitmap; + else + return ((unsigned char *)ckpt + F2FS_BLKSIZE); + } else { + offset = (flag == NAT_BITMAP) ? le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + return &ckpt->sit_nat_version_bitmap + offset; + } } static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ba6f3127738f..6ff0b0b42d47 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,6 +19,7 @@ #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ @@ -75,6 +76,7 @@ struct f2fs_super_block { __le16 volume_name[512]; /* volume name */ __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __le32 cp_payload; } __packed; /* -- cgit v1.2.3 From b6fe5873cb422417ae3fc914954bc5a10fd4e003 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 4 Jun 2014 00:39:42 +0900 Subject: f2fs: fix to recover data written by dio If data are overwritten through dio, previous f2fs doesn't remain the fsync mark due to no additional node writes. Note that this patch should resolve the xfstests:311. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 12 ++++++++++++ 3 files changed, 16 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8c250a5d6f26..39fe7d70791a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1041,6 +1041,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (check_direct_IO(inode, rw, iov, offset, nr_segs)) return 0; + /* clear fsync mark to recover these blocks */ + fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); + return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, get_data_block); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9684b1f77a7d..f628c3c5e63f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1168,6 +1168,7 @@ struct node_info; bool available_free_memory(struct f2fs_sb_info *, int); int is_checkpointed_node(struct f2fs_sb_info *, nid_t); bool fsync_mark_done(struct f2fs_sb_info *, nid_t); +void fsync_mark_clear(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 02a59e9027b1..a0a1f25ed20f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -153,6 +153,18 @@ bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) return fsync_done; } +void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; + + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e) + e->fsync_done = false; + write_unlock(&nm_i->nat_tree_lock); +} + static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; -- cgit v1.2.3 From 6fa1df533a93161c54c987bbffaadf03563aa78d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Jun 2014 02:12:59 +0900 Subject: f2fs: recover fallocated space If a fallocated file is fsynced, we should recover the i_size after sudden power cut. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d97e5c458f36..78110dab5681 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -682,6 +682,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, i_size_read(inode) < new_size) { i_size_write(inode, new_size); mark_inode_dirty(inode); + f2fs_write_inode(inode, NULL); } return ret; -- cgit v1.2.3 From 86928f984e8b166fcd0c7c241501bc00f53eb623 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 7 Jun 2014 03:05:03 +0900 Subject: f2fs: avoid not to call remove_dirty_inode There is an errorneous case during the recovery like below. In recovery_dentry, 1) dir = f2fs_iget(); 2) mark the dir with FI_DELAY_IPUT 3) goto unmap_out After the end of recovery routine, there is no dirty dentries so the dir cannot be released by iput in remove_dirty_dir_inode. This patch fixes such the bug case by handling the iget and iput in the recovery_dentry procedure. Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index e950a2f50ac1..a112368a4a86 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -52,20 +52,13 @@ static int recover_dentry(struct page *ipage, struct inode *inode) goto out; } - if (is_inode_flag_set(F2FS_I(dir), FI_DIRTY_DIR)) { - iput(dir); - } else { - add_dirty_dir_inode(dir); - set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); - } - name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; if (unlikely(name.len > F2FS_NAME_LEN)) { WARN_ON(1); err = -ENAMETOOLONG; - goto out; + goto out_err; } retry: de = f2fs_find_entry(dir, &name, &page); @@ -90,11 +83,23 @@ retry: goto retry; } err = __f2fs_add_link(dir, &name, inode); + if (err) + goto out_err; + + if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) { + iput(dir); + } else { + add_dirty_dir_inode(dir); + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + } + goto out; out_unmap_put: kunmap(page); f2fs_put_page(page, 0); +out_err: + iput(dir); out: f2fs_msg(inode->i_sb, KERN_NOTICE, "%s: ino = %x, name = %s, dir = %lx, err = %d", -- cgit v1.2.3 From 9ab701349247368f9d57a993b95a5bb05bb37e10 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 8 Jun 2014 04:30:14 +0900 Subject: f2fs: support f2fs_fiemap This patch links f2fs_fiemap with generic function with get_block. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 ++++++ fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 1 + 3 files changed, 8 insertions(+) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 39fe7d70791a..c1fb6dd10911 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -708,6 +708,12 @@ out: return err; } +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + return generic_block_fiemap(inode, fieinfo, start, len, get_data_block); +} + static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f628c3c5e63f..e51c732b0dd9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1271,6 +1271,7 @@ struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct page *, struct f2fs_io_info *); +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); /* * gc.c diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 78110dab5681..9c49c593d8eb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -540,6 +540,7 @@ const struct inode_operations f2fs_file_inode_operations = { .listxattr = f2fs_listxattr, .removexattr = generic_removexattr, #endif + .fiemap = f2fs_fiemap, }; static void fill_zero(struct inode *inode, pgoff_t index, -- cgit v1.2.3