From 65e5cd0a151d53d3d79ef4d81783d1dbc01d4b61 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 May 2013 15:47:43 +0900 Subject: f2fs: fix inconsistency of block count during recovery Currently f2fs recovers the dentry of fsynced files. When power-off-recovery is conducted, this newly recovered inode should increase node block count as well as inode block count. This patch resolves this inconsistency that results in: 1. create a file 2. write data 3. fsync 4. reboot without sync 5. mount and recover the file 6. node block count is 1 and inode block count is 2 : fall into the inconsistent state 7. unlink the file : trigger the following BUG_ON ------------[ cut here ]------------ kernel BUG at /home/zeus/f2fs_test/src/fs/f2fs/f2fs.h:716! Call Trace: [] ? get_node_page+0x50/0x1a0 [f2fs] [] remove_inode_page+0x8c/0x100 [f2fs] [] ? f2fs_evict_inode+0x180/0x2d0 [f2fs] [] f2fs_evict_inode+0x1be/0x2d0 [f2fs] [] evict+0xa7/0x1a0 [] iput+0x105/0x190 [] d_kill+0xe0/0x120 [] dput+0xe7/0x1e0 [] __fput+0x19d/0x2d0 [] ____fput+0xe/0x10 [] task_work_run+0xb5/0xe0 [] do_notify_resume+0x71/0xb0 [] int_signal+0x12/0x17 Reported-and-Tested-by: Chris Fries Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3df43b4efd89..96415345a428 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1492,6 +1492,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) new_ni = old_ni; new_ni.ino = ino; + if (!inc_valid_node_count(sbi, NULL, 1)) + WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR); inc_valid_inode_count(sbi); -- cgit v1.2.3 From 650495dedc34daf8590c708a5b48f82ed2787b75 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 May 2013 08:38:35 +0900 Subject: f2fs: fix the inconsistent state of data pages In get_lock_data_page, if there is a data race between get_dnode_of_data for node and grab_cache_page for data, f2fs is able to face with the following 
BUG_ON(dn.data_blkaddr == NEW_ADDR). kernel BUG at /home/zeus/f2fs_test/src/fs/f2fs/data.c:251! [] get_lock_data_page+0x1ec/0x210 [f2fs] Call Trace: [] f2fs_readdir+0x89/0x210 [f2fs] [] ? fillonedir+0x100/0x100 [] ? fillonedir+0x100/0x100 [] vfs_readdir+0xb8/0xe0 [] sys_getdents+0x8f/0x110 [] system_call_fastpath+0x16/0x1b This bug can occur when the block address of the data block is changed after f2fs_put_dnode(). In order to avoid that, this patch fixes the lock order of node and data blocks so that the node block lock is covered by the data block lock. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 91ff93b0b0f4..05fb5c6077b8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -233,18 +233,23 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct page *page; int err; +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) + if (err) { + f2fs_put_page(page, 1); return ERR_PTR(err); + } f2fs_put_dnode(&dn); - if (dn.data_blkaddr == NULL_ADDR) + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_page(page, 1); return ERR_PTR(-ENOENT); -repeat: - page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); + } if (PageUptodate(page)) return page; -- cgit v1.2.3 From addbe45b005d73f876d55bcfc16f4a6ce52a55e3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 May 2013 10:49:13 +0900 Subject: f2fs: remove redundant assignment We don't need to assign a value redundantly. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 60c8a5097058..294198775f8b 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -126,7 +126,6 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) entry = get_fsync_inode(head, ino_of_node(page)); if (entry) { - entry->blkaddr = blkaddr; if (IS_INODE(page) && is_dent_dnode(page)) set_inode_flag(F2FS_I(entry->inode), FI_INC_LINK); @@ -150,10 +149,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) kmem_cache_free(fsync_entry_slab, entry); goto unlock_out; } - list_add_tail(&entry->list, head); - entry->blkaddr = blkaddr; } + entry->blkaddr = blkaddr; + if (IS_INODE(page)) { err = recover_inode(entry->inode, page); if (err == -ENOENT) { -- cgit v1.2.3 From 8c26d7d5717adf7f06d98c4416852d09566edd7c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 May 2013 16:12:18 +0900 Subject: f2fs: fix por_doing variable coverage The reason of using sbi->por_doing is to alleviate data writes during the recovery. The find_fsync_dnodes() produces some dirty dentry pages, so we should cover it too with sbi->por_doing. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 294198775f8b..4d895149a6f0 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -381,6 +381,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ + sbi->por_doing = 1; err = find_fsync_dnodes(sbi, &inode_list); if (err) goto out; @@ -389,13 +390,12 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) goto out; /* step #2: recover data */ - sbi->por_doing = 1; err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - sbi->por_doing = 0; BUG_ON(!list_empty(&inode_list)); out: destroy_fsync_dnodes(sbi, &inode_list); kmem_cache_destroy(fsync_entry_slab); + sbi->por_doing = 0; write_checkpoint(sbi, false); return err; } -- cgit v1.2.3 From 74d0b917ef7789097e12d60fc054efa427ce9171 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 May 2013 16:40:02 +0900 Subject: f2fs: fix BUG_ON during f2fs_evict_inode(dir) During the dentry recovery routine, recover_inode() triggers __f2fs_add_link with its directory inode. In the following scenario, a bug is triggered. 1. dir = f2fs_iget(pino) 2. __f2fs_add_link(dir, name) 3. iput(dir) -> f2fs_evict_inode() hits BUG_ON(atomic_read(fi->dirty_dents)) Kernel BUG at ffffffffa01c0676 [verbose debug info unavailable] [] f2fs_evict_inode+0x276/0x300 [f2fs] Call Trace: [] evict+0xb0/0x1b0 [] iput+0x105/0x190 [] recover_fsync_data+0x3bc/0x1070 [f2fs] [] ? io_schedule+0xaa/0xd0 [] ? __wait_on_bit_lock+0x7b/0xc0 [] ? __lock_page+0x67/0x70 [] ? kmem_cache_alloc+0x31/0x140 [] ? __d_instantiate+0x92/0xf0 [] ? security_d_instantiate+0x1b/0x30 [] ? d_instantiate+0x54/0x70 This means that we should flush all the dentry pages between iget and iput(). But, during the recovery routine, this is not allowed for consistency reasons, so we have to wait for the whole recovery process to finish. 
And then, write_checkpoint flushes all the dirty dentry blocks, and nicely we can put the stale dir inodes from the dirty_dir_inode_list. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 23 +++++++++++++++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/recovery.c | 14 +++++++++----- 3 files changed, 34 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b1de01da1a40..3d1144908ac6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -514,6 +514,29 @@ void remove_dirty_dir_inode(struct inode *inode) } out: spin_unlock(&sbi->dir_inode_lock); + + /* Only from the recovery routine */ + if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) + iput(inode); +} + +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + struct inode *inode = NULL; + + spin_lock(&sbi->dir_inode_lock); + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode->i_ino == ino) { + inode = entry->inode; + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + return inode; } void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20aab02f2a42..ef6cac8c16a5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -846,6 +846,7 @@ enum { FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ + FI_DELAY_IPUT, /* used for the recovery */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1012,6 +1013,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); void remove_dirty_dir_inode(struct inode *); +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); void sync_dirty_dir_inodes(struct f2fs_sb_info *); 
void write_checkpoint(struct f2fs_sb_info *, bool); void init_orphan_info(struct f2fs_sb_info *); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4d895149a6f0..23f580397e6c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -42,6 +42,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) { struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); struct f2fs_inode *raw_inode = &(raw_node->i); + nid_t pino = le32_to_cpu(raw_inode->i_pino); struct qstr name; struct f2fs_dir_entry *de; struct page *page; @@ -51,10 +52,14 @@ static int recover_dentry(struct page *ipage, struct inode *inode) if (!is_dent_dnode(ipage)) goto out; - dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino)); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; + dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); + if (!dir) { + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; + } + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); } name.len = le32_to_cpu(raw_inode->i_namelen); @@ -67,7 +72,6 @@ static int recover_dentry(struct page *ipage, struct inode *inode) } else { err = __f2fs_add_link(dir, &name, inode); } - iput(dir); out: kunmap(ipage); return err; -- cgit v1.2.3 From 0a364af18f27b86869149c4d128262ec1e0ccb25 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 May 2013 08:57:43 +0900 Subject: f2fs: remove unnecessary por_doing check This por_doing check is totally not related to the recovery process. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 47abc9722b17..729b28548546 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -149,8 +149,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - if (!sbi->por_doing) - d_instantiate(dentry, inode); + d_instantiate(dentry, inode); unlock_new_inode(inode); return 0; out: -- cgit v1.2.3 From 1646cfac952ff87fcbc18a77164472aa61d08094 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 09:42:28 +0900 Subject: f2fs: skip get_node_page if locked node page is passed If get_dnode_of_data gets a locked node page, let's skip redundant get_node_page calls. This is for further enhancement. Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 96415345a428..f63f0a4046c6 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -408,10 +408,13 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) level = get_node_path(index, offset, noffset); nids[0] = dn->inode->i_ino; - npage[0] = get_node_page(sbi, nids[0]); - if (IS_ERR(npage[0])) - return PTR_ERR(npage[0]); + npage[0] = dn->inode_page; + if (!npage[0]) { + npage[0] = get_node_page(sbi, nids[0]); + if (IS_ERR(npage[0])) + return PTR_ERR(npage[0]); + } parent = npage[0]; if (level != 0) nids[1] = get_nid(parent, offset[0], true); -- cgit v1.2.3 From 64aa7ed98db489d1c41ef140876ada38498678ab Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 09:55:50 +0900 Subject: f2fs: change get_new_data_page to pass a locked node page This patch is for passing a locked node page to get_dnode_of_data. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 12 +++++++----- fs/f2fs/dir.c | 4 ++-- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 05fb5c6077b8..af7454939362 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -280,8 +280,8 @@ repeat: * Also, caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). */ -struct page *get_new_data_page(struct inode *inode, pgoff_t index, - bool new_i_size) +struct page *get_new_data_page(struct inode *inode, + struct page *npage, pgoff_t index, bool new_i_size) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; @@ -289,18 +289,20 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, struct dnode_of_data dn; int err; - set_new_dnode(&dn, inode, NULL, NULL, 0); + set_new_dnode(&dn, inode, npage, npage, 0); err = get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) return ERR_PTR(err); if (dn.data_blkaddr == NULL_ADDR) { if (reserve_new_block(&dn)) { - f2fs_put_dnode(&dn); + if (!npage) + f2fs_put_dnode(&dn); return ERR_PTR(-ENOSPC); } } - f2fs_put_dnode(&dn); + if (!npage) + f2fs_put_dnode(&dn); repeat: page = grab_cache_page(mapping, index); if (!page) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1ac6b93036b7..7db6e58622d9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -287,7 +287,7 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) struct f2fs_dir_entry *de; void *kaddr; - dentry_page = get_new_data_page(inode, 0, true); + dentry_page = get_new_data_page(inode, NULL, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -448,7 +448,7 @@ start: bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { - dentry_page = get_new_data_page(dir, block, true); + dentry_page = get_new_data_page(dir, NULL, block, true); 
if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ef6cac8c16a5..cbae2b663eba 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1027,7 +1027,7 @@ int reserve_new_block(struct dnode_of_data *); void update_extent_cache(block_t, struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); -struct page *get_new_data_page(struct inode *, pgoff_t, bool); +struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); int do_write_data_page(struct page *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1cae864f8dfc..b8e34db37ae8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -387,7 +387,7 @@ static void fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi); ilock = mutex_lock_op(sbi); - page = get_new_data_page(inode, index, false); + page = get_new_data_page(inode, NULL, index, false); mutex_unlock_op(sbi, ilock); if (!IS_ERR(page)) { -- cgit v1.2.3 From 44a83ff6a81d84ab83bcb43a49ff1ba6c7e17cd1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 10:10:29 +0900 Subject: f2fs: update inode page after creation I found a bug when testing power-off-recovery as follows. [Bug Scenario] 1. create a file 2. fsync the file 3. reboot w/o any sync 4. try to recover the file - found its fsync mark - found its dentry mark : try to recover its dentry - get its file name - get its parent inode number : here we got zero value The reason why we get the wrong parent inode number is that we didn't synchronize the inode page with its newly created inode information perfectly. Especially, previous f2fs stores fi->i_pino and writes it to the cached node page in a wrong order, which incurs the zero-valued i_pino during the recovery. So, this patch modifies the creation flow to fix the synchronization order of inode page with its inode. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 1 + fs/f2fs/dir.c | 85 +++++++++++++++++++++++++++++++--------------------------- fs/f2fs/f2fs.h | 3 +-- fs/f2fs/node.c | 12 +++------ 4 files changed, 51 insertions(+), 50 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index af7454939362..c320f7f31327 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -279,6 +279,7 @@ repeat: * * Also, caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). + * Note that, npage is set only by make_empty_dir. */ struct page *get_new_data_page(struct inode *inode, struct page *npage, pgoff_t index, bool new_i_size) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7db6e58622d9..fc1dacf55b3a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -264,15 +264,10 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_put_page(page, 1); } -void init_dent_inode(const struct qstr *name, struct page *ipage) +static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_node *rn; - if (IS_ERR(ipage)) - return; - - wait_on_page_writeback(ipage); - /* copy name info. 
to this inode page */ rn = (struct f2fs_node *)page_address(ipage); rn->i.i_namelen = cpu_to_le32(name->len); @@ -280,14 +275,15 @@ void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -static int make_empty_dir(struct inode *inode, struct inode *parent) +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) { struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; void *kaddr; - dentry_page = get_new_data_page(inode, NULL, 0, true); + dentry_page = get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -317,42 +313,47 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) return 0; } -static int init_inode_metadata(struct inode *inode, +static struct page *init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { + struct page *page; + int err; + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { - int err; - err = new_inode_page(inode, name); - if (err) - return err; + page = new_inode_page(inode, name); + if (IS_ERR(page)) + return page; if (S_ISDIR(inode->i_mode)) { - err = make_empty_dir(inode, dir); - if (err) { - remove_inode_page(inode); - return err; - } + err = make_empty_dir(inode, dir, page); + if (err) + goto error; } err = f2fs_init_acl(inode, dir); - if (err) { - remove_inode_page(inode); - return err; - } + if (err) + goto error; + + wait_on_page_writeback(page); } else { - struct page *ipage; - ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); - set_cold_node(inode, ipage); - init_dent_inode(name, ipage); - f2fs_put_page(ipage, 1); + page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + if (IS_ERR(page)) + return page; + + wait_on_page_writeback(page); + set_cold_node(inode, page); } - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + + init_dent_inode(name, page); + + if 
(is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) inc_nlink(inode); - update_inode_page(inode); - } - return 0; + return page; + +error: + f2fs_put_page(page, 1); + remove_inode_page(inode); + return ERR_PTR(err); } static void update_parent_metadata(struct inode *dir, struct inode *inode, @@ -423,6 +424,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; int err = 0; int i; @@ -465,12 +467,13 @@ start: ++level; goto start; add_dentry: - err = init_inode_metadata(inode, dir, name); - if (err) - goto fail; - wait_on_page_writeback(dentry_page); + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } de = &dentry_blk->dentry[bit_pos]; de->hash_code = dentry_hash; de->name_len = cpu_to_le16(namelen); @@ -481,10 +484,12 @@ add_dentry: test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); set_page_dirty(dentry_page); - update_parent_metadata(dir, inode, current_depth); - - /* update parent inode number before releasing dentry page */ + /* we don't need to mark_inode_dirty now */ F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, current_depth); fail: kunmap(dentry_page); f2fs_put_page(dentry_page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cbae2b663eba..9360a03fcc96 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -914,7 +914,6 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); -void init_dent_inode(const struct qstr *, struct page *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct 
inode *); int f2fs_make_empty(struct inode *, struct inode *); @@ -949,7 +948,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int remove_inode_page(struct inode *); -int new_inode_page(struct inode *, const struct qstr *); +struct page *new_inode_page(struct inode *, const struct qstr *); struct page *new_node_page(struct dnode_of_data *, unsigned int); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f63f0a4046c6..b41482de492f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -806,19 +806,15 @@ int remove_inode_page(struct inode *inode) return 0; } -int new_inode_page(struct inode *inode, const struct qstr *name) +struct page *new_inode_page(struct inode *inode, const struct qstr *name) { - struct page *page; struct dnode_of_data dn; /* allocate inode page for new inode */ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - page = new_node_page(&dn, 0); - init_dent_inode(name, page); - if (IS_ERR(page)) - return PTR_ERR(page); - f2fs_put_page(page, 1); - return 0; + + /* caller should f2fs_put_page(page, 1); */ + return new_node_page(&dn, 0); } struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) -- cgit v1.2.3 From f356fe0cba0e3523e538987916bd2acedd4e6f41 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 May 2013 15:04:49 +0900 Subject: f2fs: add debug msgs in the recovery routine This patch adds some trivial debugging messages in the recovery process. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 1 - fs/f2fs/recovery.c | 44 +++++++++++++++++++++++++------------------- 2 files changed, 25 insertions(+), 20 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b41482de492f..5a59780b22d0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1495,7 +1495,6 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR); inc_valid_inode_count(sbi); - f2fs_put_page(ipage, 1); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 23f580397e6c..6ad4e539c60a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -49,9 +49,6 @@ static int recover_dentry(struct page *ipage, struct inode *inode) struct inode *dir; int err = 0; - if (!is_dent_dnode(ipage)) - goto out; - dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); if (!dir) { dir = f2fs_iget(inode->i_sb, pino); @@ -73,6 +70,9 @@ static int recover_dentry(struct page *ipage, struct inode *inode) err = __f2fs_add_link(dir, &name, inode); } out: + f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " + "ino = %x, name = %s, dir = %lx, err = %d", + ino_of_node(ipage), raw_inode->i_name, dir->i_ino, err); kunmap(ipage); return err; } @@ -83,6 +83,9 @@ static int recover_inode(struct inode *inode, struct page *node_page) struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); + if (!IS_INODE(node_page)) + return 0; + inode->i_mode = le16_to_cpu(raw_inode->i_mode); i_size_write(inode, le64_to_cpu(raw_inode->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); @@ -92,7 +95,12 @@ static int recover_inode(struct inode *inode, struct page *node_page) inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - return recover_dentry(node_page, inode); + if (is_dent_dnode(node_page)) + return recover_dentry(node_page, inode); + + 
f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", + ino_of_node(node_page), raw_inode->i_name); + return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) @@ -123,7 +131,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) lock_page(page); if (cp_ver != cpver_of_node(page)) - goto unlock_out; + break; if (!is_fsync_dnode(page)) goto next; @@ -137,40 +145,33 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) - goto unlock_out; + break; } /* add this fsync inode to the list */ entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); if (!entry) { err = -ENOMEM; - goto unlock_out; + break; } entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); - goto unlock_out; + break; } list_add_tail(&entry->list, head); } entry->blkaddr = blkaddr; - if (IS_INODE(page)) { - err = recover_inode(entry->inode, page); - if (err == -ENOENT) { - goto next; - } else if (err) { - err = -EINVAL; - goto unlock_out; - } - } + err = recover_inode(entry->inode, page); + if (err && err != -ENOENT) + break; next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); } -unlock_out: unlock_page(page); out: __free_pages(page, 0); @@ -248,7 +249,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - int err = 0; + int err = 0, recovered = 0; int ilock; start = start_bidx_of_node(ofs_of_node(page)); @@ -293,6 +294,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); update_extent_cache(dest, &dn); + recovered++; } dn.ofs_in_node++; } @@ -310,6 +312,10 @@ static int do_recover_data(struct 
f2fs_sb_info *sbi, struct inode *inode, recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); + + f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " + "recovered_data = %d blocks", + inode->i_ino, recovered); return 0; } -- cgit v1.2.3 From bfe35965ecdc6038314d03456b94d9ba451c289d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 May 2013 20:03:12 +0200 Subject: f2fs, lockdep: annotate mutex_lock_all() Majianpeng reported a lockdep splat for f2fs. It turns out mutex_lock_all() acquires an array of locks (in global/local lock style). Any such operation is always serialized using cp_mutex, therefore there is no fs_lock[] lock-order issue; tell lockdep about this using the mutex_lock_nest_lock() primitive. Reported-by: majianpeng Signed-off-by: Peter Zijlstra Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9360a03fcc96..9182b2710db7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -495,9 +495,17 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void mutex_lock_all(struct f2fs_sb_info *sbi) { - int i = 0; - for (; i < NR_GLOBAL_LOCKS; i++) - mutex_lock(&sbi->fs_lock[i]); + int i; + + for (i = 0; i < NR_GLOBAL_LOCKS; i++) { + /* + * This is the only time we take multiple fs_lock[] + * instances; the order is immaterial since we + * always hold cp_mutex, which serializes multiple + * such operations. + */ + mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); + } } static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From 81fb5e874675517c57e9edd913065f1e17ebd362 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Tue, 14 May 2013 18:20:28 +0800 Subject: f2fs: remove unecessary variable and code Code cleanup without behavior changed. 
Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d8e84e49a5c3..3a0d027aad7e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -126,17 +126,16 @@ void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno, offset = 0; + unsigned int segno = -1; unsigned int total_segs = TOTAL_SEGS(sbi); mutex_lock(&dirty_i->seglist_lock); while (1) { segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - offset); + segno + 1); if (segno >= total_segs) break; __set_test_and_free(sbi, segno); - offset = segno + 1; } mutex_unlock(&dirty_i->seglist_lock); } @@ -144,17 +143,16 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno, offset = 0; + unsigned int segno = -1; unsigned int total_segs = TOTAL_SEGS(sbi); mutex_lock(&dirty_i->seglist_lock); while (1) { segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - offset); + segno + 1); if (segno >= total_segs) break; - offset = segno + 1; if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) dirty_i->nr_dirty[PRE]--; @@ -364,11 +362,11 @@ next: static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int segno = curseg->segno; + unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); - if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec) - return !test_bit(segno + 1, free_i->free_segmap); + if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); return 0; } @@ -495,7 +493,7 @@ 
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) int dir = ALLOC_LEFT; write_sum_page(sbi, curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + GET_SUM_BLOCK(sbi, segno)); if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) dir = ALLOC_RIGHT; -- cgit v1.2.3 From 145b04e5ed66033b5a3d315394dd1384e3f5f70a Mon Sep 17 00:00:00 2001 From: majianpeng Date: Tue, 14 May 2013 20:06:46 +0800 Subject: f2fs: use list_for_each_entry rather than list_for_each_entry_safe We can do this, since now we use a global mutex, f2fs_stat_mutex to protect its list operations. Signed-off-by: Jianpeng Ma [Jaegeuk Kim: add description] Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8d9943786c31..0d6c6aafb235 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -175,12 +175,12 @@ get_cache: static int stat_show(struct seq_file *s, void *v) { - struct f2fs_stat_info *si, *next; + struct f2fs_stat_info *si; int i = 0; int j; mutex_lock(&f2fs_stat_mutex); - list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { + list_for_each_entry(si, &f2fs_stat_list, stat_list) { char devname[BDEVNAME_SIZE]; update_general_status(si->sbi); -- cgit v1.2.3 From 9851e6e18943f2537acb44a4eb51c6958e8dbc3e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 28 Apr 2013 09:04:18 +0900 Subject: f2fs: reorganize f2fs_vm_page_mkwrite Few things can be changed in the default mkwrite function 1) Make file_update_time at the start before acquiring any lock 2) the condition page_offset(page) >= i_size_read(inode) should be changed to page_offset(page) > i_size_read 3) Move wait_on_page_writeback. 
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b8e34db37ae8..9937ba1a4d24 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -63,9 +63,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); + file_update_time(vma->vm_file); lock_page(page); if (page->mapping != inode->i_mapping || - page_offset(page) >= i_size_read(inode) || + page_offset(page) > i_size_read(inode) || !PageUptodate(page)) { unlock_page(page); err = -EFAULT; @@ -76,10 +77,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto out; - - /* fill the page */ - wait_on_page_writeback(page); + goto mapped; /* page is wholly or partially inside EOF */ if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { @@ -90,7 +88,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, set_page_dirty(page); SetPageUptodate(page); - file_update_time(vma->vm_file); +mapped: + /* fill the page */ + wait_on_page_writeback(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(err); -- cgit v1.2.3 From 9a55ed656c9afbe41316ab2373bc063359b7683f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 10:23:40 +0900 Subject: f2fs: remove unnecessary kmap/kunmap operations The allocated page used by the recovery is not on HIGHMEM, so that we don't need to use kmap/kunmap. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6ad4e539c60a..f91ff0f0044d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -40,11 +40,11 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, static int recover_dentry(struct page *ipage, struct inode *inode) { - struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); + void *kaddr = page_address(ipage); + struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct qstr name; - struct f2fs_dir_entry *de; struct page *page; struct inode *dir; int err = 0; @@ -62,8 +62,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; - de = f2fs_find_entry(dir, &name, &page); - if (de) { + if (f2fs_find_entry(dir, &name, &page)) { kunmap(page); f2fs_put_page(page, 0); } else { @@ -73,7 +72,6 @@ out: f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " "ino = %x, name = %s, dir = %lx, err = %d", ino_of_node(ipage), raw_inode->i_name, dir->i_ino, err); - kunmap(ipage); return err; } -- cgit v1.2.3 From 45856aff0d9091f4836e333951c66eca382a8573 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 10:26:09 +0900 Subject: f2fs: fix to unlock page before exit If we got an error after lock_page, we should unlock it before exit. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f91ff0f0044d..3a4b51c03321 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -347,7 +347,7 @@ static int recover_data(struct f2fs_sb_info *sbi, lock_page(page); if (cp_ver != cpver_of_node(page)) - goto unlock_out; + break; entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) @@ -355,7 +355,7 @@ static int recover_data(struct f2fs_sb_info *sbi, err = do_recover_data(sbi, entry->inode, page, blkaddr); if (err) - goto out; + break; if (entry->blkaddr == blkaddr) { iput(entry->inode); @@ -366,7 +366,6 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); } -unlock_out: unlock_page(page); out: __free_pages(page, 0); -- cgit v1.2.3 From 2c2c149f7dabd5a4d41cae5d2c2ce1d130acf72c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 14:48:49 +0900 Subject: f2fs: don't do checkpoint if error is occurred If we meet an error during the dentry recovery, we should not conduct a checkpoint. Otherwise, some erroneous dentry blocks overwrite the existing blocks that contain the remaining recovery information. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 3a4b51c03321..5148d90049b0 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -403,6 +403,7 @@ out: destroy_fsync_dnodes(sbi, &inode_list); kmem_cache_destroy(fsync_entry_slab); sbi->por_doing = 0; - write_checkpoint(sbi, false); + if (!err) + write_checkpoint(sbi, false); return err; } -- cgit v1.2.3 From 6f85b3520325a67ee4ac33e75bbcdbc25c79ce69 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 16:15:22 +0900 Subject: f2fs: avoid RECLAIM_FS-ON-W: deadlock This patch tries to avoid the following deadlock condition of which the reclaim path can trigger f2fs_balance_fs again. ================================= [ INFO: inconsistent lock state ] --------------------------------- inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. kswapd0/41 [HC0[0]:SC0[0]:HE1:SE1] takes: (&sbi->gc_mutex){+.+.?.}, at: f2fs_balance_fs+0xe6/0x100 [f2fs] {RECLAIM_FS-ON-W} state was registered at: [] mark_held_locks+0xb9/0x140 [] lockdep_trace_alloc+0x85/0xf0 [] __alloc_pages_nodemask+0x7c/0x9b0 [] alloc_pages_current+0xb8/0x180 [] __page_cache_alloc+0xaf/0xd0 [] find_or_create_page+0x4c/0xb0 [] find_data_page+0x14e/0x210 [f2fs] [] f2fs_gc+0x9eb/0xd90 [f2fs] [] f2fs_balance_fs+0xee/0x100 [f2fs] [] f2fs_setattr+0x6c/0x200 [f2fs] [] notify_change+0x1db/0x3a0 [] do_truncate+0x60/0xa0 [] vfs_truncate+0x185/0x1b0 [] do_sys_truncate+0x5c/0xa0 [] SyS_truncate+0xe/0x10 [] system_call_fastpath+0x16/0x1b Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- fs/f2fs/inode.c | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c320f7f31327..1644fffea251 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -199,7 +199,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (dn.data_blkaddr == 
NEW_ADDR) return ERR_PTR(-EINVAL); - page = grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); if (!page) return ERR_PTR(-ENOMEM); @@ -234,7 +234,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) int err; repeat: - page = grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); if (!page) return ERR_PTR(-ENOMEM); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 91ac7f9d88ee..a18946e2a8b4 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -130,8 +130,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE | - __GFP_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; -- cgit v1.2.3 From 77888c1e42e8c76e16204cd99c19a01829421402 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 20:28:47 +0900 Subject: f2fs: add f2fs_readonly() Introduce a simple macro function for readability. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 +++++ fs/f2fs/file.c | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9182b2710db7..6594ce15a7bb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -887,6 +887,11 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) return 0; } +static inline int f2fs_readonly(struct super_block *sb) +{ + return sb->s_flags & MS_RDONLY; +} + /* * file.c */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9937ba1a4d24..316bcfe80448 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -114,7 +114,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) .for_reclaim = 0, }; - if (inode->i_sb->s_flags & MS_RDONLY) + if (f2fs_readonly(inode->i_sb)) return 0; trace_f2fs_sync_file_enter(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8555f7df82c7..3ac305da7cee 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -170,7 +170,7 @@ static int f2fs_freeze(struct super_block *sb) { int err; - if (sb->s_flags & MS_RDONLY) + if (f2fs_readonly(sb)) return 0; err = f2fs_sync_fs(sb, 1); -- cgit v1.2.3 From b638f0c4b8fca9d2f82805a2d6601b09283e0d32 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 May 2013 10:17:56 +0900 Subject: f2fs: fix wrong condition check While an orphan inode has zero link_count, f2fs_gc is able to select the inode for foreground gc. - f2fs_gc - do_garbage_collect - gc_data_segment : f2fs_iget is failed : get_valid_blocks() != 0, so that retry --> here we got the infinite loop. This patch resolved this issue. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a18946e2a8b4..b44a4c1194ea 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -109,12 +109,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = do_read_inode(inode); if (ret) goto bad_inode; - - if (!sbi->por_doing && inode->i_nlink == 0) { - ret = -ENOENT; - goto bad_inode; - } - make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; -- cgit v1.2.3 From b292dcab068e141d8a820b77cbcc88d98c610eb4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 08:02:02 +0900 Subject: f2fs: reuse the locked dnode page and its inode This patch fixes the following deadlock bug during the recovery. INFO: task mount:1322 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. mount D ffffffff81125870 0 1322 1266 0x00000000 ffff8801207e39d8 0000000000000046 ffff88012ab1dee0 0000000000000046 ffff8801207e3a08 ffff880115903f40 ffff8801207e3fd8 ffff8801207e3fd8 ffff8801207e3fd8 ffff880115903f40 ffff8801207e39d8 ffff88012fc94520 Call Trace: [] ? __lock_page+0x70/0x70 [] schedule+0x29/0x70 [] io_schedule+0x8f/0xd0 [] sleep_on_page+0xe/0x20 [] __wait_on_bit_lock+0x5a/0xc0 [] __lock_page+0x67/0x70 [] ? autoremove_wake_function+0x40/0x40 [] find_lock_page+0x67/0x80 [] find_or_create_page+0x3f/0xb0 [] ? sync_inode_page+0xa8/0xd0 [f2fs] [] get_node_page+0x67/0x180 [f2fs] [] recover_fsync_data+0xacb/0xff0 [f2fs] [] ? _raw_spin_unlock+0x3e/0x40 [] f2fs_fill_super+0x7d4/0x850 [f2fs] [] mount_bdev+0x1c9/0x210 [] ? validate_superblock+0x180/0x180 [f2fs] [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x43/0x1b0 [] ? __alloc_percpu+0x10/0x20 [] vfs_kern_mount+0x76/0x120 [] do_mount+0x237/0xa10 [] ? 
strndup_user+0x5b/0x80 [] SyS_mount+0x90/0xe0 [] system_call_fastpath+0x16/0x1b The bug is triggered when check_index_in_prev_nodes tries to get the direct node page by calling get_node_page. At this point, if the direct node page is already locked by get_dnode_of_data, its caller, we got a deadlock condition. This patch adds additional condition check for the reuse of locked direct node pages prior to the get_node_page call. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 2 +- fs/f2fs/recovery.c | 26 +++++++++++++++++++++----- 3 files changed, 23 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6594ce15a7bb..7b050298d6c9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -900,6 +900,7 @@ void truncate_data_blocks(struct dnode_of_data *); void f2fs_truncate(struct inode *); int f2fs_setattr(struct dentry *, struct iattr *); int truncate_hole(struct inode *, pgoff_t, pgoff_t); +int truncate_data_blocks_range(struct dnode_of_data *, int); long f2fs_ioctl(struct file *, unsigned int, unsigned long); long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 316bcfe80448..deefd258b847 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -168,7 +168,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 5148d90049b0..eceb6652532d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -189,14 +189,14 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, } static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, - block_t blkaddr) + block_t blkaddr, struct dnode_of_data *dn) { struct seg_entry 
*sentry; unsigned int segno = GET_SEGNO(sbi, blkaddr); unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); struct f2fs_summary sum; - nid_t ino; + nid_t ino, nid; void *kaddr; struct inode *inode; struct page *node_page; @@ -224,10 +224,26 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, f2fs_put_page(sum_page, 1); } + /* Use the locked dnode page and inode */ + nid = le32_to_cpu(sum.nid); + if (dn->inode->i_ino == nid) { + struct dnode_of_data tdn = *dn; + tdn.nid = nid; + tdn.node_page = dn->inode_page; + tdn.ofs_in_node = sum.ofs_in_node; + truncate_data_blocks_range(&tdn, 1); + return; + } else if (dn->nid == nid) { + struct dnode_of_data tdn = *dn; + tdn.ofs_in_node = sum.ofs_in_node; + truncate_data_blocks_range(&tdn, 1); + return; + } + /* Get the node page */ - node_page = get_node_page(sbi, le32_to_cpu(sum.nid)); + node_page = get_node_page(sbi, nid); bidx = start_bidx_of_node(ofs_of_node(node_page)) + - le16_to_cpu(sum.ofs_in_node); + le16_to_cpu(sum.ofs_in_node); ino = ino_of_node(node_page); f2fs_put_page(node_page, 1); @@ -285,7 +301,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* Check the previous node page having this index */ - check_index_in_prev_nodes(sbi, dest); + check_index_in_prev_nodes(sbi, dest, &dn); set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); -- cgit v1.2.3 From 39cf72cf09c8f36a383919e7675bdb15bd4db53b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 08:20:01 +0900 Subject: f2fs: fix to handle do_recover_data errors This patch adds error handling codes of check_index_in_prev_nodes and its caller, do_recover_data. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index eceb6652532d..dcd8e860aad3 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -188,7 +188,7 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, } } -static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, +static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, block_t blkaddr, struct dnode_of_data *dn) { struct seg_entry *sentry; @@ -205,7 +205,7 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, sentry = get_seg_entry(sbi, segno); if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) - return; + return 0; /* Get the previous summary */ for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { @@ -232,16 +232,18 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, tdn.node_page = dn->inode_page; tdn.ofs_in_node = sum.ofs_in_node; truncate_data_blocks_range(&tdn, 1); - return; + return 0; } else if (dn->nid == nid) { struct dnode_of_data tdn = *dn; tdn.ofs_in_node = sum.ofs_in_node; truncate_data_blocks_range(&tdn, 1); - return; + return 0; } /* Get the node page */ node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); bidx = start_bidx_of_node(ofs_of_node(node_page)) + le16_to_cpu(sum.ofs_in_node); ino = ino_of_node(node_page); @@ -250,10 +252,11 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, /* Deallocate previous index in the node page */ inode = f2fs_iget(sbi->sb, ino); if (IS_ERR(inode)) - return; + return PTR_ERR(inode); truncate_hole(inode, bidx, bidx + 1); iput(inode); + return 0; } static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, @@ -301,7 +304,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* Check the previous node page having this index */ - check_index_in_prev_nodes(sbi, dest, 
&dn); + err = check_index_in_prev_nodes(sbi, dest, &dn); + if (err) + goto err; set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); @@ -324,13 +329,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, set_page_dirty(dn.node_page); recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); +err: f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " - "recovered_data = %d blocks", - inode->i_ino, recovered); - return 0; + "recovered_data = %d blocks, err = %d", + inode->i_ino, recovered, err); + return err; } static int recover_data(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From 93ff10d690ca536fdbd6b5d5d97e4ab54b2a421f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 12:03:47 +0900 Subject: f2fs: should not make_bad_inode on f2fs_link failure If -ENOSPC is met during f2fs_link, we should not make the inode as bad. The inode is still alive. Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 729b28548546..71aa30559c53 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -191,7 +191,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); - make_bad_inode(inode); iput(inode); return err; } -- cgit v1.2.3 From 6f6fd833e1857e79a363fb20497237367bcfb7ee Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 12:06:26 +0900 Subject: f2fs: use ihold Use the following helper function committed by Al. commit 7de9c6ee3ecffd99e1628e81a5ea5468f7581a1f Author: Al Viro Date: Sat Oct 23 11:11:40 2010 -0400 new helper: ihold() ... 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 71aa30559c53..efe0a127cb76 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -172,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_balance_fs(sbi); inode->i_ctime = CURRENT_TIME; - atomic_inc(&inode->i_count); + ihold(inode); set_inode_flag(F2FS_I(inode), FI_INC_LINK); ilock = mutex_lock_op(sbi); -- cgit v1.2.3 From f28c06fa6f3d3215a1ba5e62ebc5ce7229d7a895 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 May 2013 13:02:13 +0300 Subject: f2fs: dereferencing an ERR_PTR There is an error path where "dir" is an ERR_PTR. Signed-off-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dcd8e860aad3..0dd2ce1b492f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -71,7 +71,8 @@ static int recover_dentry(struct page *ipage, struct inode *inode) out: f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " "ino = %x, name = %s, dir = %lx, err = %d", - ino_of_node(ipage), raw_inode->i_name, dir->i_ino, err); + ino_of_node(ipage), raw_inode->i_name, + IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } -- cgit v1.2.3 From a9841c4dbbdd8a2fb919ea305ffa95ab5ec80af2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 May 2013 12:41:04 +0900 Subject: f2fs: align data types between on-disk and in-memory block addresses The on-disk block address is defined as __le32, but the in-memory block address, block_t, is defined as u64. Let's synchronize them to 32 bits. 
Reported-by: Dan Carpenter Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 ++++- include/linux/f2fs_fs.h | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7b050298d6c9..92fd4e9285c0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -37,7 +37,10 @@ typecheck(unsigned long long, b) && \ ((long long)((a) - (b)) > 0)) -typedef u64 block_t; +typedef u32 block_t; /* + * should not change u32, since it is the on-disk block + * address format, __le32. + */ typedef u32 nid_t; struct f2fs_mount_info { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index df6fab82f87e..383d5e39b280 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -20,8 +20,8 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ -#define NULL_ADDR 0x0U -#define NEW_ADDR -1U +#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ +#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ #define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) -- cgit v1.2.3 From 35b09d82c3cf3fc0b8b6d923e7fd82ff7926aafc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 May 2013 22:57:53 +0900 Subject: f2fs: push some variables to debug part Some counters are needed only for the statistical information while debugging. So, those can be controlled using CONFIG_F2FS_STAT_FS, pushing the usage of a few variables under this flag. 
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 ++++ fs/f2fs/data.c | 6 ++++++ fs/f2fs/f2fs.h | 6 ++++-- fs/f2fs/gc.c | 2 ++ fs/f2fs/segment.c | 5 +++++ 5 files changed, 21 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3d1144908ac6..01ddc911ac9b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -478,7 +478,9 @@ retry: } } list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS sbi->n_dirty_dirs++; +#endif BUG_ON(!S_ISDIR(inode->i_mode)); out: @@ -508,7 +510,9 @@ void remove_dirty_dir_inode(struct inode *inode) if (entry->inode == inode) { list_del(&entry->list); kmem_cache_free(inode_entry_slab, entry); +#ifdef CONFIG_F2FS_STAT_FS sbi->n_dirty_dirs--; +#endif break; } } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1644fffea251..93917e31dbdf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -68,7 +68,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, struct buffer_head *bh_result) { struct f2fs_inode_info *fi = F2FS_I(inode); +#ifdef CONFIG_F2FS_STAT_FS struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +#endif pgoff_t start_fofs, end_fofs; block_t start_blkaddr; @@ -78,7 +80,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } +#ifdef CONFIG_F2FS_STAT_FS sbi->total_hit_ext++; +#endif start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; start_blkaddr = fi->ext.blk_addr; @@ -96,7 +100,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, else bh_result->b_size = UINT_MAX; +#ifdef CONFIG_F2FS_STAT_FS sbi->read_hit_ext++; +#endif read_unlock(&fi->ext.ext_lock); return 1; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 92fd4e9285c0..40b137acb8a2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -372,7 +372,6 @@ struct f2fs_sb_info { /* for directory inode management */ struct list_head dir_inode_list; /* dir inode list */ 
spinlock_t dir_inode_lock; /* for dir inode list lock */ - unsigned int n_dirty_dirs; /* # of dir inodes */ /* basic file system units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -409,12 +408,15 @@ struct f2fs_sb_info { * for stat information. * one is for the LFS mode, and the other is for the SSR mode. */ +#ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ - unsigned int last_victim[2]; /* last victim segment # */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ int bg_gc; /* background gc calls */ + unsigned int n_dirty_dirs; /* # of dir inodes */ +#endif + unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 14961593e93c..25b083c81d50 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -76,7 +76,9 @@ static int gc_thread_func(void *data) else wait_ms = increase_sleep_time(wait_ms); +#ifdef CONFIG_F2FS_STAT_FS sbi->bg_gc++; +#endif /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3a0d027aad7e..be668ffb001c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -610,7 +610,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else new_curseg(sbi, type, false); out: +#ifdef CONFIG_F2FS_STAT_FS sbi->segment_count[curseg->alloc_type]++; +#endif + return; } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -846,7 +849,9 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS sbi->block_count[curseg->alloc_type]++; +#endif /* * SIT information should be updated before segment allocation, -- cgit v1.2.3 From 4777f86b7c0a587dde275a5c1ff3022b2e601313 Mon 
Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 May 2013 22:58:07 +0900 Subject: f2fs: remove unneeded initializations in f2fs_parent_dir There is no need to initialize a few pointers in f2fs_parent_dir, as their initial values are never checked; the directly assigned values are used instead. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index fc1dacf55b3a..b278bfbc3646 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -215,9 +215,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) { - struct page *page = NULL; - struct f2fs_dir_entry *de = NULL; - struct f2fs_dentry_block *dentry_blk = NULL; + struct page *page; + struct f2fs_dir_entry *de; + struct f2fs_dentry_block *dentry_blk; page = get_lock_data_page(dir, 0); if (IS_ERR(page)) -- cgit v1.2.3 From a06a2416038d317a6430e453f5bc5fd81834554d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 May 2013 22:58:40 +0900 Subject: f2fs: optimize several routines in node.h There are various functions with common code which could be separated out to make common routines. So, this patch adds new common routines and, in order to retain the same call paths without major changes, adds some macros to access those routines. 
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 67 +++++++++++++++++----------------------------------------- 1 file changed, 19 insertions(+), 48 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0a2d72f0024d..a503661307db 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -275,25 +275,20 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold node blocks in their node footer * - Mark cold data pages in page cache */ -static inline int is_cold_file(struct inode *inode) +static inline int is_file(struct inode *inode, int type) { - return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; + return F2FS_I(inode)->i_advise & type; } -static inline void set_cold_file(struct inode *inode) +static inline void set_file(struct inode *inode, int type) { - F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; + F2FS_I(inode)->i_advise |= type; } -static inline int is_cp_file(struct inode *inode) -{ - return F2FS_I(inode)->i_advise & FADVISE_CP_BIT; -} - -static inline void set_cp_file(struct inode *inode) -{ - F2FS_I(inode)->i_advise |= FADVISE_CP_BIT; -} +#define is_cold_file(inode) is_file(inode, FADVISE_COLD_BIT) +#define is_cp_file(inode) is_file(inode, FADVISE_CP_BIT) +#define set_cold_file(inode) set_file(inode, FADVISE_COLD_BIT) +#define set_cp_file(inode) set_file(inode, FADVISE_CP_BIT) static inline int is_cold_data(struct page *page) { @@ -310,29 +305,16 @@ static inline void clear_cold_data(struct page *page) ClearPageChecked(page); } -static inline int is_cold_node(struct page *page) +static inline int is_node(struct page *page, int type) { void *kaddr = page_address(page); struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << COLD_BIT_SHIFT); + return le32_to_cpu(rn->footer.flag) & (1 << type); } -static inline unsigned char is_fsync_dnode(struct page *page) -{ - void *kaddr = page_address(page); - 
struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << FSYNC_BIT_SHIFT); -} - -static inline unsigned char is_dent_dnode(struct page *page) -{ - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << DENT_BIT_SHIFT); -} +#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) +#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) +#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) static inline void set_cold_node(struct inode *inode, struct page *page) { @@ -346,26 +328,15 @@ static inline void set_cold_node(struct inode *inode, struct page *page) rn->footer.flag = cpu_to_le32(flag); } -static inline void set_fsync_mark(struct page *page, int mark) -{ - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - if (mark) - flag |= (0x1 << FSYNC_BIT_SHIFT); - else - flag &= ~(0x1 << FSYNC_BIT_SHIFT); - rn->footer.flag = cpu_to_le32(flag); -} - -static inline void set_dentry_mark(struct page *page, int mark) +static inline void set_mark(struct page *page, int mark, int type) { - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = (struct f2fs_node *)page_address(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) - flag |= (0x1 << DENT_BIT_SHIFT); + flag |= (0x1 << type); else - flag &= ~(0x1 << DENT_BIT_SHIFT); + flag &= ~(0x1 << type); rn->footer.flag = cpu_to_le32(flag); } +#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) -- cgit v1.2.3 From 7a267f8d7463346a139e49c8beac1b8bfe32ef97 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 26 May 2013 11:05:32 +0900 Subject: f2fs: return proper error from start_gc_thread when there 
is an error from kthread_run, then return proper error rather than returning -ENOMEM. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 25b083c81d50..ddc2c6750eee 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -91,23 +91,28 @@ int start_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; + int err = 0; if (!test_opt(sbi, BG_GC)) - return 0; + goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); - if (!gc_th) - return -ENOMEM; + if (!gc_th) { + err = -ENOMEM; + goto out; + } sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { + err = PTR_ERR(gc_th->f2fs_gc_task); kfree(gc_th); sbi->gc_thread = NULL; - return -ENOMEM; } - return 0; + +out: + return err; } void stop_gc_thread(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From 3b10b1fd2b6bc82eeb346ff6a6621d065908ea6d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 May 2013 10:32:01 +0900 Subject: f2fs: iput only if whole data blocks are flushed If there remain some unwritten blocks from the recovery, we should not call iput on that directory inode. Otherwise, we can lose some dentry blocks after the recovery.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 01ddc911ac9b..0d3701dce462 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -501,8 +501,10 @@ void remove_dirty_dir_inode(struct inode *inode) return; spin_lock(&sbi->dir_inode_lock); - if (atomic_read(&F2FS_I(inode)->dirty_dents)) - goto out; + if (atomic_read(&F2FS_I(inode)->dirty_dents)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } list_for_each(this, head) { struct dir_inode_entry *entry; @@ -516,7 +518,6 @@ void remove_dirty_dir_inode(struct inode *inode) break; } } -out: spin_unlock(&sbi->dir_inode_lock); /* Only from the recovery routine */ -- cgit v1.2.3 From 6b8213d9a4ca0d7a02a38757068ba79cd96206f0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 May 2013 09:19:22 +0900 Subject: f2fs: fix dentry recovery routine The error scenario is: 1. create /a (1.a link /a /b) 2. sync 3. unlink /a 4. create /a 5. fsync /a 6. Sudden power-off When f2fs recovers the fsynced dentry, /a, we discover an existing dentry at f2fs_find_entry() in recover_dentry(). In such a case, we should unlink the existing dentry and its inode and then recover the newly created dentry.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 0dd2ce1b492f..539ca32f4483 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -44,9 +44,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode) struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); nid_t pino = le32_to_cpu(raw_inode->i_pino); + struct f2fs_dir_entry *de; struct qstr name; struct page *page; - struct inode *dir; + struct inode *dir, *einode; int err = 0; dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); @@ -61,13 +62,26 @@ static int recover_dentry(struct page *ipage, struct inode *inode) name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; - - if (f2fs_find_entry(dir, &name, &page)) { +retry: + de = f2fs_find_entry(dir, &name, &page); + if (de && inode->i_ino == le32_to_cpu(de->ino)) { kunmap(page); f2fs_put_page(page, 0); - } else { - err = __f2fs_add_link(dir, &name, inode); + goto out; + } + if (de) { + einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); + if (IS_ERR(einode)) { + WARN_ON(1); + if (PTR_ERR(einode) == -ENOENT) + err = -EEXIST; + goto out; + } + f2fs_delete_entry(de, page, einode); + iput(einode); + goto retry; } + err = __f2fs_add_link(dir, &name, inode); out: f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " "ino = %x, name = %s, dir = %lx, err = %d", -- cgit v1.2.3 From afc3eda2a897b402e59f42f22eb89bba52297dd3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 May 2013 09:59:27 +0900 Subject: f2fs: fix incorrect iputs during the dentry recovery - iget/iput flow in the dentry recovery process 1. *dir* = f2fs_iget 2. set FI_DELAY_IPUT to *dir* 3. add *dir* to the dirty_dir_list - __f2fs_add_link - recover_dentry) 4. 
iput *dir* by remove_dirty_dir_inode - sync_dirty_dir_inodes - write_checkpoint If *dir*'s i_count is not 1 (i.e., root dir), remove_dirty_dir_inode is called later and then iput is triggered again due to the FI_DELAY_IPUT flag. So, let's unset the flag properly once iput is triggered. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0d3701dce462..6f56e5781dc3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -521,8 +521,10 @@ void remove_dirty_dir_inode(struct inode *inode) spin_unlock(&sbi->dir_inode_lock); /* Only from the recovery routine */ - if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) + if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { + clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); iput(inode); + } } struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) -- cgit v1.2.3 From 83d5d6f66b375f21bee4c2e17178f7c073a66301 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 May 2013 12:25:47 +0900 Subject: f2fs: cover cp_file information with ilock If a file is linked with other files, it should be checkpointed at every fsync call. For this, we use set_cp_file() with FADVISE_CP_BIT, but previously we didn't cover the flag by the global lock. This patch fixes this so that the inode page stores the flag correctly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 8 +++++++- fs/f2fs/namei.c | 6 ------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b278bfbc3646..67e2d1361fa2 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -346,8 +346,14 @@ static struct page *init_inode_metadata(struct inode *inode, init_dent_inode(name, page); - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + /* + * This file should be checkpointed during fsync. + * We lost i_pino from now on.
+ */ + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + set_cp_file(inode); inc_nlink(inode); + } return page; error: diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index efe0a127cb76..1fe150276fad 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -181,12 +181,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (err) goto out; - /* - * This file should be checkpointed during fsync. - * We lost i_pino from now on. - */ - set_cp_file(inode); - d_instantiate(dentry, inode); return 0; out: -- cgit v1.2.3 From 1e03e38b35b8e72d65fd5d931627bd6ff02926c1 Mon Sep 17 00:00:00 2001 From: Jason Hrycay Date: Fri, 31 May 2013 12:45:11 -0500 Subject: f2fs: handle errors from get_node_page calls Add check for error pointers returned from get_node_page in order to avoid dereferencing a bad address on the next use. Signed-off-by: Jason Hrycay Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 0b02dce31356..ae61f359554f 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -218,6 +218,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, return -ENODATA; page = get_node_page(sbi, fi->i_xattr_nid); + if (IS_ERR(page)) + return PTR_ERR(page); base_addr = page_address(page); list_for_each_xattr(entry, base_addr) { @@ -268,6 +270,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) return 0; page = get_node_page(sbi, fi->i_xattr_nid); + if (IS_ERR(page)) + return PTR_ERR(page); base_addr = page_address(page); list_for_each_xattr(entry, base_addr) { -- cgit v1.2.3 From b2b3460a9404136e0a99b9f7cb56e08ec41ea933 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 1 Jun 2013 16:20:26 +0900 Subject: f2fs: reorganise the function get_victim_by_default Fix the function get_victim_by_default, where it checks for the condition that p.min_segno != NULL_SEGNO as shown: if (p.min_segno != NULL_SEGNO) goto 
got_it; and if above condition is true then got_it: if (p.min_segno != NULL_SEGNO) { So this condition is being checked twice. Hence move the goto statement after the if condition so that duplication of condition check is avoided. Also this function makes a call to get_max_cost() to compute the max cost based on the f2fs_sbi_info and victim policy. Since get_max_cost depends on three parameters of victim_sel_policy => alloc_mode, gc_mode & ofs_unit, once this victim policy is initialised, these values will not change till the execution time of get_victim_by_default() & also f2fs_sbi_info structure parameters will not change. Hence making calls to get_max_cost() in while loop does not seem to be a good idea. Instead we can call it once at the beginning and store the result in a local variable, which later can serve our purpose for comparing the cost with max cost inside the while loop. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ddc2c6750eee..3a9df36491a5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -241,14 +241,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct victim_sel_policy p; - unsigned int secno; + unsigned int secno, max_cost; int nsearched = 0; p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); p.min_segno = NULL_SEGNO; - p.min_cost = get_max_cost(sbi, &p); + p.min_cost = max_cost = get_max_cost(sbi, &p); mutex_lock(&dirty_i->seglist_lock); @@ -287,7 +287,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = cost; } - if (cost == get_max_cost(sbi, &p)) + if (cost == max_cost) continue; if (nsearched++ >= MAX_VICTIM_SEARCH) { @@ -295,8 +295,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, break; } } -got_it: if (p.min_segno != NULL_SEGNO) { +got_it: if
(p.alloc_mode == LFS) { secno = GET_SECNO(sbi, p.min_segno); if (gc_type == FG_GC) -- cgit v1.2.3 From 5deb82671ae344b28b4e744020afcbc76df1779b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 5 Jun 2013 17:42:45 +0900 Subject: f2fs: fix iget/iput of dir during recovery It is possible that iput is skipped after iget during the recovery. In recover_dentry(), dir = f2fs_iget(); ... if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out; In this case, this dir is not able to be added in dirty_dir_inode_list. The actual linking is done only when set_page_dirty() is called. So let's add this newly got inode into the list explicitly, and put it at the end of the recovery routine. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 55 +++++++++++++++++++++++++++++++++++++--------------- fs/f2fs/f2fs.h | 1 + fs/f2fs/recovery.c | 1 + 3 files changed, 41 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6f56e5781dc3..9a7750909221 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -450,13 +450,30 @@ fail_no_cp: return -EINVAL; } -void set_dirty_dir_page(struct inode *inode, struct page *page) +static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct list_head *head = &sbi->dir_inode_list; - struct dir_inode_entry *new; struct list_head *this; + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) + return -EEXIST; + } + list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs++; +#endif + return 0; +} + +void set_dirty_dir_page(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; + if (!S_ISDIR(inode->i_mode)) return; retry: @@ -469,25 +486,31 @@ retry: INIT_LIST_HEAD(&new->list); spin_lock(&sbi->dir_inode_lock); - 
list_for_each(this, head) { - struct dir_inode_entry *entry; - entry = list_entry(this, struct dir_inode_entry, list); - if (entry->inode == inode) { - kmem_cache_free(inode_entry_slab, new); - goto out; - } - } - list_add_tail(&new->list, head); -#ifdef CONFIG_F2FS_STAT_FS - sbi->n_dirty_dirs++; -#endif + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); - BUG_ON(!S_ISDIR(inode->i_mode)); -out: inc_page_count(sbi, F2FS_DIRTY_DENTS); inode_inc_dirty_dents(inode); SetPagePrivate(page); + spin_unlock(&sbi->dir_inode_lock); +} + +void add_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); spin_unlock(&sbi->dir_inode_lock); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 40b137acb8a2..d6e63da513cf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1030,6 +1030,7 @@ void remove_orphan_inode(struct f2fs_sb_info *, nid_t); int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); +void add_dirty_dir_inode(struct inode *); void remove_dirty_dir_inode(struct inode *); struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); void sync_dirty_dir_inodes(struct f2fs_sb_info *); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 539ca32f4483..ddde14f0eacb 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -58,6 +58,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) goto out; } set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + add_dirty_dir_inode(dir); } name.len = le32_to_cpu(raw_inode->i_namelen); -- cgit v1.2.3 From 8ae8f1627f39bae505b90cade50cd8a911b8bda6 Mon 
Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 3 Jun 2013 19:46:19 +0900 Subject: f2fs: support xattr security labels This patch adds the support of security labels for f2fs, which will be used by Linux Security Modules (LSMs). Quote from http://en.wikipedia.org/wiki/Linux_Security_Modules: "Linux Security Modules (LSM) is a framework that allows the Linux kernel to support a variety of computer security models while avoiding favoritism toward any single security implementation. The framework is licensed under the terms of the GNU General Public License and is standard part of the Linux kernel since Linux 2.6. AppArmor, SELinux, Smack and TOMOYO Linux are the currently accepted modules in the official kernel.". Signed-off-by: Jaegeuk Kim --- fs/f2fs/Kconfig | 12 +++++++++++ fs/f2fs/acl.c | 2 +- fs/f2fs/dir.c | 5 +++++ fs/f2fs/f2fs.h | 2 +- fs/f2fs/node.c | 12 +++++++---- fs/f2fs/xattr.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++------ fs/f2fs/xattr.h | 24 ++++++++++++++-------- 7 files changed, 100 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index fd27e7e6326e..e06e0995e00f 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -51,3 +51,15 @@ config F2FS_FS_POSIX_ACL Linux website . If you don't know what Access Control Lists are, say N + +config F2FS_FS_SECURITY + bool "F2FS Security Labels" + depends on F2FS_FS_XATTR + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. This option enables an extended attribute handler for file + security labels in the f2fs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N.
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 44abc2f286e0..b7826ec1b470 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -250,7 +250,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } } - error = f2fs_setxattr(inode, name_index, "", value, size); + error = f2fs_setxattr(inode, name_index, "", value, size, NULL); kfree(value); if (!error) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 67e2d1361fa2..eaea5b50d9c1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -13,6 +13,7 @@ #include "f2fs.h" #include "node.h" #include "acl.h" +#include "xattr.h" static unsigned long dir_blocks(struct inode *inode) { @@ -334,6 +335,10 @@ static struct page *init_inode_metadata(struct inode *inode, if (err) goto error; + err = f2fs_init_security(inode, dir, name, page); + if (err) + goto error; + wait_on_page_writeback(page); } else { page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d6e63da513cf..4f2c209ae8c1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -968,7 +968,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int remove_inode_page(struct inode *); struct page *new_inode_page(struct inode *, const struct qstr *); -struct page *new_node_page(struct dnode_of_data *, unsigned int); +struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5a59780b22d0..b02440c5b2eb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -433,7 +433,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } dn->nid = nids[i]; - npage[i] = new_node_page(dn, noffset[i]); + npage[i] = new_node_page(dn, noffset[i], NULL); if (IS_ERR(npage[i])) { alloc_nid_failed(sbi, nids[i]); err = 
PTR_ERR(npage[i]); @@ -814,10 +814,11 @@ struct page *new_inode_page(struct inode *inode, const struct qstr *name) set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); /* caller should f2fs_put_page(page, 1); */ - return new_node_page(&dn, 0); + return new_node_page(&dn, 0, NULL); } -struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) +struct page *new_node_page(struct dnode_of_data *dn, + unsigned int ofs, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct address_space *mapping = sbi->node_inode->i_mapping; @@ -850,7 +851,10 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) set_cold_node(dn->inode, page); dn->node_page = page; - sync_inode_page(dn); + if (ipage) + update_inode(dn->inode, ipage); + else + sync_inode_page(dn); set_page_dirty(page); if (ofs == 0) inc_valid_inode_count(sbi); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index ae61f359554f..3ab07ecd86ca 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -20,6 +20,7 @@ */ #include #include +#include #include "f2fs.h" #include "xattr.h" @@ -43,6 +44,10 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, prefix = XATTR_TRUSTED_PREFIX; prefix_len = XATTR_TRUSTED_PREFIX_LEN; break; + case F2FS_XATTR_INDEX_SECURITY: + prefix = XATTR_SECURITY_PREFIX; + prefix_len = XATTR_SECURITY_PREFIX_LEN; + break; default: return -EINVAL; } @@ -50,7 +55,7 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, total_len = prefix_len + name_len + 1; if (list && total_len <= list_size) { memcpy(list, prefix, prefix_len); - memcpy(list+prefix_len, name, name_len); + memcpy(list + prefix_len, name, name_len); list[prefix_len + name_len] = '\0'; } return total_len; @@ -70,13 +75,14 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, if (!capable(CAP_SYS_ADMIN)) return -EPERM; break; + case F2FS_XATTR_INDEX_SECURITY: + break; default: return -EINVAL; } if (strcmp(name, "") == 0) 
return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, - buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -93,13 +99,15 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, if (!capable(CAP_SYS_ADMIN)) return -EPERM; break; + case F2FS_XATTR_INDEX_SECURITY: + break; default: return -EINVAL; } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_setxattr(dentry->d_inode, type, name, value, size); + return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); } static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, @@ -145,6 +153,31 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, return 0; } +#ifdef CONFIG_F2FS_FS_SECURITY +static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *page) +{ + const struct xattr *xattr; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + xattr->name, xattr->value, + xattr->value_len, (struct page *)page); + if (err < 0) + break; + } + return err; +} + +int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return security_inode_init_security(inode, dir, qstr, + &f2fs_initxattrs, ipage); +} +#endif + const struct xattr_handler f2fs_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .flags = F2FS_XATTR_INDEX_USER, @@ -169,6 +202,14 @@ const struct xattr_handler f2fs_xattr_advise_handler = { .set = f2fs_xattr_advise_set, }; +const struct xattr_handler f2fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = F2FS_XATTR_INDEX_SECURITY, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_USER] = 
&f2fs_xattr_user_handler, #ifdef CONFIG_F2FS_FS_POSIX_ACL @@ -176,6 +217,9 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, #endif [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, +#endif [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, }; @@ -186,6 +230,9 @@ const struct xattr_handler *f2fs_xattr_handlers[] = { &f2fs_xattr_acl_default_handler, #endif &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + &f2fs_xattr_security_handler, +#endif &f2fs_xattr_advise_handler, NULL, }; @@ -300,7 +347,7 @@ cleanup: } int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len) + const void *value, size_t value_len, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -339,7 +386,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); mark_inode_dirty(inode); - page = new_node_page(&dn, XATTR_NODE_OFFSET); + page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); if (IS_ERR(page)) { alloc_nid_failed(sbi, fi->i_xattr_nid); fi->i_xattr_nid = 0; @@ -439,7 +486,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } - update_inode_page(inode); + if (ipage) + update_inode(inode, ipage); + else + update_inode_page(inode); mutex_unlock_op(sbi, ilock); return 0; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 49c9558305e3..3c0817bef25d 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -112,21 +112,19 @@ extern const struct xattr_handler f2fs_xattr_trusted_handler; extern const struct xattr_handler f2fs_xattr_acl_access_handler; extern const struct xattr_handler f2fs_xattr_acl_default_handler; 
extern const struct xattr_handler f2fs_xattr_advise_handler; +extern const struct xattr_handler f2fs_xattr_security_handler; extern const struct xattr_handler *f2fs_xattr_handlers[]; -extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len); -extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size); -extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, - size_t buffer_size); - +extern int f2fs_setxattr(struct inode *, int, const char *, + const void *, size_t, struct page *); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else #define f2fs_xattr_handlers NULL static inline int f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len) + const char *name, const void *value, size_t value_len) { return -EOPNOTSUPP; } @@ -142,4 +140,14 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, } #endif +#ifdef CONFIG_F2FS_FS_SECURITY +extern int f2fs_init_security(struct inode *, struct inode *, + const struct qstr *, struct page *); +#else +static inline int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return 0; +} +#endif #endif /* __F2FS_XATTR_H__ */ -- cgit v1.2.3 From 5fb08372a689360b7db51b0cfb9a068fddf279a2 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 7 Jun 2013 14:16:53 +0800 Subject: f2fs: set sb->s_fs_info before calling parse_options() In f2fs_fill_super(), set sb->s_fs_info before calling parse_options(), then we can get f2fs_sb_info via F2FS_SB(sb) in parse_options(). So that the second argument "sbi" of func parse_options() is no longer needed. 
Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3ac305da7cee..4fdcdff45e6a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -303,9 +303,9 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi, - char *options) +static int parse_options(struct super_block *sb, char *options) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); substring_t args[MAX_OPT_ARGS]; char *p; int arg = 0; @@ -541,6 +541,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_sb_buf; } + sb->s_fs_info = sbi; /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; @@ -553,7 +554,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi, POSIX_ACL); #endif /* parse mount options */ - err = parse_options(sb, sbi, (char *)data); + err = parse_options(sb, (char *)data); if (err) goto free_sb_buf; @@ -565,7 +566,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; - sb->s_fs_info = sbi; sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); -- cgit v1.2.3 From 2d4d9fb591fe83d9f0559afaa9736ebc8edad0aa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Jun 2013 16:33:07 +0900 Subject: f2fs: fix i_blocks translation on various types of files Basically an inode manages the number of allocated blocks with inode->i_blocks which is represented in a unit of sectors, not file system blocks. But, f2fs has used i_blocks in a unit of file system blocks, and f2fs_getattr translates it to the number of sectors when fstat is called. 
However, previously f2fs_file_inode_operations only has this, so this patch adds it to all the types of inode_operations. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 2 +- fs/f2fs/namei.c | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4f2c209ae8c1..c344a4d640cb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -903,6 +903,7 @@ static inline int f2fs_readonly(struct super_block *sb) int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); void f2fs_truncate(struct inode *); +int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); int truncate_hole(struct inode *, pgoff_t, pgoff_t); int truncate_data_blocks_range(struct dnode_of_data *, int); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index deefd258b847..8d2fce9e7e41 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -291,7 +291,7 @@ void f2fs_truncate(struct inode *inode) } } -static int f2fs_getattr(struct vfsmount *mnt, +int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1fe150276fad..810444ee6b5e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -490,6 +490,7 @@ const struct inode_operations f2fs_dir_inode_operations = { .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, .rename = f2fs_rename, + .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, #ifdef CONFIG_F2FS_FS_XATTR @@ -504,6 +505,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .getattr = f2fs_getattr, .setattr = f2fs_setattr, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, @@ -514,6 +516,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { }; const 
struct inode_operations f2fs_special_inode_operations = { + .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, #ifdef CONFIG_F2FS_FS_XATTR -- cgit v1.2.3 From 699489bbbea4fc3b9b735d69941cf4fca91ce1d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Jun 2013 22:08:23 +0900 Subject: f2fs: sync dir->i_size with its block allocation If a new dentry block is allocated and its i_size is updated, we should update its inode block together in order to sync i_size and its block allocation. Otherwise, we can lose an additional dentry block due to the inconsistent i_size. Erroneous Scenario ------------------- In the recovery routine, - recovery_dentry | - __f2fs_add_link | | - get_new_data_page | | | - i_size_write(new_i_size) | | | - mark_inode_dirty_sync(dir) | | - update_parent_metadata | | | - mark_inode_dirty(dir) | - write_checkpoint - sync_dirty_dir_inodes - filemap_flush(dentry_blocks) - f2fs_write_data_page - skip to write the last dentry block due to index < i_size In the above flow, new_i_size is not updated to its inode block so that the last dentry block will be lost accordingly.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 ++ fs/f2fs/dir.c | 9 ++++----- fs/f2fs/f2fs.h | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 93917e31dbdf..5b145fcc2864 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -339,6 +339,8 @@ repeat: if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); + /* Only the directory inode sets new_i_size */ + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); mark_inode_dirty_sync(inode); } return page; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index eaea5b50d9c1..69ca049b5168 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -370,22 +370,20 @@ error: static void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - bool need_dir_update = false; - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); - need_dir_update = true; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; if (F2FS_I(dir)->i_current_depth != current_depth) { F2FS_I(dir)->i_current_depth = current_depth; - need_dir_update = true; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } - if (need_dir_update) + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) update_inode_page(dir); else mark_inode_dirty(dir); @@ -502,6 +500,7 @@ add_dentry: update_parent_metadata(dir, inode, current_depth); fail: + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c344a4d640cb..27edf59ac12c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -859,6 +859,7 @@ enum { FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ + FI_UPDATE_DIR, /* should update inode block for 
consistency */ FI_DELAY_IPUT, /* used for the recovery */ }; -- cgit v1.2.3 From 6a3e8ef0de1e548d1cf9bcf51d9b7b6f4141fec5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 8 Jun 2013 21:25:28 +0900 Subject: f2fs: use the F2FS specific flags in f2fs_ioctl() The f2fs_ioctl() function is using the generic flags. Since F2FS specific flags are defined, let's use those flags instead. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8d2fce9e7e41..85b665d176a2 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -575,10 +575,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) int ret; switch (cmd) { - case FS_IOC_GETFLAGS: + case F2FS_IOC_GETFLAGS: flags = fi->i_flags & FS_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); - case FS_IOC_SETFLAGS: + case F2FS_IOC_SETFLAGS: { unsigned int oldflags; -- cgit v1.2.3 From d7cc950b4c910e4440485be784493880a0d09086 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 8 Jun 2013 21:25:40 +0900 Subject: f2fs: optimise the truncate_data_blocks_range() range The function truncate_data_blocks_range() decrements the valid block count of inode via dec_valid_block_count(). Since this function updates the i_blocks field of inode, we can update this field once we have calculated the total number of blocks to be freed. Therefore we can decrement valid blocks outside of the for loop. if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); set_page_dirty(dn->node_page); sync_inode_page(dn); } 'nr_free' tells the total number of blocks freed. So, we can just directly pass this value to dec_valid_block_count() and update the i_blocks.
Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 85b665d176a2..2f649b848521 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -185,10 +185,10 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) update_extent_cache(NULL_ADDR, dn); invalidate_blocks(sbi, blkaddr); - dec_valid_block_count(sbi, dn->inode, 1); nr_free++; } if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); set_page_dirty(dn->node_page); sync_inode_page(dn); } -- cgit v1.2.3 From b3783873cc2214542d3da9a1aa800b20919d5889 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 10 Jun 2013 09:17:01 +0900 Subject: f2fs: avoid freqeunt write_inode calls If update_inode is called, we don't need to do write_inode. So, let's use a *dirty* flag for each inode. Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 1 + fs/f2fs/inode.c | 4 ++++ fs/f2fs/super.c | 12 ++++++++++++ 4 files changed, 18 insertions(+) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 27edf59ac12c..a05aa65cfc2c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -856,6 +856,7 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2f649b848521..fda226ff1849 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -147,6 +147,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { + mark_inode_dirty_sync(inode); ret = 
f2fs_write_inode(inode, NULL); if (ret) goto out; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b44a4c1194ea..2b2d45d19e3e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -192,6 +192,7 @@ void update_inode(struct inode *inode, struct page *node_page) set_cold_node(inode, node_page); set_page_dirty(node_page); + clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); } int update_inode_page(struct inode *inode) @@ -217,6 +218,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; + if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) + return 0; + if (wbc) f2fs_balance_fs(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4fdcdff45e6a..ba56549bb2f3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -112,6 +112,17 @@ static int f2fs_drop_inode(struct inode *inode) return generic_drop_inode(inode); } +/* + * f2fs_dirty_inode() is called from __mark_inode_dirty() + * + * We should call set_dirty_inode to write the dirty inode through write_inode. + */ +static void f2fs_dirty_inode(struct inode *inode, int flags) +{ + set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); + return; +} + static void f2fs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); @@ -249,6 +260,7 @@ static struct super_operations f2fs_sops = { .drop_inode = f2fs_drop_inode, .destroy_inode = f2fs_destroy_inode, .write_inode = f2fs_write_inode, + .dirty_inode = f2fs_dirty_inode, .show_options = f2fs_show_options, .evict_inode = f2fs_evict_inode, .put_super = f2fs_put_super, -- cgit v1.2.3 From e79efe3b69d6454eb8ec734a24d49f0f4c7d26f5 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Thu, 13 Jun 2013 16:59:27 +0800 Subject: f2fs: remove unnecessary parameter "offset" from __add_sum_entry() We can get the value directly from pointer "curseg". 
Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index be668ffb001c..77f31c0584e5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -255,11 +255,11 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) * This function should be resided under the curseg_mutex lock */ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, - struct f2fs_summary *sum, unsigned short offset) + struct f2fs_summary *sum) { struct curseg_info *curseg = CURSEG_I(sbi, type); void *addr = curseg->sum_blk; - addr += offset * sizeof(struct f2fs_summary); + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); memcpy(addr, sum, sizeof(struct f2fs_summary)); return; } @@ -845,7 +845,7 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, * because, this function updates a summary entry in the * current summary block. 
*/ - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); @@ -946,7 +946,7 @@ void recover_data_page(struct f2fs_sb_info *sbi, curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); @@ -983,7 +983,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, } curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); /* change the current log to the next block addr in advance */ if (next_segno != segno) { -- cgit v1.2.3 From 8d8451af6875f8841dc20987d1363405020a9172 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Thu, 13 Jun 2013 16:59:28 +0800 Subject: f2fs: make locate_dirty_segment() as static It's used only locally and could be static. 
Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/segment.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a05aa65cfc2c..3e7cb33b64d3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -996,7 +996,6 @@ void destroy_node_manager_caches(void); */ void f2fs_balance_fs(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); -void locate_dirty_segment(struct f2fs_sb_info *, unsigned int); void clear_prefree_segments(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 77f31c0584e5..b15debcccc23 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -94,7 +94,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, * Adding dirty entry into seglist is not critical operation. * If a given segment is one of current working segments, it won't be added. */ -void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned short valid_blocks; -- cgit v1.2.3 From b25958b6ecf1dce087e62b9aa27cf8f2fe9b5c86 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Thu, 13 Jun 2013 16:59:29 +0800 Subject: f2fs: optimize do_write_data_page() Since "need_inplace_update() == true" is a very rare case, using unlikely() to give compiler a chance to optimize the code. 
Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5b145fcc2864..6d4a743caf86 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -497,8 +497,9 @@ int do_write_data_page(struct page *page) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (old_blk_addr != NEW_ADDR && !is_cold_data(page) && - need_inplace_update(inode)) { + if (unlikely(old_blk_addr != NEW_ADDR && + !is_cold_data(page) && + need_inplace_update(inode))) { rewrite_data_page(F2FS_SB(inode->i_sb), page, old_blk_addr); } else { -- cgit v1.2.3 From 354a3399dc6f7e556d04e1c731cd50e08eeb44bd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 14 Jun 2013 08:52:35 +0900 Subject: f2fs: recover wrong pino after checkpoint during fsync If a file is linked, f2fs loses its parent inode number so that fsync calls for the linked file should do a checkpoint all the time. But, if we can recover its parent inode number after the checkpoint, we can adjust the roll-forward mechanism for the further fsync calls, which is able to improve the fsync performance significantly. Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/f2fs/file.c | 31 ++++++++++++++++++++++++++++++- fs/f2fs/namei.c | 2 +- fs/f2fs/node.h | 15 +++++++++++---- fs/f2fs/segment.c | 2 +- 6 files changed, 45 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 69ca049b5168..4f21452f929d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -356,7 +356,7 @@ static struct page *init_inode_metadata(struct inode *inode, * We lost i_pino from now on.
*/ if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { - set_cp_file(inode); + file_lost_pino(inode); inc_nlink(inode); } return page; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3e7cb33b64d3..863a5e91d84a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -151,7 +151,7 @@ struct extent_info { * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. */ #define FADVISE_COLD_BIT 0x01 -#define FADVISE_CP_BIT 0x02 +#define FADVISE_LOST_PINO_BIT 0x02 struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fda226ff1849..d2d2b7dbdcc1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -102,6 +102,24 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { .remap_pages = generic_file_remap_pages, }; +static int get_parent_ino(struct inode *inode, nid_t *pino) +{ + struct dentry *dentry; + + inode = igrab(inode); + dentry = d_find_any_alias(inode); + iput(inode); + if (!dentry) + return 0; + + inode = igrab(dentry->d_parent->d_inode); + dput(dentry); + + *pino = inode->i_ino; + iput(inode); + return 1; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -134,7 +152,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; - else if (is_cp_file(inode)) + else if (file_wrong_pino(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) need_cp = true; @@ -142,8 +160,19 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) need_cp = true; if (need_cp) { + nid_t pino; + /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + F2FS_I(inode)->i_pino = pino; + file_got_pino(inode); + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if 
(ret) + goto out; + } } else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 810444ee6b5e..64c07169df05 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -112,7 +112,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, int count = le32_to_cpu(sbi->raw_super->extension_count); for (i = 0; i < count; i++) { if (is_multimedia_file(name, extlist[i])) { - set_cold_file(inode); + file_set_cold(inode); break; } } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index a503661307db..c65fb4f4230f 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -285,10 +285,17 @@ static inline void set_file(struct inode *inode, int type) F2FS_I(inode)->i_advise |= type; } -#define is_cold_file(inode) is_file(inode, FADVISE_COLD_BIT) -#define is_cp_file(inode) is_file(inode, FADVISE_CP_BIT) -#define set_cold_file(inode) set_file(inode, FADVISE_COLD_BIT) -#define set_cp_file(inode) set_file(inode, FADVISE_CP_BIT) +static inline void clear_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise &= ~type; +} + +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) static inline int is_cold_data(struct page *page) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b15debcccc23..0e1a60a4eb3f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -796,7 +796,7 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; - else if (is_cold_data(page) || is_cold_file(inode)) + else if (is_cold_data(page) || 
file_is_cold(inode)) return CURSEG_COLD_DATA; else return CURSEG_WARM_DATA; -- cgit v1.2.3 From 696c018c7718f5e33e1107da19c4d64a25018878 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 16 Jun 2013 09:48:48 +0900 Subject: f2fs: add remount_fs callback support Add the f2fs_remount function call which will be used during the filesystem remounting. This function will help us to change the mount options specific to f2fs. Also modify the f2fs background_gc mount option, which will allow the user to dynamically turn on/off the garbage collection in f2fs based on the background_gc value. If background_gc=on, garbage collection will be turned on, and if background_gc=off, garbage collection will be turned off. By default the garbage collection is on in f2fs. Change Log: v2: Incorporated the review comments by Gu Zheng. Removing the restore part for VFS flags Updating comments with proper flag conditions Display GC background option as ON/OFF Revised conditions to stop GC in case of remount v1: Initial changes for adding remount_fs callback support. Cc: Gu Zheng Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Reviewed-by: Gu Zheng [Jaegeuk Kim: change /** with /* for the coding style] Signed-off-by: Jaegeuk Kim --- Documentation/filesystems/f2fs.txt | 9 +- fs/f2fs/super.c | 235 ++++++++++++++++++++++++------------- 2 files changed, 160 insertions(+), 84 deletions(-) (limited to 'fs') diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index bd3c56c67380..b91e2f26b672 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -98,8 +98,13 @@ Cleaning Overhead MOUNT OPTIONS ================================================================================ -background_gc_off Turn off cleaning operations, namely garbage collection, - triggered in background when I/O subsystem is idle.
+background_gc=%s Turn on/off cleaning operations, namely garbage + collection, triggered in background when I/O subsystem is + idle. If background_gc=on, it will turn on the garbage + collection and if background_gc=off, garbage collection + will be truned off. + Default value for this option is on. So garbage + collection is on by default. disable_roll_forward Disable the roll-forward recovery routine discard Issue discard/TRIM commands when a segment is cleaned. no_heap Disable heap-style segment allocation which finds free diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ba56549bb2f3..75c7dc363e92 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -34,7 +34,7 @@ static struct kmem_cache *f2fs_inode_cachep; enum { - Opt_gc_background_off, + Opt_gc_background, Opt_disable_roll_forward, Opt_discard, Opt_noheap, @@ -46,7 +46,7 @@ enum { }; static match_table_t f2fs_tokens = { - {Opt_gc_background_off, "background_gc_off"}, + {Opt_gc_background, "background_gc=%s"}, {Opt_disable_roll_forward, "disable_roll_forward"}, {Opt_discard, "discard"}, {Opt_noheap, "no_heap"}, @@ -76,6 +76,91 @@ static void init_once(void *foo) inode_init_once(&fi->vfs_inode); } +static int parse_options(struct super_block *sb, char *options) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p, *name; + int arg = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. 
+ */ + args[0].to = args[0].from = NULL; + token = match_token(p, f2fs_tokens, args); + + switch (token) { + case Opt_gc_background: + name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strncmp(name, "on", 2)) + set_opt(sbi, BG_GC); + else if (!strncmp(name, "off", 3)) + clear_opt(sbi, BG_GC); + else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_discard: + set_opt(sbi, DISCARD); + break; + case Opt_noheap: + set_opt(sbi, NOHEAP); + break; +#ifdef CONFIG_F2FS_FS_XATTR + case Opt_nouser_xattr: + clear_opt(sbi, XATTR_USER); + break; +#else + case Opt_nouser_xattr: + f2fs_msg(sb, KERN_INFO, + "nouser_xattr options not supported"); + break; +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + case Opt_noacl: + clear_opt(sbi, POSIX_ACL); + break; +#else + case Opt_noacl: + f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + break; +#endif + case Opt_active_logs: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) + return -EINVAL; + sbi->active_logs = arg; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + default: + f2fs_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" or missing value", + p); + return -EINVAL; + } + } + return 0; +} + static struct inode *f2fs_alloc_inode(struct super_block *sb) { struct f2fs_inode_info *fi; @@ -225,10 +310,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (test_opt(sbi, BG_GC)) - seq_puts(seq, ",background_gc_on"); + if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + seq_printf(seq, ",background_gc=%s", "on"); else - seq_puts(seq, ",background_gc_off"); + seq_printf(seq, ",background_gc=%s", "off"); if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, DISCARD)) @@ 
-255,6 +340,58 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) return 0; } +static int f2fs_remount(struct super_block *sb, int *flags, char *data) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_mount_info org_mount_opt; + int err, active_logs; + + /* + * Save the old mount options in case we + * need to restore them. + */ + org_mount_opt = sbi->mount_opt; + active_logs = sbi->active_logs; + + /* parse mount options */ + err = parse_options(sb, data); + if (err) + goto restore_opts; + + /* + * Previous and new state of filesystem is RO, + * so no point in checking GC conditions. + */ + if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + goto skip; + + /* + * We stop the GC thread if FS is mounted as RO + * or if background_gc = off is passed in mount + * option. Also sync the filesystem. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if (sbi->gc_thread) { + stop_gc_thread(sbi); + f2fs_sync_fs(sb, 1); + } + } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + err = start_gc_thread(sbi); + if (err) + goto restore_opts; + } +skip: + /* Update the POSIXACL Flag */ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? 
MS_POSIXACL : 0); + return 0; + +restore_opts: + sbi->mount_opt = org_mount_opt; + sbi->active_logs = active_logs; + return err; +} + static struct super_operations f2fs_sops = { .alloc_inode = f2fs_alloc_inode, .drop_inode = f2fs_drop_inode, @@ -268,6 +405,7 @@ static struct super_operations f2fs_sops = { .freeze_fs = f2fs_freeze, .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, + .remount_fs = f2fs_remount, }; static struct inode *f2fs_nfs_get_inode(struct super_block *sb, @@ -315,79 +453,6 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static int parse_options(struct super_block *sb, char *options) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - substring_t args[MAX_OPT_ARGS]; - char *p; - int arg = 0; - - if (!options) - return 0; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. - */ - args[0].to = args[0].from = NULL; - token = match_token(p, f2fs_tokens, args); - - switch (token) { - case Opt_gc_background_off: - clear_opt(sbi, BG_GC); - break; - case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); - break; - case Opt_discard: - set_opt(sbi, DISCARD); - break; - case Opt_noheap: - set_opt(sbi, NOHEAP); - break; -#ifdef CONFIG_F2FS_FS_XATTR - case Opt_nouser_xattr: - clear_opt(sbi, XATTR_USER); - break; -#else - case Opt_nouser_xattr: - f2fs_msg(sb, KERN_INFO, - "nouser_xattr options not supported"); - break; -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_noacl: - clear_opt(sbi, POSIX_ACL); - break; -#else - case Opt_noacl: - f2fs_msg(sb, KERN_INFO, "noacl options not supported"); - break; -#endif - case Opt_active_logs: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) - return -EINVAL; - sbi->active_logs = arg; - break; - case Opt_disable_ext_identify: - set_opt(sbi, 
DISABLE_EXT_IDENTIFY); - break; - default: - f2fs_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" or missing value", - p); - return -EINVAL; - } - } - return 0; -} - static loff_t max_file_size(unsigned bits) { loff_t result = ADDRS_PER_INODE; @@ -686,10 +751,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) "Cannot recover all fsync data errno=%ld", err); } - /* After POR, we can run background GC thread */ - err = start_gc_thread(sbi); - if (err) - goto fail; + /* + * If filesystem is not mounted as read-only then + * do start the gc_thread. + */ + if (!(sb->s_flags & MS_RDONLY)) { + /* After POR, we can run background GC thread.*/ + err = start_gc_thread(sbi); + if (err) + goto fail; + } err = f2fs_build_stats(sbi); if (err) -- cgit v1.2.3 From 7e586fa0244578320fcced9cc08c6b124f727c35 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Jun 2013 20:47:19 +0900 Subject: f2fs: fix crc endian conversion While calculating CRC for the checkpoint block, we use __u32, but when storing the crc value to the disk, we use __le32. Let's fix the inconsistency. 
Reported-and-Tested-by: Oded Gabbay Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 12 ++++++------ fs/f2fs/f2fs.h | 19 +++++++++++++++---- 2 files changed, 21 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9a7750909221..66a6b85a51d8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -357,8 +357,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, unsigned long blk_size = sbi->blocksize; struct f2fs_checkpoint *cp_block; unsigned long long cur_version = 0, pre_version = 0; - unsigned int crc = 0; size_t crc_offset; + __u32 crc = 0; /* Read the 1st cp block in this CP pack */ cp_page_1 = get_meta_page(sbi, cp_addr); @@ -369,7 +369,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp1; - crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; @@ -384,7 +384,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp2; - crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; @@ -648,7 +648,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) block_t start_blk; struct page *cp_page; unsigned int data_sum_blocks, orphan_blocks; - unsigned int crc32 = 0; + __u32 crc32 = 0; void *kaddr; int i; @@ -717,8 +717,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); - *(__le32 *)((unsigned char *)ckpt + - le32_to_cpu(ckpt->checksum_offset)) + *((__le32 *)((unsigned char *)ckpt + + le32_to_cpu(ckpt->checksum_offset))) 
= cpu_to_le32(crc32); start_blk = __start_cp_addr(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 863a5e91d84a..467d42d65c48 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -47,14 +47,25 @@ struct f2fs_mount_info { unsigned int opt; }; -static inline __u32 f2fs_crc32(void *buff, size_t len) +#define CRCPOLY_LE 0xedb88320 + +static inline __u32 f2fs_crc32(void *buf, size_t len) { - return crc32_le(F2FS_SUPER_MAGIC, buff, len); + unsigned char *p = (unsigned char *)buf; + __u32 crc = F2FS_SUPER_MAGIC; + int i; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + } + return crc; } -static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size) +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) { - return f2fs_crc32(buff, buff_size) == blk_crc; + return f2fs_crc32(buf, buf_size) == blk_crc; } /* -- cgit v1.2.3 From 060dd67b3c0d451ea2c41e6a87811b4736a984e4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 24 Jun 2013 07:47:23 +0900 Subject: f2fs: fix an endian conversion bug detected by sparse This patch should fix the following bug reported by kbuild test robot. 
fs/f2fs/recovery.c:233:33: sparse: incorrect type in assignment (different base types) parse warnings: (new ones prefixed by >>) >> recovery.c:233: sparse: incorrect type in assignment (different base types) recovery.c:233: expected unsigned int [unsigned] [assigned] ofs_in_node recovery.c:233: got restricted __le16 [assigned] [usertype] ofs_in_node >> recovery.c:238: sparse: incorrect type in assignment (different base types) recovery.c:238: expected unsigned int [unsigned] ofs_in_node recovery.c:238: got restricted __le16 [assigned] [usertype] ofs_in_node Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ddde14f0eacb..9db8239642f4 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -246,12 +246,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, struct dnode_of_data tdn = *dn; tdn.nid = nid; tdn.node_page = dn->inode_page; - tdn.ofs_in_node = sum.ofs_in_node; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); truncate_data_blocks_range(&tdn, 1); return 0; } else if (dn->nid == nid) { struct dnode_of_data tdn = *dn; - tdn.ofs_in_node = sum.ofs_in_node; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); truncate_data_blocks_range(&tdn, 1); return 0; } -- cgit v1.2.3 From 8736fbf00372dcc0bc7b04b86d737eb5db31fff6 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 16 Jun 2013 09:49:11 +0900 Subject: f2fs: optimize the init_dirty_segmap function Optimize the while loop condition Since this condition will always be true and while loop will be terminated by the following condition in code: if (segno >= TOTAL_SEGS(sbi)) break; Hence we can replace the while loop condition with while(1) instead of always checking for segno to be less than Total segs. Also we do not need to use TOTAL_SEGS() everytime. We can store this value in a local variable since this value is constant. 
Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0e1a60a4eb3f..3ac4d29f0cd4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1582,13 +1582,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int segno = 0, offset = 0; + unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); unsigned short valid_blocks; - while (segno < TOTAL_SEGS(sbi)) { + while (1) { /* find dirty segment based on free segmap */ - segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset); - if (segno >= TOTAL_SEGS(sbi)) + segno = find_next_inuse(free_i, total_segs, offset); + if (segno >= total_segs) break; offset = segno + 1; valid_blocks = get_valid_blocks(sbi, segno, 0); -- cgit v1.2.3 From 6cc4af56066d8e9c62584cf61c6ce50fd0ab139a Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 20 Jun 2013 17:52:39 +0800 Subject: f2fs: code cleanup and simplify in func {find/add}_gc_inode This patch simplifies list operations in find_gc_inode and add_gc_inode. Just simple code cleanup. 
Signed-off-by: Gu Zheng [Jaegeuk Kim: add description] Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3a9df36491a5..35f9b1a196aa 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -321,28 +321,21 @@ static const struct victim_selection default_v_ops = { static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) { - struct list_head *this; struct inode_entry *ie; - list_for_each(this, ilist) { - ie = list_entry(this, struct inode_entry, list); + list_for_each_entry(ie, ilist, list) if (ie->inode->i_ino == ino) return ie->inode; - } return NULL; } static void add_gc_inode(struct inode *inode, struct list_head *ilist) { - struct list_head *this; - struct inode_entry *new_ie, *ie; + struct inode_entry *new_ie; - list_for_each(this, ilist) { - ie = list_entry(this, struct inode_entry, list); - if (ie->inode == inode) { - iput(inode); - return; - } + if (inode == find_gc_inode(inode->i_ino, ilist)) { + iput(inode); + return; } repeat: new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); -- cgit v1.2.3 From 763bfe1bc575dcce56dc5c570dc005d94911705f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Jun 2013 09:59:40 +0900 Subject: f2fs: remove reusing any prefree segments This patch removes check_prefree_segments, which was initially designed to enhance performance by narrowing the range of LBA usage across the whole block device. When allocating a new segment, the previous f2fs tried to find proper prefree segments and, if it found one, reused that segment for further data or node block allocation. However, I found that this was a totally wrong approach, since the prefree segments have several data or node blocks that will be used by the roll-forward mechanism that runs after a sudden power-off. Let's assume the following scenario.
/* write 8MB with fsync */ for (i = 0; i < 2048; i++) { offset = i * 4096; write(fd, offset, 4KB); fsync(fd); } In this case, the naive segment allocation sequence will be like: data segment: x, x+1, x+2, x+3 node segment: y, y+1, y+2, y+3. But, if we can reuse prefree segments, the sequence can be like: data segment: x, x+1, y, y+1 node segment: y, y+1, y+2, y+3. This is because y, y+1, and y+2 became prefree segments one by one, and those were reused by data allocation. After conducting this workload, we should consider how to recover the latest inode with its data. If we reuse prefree segments such as y or y+1, we lose the old node blocks, so f2fs cannot even start roll-forward recovery. Therefore, I suggest that we should remove reusing prefree segments. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 56 +------------------------------------------------------ 1 file changed, 1 insertion(+), 55 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3ac4d29f0cd4..a86d125a9885 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -309,56 +309,6 @@ static void write_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } -static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; - unsigned int segno; - unsigned int ofs = 0; - - /* - * If there is not enough reserved sections, - * we should not reuse prefree segments. - */ - if (has_not_enough_free_secs(sbi, 0)) - return NULL_SEGNO; - - /* - * NODE page should not reuse prefree segment, - * since those information is used for SPOR. 
- */ - if (IS_NODESEG(type)) - return NULL_SEGNO; -next: - segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs); - ofs += sbi->segs_per_sec; - - if (segno < TOTAL_SEGS(sbi)) { - int i; - - /* skip intermediate segments in a section */ - if (segno % sbi->segs_per_sec) - goto next; - - /* skip if the section is currently used */ - if (sec_usage_check(sbi, GET_SECNO(sbi, segno))) - goto next; - - /* skip if whole section is not prefree */ - for (i = 1; i < sbi->segs_per_sec; i++) - if (!test_bit(segno + i, prefree_segmap)) - goto next; - - /* skip if whole section was not free at the last checkpoint */ - for (i = 0; i < sbi->segs_per_sec; i++) - if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks) - goto next; - - return segno; - } - return NULL_SEGNO; -} - static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -597,11 +547,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, goto out; } - curseg->next_segno = check_prefree_segments(sbi, type); - - if (curseg->next_segno != NULL_SEGNO) - change_curseg(sbi, type, false); - else if (type == CURSEG_WARM_NODE) + if (type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); -- cgit v1.2.3 From 5ebefc5b409a194a09da7ad1962b4bfce10a6859 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 27 Jun 2013 09:28:54 +0800 Subject: f2fs: remove the unused argument "sbi" of func destroy_fsync_dnodes() As destroy_fsync_dnodes() is a simple list-cleanup function, delete its unused and unrelated f2fs_sb_info argument.
Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9db8239642f4..d56d951c2253 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -192,8 +192,7 @@ out: return err; } -static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, - struct list_head *head) +static void destroy_fsync_dnodes(struct list_head *head) { struct fsync_inode_entry *entry, *tmp; @@ -438,7 +437,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); BUG_ON(!list_empty(&inode_list)); out: - destroy_fsync_dnodes(sbi, &inode_list); + destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); sbi->por_doing = 0; if (!err) -- cgit v1.2.3 From a1dd3c13ce65b726fddfe72b9d2f1009db983ce6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Jun 2013 13:04:08 +0900 Subject: f2fs: fix to recover i_size from roll-forward If a user requests many data writes together with fsync, the last updated i_size should be stored in the inode block consistently. However, the previous write_end just marks the inode as dirty and doesn't update its metadata in its inode block. After that, fsync just writes the inode block with the newly updated data index, excluding inode metadata updates. So, this patch introduces a write_end that also updates the inode block when i_size is changed.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6d4a743caf86..e88f46f122aa 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -701,6 +701,27 @@ err: return err; } +static int f2fs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + SetPageUptodate(page); + set_page_dirty(page); + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + unlock_page(page); + page_cache_release(page); + return copied; +} + static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { @@ -757,7 +778,7 @@ const struct address_space_operations f2fs_dblock_aops = { .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, - .write_end = nobh_write_end, + .write_end = f2fs_write_end, .set_page_dirty = f2fs_set_data_page_dirty, .invalidatepage = f2fs_invalidate_data_page, .releasepage = f2fs_release_data_page, -- cgit v1.2.3