diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-10 06:11:44 +0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-10 06:11:44 +0400 |
commit | 64b2d1fbbfda07765dae3f601862796a61b2c451 (patch) | |
tree | 67947ede8fc007a9f0925e697a302a02bd087032 | |
parent | b1cce8032f6abe900b078d24f3c3938726528f97 (diff) | |
parent | 9ab701349247368f9d57a993b95a5bb05bb37e10 (diff) | |
download | linux-64b2d1fbbfda07765dae3f601862796a61b2c451.tar.xz |
Merge tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"In this round, there is no special interesting feature, but we've
investigated a couple of tuning points with respect to the I/O flow.
Several major bug fixes and a bunch of clean-ups also have been made.
This patch-set includes the following major enhancement patches:
- enhance wait_on_page_writeback
- support SEEK_DATA and SEEK_HOLE
- enhance readahead flows
- enhance IO flushes
- support fiemap
- add some tracepoints
The other bug fixes are as follows:
- fix to support a large volume > 2TB correctly
- recovery bug fix wrt fallocated space
- fix recursive lock on xattr operations
- fix some cases on the remount flow
And, there are a bunch of cleanups"
* tag 'for-f2fs-3.16' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (52 commits)
f2fs: support f2fs_fiemap
f2fs: avoid not to call remove_dirty_inode
f2fs: recover fallocated space
f2fs: fix to recover data written by dio
f2fs: large volume support
f2fs: avoid crash when trace f2fs_submit_page_mbio event in ra_sum_pages
f2fs: avoid overflow when large directory feathure is enabled
f2fs: fix recursive lock by f2fs_setxattr
MAINTAINERS: add a co-maintainer from samsung for F2FS
MAINTAINERS: change the email address for f2fs
f2fs: use inode_init_owner() to simplify codes
f2fs: avoid to use slab memory in f2fs_issue_flush for efficiency
f2fs: add a tracepoint for f2fs_read_data_page
f2fs: add a tracepoint for f2fs_write_{meta,node,data}_pages
f2fs: add a tracepoint for f2fs_write_{meta,node,data}_page
f2fs: add a tracepoint for f2fs_write_end
f2fs: add a tracepoint for f2fs_write_begin
f2fs: fix checkpatch warning
f2fs: deactivate inode page if the inode is evicted
f2fs: decrease the lock granularity during write_begin
...
-rw-r--r-- | Documentation/filesystems/f2fs.txt | 8 | ||||
-rw-r--r-- | MAINTAINERS | 3 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 2 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 118 | ||||
-rw-r--r-- | fs/f2fs/data.c | 63 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 12 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 49 | ||||
-rw-r--r-- | fs/f2fs/file.c | 139 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 40 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 18 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 11 | ||||
-rw-r--r-- | fs/f2fs/node.c | 154 | ||||
-rw-r--r-- | fs/f2fs/node.h | 19 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 35 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 134 | ||||
-rw-r--r-- | fs/f2fs/super.c | 32 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 110 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 8 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 8 | ||||
-rw-r--r-- | include/trace/events/f2fs.h | 146 |
20 files changed, 797 insertions, 312 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 25311e113e75..51afba17bbae 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -461,11 +461,11 @@ The number of blocks and buckets are determined by, # of blocks in level #n = | `- 4, Otherwise - ,- 2^ (n + dir_level), - | if n < MAX_DIR_HASH_DEPTH / 2, + ,- 2^(n + dir_level), + | if n + dir_level < MAX_DIR_HASH_DEPTH / 2, # of buckets in level #n = | - `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1), - Otherwise + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), + Otherwise When F2FS finds a file name in a directory, at first a hash value of the file name is calculated. Then, F2FS scans the hash table in level #0 to find the diff --git a/MAINTAINERS b/MAINTAINERS index a1f4b576628a..948379508e44 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3792,7 +3792,8 @@ F: fs/fscache/ F: include/linux/fscache*.h F2FS FILE SYSTEM -M: Jaegeuk Kim <jaegeuk.kim@samsung.com> +M: Jaegeuk Kim <jaegeuk@kernel.org> +M: Changman Lee <cm224.lee@samsung.com> L: linux-f2fs-devel@lists.sourceforge.net W: http://en.wikipedia.org/wiki/F2FS T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index e93e4ec7d165..dbe2141d10ad 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, } } - error = f2fs_setxattr(inode, name_index, "", value, size, ipage); + error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0); kfree(value); if (!error) diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index c405b8f17054..0b4710c1d370 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -33,12 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) struct address_space *mapping = META_MAPPING(sbi); struct page *page = NULL; repeat: - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) { cond_resched(); goto repeat; } - + f2fs_wait_on_page_writeback(page, META); SetPageUptodate(page); return page; } @@ -72,7 +72,7 @@ out: return page; } -inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) { switch (type) { case META_NAT: @@ -154,6 +154,8 @@ static int f2fs_write_meta_page(struct page *page, struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + trace_f2fs_writepage(page, META); + if (unlikely(sbi->por_doing)) goto redirty_out; if (wbc->for_reclaim) @@ -171,10 +173,7 @@ no_write: return 0; redirty_out: - dec_page_count(sbi, F2FS_DIRTY_META); - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } @@ -184,6 +183,8 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff, written; + trace_f2fs_writepages(mapping->host, wbc, META); + /* collect a number of dirty meta pages and write together */ if (wbc->for_kupdate || get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) @@ -367,7 +368,9 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) return; sbi->por_doing = true; - start_blk = __start_cp_addr(sbi) + 1; + + start_blk = __start_cp_addr(sbi) + 1 + + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); orphan_blkaddr = __start_sum_addr(sbi) - 1; ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); @@ -508,8 +511,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) unsigned long blk_size = sbi->blocksize; unsigned long long cp1_version = 0, cp2_version = 0; unsigned long long cp_start_blk_no; + unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + block_t cp_blk_no; + int i; - sbi->ckpt = kzalloc(blk_size, GFP_KERNEL); + sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL); if (!sbi->ckpt) return -ENOMEM; /* @@ -540,6 +546,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi) cp_block = (struct f2fs_checkpoint *)page_address(cur_page); memcpy(sbi->ckpt, cp_block, blk_size); + if (cp_blks <= 1) + goto done; + + cp_blk_no = le32_to_cpu(fsb->cp_blkaddr); + if (cur_page == cp2) + cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg); + + for (i = 1; i < cp_blks; i++) { + void *sit_bitmap_ptr; + unsigned char *ckpt = (unsigned char *)sbi->ckpt; + + cur_page = get_meta_page(sbi, cp_blk_no + i); + sit_bitmap_ptr = page_address(cur_page); + memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size); + f2fs_put_page(cur_page, 1); + } +done: f2fs_put_page(cp1, 1); f2fs_put_page(cp2, 1); return 0; @@ -552,14 +575,13 @@ fail_no_cp: static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head = &sbi->dir_inode_list; - struct dir_inode_entry *entry; - list_for_each_entry(entry, head, list) - if (unlikely(entry->inode == inode)) - return -EEXIST; + if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) + return -EEXIST; - list_add_tail(&new->list, head); + set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); + F2FS_I(inode)->dirty_dir = new; + list_add_tail(&new->list, &sbi->dir_inode_list); stat_inc_dirty_dir(sbi); return 0; } @@ -608,31 +630,26 @@ void add_dirty_dir_inode(struct inode *inode) void remove_dirty_dir_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); - struct list_head *head; struct dir_inode_entry *entry; if (!S_ISDIR(inode->i_mode)) return; spin_lock(&sbi->dir_inode_lock); - if (get_dirty_dents(inode)) { + if (get_dirty_dents(inode) || + !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) { spin_unlock(&sbi->dir_inode_lock); return; } - head = &sbi->dir_inode_list; - list_for_each_entry(entry, head, list) { - if (entry->inode == inode) { - list_del(&entry->list); - stat_dec_dirty_dir(sbi); - spin_unlock(&sbi->dir_inode_lock); - kmem_cache_free(inode_entry_slab, entry); - goto done; - } - } + entry = F2FS_I(inode)->dirty_dir; + list_del(&entry->list); + F2FS_I(inode)->dirty_dir = NULL; + clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR); + stat_dec_dirty_dir(sbi); spin_unlock(&sbi->dir_inode_lock); + kmem_cache_free(inode_entry_slab, entry); -done: /* Only from the recovery routine */ if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); @@ -640,26 +657,6 @@ done: } } -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) -{ - - struct list_head *head; - struct inode *inode = NULL; - struct dir_inode_entry *entry; - - spin_lock(&sbi->dir_inode_lock); - - head = &sbi->dir_inode_list; - list_for_each_entry(entry, head, list) { - if (entry->inode->i_ino == ino) { - inode = entry->inode; - break; - } - } - spin_unlock(&sbi->dir_inode_lock); - return inode; -} - void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) { struct list_head *head; @@ -758,6 +755,13 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) __u32 crc32 = 0; void *kaddr; int i; + int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload); + + /* + * This avoids to conduct wrong roll-forward operations and uses + * metapages, so should be called prior to sync_meta_pages below. + */ + discard_next_dnode(sbi); /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) @@ -802,16 +806,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1) / F2FS_ORPHANS_PER_BLOCK; - ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks); + ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + + orphan_blocks); if (is_umount) { set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE); + cp_payload_blks + data_sum_blocks + + orphan_blocks + NR_CURSEG_NODE_TYPE); } else { clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); ckpt->cp_pack_total_block_count = cpu_to_le32(2 + - data_sum_blocks + orphan_blocks); + cp_payload_blks + data_sum_blocks + + orphan_blocks); } if (sbi->n_orphans) @@ -837,6 +844,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) set_page_dirty(cp_page); f2fs_put_page(cp_page, 1); + for (i = 1; i < 1 + cp_payload_blks; i++) { + cp_page = grab_meta_page(sbi, start_blk++); + kaddr = page_address(cp_page); + memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE, + (1 << sbi->log_blocksize)); + set_page_dirty(cp_page); + f2fs_put_page(cp_page, 1); + } + if (sbi->n_orphans) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 45abd60e2bff..c1fb6dd10911 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -417,7 +417,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (unlikely(dn.data_blkaddr == NEW_ADDR)) return ERR_PTR(-EINVAL); - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -455,7 +455,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) int err; repeat: - page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); + page = grab_cache_page(mapping, index); if (!page) return ERR_PTR(-ENOMEM); @@ -652,8 +652,7 @@ static int get_data_block(struct inode *inode, sector_t iblock, goto put_out; } - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); bh_result->b_size = (((size_t)1) << blkbits); dn.ofs_in_node++; pgofs++; @@ -675,8 +674,7 @@ get_next: if (dn.data_blkaddr == NEW_ADDR) goto put_out; - end_offset = IS_INODE(dn.node_page) ? - ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); } if (maxblocks > (bh_result->b_size >> blkbits)) { @@ -710,11 +708,19 @@ out: return err; } +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + return generic_block_fiemap(inode, fieinfo, start, len, get_data_block); +} + static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; int ret; + trace_f2fs_readpage(page, DATA); + /* If the file has inline data, try to read it directlly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); @@ -790,6 +796,8 @@ static int f2fs_write_data_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, DATA); + if (page->index < end_index) goto write; @@ -798,10 +806,8 @@ static int f2fs_write_data_page(struct page *page, * this page does not have to be written to disk. */ offset = i_size & (PAGE_CACHE_SIZE - 1); - if ((page->index >= end_index + 1) || !offset) { - inode_dec_dirty_dents(inode); + if ((page->index >= end_index + 1) || !offset) goto out; - } zero_user_segment(page, offset, PAGE_CACHE_SIZE); write: @@ -810,7 +816,6 @@ write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { - inode_dec_dirty_dents(inode); err = do_write_data_page(page, &fio); goto done; } @@ -832,15 +837,16 @@ done: clear_cold_data(page); out: + inode_dec_dirty_dents(inode); unlock_page(page); if (need_balance_fs) f2fs_balance_fs(sbi); + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, DATA, WRITE); return 0; redirty_out: - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } @@ -862,12 +868,15 @@ static int f2fs_write_data_pages(struct address_space *mapping, int ret; long diff; + trace_f2fs_writepages(mapping->host, wbc, DATA); + /* deal with chardevs and other special file */ if (!mapping->a_ops->writepage) return 0; if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && - get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA)) + get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA) && + available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; diff = nr_pages_to_write(sbi, DATA, wbc); @@ -903,6 +912,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, struct dnode_of_data dn; int err = 0; + trace_f2fs_write_begin(inode, pos, len, flags); + f2fs_balance_fs(sbi); repeat: err = f2fs_convert_inline_data(inode, pos + len); @@ -912,6 +923,10 @@ repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) return -ENOMEM; + + /* to avoid latency during memory pressure */ + unlock_page(page); + *pagep = page; if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) @@ -923,10 +938,18 @@ repeat: f2fs_unlock_op(sbi); if (err) { - f2fs_put_page(page, 1); + f2fs_put_page(page, 0); return err; } inline_data: + lock_page(page); + if (unlikely(page->mapping != mapping)) { + f2fs_put_page(page, 1); + goto repeat; + } + + f2fs_wait_on_page_writeback(page, DATA); + if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; @@ -978,6 +1001,8 @@ static int f2fs_write_end(struct file *file, { struct inode *inode = page->mapping->host; + trace_f2fs_write_end(inode, pos, len, copied); + SetPageUptodate(page); set_page_dirty(page); @@ -1022,6 +1047,9 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (check_direct_IO(inode, rw, iov, offset, nr_segs)) return 0; + /* clear fsync mark to recover these blocks */ + fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); + return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, get_data_block); } @@ -1061,6 +1089,11 @@ static int f2fs_set_data_page_dirty(struct page *page) static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { + struct inode *inode = mapping->host; + + if (f2fs_has_inline_data(inode)) + return 0; + return generic_block_bmap(mapping, block, get_data_block); } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 972fd0ef230f..966acb039e3b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -23,10 +23,10 @@ static unsigned long dir_blocks(struct inode *inode) static unsigned int dir_buckets(unsigned int level, int dir_level) { - if (level < MAX_DIR_HASH_DEPTH / 2) + if (level + dir_level < MAX_DIR_HASH_DEPTH / 2) return 1 << (level + dir_level); else - return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1); + return MAX_DIR_BUCKETS; } static unsigned int bucket_blocks(unsigned int level) @@ -268,6 +268,8 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_inode *ri; + f2fs_wait_on_page_writeback(ipage, NODE); + /* copy name info. to this inode page */ ri = F2FS_INODE(ipage); ri->i_namelen = cpu_to_le32(name->len); @@ -637,11 +639,17 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct f2fs_dentry_block *dentry_blk = NULL; struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; + struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); unsigned char d_type = DT_UNKNOWN; bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); + /* readahead for multi pages of dir */ + if (npages - n > 1 && !ra_has_index(ra, n)) + page_cache_sync_readahead(inode->i_mapping, ra, file, n, + min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES)); + for (; n < npages; n++) { dentry_page = get_lock_data_page(inode, n); if (IS_ERR(dentry_page)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2ecac8312359..e51c732b0dd9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -182,6 +182,8 @@ enum { #define F2FS_LINK_MAX 32000 /* maximum link count per file */ +#define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ + /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 16 /* minimum extent length */ @@ -218,6 +220,7 @@ struct f2fs_inode_info { nid_t i_xattr_nid; /* node id that contains xattrs */ unsigned long long xattr_ver; /* cp version of xattr modification */ struct extent_info ext; /* in-memory extent cache entry */ + struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ }; static inline void get_extent_info(struct extent_info *ext, @@ -243,6 +246,7 @@ static inline void set_raw_extent(struct extent_info *ext, struct f2fs_nm_info { block_t nat_blkaddr; /* base disk address of NAT */ nid_t max_nid; /* maximum possible node ids */ + nid_t available_nids; /* maximum available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ unsigned int ram_thresh; /* control the memory footprint */ @@ -323,6 +327,15 @@ struct flush_cmd { int ret; }; +struct flush_cmd_control { + struct task_struct *f2fs_issue_flush; /* flush thread */ + wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ + struct flush_cmd *issue_list; /* list for command issue */ + struct flush_cmd *dispatch_list; /* list for command dispatch */ + spinlock_t issue_lock; /* for issue list lock */ + struct flush_cmd *issue_tail; /* list tail of issue list */ +}; + struct f2fs_sm_info { struct sit_info *sit_info; /* whole segment information */ struct free_segmap_info *free_info; /* free segment information */ @@ -353,12 +366,8 @@ struct f2fs_sm_info { unsigned int min_ipu_util; /* in-place-update threshold */ /* for flush command control */ - struct task_struct *f2fs_issue_flush; /* flush thread */ - wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ - struct flush_cmd *issue_list; /* list for command issue */ - struct flush_cmd *dispatch_list; /* list for command dispatch */ - spinlock_t issue_lock; /* for issue list lock */ - struct flush_cmd *issue_tail; /* list tail of issue list */ + struct flush_cmd_control *cmd_control_info; + }; /* @@ -755,9 +764,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - int offset = (flag == NAT_BITMAP) ? + int offset; + + if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) { + if (flag == NAT_BITMAP) + return &ckpt->sit_nat_version_bitmap; + else + return ((unsigned char *)ckpt + F2FS_BLKSIZE); + } else { + offset = (flag == NAT_BITMAP) ? le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0; - return &ckpt->sit_nat_version_bitmap + offset; + return &ckpt->sit_nat_version_bitmap + offset; + } } static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) @@ -958,6 +976,7 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) enum { FI_NEW_INODE, /* indicate newly allocated inode */ FI_DIRTY_INODE, /* indicate inode is dirty or not */ + FI_DIRTY_DIR, /* indicate directory has dirty pages */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ @@ -1071,6 +1090,12 @@ static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) +/* get offset of first page in next direct node */ +#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \ + ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \ + (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \ + ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi)) + /* * file.c */ @@ -1140,8 +1165,10 @@ f2fs_hash_t f2fs_dentry_hash(const char *, size_t); struct dnode_of_data; struct node_info; +bool available_free_memory(struct f2fs_sb_info *, int); int is_checkpointed_node(struct f2fs_sb_info *, nid_t); bool fsync_mark_done(struct f2fs_sb_info *, nid_t); +void fsync_mark_clear(struct f2fs_sb_info *, nid_t); void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); @@ -1176,9 +1203,12 @@ void destroy_node_manager_caches(void); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); int f2fs_issue_flush(struct f2fs_sb_info *); +int create_flush_cmd_control(struct f2fs_sb_info *); +void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void clear_prefree_segments(struct f2fs_sb_info *); +void discard_next_dnode(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); @@ -1221,7 +1251,6 @@ int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); void add_dirty_dir_inode(struct inode *); void remove_dirty_dir_inode(struct inode *); -struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); void sync_dirty_dir_inodes(struct f2fs_sb_info *); void write_checkpoint(struct f2fs_sb_info *, bool); void init_orphan_info(struct f2fs_sb_info *); @@ -1242,6 +1271,7 @@ struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int do_write_data_page(struct page *, struct f2fs_io_info *); +int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); /* * gc.c @@ -1391,5 +1421,6 @@ bool f2fs_may_inline(struct inode *); int f2fs_read_inline_data(struct inode *, struct page *); int f2fs_convert_inline_data(struct inode *, pgoff_t); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); +void truncate_inline_data(struct inode *, u64); int recover_inline_data(struct inode *, struct page *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 60e7d5448a1d..9c49c593d8eb 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -19,6 +19,7 @@ #include <linux/compat.h> #include <linux/uaccess.h> #include <linux/mount.h> +#include <linux/pagevec.h> #include "f2fs.h" #include "node.h" @@ -194,6 +195,132 @@ out: return ret; } +static pgoff_t __get_first_dirty_index(struct address_space *mapping, + pgoff_t pgofs, int whence) +{ + struct pagevec pvec; + int nr_pages; + + if (whence != SEEK_DATA) + return 0; + + /* find first dirty page index */ + pagevec_init(&pvec, 0); + nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs, PAGECACHE_TAG_DIRTY, 1); + pgofs = nr_pages ? pvec.pages[0]->index: LONG_MAX; + pagevec_release(&pvec); + return pgofs; +} + +static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs, + int whence) +{ + switch (whence) { + case SEEK_DATA: + if ((blkaddr == NEW_ADDR && dirty == pgofs) || + (blkaddr != NEW_ADDR && blkaddr != NULL_ADDR)) + return true; + break; + case SEEK_HOLE: + if (blkaddr == NULL_ADDR) + return true; + break; + } + return false; +} + +static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxbytes = inode->i_sb->s_maxbytes; + struct dnode_of_data dn; + pgoff_t pgofs, end_offset, dirty; + loff_t data_ofs = offset; + loff_t isize; + int err = 0; + + mutex_lock(&inode->i_mutex); + + isize = i_size_read(inode); + if (offset >= isize) + goto fail; + + /* handle inline data case */ + if (f2fs_has_inline_data(inode)) { + if (whence == SEEK_HOLE) + data_ofs = isize; + goto found; + } + + pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT); + + dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence); + + for (; data_ofs < isize; data_ofs = pgofs << PAGE_CACHE_SHIFT) { + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA); + if (err && err != -ENOENT) { + goto fail; + } else if (err == -ENOENT) { + /* direct node is not exist */ + if (whence == SEEK_DATA) { + pgofs = PGOFS_OF_NEXT_DNODE(pgofs, + F2FS_I(inode)); + continue; + } else { + goto found; + } + } + + end_offset = IS_INODE(dn.node_page) ? + ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK; + + /* find data/hole in dnode block */ + for (; dn.ofs_in_node < end_offset; + dn.ofs_in_node++, pgofs++, + data_ofs = pgofs << PAGE_CACHE_SHIFT) { + block_t blkaddr; + blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node); + + if (__found_offset(blkaddr, dirty, pgofs, whence)) { + f2fs_put_dnode(&dn); + goto found; + } + } + f2fs_put_dnode(&dn); + } + + if (whence == SEEK_DATA) + goto fail; +found: + if (whence == SEEK_HOLE && data_ofs > isize) + data_ofs = isize; + mutex_unlock(&inode->i_mutex); + return vfs_setpos(file, data_ofs, maxbytes); +fail: + mutex_unlock(&inode->i_mutex); + return -ENXIO; +} + +static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + loff_t maxbytes = inode->i_sb->s_maxbytes; + + switch (whence) { + case SEEK_SET: + case SEEK_CUR: + case SEEK_END: + return generic_file_llseek_size(file, offset, whence, + maxbytes, i_size_read(inode)); + case SEEK_DATA: + case SEEK_HOLE: + return f2fs_seek_block(file, offset, whence); + } + + return -EINVAL; +} + static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); @@ -242,6 +369,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; + if (f2fs_has_inline_data(inode)) + return truncate_inline_data(inode, from); + if (!offset) return; @@ -288,10 +418,7 @@ int truncate_blocks(struct inode *inode, u64 from) return err; } - if (IS_INODE(dn.node_page)) - count = ADDRS_PER_INODE(F2FS_I(inode)); - else - count = ADDRS_PER_BLOCK; + count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); count -= dn.ofs_in_node; f2fs_bug_on(count < 0); @@ -413,6 +540,7 @@ const struct inode_operations f2fs_file_inode_operations = { .listxattr = f2fs_listxattr, .removexattr = generic_removexattr, #endif + .fiemap = f2fs_fiemap, }; static void fill_zero(struct inode *inode, pgoff_t index, @@ -555,6 +683,7 @@ static int expand_inode_data(struct inode *inode, loff_t offset, i_size_read(inode) < new_size) { i_size_write(inode, new_size); mark_inode_dirty(inode); + f2fs_write_inode(inode, NULL); } return ret; @@ -678,7 +807,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) #endif const struct file_operations f2fs_file_operations = { - .llseek = generic_file_llseek, + .llseek = f2fs_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 383db1fabcf4..1bba5228c197 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -81,8 +81,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) f2fs_lock_op(sbi); ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } /* * i_addr[0] is not used for inline data, @@ -90,11 +92,10 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) */ set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, 0); - if (err) { - f2fs_unlock_op(sbi); - return err; - } + if (err) + goto out; + f2fs_wait_on_page_writeback(page, DATA); zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ @@ -118,6 +119,7 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) sync_inode_page(&dn); f2fs_put_dnode(&dn); +out: f2fs_unlock_op(sbi); return err; } @@ -132,7 +134,7 @@ int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size) else if (to_size <= MAX_INLINE_DATA) return 0; - page = grab_cache_page_write_begin(inode->i_mapping, 0, AOP_FLAG_NOFS); + page = grab_cache_page(inode->i_mapping, 0); if (!page) return -ENOMEM; @@ -155,6 +157,7 @@ int f2fs_write_inline_data(struct inode *inode, return err; ipage = dn.inode_page; + f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); src_addr = kmap(page); @@ -175,6 +178,26 @@ int f2fs_write_inline_data(struct inode *inode, return 0; } +void truncate_inline_data(struct inode *inode, u64 from) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct page *ipage; + + if (from >= MAX_INLINE_DATA) + return; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return; + + f2fs_wait_on_page_writeback(ipage, NODE); + + zero_user_segment(ipage, INLINE_DATA_OFFSET + from, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + set_page_dirty(ipage); + f2fs_put_page(ipage, 1); +} + int recover_inline_data(struct inode *inode, struct page *npage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -199,6 +222,8 @@ process_inline: ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + f2fs_wait_on_page_writeback(ipage, NODE); + src_addr = inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); @@ -210,6 +235,7 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(IS_ERR(ipage)); + f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ee829d360468..adc622c6bdce 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -12,6 +12,7 @@ #include <linux/f2fs_fs.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/bitops.h> #include "f2fs.h" #include "node.h" @@ -21,20 +22,20 @@ void f2fs_set_inode_flags(struct inode *inode) { unsigned int flags = F2FS_I(inode)->i_flags; - - inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | - S_NOATIME | S_DIRSYNC); + unsigned int new_fl = 0; if (flags & FS_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & FS_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & FS_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & FS_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & FS_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + set_mask_bits(&inode->i_flags, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); } static void __get_inode_rdev(struct inode *inode, struct f2fs_inode *ri) @@ -294,4 +295,5 @@ void f2fs_evict_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); no_delete: clear_inode(inode); + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); } diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a9409d19dfd4..9138c32aa698 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -41,18 +41,9 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) } f2fs_unlock_op(sbi); - inode->i_uid = current_fsuid(); - - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else { - inode->i_gid = current_fsgid(); - } + inode_init_owner(inode, dir, mode); inode->i_ino = ino; - inode->i_mode = mode; inode->i_blocks = 0; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_generation = sbi->s_next_generation++; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 57caa6eaf47b..9dfb9a042fd2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -26,20 +26,26 @@ static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; -static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type) +bool available_free_memory(struct f2fs_sb_info *sbi, int type) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; unsigned long mem_size = 0; + bool res = false; si_meminfo(&val); - if (type == FREE_NIDS) - mem_size = nm_i->fcnt * sizeof(struct free_nid); - else if (type == NAT_ENTRIES) - mem_size += nm_i->nat_cnt * sizeof(struct nat_entry); - mem_size >>= 12; - - /* give 50:50 memory for free nids and nat caches respectively */ - return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11)); + /* give 25%, 25%, 50% memory for each components respectively */ + if (type == FREE_NIDS) { + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + } else if (type == NAT_ENTRIES) { + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + } else if (type == DIRTY_DENTS) { + mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); + res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + } + return res; } static void clear_node_page_dirty(struct page *page) @@ -147,6 +153,18 @@ bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) return fsync_done; } +void fsync_mark_clear(struct f2fs_sb_info *sbi, nid_t nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct nat_entry *e; + + write_lock(&nm_i->nat_tree_lock); + e = __lookup_nat_cache(nm_i, nid); + if (e) + e->fsync_done = false; + write_unlock(&nm_i->nat_tree_lock); +} + static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; @@ -179,9 +197,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); goto retry; } - nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); - nat_set_ino(e, le32_to_cpu(ne->ino)); - nat_set_version(e, ne->version); + node_info_from_raw_nat(&e->ni, ne); } write_unlock(&nm_i->nat_tree_lock); } @@ -243,7 +259,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (available_free_memory(nm_i, NAT_ENTRIES)) + if (available_free_memory(sbi, NAT_ENTRIES)) return 0; write_lock(&nm_i->nat_tree_lock); @@ -849,8 +865,7 @@ struct page *new_node_page(struct dnode_of_data *dn, if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) return ERR_PTR(-EPERM); - page = grab_cache_page_write_begin(NODE_MAPPING(sbi), - dn->nid, AOP_FLAG_NOFS); + page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); if (!page) return ERR_PTR(-ENOMEM); @@ -867,6 +882,7 @@ struct page *new_node_page(struct dnode_of_data *dn, new_ni.ino = dn->inode->i_ino; set_node_addr(sbi, &new_ni, NEW_ADDR, false); + f2fs_wait_on_page_writeback(page, NODE); fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); set_cold_node(dn->inode, page); SetPageUptodate(page); @@ -946,8 +962,7 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) struct page *page; int err; repeat: - page = grab_cache_page_write_begin(NODE_MAPPING(sbi), - nid, AOP_FLAG_NOFS); + page = grab_cache_page(NODE_MAPPING(sbi), nid); if (!page) return ERR_PTR(-ENOMEM); @@ -1194,6 +1209,8 @@ static int f2fs_write_node_page(struct page *page, .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE, }; + trace_f2fs_writepage(page, NODE); + if (unlikely(sbi->por_doing)) goto redirty_out; @@ -1225,10 +1242,7 @@ static int f2fs_write_node_page(struct page *page, return 0; redirty_out: - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - account_page_redirty(page); - set_page_dirty(page); + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; } @@ -1238,6 +1252,8 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); long diff; + trace_f2fs_writepages(mapping->host, wbc, NODE); + /* balancing f2fs's metadata in background */ f2fs_balance_fs_bg(sbi); @@ -1313,13 +1329,14 @@ static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, radix_tree_delete(&nm_i->free_nid_root, i->nid); } -static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) +static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; struct nat_entry *ne; bool allocated = false; - if (!available_free_memory(nm_i, FREE_NIDS)) + if (!available_free_memory(sbi, FREE_NIDS)) return -1; /* 0 nid should not be used */ @@ -1372,9 +1389,10 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) kmem_cache_free(free_nid_slab, i); } -static void scan_nat_page(struct f2fs_nm_info *nm_i, +static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page, nid_t start_nid) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; int i; @@ -1389,7 +1407,7 @@ static void scan_nat_page(struct f2fs_nm_info *nm_i, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); f2fs_bug_on(blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) { - if (add_free_nid(nm_i, start_nid, true) < 0) + if (add_free_nid(sbi, start_nid, true) < 0) break; } } @@ -1413,7 +1431,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) while (1) { struct page *page = get_current_nat_page(sbi, nid); - scan_nat_page(nm_i, page, nid); + scan_nat_page(sbi, page, nid); f2fs_put_page(page, 1); nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); @@ -1433,7 +1451,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); nid = le32_to_cpu(nid_in_journal(sum, i)); if (addr == NULL_ADDR) - add_free_nid(nm_i, nid, true); + add_free_nid(sbi, nid, true); else remove_free_nid(nm_i, nid); } @@ -1450,7 +1468,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i = NULL; retry: - if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) + if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) return false; spin_lock(&nm_i->free_nid_list_lock); @@ -1510,7 +1528,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); f2fs_bug_on(!i || i->state != NID_ALLOC); - if (!available_free_memory(nm_i, FREE_NIDS)) { + if (!available_free_memory(sbi, FREE_NIDS)) { __del_from_free_nid_list(nm_i, i); need_free = true; } else { @@ -1532,7 +1550,7 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, clear_node_page_dirty(page); } -void recover_inline_xattr(struct inode *inode, struct page *page) +static void recover_inline_xattr(struct inode *inode, struct page *page) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); void *src_addr, *dst_addr; @@ -1557,6 +1575,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page) src_addr = inline_xattr_addr(page); inline_size = inline_xattr_size(inode); + f2fs_wait_on_page_writeback(ipage, NODE); memcpy(dst_addr, src_addr, inline_size); update_inode(inode, ipage); @@ -1612,6 +1631,11 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) struct node_info old_ni, new_ni; struct page *ipage; + get_node_info(sbi, ino, &old_ni); + + if (unlikely(old_ni.blk_addr != NULL_ADDR)) + return -EINVAL; + ipage = grab_cache_page(NODE_MAPPING(sbi), ino); if (!ipage) return -ENOMEM; @@ -1619,7 +1643,6 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); - get_node_info(sbi, ino, &old_ni); SetPageUptodate(ipage); fill_node_footer(ipage, ino, ino, 0, true); @@ -1645,35 +1668,29 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) /* * ra_sum_pages() merge contiguous pages into one bio and submit. - * these pre-readed pages are linked in pages list. + * these pre-readed pages are alloced in bd_inode's mapping tree. */ -static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, +static int ra_sum_pages(struct f2fs_sb_info *sbi, struct page **pages, int start, int nrpages) { - struct page *page; - int page_idx = start; + struct inode *inode = sbi->sb->s_bdev->bd_inode; + struct address_space *mapping = inode->i_mapping; + int i, page_idx = start; struct f2fs_io_info fio = { .type = META, .rw = READ_SYNC | REQ_META | REQ_PRIO }; - for (; page_idx < start + nrpages; page_idx++) { - /* alloc temporal page for read node summary info*/ - page = alloc_page(GFP_F2FS_ZERO); - if (!page) + for (i = 0; page_idx < start + nrpages; page_idx++, i++) { + /* alloc page in bd_inode for reading node summary info */ + pages[i] = grab_cache_page(mapping, page_idx); + if (!pages[i]) break; - - lock_page(page); - page->index = page_idx; - list_add_tail(&page->lru, pages); + f2fs_submit_page_mbio(sbi, pages[i], page_idx, &fio); } - list_for_each_entry(page, pages, lru) - f2fs_submit_page_mbio(sbi, page, page->index, &fio); - f2fs_submit_merged_bio(sbi, META, READ); - - return page_idx - start; + return i; } int restore_node_summary(struct f2fs_sb_info *sbi, @@ -1681,11 +1698,11 @@ int restore_node_summary(struct f2fs_sb_info *sbi, { struct f2fs_node *rn; struct f2fs_summary *sum_entry; - struct page *page, *tmp; + struct inode *inode = sbi->sb->s_bdev->bd_inode; block_t addr; int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); - int i, last_offset, nrpages, err = 0; - LIST_HEAD(page_list); + struct page *pages[bio_blocks]; + int i, idx, last_offset, nrpages, err = 0; /* scan the node segment */ last_offset = sbi->blocks_per_seg; @@ -1696,29 +1713,31 @@ int restore_node_summary(struct f2fs_sb_info *sbi, nrpages = min(last_offset - i, bio_blocks); /* read ahead node pages */ - nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages); + nrpages = ra_sum_pages(sbi, pages, addr, nrpages); if (!nrpages) return -ENOMEM; - list_for_each_entry_safe(page, tmp, &page_list, lru) { + for (idx = 0; idx < nrpages; idx++) { if (err) goto skip; - lock_page(page); - if (unlikely(!PageUptodate(page))) { + lock_page(pages[idx]); + if (unlikely(!PageUptodate(pages[idx]))) { err = -EIO; } else { - rn = F2FS_NODE(page); + rn = F2FS_NODE(pages[idx]); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; sum_entry++; } - unlock_page(page); + unlock_page(pages[idx]); skip: - list_del(&page->lru); - __free_pages(page, 0); + page_cache_release(pages[idx]); } + + invalidate_mapping_pages(inode->i_mapping, addr, + addr + nrpages); } return err; } @@ -1756,9 +1775,7 @@ retry: write_unlock(&nm_i->nat_tree_lock); goto retry; } - nat_set_blkaddr(ne, le32_to_cpu(raw_ne.block_addr)); - nat_set_ino(ne, le32_to_cpu(raw_ne.ino)); - nat_set_version(ne, raw_ne.version); + node_info_from_raw_nat(&ne->ni, &raw_ne); __set_nat_cache_dirty(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); } @@ -1791,7 +1808,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t nid; struct f2fs_nat_entry raw_ne; int offset = -1; - block_t new_blkaddr; if (nat_get_blkaddr(ne) == NEW_ADDR) continue; @@ -1827,11 +1843,7 @@ to_nat_page: f2fs_bug_on(!nat_blk); raw_ne = nat_blk->entries[nid - start_nid]; flush_now: - new_blkaddr = nat_get_blkaddr(ne); - - raw_ne.ino = cpu_to_le32(nat_get_ino(ne)); - raw_ne.block_addr = cpu_to_le32(new_blkaddr); - raw_ne.version = nat_get_version(ne); + raw_nat_from_node_info(&raw_ne, &ne->ni); if (offset < 0) { nat_blk->entries[nid - start_nid] = raw_ne; @@ -1841,7 +1853,7 @@ flush_now: } if (nat_get_blkaddr(ne) == NULL_ADDR && - add_free_nid(NM_I(sbi), nid, false) <= 0) { + add_free_nid(sbi, nid, false) <= 0) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); @@ -1869,8 +1881,10 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); + nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; + /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3; + nm_i->available_nids = nm_i->max_nid - 3; nm_i->fcnt = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 5decc1a375f0..7281112cd1c8 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -59,12 +59,12 @@ struct nat_entry { do { \ ne->checkpointed = false; \ list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ - } while (0); + } while (0) #define __clear_nat_cache_dirty(nm_i, ne) \ do { \ ne->checkpointed = true; \ list_move_tail(&ne->list, &nm_i->nat_entries); \ - } while (0); + } while (0) #define inc_node_version(version) (++version) static inline void node_info_from_raw_nat(struct node_info *ni, @@ -75,9 +75,18 @@ static inline void node_info_from_raw_nat(struct node_info *ni, ni->version = raw_ne->version; } -enum nid_type { +static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, + struct node_info *ni) +{ + raw_ne->ino = cpu_to_le32(ni->ino); + raw_ne->block_addr = cpu_to_le32(ni->blk_addr); + raw_ne->version = ni->version; +} + +enum mem_type { FREE_NIDS, /* indicates the free nid list */ - NAT_ENTRIES /* indicates the cached nat entry */ + NAT_ENTRIES, /* indicates the cached nat entry */ + DIRTY_DENTS /* indicates dirty dentry pages */ }; /* @@ -263,7 +272,7 @@ static inline void set_nid(struct page *p, int off, nid_t nid, bool i) { struct f2fs_node *rn = F2FS_NODE(p); - wait_on_page_writeback(p); + f2fs_wait_on_page_writeback(p, NODE); if (i) rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index b1ae89f0f44e..a112368a4a86 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -46,15 +46,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode) struct inode *dir, *einode; int err = 0; - dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); - if (!dir) { - dir = f2fs_iget(inode->i_sb, pino); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; - } - set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); - add_dirty_dir_inode(dir); + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; } name.len = le32_to_cpu(raw_inode->i_namelen); @@ -63,7 +58,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) if (unlikely(name.len > F2FS_NAME_LEN)) { WARN_ON(1); err = -ENAMETOOLONG; - goto out; + goto out_err; } retry: de = f2fs_find_entry(dir, &name, &page); @@ -73,7 +68,8 @@ retry: einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { WARN_ON(1); - if (PTR_ERR(einode) == -ENOENT) + err = PTR_ERR(einode); + if (err == -ENOENT) err = -EEXIST; goto out_unmap_put; } @@ -87,11 +83,23 @@ retry: goto retry; } err = __f2fs_add_link(dir, &name, inode); + if (err) + goto out_err; + + if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) { + iput(dir); + } else { + add_dirty_dir_inode(dir); + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + } + goto out; out_unmap_put: kunmap(page); f2fs_put_page(page, 0); +out_err: + iput(dir); out: f2fs_msg(inode->i_sb, KERN_NOTICE, "%s: ino = %x, name = %s, dir = %lx, err = %d", @@ -299,10 +307,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, goto out; start = start_bidx_of_node(ofs_of_node(page), fi); - if (IS_INODE(page)) - end = start + ADDRS_PER_INODE(fi); - else - end = start + ADDRS_PER_BLOCK; + end = start + ADDRS_PER_PAGE(page, fi); f2fs_lock_op(sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 085f548be7a3..f25f0e07e26f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -25,7 +25,6 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; -static struct kmem_cache *flush_cmd_slab; /* * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since @@ -200,20 +199,20 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) static int issue_flush_thread(void *data) { struct f2fs_sb_info *sbi = data; - struct f2fs_sm_info *sm_i = SM_I(sbi); - wait_queue_head_t *q = &sm_i->flush_wait_queue; + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + wait_queue_head_t *q = &fcc->flush_wait_queue; repeat: if (kthread_should_stop()) return 0; - spin_lock(&sm_i->issue_lock); - if (sm_i->issue_list) { - sm_i->dispatch_list = sm_i->issue_list; - sm_i->issue_list = sm_i->issue_tail = NULL; + spin_lock(&fcc->issue_lock); + if (fcc->issue_list) { + fcc->dispatch_list = fcc->issue_list; + fcc->issue_list = fcc->issue_tail = NULL; } - spin_unlock(&sm_i->issue_lock); + spin_unlock(&fcc->issue_lock); - if (sm_i->dispatch_list) { + if (fcc->dispatch_list) { struct bio *bio = bio_alloc(GFP_NOIO, 0); struct flush_cmd *cmd, *next; int ret; @@ -221,47 +220,79 @@ repeat: bio->bi_bdev = sbi->sb->s_bdev; ret = submit_bio_wait(WRITE_FLUSH, bio); - for (cmd = sm_i->dispatch_list; cmd; cmd = next) { + for (cmd = fcc->dispatch_list; cmd; cmd = next) { cmd->ret = ret; next = cmd->next; complete(&cmd->wait); } - sm_i->dispatch_list = NULL; + bio_put(bio); + fcc->dispatch_list = NULL; } - wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list); + wait_event_interruptible(*q, + kthread_should_stop() || fcc->issue_list); goto repeat; } int f2fs_issue_flush(struct f2fs_sb_info *sbi) { - struct f2fs_sm_info *sm_i = SM_I(sbi); - struct flush_cmd *cmd; - int ret; + struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info; + struct flush_cmd cmd; if (!test_opt(sbi, FLUSH_MERGE)) return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); - cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC); - cmd->next = NULL; - cmd->ret = 0; - init_completion(&cmd->wait); + init_completion(&cmd.wait); + cmd.next = NULL; - spin_lock(&sm_i->issue_lock); - if (sm_i->issue_list) - sm_i->issue_tail->next = cmd; + spin_lock(&fcc->issue_lock); + if (fcc->issue_list) + fcc->issue_tail->next = &cmd; else - sm_i->issue_list = cmd; - sm_i->issue_tail = cmd; - spin_unlock(&sm_i->issue_lock); + fcc->issue_list = &cmd; + fcc->issue_tail = &cmd; + spin_unlock(&fcc->issue_lock); - if (!sm_i->dispatch_list) - wake_up(&sm_i->flush_wait_queue); + if (!fcc->dispatch_list) + wake_up(&fcc->flush_wait_queue); - wait_for_completion(&cmd->wait); - ret = cmd->ret; - kmem_cache_free(flush_cmd_slab, cmd); - return ret; + wait_for_completion(&cmd.wait); + + return cmd.ret; +} + +int create_flush_cmd_control(struct f2fs_sb_info *sbi) +{ + dev_t dev = sbi->sb->s_bdev->bd_dev; + struct flush_cmd_control *fcc; + int err = 0; + + fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); + if (!fcc) + return -ENOMEM; + spin_lock_init(&fcc->issue_lock); + init_waitqueue_head(&fcc->flush_wait_queue); + fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, + "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); + if (IS_ERR(fcc->f2fs_issue_flush)) { + err = PTR_ERR(fcc->f2fs_issue_flush); + kfree(fcc); + return err; + } + sbi->sm_info->cmd_control_info = fcc; + + return err; +} + +void destroy_flush_cmd_control(struct f2fs_sb_info *sbi) +{ + struct flush_cmd_control *fcc = + sbi->sm_info->cmd_control_info; + + if (fcc && fcc->f2fs_issue_flush) + kthread_stop(fcc->f2fs_issue_flush); + kfree(fcc); + sbi->sm_info->cmd_control_info = NULL; } static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, @@ -336,13 +367,26 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static void f2fs_issue_discard(struct f2fs_sb_info *sbi, +static int f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { sector_t start = SECTOR_FROM_BLOCK(sbi, blkstart); sector_t len = SECTOR_FROM_BLOCK(sbi, blklen); - blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); + return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); +} + +void discard_next_dnode(struct f2fs_sb_info *sbi) +{ + struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); + block_t blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + + if (f2fs_issue_discard(sbi, blkaddr, 1)) { + struct page *page = grab_meta_page(sbi, blkaddr); + /* zero-filled page */ + set_page_dirty(page); + f2fs_put_page(page, 1); + } } static void add_discard_addrs(struct f2fs_sb_info *sbi, @@ -1832,7 +1876,6 @@ int build_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - dev_t dev = sbi->sb->s_bdev->bd_dev; struct f2fs_sm_info *sm_info; int err; @@ -1860,14 +1903,10 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->nr_discards = 0; sm_info->max_discards = 0; - if (test_opt(sbi, FLUSH_MERGE)) { - spin_lock_init(&sm_info->issue_lock); - init_waitqueue_head(&sm_info->flush_wait_queue); - - sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, - "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); - if (IS_ERR(sm_info->f2fs_issue_flush)) - return PTR_ERR(sm_info->f2fs_issue_flush); + if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) { + err = create_flush_cmd_control(sbi); + if (err) + return err; } err = build_sit_info(sbi); @@ -1976,10 +2015,10 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi) void destroy_segment_manager(struct f2fs_sb_info *sbi) { struct f2fs_sm_info *sm_info = SM_I(sbi); + if (!sm_info) return; - if (sm_info->f2fs_issue_flush) - kthread_stop(sm_info->f2fs_issue_flush); + destroy_flush_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); @@ -1994,17 +2033,10 @@ int __init create_segment_manager_caches(void) sizeof(struct discard_entry)); if (!discard_entry_slab) return -ENOMEM; - flush_cmd_slab = f2fs_kmem_cache_create("flush_command", - sizeof(struct flush_cmd)); - if (!flush_cmd_slab) { - kmem_cache_destroy(discard_entry_slab); - return -ENOMEM; - } return 0; } void destroy_segment_manager_caches(void) { kmem_cache_destroy(discard_entry_slab); - kmem_cache_destroy(flush_cmd_slab); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index c756923a7302..b2b18637cb9e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -514,7 +514,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, BG_GC)) seq_printf(seq, ",background_gc=%s", "on"); else seq_printf(seq, ",background_gc=%s", "off"); @@ -542,7 +542,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); - if (test_opt(sbi, FLUSH_MERGE)) + if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); @@ -594,6 +594,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct f2fs_mount_info org_mount_opt; int err, active_logs; + bool need_restart_gc = false; + bool need_stop_gc = false; sync_filesystem(sb); @@ -611,7 +613,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) /* * Previous and new state of filesystem is RO, - * so no point in checking GC conditions. + * so skip checking GC and FLUSH_MERGE conditions. */ if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) goto skip; @@ -625,18 +627,40 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) if (sbi->gc_thread) { stop_gc_thread(sbi); f2fs_sync_fs(sb, 1); + need_restart_gc = true; } } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; + need_stop_gc = true; + } + + /* + * We stop issue flush thread if FS is mounted as RO + * or if flush_merge is not passed in mount option. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { + destroy_flush_cmd_control(sbi); + } else if (test_opt(sbi, FLUSH_MERGE) && + !sbi->sm_info->cmd_control_info) { + err = create_flush_cmd_control(sbi); + if (err) + goto restore_gc; } skip: /* Update the POSIXACL Flag */ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); return 0; - +restore_gc: + if (need_restart_gc) { + if (start_gc_thread(sbi)) + f2fs_msg(sbi->sb, KERN_WARNING, + "background gc thread is stop"); + } else if (need_stop_gc) { + stop_gc_thread(sbi); + } restore_opts: sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 503c2451131e..8bea941ee309 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -26,7 +26,7 @@ #include "xattr.h" static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) + size_t list_size, const char *name, size_t len, int type) { struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); int total_len, prefix_len = 0; @@ -53,11 +53,11 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, return -EINVAL; } - total_len = prefix_len + name_len + 1; + total_len = prefix_len + len + 1; if (list && total_len <= list_size) { memcpy(list, prefix, prefix_len); - memcpy(list + prefix_len, name, name_len); - list[prefix_len + name_len] = '\0'; + memcpy(list + prefix_len, name, len); + list[prefix_len + len] = '\0'; } return total_len; } @@ -108,11 +108,12 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); + return f2fs_setxattr(dentry->d_inode, type, name, + value, size, NULL, flags); } static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, - size_t list_size, const char *name, size_t name_len, int type) + size_t list_size, const char *name, size_t len, int type) { const char *xname = F2FS_SYSTEM_ADVISE_PREFIX; size_t size; @@ -155,9 +156,6 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, } #ifdef CONFIG_F2FS_FS_SECURITY -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - struct page *ipage); static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *page) { @@ -165,9 +163,9 @@ static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, int err = 0; for (xattr = xattr_array; xattr->name != NULL; xattr++) { - err = __f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, xattr->name, xattr->value, - xattr->value_len, (struct page *)page); + xattr->value_len, (struct page *)page, 0); if (err < 0) break; } @@ -241,26 +239,26 @@ const struct xattr_handler *f2fs_xattr_handlers[] = { NULL, }; -static inline const struct xattr_handler *f2fs_xattr_handler(int name_index) +static inline const struct xattr_handler *f2fs_xattr_handler(int index) { const struct xattr_handler *handler = NULL; - if (name_index > 0 && name_index < ARRAY_SIZE(f2fs_xattr_handler_map)) - handler = f2fs_xattr_handler_map[name_index]; + if (index > 0 && index < ARRAY_SIZE(f2fs_xattr_handler_map)) + handler = f2fs_xattr_handler_map[index]; return handler; } -static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int name_index, - size_t name_len, const char *name) +static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, + size_t len, const char *name) { struct f2fs_xattr_entry *entry; list_for_each_xattr(entry, base_addr) { - if (entry->e_name_index != name_index) + if (entry->e_name_index != index) continue; - if (entry->e_name_len != name_len) + if (entry->e_name_len != len) continue; - if (!memcmp(entry->e_name, name, name_len)) + if (!memcmp(entry->e_name, name, len)) break; } return entry; @@ -347,6 +345,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, if (ipage) { inline_addr = inline_xattr_addr(ipage); + f2fs_wait_on_page_writeback(ipage, NODE); } else { page = get_node_page(sbi, inode->i_ino); if (IS_ERR(page)) { @@ -354,6 +353,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return PTR_ERR(page); } inline_addr = inline_xattr_addr(page); + f2fs_wait_on_page_writeback(page, NODE); } memcpy(inline_addr, txattr_addr, inline_size); f2fs_put_page(page, 1); @@ -374,6 +374,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return PTR_ERR(xpage); } f2fs_bug_on(new_nid); + f2fs_wait_on_page_writeback(xpage, NODE); } else { struct dnode_of_data dn; set_new_dnode(&dn, inode, NULL, NULL, new_nid); @@ -396,42 +397,43 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, return 0; } -int f2fs_getxattr(struct inode *inode, int name_index, const char *name, +int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size) { struct f2fs_xattr_entry *entry; void *base_addr; int error = 0; - size_t value_len, name_len; + size_t size, len; if (name == NULL) return -EINVAL; - name_len = strlen(name); - if (name_len > F2FS_NAME_LEN) + + len = strlen(name); + if (len > F2FS_NAME_LEN) return -ERANGE; base_addr = read_all_xattrs(inode, NULL); if (!base_addr) return -ENOMEM; - entry = __find_xattr(base_addr, name_index, name_len, name); + entry = __find_xattr(base_addr, index, len, name); if (IS_XATTR_LAST_ENTRY(entry)) { error = -ENODATA; goto cleanup; } - value_len = le16_to_cpu(entry->e_value_size); + size = le16_to_cpu(entry->e_value_size); - if (buffer && value_len > buffer_size) { + if (buffer && size > buffer_size) { error = -ERANGE; goto cleanup; } if (buffer) { char *pval = entry->e_name + entry->e_name_len; - memcpy(buffer, pval, value_len); + memcpy(buffer, pval, size); } - error = value_len; + error = size; cleanup: kzfree(base_addr); @@ -475,15 +477,15 @@ cleanup: return error; } -static int __f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len, - struct page *ipage) +static int __f2fs_setxattr(struct inode *inode, int index, + const char *name, const void *value, size_t size, + struct page *ipage, int flags) { struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_xattr_entry *here, *last; void *base_addr; int found, newsize; - size_t name_len; + size_t len; __u32 new_hsize; int error = -ENOMEM; @@ -491,11 +493,11 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, return -EINVAL; if (value == NULL) - value_len = 0; + size = 0; - name_len = strlen(name); + len = strlen(name); - if (name_len > F2FS_NAME_LEN || value_len > MAX_VALUE_LEN(inode)) + if (len > F2FS_NAME_LEN || size > MAX_VALUE_LEN(inode)) return -ERANGE; base_addr = read_all_xattrs(inode, ipage); @@ -503,16 +505,23 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, goto exit; /* find entry with wanted name. */ - here = __find_xattr(base_addr, name_index, name_len, name); + here = __find_xattr(base_addr, index, len, name); found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1; - last = here; + if ((flags & XATTR_REPLACE) && !found) { + error = -ENODATA; + goto exit; + } else if ((flags & XATTR_CREATE) && found) { + error = -EEXIST; + goto exit; + } + + last = here; while (!IS_XATTR_LAST_ENTRY(last)) last = XATTR_NEXT_ENTRY(last); - newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + - name_len + value_len); + newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size); /* 1. Check space */ if (value) { @@ -555,12 +564,12 @@ static int __f2fs_setxattr(struct inode *inode, int name_index, * We just write new entry. */ memset(last, 0, newsize); - last->e_name_index = name_index; - last->e_name_len = name_len; - memcpy(last->e_name, name, name_len); - pval = last->e_name + name_len; - memcpy(pval, value, value_len); - last->e_value_size = cpu_to_le16(value_len); + last->e_name_index = index; + last->e_name_len = len; + memcpy(last->e_name, name, len); + pval = last->e_name + len; + memcpy(pval, value, size); + last->e_value_size = cpu_to_le16(size); new_hsize += newsize; } @@ -583,18 +592,23 @@ exit: return error; } -int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len, struct page *ipage) +int f2fs_setxattr(struct inode *inode, int index, const char *name, + const void *value, size_t size, + struct page *ipage, int flags) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); int err; + /* this case is only from init_inode_metadata */ + if (ipage) + return __f2fs_setxattr(inode, index, name, value, + size, ipage, flags); f2fs_balance_fs(sbi); f2fs_lock_op(sbi); /* protect xattr_ver */ down_write(&F2FS_I(inode)->i_sem); - err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); + err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); up_write(&F2FS_I(inode)->i_sem); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index b21d9ebdeff3..34ab7dbcf5e3 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -114,18 +114,18 @@ extern const struct xattr_handler f2fs_xattr_security_handler; extern const struct xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, - const void *, size_t, struct page *); + const void *, size_t, struct page *, int); extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else #define f2fs_xattr_handlers NULL -static inline int f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len) +static inline int f2fs_setxattr(struct inode *inode, int index, + const char *name, const void *value, size_t size, int flags) { return -EOPNOTSUPP; } -static inline int f2fs_getxattr(struct inode *inode, int name_index, +static inline int f2fs_getxattr(struct inode *inode, int index, const char *name, void *buffer, size_t buffer_size) { return -EOPNOTSUPP; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index df53e1753a76..6ff0b0b42d47 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -19,6 +19,7 @@ #define F2FS_LOG_SECTORS_PER_BLOCK 3 /* 4KB: F2FS_BLKSIZE */ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ +#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE) #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ @@ -75,6 +76,7 @@ struct f2fs_super_block { __le16 volume_name[512]; /* volume name */ __le32 extension_count; /* # of extensions below */ __u8 extension_list[F2FS_MAX_EXTENSION][8]; /* extension array */ + __le32 cp_payload; } __packed; /* @@ -146,6 +148,9 @@ struct f2fs_extent { #define ADDRS_PER_BLOCK 1018 /* Address Pointers in a Direct Block */ #define NIDS_PER_BLOCK 1018 /* Node IDs in an Indirect Block */ +#define ADDRS_PER_PAGE(page, fi) \ + (IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK) + #define NODE_DIR1_BLOCK (DEF_ADDRS_PER_INODE + 1) #define NODE_DIR2_BLOCK (DEF_ADDRS_PER_INODE + 2) #define NODE_IND1_BLOCK (DEF_ADDRS_PER_INODE + 3) @@ -391,6 +396,9 @@ typedef __le32 f2fs_hash_t; /* MAX level for dir lookup */ #define MAX_DIR_HASH_DEPTH 63 +/* MAX buckets in one level of dir */ +#define MAX_DIR_BUCKETS (1 << ((MAX_DIR_HASH_DEPTH / 2) - 1)) + #define SIZE_OF_DIR_ENTRY 11 /* by byte */ #define SIZE_OF_DENTRY_BITMAP ((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \ BITS_PER_BYTE) diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 67f38faac589..b983990b4a9f 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -659,6 +659,66 @@ DEFINE_EVENT_CONDITION(f2fs__submit_bio, f2fs_submit_read_bio, TP_CONDITION(bio) ); +TRACE_EVENT(f2fs_write_begin, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int flags), + + TP_ARGS(inode, pos, len, flags), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, pos) + __field(unsigned int, len) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->flags = flags; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, flags = %u", + show_dev_ino(__entry), + (unsigned long long)__entry->pos, + __entry->len, + __entry->flags) +); + +TRACE_EVENT(f2fs_write_end, + + TP_PROTO(struct inode *inode, loff_t pos, unsigned int len, + unsigned int copied), + + TP_ARGS(inode, pos, len, copied), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(loff_t, pos) + __field(unsigned int, len) + __field(unsigned int, copied) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->pos = pos; + __entry->len = len; + __entry->copied = copied; + ), + + TP_printk("dev = (%d,%d), ino = %lu, pos = %llu, len = %u, copied = %u", + show_dev_ino(__entry), + (unsigned long long)__entry->pos, + __entry->len, + __entry->copied) +); + DECLARE_EVENT_CLASS(f2fs__page, TP_PROTO(struct page *page, int type), @@ -672,6 +732,7 @@ DECLARE_EVENT_CLASS(f2fs__page, __field(int, dir) __field(pgoff_t, index) __field(int, dirty) + __field(int, uptodate) ), TP_fast_assign( @@ -681,14 +742,31 @@ DECLARE_EVENT_CLASS(f2fs__page, __entry->dir = S_ISDIR(page->mapping->host->i_mode); __entry->index = page->index; __entry->dirty = PageDirty(page); + __entry->uptodate = PageUptodate(page); ), - TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, dirty = %d", + TP_printk("dev = (%d,%d), ino = %lu, %s, %s, index = %lu, " + "dirty = %d, uptodate = %d", show_dev_ino(__entry), show_block_type(__entry->type), show_file_type(__entry->dir), (unsigned long)__entry->index, - __entry->dirty) + __entry->dirty, + __entry->uptodate) +); + +DEFINE_EVENT(f2fs__page, f2fs_writepage, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) +); + +DEFINE_EVENT(f2fs__page, f2fs_readpage, + + TP_PROTO(struct page *page, int type), + + TP_ARGS(page, type) ); DEFINE_EVENT(f2fs__page, f2fs_set_page_dirty, @@ -705,6 +783,70 @@ DEFINE_EVENT(f2fs__page, f2fs_vm_page_mkwrite, TP_ARGS(page, type) ); +TRACE_EVENT(f2fs_writepages, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc, int type), + + TP_ARGS(inode, wbc, type), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(int, type) + __field(int, dir) + __field(long, nr_to_write) + __field(long, pages_skipped) + __field(loff_t, range_start) + __field(loff_t, range_end) + __field(pgoff_t, writeback_index) + __field(int, sync_mode) + __field(char, for_kupdate) + __field(char, for_background) + __field(char, tagged_writepages) + __field(char, for_reclaim) + __field(char, range_cyclic) + __field(char, for_sync) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->type = type; + __entry->dir = S_ISDIR(inode->i_mode); + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->range_start = wbc->range_start; + __entry->range_end = wbc->range_end; + __entry->writeback_index = inode->i_mapping->writeback_index; + __entry->sync_mode = wbc->sync_mode; + __entry->for_kupdate = wbc->for_kupdate; + __entry->for_background = wbc->for_background; + __entry->tagged_writepages = wbc->tagged_writepages; + __entry->for_reclaim = wbc->for_reclaim; + __entry->range_cyclic = wbc->range_cyclic; + __entry->for_sync = wbc->for_sync; + ), + + TP_printk("dev = (%d,%d), ino = %lu, %s, %s, nr_to_write %ld, " + "skipped %ld, start %lld, end %lld, wb_idx %lu, sync_mode %d, " + "kupdate %u background %u tagged %u reclaim %u cyclic %u sync %u", + show_dev_ino(__entry), + show_block_type(__entry->type), + show_file_type(__entry->dir), + __entry->nr_to_write, + __entry->pages_skipped, + __entry->range_start, + __entry->range_end, + (unsigned long)__entry->writeback_index, + __entry->sync_mode, + __entry->for_kupdate, + __entry->for_background, + __entry->tagged_writepages, + __entry->for_reclaim, + __entry->range_cyclic, + __entry->for_sync) +); + TRACE_EVENT(f2fs_submit_page_mbio, TP_PROTO(struct page *page, int rw, int type, block_t blk_addr), |