From 71baba4b92dc1fa1bc461742c6ab1942ec6034e9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:28 -0800 Subject: mm, page_alloc: rename __GFP_WAIT to __GFP_RECLAIM __GFP_WAIT was used to signal that the caller was in atomic context and could not sleep. Now it is possible to distinguish between true atomic context and callers that are not willing to sleep. The latter should clear __GFP_DIRECT_RECLAIM so kswapd will still wake. As clearing __GFP_WAIT behaves differently, there is a risk that people will clear the wrong flags. This patch renames __GFP_WAIT to __GFP_RECLAIM to clearly indicate what it does -- setting it allows all reclaim activity, clearing them prevents it. [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mel Gorman Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Cc: Christoph Lameter Acked-by: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/mdt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index fe529a87a208..03246cac3338 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -72,7 +72,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) } /* Default GFP flags using highmem */ -#define NILFS_MDT_GFP (__GFP_WAIT | __GFP_IO | __GFP_HIGHMEM) +#define NILFS_MDT_GFP (__GFP_RECLAIM | __GFP_IO | __GFP_HIGHMEM) int nilfs_mdt_get_block(struct inode *, unsigned long, int, void (*init_block)(struct inode *, -- cgit v1.2.3 From c62d25556be6c965dc14288e796a576e8e39a7e9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 6 Nov 2015 16:28:49 -0800 Subject: mm, fs: introduce mapping_gfp_constraint() There are many places which use mapping_gfp_mask to restrict a more generic gfp mask which would be used for allocations which are not directly related to the page cache but they are performed 
in the same context. Let's introduce a helper function which makes the restriction explicit and easier to track. This patch doesn't introduce any functional changes. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Suggested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 3 +-- fs/btrfs/compression.c | 7 +++---- fs/btrfs/ctree.h | 2 +- fs/btrfs/free-space-cache.c | 4 ++-- fs/buffer.c | 2 +- fs/ceph/addr.c | 7 ++++--- fs/cifs/file.c | 2 +- fs/ext4/inode.c | 2 +- fs/ext4/readpage.c | 2 +- fs/logfs/segment.c | 2 +- fs/mpage.c | 4 ++-- fs/namei.c | 2 +- fs/nilfs2/inode.c | 4 ++-- fs/ntfs/file.c | 4 ++-- fs/splice.c | 2 +- include/linux/pagemap.h | 7 +++++++ mm/filemap.c | 4 ++-- mm/readahead.c | 4 ++-- 19 files changed, 36 insertions(+), 30 deletions(-) (limited to 'fs/nilfs2') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 3c2d4abd71c5..1d47d2e9487c 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -491,7 +491,7 @@ struct page **drm_gem_get_pages(struct drm_gem_object *obj) * __GFP_DMA32 to be set in mapping_gfp_mask(inode->i_mapping) * so shmem can relocate pages during swapin if required. 
*/ - BUG_ON((mapping_gfp_mask(mapping) & __GFP_DMA32) && + BUG_ON(mapping_gfp_constraint(mapping, __GFP_DMA32) && (page_to_pfn(p) >= 0x00100000UL)); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7e505d4be7c0..399aab265db3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2214,9 +2214,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) * Fail silently without starting the shrinker */ mapping = file_inode(obj->base.filp)->i_mapping; - gfp = mapping_gfp_mask(mapping); + gfp = mapping_gfp_constraint(mapping, ~(__GFP_IO | __GFP_RECLAIM)); gfp |= __GFP_NORETRY | __GFP_NOWARN; - gfp &= ~(__GFP_IO | __GFP_RECLAIM); sg = st->sgl; st->nents = 0; for (i = 0; i < page_count; i++) { diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 57ee8ca29b06..36dfeff2c1f4 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -482,13 +482,12 @@ static noinline int add_ra_bio_pages(struct inode *inode, goto next; } - page = __page_cache_alloc(mapping_gfp_mask(mapping) & - ~__GFP_FS); + page = __page_cache_alloc(mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) break; - if (add_to_page_cache_lru(page, mapping, pg_index, - GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) { page_cache_release(page); goto next; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 938efe33be80..eb90f0f1a124 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3316,7 +3316,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) { - return mapping_gfp_mask(mapping) & ~__GFP_FS; + return mapping_gfp_constraint(mapping, ~__GFP_FS); } /* extent-tree.c */ diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index abe3a66bd3ba..ed05da1b977e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -85,8 +85,8 @@ static 
struct inode *__lookup_free_space_inode(struct btrfs_root *root, } mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & - ~(__GFP_FS | __GFP_HIGHMEM)); + mapping_gfp_constraint(inode->i_mapping, + ~(__GFP_FS | __GFP_HIGHMEM))); return inode; } diff --git a/fs/buffer.c b/fs/buffer.c index 82283abb2795..51aff0296ce2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -999,7 +999,7 @@ grow_dev_page(struct block_device *bdev, sector_t block, int ret = 0; /* Will call free_more_memory() */ gfp_t gfp_mask; - gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; + gfp_mask = mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS) | gfp; /* * XXX: __getblk_slow() can not really deal with failure and diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 9d23e788d1df..b7d218a168fb 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1283,8 +1283,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) int ret1; struct address_space *mapping = inode->i_mapping; struct page *page = find_or_create_page(mapping, 0, - mapping_gfp_mask(mapping) & - ~__GFP_FS); + mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) { ret = VM_FAULT_OOM; goto out; @@ -1428,7 +1428,8 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, if (i_size_read(inode) == 0) return; page = find_or_create_page(mapping, 0, - mapping_gfp_mask(mapping) & ~__GFP_FS); + mapping_gfp_constraint(mapping, + ~__GFP_FS)); if (!page) return; if (PageUptodate(page)) { diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 47c5c97e2dd3..0068e82217c3 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3380,7 +3380,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, struct page *page, *tpage; unsigned int expected_index; int rc; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); INIT_LIST_HEAD(tmplist); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 
612fbcf76b5c..60aaecd5598b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3344,7 +3344,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, int err = 0; page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, - mapping_gfp_mask(mapping) & ~__GFP_FS); + mapping_gfp_constraint(mapping, ~__GFP_FS)); if (!page) return -ENOMEM; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 560af0437704..1061611ae14d 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -166,7 +166,7 @@ int ext4_mpage_readpages(struct address_space *mapping, page = list_entry(pages->prev, struct page, lru); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) + mapping_gfp_constraint(mapping, GFP_KERNEL))) goto next_page; } diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 7f9b096d8d57..6de0fbfc6c00 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c @@ -57,7 +57,7 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index, filler_t *filler = super->s_devops->readpage; struct page *page; - BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS); + BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS)); if (use_filler) page = read_cache_page(mapping, index, filler, sb); else { diff --git a/fs/mpage.c b/fs/mpage.c index 09abba7653aa..1480d3a18037 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -361,7 +361,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, sector_t last_block_in_bio = 0; struct buffer_head map_bh; unsigned long first_logical_block = 0; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(mapping); + gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); map_bh.b_state = 0; map_bh.b_size = 0; @@ -397,7 +397,7 @@ int mpage_readpage(struct page *page, get_block_t get_block) sector_t last_block_in_bio = 0; struct buffer_head map_bh; unsigned long first_logical_block = 0; - gfp_t gfp = GFP_KERNEL & mapping_gfp_mask(page->mapping); + gfp_t gfp = 
mapping_gfp_constraint(page->mapping, GFP_KERNEL); map_bh.b_state = 0; map_bh.b_size = 0; diff --git a/fs/namei.c b/fs/namei.c index 0d3340b32e14..3c18970a8899 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4604,7 +4604,7 @@ EXPORT_SYMBOL(__page_symlink); int page_symlink(struct inode *inode, const char *symname, int len) { return __page_symlink(inode, symname, len, - !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS)); + !mapping_gfp_constraint(inode->i_mapping, __GFP_FS)); } EXPORT_SYMBOL(page_symlink); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 4a73d6dffabf..ac2f64943ff4 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -356,7 +356,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) goto failed; mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); @@ -522,7 +522,7 @@ static int __nilfs_read_inode(struct super_block *sb, up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); mapping_set_gfp_mask(inode->i_mapping, - mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + mapping_gfp_constraint(inode->i_mapping, ~__GFP_FS)); return 0; failed_unmap: diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 262561fea923..9d383e5eff0e 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -525,8 +525,8 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, } } err = add_to_page_cache_lru(*cached_page, mapping, - index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + index, + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (unlikely(err)) { if (err == -EEXIST) continue; diff --git a/fs/splice.c b/fs/splice.c index 5fc1e50a7f30..801c21cd77fe 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -360,7 +360,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; error = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + 
mapping_gfp_constraint(mapping, GFP_KERNEL)); if (unlikely(error)) { page_cache_release(page); if (error == -EEXIST) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a6c78e00ea96..26eabf5ec718 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -69,6 +69,13 @@ static inline gfp_t mapping_gfp_mask(struct address_space * mapping) return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; } +/* Restricts the given gfp_mask to what the mapping allows. */ +static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, + gfp_t gfp_mask) +{ + return mapping_gfp_mask(mapping) & gfp_mask; +} + /* * This is non-atomic. Only to be used before the mapping is activated. * Probably needs a barrier... diff --git a/mm/filemap.c b/mm/filemap.c index 6ef3674c0763..1bb007624b53 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1722,7 +1722,7 @@ no_cached_page: goto out; } error = add_to_page_cache_lru(page, mapping, index, - GFP_KERNEL & mapping_gfp_mask(mapping)); + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error) { page_cache_release(page); if (error == -EEXIST) { @@ -1824,7 +1824,7 @@ static int page_cache_read(struct file *file, pgoff_t offset) return -ENOMEM; ret = add_to_page_cache_lru(page, mapping, offset, - GFP_KERNEL & mapping_gfp_mask(mapping)); + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (ret == 0) ret = mapping->a_ops->readpage(file, page); else if (ret == -EEXIST) diff --git a/mm/readahead.c b/mm/readahead.c index 998ad592f408..ba22d7fe0afb 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -90,7 +90,7 @@ int read_cache_pages(struct address_space *mapping, struct list_head *pages, page = list_to_page(pages); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) { + mapping_gfp_constraint(mapping, GFP_KERNEL))) { read_cache_pages_invalidate_page(mapping, page); continue; } @@ -128,7 +128,7 @@ static int read_pages(struct address_space *mapping, 
struct file *filp, struct page *page = list_to_page(pages); list_del(&page->lru); if (!add_to_page_cache_lru(page, mapping, page->index, - GFP_KERNEL & mapping_gfp_mask(mapping))) { + mapping_gfp_constraint(mapping, GFP_KERNEL))) { mapping->a_ops->readpage(filp, page); } page_cache_release(page); -- cgit v1.2.3 From da80a39fc962ceca085ddfb7d63e00309b305f17 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 6 Nov 2015 16:31:40 -0800 Subject: nilfs2: drop null test before destroy functions Remove unneeded NULL test. The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; @@ -if (x != NULL) \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x); // Signed-off-by: Julia Lawall Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/super.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f47585bfeb01..c69455a543bc 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1405,14 +1405,10 @@ static void nilfs_destroy_cachep(void) */ rcu_barrier(); - if (nilfs_inode_cachep) - kmem_cache_destroy(nilfs_inode_cachep); - if (nilfs_transaction_cachep) - kmem_cache_destroy(nilfs_transaction_cachep); - if (nilfs_segbuf_cachep) - kmem_cache_destroy(nilfs_segbuf_cachep); - if (nilfs_btree_path_cache) - kmem_cache_destroy(nilfs_btree_path_cache); + kmem_cache_destroy(nilfs_inode_cachep); + kmem_cache_destroy(nilfs_transaction_cachep); + kmem_cache_destroy(nilfs_segbuf_cachep); + kmem_cache_destroy(nilfs_btree_path_cache); } static int __init nilfs_init_cachep(void) -- cgit v1.2.3 From b7bed712d090c340b97d455c5cb62d151e004503 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2015 16:31:43 -0800 Subject: nilfs2: use nilfs_warning() in allocator implementation This uses nilfs_warning() to replace "printk(KERN_WARNING ...);" in the bitmap based allocator 
implementation of nilfs2. The warning messages are modified to include the device name and the inode number in each message. This makes it clear which metadata file of which device has output warnings such as "entry number xxxx already freed". Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 8df0f3b7839b..afe983643fdf 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -583,8 +583,10 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) - printk(KERN_WARNING "%s: entry number %llu already freed\n", - __func__, (unsigned long long)req->pr_entry_nr); + nilfs_warning(inode->i_sb, __func__, + "entry number %llu already freed: ino=%lu\n", + (unsigned long long)req->pr_entry_nr, + (unsigned long)inode->i_ino); else nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); @@ -620,8 +622,10 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) - printk(KERN_WARNING "%s: entry number %llu already freed\n", - __func__, (unsigned long long)req->pr_entry_nr); + nilfs_warning(inode->i_sb, __func__, + "entry number %llu already freed: ino=%lu\n", + (unsigned long long)req->pr_entry_nr, + (unsigned long)inode->i_ino); else nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); @@ -734,10 +738,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) if (!nilfs_clear_bit_atomic( nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) { - printk(KERN_WARNING - "%s: entry number %llu already freed\n", - __func__, - (unsigned long long)entry_nrs[j]); + nilfs_warning(inode->i_sb, __func__, + "entry 
number %llu already freed: ino=%lu\n", + (unsigned long long)entry_nrs[j], + (unsigned long)inode->i_ino); } else { n++; } -- cgit v1.2.3 From 4e9e63a671fbe13f448fb2e69dfdbb6c2a008368 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2015 16:31:45 -0800 Subject: nilfs2: do not call nilfs_mdt_bgl_lock() needlessly In the bitmap based allocator implementation, nilfs_mdt_bgl_lock() helper is frequently used to get a spinlock protecting a target block group. This reduces its usage and simplifies arguments of some related functions by directly passing a pointer to the spinlock. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 84 ++++++++++++++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 44 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index afe983643fdf..ff0d62ce165b 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -133,38 +133,34 @@ nilfs_palloc_bitmap_blkoff(const struct inode *inode, unsigned long group) /** * nilfs_palloc_group_desc_nfrees - get the number of free entries in a group - * @inode: inode of metadata file using this allocator - * @group: group number * @desc: pointer to descriptor structure for the group + * @lock: spin lock protecting @desc */ static unsigned long -nilfs_palloc_group_desc_nfrees(struct inode *inode, unsigned long group, - const struct nilfs_palloc_group_desc *desc) +nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, + spinlock_t *lock) { unsigned long nfree; - spin_lock(nilfs_mdt_bgl_lock(inode, group)); + spin_lock(lock); nfree = le32_to_cpu(desc->pg_nfrees); - spin_unlock(nilfs_mdt_bgl_lock(inode, group)); + spin_unlock(lock); return nfree; } /** * nilfs_palloc_group_desc_add_entries - adjust count of free entries - * @inode: inode of metadata file using this allocator - * @group: group number * @desc: pointer to descriptor structure for the group + * 
@lock: spin lock protecting @desc * @n: delta to be added */ static void -nilfs_palloc_group_desc_add_entries(struct inode *inode, - unsigned long group, - struct nilfs_palloc_group_desc *desc, - u32 n) +nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, + spinlock_t *lock, u32 n) { - spin_lock(nilfs_mdt_bgl_lock(inode, group)); + spin_lock(lock); le32_add_cpu(&desc->pg_nfrees, n); - spin_unlock(nilfs_mdt_bgl_lock(inode, group)); + spin_unlock(lock); } /** @@ -332,17 +328,15 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, /** * nilfs_palloc_find_available_slot - find available slot in a group - * @inode: inode of metadata file using this allocator - * @group: group number - * @target: offset number of an entry in the group (start point) * @bitmap: bitmap of the group + * @target: offset number of an entry in the group (start point) * @bsize: size in bits + * @lock: spin lock protecting @bitmap */ -static int nilfs_palloc_find_available_slot(struct inode *inode, - unsigned long group, +static int nilfs_palloc_find_available_slot(unsigned char *bitmap, unsigned long target, - unsigned char *bitmap, - int bsize) + int bsize, + spinlock_t *lock) { int curr, pos, end, i; @@ -351,12 +345,11 @@ static int nilfs_palloc_find_available_slot(struct inode *inode, if (end > bsize) end = bsize; pos = nilfs_find_next_zero_bit(bitmap, end, target); - if (pos < end && - !nilfs_set_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), pos, bitmap)) + if (pos < end && !nilfs_set_bit_atomic(lock, pos, bitmap)) return pos; - } else + } else { end = 0; + } for (i = 0, curr = end; i < bsize; @@ -370,10 +363,8 @@ static int nilfs_palloc_find_available_slot(struct inode *inode, if (end > bsize) end = bsize; pos = nilfs_find_next_zero_bit(bitmap, end, curr); - if ((pos < end) && - !nilfs_set_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), pos, - bitmap)) + if (pos < end && + !nilfs_set_bit_atomic(lock, pos, bitmap)) return pos; } } @@ -477,6 
+468,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, unsigned long group_offset, maxgroup_offset; unsigned long n, entries_per_group, groups_per_desc_block; unsigned long i, j; + spinlock_t *lock; int pos, ret; ngroups = nilfs_palloc_groups_count(inode); @@ -501,8 +493,8 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, n = nilfs_palloc_rest_groups_in_desc_block(inode, group, maxgroup); for (j = 0; j < n; j++, desc++, group++) { - if (nilfs_palloc_group_desc_nfrees(inode, group, desc) - > 0) { + lock = nilfs_mdt_bgl_lock(inode, group); + if (nilfs_palloc_group_desc_nfrees(desc, lock) > 0) { ret = nilfs_palloc_get_bitmap_block( inode, group, 1, &bitmap_bh); if (ret < 0) @@ -510,12 +502,12 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, bitmap_kaddr = kmap(bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(bitmap_bh); pos = nilfs_palloc_find_available_slot( - inode, group, group_offset, bitmap, - entries_per_group); + bitmap, group_offset, + entries_per_group, lock); if (pos >= 0) { /* found a free entry */ nilfs_palloc_group_desc_add_entries( - inode, group, desc, -1); + desc, lock, -1); req->pr_entry_nr = entries_per_group * group + pos; kunmap(desc_bh->b_page); @@ -573,6 +565,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, unsigned long group, group_offset; unsigned char *bitmap; void *desc_kaddr, *bitmap_kaddr; + spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); desc_kaddr = kmap(req->pr_desc_bh->b_page); @@ -580,15 +573,15 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); + lock = nilfs_mdt_bgl_lock(inode, group); - if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) + if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) nilfs_warning(inode->i_sb, __func__, "entry number %llu already freed: 
ino=%lu\n", (unsigned long long)req->pr_entry_nr, (unsigned long)inode->i_ino); else - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + nilfs_palloc_group_desc_add_entries(desc, lock, 1); kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); @@ -613,6 +606,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, void *desc_kaddr, *bitmap_kaddr; unsigned char *bitmap; unsigned long group, group_offset; + spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); desc_kaddr = kmap(req->pr_desc_bh->b_page); @@ -620,14 +614,15 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); - if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) + lock = nilfs_mdt_bgl_lock(inode, group); + + if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) nilfs_warning(inode->i_sb, __func__, "entry number %llu already freed: ino=%lu\n", (unsigned long long)req->pr_entry_nr, (unsigned long)inode->i_ino); else - nilfs_palloc_group_desc_add_entries(inode, group, desc, 1); + nilfs_palloc_group_desc_add_entries(desc, lock, 1); kunmap(req->pr_bitmap_bh->b_page); kunmap(req->pr_desc_bh->b_page); @@ -712,6 +707,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) unsigned char *bitmap; void *desc_kaddr, *bitmap_kaddr; unsigned long group, group_offset; + spinlock_t *lock; int i, j, n, ret; for (i = 0; i < nitems; i = j) { @@ -730,14 +726,14 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) inode, group, desc_bh, desc_kaddr); bitmap_kaddr = kmap(bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(bitmap_bh); + lock = nilfs_mdt_bgl_lock(inode, group); for (j = i, n = 0; (j < nitems) && nilfs_palloc_group_is_in(inode, group, entry_nrs[j]); j++) { nilfs_palloc_group(inode, entry_nrs[j], &group_offset); - if 
(!nilfs_clear_bit_atomic( - nilfs_mdt_bgl_lock(inode, group), - group_offset, bitmap)) { + if (!nilfs_clear_bit_atomic(lock, group_offset, + bitmap)) { nilfs_warning(inode->i_sb, __func__, "entry number %llu already freed: ino=%lu\n", (unsigned long long)entry_nrs[j], @@ -746,7 +742,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) n++; } } - nilfs_palloc_group_desc_add_entries(inode, group, desc, n); + nilfs_palloc_group_desc_add_entries(desc, lock, n); kunmap(bitmap_bh->b_page); kunmap(desc_bh->b_page); -- cgit v1.2.3 From 18c41b37f0f16a0d6e5b1a73563d0c1333e7ef70 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2015 16:31:48 -0800 Subject: nilfs2: refactor nilfs_palloc_find_available_slot() The current implementation of nilfs_palloc_find_available_slot() function is overkill. The underlying bit search routine is well optimized, so this uses it more simply in nilfs_palloc_find_available_slot(). Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index ff0d62ce165b..b15daf871f99 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -335,39 +335,33 @@ void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, */ static int nilfs_palloc_find_available_slot(unsigned char *bitmap, unsigned long target, - int bsize, + unsigned bsize, spinlock_t *lock) { - int curr, pos, end, i; + int pos, end = bsize; - if (target > 0) { - end = (target + BITS_PER_LONG - 1) & ~(BITS_PER_LONG - 1); - if (end > bsize) - end = bsize; - pos = nilfs_find_next_zero_bit(bitmap, end, target); - if (pos < end && !nilfs_set_bit_atomic(lock, pos, bitmap)) - return pos; - } else { - end = 0; + if (likely(target < bsize)) { + pos = target; + do { + pos = nilfs_find_next_zero_bit(bitmap, end, pos); + 
if (pos >= end) + break; + if (!nilfs_set_bit_atomic(lock, pos, bitmap)) + return pos; + } while (++pos < end); + + end = target; } - for (i = 0, curr = end; - i < bsize; - i += BITS_PER_LONG, curr += BITS_PER_LONG) { - /* wrap around */ - if (curr >= bsize) - curr = 0; - while (*((unsigned long *)bitmap + curr / BITS_PER_LONG) - != ~0UL) { - end = curr + BITS_PER_LONG; - if (end > bsize) - end = bsize; - pos = nilfs_find_next_zero_bit(bitmap, end, curr); - if (pos < end && - !nilfs_set_bit_atomic(lock, pos, bitmap)) - return pos; - } + /* wrap around */ + for (pos = 0; pos < end; pos++) { + pos = nilfs_find_next_zero_bit(bitmap, end, pos); + if (pos >= end) + break; + if (!nilfs_set_bit_atomic(lock, pos, bitmap)) + return pos; } + return -ENOSPC; } -- cgit v1.2.3 From b22580948c39d71fb150c1d53148a381011dd109 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2015 16:31:51 -0800 Subject: nilfs2: get rid of nilfs_palloc_group_is_in() This unfolds nilfs_palloc_group_is_in() helper function into nilfs_palloc_freev() function to simplify a range check and an index calculation repeatedly performed in a loop of the function. Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index b15daf871f99..5b7ee36f84c7 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -672,22 +672,6 @@ void nilfs_palloc_abort_free_entry(struct inode *inode, req->pr_desc_bh = NULL; } -/** - * nilfs_palloc_group_is_in - judge if an entry is in a group - * @inode: inode of metadata file using this allocator - * @group: group number - * @nr: serial number of the entry (e.g. 
inode number) - */ -static int -nilfs_palloc_group_is_in(struct inode *inode, unsigned long group, __u64 nr) -{ - __u64 first, last; - - first = group * nilfs_palloc_entries_per_group(inode); - last = first + nilfs_palloc_entries_per_group(inode) - 1; - return (nr >= first) && (nr <= last); -} - /** * nilfs_palloc_freev - deallocate a set of persistent objects * @inode: inode of metadata file using this allocator @@ -701,6 +685,8 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) unsigned char *bitmap; void *desc_kaddr, *bitmap_kaddr; unsigned long group, group_offset; + __u64 group_min_nr; + const unsigned long epg = nilfs_palloc_entries_per_group(inode); spinlock_t *lock; int i, j, n, ret; @@ -715,6 +701,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) brelse(desc_bh); return ret; } + + /* Get the first entry number of the group */ + group_min_nr = (__u64)group * epg; + desc_kaddr = kmap(desc_bh->b_page); desc = nilfs_palloc_block_get_group_desc( inode, group, desc_bh, desc_kaddr); @@ -722,10 +712,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) bitmap = bitmap_kaddr + bh_offset(bitmap_bh); lock = nilfs_mdt_bgl_lock(inode, group); for (j = i, n = 0; - (j < nitems) && nilfs_palloc_group_is_in(inode, group, - entry_nrs[j]); + j < nitems && entry_nrs[j] >= group_min_nr && + entry_nrs[j] < group_min_nr + epg; j++) { - nilfs_palloc_group(inode, entry_nrs[j], &group_offset); + group_offset = entry_nrs[j] - group_min_nr; if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) { nilfs_warning(inode->i_sb, __func__, -- cgit v1.2.3 From da019954dd821682d6b2a8330c9c90acb943c456 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2015 16:31:54 -0800 Subject: nilfs2: add helper functions to delete blocks from dat file This adds delete functions for data blocks of metadata files using bitmap based allocator. 
nilfs_palloc_delete_entry_block() deletes an entry block (e.g. block storing dat entries), and nilfs_palloc_delete_bitmap_block() deletes a bitmap block, respectively. These helpers are intended to be used in the successive change on deallocator of block addresses ("nilfs2: free unused dat file blocks during garbage collection"). Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 5b7ee36f84c7..225b79768865 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -235,6 +235,26 @@ static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, return ret; } +/** + * nilfs_palloc_delete_block - delete a block on the persistent allocator file + * @inode: inode of metadata file using this allocator + * @blkoff: block offset + * @prev: nilfs_bh_assoc struct of the last used buffer + * @lock: spin lock protecting @prev + */ +static int nilfs_palloc_delete_block(struct inode *inode, unsigned long blkoff, + struct nilfs_bh_assoc *prev, + spinlock_t *lock) +{ + spin_lock(lock); + if (prev->bh && blkoff == prev->blkoff) { + brelse(prev->bh); + prev->bh = NULL; + } + spin_unlock(lock); + return nilfs_mdt_delete_block(inode, blkoff); +} + /** * nilfs_palloc_get_desc_block - get buffer head of a group descriptor block * @inode: inode of metadata file using this allocator @@ -273,6 +293,22 @@ static int nilfs_palloc_get_bitmap_block(struct inode *inode, &cache->prev_bitmap, &cache->lock); } +/** + * nilfs_palloc_delete_bitmap_block - delete a bitmap block + * @inode: inode of metadata file using this allocator + * @group: group number + */ +static int nilfs_palloc_delete_bitmap_block(struct inode *inode, + unsigned long group) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return 
nilfs_palloc_delete_block(inode, + nilfs_palloc_bitmap_blkoff(inode, + group), + &cache->prev_bitmap, &cache->lock); +} + /** * nilfs_palloc_get_entry_block - get buffer head of an entry block * @inode: inode of metadata file using this allocator @@ -291,6 +327,20 @@ int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, &cache->prev_entry, &cache->lock); } +/** + * nilfs_palloc_delete_entry_block - delete an entry block + * @inode: inode of metadata file using this allocator + * @nr: serial number of the entry + */ +static int nilfs_palloc_delete_entry_block(struct inode *inode, __u64 nr) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_delete_block(inode, + nilfs_palloc_entry_blkoff(inode, nr), + &cache->prev_entry, &cache->lock); +} + /** * nilfs_palloc_block_get_group_desc - get kernel address of a group descriptor * @inode: inode of metadata file using this allocator -- cgit v1.2.3 From d0c14a9ee79467cd6a04b281577e1e6f74806ab2 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2015 16:31:56 -0800 Subject: nilfs2: free unused dat file blocks during garbage collection As a nilfs2 volume ages, the amount of available disk space decreases little by little due to bloat of DAT (disk address translation) metadata file. Even if we delete all files in a file system and free their block addresses from the DAT file through a garbage collection, empty DAT blocks are not freed. This fixes the issue by extending the deallocator of block addresses so that empty data blocks and empty bitmap blocks of DAT are deleted. The following comparison shows the effect of this patch. Each shows disk amount information of a nilfs2 volume that we cleaned out by deleting all files and running gc after having filled 90% of its capacity. 
Before: Filesystem 1K-blocks Used Available Use% Mounted on /dev/sda1 500105212 3022844 472072192 1% /test After: Filesystem 1K-blocks Used Available Use% Mounted on /dev/sda1 500105212 16380 475078656 1% /test Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 91 ++++++++++++++++++++++++++++++++++++++++++++----------- fs/nilfs2/alloc.h | 1 + 2 files changed, 75 insertions(+), 17 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 225b79768865..b335a32e9561 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -154,13 +154,17 @@ nilfs_palloc_group_desc_nfrees(const struct nilfs_palloc_group_desc *desc, * @lock: spin lock protecting @desc * @n: delta to be added */ -static void +static u32 nilfs_palloc_group_desc_add_entries(struct nilfs_palloc_group_desc *desc, spinlock_t *lock, u32 n) { + u32 nfree; + spin_lock(lock); le32_add_cpu(&desc->pg_nfrees, n); + nfree = le32_to_cpu(desc->pg_nfrees); spin_unlock(lock); + return nfree; } /** @@ -735,12 +739,18 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) unsigned char *bitmap; void *desc_kaddr, *bitmap_kaddr; unsigned long group, group_offset; - __u64 group_min_nr; + __u64 group_min_nr, last_nrs[8]; const unsigned long epg = nilfs_palloc_entries_per_group(inode); + const unsigned epb = NILFS_MDT(inode)->mi_entries_per_block; + unsigned entry_start, end, pos; spinlock_t *lock; - int i, j, n, ret; + int i, j, k, ret; + u32 nfree; for (i = 0; i < nitems; i = j) { + int change_group = false; + int nempties = 0, n = 0; + group = nilfs_palloc_group(inode, entry_nrs[i], &group_offset); ret = nilfs_palloc_get_desc_block(inode, group, 0, &desc_bh); if (ret < 0) @@ -755,17 +765,13 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) /* Get the first entry number of the group */ group_min_nr = (__u64)group * epg; - desc_kaddr = kmap(desc_bh->b_page); - desc = 
nilfs_palloc_block_get_group_desc( - inode, group, desc_bh, desc_kaddr); bitmap_kaddr = kmap(bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(bitmap_bh); lock = nilfs_mdt_bgl_lock(inode, group); - for (j = i, n = 0; - j < nitems && entry_nrs[j] >= group_min_nr && - entry_nrs[j] < group_min_nr + epg; - j++) { - group_offset = entry_nrs[j] - group_min_nr; + + j = i; + entry_start = rounddown(group_offset, epb); + do { if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap)) { nilfs_warning(inode->i_sb, __func__, @@ -775,18 +781,69 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) } else { n++; } - } - nilfs_palloc_group_desc_add_entries(desc, lock, n); + + j++; + if (j >= nitems || entry_nrs[j] < group_min_nr || + entry_nrs[j] >= group_min_nr + epg) { + change_group = true; + } else { + group_offset = entry_nrs[j] - group_min_nr; + if (group_offset >= entry_start && + group_offset < entry_start + epb) { + /* This entry is in the same block */ + continue; + } + } + + /* Test if the entry block is empty or not */ + end = entry_start + epb; + pos = nilfs_find_next_bit(bitmap, end, entry_start); + if (pos >= end) { + last_nrs[nempties++] = entry_nrs[j - 1]; + if (nempties >= ARRAY_SIZE(last_nrs)) + break; + } + + if (change_group) + break; + + /* Go on to the next entry block */ + entry_start = rounddown(group_offset, epb); + } while (true); kunmap(bitmap_bh->b_page); - kunmap(desc_bh->b_page); + mark_buffer_dirty(bitmap_bh); + brelse(bitmap_bh); + for (k = 0; k < nempties; k++) { + ret = nilfs_palloc_delete_entry_block(inode, + last_nrs[k]); + if (ret && ret != -ENOENT) { + nilfs_warning(inode->i_sb, __func__, + "failed to delete block of entry %llu: ino=%lu, err=%d\n", + (unsigned long long)last_nrs[k], + (unsigned long)inode->i_ino, ret); + } + } + + desc_kaddr = kmap_atomic(desc_bh->b_page); + desc = nilfs_palloc_block_get_group_desc( + inode, group, desc_bh, desc_kaddr); + nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n); 
+ kunmap_atomic(desc_kaddr); mark_buffer_dirty(desc_bh); - mark_buffer_dirty(bitmap_bh); nilfs_mdt_mark_dirty(inode); - - brelse(bitmap_bh); brelse(desc_bh); + + if (nfree == nilfs_palloc_entries_per_group(inode)) { + ret = nilfs_palloc_delete_bitmap_block(inode, group); + if (ret && ret != -ENOENT) { + nilfs_warning(inode->i_sb, __func__, + "failed to delete bitmap block of group %lu: ino=%lu, err=%d\n", + group, + (unsigned long)inode->i_ino, ret); + } + } } return 0; } diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 4bd6451b5703..6e6f49aa53df 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -77,6 +77,7 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_set_bit_atomic ext2_set_bit_atomic #define nilfs_clear_bit_atomic ext2_clear_bit_atomic #define nilfs_find_next_zero_bit find_next_zero_bit_le +#define nilfs_find_next_bit find_next_bit_le /** * struct nilfs_bh_assoc - block offset and buffer head association -- cgit v1.2.3 From 58497703837048ac501ce56056eb74b4361108fc Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Fri, 6 Nov 2015 16:31:59 -0800 Subject: nilfs2: add a tracepoint for tracking stage transition of segment construction This patch adds a tracepoint for tracking stage transition of block collection in segment construction. With the tracepoint, we can analyze the behavior of segment construction in depth. It would be useful for bottleneck detection and debugging, etc. The tracepoint is created with the standard trace API of linux (like ext3, ext4, f2fs and btrfs). So we can analyze with existing tools easily. Of course, more detailed analysis will be possible if we can create nilfs specific analysis tools. Below is an example of event dump with Brendan Gregg's perf-tools (https://github.com/brendangregg/perf-tools). Time consumption between each stage can be obtained. $ sudo bin/tpoint nilfs2:nilfs2_collection_stage_transition Tracing nilfs2:nilfs2_collection_stage_transition. Ctrl-C to end.
segctord-14875 [003] ...1 28311.067794: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_INIT segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_GC segctord-14875 [003] ...1 28311.068139: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_FILE segctord-14875 [003] ...1 28311.068486: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_IFILE segctord-14875 [003] ...1 28311.068540: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_CPFILE segctord-14875 [003] ...1 28311.068561: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_SUFILE segctord-14875 [003] ...1 28311.068565: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_DAT segctord-14875 [003] ...1 28311.068573: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_SR segctord-14875 [003] ...1 28311.068574: nilfs2_collection_stage_transition: sci = ffff8800ce6de000 stage = ST_DONE For capturing transition correctly, this patch adds wrappers for the member scnt of nilfs_cstage. With this change, every transition of the stage can produce trace event in a correct manner. 
Signed-off-by: Hitoshi Mitake Signed-off-by: Ryusuke Konishi Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/segment.c | 71 +++++++++++++++++++++++++++++++------------ fs/nilfs2/segment.h | 3 +- include/trace/events/nilfs2.h | 50 ++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 21 deletions(-) create mode 100644 include/trace/events/nilfs2.h (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index c6abbad9b8e3..ef354043b87c 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -77,6 +77,36 @@ enum { NILFS_ST_DONE, }; +#define CREATE_TRACE_POINTS +#include + +/* + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are + * wrapper functions of stage count (nilfs_sc_info->sc_stage.scnt). Users of + * the variable must use them because transition of stage count must involve + * trace events (trace_nilfs2_collection_stage_transition). + * + * nilfs_sc_cstage_get() isn't required for the above purpose because it doesn't + * produce tracepoint events. It is provided just for making the intention + * clear. 
+ */ +static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci) +{ + sci->sc_stage.scnt++; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt) +{ + sci->sc_stage.scnt = next_scnt; + trace_nilfs2_collection_stage_transition(sci); +} + +static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci) +{ + return sci->sc_stage.scnt; +} + /* State flags of collection */ #define NILFS_CF_NODE 0x0001 /* Collecting node blocks */ #define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */ @@ -1062,7 +1092,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) size_t ndone; int err = 0; - switch (sci->sc_stage.scnt) { + switch (nilfs_sc_cstage_get(sci)) { case NILFS_ST_INIT: /* Pre-processes */ sci->sc_stage.flags = 0; @@ -1071,7 +1101,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_nblk_inc = 0; sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN; if (mode == SC_LSEG_DSYNC) { - sci->sc_stage.scnt = NILFS_ST_DSYNC; + nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC); goto dsync_mode; } } @@ -1079,10 +1109,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.dirty_file_ptr = NULL; sci->sc_stage.gc_inode_ptr = NULL; if (mode == SC_FLUSH_DAT) { - sci->sc_stage.scnt = NILFS_ST_DAT; + nilfs_sc_cstage_set(sci, NILFS_ST_DAT); goto dat_stage; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_GC: if (nilfs_doing_gc()) { head = &sci->sc_gc_inodes; @@ -1103,7 +1133,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } sci->sc_stage.gc_inode_ptr = NULL; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_FILE: head = &sci->sc_dirty_files; ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head, @@ -1125,10 +1155,10 @@ static int 
nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } sci->sc_stage.dirty_file_ptr = NULL; if (mode == SC_FLUSH_FILE) { - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; } - sci->sc_stage.scnt++; + nilfs_sc_cstage_inc(sci); sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; /* Fall through */ case NILFS_ST_IFILE: @@ -1136,7 +1166,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; + nilfs_sc_cstage_inc(sci); /* Creating a checkpoint */ err = nilfs_segctor_create_checkpoint(sci); if (unlikely(err)) @@ -1147,7 +1177,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_SUFILE: err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, sci->sc_nfreesegs, &ndone); @@ -1163,7 +1193,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) &nilfs_sc_file_ops); if (unlikely(err)) break; - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_DAT: dat_stage: err = nilfs_segctor_scan_file(sci, nilfs->ns_dat, @@ -1171,10 +1201,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) break; if (mode == SC_FLUSH_DAT) { - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; } - sci->sc_stage.scnt++; /* Fall through */ + nilfs_sc_cstage_inc(sci); /* Fall through */ case NILFS_ST_SR: if (mode == SC_LSEG_SR) { /* Appending a super root */ @@ -1184,7 +1214,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) } /* End of a logical segment */ sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 
0; case NILFS_ST_DSYNC: dsync_mode: @@ -1197,7 +1227,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) break; sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND; - sci->sc_stage.scnt = NILFS_ST_DONE; + nilfs_sc_cstage_set(sci, NILFS_ST_DONE); return 0; case NILFS_ST_DONE: return 0; @@ -1442,7 +1472,8 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, goto failed; /* The current segment is filled up */ - if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE) + if (mode != SC_LSEG_SR || + nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE) break; nilfs_clear_logs(&sci->sc_segbufs); @@ -1946,7 +1977,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int err; - sci->sc_stage.scnt = NILFS_ST_INIT; + nilfs_sc_cstage_set(sci, NILFS_ST_INIT); sci->sc_cno = nilfs->ns_cno; err = nilfs_segctor_collect_dirty_files(sci, nilfs); @@ -1974,7 +2005,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) goto failed; /* Avoid empty segment */ - if (sci->sc_stage.scnt == NILFS_ST_DONE && + if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE && nilfs_segbuf_empty(sci->sc_curseg)) { nilfs_segctor_abort_construction(sci, nilfs, 1); goto out; @@ -1988,7 +2019,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_fill_in_file_bmap(sci); if (mode == SC_LSEG_SR && - sci->sc_stage.scnt >= NILFS_ST_CPFILE) { + nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) { err = nilfs_segctor_fill_in_checkpoint(sci); if (unlikely(err)) goto failed_to_write; @@ -2007,7 +2038,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (unlikely(err)) goto failed_to_write; - if (sci->sc_stage.scnt == NILFS_ST_DONE || + if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE || nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { /* * At this point, we avoid double buffering @@ -2020,7 +2051,7 @@ static int 
nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (err) goto failed_to_write; } - } while (sci->sc_stage.scnt != NILFS_ST_DONE); + } while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE); out: nilfs_segctor_drop_written_files(sci, nilfs); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index a48d6de1e02c..0408b9b2814b 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -67,7 +67,8 @@ struct nilfs_recovery_info { /** * struct nilfs_cstage - Context of collection stage - * @scnt: Stage count + * @scnt: Stage count, must be accessed via wrappers: + * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() * @flags: State flags * @dirty_file_ptr: Pointer on dirty_files list, or inode of a target file * @gc_inode_ptr: Pointer on the list of gc-inodes diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h new file mode 100644 index 000000000000..573da00a486d --- /dev/null +++ b/include/trace/events/nilfs2.h @@ -0,0 +1,50 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM nilfs2 + +#if !defined(_TRACE_NILFS2_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NILFS2_H + +#include + +struct nilfs_sc_info; + +#define show_collection_stage(type) \ + __print_symbolic(type, \ + { NILFS_ST_INIT, "ST_INIT" }, \ + { NILFS_ST_GC, "ST_GC" }, \ + { NILFS_ST_FILE, "ST_FILE" }, \ + { NILFS_ST_IFILE, "ST_IFILE" }, \ + { NILFS_ST_CPFILE, "ST_CPFILE" }, \ + { NILFS_ST_SUFILE, "ST_SUFILE" }, \ + { NILFS_ST_DAT, "ST_DAT" }, \ + { NILFS_ST_SR, "ST_SR" }, \ + { NILFS_ST_DSYNC, "ST_DSYNC" }, \ + { NILFS_ST_DONE, "ST_DONE"}) + +TRACE_EVENT(nilfs2_collection_stage_transition, + + TP_PROTO(struct nilfs_sc_info *sci), + + TP_ARGS(sci), + + TP_STRUCT__entry( + __field(void *, sci) + __field(int, stage) + ), + + TP_fast_assign( + __entry->sci = sci; + __entry->stage = sci->sc_stage.scnt; + ), + + TP_printk("sci = %p stage = %s", + __entry->sci, + show_collection_stage(__entry->stage)) +); + +#endif /* _TRACE_NILFS2_H */ + +/* This part 
must be outside protection */ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE nilfs2 +#include <trace/define_trace.h> -- cgit v1.2.3 From 44fda114601fa5edebeacecb265f09d802670bc0 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Fri, 6 Nov 2015 16:32:02 -0800 Subject: nilfs2: add a tracepoint for transaction events This patch adds a tracepoint for transaction events of nilfs. With the tracepoint, these events can be tracked: begin, abort, commit, trylock, lock, and unlock. Basically, these events have corresponding functions e.g. begin event corresponds to nilfs_transaction_begin(). The unlock event is an exception. It corresponds to the iteration in nilfs_transaction_lock(). Only one tracepoint is introduced: nilfs2_transaction_transition. The above events are distinguished with newly introduced enum. With this tracepoint, we can analyse a critical section of segment construction. Sample output by tpoint of perf-tools: cp-4457 [000] ...1 63.266220: nilfs2_transaction_transition: sb = ffff8802112b8800 ti = ffff8800bf5ccc58 count = 1 flags = 9 state = BEGIN cp-4457 [000] ...1 63.266221: nilfs2_transaction_transition: sb = ffff8802112b8800 ti = ffff8800bf5ccc58 count = 0 flags = 9 state = COMMIT cp-4457 [000] ...1 63.266221: nilfs2_transaction_transition: sb = ffff8802112b8800 ti = ffff8800bf5ccc58 count = 0 flags = 9 state = COMMIT segctord-4371 [001] ...1 68.261196: nilfs2_transaction_transition: sb = ffff8802112b8800 ti = ffff8800b889bdf8 count = 0 flags = 10 state = TRYLOCK segctord-4371 [001] ...1 68.261280: nilfs2_transaction_transition: sb = ffff8802112b8800 ti = ffff8800b889bdf8 count = 0 flags = 10 state = LOCK segctord-4371 [001] ...1 68.261877: nilfs2_transaction_transition: sb = ffff8802112b8800 ti = ffff8800b889bdf8 count = 1 flags = 10 state = BEGIN segctord-4371 [001] ...1 68.262116: nilfs2_transaction_transition: sb = ffff8802112b8800 ti = ffff8800b889bdf8 count = 0 flags = 18 state = COMMIT segctord-4371 [001] ...1 68.265032: nilfs2_transaction_transition: sb = 
ffff8802112b8800 ti = ffff8800b889bdf8 count = 0 flags = 18 state = UNLOCK segctord-4371 [001] ...1 132.376847: nilfs2_transaction_transition: sb = ffff8802112b8800 ti = ffff8800b889bdf8 count = 0 flags = 10 state = TRYLOCK This patch also does trivial cleaning of comma usage in collection stage transition event for consistent coding style. Signed-off-by: Hitoshi Mitake Signed-off-by: Ryusuke Konishi Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/segment.c | 33 ++++++++++++++++++++++++++- include/trace/events/nilfs2.h | 53 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index ef354043b87c..3fc47326fbe1 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -214,11 +214,18 @@ int nilfs_transaction_begin(struct super_block *sb, { struct the_nilfs *nilfs; int ret = nilfs_prepare_segment_lock(ti); + struct nilfs_transaction_info *trace_ti; if (unlikely(ret < 0)) return ret; - if (ret > 0) + if (ret > 0) { + trace_ti = current->journal_info; + + trace_nilfs2_transaction_transition(sb, trace_ti, + trace_ti->ti_count, trace_ti->ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; + } sb_start_intwrite(sb); @@ -229,6 +236,11 @@ int nilfs_transaction_begin(struct super_block *sb, ret = -ENOSPC; goto failed; } + + trace_ti = current->journal_info; + trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count, + trace_ti->ti_flags, + TRACE_NILFS2_TRANSACTION_BEGIN); return 0; failed: @@ -261,6 +273,8 @@ int nilfs_transaction_commit(struct super_block *sb) ti->ti_flags |= NILFS_TI_COMMIT; if (ti->ti_count > 0) { ti->ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); return 0; } if (nilfs->ns_writer) { @@ -272,6 +286,9 @@ int nilfs_transaction_commit(struct super_block *sb) nilfs_segctor_do_flush(sci, 0); } 
up_read(&nilfs->ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT); + current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_SYNC) @@ -290,10 +307,15 @@ void nilfs_transaction_abort(struct super_block *sb) BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC); if (ti->ti_count > 0) { ti->ti_count--; + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); return; } up_read(&nilfs->ns_segctor_sem); + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT); + current->journal_info = ti->ti_save; if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC) kmem_cache_free(nilfs_transaction_cachep, ti); @@ -339,6 +361,9 @@ static void nilfs_transaction_lock(struct super_block *sb, current->journal_info = ti; for (;;) { + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK); + down_write(&nilfs->ns_segctor_sem); if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) break; @@ -350,6 +375,9 @@ static void nilfs_transaction_lock(struct super_block *sb, } if (gcflag) ti->ti_flags |= NILFS_TI_GC; + + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_LOCK); } static void nilfs_transaction_unlock(struct super_block *sb) @@ -362,6 +390,9 @@ static void nilfs_transaction_unlock(struct super_block *sb) up_write(&nilfs->ns_segctor_sem); current->journal_info = ti->ti_save; + + trace_nilfs2_transaction_transition(sb, ti, ti->ti_count, + ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK); } static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci, diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h index 573da00a486d..e5649ac211ca 100644 --- a/include/trace/events/nilfs2.h +++ b/include/trace/events/nilfs2.h @@ -42,6 +42,59 @@ TRACE_EVENT(nilfs2_collection_stage_transition, 
show_collection_stage(__entry->stage)) ); +#ifndef TRACE_HEADER_MULTI_READ +enum nilfs2_transaction_transition_state { + TRACE_NILFS2_TRANSACTION_BEGIN, + TRACE_NILFS2_TRANSACTION_COMMIT, + TRACE_NILFS2_TRANSACTION_ABORT, + TRACE_NILFS2_TRANSACTION_TRYLOCK, + TRACE_NILFS2_TRANSACTION_LOCK, + TRACE_NILFS2_TRANSACTION_UNLOCK, +}; +#endif + +#define show_transaction_state(type) \ + __print_symbolic(type, \ + { TRACE_NILFS2_TRANSACTION_BEGIN, "BEGIN" }, \ + { TRACE_NILFS2_TRANSACTION_COMMIT, "COMMIT" }, \ + { TRACE_NILFS2_TRANSACTION_ABORT, "ABORT" }, \ + { TRACE_NILFS2_TRANSACTION_TRYLOCK, "TRYLOCK" }, \ + { TRACE_NILFS2_TRANSACTION_LOCK, "LOCK" }, \ + { TRACE_NILFS2_TRANSACTION_UNLOCK, "UNLOCK" }) + +TRACE_EVENT(nilfs2_transaction_transition, + TP_PROTO(struct super_block *sb, + struct nilfs_transaction_info *ti, + int count, + unsigned int flags, + enum nilfs2_transaction_transition_state state), + + TP_ARGS(sb, ti, count, flags, state), + + TP_STRUCT__entry( + __field(void *, sb) + __field(void *, ti) + __field(int, count) + __field(unsigned int, flags) + __field(int, state) + ), + + TP_fast_assign( + __entry->sb = sb; + __entry->ti = ti; + __entry->count = count; + __entry->flags = flags; + __entry->state = state; + ), + + TP_printk("sb = %p ti = %p count = %d flags = %x state = %s", + __entry->sb, + __entry->ti, + __entry->count, + __entry->flags, + show_transaction_state(__entry->state)) +); + #endif /* _TRACE_NILFS2_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 83eec5e6dd10f0b1ab83ee660c8be883b3da7ba8 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Fri, 6 Nov 2015 16:32:05 -0800 Subject: nilfs2: add tracepoints for analyzing sufile manipulation This patch adds tracepoints which would be useful for analyzing segment usage from a perspective of high level sufile manipulation (check, alloc, free). sufile is an important in-place updated metadata file, so analyzing the behavior would be useful for performance tuning. 
example of usage (a case of allocation): $ sudo bin/tpoint nilfs2:nilfs2_segment_usage_allocated Tracing nilfs2:nilfs2_segment_usage_allocated. Ctrl-C to end. segctord-17800 [002] ...1 10671.867294: nilfs2_segment_usage_allocated: sufile = ffff880054f908a8 segnum = 2 segctord-17800 [002] ...1 10675.073477: nilfs2_segment_usage_allocated: sufile = ffff880054f908a8 segnum = 3 Signed-off-by: Hitoshi Mitake Signed-off-by: Ryusuke Konishi Cc: Steven Rostedt Cc: Benixon Dhas Cc: TK Kato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/sufile.c | 8 ++++++ include/trace/events/nilfs2.h | 67 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 2a869c35c362..7ff8f15207ab 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -30,6 +30,8 @@ #include "mdt.h" #include "sufile.h" +#include + /** * struct nilfs_sufile_info - on-memory private data of sufile * @mi: on-memory private data of metadata file @@ -358,6 +360,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) break; /* never happens */ } } + trace_nilfs2_segment_usage_check(sufile, segnum, cnt); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &su_bh); if (ret < 0) @@ -388,6 +391,9 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) nilfs_mdt_mark_dirty(sufile); brelse(su_bh); *segnump = segnum; + + trace_nilfs2_segment_usage_allocated(sufile, segnum); + goto out_header; } @@ -490,6 +496,8 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, NILFS_SUI(sufile)->ncleansegs++; nilfs_mdt_mark_dirty(sufile); + + trace_nilfs2_segment_usage_freed(sufile, segnum); } /** diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h index e5649ac211ca..1b65ba687925 100644 --- a/include/trace/events/nilfs2.h +++ b/include/trace/events/nilfs2.h @@ -95,6 +95,73 @@ TRACE_EVENT(nilfs2_transaction_transition, 
show_transaction_state(__entry->state)) ); +TRACE_EVENT(nilfs2_segment_usage_check, + TP_PROTO(struct inode *sufile, + __u64 segnum, + unsigned long cnt), + + TP_ARGS(sufile, segnum, cnt), + + TP_STRUCT__entry( + __field(struct inode *, sufile) + __field(__u64, segnum) + __field(unsigned long, cnt) + ), + + TP_fast_assign( + __entry->sufile = sufile; + __entry->segnum = segnum; + __entry->cnt = cnt; + ), + + TP_printk("sufile = %p segnum = %llu cnt = %lu", + __entry->sufile, + __entry->segnum, + __entry->cnt) +); + +TRACE_EVENT(nilfs2_segment_usage_allocated, + TP_PROTO(struct inode *sufile, + __u64 segnum), + + TP_ARGS(sufile, segnum), + + TP_STRUCT__entry( + __field(struct inode *, sufile) + __field(__u64, segnum) + ), + + TP_fast_assign( + __entry->sufile = sufile; + __entry->segnum = segnum; + ), + + TP_printk("sufile = %p segnum = %llu", + __entry->sufile, + __entry->segnum) +); + +TRACE_EVENT(nilfs2_segment_usage_freed, + TP_PROTO(struct inode *sufile, + __u64 segnum), + + TP_ARGS(sufile, segnum), + + TP_STRUCT__entry( + __field(struct inode *, sufile) + __field(__u64, segnum) + ), + + TP_fast_assign( + __entry->sufile = sufile; + __entry->segnum = segnum; + ), + + TP_printk("sufile = %p segnum = %llu", + __entry->sufile, + __entry->segnum) +); + #endif /* _TRACE_NILFS2_H */ /* This part must be outside protection */ -- cgit v1.2.3 From a9cd207c23ca4fa5bd5f1092e867e87542e349a3 Mon Sep 17 00:00:00 2001 From: Hitoshi Mitake Date: Fri, 6 Nov 2015 16:32:08 -0800 Subject: nilfs2: add tracepoints for analyzing reading and writing metadata files This patch adds tracepoints for analyzing requests of reading and writing metadata files. The tracepoints cover every in-place mdt files (cpfile, sufile, and datfile). 
Example of tracing mdt_insert_new_block(): cp-14635 [000] ...1 30598.199309: nilfs2_mdt_insert_new_block: inode = ffff88022a8d0178 ino = 3 block = 155 cp-14635 [000] ...1 30598.199520: nilfs2_mdt_insert_new_block: inode = ffff88022a8d0178 ino = 3 block = 5 cp-14635 [000] ...1 30598.200828: nilfs2_mdt_insert_new_block: inode = ffff88022a8d0178 ino = 3 block = 253 Signed-off-by: Hitoshi Mitake Signed-off-by: Ryusuke Konishi Cc: Steven Rostedt Cc: TK Kato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/mdt.c | 6 +++++ include/trace/events/nilfs2.h | 54 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index dee34d990281..1125f40233ff 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -33,6 +33,7 @@ #include "page.h" #include "mdt.h" +#include #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) @@ -68,6 +69,9 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_uptodate(bh); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(inode); + + trace_nilfs2_mdt_insert_new_block(inode, inode->i_ino, block); + return 0; } @@ -158,6 +162,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, get_bh(bh); submit_bh(mode, bh); ret = 0; + + trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); out: get_bh(bh); *out_bh = bh; diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h index 1b65ba687925..c7805818fcc6 100644 --- a/include/trace/events/nilfs2.h +++ b/include/trace/events/nilfs2.h @@ -162,6 +162,60 @@ TRACE_EVENT(nilfs2_segment_usage_freed, __entry->segnum) ); +TRACE_EVENT(nilfs2_mdt_insert_new_block, + TP_PROTO(struct inode *inode, + unsigned long ino, + unsigned long block), + + TP_ARGS(inode, ino, block), + + TP_STRUCT__entry( + __field(struct inode *, inode) + __field(unsigned long, ino) + __field(unsigned long, block) + ), + + TP_fast_assign( + __entry->inode = inode; + __entry->ino = 
ino; + __entry->block = block; + ), + + TP_printk("inode = %p ino = %lu block = %lu", + __entry->inode, + __entry->ino, + __entry->block) +); + +TRACE_EVENT(nilfs2_mdt_submit_block, + TP_PROTO(struct inode *inode, + unsigned long ino, + unsigned long blkoff, + int mode), + + TP_ARGS(inode, ino, blkoff, mode), + + TP_STRUCT__entry( + __field(struct inode *, inode) + __field(unsigned long, ino) + __field(unsigned long, blkoff) + __field(int, mode) + ), + + TP_fast_assign( + __entry->inode = inode; + __entry->ino = ino; + __entry->blkoff = blkoff; + __entry->mode = mode; + ), + + TP_printk("inode = %p ino = %lu blkoff = %lu mode = %x", + __entry->inode, + __entry->ino, + __entry->blkoff, + __entry->mode) +); + #endif /* _TRACE_NILFS2_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 09ef29e0f6ac9f08ba4cc501ab4a3c33be526343 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2015 16:32:14 -0800 Subject: nilfs2: fix gcc unused-but-set-variable warnings Fix the following build warnings: $ make W=1 [...] 
CC [M] fs/nilfs2/btree.o fs/nilfs2/btree.c: In function 'nilfs_btree_split': fs/nilfs2/btree.c:923:8: warning: variable 'newptr' set but not used [-Wunused-but-set-variable] __u64 newptr; ^ fs/nilfs2/btree.c:922:8: warning: variable 'newkey' set but not used [-Wunused-but-set-variable] __u64 newkey; ^ CC [M] fs/nilfs2/dat.o fs/nilfs2/dat.c: In function 'nilfs_dat_prepare_end': fs/nilfs2/dat.c:158:8: warning: variable 'start' set but not used [-Wunused-but-set-variable] __u64 start; ^ CC [M] fs/nilfs2/segment.o fs/nilfs2/segment.c: In function 'nilfs_segctor_do_immediate_flush': fs/nilfs2/segment.c:2433:6: warning: variable 'err' set but not used [-Wunused-but-set-variable] int err; ^ CC [M] fs/nilfs2/sufile.o fs/nilfs2/sufile.c: In function 'nilfs_sufile_alloc': fs/nilfs2/sufile.c:320:27: warning: variable 'ncleansegs' set but not used [-Wunused-but-set-variable] unsigned long nsegments, ncleansegs, nsus, cnt; ^ CC [M] fs/nilfs2/alloc.o fs/nilfs2/alloc.c: In function 'nilfs_palloc_prepare_alloc_entry': fs/nilfs2/alloc.c:478:38: warning: variable 'groups_per_desc_block' set but not used [-Wunused-but-set-variable] unsigned long n, entries_per_group, groups_per_desc_block; ^ Signed-off-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/alloc.c | 3 +-- fs/nilfs2/btree.c | 5 ----- fs/nilfs2/dat.c | 2 -- fs/nilfs2/segment.c | 3 +-- fs/nilfs2/sufile.c | 3 +-- 5 files changed, 3 insertions(+), 13 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index b335a32e9561..2ccbf5531554 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -514,7 +514,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, void *desc_kaddr, *bitmap_kaddr; unsigned long group, maxgroup, ngroups; unsigned long group_offset, maxgroup_offset; - unsigned long n, entries_per_group, groups_per_desc_block; + unsigned long n, entries_per_group; unsigned long i, j; spinlock_t *lock; int pos, ret; @@ -523,7 +523,6 @@ 
int nilfs_palloc_prepare_alloc_entry(struct inode *inode, maxgroup = ngroups - 1; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); entries_per_group = nilfs_palloc_entries_per_group(inode); - groups_per_desc_block = nilfs_palloc_groups_per_desc_block(inode); for (i = 0; i < ngroups; i += n) { if (group >= ngroups) { diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 919fd5bb14a8..f609a8532ec5 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -919,8 +919,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - __u64 newkey; - __u64 newptr; int nchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); @@ -942,9 +940,6 @@ static void nilfs_btree_split(struct nilfs_bmap *btree, if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); - newkey = nilfs_btree_node_get_key(right, 0); - newptr = path[level].bp_newreq.bpr_ptr; - if (move) { path[level].bp_index -= nilfs_btree_node_get_nchildren(node); nilfs_btree_node_insert(right, path[level].bp_index, diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 0d5fada91191..7dc23f100e57 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -155,7 +155,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; - __u64 start; sector_t blocknr; void *kaddr; int ret; @@ -169,7 +168,6 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) kaddr = kmap_atomic(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); - start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); kunmap_atomic(kaddr); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 3fc47326fbe1..3b65adaae7e4 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ 
-2492,7 +2492,6 @@ static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode) static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) { int mode = 0; - int err; spin_lock(&sci->sc_state_lock); mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ? @@ -2500,7 +2499,7 @@ static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci) spin_unlock(&sci->sc_state_lock); if (mode) { - err = nilfs_segctor_do_construct(sci, mode); + nilfs_segctor_do_construct(sci, mode); spin_lock(&sci->sc_state_lock); sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ? diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 7ff8f15207ab..52821ffc11f4 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -319,7 +319,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) size_t susz = NILFS_MDT(sufile)->mi_entry_size; __u64 segnum, maxsegnum, last_alloc; void *kaddr; - unsigned long nsegments, ncleansegs, nsus, cnt; + unsigned long nsegments, nsus, cnt; int ret, j; down_write(&NILFS_MDT(sufile)->mi_sem); @@ -329,7 +329,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) goto out_sem; kaddr = kmap_atomic(header_bh->b_page); header = kaddr + bh_offset(header_bh); - ncleansegs = le64_to_cpu(header->sh_ncleansegs); last_alloc = le64_to_cpu(header->sh_last_alloc); kunmap_atomic(kaddr); -- cgit v1.2.3 From 4f05028f8d1af782cfd03d09e0a052e9745dc5ad Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2015 16:32:16 -0800 Subject: nilfs2: fix gcc uninitialized-variable warnings in powerpc build Some false positive warnings are reported for powerpc build. 
The following warnings are reported in
http://kisskb.ellerman.id.au/kisskb/buildresult/12519703/

  CC fs/nilfs2/super.o
  fs/nilfs2/super.c: In function 'nilfs_resize_fs':
  fs/nilfs2/super.c:376:2: warning: 'blocknr' may be used uninitialized in this function [-Wuninitialized]
  fs/nilfs2/super.c:362:11: note: 'blocknr' was declared here
  CC fs/nilfs2/recovery.o
  fs/nilfs2/recovery.c: In function 'nilfs_salvage_orphan_logs':
  fs/nilfs2/recovery.c:631:21: warning: 'sum' may be used uninitialized in this function [-Wuninitialized]
  fs/nilfs2/recovery.c:585:32: note: 'sum' was declared here
  fs/nilfs2/recovery.c: In function 'nilfs_search_super_root':
  fs/nilfs2/recovery.c:873:11: warning: 'sum' may be used uninitialized in this function [-Wuninitialized]

Another similar warning is reported in
http://kisskb.ellerman.id.au/kisskb/buildresult/12520079/

  CC fs/nilfs2/btree.o
  fs/nilfs2/btree.c: In function 'nilfs_btree_convert_and_insert':
  include/asm-generic/bitops/non-atomic.h:105:20: warning: 'bh' may be used uninitialized in this function [-Wuninitialized]
  fs/nilfs2/btree.c:1859:22: note: 'bh' was declared here

This cleans out these warnings by forcing the variables to be initialized.
Signed-off-by: Ryusuke Konishi Reported-by: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/btree.c | 2 +- fs/nilfs2/recovery.c | 4 ++-- fs/nilfs2/super.c | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/nilfs2') diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index f609a8532ec5..3a3821b00486 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1851,7 +1851,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n) { - struct buffer_head *bh; + struct buffer_head *bh = NULL; union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; struct nilfs_bmap_stats stats; int ret; diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index ff00a0b7acb9..9b4f205d1173 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -582,7 +582,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) { struct buffer_head *bh_sum = NULL; - struct nilfs_segment_summary *sum; + struct nilfs_segment_summary *sum = NULL; sector_t pseg_start; sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ unsigned long nsalvaged_blocks = 0; @@ -814,7 +814,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) { struct buffer_head *bh_sum = NULL; - struct nilfs_segment_summary *sum; + struct nilfs_segment_summary *sum = NULL; sector_t pseg_start, pseg_end, sr_pseg_start = 0; sector_t seg_start, seg_end; /* range of full segment (block number) */ sector_t b, end; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index c69455a543bc..354013ea22ec 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -361,7 +361,7 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) struct nilfs_super_block *nsbp; sector_t blocknr, newblocknr; unsigned long offset; - int sb2i = -1; /* array index of the secondary superblock */ + int sb2i; /* array index of the 
secondary superblock */ int ret = 0; /* nilfs->ns_sem must be locked by the caller. */ @@ -372,6 +372,9 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off) } else if (nilfs->ns_sbh[0]->b_blocknr > nilfs->ns_first_data_block) { sb2i = 0; blocknr = nilfs->ns_sbh[0]->b_blocknr; + } else { + sb2i = -1; + blocknr = 0; } if (sb2i >= 0 && (u64)blocknr << nilfs->ns_blocksize_bits == sb2off) goto out; /* super block location is unchanged */ -- cgit v1.2.3