From dc89e9824464e91fa0b06267864ceabe3186fd8b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Jan 2011 17:05:48 -0500 Subject: Btrfs: use a slab for the free space entries Since we alloc/free free space entries a whole lot, lets use a slab to keep track of them. This makes some of my tests slightly faster. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7f78cc78fdd0..2c98d209e6ac 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -40,6 +40,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep; extern struct kmem_cache *btrfs_transaction_cachep; extern struct kmem_cache *btrfs_bit_radix_cachep; extern struct kmem_cache *btrfs_path_cachep; +extern struct kmem_cache *btrfs_free_space_cachep; struct btrfs_ordered_sum; #define BTRFS_MAGIC "_BHRfS_M" -- cgit v1.2.3 From a41ad394a03b802497958d7c98a9dcf607266645 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 31 Jan 2011 15:30:16 -0500 Subject: Btrfs: convert to the new truncate sequence ->truncate() is going away, instead all of the work needs to be done in ->setattr(). So this converts us over to do this. It's fairly straightforward, just get rid of our .truncate inode operation and call btrfs_truncate() directly from btrfs_setsize. This works out better for us since truncate can technically return ENOSPC, and before we had no way of letting anybody know. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 2 +- fs/btrfs/file.c | 5 ++-- fs/btrfs/inode.c | 80 +++++++++++++++++++++++++------------------------------- 3 files changed, 40 insertions(+), 47 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c98d209e6ac..34142d5647df 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2537,7 +2537,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, struct btrfs_pending_snapshot *pending); void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); -int btrfs_cont_expand(struct inode *inode, loff_t size); +int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size); int btrfs_invalidate_inodes(struct btrfs_root *root); void btrfs_add_delayed_iput(struct inode *inode); void btrfs_run_delayed_iputs(struct btrfs_root *root); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 24a19c2743ca..3786eca2a905 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -817,7 +817,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; if (start_pos > inode->i_size) { - err = btrfs_cont_expand(inode, start_pos); + err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); if (err) return err; } @@ -1330,7 +1330,8 @@ static long btrfs_fallocate(struct file *file, int mode, goto out; if (alloc_start > inode->i_size) { - ret = btrfs_cont_expand(inode, alloc_start); + ret = btrfs_cont_expand(inode, i_size_read(inode), + alloc_start); if (ret) goto out; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2d2e079713d7..3662ffec17d9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -84,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, }; -static void btrfs_truncate(struct inode *inode); +static int btrfs_setsize(struct inode *inode, loff_t newsize); +static int btrfs_truncate(struct inode *inode); static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end); static noinline int cow_file_range(struct inode *inode, struct page *locked_page, @@ -2371,6 +2372,11 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { + if (!S_ISREG(inode->i_mode)) { + WARN_ON(1); + iput(inode); + continue; + } nr_truncate++; btrfs_truncate(inode); } else { @@ -3538,7 +3544,7 @@ out: return ret; } -int btrfs_cont_expand(struct inode *inode, loff_t size) +int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -3546,7 +3552,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) struct extent_map *em = NULL; struct extent_state *cached_state = NULL; u64 mask = root->sectorsize - 1; - u64 hole_start = (inode->i_size + mask) & ~mask; + u64 hole_start = (oldsize + mask) & ~mask; u64 block_end = (size + mask) & ~mask; u64 last_byte; u64 cur_offset; @@ -3617,27 +3623,17 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) return err; } -static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) +static int btrfs_setsize(struct inode *inode, loff_t newsize) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; + loff_t oldsize = i_size_read(inode); unsigned long nr; int ret; - if (attr->ia_size == inode->i_size) + if (newsize == oldsize) return 0; - if (attr->ia_size > inode->i_size) { - unsigned long limit; - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; - if (attr->ia_size > inode->i_sb->s_maxbytes) - return -EFBIG; - if (limit != RLIM_INFINITY && attr->ia_size > limit) { - send_sig(SIGXFSZ, current, 0); - return -EFBIG; - } - } - trans = btrfs_start_transaction(root, 5); if (IS_ERR(trans)) return PTR_ERR(trans); @@ -3651,16 +3647,16 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - if (attr->ia_size > inode->i_size) { - ret = btrfs_cont_expand(inode, attr->ia_size); + if (newsize > oldsize) { + i_size_write(inode, newsize); + btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); + truncate_pagecache(inode, oldsize, newsize); + ret = btrfs_cont_expand(inode, oldsize, newsize); if (ret) { - btrfs_truncate(inode); + btrfs_setsize(inode, oldsize); return ret; } - i_size_write(inode, attr->ia_size); - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); - trans = btrfs_start_transaction(root, 0); BUG_ON(IS_ERR(trans)); btrfs_set_trans_block_group(trans, inode); @@ -3676,22 +3672,22 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); - return 0; - } + } else { - /* - * We're truncating a file that used to have good data down to - * zero. Make sure it gets into the ordered flush list so that - * any new writes get down to disk quickly. - */ - if (attr->ia_size == 0) - BTRFS_I(inode)->ordered_data_close = 1; + /* + * We're truncating a file that used to have good data down to + * zero. Make sure it gets into the ordered flush list so that + * any new writes get down to disk quickly. + */ + if (newsize == 0) + BTRFS_I(inode)->ordered_data_close = 1; - /* we don't support swapfiles, so vmtruncate shouldn't fail */ - ret = vmtruncate(inode, attr->ia_size); - BUG_ON(ret); + /* we don't support swapfiles, so vmtruncate shouldn't fail */ + truncate_setsize(inode, newsize); + ret = btrfs_truncate(inode); + } - return 0; + return ret; } static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) @@ -3708,7 +3704,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - err = btrfs_setattr_size(inode, attr); + err = btrfs_setsize(inode, attr->ia_size); if (err) return err; } @@ -6478,7 +6474,7 @@ out: return ret; } -static void btrfs_truncate(struct inode *inode) +static int btrfs_truncate(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; @@ -6486,14 +6482,9 @@ static void btrfs_truncate(struct inode *inode) unsigned long nr; u64 mask = root->sectorsize - 1; - if (!S_ISREG(inode->i_mode)) { - WARN_ON(1); - return; - } - ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (ret) - return; + return ret; btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); btrfs_ordered_update_i_size(inode, inode->i_size, NULL); @@ -6568,6 +6559,8 @@ static void btrfs_truncate(struct inode *inode) ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); btrfs_btree_balance_dirty(root, nr); + + return ret; } /* @@ -7367,7 +7360,6 @@ static const struct address_space_operations btrfs_symlink_aops = { }; static const struct inode_operations btrfs_file_inode_operations = { - .truncate = btrfs_truncate, .getattr = btrfs_getattr, .setattr = btrfs_setattr, .setxattr = btrfs_setxattr, -- cgit v1.2.3 From 66b4ffd110f9b48b8d8c1319ee446b53b8d073bf Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 31 Jan 2011 16:22:42 -0500 Subject: Btrfs: handle errors in btrfs_orphan_cleanup If we cannot truncate an inode for some reason we will never delete the orphan item associated with that inode, which means that we will loop forever in btrfs_orphan_cleanup. Instead of doing this just return error so we fail to mount. It sucks, but hey it's better than hanging. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 15 ++++++++++++--- fs/btrfs/extent-tree.c | 3 ++- fs/btrfs/inode.c | 47 +++++++++++++++++++++++++++++++---------------- fs/btrfs/ioctl.c | 4 +++- fs/btrfs/relocation.c | 2 +- 6 files changed, 50 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 34142d5647df..841330f3d68d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2529,7 +2529,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); -void btrfs_orphan_cleanup(struct btrfs_root *root); +int btrfs_orphan_cleanup(struct btrfs_root *root); void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, struct btrfs_pending_snapshot *pending, u64 *bytes_to_reserve); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e1aa8d607bc7..495b1ac45f8c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2058,9 +2058,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, if (!(sb->s_flags & MS_RDONLY)) { down_read(&fs_info->cleanup_work_sem); - btrfs_orphan_cleanup(fs_info->fs_root); - btrfs_orphan_cleanup(fs_info->tree_root); + err = btrfs_orphan_cleanup(fs_info->fs_root); + if (!err) + err = btrfs_orphan_cleanup(fs_info->tree_root); up_read(&fs_info->cleanup_work_sem); + if (err) { + close_ctree(tree_root); + return ERR_PTR(err); + } } return tree_root; @@ -2435,8 +2440,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) root_objectid = gang[ret - 1]->root_key.objectid + 1; for (i = 0; i < ret; i++) { + int err; + root_objectid = gang[i]->root_key.objectid; - btrfs_orphan_cleanup(gang[i]); + err = btrfs_orphan_cleanup(gang[i]); + if (err) + return err; } root_objectid++; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 27376c97d85f..a8f4e8d2ba60 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7619,7 +7619,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root) reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); BUG_ON(!reloc_root); - btrfs_orphan_cleanup(reloc_root); + ret = btrfs_orphan_cleanup(reloc_root); + BUG_ON(ret); return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d83025063ee7..0600265cb9b0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2284,7 +2284,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) * this cleans up any orphans that may be left on the list from the last use * of this root. */ -void btrfs_orphan_cleanup(struct btrfs_root *root) +int btrfs_orphan_cleanup(struct btrfs_root *root) { struct btrfs_path *path; struct extent_buffer *leaf; @@ -2294,10 +2294,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) int ret = 0, nr_unlink = 0, nr_truncate = 0; if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) - return; + return 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + ret = -ENOMEM; + goto out; + } path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; @@ -2306,11 +2309,8 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) { - printk(KERN_ERR "Error searching slot for orphan: %d" - "\n", ret); - break; - } + if (ret < 0) + goto out; /* * if ret == 0 means we found what we were searching for, which @@ -2318,6 +2318,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) * find the key and see if we have stuff that matches */ if (ret > 0) { + ret = 0; if (path->slots[0] == 0) break; path->slots[0]--; @@ -2345,7 +2346,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); - BUG_ON(IS_ERR(inode)); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto out; + } /* * add this inode to the orphan list so btrfs_orphan_del does @@ -2363,7 +2367,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) */ if (is_bad_inode(inode)) { trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } btrfs_orphan_del(trans, inode); btrfs_end_transaction(trans, root); iput(inode); @@ -2378,16 +2385,16 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; - btrfs_truncate(inode); + ret = btrfs_truncate(inode); } else { nr_unlink++; } /* this will do delete_inode and everything for us */ iput(inode); + if (ret) + goto out; } - btrfs_free_path(path); - root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; if (root->orphan_block_rsv) @@ -2396,14 +2403,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) if (root->orphan_block_rsv || root->orphan_item_inserted) { trans = btrfs_join_transaction(root, 1); - BUG_ON(IS_ERR(trans)); - btrfs_end_transaction(trans, root); + if (!IS_ERR(trans)) + btrfs_end_transaction(trans, root); } if (nr_unlink) printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); if (nr_truncate) printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); + +out: + if (ret) + printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret); + btrfs_free_path(path); + return ret; } /* @@ -4156,8 +4169,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) if (!IS_ERR(inode) && root != sub_root) { down_read(&root->fs_info->cleanup_work_sem); if (!(inode->i_sb->s_flags & MS_RDONLY)) - btrfs_orphan_cleanup(sub_root); + ret = btrfs_orphan_cleanup(sub_root); up_read(&root->fs_info->cleanup_work_sem); + if (ret) + inode = ERR_PTR(ret); } return inode; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5fdb2abc4fa7..ad9b8c0e930b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -409,7 +409,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, if (ret) goto fail; - btrfs_orphan_cleanup(pending_snapshot->snap); + ret = btrfs_orphan_cleanup(pending_snapshot->snap); + if (ret) + goto fail; parent = dget_parent(dentry); inode = btrfs_lookup_dentry(parent->d_inode, dentry); diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 31ade5802ae8..c863c8447015 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4209,7 +4209,7 @@ out: if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); else - btrfs_orphan_cleanup(fs_root); + err = btrfs_orphan_cleanup(fs_root); } return err; } -- cgit v1.2.3 From 22a94d44bd6876a90630338229da6c0436d46593 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 16 Mar 2011 16:47:17 -0400 Subject: Btrfs: add checks to verify dir items are correct We need to make sure the dir items we get are valid dir items. So any time we try and read one check it with verify_dir_item, which will do various sanity checks to make sure it looks sane. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 +++ fs/btrfs/dir-item.c | 35 +++++++++++++++++++++++++++++++++++ fs/btrfs/inode.c | 3 +++ fs/btrfs/tree-log.c | 7 +++++++ fs/btrfs/xattr.c | 2 ++ 5 files changed, 50 insertions(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 841330f3d68d..6036fdb88c53 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2393,6 +2393,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, struct btrfs_path *path, u64 dir, const char *name, u16 name_len, int mod); +int verify_dir_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_dir_item *dir_item); /* orphan.c */ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index f0cad5ae5be7..02c97ad61b6d 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -377,6 +377,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, leaf = path->nodes[0]; dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item); + if (verify_dir_item(root, leaf, dir_item)) + return NULL; + total_len = btrfs_item_size_nr(leaf, path->slots[0]); while (cur < total_len) { this_len = sizeof(*dir_item) + @@ -429,3 +432,35 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans, } return ret; } + +int verify_dir_item(struct btrfs_root *root, + struct extent_buffer *leaf, + struct btrfs_dir_item *dir_item) +{ + u16 namelen = BTRFS_NAME_LEN; + u8 type = btrfs_dir_type(leaf, dir_item); + + if (type >= BTRFS_FT_MAX) { + printk(KERN_CRIT "btrfs: invalid dir item type: %d\n", + (int)type); + return 1; + } + + if (type == BTRFS_FT_XATTR) + namelen = XATTR_NAME_MAX; + + if (btrfs_dir_name_len(leaf, dir_item) > namelen) { + printk(KERN_CRIT "btrfS: invalid dir item name len: %u\n", + (unsigned)btrfs_dir_data_len(leaf, dir_item)); + return 1; + } + + /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */ + if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) { + printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n", + (unsigned)btrfs_dir_data_len(leaf, dir_item)); + return 1; + } + + return 0; +} diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 888dbdb3b128..e010000d4bc9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4272,6 +4272,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; + if (verify_dir_item(root, leaf, di)) + break; + name_len = btrfs_dir_name_len(leaf, di); if (name_len <= sizeof(tmp_name)) { name_ptr = tmp_name; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a4bbb854dfd2..429cfcfadf90 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1286,6 +1286,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans, ptr_end = ptr + item_size; while (ptr < ptr_end) { di = (struct btrfs_dir_item *)ptr; + if (verify_dir_item(root, eb, di)) + return -EIO; name_len = btrfs_dir_name_len(eb, di); ret = replay_one_name(trans, root, path, eb, di, key); BUG_ON(ret); @@ -1412,6 +1414,11 @@ again: ptr_end = ptr + item_size; while (ptr < ptr_end) { di = (struct btrfs_dir_item *)ptr; + if (verify_dir_item(root, eb, di)) { + ret = -EIO; + goto out; + } + name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); if (!name) { diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index a5776531dc2b..e5d22f280956 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -242,6 +242,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) break; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); + if (verify_dir_item(root, leaf, di)) + continue; name_len = btrfs_dir_name_len(leaf, di); total_size += name_len + 1; -- cgit v1.2.3 From 4e69b598f6cfb0940b75abf7e179d6020e94ad1e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 21 Mar 2011 10:11:24 -0400 Subject: Btrfs: cleanup how we setup free space clusters This patch makes the free space cluster refilling code a little easier to understand, and fixes some things with the bitmap part of it. Currently we either want to refill a cluster with 1) All normal extent entries (those without bitmaps) 2) A bitmap entry with enough space The current code has this ugly jump around logic that will first try and fill up the cluster with extent entries and then if it can't do that it will try and find a bitmap to use. So instead split this out into two functions, one that tries to find only normal entries, and one that tries to find bitmaps. This also fixes a suboptimal thing we would do with bitmaps. If we used a bitmap we would just tell the cluster that we were pointing at a bitmap and it would do the tree search in the block group for that entry every time we tried to make an allocation. Instead of doing that now we just add it to the clusters group. I tested this with my ENOSPC tests and xfstests and it survived. Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 - fs/btrfs/free-space-cache.c | 364 ++++++++++++++++++++++---------------------- 2 files changed, 182 insertions(+), 185 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6036fdb88c53..0ee679b6c1b7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -783,9 +783,6 @@ struct btrfs_free_cluster { /* first extent starting offset */ u64 window_start; - /* if this cluster simply points at a bitmap in the block group */ - bool points_to_bitmap; - struct btrfs_block_group_cache *block_group; /* * when a cluster is allocated from a block group, we put the diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4ab35ea0443f..f03ef97c3b21 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1644,30 +1644,28 @@ __btrfs_return_cluster_to_free_space( { struct btrfs_free_space *entry; struct rb_node *node; - bool bitmap; spin_lock(&cluster->lock); if (cluster->block_group != block_group) goto out; - bitmap = cluster->points_to_bitmap; cluster->block_group = NULL; cluster->window_start = 0; list_del_init(&cluster->block_group_list); - cluster->points_to_bitmap = false; - - if (bitmap) - goto out; node = rb_first(&cluster->root); while (node) { + bool bitmap; + entry = rb_entry(node, struct btrfs_free_space, offset_index); node = rb_next(&entry->offset_index); rb_erase(&entry->offset_index, &cluster->root); - BUG_ON(entry->bitmap); - try_merge_free_space(block_group, entry, false); + + bitmap = (entry->bitmap != NULL); + if (!bitmap) + try_merge_free_space(block_group, entry, false); tree_insert_offset(&block_group->free_space_offset, - entry->offset, &entry->offset_index, 0); + entry->offset, &entry->offset_index, bitmap); } cluster->root = RB_ROOT; @@ -1790,50 +1788,24 @@ int btrfs_return_cluster_to_free_space( static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, + struct btrfs_free_space *entry, u64 bytes, u64 min_start) { - struct btrfs_free_space *entry; int err; u64 search_start = cluster->window_start; u64 search_bytes = bytes; u64 ret = 0; - spin_lock(&block_group->tree_lock); - spin_lock(&cluster->lock); - - if (!cluster->points_to_bitmap) - goto out; - - if (cluster->block_group != block_group) - goto out; - - /* - * search_start is the beginning of the bitmap, but at some point it may - * be a good idea to point to the actual start of the free area in the - * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only - * to 1 to make sure we get the bitmap entry - */ - entry = tree_search_offset(block_group, - offset_to_bitmap(block_group, search_start), - 1, 0); - if (!entry || !entry->bitmap) - goto out; - search_start = min_start; search_bytes = bytes; err = search_bitmap(block_group, entry, &search_start, &search_bytes); if (err) - goto out; + return 0; ret = search_start; bitmap_clear_bits(block_group, entry, ret, bytes); - if (entry->bytes == 0) - free_bitmap(block_group, entry); -out: - spin_unlock(&cluster->lock); - spin_unlock(&block_group->tree_lock); return ret; } @@ -1851,10 +1823,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, struct rb_node *node; u64 ret = 0; - if (cluster->points_to_bitmap) - return btrfs_alloc_from_bitmap(block_group, cluster, bytes, - min_start); - spin_lock(&cluster->lock); if (bytes > cluster->max_size) goto out; @@ -1867,9 +1835,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, goto out; entry = rb_entry(node, struct btrfs_free_space, offset_index); - while(1) { - if (entry->bytes < bytes || entry->offset < min_start) { + if (entry->bytes < bytes || + (!entry->bitmap && entry->offset < min_start)) { struct rb_node *node; node = rb_next(&entry->offset_index); @@ -1879,10 +1847,27 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, offset_index); continue; } - ret = entry->offset; - entry->offset += bytes; - entry->bytes -= bytes; + if (entry->bitmap) { + ret = btrfs_alloc_from_bitmap(block_group, + cluster, entry, bytes, + min_start); + if (ret == 0) { + struct rb_node *node; + node = rb_next(&entry->offset_index); + if (!node) + break; + entry = rb_entry(node, struct btrfs_free_space, + offset_index); + continue; + } + } else { + + ret = entry->offset; + + entry->offset += bytes; + entry->bytes -= bytes; + } if (entry->bytes == 0) rb_erase(&entry->offset_index, &cluster->root); @@ -1899,6 +1884,11 @@ out: block_group->free_space -= bytes; if (entry->bytes == 0) { block_group->free_extents--; + if (entry->bitmap) { + kfree(entry->bitmap); + block_group->total_bitmaps--; + recalculate_thresholds(block_group); + } kmem_cache_free(btrfs_free_space_cachep, entry); } @@ -1919,6 +1909,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, unsigned long found_bits; unsigned long start = 0; unsigned long total_found = 0; + int ret; bool found = false; i = offset_to_bit(entry->offset, block_group->sectorsize, @@ -1941,7 +1932,7 @@ again: } if (!found_bits) - return -1; + return -ENOSPC; if (!found) { start = i; @@ -1965,11 +1956,144 @@ again: cluster->window_start = start * block_group->sectorsize + entry->offset; - cluster->points_to_bitmap = true; + rb_erase(&entry->offset_index, &block_group->free_space_offset); + ret = tree_insert_offset(&cluster->root, entry->offset, + &entry->offset_index, 1); + BUG_ON(ret); return 0; } +/* + * This searches the block group for just extents to fill the cluster with. + */ +static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + u64 offset, u64 bytes, u64 min_bytes) +{ + struct btrfs_free_space *first = NULL; + struct btrfs_free_space *entry = NULL; + struct btrfs_free_space *prev = NULL; + struct btrfs_free_space *last; + struct rb_node *node; + u64 window_start; + u64 window_free; + u64 max_extent; + u64 max_gap = 128 * 1024; + + entry = tree_search_offset(block_group, offset, 0, 1); + if (!entry) + return -ENOSPC; + + /* + * We don't want bitmaps, so just move along until we find a normal + * extent entry. + */ + while (entry->bitmap) { + node = rb_next(&entry->offset_index); + if (!node) + return -ENOSPC; + entry = rb_entry(node, struct btrfs_free_space, offset_index); + } + + window_start = entry->offset; + window_free = entry->bytes; + max_extent = entry->bytes; + first = entry; + last = entry; + prev = entry; + + while (window_free <= min_bytes) { + node = rb_next(&entry->offset_index); + if (!node) + return -ENOSPC; + entry = rb_entry(node, struct btrfs_free_space, offset_index); + + if (entry->bitmap) + continue; + /* + * we haven't filled the empty size and the window is + * very large. reset and try again + */ + if (entry->offset - (prev->offset + prev->bytes) > max_gap || + entry->offset - window_start > (min_bytes * 2)) { + first = entry; + window_start = entry->offset; + window_free = entry->bytes; + last = entry; + max_extent = entry->bytes; + } else { + last = entry; + window_free += entry->bytes; + if (entry->bytes > max_extent) + max_extent = entry->bytes; + } + prev = entry; + } + + cluster->window_start = first->offset; + + node = &first->offset_index; + + /* + * now we've found our entries, pull them out of the free space + * cache and put them into the cluster rbtree + */ + do { + int ret; + + entry = rb_entry(node, struct btrfs_free_space, offset_index); + node = rb_next(&entry->offset_index); + if (entry->bitmap) + continue; + + rb_erase(&entry->offset_index, &block_group->free_space_offset); + ret = tree_insert_offset(&cluster->root, entry->offset, + &entry->offset_index, 0); + BUG_ON(ret); + } while (node && entry != last); + + cluster->max_size = max_extent; + + return 0; +} + +/* + * This specifically looks for bitmaps that may work in the cluster, we assume + * that we have already failed to find extents that will work. + */ +static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + u64 offset, u64 bytes, u64 min_bytes) +{ + struct btrfs_free_space *entry; + struct rb_node *node; + int ret = -ENOSPC; + + if (block_group->total_bitmaps == 0) + return -ENOSPC; + + entry = tree_search_offset(block_group, + offset_to_bitmap(block_group, offset), + 0, 1); + if (!entry) + return -ENOSPC; + + node = &entry->offset_index; + do { + entry = rb_entry(node, struct btrfs_free_space, offset_index); + node = rb_next(&entry->offset_index); + if (!entry->bitmap) + continue; + if (entry->bytes < min_bytes) + continue; + ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, + bytes, min_bytes); + } while (ret && node); + + return ret; +} + /* * here we try to find a cluster of blocks in a block group. The goal * is to find at least bytes free and up to empty_size + bytes free. @@ -1984,15 +2108,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size) { - struct btrfs_free_space *entry = NULL; - struct rb_node *node; - struct btrfs_free_space *next; - struct btrfs_free_space *last = NULL; u64 min_bytes; - u64 window_start; - u64 window_free; - u64 max_extent = 0; - bool found_bitmap = false; int ret; /* for metadata, allow allocates with more holes */ @@ -2029,134 +2145,19 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, ret = 0; goto out; } -again: - entry = tree_search_offset(block_group, offset, found_bitmap, 1); - if (!entry) { - ret = -ENOSPC; - goto out; - } - - /* - * If found_bitmap is true, we exhausted our search for extent entries, - * and we just want to search all of the bitmaps that we can find, and - * ignore any extent entries we find. - */ - while (entry->bitmap || found_bitmap || - (!entry->bitmap && entry->bytes < min_bytes)) { - struct rb_node *node = rb_next(&entry->offset_index); - - if (entry->bitmap && entry->bytes > bytes + empty_size) { - ret = btrfs_bitmap_cluster(block_group, entry, cluster, - offset, bytes, min_bytes); - if (!ret) - goto got_it; - } - - if (!node) { - ret = -ENOSPC; - goto out; - } - entry = rb_entry(node, struct btrfs_free_space, offset_index); - } - - /* - * We already searched all the extent entries from the passed in offset - * to the end and didn't find enough space for the cluster, and we also - * didn't find any bitmaps that met our criteria, just go ahead and exit - */ - if (found_bitmap) { - ret = -ENOSPC; - goto out; - } - - cluster->points_to_bitmap = false; - window_start = entry->offset; - window_free = entry->bytes; - last = entry; - max_extent = entry->bytes; - - while (1) { - /* out window is just right, lets fill it */ - if (window_free >= min_bytes) - break; - - node = rb_next(&last->offset_index); - if (!node) { - if (found_bitmap) - goto again; - ret = -ENOSPC; - goto out; - } - next = rb_entry(node, struct btrfs_free_space, offset_index); - - /* - * we found a bitmap, so if this search doesn't result in a - * cluster, we know to go and search again for the bitmaps and - * start looking for space there - */ - if (next->bitmap) { - if (!found_bitmap) - offset = next->offset; - found_bitmap = true; - last = next; - continue; - } - - /* - * we haven't filled the empty size and the window is - * very large. reset and try again - */ - if (next->offset - (last->offset + last->bytes) > 128 * 1024 || - next->offset - window_start > (bytes + empty_size) * 2) { - entry = next; - window_start = entry->offset; - window_free = entry->bytes; - last = entry; - max_extent = entry->bytes; - } else { - last = next; - window_free += next->bytes; - if (entry->bytes > max_extent) - max_extent = entry->bytes; - } - } - - cluster->window_start = entry->offset; - - /* - * now we've found our entries, pull them out of the free space - * cache and put them into the cluster rbtree - * - * The cluster includes an rbtree, but only uses the offset index - * of each free space cache entry. - */ - while (1) { - node = rb_next(&entry->offset_index); - if (entry->bitmap && node) { - entry = rb_entry(node, struct btrfs_free_space, - offset_index); - continue; - } else if (entry->bitmap && !node) { - break; - } - - rb_erase(&entry->offset_index, &block_group->free_space_offset); - ret = tree_insert_offset(&cluster->root, entry->offset, - &entry->offset_index, 0); - BUG_ON(ret); - if (!node || entry == last) - break; + ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, + min_bytes); + if (ret) + ret = setup_cluster_bitmap(block_group, cluster, offset, + bytes, min_bytes); - entry = rb_entry(node, struct btrfs_free_space, offset_index); + if (!ret) { + atomic_inc(&block_group->count); + list_add_tail(&cluster->block_group_list, + &block_group->cluster_list); + cluster->block_group = block_group; } - - cluster->max_size = max_extent; -got_it: - ret = 0; - atomic_inc(&block_group->count); - list_add_tail(&cluster->block_group_list, &block_group->cluster_list); - cluster->block_group = block_group; out: spin_unlock(&cluster->lock); spin_unlock(&block_group->tree_lock); @@ -2173,7 +2174,6 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) spin_lock_init(&cluster->refill_lock); cluster->root = RB_ROOT; cluster->max_size = 0; - cluster->points_to_bitmap = false; INIT_LIST_HEAD(&cluster->block_group_list); cluster->block_group = NULL; } -- cgit v1.2.3 From 1abe9b8a138c9988ba8f7bfded6453649a31541f Mon Sep 17 00:00:00 2001 From: liubo Date: Thu, 24 Mar 2011 11:18:59 +0000 Subject: Btrfs: add initial tracepoint support for btrfs Tracepoints can provide insight into why btrfs hits bugs and be greatly helpful for debugging, e.g dd-7822 [000] 2121.641088: btrfs_inode_request: root = 5(FS_TREE), gen = 4, ino = 256, blocks = 8, disk_i_size = 0, last_trans = 8, logged_trans = 0 dd-7822 [000] 2121.641100: btrfs_inode_new: root = 5(FS_TREE), gen = 8, ino = 257, blocks = 0, disk_i_size = 0, last_trans = 0, logged_trans = 0 btrfs-transacti-7804 [001] 2146.935420: btrfs_cow_block: root = 2(EXTENT_TREE), refs = 2, orig_buf = 29368320 (orig_level = 0), cow_buf = 29388800 (cow_level = 0) btrfs-transacti-7804 [001] 2146.935473: btrfs_cow_block: root = 1(ROOT_TREE), refs = 2, orig_buf = 29364224 (orig_level = 0), cow_buf = 29392896 (cow_level = 0) btrfs-transacti-7804 [001] 2146.972221: btrfs_transaction_commit: root = 1(ROOT_TREE), gen = 8 flush-btrfs-2-7821 [001] 2155.824210: btrfs_chunk_alloc: root = 3(CHUNK_TREE), offset = 1103101952, size = 1073741824, num_stripes = 1, sub_stripes = 0, type = DATA flush-btrfs-2-7821 [001] 2155.824241: btrfs_cow_block: root = 2(EXTENT_TREE), refs = 2, orig_buf = 29388800 (orig_level = 0), cow_buf = 29396992 (cow_level = 0) flush-btrfs-2-7821 [001] 2155.824255: btrfs_cow_block: root = 4(DEV_TREE), refs = 2, orig_buf = 29372416 (orig_level = 0), cow_buf = 29401088 (cow_level = 0) flush-btrfs-2-7821 [000] 2155.824329: btrfs_cow_block: root = 3(CHUNK_TREE), refs = 2, orig_buf = 20971520 (orig_level = 0), cow_buf = 20975616 (cow_level = 0) btrfs-endio-wri-7800 [001] 2155.898019: btrfs_cow_block: root = 5(FS_TREE), refs = 2, orig_buf = 29384704 (orig_level = 0), cow_buf = 29405184 (cow_level = 0) btrfs-endio-wri-7800 [001] 2155.898043: btrfs_cow_block: root = 7(CSUM_TREE), refs = 2, orig_buf = 29376512 (orig_level = 0), cow_buf = 29409280 (cow_level = 0) Here is what I have added: 1) ordere_extent: btrfs_ordered_extent_add btrfs_ordered_extent_remove btrfs_ordered_extent_start btrfs_ordered_extent_put These provide critical information to understand how ordered_extents are updated. 2) extent_map: btrfs_get_extent extent_map is used in both read and write cases, and it is useful for tracking how btrfs specific IO is running. 3) writepage: __extent_writepage btrfs_writepage_end_io_hook Pages are cirtical resourses and produce a lot of corner cases during writeback, so it is valuable to know how page is written to disk. 4) inode: btrfs_inode_new btrfs_inode_request btrfs_inode_evict These can show where and when a inode is created, when a inode is evicted. 5) sync: btrfs_sync_file btrfs_sync_fs These show sync arguments. 6) transaction: btrfs_transaction_commit In transaction based filesystem, it will be useful to know the generation and who does commit. 7) back reference and cow: btrfs_delayed_tree_ref btrfs_delayed_data_ref btrfs_delayed_ref_head btrfs_cow_block Btrfs natively supports back references, these tracepoints are helpful on understanding btrfs's COW mechanism. 8) chunk: btrfs_chunk_alloc btrfs_chunk_free Chunk is a link between physical offset and logical offset, and stands for space infomation in btrfs, and these are helpful on tracing space things. 9) reserved_extent: btrfs_reserved_extent_alloc btrfs_reserved_extent_free These can show how btrfs uses its space. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 3 + fs/btrfs/ctree.h | 1 + fs/btrfs/delayed-ref.c | 6 + fs/btrfs/extent-tree.c | 4 + fs/btrfs/extent_io.c | 2 + fs/btrfs/file.c | 1 + fs/btrfs/inode.c | 12 + fs/btrfs/ordered-data.c | 8 + fs/btrfs/super.c | 5 + fs/btrfs/transaction.c | 2 + fs/btrfs/volumes.c | 16 +- fs/btrfs/volumes.h | 11 + include/trace/events/btrfs.h | 667 +++++++++++++++++++++++++++++++++++++++++++ 13 files changed, 727 insertions(+), 11 deletions(-) create mode 100644 include/trace/events/btrfs.h (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 8680110f0a5a..465b5d7d6b48 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -535,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0); + + trace_btrfs_cow_block(root, buf, *cow_ret); + return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0ee679b6c1b7..9d0f59142afa 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "extent_io.h" #include "extent_map.h" diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index e807b143b857..bce28f653899 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans, INIT_LIST_HEAD(&head_ref->cluster); mutex_init(&head_ref->mutex); + trace_btrfs_delayed_ref_head(ref, head_ref, action); + existing = tree_insert(&delayed_refs->root, &ref->rb_node); if (existing) { @@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans, } full_ref->level = level; + trace_btrfs_delayed_tree_ref(ref, full_ref, action); + existing = tree_insert(&delayed_refs->root, &ref->rb_node); if (existing) { @@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans, full_ref->objectid = owner; full_ref->offset = offset; + trace_btrfs_delayed_data_ref(ref, full_ref, action); + existing = tree_insert(&delayed_refs->root, &ref->rb_node); if (existing) { diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index cd794c35a636..86ea471d3801 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5412,6 +5412,8 @@ again: dump_space_info(sinfo, num_bytes, 1); } + trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); + return ret; } @@ -5433,6 +5435,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) update_reserved_bytes(cache, len, 0, 1); btrfs_put_block_group(cache); + trace_btrfs_reserved_extent_free(root, start, len); + return ret; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1bbd26b4fc5c..77c65a0bea34 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2192,6 +2192,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, else write_flags = WRITE; + trace___extent_writepage(page, inode, wbc); + WARN_ON(!PageLocked(page)); pg_offset = i_size & (PAGE_CACHE_SIZE - 1); if (page->index > end_index || diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a85b044cf39e..656bc0a892b1 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1205,6 +1205,7 @@ int btrfs_sync_file(struct file *file, int datasync) int ret = 0; struct btrfs_trans_handle *trans; + trace_btrfs_sync_file(file, datasync); /* we wait first, since the writeback may change the inode */ root->log_batch++; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e9813bd7d556..eaa271484199 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1787,6 +1787,8 @@ out: static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate) { + trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); + ClearPagePrivate2(page); return btrfs_finish_ordered_io(page->mapping->host, start, end); } @@ -3718,6 +3720,8 @@ void btrfs_evict_inode(struct inode *inode) unsigned long nr; int ret; + trace_btrfs_inode_evict(inode); + truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || root == root->fs_info->tree_root)) @@ -4510,6 +4514,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, return ERR_PTR(-ENOMEM); if (dir) { + trace_btrfs_inode_request(dir); + ret = btrfs_set_inode_index(dir, index); if (ret) { iput(inode); @@ -4584,6 +4590,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, insert_inode_hash(inode); inode_tree_add(inode); + + trace_btrfs_inode_new(inode); + return inode; fail: if (dir) @@ -5261,6 +5270,9 @@ insert: } write_unlock(&em_tree->lock); out: + + trace_btrfs_get_extent(root, em); + if (path) btrfs_free_path(path); if (trans) { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 083a55477375..a1c940425307 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -202,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, INIT_LIST_HEAD(&entry->list); INIT_LIST_HEAD(&entry->root_extent_list); + trace_btrfs_ordered_extent_add(inode, entry); + spin_lock(&tree->lock); node = tree_insert(&tree->tree, file_offset, &entry->rb_node); @@ -387,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) struct list_head *cur; struct btrfs_ordered_sum *sum; + trace_btrfs_ordered_extent_put(entry->inode, entry); + if (atomic_dec_and_test(&entry->refs)) { while (!list_empty(&entry->list)) { cur = entry->list.next; @@ -420,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, spin_lock(&root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); + trace_btrfs_ordered_extent_remove(inode, entry); + /* * we have no more ordered extents for this inode and * no dirty pages. We can safely remove it from the @@ -585,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode, u64 start = entry->file_offset; u64 end = start + entry->len - 1; + trace_btrfs_ordered_extent_start(inode, entry); + /* * pages in the range can be dirty, clean or writeback. We * start IO on any dirty ones so the wait doesn't stall waiting diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d39a9895d932..2edfc039f098 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -52,6 +52,9 @@ #include "export.h" #include "compression.h" +#define CREATE_TRACE_POINTS +#include + static const struct super_operations btrfs_super_ops; static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, @@ -620,6 +623,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) struct btrfs_root *root = btrfs_sb(sb); int ret; + trace_btrfs_sync_fs(wait); + if (!wait) { filemap_flush(root->fs_info->btree_inode->i_mapping); return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3d73c8d93bbb..5b4bc685bb0e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1389,6 +1389,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, put_transaction(cur_trans); put_transaction(cur_trans); + trace_btrfs_transaction_commit(root); + mutex_unlock(&root->fs_info->trans_mutex); if (current->journal_info == trans) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd13eb81ee40..8ba3c9ebff93 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -33,17 +33,6 @@ #include "volumes.h" #include "async-thread.h" -struct map_lookup { - u64 type; - int io_align; - int io_width; - int stripe_len; - int sector_size; - int num_stripes; - int sub_stripes; - struct btrfs_bio_stripe stripes[]; -}; - static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_device *device); @@ -1923,6 +1912,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, BUG_ON(ret); + trace_btrfs_chunk_free(root, map, chunk_offset, em->len); + if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset); BUG_ON(ret); @@ -2650,6 +2641,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, *num_bytes = chunk_bytes_by_type(type, calc_size, map->num_stripes, sub_stripes); + trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes); + em = alloc_extent_map(GFP_NOFS); if (!em) { ret = -ENOMEM; @@ -2758,6 +2751,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans, item_size); BUG_ON(ret); } + kfree(chunk); return 0; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7fb59d45fe8c..7b38d0668b51 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -145,6 +145,17 @@ struct btrfs_device_info { u64 max_avail; }; +struct map_lookup { + u64 type; + int io_align; + int io_width; + int stripe_len; + int sector_size; + int num_stripes; + int sub_stripes; + struct btrfs_bio_stripe stripes[]; +}; + /* Used to sort the devices by max_avail(descending sort) */ int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h new file mode 100644 index 000000000000..f445cff66ab7 --- /dev/null +++ b/include/trace/events/btrfs.h @@ -0,0 +1,667 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM btrfs + +#if !defined(_TRACE_BTRFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_BTRFS_H + +#include +#include + +struct btrfs_root; +struct btrfs_fs_info; +struct btrfs_inode; +struct extent_map; +struct btrfs_ordered_extent; +struct btrfs_delayed_ref_node; +struct btrfs_delayed_tree_ref; +struct btrfs_delayed_data_ref; +struct btrfs_delayed_ref_head; +struct map_lookup; +struct extent_buffer; + +#define show_ref_type(type) \ + __print_symbolic(type, \ + { BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \ + { BTRFS_EXTENT_DATA_REF_KEY, "EXTENT_DATA_REF" }, \ + { BTRFS_EXTENT_REF_V0_KEY, "EXTENT_REF_V0" }, \ + { BTRFS_SHARED_BLOCK_REF_KEY, "SHARED_BLOCK_REF" }, \ + { BTRFS_SHARED_DATA_REF_KEY, "SHARED_DATA_REF" }) + +#define __show_root_type(obj) \ + __print_symbolic(obj, \ + { BTRFS_ROOT_TREE_OBJECTID, "ROOT_TREE" }, \ + { BTRFS_EXTENT_TREE_OBJECTID, "EXTENT_TREE" }, \ + { BTRFS_CHUNK_TREE_OBJECTID, "CHUNK_TREE" }, \ + { BTRFS_DEV_TREE_OBJECTID, "DEV_TREE" }, \ + { BTRFS_FS_TREE_OBJECTID, "FS_TREE" }, \ + { BTRFS_ROOT_TREE_DIR_OBJECTID, "ROOT_TREE_DIR" }, \ + { BTRFS_CSUM_TREE_OBJECTID, "CSUM_TREE" }, \ + { BTRFS_TREE_LOG_OBJECTID, "TREE_LOG" }, \ + { BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \ + { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }) + +#define show_root_type(obj) \ + obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) || \ + (obj <= BTRFS_CSUM_TREE_OBJECTID )) ? __show_root_type(obj) : "-" + +TRACE_EVENT(btrfs_transaction_commit, + + TP_PROTO(struct btrfs_root *root), + + TP_ARGS(root), + + TP_STRUCT__entry( + __field( u64, generation ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->generation = root->fs_info->generation; + __entry->root_objectid = root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), gen = %llu", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->generation) +); + +DECLARE_EVENT_CLASS(btrfs__inode, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( blkcnt_t, blocks ) + __field( u64, disk_i_size ) + __field( u64, generation ) + __field( u64, last_trans ) + __field( u64, logged_trans ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->blocks = inode->i_blocks; + __entry->disk_i_size = BTRFS_I(inode)->disk_i_size; + __entry->generation = BTRFS_I(inode)->generation; + __entry->last_trans = BTRFS_I(inode)->last_trans; + __entry->logged_trans = BTRFS_I(inode)->logged_trans; + __entry->root_objectid = + BTRFS_I(inode)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, " + "disk_i_size = %llu, last_trans = %llu, logged_trans = %llu", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->generation, + (unsigned long)__entry->ino, + (unsigned long long)__entry->blocks, + (unsigned long long)__entry->disk_i_size, + (unsigned long long)__entry->last_trans, + (unsigned long long)__entry->logged_trans) +); + +DEFINE_EVENT(btrfs__inode, btrfs_inode_new, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(btrfs__inode, btrfs_inode_request, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +#define __show_map_type(type) \ + __print_symbolic(type, \ + { EXTENT_MAP_LAST_BYTE, "LAST_BYTE" }, \ + { EXTENT_MAP_HOLE, "HOLE" }, \ + { EXTENT_MAP_INLINE, "INLINE" }, \ + { EXTENT_MAP_DELALLOC, "DELALLOC" }) + +#define show_map_type(type) \ + type, (type >= EXTENT_MAP_LAST_BYTE) ? "-" : __show_map_type(type) + +#define show_map_flags(flag) \ + __print_flags(flag, "|", \ + { EXTENT_FLAG_PINNED, "PINNED" }, \ + { EXTENT_FLAG_COMPRESSED, "COMPRESSED" }, \ + { EXTENT_FLAG_VACANCY, "VACANCY" }, \ + { EXTENT_FLAG_PREALLOC, "PREALLOC" }) + +TRACE_EVENT(btrfs_get_extent, + + TP_PROTO(struct btrfs_root *root, struct extent_map *map), + + TP_ARGS(root, map), + + TP_STRUCT__entry( + __field( u64, root_objectid ) + __field( u64, start ) + __field( u64, len ) + __field( u64, orig_start ) + __field( u64, block_start ) + __field( u64, block_len ) + __field( unsigned long, flags ) + __field( int, refs ) + __field( unsigned int, compress_type ) + ), + + TP_fast_assign( + __entry->root_objectid = root->root_key.objectid; + __entry->start = map->start; + __entry->len = map->len; + __entry->orig_start = map->orig_start; + __entry->block_start = map->block_start; + __entry->block_len = map->block_len; + __entry->flags = map->flags; + __entry->refs = atomic_read(&map->refs); + __entry->compress_type = map->compress_type; + ), + + TP_printk("root = %llu(%s), start = %llu, len = %llu, " + "orig_start = %llu, block_start = %llu(%s), " + "block_len = %llu, flags = %s, refs = %u, " + "compress_type = %u", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->start, + (unsigned long long)__entry->len, + (unsigned long long)__entry->orig_start, + show_map_type(__entry->block_start), + (unsigned long long)__entry->block_len, + show_map_flags(__entry->flags), + __entry->refs, __entry->compress_type) +); + +#define show_ordered_flags(flags) \ + __print_symbolic(flags, \ + { BTRFS_ORDERED_IO_DONE, "IO_DONE" }, \ + { BTRFS_ORDERED_COMPLETE, "COMPLETE" }, \ + { BTRFS_ORDERED_NOCOW, "NOCOW" }, \ + { BTRFS_ORDERED_COMPRESSED, "COMPRESSED" }, \ + { BTRFS_ORDERED_PREALLOC, "PREALLOC" }, \ + { BTRFS_ORDERED_DIRECT, "DIRECT" }) + +DECLARE_EVENT_CLASS(btrfs__ordered_extent, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( u64, file_offset ) + __field( u64, start ) + __field( u64, len ) + __field( u64, disk_len ) + __field( u64, bytes_left ) + __field( unsigned long, flags ) + __field( int, compress_type ) + __field( int, refs ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->file_offset = ordered->file_offset; + __entry->start = ordered->start; + __entry->len = ordered->len; + __entry->disk_len = ordered->disk_len; + __entry->bytes_left = ordered->bytes_left; + __entry->flags = ordered->flags; + __entry->compress_type = ordered->compress_type; + __entry->refs = atomic_read(&ordered->refs); + __entry->root_objectid = + BTRFS_I(inode)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), ino = %llu, file_offset = %llu, " + "start = %llu, len = %llu, disk_len = %llu, " + "bytes_left = %llu, flags = %s, compress_type = %d, " + "refs = %d", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->ino, + (unsigned long long)__entry->file_offset, + (unsigned long long)__entry->start, + (unsigned long long)__entry->len, + (unsigned long long)__entry->disk_len, + (unsigned long long)__entry->bytes_left, + show_ordered_flags(__entry->flags), + __entry->compress_type, __entry->refs) +); + +DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_add, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered) +); + +DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_remove, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered) +); + +DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_start, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered) +); + +DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_put, + + TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered), + + TP_ARGS(inode, ordered) +); + +DECLARE_EVENT_CLASS(btrfs__writepage, + + TP_PROTO(struct page *page, struct inode *inode, + struct writeback_control *wbc), + + TP_ARGS(page, inode, wbc), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( pgoff_t, index ) + __field( long, nr_to_write ) + __field( long, pages_skipped ) + __field( loff_t, range_start ) + __field( loff_t, range_end ) + __field( char, nonblocking ) + __field( char, for_kupdate ) + __field( char, for_reclaim ) + __field( char, range_cyclic ) + __field( pgoff_t, writeback_index ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->ino = inode->i_ino; + __entry->index = page->index; + __entry->nr_to_write = wbc->nr_to_write; + __entry->pages_skipped = wbc->pages_skipped; + __entry->range_start = wbc->range_start; + __entry->range_end = wbc->range_end; + __entry->nonblocking = wbc->nonblocking; + __entry->for_kupdate = wbc->for_kupdate; + __entry->for_reclaim = wbc->for_reclaim; + __entry->range_cyclic = wbc->range_cyclic; + __entry->writeback_index = inode->i_mapping->writeback_index; + __entry->root_objectid = + BTRFS_I(inode)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, " + "nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, " + "range_end = %llu, nonblocking = %d, for_kupdate = %d, " + "for_reclaim = %d, range_cyclic = %d, writeback_index = %lu", + show_root_type(__entry->root_objectid), + (unsigned long)__entry->ino, __entry->index, + __entry->nr_to_write, __entry->pages_skipped, + __entry->range_start, __entry->range_end, + __entry->nonblocking, __entry->for_kupdate, + __entry->for_reclaim, __entry->range_cyclic, + (unsigned long)__entry->writeback_index) +); + +DEFINE_EVENT(btrfs__writepage, __extent_writepage, + + TP_PROTO(struct page *page, struct inode *inode, + struct writeback_control *wbc), + + TP_ARGS(page, inode, wbc) +); + +TRACE_EVENT(btrfs_writepage_end_io_hook, + + TP_PROTO(struct page *page, u64 start, u64 end, int uptodate), + + TP_ARGS(page, start, end, uptodate), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( pgoff_t, index ) + __field( u64, start ) + __field( u64, end ) + __field( int, uptodate ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->ino = page->mapping->host->i_ino; + __entry->index = page->index; + __entry->start = start; + __entry->end = end; + __entry->uptodate = uptodate; + __entry->root_objectid = + BTRFS_I(page->mapping->host)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, " + "end = %llu, uptodate = %d", + show_root_type(__entry->root_objectid), + (unsigned long)__entry->ino, (unsigned long)__entry->index, + (unsigned long long)__entry->start, + (unsigned long long)__entry->end, __entry->uptodate) +); + +TRACE_EVENT(btrfs_sync_file, + + TP_PROTO(struct file *file, int datasync), + + TP_ARGS(file, datasync), + + TP_STRUCT__entry( + __field( ino_t, ino ) + __field( ino_t, parent ) + __field( int, datasync ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + + __entry->ino = inode->i_ino; + __entry->parent = dentry->d_parent->d_inode->i_ino; + __entry->datasync = datasync; + __entry->root_objectid = + BTRFS_I(inode)->root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d", + show_root_type(__entry->root_objectid), + (unsigned long)__entry->ino, (unsigned long)__entry->parent, + __entry->datasync) +); + +TRACE_EVENT(btrfs_sync_fs, + + TP_PROTO(int wait), + + TP_ARGS(wait), + + TP_STRUCT__entry( + __field( int, wait ) + ), + + TP_fast_assign( + __entry->wait = wait; + ), + + TP_printk("wait = %d", __entry->wait) +); + +#define show_ref_action(action) \ + __print_symbolic(action, \ + { BTRFS_ADD_DELAYED_REF, "ADD_DELAYED_REF" }, \ + { BTRFS_DROP_DELAYED_REF, "DROP_DELAYED_REF" }, \ + { BTRFS_ADD_DELAYED_EXTENT, "ADD_DELAYED_EXTENT" }, \ + { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" }) + + +TRACE_EVENT(btrfs_delayed_tree_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_tree_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action), + + TP_STRUCT__entry( + __field( u64, bytenr ) + __field( u64, num_bytes ) + __field( int, action ) + __field( u64, parent ) + __field( u64, ref_root ) + __field( int, level ) + __field( int, type ) + ), + + TP_fast_assign( + __entry->bytenr = ref->bytenr; + __entry->num_bytes = ref->num_bytes; + __entry->action = action; + __entry->parent = full_ref->parent; + __entry->ref_root = full_ref->root; + __entry->level = full_ref->level; + __entry->type = ref->type; + ), + + TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, " + "parent = %llu(%s), ref_root = %llu(%s), level = %d, " + "type = %s", + (unsigned long long)__entry->bytenr, + (unsigned long long)__entry->num_bytes, + show_ref_action(__entry->action), + show_root_type(__entry->parent), + show_root_type(__entry->ref_root), + __entry->level, show_ref_type(__entry->type)) +); + +TRACE_EVENT(btrfs_delayed_data_ref, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_data_ref *full_ref, + int action), + + TP_ARGS(ref, full_ref, action), + + TP_STRUCT__entry( + __field( u64, bytenr ) + __field( u64, num_bytes ) + __field( int, action ) + __field( u64, parent ) + __field( u64, ref_root ) + __field( u64, owner ) + __field( u64, offset ) + __field( int, type ) + ), + + TP_fast_assign( + __entry->bytenr = ref->bytenr; + __entry->num_bytes = ref->num_bytes; + __entry->action = action; + __entry->parent = full_ref->parent; + __entry->ref_root = full_ref->root; + __entry->owner = full_ref->objectid; + __entry->offset = full_ref->offset; + __entry->type = ref->type; + ), + + TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, " + "parent = %llu(%s), ref_root = %llu(%s), owner = %llu, " + "offset = %llu, type = %s", + (unsigned long long)__entry->bytenr, + (unsigned long long)__entry->num_bytes, + show_ref_action(__entry->action), + show_root_type(__entry->parent), + show_root_type(__entry->ref_root), + (unsigned long long)__entry->owner, + (unsigned long long)__entry->offset, + show_ref_type(__entry->type)) +); + +TRACE_EVENT(btrfs_delayed_ref_head, + + TP_PROTO(struct btrfs_delayed_ref_node *ref, + struct btrfs_delayed_ref_head *head_ref, + int action), + + TP_ARGS(ref, head_ref, action), + + TP_STRUCT__entry( + __field( u64, bytenr ) + __field( u64, num_bytes ) + __field( int, action ) + __field( int, is_data ) + ), + + TP_fast_assign( + __entry->bytenr = ref->bytenr; + __entry->num_bytes = ref->num_bytes; + __entry->action = action; + __entry->is_data = head_ref->is_data; + ), + + TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d", + (unsigned long long)__entry->bytenr, + (unsigned long long)__entry->num_bytes, + show_ref_action(__entry->action), + __entry->is_data) +); + +#define show_chunk_type(type) \ + __print_flags(type, "|", \ + { BTRFS_BLOCK_GROUP_DATA, "DATA" }, \ + { BTRFS_BLOCK_GROUP_SYSTEM, "SYSTEM"}, \ + { BTRFS_BLOCK_GROUP_METADATA, "METADATA"}, \ + { BTRFS_BLOCK_GROUP_RAID0, "RAID0" }, \ + { BTRFS_BLOCK_GROUP_RAID1, "RAID1" }, \ + { BTRFS_BLOCK_GROUP_DUP, "DUP" }, \ + { BTRFS_BLOCK_GROUP_RAID10, "RAID10"}) + +DECLARE_EVENT_CLASS(btrfs__chunk, + + TP_PROTO(struct btrfs_root *root, struct map_lookup *map, + u64 offset, u64 size), + + TP_ARGS(root, map, offset, size), + + TP_STRUCT__entry( + __field( int, num_stripes ) + __field( u64, type ) + __field( int, sub_stripes ) + __field( u64, offset ) + __field( u64, size ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + __entry->num_stripes = map->num_stripes; + __entry->type = map->type; + __entry->sub_stripes = map->sub_stripes; + __entry->offset = offset; + __entry->size = size; + __entry->root_objectid = root->root_key.objectid; + ), + + TP_printk("root = %llu(%s), offset = %llu, size = %llu, " + "num_stripes = %d, sub_stripes = %d, type = %s", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->offset, + (unsigned long long)__entry->size, + __entry->num_stripes, __entry->sub_stripes, + show_chunk_type(__entry->type)) +); + +DEFINE_EVENT(btrfs__chunk, btrfs_chunk_alloc, + + TP_PROTO(struct btrfs_root *root, struct map_lookup *map, + u64 offset, u64 size), + + TP_ARGS(root, map, offset, size) +); + +DEFINE_EVENT(btrfs__chunk, btrfs_chunk_free, + + TP_PROTO(struct btrfs_root *root, struct map_lookup *map, + u64 offset, u64 size), + + TP_ARGS(root, map, offset, size) +); + +TRACE_EVENT(btrfs_cow_block, + + TP_PROTO(struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *cow), + + TP_ARGS(root, buf, cow), + + TP_STRUCT__entry( + __field( u64, root_objectid ) + __field( u64, buf_start ) + __field( int, refs ) + __field( u64, cow_start ) + __field( int, buf_level ) + __field( int, cow_level ) + ), + + TP_fast_assign( + __entry->root_objectid = root->root_key.objectid; + __entry->buf_start = buf->start; + __entry->refs = atomic_read(&buf->refs); + __entry->cow_start = cow->start; + __entry->buf_level = btrfs_header_level(buf); + __entry->cow_level = btrfs_header_level(cow); + ), + + TP_printk("root = %llu(%s), refs = %d, orig_buf = %llu " + "(orig_level = %d), cow_buf = %llu (cow_level = %d)", + show_root_type(__entry->root_objectid), + __entry->refs, + (unsigned long long)__entry->buf_start, + __entry->buf_level, + (unsigned long long)__entry->cow_start, + __entry->cow_level) +); + +DECLARE_EVENT_CLASS(btrfs__reserved_extent, + + TP_PROTO(struct btrfs_root *root, u64 start, u64 len), + + TP_ARGS(root, start, len), + + TP_STRUCT__entry( + __field( u64, root_objectid ) + __field( u64, start ) + __field( u64, len ) + ), + + TP_fast_assign( + __entry->root_objectid = root->root_key.objectid; + __entry->start = start; + __entry->len = len; + ), + + TP_printk("root = %llu(%s), start = %llu, len = %llu", + show_root_type(__entry->root_objectid), + (unsigned long long)__entry->start, + (unsigned long long)__entry->len) +); + +DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_alloc, + + TP_PROTO(struct btrfs_root *root, u64 start, u64 len), + + TP_ARGS(root, start, len) +); + +DEFINE_EVENT(btrfs__reserved_extent, btrfs_reserved_extent_free, + + TP_PROTO(struct btrfs_root *root, u64 start, u64 len), + + TP_ARGS(root, start, len) +); + +#endif /* _TRACE_BTRFS_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 75e7cb7fe0c391561bd3af36515be3f3c64a04c6 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Tue, 22 Mar 2011 10:12:20 +0000 Subject: Btrfs: Per file/directory controls for COW and compression Data compression and data cow are controlled across the entire FS by mount options right now. ioctls are needed to set this on a per file or per directory basis. This has been proposed previously, but VFS developers wanted us to use generic ioctls rather than btrfs-specific ones. According to Chris's comment, there should be just one true compression method(probably LZO) stored in the super. However, before this, we would wait for that one method is stable enough to be adopted into the super. So I list it as a long term goal, and just store it in ram today. After applying this patch, we can use the generic "FS_IOC_SETFLAGS" ioctl to control file and directory's datacow and compression attribute. NOTE: - The compression type is selected by such rules: If we mount btrfs with compress options, ie, zlib/lzo, the type is it. Otherwise, we'll use the default compress type (zlib today). v1->v2: - rebase to the latest btrfs. v2->v3: - fix a problem, i.e. when a file is set NOCOW via mount option, then this NOCOW will be screwed by inheritance from parent directory. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 6 ++++++ fs/btrfs/inode.c | 31 ++++++++++++++++++++++++++++--- fs/btrfs/ioctl.c | 41 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 72 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9d0f59142afa..8302ecd4197f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1282,6 +1282,7 @@ struct btrfs_root { #define BTRFS_INODE_NODUMP (1 << 8) #define BTRFS_INODE_NOATIME (1 << 9) #define BTRFS_INODE_DIRSYNC (1 << 10) +#define BTRFS_INODE_COMPRESS (1 << 11) /* some macros to generate set/get funcs for the struct fields. This * assumes there is a lefoo_to_cpu for every type, so lets make a simple diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2bdb124333ab..125639ddaffe 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1854,6 +1854,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); + /* + * In the long term, we'll store the compression type in the super + * block, and it'll be used for per file compression control. + */ + fs_info->compress_type = BTRFS_COMPRESS_ZLIB; + ret = btrfs_parse_options(tree_root, options); if (ret) { err = ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eaa271484199..7a7a202b82ab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -384,7 +384,8 @@ again: */ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && (btrfs_test_opt(root, COMPRESS) || - (BTRFS_I(inode)->force_compress))) { + (BTRFS_I(inode)->force_compress) || + (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); @@ -1256,7 +1257,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); else if (!btrfs_test_opt(root, COMPRESS) && - !(BTRFS_I(inode)->force_compress)) + !(BTRFS_I(inode)->force_compress) && + !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) ret = cow_file_range(inode, locked_page, start, end, page_started, nr_written, 1); else @@ -4584,7 +4586,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, if ((mode & S_IFREG)) { if (btrfs_test_opt(root, NODATASUM)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; - if (btrfs_test_opt(root, NODATACOW)) + if (btrfs_test_opt(root, NODATACOW) || + (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; } @@ -6866,6 +6869,26 @@ static int btrfs_getattr(struct vfsmount *mnt, return 0; } +/* + * If a file is moved, it will inherit the cow and compression flags of the new + * directory. + */ +static void fixup_inode_flags(struct inode *dir, struct inode *inode) +{ + struct btrfs_inode *b_dir = BTRFS_I(dir); + struct btrfs_inode *b_inode = BTRFS_I(inode); + + if (b_dir->flags & BTRFS_INODE_NODATACOW) + b_inode->flags |= BTRFS_INODE_NODATACOW; + else + b_inode->flags &= ~BTRFS_INODE_NODATACOW; + + if (b_dir->flags & BTRFS_INODE_COMPRESS) + b_inode->flags |= BTRFS_INODE_COMPRESS; + else + b_inode->flags &= ~BTRFS_INODE_COMPRESS; +} + static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { @@ -6999,6 +7022,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } + fixup_inode_flags(new_dir, old_inode); + ret = btrfs_add_link(trans, new_dir, old_inode, new_dentry->d_name.name, new_dentry->d_name.len, 0, index); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 88d3cb2eaf75..32c980ae0f1c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -138,6 +138,24 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg) return 0; } +static int check_flags(unsigned int flags) +{ + if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ + FS_NOATIME_FL | FS_NODUMP_FL | \ + FS_SYNC_FL | FS_DIRSYNC_FL | \ + FS_NOCOMP_FL | FS_COMPR_FL | \ + FS_NOCOW_FL | FS_COW_FL)) + return -EOPNOTSUPP; + + if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) + return -EINVAL; + + if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) + return -EINVAL; + + return 0; +} + static int btrfs_ioctl_setflags(struct file *file, void __user *arg) { struct inode *inode = file->f_path.dentry->d_inode; @@ -153,10 +171,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; - if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ - FS_NOATIME_FL | FS_NODUMP_FL | \ - FS_SYNC_FL | FS_DIRSYNC_FL)) - return -EOPNOTSUPP; + ret = check_flags(flags); + if (ret) + return ret; if (!is_owner_or_cap(inode)) return -EACCES; @@ -201,6 +218,22 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) else ip->flags &= ~BTRFS_INODE_DIRSYNC; + /* + * The COMPRESS flag can only be changed by users, while the NOCOMPRESS + * flag may be changed automatically if compression code won't make + * things smaller. + */ + if (flags & FS_NOCOMP_FL) { + ip->flags &= ~BTRFS_INODE_COMPRESS; + ip->flags |= BTRFS_INODE_NOCOMPRESS; + } else if (flags & FS_COMPR_FL) { + ip->flags |= BTRFS_INODE_COMPRESS; + ip->flags &= ~BTRFS_INODE_NOCOMPRESS; + } + if (flags & FS_NOCOW_FL) + ip->flags |= BTRFS_INODE_NODATACOW; + else if (flags & FS_COW_FL) + ip->flags &= ~BTRFS_INODE_NODATACOW; trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); -- cgit v1.2.3 From b4d00d569a49fcef02195635dbf8d15904b1fb63 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 24 Mar 2011 10:24:25 +0000 Subject: Btrfs: make update_reserved_bytes() public Make the function public as we should update the reserved extents calculations after taking out an extent for trimming. Signed-off-by: Li Dongyang Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/extent-tree.c | 16 +++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8302ecd4197f..9e21176cdf57 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2157,6 +2157,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, u64 root_objectid, u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo); int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7922f296420d..0671f5b77eb8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,8 +36,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo); static int __btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, @@ -4233,8 +4231,8 @@ int btrfs_pin_extent(struct btrfs_root *root, * update size of reserved extents. this function may return -EAGAIN * if 'reserve' is true or 'sinfo' is false. */ -static int update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int sinfo) +int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int sinfo) { int ret = 0; if (sinfo) { @@ -4714,10 +4712,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); btrfs_add_free_space(cache, buf->start, buf->len); - ret = update_reserved_bytes(cache, buf->len, 0, 0); + ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); if (ret == -EAGAIN) { /* block group became read-only */ - update_reserved_bytes(cache, buf->len, 0, 1); + btrfs_update_reserved_bytes(cache, buf->len, 0, 1); goto out; } @@ -5206,7 +5204,7 @@ checks: search_start - offset); BUG_ON(offset > search_start); - ret = update_reserved_bytes(block_group, num_bytes, 1, + ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, (data & BTRFS_BLOCK_GROUP_DATA)); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); @@ -5432,7 +5430,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) ret = btrfs_discard_extent(root, start, len); btrfs_add_free_space(cache, start, len); - update_reserved_bytes(cache, len, 0, 1); + btrfs_update_reserved_bytes(cache, len, 0, 1); btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, start, len); @@ -5634,7 +5632,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, put_caching_control(caching_ctl); } - ret = update_reserved_bytes(block_group, ins->offset, 1, 1); + ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); BUG_ON(ret); btrfs_put_block_group(block_group); ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, -- cgit v1.2.3 From 5378e60734f5b7bfe1b43dc191aaf6131c1befe7 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 24 Mar 2011 10:24:27 +0000 Subject: Btrfs: adjust btrfs_discard_extent() return errors and trimmed bytes Callers of btrfs_discard_extent() should check if we are mounted with -o discard, as we want to make fitrim to work even the fs is not mounted with -o discard. Also we should use REQ_DISCARD to map the free extent to get a full mapping, last we only return errors if 1. the error is not a EOPNOTSUPP 2. no device supports discard Signed-off-by: Li Dongyang Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 +- fs/btrfs/disk-io.c | 5 ++++- fs/btrfs/extent-tree.c | 43 ++++++++++++++++++++++++++----------------- 3 files changed, 31 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9e21176cdf57..a18b7bc2b22c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2229,7 +2229,7 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end); int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes); + u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 125639ddaffe..8ecc5419d8b6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3054,7 +3054,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, break; /* opt_discard */ - ret = btrfs_error_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_error_discard_extent(root, start, + end + 1 - start, + NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); btrfs_error_unpin_extent_range(root, start, end); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0671f5b77eb8..e990d2d1ba4a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1738,39 +1738,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, return ret; } -static void btrfs_issue_discard(struct block_device *bdev, +static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len) { - blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); + return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); } static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes) + u64 num_bytes, u64 *actual_bytes) { int ret; - u64 map_length = num_bytes; + u64 discarded_bytes = 0; struct btrfs_multi_bio *multi = NULL; - if (!btrfs_test_opt(root, DISCARD)) - return 0; /* Tell the block device(s) that the sectors can be discarded */ - ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, - bytenr, &map_length, &multi, 0); + ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, + bytenr, &num_bytes, &multi, 0); if (!ret) { struct btrfs_bio_stripe *stripe = multi->stripes; int i; - if (map_length > num_bytes) - map_length = num_bytes; for (i = 0; i < multi->num_stripes; i++, stripe++) { - btrfs_issue_discard(stripe->dev->bdev, - stripe->physical, - map_length); + ret = btrfs_issue_discard(stripe->dev->bdev, + stripe->physical, + stripe->length); + if (!ret) + discarded_bytes += stripe->length; + else if (ret != -EOPNOTSUPP) + break; } kfree(multi); } + if (discarded_bytes && ret == -EOPNOTSUPP) + ret = 0; + + if (actual_bytes) + *actual_bytes = discarded_bytes; + return ret; } @@ -4371,7 +4377,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, if (ret) break; - ret = btrfs_discard_extent(root, start, end + 1 - start); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_discard_extent(root, start, + end + 1 - start, NULL); clear_extent_dirty(unpin, start, end, GFP_NOFS); unpin_extent_range(root, start, end); @@ -5427,7 +5435,8 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return -ENOSPC; } - ret = btrfs_discard_extent(root, start, len); + if (btrfs_test_opt(root, DISCARD)) + ret = btrfs_discard_extent(root, start, len, NULL); btrfs_add_free_space(cache, start, len); btrfs_update_reserved_bytes(cache, len, 0, 1); @@ -8765,7 +8774,7 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) } int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, - u64 num_bytes) + u64 num_bytes, u64 *actual_bytes) { - return btrfs_discard_extent(root, bytenr, num_bytes); + return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); } -- cgit v1.2.3 From f7039b1d5c32241f87a513e33120db36bf30264d Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 24 Mar 2011 10:24:28 +0000 Subject: Btrfs: add btrfs_trim_fs() to handle FITRIM We take an free extent out from allocator, trim it, then put it back, but before we trim the block group, we should make sure the block group is cached, so plus a little change to make cache_block_group() run without a transaction. Signed-off-by: Li Dongyang Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 50 +++++++++++++++++++++++- fs/btrfs/free-space-cache.c | 92 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/free-space-cache.h | 2 + fs/btrfs/ioctl.c | 46 +++++++++++++++++++++++ 5 files changed, 190 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a18b7bc2b22c..93a0191aded6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2232,6 +2232,7 @@ int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e990d2d1ba4a..1efeda3b2f6f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -440,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * allocate blocks for the tree root we can't do the fast caching since * we likely hold important locks. */ - if (!trans->transaction->in_commit && + if (trans && (!trans->transaction->in_commit) && (root && root != root->fs_info->tree_root)) { spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { @@ -8778,3 +8778,51 @@ int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, { return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); } + +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *cache = NULL; + u64 group_trimmed; + u64 start; + u64 end; + u64 trimmed = 0; + int ret = 0; + + cache = btrfs_lookup_block_group(fs_info, range->start); + + while (cache) { + if (cache->key.objectid >= (range->start + range->len)) { + btrfs_put_block_group(cache); + break; + } + + start = max(range->start, cache->key.objectid); + end = min(range->start + range->len, + cache->key.objectid + cache->key.offset); + + if (end - start >= range->minlen) { + if (!block_group_cache_done(cache)) { + ret = cache_block_group(cache, NULL, root, 0); + if (!ret) + wait_block_group_cache_done(cache); + } + ret = btrfs_trim_block_group(cache, + &group_trimmed, + start, + end, + range->minlen); + + trimmed += group_trimmed; + if (ret) { + btrfs_put_block_group(cache); + break; + } + } + + cache = next_block_group(fs_info->tree_root, cache); + } + + range->len = trimmed; + return ret; +} diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f03ef97c3b21..0037427d8a9d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2178,3 +2178,95 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) cluster->block_group = NULL; } +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen) +{ + struct btrfs_free_space *entry = NULL; + struct btrfs_fs_info *fs_info = block_group->fs_info; + u64 bytes = 0; + u64 actually_trimmed; + int ret = 0; + + *trimmed = 0; + + while (start < end) { + spin_lock(&block_group->tree_lock); + + if (block_group->free_space < minlen) { + spin_unlock(&block_group->tree_lock); + break; + } + + entry = tree_search_offset(block_group, start, 0, 1); + if (!entry) + entry = tree_search_offset(block_group, + offset_to_bitmap(block_group, + start), + 1, 1); + + if (!entry || entry->offset >= end) { + spin_unlock(&block_group->tree_lock); + break; + } + + if (entry->bitmap) { + ret = search_bitmap(block_group, entry, &start, &bytes); + if (!ret) { + if (start >= end) { + spin_unlock(&block_group->tree_lock); + break; + } + bytes = min(bytes, end - start); + bitmap_clear_bits(block_group, entry, + start, bytes); + if (entry->bytes == 0) + free_bitmap(block_group, entry); + } else { + start = entry->offset + BITS_PER_BITMAP * + block_group->sectorsize; + spin_unlock(&block_group->tree_lock); + ret = 0; + continue; + } + } else { + start = entry->offset; + bytes = min(entry->bytes, end - start); + unlink_free_space(block_group, entry); + kfree(entry); + } + + spin_unlock(&block_group->tree_lock); + + if (bytes >= minlen) { + int update_ret; + update_ret = btrfs_update_reserved_bytes(block_group, + bytes, 1, 1); + + ret = btrfs_error_discard_extent(fs_info->extent_root, + start, + bytes, + &actually_trimmed); + + btrfs_add_free_space(block_group, + start, bytes); + if (!update_ret) + btrfs_update_reserved_bytes(block_group, + bytes, 0, 1); + + if (ret) + break; + *trimmed += actually_trimmed; + } + start += bytes; + bytes = 0; + + if (fatal_signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + cond_resched(); + } + + return ret; +} diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index e49ca5c321b5..65c3b935289f 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, int btrfs_return_cluster_to_free_space( struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster); +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen); #endif diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 32c980ae0f1c..649f47d2afb4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -258,6 +259,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) return put_user(inode->i_generation, arg); } +static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) +{ + struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_device *device; + struct request_queue *q; + struct fstrim_range range; + u64 minlen = ULLONG_MAX; + u64 num_devices = 0; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&fs_info->fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { + if (!device->bdev) + continue; + q = bdev_get_queue(device->bdev); + if (blk_queue_discard(q)) { + num_devices++; + minlen = min((u64)q->limits.discard_granularity, + minlen); + } + } + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + if (!num_devices) + return -EOPNOTSUPP; + + if (copy_from_user(&range, arg, sizeof(range))) + return -EFAULT; + + range.minlen = max(range.minlen, minlen); + ret = btrfs_trim_fs(root, &range); + if (ret < 0) + return ret; + + if (copy_to_user(arg, &range, sizeof(range))) + return -EFAULT; + + return 0; +} + static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, @@ -2426,6 +2470,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_setflags(file, argp); case FS_IOC_GETVERSION: return btrfs_ioctl_getversion(file, argp); + case FITRIM: + return btrfs_ioctl_fitrim(file, argp); case BTRFS_IOC_SNAP_CREATE: return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SNAP_CREATE_V2: -- cgit v1.2.3 From c59021f846881a957ac5afe456d0f59d6a517b61 Mon Sep 17 00:00:00 2001 From: liubo Date: Mon, 7 Mar 2011 02:13:14 +0000 Subject: Btrfs: fix OOPS of empty filesystem after balance btrfs will remove unused block groups after balance. When a empty filesystem is balanced, the block group with tag "DATA" may be dropped, and after umount and mount again, it will not find "DATA" space_info and lead to OOPS. So we initial the necessary space_infos(DATA, SYSTEM, METADATA) to avoid OOPS. Reported-by: Daniel J Blueman Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 6 ++++++ fs/btrfs/extent-tree.c | 23 +++++++++++++++++++++++ 3 files changed, 30 insertions(+) (limited to 'fs/btrfs/ctree.h') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 93a0191aded6..d47ce8307854 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2234,6 +2234,7 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); +int btrfs_init_space_info(struct btrfs_fs_info *fs_info); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8ecc5419d8b6..b3fc8475870f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2065,6 +2065,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->metadata_alloc_profile = (u64)-1; fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; + ret = btrfs_init_space_info(fs_info); + if (ret) { + printk(KERN_ERR "Failed to initial space info: %d\n", ret); + goto fail_block_groups; + } + ret = btrfs_read_block_groups(extent_root); if (ret) { printk(KERN_ERR "Failed to read block groups: %d\n", ret); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a561060f5ffb..f619c3cb13b7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8778,6 +8778,29 @@ out: return ret; } +int btrfs_init_space_info(struct btrfs_fs_info *fs_info) +{ + struct btrfs_space_info *space_info; + int ret; + + ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0, + &space_info); + if (ret) + return ret; + + ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0, + &space_info); + if (ret) + return ret; + + ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0, + &space_info); + if (ret) + return ret; + + return ret; +} + int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) { return unpin_extent_range(root, start, end); -- cgit v1.2.3