From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 24 Mar 2010 17:04:11 +0900
Subject: include cleanup: Update gfp.h and slab.h includes to prepare for
 breaking implicit slab.h inclusion from percpu.h

percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files.  percpu.h includes slab.h which
in turn includes gfp.h, making everything defined by the two files
universally available and complicating inclusion dependencies.

The percpu.h -> slab.h dependency is about to be removed.  Prepare for
this change by updating users of gfp and slab facilities to include
those headers directly instead of assuming availability.  As this
conversion needs to touch a large number of source files, the
following script was used as the basis of the conversion.

  http://userweb.kernel.org/~tj/misc/slabh-sweep.py

The script does the following.

* Scan files for gfp and slab usages and update includes such that
  only the necessary includes are there, i.e. if only gfp is used,
  gfp.h; if slab is used, slab.h.

* When the script inserts a new include, it looks at the include
  blocks and tries to place the new include so that its order conforms
  to its surroundings.  It's put in the include block which contains
  core kernel includes, in the same order that the rest are ordered -
  alphabetical, Christmas tree, rev-Xmas-tree, or at the end if there
  doesn't seem to be any matching order.

* If the script can't find a place to put a new include (mostly
  because the file doesn't have a fitting include block), it prints
  out an error message indicating which .h file needs to be added to
  the file.

The conversion was done in the following steps.

1. The initial automatic conversion of all .c files updated slightly
   over 4000 files, deleting around 700 includes and adding ~480 gfp.h
   and ~3000 slab.h inclusions.  The script emitted errors for ~400
   files.

2. Each error was manually checked.  Some didn't need the inclusion,
   some needed manual addition, while adding it to the implementation
   .h or embedding .c file was more appropriate for others.  This step
   added inclusions to around 150 files.

3. The script was run again and the output was compared to the edits
   from #2 to make sure no file was left behind.

4. Several build tests were done and a couple of problems were fixed,
   e.g. lib/decompress_*.c used malloc/free() wrappers around slab
   APIs, requiring slab.h to be added manually.

5. The script was run on all .h files, but without automatically
   editing them, as sprinkling gfp.h and slab.h inclusions around .h
   files could easily lead to inclusion dependency hell.  Most gfp.h
   inclusion directives were ignored as stuff from gfp.h was usually
   widely available and often used in preprocessor macros.  Each
   slab.h inclusion directive was examined and added manually as
   necessary.

6. percpu.h was updated not to include slab.h.

7. Build tests were done on the following configurations and failures
   were fixed.  CONFIG_GCOV_KERNEL was turned off for all tests (as my
   distributed build env didn't work with gcov compiles) and a few
   more options had to be turned off depending on archs to make things
   build (like ipr on powerpc/64, which failed due to missing writeq).

   * x86 and x86_64 UP and SMP allmodconfig and a custom test config.
   * powerpc and powerpc64 SMP allmodconfig
   * sparc and sparc64 SMP allmodconfig
   * ia64 SMP allmodconfig
   * s390 SMP allmodconfig
   * alpha SMP allmodconfig
   * um on x86_64 SMP allmodconfig

8.
percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- fs/btrfs/acl.c | 1 + fs/btrfs/async-thread.c | 1 + fs/btrfs/compression.c | 1 + fs/btrfs/ctree.c | 1 + fs/btrfs/ctree.h | 1 + fs/btrfs/delayed-ref.c | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 1 + fs/btrfs/extent_io.c | 1 - fs/btrfs/extent_map.c | 1 - fs/btrfs/file-item.c | 1 + fs/btrfs/file.c | 1 + fs/btrfs/free-space-cache.c | 1 + fs/btrfs/inode.c | 1 + fs/btrfs/ioctl.c | 1 + fs/btrfs/locking.c | 1 - fs/btrfs/ordered-data.c | 1 - fs/btrfs/ref-cache.c | 1 + fs/btrfs/relocation.c | 1 + fs/btrfs/super.c | 1 + fs/btrfs/transaction.c | 1 + fs/btrfs/tree-log.c | 1 + fs/btrfs/volumes.c | 1 + 23 files changed, 19 insertions(+), 4 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6df6d6ed74fd..6ef7b26724ec 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "ctree.h" #include "btrfs_inode.h" diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c0861e781cdb..462859a30141 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 28b92a7218ab..629880ec8ac4 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4bc570a396e..8e29260cd0bb 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -17,6 +17,7 @@ */ #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0af2e3868573..8a6518a3f041 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "extent_io.h" #include "extent_map.h" diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 84e6781413b1..902ce507c4e3 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -17,6 +17,7 @@ */ #include +#include #include #include "ctree.h" #include "delayed-ref.h" diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11d0ad30e203..e57ded75d910 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1727b26fb194..fa2907d531a2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "compat.h" #include "hash.h" #include "ctree.h" diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c99121ac5d6b..0adceb525167 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 28d87ba60ce8..454ca52d6451 100644 --- a/fs/btrfs/extent_map.c +++ 
b/fs/btrfs/extent_map.c @@ -1,5 +1,4 @@ #include -#include #include #include #include diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9b99886562d0..54a255065aa3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include "ctree.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ee3323c7fc1c..29ff749ff4ca 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index dd831ed31eea..f488fac04d99 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "ctree.h" #include "free-space-cache.h" diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 02bb099845fd..2fe494c84e60 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2845c6ceecd2..9b3d73a0fdc8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 1c36e5cd8f55..6151f2ea38bb 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ #include -#include #include #include #include diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a8ffecd0b491..ecb22ff7d1fd 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. 
*/ -#include #include #include #include diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index d0cc62bccb94..a97314cf6bd6 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -17,6 +17,7 @@ */ #include +#include #include #include "ctree.h" #include "ref-cache.h" diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0b23942cbc0d..e558dd941ded 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9ac612e6ca60..693a664318fe 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2d654c1c794d..c0d0e3e7bc63 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1255fcc8ade5..af57dd2b43d4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -17,6 +17,7 @@ */ #include +#include #include "ctree.h" #include "transaction.h" #include "disk-io.h" diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9df8e3f1ccab..1692ec9cd7b0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -17,6 +17,7 @@ */ #include #include +#include #include #include #include -- cgit v1.2.3 From 2f3014fc2ab1e25c36531e19164c48182c168995 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Thu, 25 Mar 2010 17:22:45 +0000 Subject: Btrfs: remove duplicate include in ioctl.c fs/btrfs/ioctl.c: ctree.h is included more than once. Signed-off-by: Andrea Gelmini Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2845c6ceecd2..5c9f8b30608c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -48,7 +48,6 @@ #include "print-tree.h" #include "volumes.h" #include "locking.h" -#include "ctree.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) -- cgit v1.2.3 From 90d2c51dbb4db05c040bc7db264bb7ab35e35455 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 25 Mar 2010 12:37:12 +0000 Subject: Btrfs: add NULL check for do_walk_down() btrfs_find_create_tree_block() may return NULL, so we must check the returned value, or we will access a NULL pointer. 
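For clarity, the resulting check in do_walk_down() looks roughly like this once the hunk below is applied (a sketch reconstructed from the diff; surrounding code and indentation are approximate):

    next = btrfs_find_tree_block(root, bytenr, blocksize);
    if (!next) {
            next = btrfs_find_create_tree_block(root, bytenr, blocksize);
            /* creation may fail and return NULL; bail out with -ENOMEM
             * instead of dereferencing a NULL pointer further down */
            if (!next)
                    return -ENOMEM;
            reada = 1;
    }
    btrfs_tree_lock(next);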
Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1727b26fb194..503a18eaef52 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5205,6 +5205,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, next = btrfs_find_tree_block(root, bytenr, blocksize); if (!next) { next = btrfs_find_create_tree_block(root, bytenr, blocksize); + if (!next) + return -ENOMEM; reada = 1; } btrfs_tree_lock(next); @@ -5417,7 +5419,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (ret > 0) { path->slots[level]++; continue; - } + } else if (ret < 0) + return ret; level = wc->level; } return 0; -- cgit v1.2.3 From 471fa17dff556ad38caf26de097c0630530d8cbe Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 25 Mar 2010 12:35:14 +0000 Subject: Btrfs: Remove unnecessary finish_wait() in wait_current_trans() We only need to call finish_wait() after wait loop. By the way, this patch makes code of waiting loop similar to example in wait.h(no functional change) Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2d654c1c794d..43054285f638 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -147,18 +147,13 @@ static void wait_current_trans(struct btrfs_root *root) while (1) { prepare_to_wait(&root->fs_info->transaction_wait, &wait, TASK_UNINTERRUPTIBLE); - if (cur_trans->blocked) { - mutex_unlock(&root->fs_info->trans_mutex); - schedule(); - mutex_lock(&root->fs_info->trans_mutex); - finish_wait(&root->fs_info->transaction_wait, - &wait); - } else { - finish_wait(&root->fs_info->transaction_wait, - &wait); + if (!cur_trans->blocked) break; - } + mutex_unlock(&root->fs_info->trans_mutex); + schedule(); + mutex_lock(&root->fs_info->trans_mutex); } + finish_wait(&root->fs_info->transaction_wait, &wait); put_transaction(cur_trans); } } -- cgit v1.2.3 From ab59381ea43f81c977cbd09add26950aaf6cb9fe Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 25 Mar 2010 12:34:49 +0000 Subject: Btrfs: Add error handle for btrfs_search_slot() in btrfs_read_chunk_tree() We need to check return value of btrfs_search_slot() in btrfs_read_chunk_tree() and do corresponding error handing. Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9df8e3f1ccab..c6c80093eac0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3389,6 +3389,8 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) key.type = 0; again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto error; while (1) { leaf = path->nodes[0]; slot = path->slots[0]; -- cgit v1.2.3 From f3eae7e8a5ed124bbc781e18ea10c21856017322 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 25 Mar 2010 12:32:59 +0000 Subject: Btrfs: Simplify num_stripes's calculation logical for __btrfs_alloc_chunk() We can use this simple method to make source more readable. 
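Shown side by side, the RAID1 branch of __btrfs_alloc_chunk() changes as in the hunk below (a sketch reconstructed from the diff, not additional code):

    /* before: compute the stripe count, then validate it */
    num_stripes = min_t(u64, 2, fs_devices->rw_devices);
    if (num_stripes < 2)
            return -ENOSPC;
    min_stripes = 2;

    /* after: validate the device count up front, then use the constant */
    if (fs_devices->rw_devices < 2)
            return -ENOSPC;
    num_stripes = 2;
    min_stripes = 2;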
Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c6c80093eac0..bf3bec3e4130 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2198,9 +2198,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { - num_stripes = min_t(u64, 2, fs_devices->rw_devices); - if (num_stripes < 2) + if (fs_devices->rw_devices < 2) return -ENOSPC; + num_stripes = 2; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { -- cgit v1.2.3 From 683be16eb6e19a35aca2473668652259ed074094 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:24:48 +0000 Subject: Btrfs: dereferencing freed memory The original code dereferenced range on the next line. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5c9f8b30608c..874d36e5f167 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1374,6 +1374,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) sizeof(*range))) { ret = -EFAULT; kfree(range); + goto out; } /* compression requires us to start the IO */ if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { -- cgit v1.2.3 From c2b96929e2ca6914cf4a66cd8fe2a34c4a98277f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:24:15 +0000 Subject: Btrfs: handle kmalloc() failure in inode lookup ioctl Return -ENOMEM if kmalloc() fails. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 874d36e5f167..74d89133f768 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1211,6 +1211,9 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, return -EPERM; args = kmalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + if (copy_from_user(args, argp, sizeof(*args))) { kfree(args); return -EFAULT; -- cgit v1.2.3 From 6cf8bfbf5e88edfb09a2bf0631a067060f534592 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:22:10 +0000 Subject: Btrfs: check btrfs_get_extent return for IS_ERR() btrfs_get_extent() never returns NULL, only a valid pointer or ERR_PTR() Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 74d89133f768..2b7dd88fc54f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -510,7 +510,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, em = btrfs_get_extent(inode, NULL, 0, start, len, 0); unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); - if (!em) + if (IS_ERR(em)) return 0; } -- cgit v1.2.3 From 1b1d1f6625e517a08640ddb4b8f8a0e025243fe3 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Mar 2010 20:49:55 +0000 Subject: Btrfs: fail to mount if we have problems reading the block groups We don't actually check the return value of btrfs_read_block_groups, so we can possibly succeed to mount, but then fail to say read the superblock xattr for selinux which will cause the vfs code to deactivate the super. 
This is a problem because in find_free_extent we just assume that we will find the right space_info for the allocation we want. But if we failed to read the block groups, we won't have setup any space_info's, and we'll hit a NULL pointer deref in find_free_extent. This patch fixes that problem by checking the return value of btrfs_read_block_groups, and failing out properly. I've also added a check in find_free_extent so if for some reason we don't find an appropriate space_info, we just return -ENOSPC. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ++++++++--- fs/btrfs/extent-tree.c | 5 ++++- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11d0ad30e203..5ae1c0fcfce0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1922,7 +1922,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, csum_root->track_dirty = 1; - btrfs_read_block_groups(extent_root); + ret = btrfs_read_block_groups(extent_root); + if (ret) { + printk(KERN_ERR "Failed to read block groups: %d\n", ret); + goto fail_block_groups; + } fs_info->generation = generation; fs_info->last_trans_committed = generation; @@ -1932,7 +1936,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); if (IS_ERR(fs_info->cleaner_kthread)) - goto fail_csum_root; + goto fail_block_groups; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, @@ -2020,7 +2024,8 @@ fail_cleaner: filemap_write_and_wait(fs_info->btree_inode->i_mapping); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); -fail_csum_root: +fail_block_groups: + btrfs_free_block_groups(fs_info); free_extent_buffer(csum_root->node); free_extent_buffer(csum_root->commit_root); fail_dev_root: diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 503a18eaef52..4c910359c807 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4170,6 +4170,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, ins->offset = 0; space_info = __find_space_info(root->fs_info, data); + if (!space_info) { + printk(KERN_ERR "No space info for %d\n", data); + return -ENOSPC; + } if (orig_root->ref_cows || empty_size) allowed_chunk_alloc = 1; @@ -7372,7 +7376,6 @@ static int find_first_block_group(struct btrfs_root *root, } path->slots[0]++; } - ret = -ENOENT; out: return ret; } -- cgit v1.2.3 From 287a0ab91d25ca982f895a76402e5893b47ed7a6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Mar 2010 18:07:23 +0000 Subject: Btrfs: kill max_extent mount option As Yan pointed out, theres not much reason for all this complicated math to account for file extents being split up into max_extent chunks, since they are likely to all end up in the same leaf anyway. Since there isn't much reason to use max_extent, just remove the option altogether so we have one less thing we need to test. 
Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 - fs/btrfs/disk-io.c | 1 - fs/btrfs/extent-tree.c | 4 ++-- fs/btrfs/inode.c | 59 ++++--------------------------------------------- fs/btrfs/ordered-data.c | 6 +++-- fs/btrfs/super.c | 23 +++---------------- 6 files changed, 13 insertions(+), 81 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 11115847d875..ae8c40922c54 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -834,7 +834,6 @@ struct btrfs_fs_info { u64 last_trans_log_full_commit; u64 open_ioctl_trans; unsigned long mount_opt; - u64 max_extent; u64 max_inline; u64 alloc_start; struct btrfs_transaction *running_transaction; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5ae1c0fcfce0..6632e5c4c8bb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1634,7 +1634,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, atomic_set(&fs_info->async_submit_draining, 0); atomic_set(&fs_info->nr_async_bios, 0); fs_info->sb = sb; - fs_info->max_extent = (u64)-1; fs_info->max_inline = 8192 * 1024; fs_info->metadata_ratio = 0; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4c910359c807..5f6bc283c0c8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2846,7 +2846,7 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, } spin_unlock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->reserved_extents--; + BTRFS_I(inode)->reserved_extents -= num_items; BUG_ON(BTRFS_I(inode)->reserved_extents < 0); if (meta_sinfo->bytes_delalloc < num_bytes) { @@ -3111,7 +3111,7 @@ again: return -ENOSPC; } - BTRFS_I(inode)->reserved_extents++; + BTRFS_I(inode)->reserved_extents += num_items; check_force_delalloc(meta_sinfo); spin_unlock(&meta_sinfo->lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2a337a09c650..a85b90c86cb0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -796,7 +796,7 @@ static noinline int cow_file_range(struct inode *inode, while (disk_num_bytes > 0) { unsigned long op; - cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); + cur_alloc_size = disk_num_bytes; ret = btrfs_reserve_extent(trans, root, cur_alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); @@ -1227,30 +1227,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static int btrfs_split_extent_hook(struct inode *inode, struct extent_state *orig, u64 split) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 size; - if (!(orig->state & EXTENT_DELALLOC)) return 0; - size = orig->end - orig->start + 1; - if (size > root->fs_info->max_extent) { - u64 num_extents; - u64 new_size; - - new_size = orig->end - split + 1; - num_extents = div64_u64(size + root->fs_info->max_extent - 1, - root->fs_info->max_extent); - - /* - * if we break a large extent up then leave oustanding_extents - * be, since we've already accounted for the large extent. 
- */ - if (div64_u64(new_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent) < num_extents) - return 0; - } - spin_lock(&BTRFS_I(inode)->accounting_lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->accounting_lock); @@ -1268,38 +1247,10 @@ static int btrfs_merge_extent_hook(struct inode *inode, struct extent_state *new, struct extent_state *other) { - struct btrfs_root *root = BTRFS_I(inode)->root; - u64 new_size, old_size; - u64 num_extents; - /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) return 0; - old_size = other->end - other->start + 1; - if (new->start < other->start) - new_size = other->end - new->start + 1; - else - new_size = new->end - other->start + 1; - - /* we're not bigger than the max, unreserve the space and go */ - if (new_size <= root->fs_info->max_extent) { - spin_lock(&BTRFS_I(inode)->accounting_lock); - BTRFS_I(inode)->outstanding_extents--; - spin_unlock(&BTRFS_I(inode)->accounting_lock); - return 0; - } - - /* - * If we grew by another max_extent, just return, we want to keep that - * reserved amount. - */ - num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent); - if (div64_u64(new_size + root->fs_info->max_extent - 1, - root->fs_info->max_extent) > num_extents) - return 0; - spin_lock(&BTRFS_I(inode)->accounting_lock); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); @@ -1328,6 +1279,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_delalloc_reserve_space(root, inode, end - start + 1); + spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += end - start + 1; root->fs_info->delalloc_bytes += end - start + 1; @@ -1356,6 +1308,7 @@ static int btrfs_clear_bit_hook(struct inode *inode, if (bits & EXTENT_DO_ACCOUNTING) { spin_lock(&BTRFS_I(inode)->accounting_lock); + WARN_ON(!BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_unreserve_metadata_for_delalloc(root, inode, 1); @@ -5384,7 +5337,6 @@ free: void btrfs_drop_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; - if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) generic_delete_inode(inode); else @@ -5788,18 +5740,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; - u64 alloc_size; u64 cur_offset = start; u64 num_bytes = end - start; int ret = 0; u64 i_size; while (num_bytes > 0) { - alloc_size = min(num_bytes, root->fs_info->max_extent); - trans = btrfs_start_transaction(root, 1); - ret = btrfs_reserve_extent(trans, root, alloc_size, + ret = btrfs_reserve_extent(trans, root, num_bytes, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a8ffecd0b491..5c99882b9763 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -303,6 +303,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *entry) { struct btrfs_ordered_inode_tree *tree; + struct btrfs_root *root = BTRFS_I(inode)->root; struct rb_node *node; tree = &BTRFS_I(inode)->ordered_tree; @@ -312,12 +313,13 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, set_bit(BTRFS_ORDERED_COMPLETE, 
&entry->flags); spin_lock(&BTRFS_I(inode)->accounting_lock); + WARN_ON(!BTRFS_I(inode)->outstanding_extents); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->accounting_lock); btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, inode, 1); - spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + spin_lock(&root->fs_info->ordered_extent_lock); list_del_init(&entry->root_extent_list); /* @@ -329,7 +331,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { list_del_init(&BTRFS_I(inode)->ordered_operations); } - spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); + spin_unlock(&root->fs_info->ordered_extent_lock); return 0; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9ac612e6ca60..d11b12fc086b 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -64,10 +64,9 @@ static void btrfs_put_super(struct super_block *sb) enum { Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, - Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, - Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, - Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, - Opt_flushoncommit, + Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, + Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, + Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, Opt_err, }; @@ -79,7 +78,6 @@ static match_table_t tokens = { {Opt_nodatasum, "nodatasum"}, {Opt_nodatacow, "nodatacow"}, {Opt_nobarrier, "nobarrier"}, - {Opt_max_extent, "max_extent=%s"}, {Opt_max_inline, "max_inline=%s"}, {Opt_alloc_start, "alloc_start=%s"}, {Opt_thread_pool, "thread_pool=%d"}, @@ -188,18 +186,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) info->thread_pool_size); } break; - case Opt_max_extent: - num = match_strdup(&args[0]); - if (num) { - info->max_extent = memparse(num, NULL); - kfree(num); - - info->max_extent = max_t(u64, - info->max_extent, root->sectorsize); - printk(KERN_INFO "btrfs: max_extent at %llu\n", - (unsigned long long)info->max_extent); - } - break; case Opt_max_inline: num = match_strdup(&args[0]); if (num) { @@ -529,9 +515,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nodatacow"); if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ",nobarrier"); - if (info->max_extent != (u64)-1) - seq_printf(seq, ",max_extent=%llu", - (unsigned long long)info->max_extent); if (info->max_inline != 8192 * 1024) seq_printf(seq, ",max_inline=%llu", (unsigned long long)info->max_inline); -- cgit v1.2.3 From 0cad8a1130f77c7c445e3298c0e3593b3c0ef439 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 17 Mar 2010 20:45:56 +0000 Subject: Btrfs: fix chunk allocate size calculation If the amount of free space left in a device is less than what we think should be the minimum size, just ignore the minimum size and use the amount we have. I ran into this running tests on a 600mb volume, the chunk allocator wouldn't let me allocate the last 52mb of the disk for data because we want to have at least 64mb chunks for data. This patch fixes that problem. 
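The change boils down to applying the minimum-size clamp only on the first pass (names as in the hunk below; the 600MB/52MB/64MB numbers are the case described above):

    /* we don't want tiny stripes, but on a retry (looped != 0) accept
     * whatever free space is actually left, e.g. the last 52MB of a
     * 600MB volume even though we prefer at least 64MB data chunks */
    if (!looped)
            calc_size = max_t(u64, min_stripe_size, calc_size);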
Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bf3bec3e4130..9bf1f581b872 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2244,8 +2244,10 @@ again: do_div(calc_size, stripe_len); calc_size *= stripe_len; } + /* we don't want tiny stripes */ - calc_size = max_t(u64, min_stripe_size, calc_size); + if (!looped) + calc_size = max_t(u64, min_stripe_size, calc_size); do_div(calc_size, stripe_len); calc_size *= stripe_len; -- cgit v1.2.3 From 28ecb60906e86e74e9ad4ac7e0218d8631e73a94 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 17 Mar 2010 13:31:04 +0000 Subject: Btrfs: use add_to_page_cache_lru, use __page_cache_alloc Pagecache pages should be allocated with __page_cache_alloc, so they obey pagecache memory policies. add_to_page_cache_lru is exported, so it should be used. Benefits over using a private pagevec: neater code, 128 bytes fewer stack used, percpu lru ordering is preserved, and finally don't need to flush pagevec before returning so batching may be shared with other LRU insertions. Signed-off-by: Nick Piggin : Signed-off-by: Chris Mason --- fs/btrfs/compression.c | 22 ++++------------------ fs/btrfs/extent_io.c | 15 +-------------- 2 files changed, 5 insertions(+), 32 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 28b92a7218ab..1d54c5308df5 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -31,7 +31,6 @@ #include #include #include -#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -445,7 +444,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, unsigned long nr_pages = 0; struct extent_map *em; struct address_space *mapping = inode->i_mapping; - struct pagevec pvec; struct extent_map_tree *em_tree; struct extent_io_tree *tree; u64 end; @@ -461,7 +459,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; - pagevec_init(&pvec, 0); while (last_offset < compressed_end) { page_index = last_offset >> PAGE_CACHE_SHIFT; @@ -478,26 +475,17 @@ static noinline int add_ra_bio_pages(struct inode *inode, goto next; } - page = alloc_page(mapping_gfp_mask(mapping) & ~__GFP_FS); + page = __page_cache_alloc(mapping_gfp_mask(mapping) & + ~__GFP_FS); if (!page) break; - page->index = page_index; - /* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (add_to_page_cache(page, mapping, - page->index, GFP_NOFS)) { + if (add_to_page_cache_lru(page, mapping, page_index, + GFP_NOFS)) { page_cache_release(page); goto next; } - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add_file(&pvec); - end = last_offset + PAGE_CACHE_SIZE - 1; /* * at this point, we have a locked page in the page cache @@ -551,8 +539,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, next: last_offset += PAGE_CACHE_SIZE; } - if (pagevec_count(&pvec)) - __pagevec_lru_add_file(&pvec); return 0; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c99121ac5d6b..fc742e59815e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2679,33 +2679,20 @@ int extent_readpages(struct extent_io_tree *tree, { struct bio *bio = NULL; unsigned page_idx; - struct pagevec pvec; unsigned long bio_flags = 0; - 
pagevec_init(&pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); prefetchw(&page->flags); list_del(&page->lru); - /* - * what we want to do here is call add_to_page_cache_lru, - * but that isn't exported, so we reproduce it here - */ - if (!add_to_page_cache(page, mapping, + if (!add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) { - - /* open coding of lru_cache_add, also not exported */ - page_cache_get(page); - if (!pagevec_add(&pvec, page)) - __pagevec_lru_add_file(&pvec); __extent_read_full_page(tree, page, get_extent, &bio, 0, &bio_flags); } page_cache_release(page); } - if (pagevec_count(&pvec)) - __pagevec_lru_add_file(&pvec); BUG_ON(!list_empty(pages)); if (bio) submit_one_bio(READ, bio, 0, bio_flags); -- cgit v1.2.3 From b5cb160084fad438c513d0952849e597ffe9e3d9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 12 Mar 2010 19:28:18 +0000 Subject: Btrfs: fix small race with delalloc flushing waitqueue's Everytime we start a new flushing thread, we init the waitqueue if there isn't a flushing thread running. The problem with this is we check space_info->flushing, which we clear right before doing a wake_up on the flushing waitqueue, which causes problems if we init the waitqueue in the middle of clearing the flushing flagh and calling wake_up. This is hard to hit, but the code is wrong anyway, so init the flushing/allocating waitqueue when creating the space info and let it be. I haven't seen the panic since I've been using this patch. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5f6bc283c0c8..101041d4d2b2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2676,6 +2676,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, INIT_LIST_HEAD(&found->block_groups); init_rwsem(&found->groups_sem); + init_waitqueue_head(&found->flush_wait); + init_waitqueue_head(&found->allocate_wait); spin_lock_init(&found->lock); found->flags = flags; found->total_bytes = total_bytes; @@ -2944,12 +2946,10 @@ static void flush_delalloc(struct btrfs_root *root, spin_lock(&info->lock); - if (!info->flushing) { + if (!info->flushing) info->flushing = 1; - init_waitqueue_head(&info->flush_wait); - } else { + else wait = true; - } spin_unlock(&info->lock); @@ -3011,7 +3011,6 @@ static int maybe_allocate_chunk(struct btrfs_root *root, if (!info->allocating_chunk) { info->force_alloc = 1; info->allocating_chunk = 1; - init_waitqueue_head(&info->allocate_wait); } else { wait = true; } -- cgit v1.2.3 From 6bdb72ded1e281cd8844918c39d00cdd0e59f655 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Mar 2010 17:27:13 +0000 Subject: Btrfs: create snapshot references in same commit as snapshot This creates the reference to a new snapshot in the same commit as the snapshot itself. This avoids the need for a second commit in order for a snapshot to be persistent, and also avoids the problem of "leaking" a new snapshot tree root if the host crashes before the second commit takes place. It is not at all clear to me why it wasn't always done this way. If there is still a reason for the two-stage {create,finish}_pending_snapshots() approach I'm missing something! :) I've been running this for a couple weeks under pretty heavy usage (a few snapshots per minute) without obvious problems. 
Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 97 ++++++++++++++++---------------------------------- 1 file changed, 31 insertions(+), 66 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 43054285f638..01cebd661997 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -755,10 +755,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root_item *new_root_item; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root = pending->root; + struct btrfs_root *parent_root; + struct inode *parent_inode; struct extent_buffer *tmp; struct extent_buffer *old; int ret; u64 objectid; + int namelen; + u64 index = 0; + + parent_inode = pending->dentry->d_parent->d_inode; + parent_root = BTRFS_I(parent_inode)->root; new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { @@ -769,79 +776,59 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, if (ret) goto fail; - record_root_in_trans(trans, root); - btrfs_set_root_last_snapshot(&root->root_item, trans->transid); - memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); - key.objectid = objectid; /* record when the snapshot was created in key.offset */ key.offset = trans->transid; btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - old = btrfs_lock_root_node(root); - btrfs_cow_block(trans, root, old, NULL, 0, &old); - btrfs_set_lock_blocking(old); - - btrfs_copy_root(trans, root, old, &tmp, objectid); - btrfs_tree_unlock(old); - free_extent_buffer(old); - - btrfs_set_root_node(new_root_item, tmp); - ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, - new_root_item); - btrfs_tree_unlock(tmp); - free_extent_buffer(tmp); - if (ret) - goto fail; - - key.offset = (u64)-1; memcpy(&pending->root_key, &key, sizeof(key)); -fail: - kfree(new_root_item); - return ret; -} - -static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, - struct btrfs_pending_snapshot *pending) -{ - int ret; - int namelen; - u64 index = 0; - struct btrfs_trans_handle *trans; - struct inode *parent_inode; - struct btrfs_root *parent_root; - - parent_inode = pending->dentry->d_parent->d_inode; - parent_root = BTRFS_I(parent_inode)->root; - trans = btrfs_join_transaction(parent_root, 1); + pending->root_key.offset = (u64)-1; + record_root_in_trans(trans, parent_root); /* * insert the directory item */ namelen = strlen(pending->name); ret = btrfs_set_inode_index(parent_inode, &index); + BUG_ON(ret); ret = btrfs_insert_dir_item(trans, parent_root, pending->name, namelen, parent_inode->i_ino, &pending->root_key, BTRFS_FT_DIR, index); - - if (ret) - goto fail; + BUG_ON(ret); btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); ret = btrfs_update_inode(trans, parent_root, parent_inode); BUG_ON(ret); + record_root_in_trans(trans, root); + btrfs_set_root_last_snapshot(&root->root_item, trans->transid); + memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); + + old = btrfs_lock_root_node(root); + btrfs_cow_block(trans, root, old, NULL, 0, &old); + btrfs_set_lock_blocking(old); + + btrfs_copy_root(trans, root, old, &tmp, objectid); + btrfs_tree_unlock(old); + free_extent_buffer(old); + + btrfs_set_root_node(new_root_item, tmp); + ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, + new_root_item); + BUG_ON(ret); + btrfs_tree_unlock(tmp); + free_extent_buffer(tmp); + ret = btrfs_add_root_ref(trans, 
parent_root->fs_info->tree_root, pending->root_key.objectid, parent_root->root_key.objectid, parent_inode->i_ino, index, pending->name, namelen); - BUG_ON(ret); fail: - btrfs_end_transaction(trans, fs_info->fs_root); + kfree(new_root_item); return ret; } @@ -862,25 +849,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, return 0; } -static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) -{ - struct btrfs_pending_snapshot *pending; - struct list_head *head = &trans->transaction->pending_snapshots; - int ret; - - while (!list_empty(head)) { - pending = list_entry(head->next, - struct btrfs_pending_snapshot, list); - ret = finish_pending_snapshot(fs_info, pending); - BUG_ON(ret); - list_del(&pending->list); - kfree(pending->name); - kfree(pending); - } - return 0; -} - static void update_super_roots(struct btrfs_root *root) { struct btrfs_root_item *root_item; @@ -1092,9 +1060,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, btrfs_finish_extent_commit(trans, root); - /* do the directory inserts of any pending snapshot creations */ - finish_pending_snapshots(trans, root->fs_info); - mutex_lock(&root->fs_info->trans_mutex); cur_trans->commit_done = 1; -- cgit v1.2.3 From 109f6aef5fc436f355ad027f4d97bd696df2049a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 2 Apr 2010 09:20:18 -0400 Subject: Btrfs: add check for changed leaves in setup_leaf_for_split setup_leaf_for_split needs to drop the path and search again, and has checks to see if the item we want to split changed size. But, it misses the case where the leaf changed and now has enough room for the item we want to insert. This adds an extra check to make sure the leaf really needs splitting before we call btrfs_split_leaf(), which keeps us from trying to split a leaf with a single item. btrfs_split_leaf() will blindly split the single item leaf, leaving us with one good leaf and one empty leaf and then a crash. Signed-off-by: Chris Mason --- fs/btrfs/ctree.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4bc570a396e..babf7fbaec84 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -3040,6 +3040,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) goto err; + /* the leaf has changed, it now has room. return now */ + if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len) + goto err; + if (key.type == BTRFS_EXTENT_DATA_KEY) { fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); -- cgit v1.2.3 From ab6e24103cbd215e922938a4f58c75194761a60e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 19 Mar 2010 14:38:13 +0000 Subject: Btrfs: fix data enospc check overflow Because we account for reserved space we get from the allocator before we actually account for allocating delalloc space, we can have a small window where the amount of "used" space in a space_info is more than the total amount of space in the space_info. This will cause a overflow in our check, so it will seem like we have _tons_ of free space, and we'll allow reservations to occur that will end up larger than the amount of space we have. I've seen users report ENOSPC panic's in cow_file_range a few times recently, so I tried to reproduce this problem and found I could reproduce it if I ran one of my tests in a loop for like 20 minutes. 
With this patch my test ran all night without issues. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'fs/btrfs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 101041d4d2b2..786899da3eb9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3234,7 +3234,8 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, u64 bytes) { struct btrfs_space_info *data_sinfo; - int ret = 0, committed = 0; + u64 used; + int ret = 0, committed = 0, flushed = 0; /* make sure bytes are sectorsize aligned */ bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); @@ -3246,12 +3247,21 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, again: /* make sure we have enough space to handle the data first */ spin_lock(&data_sinfo->lock); - if (data_sinfo->total_bytes - data_sinfo->bytes_used - - data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - - data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { + used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc + + data_sinfo->bytes_reserved + data_sinfo->bytes_pinned + + data_sinfo->bytes_readonly + data_sinfo->bytes_may_use + + data_sinfo->bytes_super; + + if (used + bytes > data_sinfo->total_bytes) { struct btrfs_trans_handle *trans; + if (!flushed) { + spin_unlock(&data_sinfo->lock); + flush_delalloc(root, data_sinfo); + flushed = 1; + goto again; + } + /* * if we don't have enough free bytes in this space then we need * to alloc a new chunk. -- cgit v1.2.3 From 9f680ce04ea19dabbbafe01b57b61930a9b70741 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 6 Apr 2010 09:37:47 -0400 Subject: Btrfs: make sure the chunk allocator doesn't create zero length chunks A recent commit allowed for smaller chunks to be created, but didn't make sure they were always bigger than a stripe. After some divides, this led to zero length stripes. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9bf1f581b872..b584e9a2add2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2249,6 +2249,12 @@ again: if (!looped) calc_size = max_t(u64, min_stripe_size, calc_size); + /* + * we're about to do_div by the stripe_len so lets make sure + * we end up with something bigger than a stripe + */ + calc_size = max_t(u64, calc_size, stripe_len * 4); + do_div(calc_size, stripe_len); calc_size *= stripe_len; -- cgit v1.2.3
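To see why the clamp added in the hunk above matters, a rough worked example with assumed numbers (stripe_len = 64K, calc_size = 48K after the earlier divides):

    do_div(calc_size, stripe_len);    /* 48K / 64K == 0                       */
    calc_size *= stripe_len;          /* 0 * 64K   == 0 -> zero length stripe */

    /* clamping first keeps the rounded-down result non-zero */
    calc_size = max_t(u64, calc_size, stripe_len * 4);
    do_div(calc_size, stripe_len);    /* 256K / 64K == 4    */
    calc_size *= stripe_len;          /* 4 * 64K    == 256K */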