From 23d79d81b13431756fee428acde49c9b770da132 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 02:55:29 +0200 Subject: btrfs: use GFP_NOFS in __alloc_extent_buffer directly Same mask from all callers. Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4ebabd237153..619592d86c2a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4598,11 +4598,11 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb) static struct extent_buffer * __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, - unsigned long len, gfp_t mask) + unsigned long len) { struct extent_buffer *eb = NULL; - eb = kmem_cache_zalloc(extent_buffer_cache, mask); + eb = kmem_cache_zalloc(extent_buffer_cache, GFP_NOFS); if (eb == NULL) return NULL; eb->start = start; @@ -4643,7 +4643,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) struct extent_buffer *new; unsigned long num_pages = num_extent_pages(src->start, src->len); - new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS); + new = __alloc_extent_buffer(NULL, src->start, src->len); if (new == NULL) return NULL; @@ -4672,7 +4672,7 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) unsigned long num_pages = num_extent_pages(0, len); unsigned long i; - eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); + eb = __alloc_extent_buffer(NULL, start, len); if (!eb) return NULL; @@ -4824,7 +4824,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, if (eb) return eb; - eb = __alloc_extent_buffer(fs_info, start, len, GFP_NOFS); + eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) return NULL; -- cgit v1.2.3 From 3f556f7853ec4845a7c219d026cbcdf4cfa8cea7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 03:20:26 +0200 Subject: btrfs: unify extent buffer allocation api Make the extent buffer allocation interface consistent. Cloned eb will set a valid fs_info. For dummy eb, we can drop the length parameter and set it from fs_info. The built-in sanity checks may pass a NULL fs_info that's queried for nodesize, but we know it's 4096. Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 5 ++--- fs/btrfs/extent_io.c | 23 ++++++++++++++++++----- fs/btrfs/extent_io.h | 3 ++- fs/btrfs/tests/extent-buffer-tests.c | 2 +- fs/btrfs/tests/inode-tests.c | 4 ++-- fs/btrfs/tests/qgroup-tests.c | 21 ++++++++++----------- 6 files changed, 35 insertions(+), 23 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 50eca331812c..276d4187cbf0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1363,8 +1363,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm->slot != 0); - eb_rewin = alloc_dummy_extent_buffer(eb->start, - fs_info->tree_root->nodesize); + eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start); if (!eb_rewin) { btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); @@ -1444,7 +1443,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } else if (old_root) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - eb = alloc_dummy_extent_buffer(logical, root->nodesize); + eb = alloc_dummy_extent_buffer(root->fs_info, logical); } else { btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK); eb = btrfs_clone_extent_buffer(eb_root); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 619592d86c2a..dc424e32545a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4643,7 +4643,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) struct extent_buffer *new; unsigned long num_pages = num_extent_pages(src->start, src->len); - new = __alloc_extent_buffer(NULL, src->start, src->len); + new = __alloc_extent_buffer(src->fs_info, src->start, src->len); if (new == NULL) return NULL; @@ -4666,13 +4666,26 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) return new; } -struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) +struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, + u64 start) { struct extent_buffer *eb; - unsigned long num_pages = num_extent_pages(0, len); + unsigned long len; + unsigned long num_pages; unsigned long i; - eb = __alloc_extent_buffer(NULL, start, len); + if (!fs_info) { + /* + * Called only from tests that don't always have a fs_info + * available, but we know that nodesize is 4096 + */ + len = 4096; + } else { + len = fs_info->tree_root->nodesize; + } + num_pages = num_extent_pages(0, len); + + eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) return NULL; @@ -4770,7 +4783,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, eb = find_extent_buffer(fs_info, start); if (eb) return eb; - eb = alloc_dummy_extent_buffer(start, len); + eb = alloc_dummy_extent_buffer(fs_info, start); if (!eb) return NULL; eb->fs_info = fs_info; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index ece9ce87edff..e6553e3d35c8 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -263,7 +263,8 @@ void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start, unsigned long len); -struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len); +struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, + u64 start); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index cc286ce97d1e..f51963a8f929 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -53,7 +53,7 @@ static int test_btrfs_split_item(void) return -ENOMEM; } - path->nodes[0] = eb = alloc_dummy_extent_buffer(0, 4096); + path->nodes[0] = eb = alloc_dummy_extent_buffer(NULL, 4096); if (!eb) { test_msg("Could not allocate dummy buffer\n"); ret = -ENOMEM; diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 3ae0f5b8bb80..a116b55ce788 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -255,7 +255,7 @@ static noinline int test_btrfs_get_extent(void) goto out; } - root->node = alloc_dummy_extent_buffer(0, 4096); + root->node = alloc_dummy_extent_buffer(NULL, 4096); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; @@ -843,7 +843,7 @@ static int test_hole_first(void) goto out; } - root->node = alloc_dummy_extent_buffer(0, 4096); + root->node = alloc_dummy_extent_buffer(NULL, 4096); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); goto out; diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index ec3dcb202357..7336b1c09cd8 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -404,6 +404,16 @@ int btrfs_test_qgroups(void) ret = -ENOMEM; goto out; } + /* We are using this root as our extent root */ + root->fs_info->extent_root = root; + + /* + * Some of the paths we test assume we have a filled out fs_info, so we + * just need to add the root in there so we don't panic. + */ + root->fs_info->tree_root = root; + root->fs_info->quota_root = root; + root->fs_info->quota_enabled = 1; /* * Can't use bytenr 0, some things freak out @@ -448,17 +458,6 @@ int btrfs_test_qgroups(void) goto out; } - /* We are using this root as our extent root */ - root->fs_info->extent_root = root; - - /* - * Some of the paths we test assume we have a filled out fs_info, so we - * just need to addt he root in there so we don't panic. - */ - root->fs_info->tree_root = root; - root->fs_info->quota_root = root; - root->fs_info->quota_enabled = 1; - test_msg("Running qgroup tests\n"); ret = test_no_shared_qgroup(root); if (ret) -- cgit v1.2.3 From ce3e69847e3ec79a38421bfd3d6f554d5e481231 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 03:00:04 +0200 Subject: btrfs: sink parameter len to alloc_extent_buffer Because we're using globally known nodesize. Do the same for the sanity test function variant. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 ++--- fs/btrfs/extent_io.c | 5 +++-- fs/btrfs/extent_io.h | 4 ++-- fs/btrfs/tests/qgroup-tests.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 548cb540e516..9c204533fd22 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1128,9 +1128,8 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr) { if (btrfs_test_is_dummy_root(root)) - return alloc_test_extent_buffer(root->fs_info, bytenr, - root->nodesize); - return alloc_extent_buffer(root->fs_info, bytenr, root->nodesize); + return alloc_test_extent_buffer(root->fs_info, bytenr); + return alloc_extent_buffer(root->fs_info, bytenr); } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index dc424e32545a..c4ca90ab687e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4775,7 +4775,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start, unsigned long len) + u64 start) { struct extent_buffer *eb, *exists = NULL; int ret; @@ -4821,8 +4821,9 @@ free_eb: #endif struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start, unsigned long len) + u64 start) { + unsigned long len = fs_info->tree_root->nodesize; unsigned long num_pages = num_extent_pages(start, len); unsigned long i; unsigned long index = start >> PAGE_CACHE_SHIFT; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e6553e3d35c8..71268e508b7a 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -262,7 +262,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start, unsigned long len); + u64 start); struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); @@ -378,5 +378,5 @@ noinline u64 find_lock_delalloc_range(struct inode *inode, u64 *end, u64 max_bytes); #endif struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start, unsigned long len); + u64 start); #endif diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 7336b1c09cd8..73f299ebdabb 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -419,7 +419,7 @@ int btrfs_test_qgroups(void) * Can't use bytenr 0, some things freak out * *cough*backref walking code*cough* */ - root->node = alloc_test_extent_buffer(root->fs_info, 4096, 4096); + root->node = alloc_test_extent_buffer(root->fs_info, 4096); if (!root->node) { test_msg("Couldn't allocate dummy buffer\n"); ret = -ENOMEM; -- cgit v1.2.3 From 9ee49a047dc53fd21808cbb7f3b6a3345463e834 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 14 Jan 2015 19:52:13 +0100 Subject: btrfs: switch extent_state state to unsigned Currently there's a 4B hole in the structure between refs and state and there are only 16 bits used so we can make it unsigned. This will get a better packing and may save some stack space for local variables. The size of extent_state gets reduced by 8B and there are usually a lot of slab objects. struct extent_state { u64 start; /* 0 8 */ u64 end; /* 8 8 */ struct rb_node rb_node; /* 16 24 */ wait_queue_head_t wq; /* 40 24 */ /* --- cacheline 1 boundary (64 bytes) --- */ atomic_t refs; /* 64 4 */ /* XXX 4 bytes hole, try to pack */ long unsigned int state; /* 72 8 */ u64 private; /* 80 8 */ /* size: 88, cachelines: 2, members: 7 */ /* sum members: 84, holes: 1, sum holes: 4 */ /* last cacheline: 24 bytes */ }; Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 45 ++++++++++++++++--------------- fs/btrfs/extent_io.h | 58 ++++++++++++++++++++-------------------- fs/btrfs/inode.c | 4 +-- fs/btrfs/tests/extent-io-tests.c | 3 +-- 4 files changed, 55 insertions(+), 55 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c4ca90ab687e..65bd285ef361 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -64,7 +64,7 @@ void btrfs_leak_debug_check(void) while (!list_empty(&states)) { state = list_entry(states.next, struct extent_state, leak_list); - pr_err("BTRFS: state leak: start %llu end %llu state %lu in tree %d refs %d\n", + pr_err("BTRFS: state leak: start %llu end %llu state %u in tree %d refs %d\n", state->start, state->end, state->state, extent_state_in_tree(state), atomic_read(&state->refs)); @@ -396,21 +396,21 @@ static void merge_state(struct extent_io_tree *tree, } static void set_state_cb(struct extent_io_tree *tree, - struct extent_state *state, unsigned long *bits) + struct extent_state *state, unsigned *bits) { if (tree->ops && tree->ops->set_bit_hook) tree->ops->set_bit_hook(tree->mapping->host, state, bits); } static void clear_state_cb(struct extent_io_tree *tree, - struct extent_state *state, unsigned long *bits) + struct extent_state *state, unsigned *bits) { if (tree->ops && tree->ops->clear_bit_hook) tree->ops->clear_bit_hook(tree->mapping->host, state, bits); } static void set_state_bits(struct extent_io_tree *tree, - struct extent_state *state, unsigned long *bits); + struct extent_state *state, unsigned *bits); /* * insert an extent_state struct into the tree. 'bits' are set on the @@ -426,7 +426,7 @@ static int insert_state(struct extent_io_tree *tree, struct extent_state *state, u64 start, u64 end, struct rb_node ***p, struct rb_node **parent, - unsigned long *bits) + unsigned *bits) { struct rb_node *node; @@ -511,10 +511,10 @@ static struct extent_state *next_state(struct extent_state *state) */ static struct extent_state *clear_state_bit(struct extent_io_tree *tree, struct extent_state *state, - unsigned long *bits, int wake) + unsigned *bits, int wake) { struct extent_state *next; - unsigned long bits_to_clear = *bits & ~EXTENT_CTLBITS; + unsigned bits_to_clear = *bits & ~EXTENT_CTLBITS; if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; @@ -570,7 +570,7 @@ static void extent_io_tree_panic(struct extent_io_tree *tree, int err) * This takes the tree lock, and returns 0 on success and < 0 on error. */ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, int wake, int delete, + unsigned bits, int wake, int delete, struct extent_state **cached_state, gfp_t mask) { @@ -789,9 +789,9 @@ out: static void set_state_bits(struct extent_io_tree *tree, struct extent_state *state, - unsigned long *bits) + unsigned *bits) { - unsigned long bits_to_set = *bits & ~EXTENT_CTLBITS; + unsigned bits_to_set = *bits & ~EXTENT_CTLBITS; set_state_cb(tree, state, bits); if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { @@ -803,7 +803,7 @@ static void set_state_bits(struct extent_io_tree *tree, static void cache_state_if_flags(struct extent_state *state, struct extent_state **cached_ptr, - const u64 flags) + unsigned flags) { if (cached_ptr && !(*cached_ptr)) { if (!flags || (state->state & flags)) { @@ -833,7 +833,7 @@ static void cache_state(struct extent_state *state, static int __must_check __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, unsigned long exclusive_bits, + unsigned bits, unsigned exclusive_bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask) { @@ -1034,7 +1034,7 @@ search_again: } int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, u64 * failed_start, + unsigned bits, u64 * failed_start, struct extent_state **cached_state, gfp_t mask) { return __set_extent_bit(tree, start, end, bits, 0, failed_start, @@ -1060,7 +1060,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, * boundary bits like LOCK. */ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, unsigned long clear_bits, + unsigned bits, unsigned clear_bits, struct extent_state **cached_state, gfp_t mask) { struct extent_state *state; @@ -1268,14 +1268,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, } int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, gfp_t mask) + unsigned bits, gfp_t mask) { return set_extent_bit(tree, start, end, bits, NULL, NULL, mask); } int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, gfp_t mask) + unsigned bits, gfp_t mask) { return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask); } @@ -1330,10 +1330,11 @@ int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, * us if waiting is desired. */ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, struct extent_state **cached_state) + unsigned bits, struct extent_state **cached_state) { int err; u64 failed_start; + while (1) { err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, EXTENT_LOCKED, &failed_start, @@ -1440,7 +1441,7 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) */ static struct extent_state * find_first_extent_bit_state(struct extent_io_tree *tree, - u64 start, unsigned long bits) + u64 start, unsigned bits) { struct rb_node *node; struct extent_state *state; @@ -1474,7 +1475,7 @@ out: * If nothing was found, 1 is returned. If found something, return 0. */ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, unsigned long bits, + u64 *start_ret, u64 *end_ret, unsigned bits, struct extent_state **cached_state) { struct extent_state *state; @@ -1753,7 +1754,7 @@ out_failed: int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, struct page *locked_page, - unsigned long clear_bits, + unsigned clear_bits, unsigned long page_ops) { struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; @@ -1810,7 +1811,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, */ u64 count_range_bits(struct extent_io_tree *tree, u64 *start, u64 search_end, u64 max_bytes, - unsigned long bits, int contig) + unsigned bits, int contig) { struct rb_node *node; struct extent_state *state; @@ -1928,7 +1929,7 @@ out: * range is found set. */ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, int filled, struct extent_state *cached) + unsigned bits, int filled, struct extent_state *cached) { struct extent_state *state = NULL; struct rb_node *node; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 71268e508b7a..695b0ccfb755 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -4,22 +4,22 @@ #include /* bits for the extent state */ -#define EXTENT_DIRTY 1 -#define EXTENT_WRITEBACK (1 << 1) -#define EXTENT_UPTODATE (1 << 2) -#define EXTENT_LOCKED (1 << 3) -#define EXTENT_NEW (1 << 4) -#define EXTENT_DELALLOC (1 << 5) -#define EXTENT_DEFRAG (1 << 6) -#define EXTENT_BOUNDARY (1 << 9) -#define EXTENT_NODATASUM (1 << 10) -#define EXTENT_DO_ACCOUNTING (1 << 11) -#define EXTENT_FIRST_DELALLOC (1 << 12) -#define EXTENT_NEED_WAIT (1 << 13) -#define EXTENT_DAMAGED (1 << 14) -#define EXTENT_NORESERVE (1 << 15) -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) -#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) +#define EXTENT_DIRTY (1U << 0) +#define EXTENT_WRITEBACK (1U << 1) +#define EXTENT_UPTODATE (1U << 2) +#define EXTENT_LOCKED (1U << 3) +#define EXTENT_NEW (1U << 4) +#define EXTENT_DELALLOC (1U << 5) +#define EXTENT_DEFRAG (1U << 6) +#define EXTENT_BOUNDARY (1U << 9) +#define EXTENT_NODATASUM (1U << 10) +#define EXTENT_DO_ACCOUNTING (1U << 11) +#define EXTENT_FIRST_DELALLOC (1U << 12) +#define EXTENT_NEED_WAIT (1U << 13) +#define EXTENT_DAMAGED (1U << 14) +#define EXTENT_NORESERVE (1U << 15) +#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) /* * flags for bio submission. The high bits indicate the compression @@ -81,9 +81,9 @@ struct extent_io_ops { int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, - unsigned long *bits); + unsigned *bits); void (*clear_bit_hook)(struct inode *inode, struct extent_state *state, - unsigned long *bits); + unsigned *bits); void (*merge_extent_hook)(struct inode *inode, struct extent_state *new, struct extent_state *other); @@ -108,7 +108,7 @@ struct extent_state { /* ADD NEW ELEMENTS AFTER THIS */ wait_queue_head_t wq; atomic_t refs; - unsigned long state; + unsigned state; /* for use by the FS */ u64 private; @@ -188,7 +188,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, int try_release_extent_buffer(struct page *page); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, struct extent_state **cached); + unsigned bits, struct extent_state **cached); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached, gfp_t mask); @@ -202,21 +202,21 @@ void extent_io_exit(void); u64 count_range_bits(struct extent_io_tree *tree, u64 *start, u64 search_end, - u64 max_bytes, unsigned long bits, int contig); + u64 max_bytes, unsigned bits, int contig); void free_extent_state(struct extent_state *state); int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, int filled, + unsigned bits, int filled, struct extent_state *cached_state); int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, gfp_t mask); + unsigned bits, gfp_t mask); int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, int wake, int delete, + unsigned bits, int wake, int delete, struct extent_state **cached, gfp_t mask); int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, gfp_t mask); + unsigned bits, gfp_t mask); int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, u64 *failed_start, + unsigned bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask); int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); @@ -229,14 +229,14 @@ int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, - unsigned long bits, unsigned long clear_bits, + unsigned bits, unsigned clear_bits, struct extent_state **cached_state, gfp_t mask); int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); int find_first_extent_bit(struct extent_io_tree *tree, u64 start, - u64 *start_ret, u64 *end_ret, unsigned long bits, + u64 *start_ret, u64 *end_ret, unsigned bits, struct extent_state **cached_state); int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); @@ -323,7 +323,7 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, struct page *locked_page, - unsigned long bits_to_clear, + unsigned bits_to_clear, unsigned long page_ops); struct bio * btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5e529208ffea..220f0c3f2df6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1604,7 +1604,7 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root, * have pending delalloc work to be done. */ static void btrfs_set_bit_hook(struct inode *inode, - struct extent_state *state, unsigned long *bits) + struct extent_state *state, unsigned *bits) { if ((*bits & EXTENT_DEFRAG) && !(*bits & EXTENT_DELALLOC)) @@ -1645,7 +1645,7 @@ static void btrfs_set_bit_hook(struct inode *inode, */ static void btrfs_clear_bit_hook(struct inode *inode, struct extent_state *state, - unsigned long *bits) + unsigned *bits) { u64 len = state->end + 1 - state->start; diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 7e99c2f98dd0..9e9f2368177d 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -258,8 +258,7 @@ static int test_find_delalloc(void) } ret = 0; out_bits: - clear_extent_bits(&tmp, 0, total_dirty - 1, - (unsigned long)-1, GFP_NOFS); + clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_NOFS); out: if (locked_page) page_cache_release(locked_page); -- cgit v1.2.3 From 6e9606d2a2dce098c1739fb3cd82a1c34fd73d3a Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Tue, 20 Jan 2015 15:11:34 +0800 Subject: Btrfs: add ref_count and free function for btrfs_bio 1: ref_count is simple than current RBIO_HOLD_BBIO_MAP_BIT flag to keep btrfs_bio's memory in raid56 recovery implement. 2: free function for bbio will make code clean and flexible, plus forced data type checking in compile. Changelog v1->v2: Rename following by David Sterba's suggestion: put_btrfs_bio() -> btrfs_put_bio() get_btrfs_bio() -> btrfs_get_bio() bbio->ref_count -> bbio->refs Signed-off-by: Zhao Lei Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 2 +- fs/btrfs/extent_io.c | 2 +- fs/btrfs/raid56.c | 38 ++++++++------------------------------ fs/btrfs/reada.c | 4 ++-- fs/btrfs/scrub.c | 12 ++++++------ fs/btrfs/volumes.c | 43 ++++++++++++++++++++++++++++++++++++------- fs/btrfs/volumes.h | 10 +++------- 7 files changed, 57 insertions(+), 54 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3af53f8313af..1d361ac9abc9 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1926,7 +1926,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, */ ret = 0; } - kfree(bbio); + btrfs_put_bbio(bbio); } if (actual_bytes) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 65bd285ef361..dab8af4450e1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2058,7 +2058,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, sector = bbio->stripes[mirror_num-1].physical >> 9; bio->bi_iter.bi_sector = sector; dev = bbio->stripes[mirror_num-1].dev; - kfree(bbio); + btrfs_put_bbio(bbio); if (!dev || !dev->bdev || !dev->writeable) { bio_put(bio); return -EIO; diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index e301d3302edf..cbc416204452 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -58,15 +58,6 @@ */ #define RBIO_CACHE_READY_BIT 3 -/* - * bbio and raid_map is managed by the caller, so we shouldn't free - * them here. And besides that, all rbios with this flag should not - * be cached, because we need raid_map to check the rbios' stripe - * is the same or not, but it is very likely that the caller has - * free raid_map, so don't cache those rbios. - */ -#define RBIO_HOLD_BBIO_MAP_BIT 4 - #define RBIO_CACHE_SIZE 1024 enum btrfs_rbio_ops { @@ -834,19 +825,6 @@ done_nolock: remove_rbio_from_cache(rbio); } -static inline void -__free_bbio(struct btrfs_bio *bbio, int need) -{ - if (need) - kfree(bbio); -} - -static inline void free_bbio(struct btrfs_raid_bio *rbio) -{ - __free_bbio(rbio->bbio, - !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)); -} - static void __free_raid_bio(struct btrfs_raid_bio *rbio) { int i; @@ -866,8 +844,7 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) } } - free_bbio(rbio); - + btrfs_put_bbio(rbio->bbio); kfree(rbio); } @@ -1774,7 +1751,7 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio, rbio = alloc_rbio(root, bbio, stripe_len); if (IS_ERR(rbio)) { - __free_bbio(bbio, 1); + btrfs_put_bbio(bbio); return PTR_ERR(rbio); } bio_list_add(&rbio->bio_list, bio); @@ -1990,8 +1967,7 @@ cleanup: cleanup_io: if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { - if (err == 0 && - !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags)) + if (err == 0) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); @@ -2153,7 +2129,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio = alloc_rbio(root, bbio, stripe_len); if (IS_ERR(rbio)) { - __free_bbio(bbio, generic_io); + if (generic_io) + btrfs_put_bbio(bbio); return PTR_ERR(rbio); } @@ -2164,7 +2141,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, rbio->faila = find_logical_bio_stripe(rbio, bio); if (rbio->faila == -1) { BUG(); - __free_bbio(bbio, generic_io); + if (generic_io) + btrfs_put_bbio(bbio); kfree(rbio); return -EIO; } @@ -2173,7 +2151,7 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio, btrfs_bio_counter_inc_noblocked(root->fs_info); rbio->generic_bio_cnt = 1; } else { - set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags); + btrfs_get_bbio(bbio); } /* diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 4d3d4e5287c5..0e7beea92b4c 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -461,7 +461,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, spin_unlock(&fs_info->reada_lock); btrfs_dev_replace_unlock(&fs_info->dev_replace); - kfree(bbio); + btrfs_put_bbio(bbio); return re; error: @@ -486,7 +486,7 @@ error: kref_put(&zone->refcnt, reada_zone_release); spin_unlock(&fs_info->reada_lock); } - kfree(bbio); + btrfs_put_bbio(bbio); kfree(re); return re_exist; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9d07c981ec82..1388e7127eea 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -855,7 +855,7 @@ static inline void scrub_get_recover(struct scrub_recover *recover) static inline void scrub_put_recover(struct scrub_recover *recover) { if (atomic_dec_and_test(&recover->refs)) { - kfree(recover->bbio); + btrfs_put_bbio(recover->bbio); kfree(recover); } } @@ -1373,13 +1373,13 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx, ret = btrfs_map_sblock(fs_info, REQ_GET_READ_MIRRORS, logical, &mapped_length, &bbio, 0, 1); if (ret || !bbio || mapped_length < sublen) { - kfree(bbio); + btrfs_put_bbio(bbio); return -EIO; } recover = kzalloc(sizeof(struct scrub_recover), GFP_NOFS); if (!recover) { - kfree(bbio); + btrfs_put_bbio(bbio); return -ENOMEM; } @@ -2748,7 +2748,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) rbio_out: bio_put(bio); bbio_out: - kfree(bbio); + btrfs_put_bbio(bbio); bitmap_or(sparity->ebitmap, sparity->ebitmap, sparity->dbitmap, sparity->nsectors); spin_lock(&sctx->stat_lock); @@ -3879,14 +3879,14 @@ static void scrub_remap_extent(struct btrfs_fs_info *fs_info, &mapped_length, &bbio, 0); if (ret || !bbio || mapped_length < extent_len || !bbio->stripes[0].dev->bdev) { - kfree(bbio); + btrfs_put_bbio(bbio); return; } *extent_physical = bbio->stripes[0].physical; *extent_mirror_num = bbio->mirror_num; *extent_dev = bbio->stripes[0].dev; - kfree(bbio); + btrfs_put_bbio(bbio); } static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c0f1d524c371..0843bcd6304c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4901,6 +4901,37 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) } } +static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) +{ + struct btrfs_bio *bbio = kzalloc( + sizeof(struct btrfs_bio) + + sizeof(struct btrfs_bio_stripe) * (total_stripes) + + sizeof(int) * (real_stripes) + + sizeof(u64) * (real_stripes), + GFP_NOFS); + if (!bbio) + return NULL; + + atomic_set(&bbio->error, 0); + atomic_set(&bbio->refs, 1); + + return bbio; +} + +void btrfs_get_bbio(struct btrfs_bio *bbio) +{ + WARN_ON(!atomic_read(&bbio->refs)); + atomic_inc(&bbio->refs); +} + +void btrfs_put_bbio(struct btrfs_bio *bbio) +{ + if (!bbio) + return; + if (atomic_dec_and_test(&bbio->refs)) + kfree(bbio); +} + static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, @@ -5052,7 +5083,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, * is not left of the left cursor */ ret = -EIO; - kfree(tmp_bbio); + btrfs_put_bbio(tmp_bbio); goto out; } @@ -5087,11 +5118,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } else { WARN_ON(1); ret = -EIO; - kfree(tmp_bbio); + btrfs_put_bbio(tmp_bbio); goto out; } - kfree(tmp_bbio); + btrfs_put_bbio(tmp_bbio); } else if (mirror_num > map->num_stripes) { mirror_num = 0; } @@ -5213,13 +5244,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, tgtdev_indexes = num_stripes; } - bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes), - GFP_NOFS); + bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes); if (!bbio) { ret = -ENOMEM; goto out; } - atomic_set(&bbio->error, 0); if (dev_replace_is_ongoing) bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); @@ -5558,7 +5587,7 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e bio_endio_nodec(bio, err); else bio_endio(bio, err); - kfree(bbio); + btrfs_put_bbio(bbio); } static void btrfs_end_bio(struct bio *bio, int err) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index fb0e8c3f296e..4313e8ff91e5 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -295,6 +295,7 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); #define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) struct btrfs_bio { + atomic_t refs; atomic_t stripes_pending; struct btrfs_fs_info *fs_info; bio_end_io_t *end_io; @@ -394,13 +395,8 @@ struct btrfs_balance_control { int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, u64 end, u64 *length); - -#define btrfs_bio_size(total_stripes, real_stripes) \ - (sizeof(struct btrfs_bio) + \ - (sizeof(struct btrfs_bio_stripe) * (total_stripes)) + \ - (sizeof(int) * (real_stripes)) + \ - (sizeof(u64) * (real_stripes))) - +void btrfs_get_bbio(struct btrfs_bio *bbio); +void btrfs_put_bbio(struct btrfs_bio *bbio); int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num); -- cgit v1.2.3 From 289454ad26a2d752e04b07234a175feda9ec0f4e Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 6 Jan 2015 01:01:03 +0900 Subject: btrfs: clear bio reference after submit_one_bio() After submit_one_bio(), `bio' can go away. However submit_extent_page() leave `bio' referable if submit_one_bio() failed (e.g. -ENOMEM on OOM). It will cause invalid paging request when submit_extent_page() is called next time. I reproduced ENOMEM case with the following script (need CONFIG_FAIL_PAGE_ALLOC, and CONFIG_FAULT_INJECTION_DEBUG_FS). #!/bin/bash dmesgout=dmesg.txt start=100000 end=300000 step=1000 # btrfs options device=/dev/vdb1 directory=/mnt/btrfs # fault-injection options percent=100 times=3 mkdir -p $directory || exit 1 mount -o compress $device $directory || exit 1 rm -f $directory/file || exit 1 dd if=/dev/zero of=$directory/file bs=1M count=512 || exit 1 for interval in `seq $start $step $end`; do dmesg -C echo 1 > /proc/sys/vm/drop_caches sync export FAILCMD_TYPE=fail_page_alloc ./failcmd.sh -p $percent -t $times -i $interval \ --ignore-gfp-highmem=N --ignore-gfp-wait=N --min-order=0 \ -- \ cat $directory/file > /dev/null dmesg > ${dmesgout} if grep -q BUG: ${dmesgout}; then cat ${dmesgout} exit 1 fi done umount $directory exit 0 Signed-off-by: Naohiro Aota Tested-by: Satoru Takeuchi Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index dab8af4450e1..a7f66009519a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2817,8 +2817,10 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_add_page(bio, page, page_size, offset) < page_size) { ret = submit_one_bio(rw, bio, mirror_num, prev_bio_flags); - if (ret < 0) + if (ret < 0) { + *bio_ret = NULL; return ret; + } bio = NULL; } else { return 0; -- cgit v1.2.3 From dcab6a3b2ae657a2017637083c28ee303b6b1b8e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 11 Feb 2015 15:08:59 -0500 Subject: Btrfs: account for large extents with enospc On our gluster boxes we stream large tar balls of backups onto our fses. With 160gb of ram this means we get really large contiguous ranges of dirty data, but the way our ENOSPC stuff works is that as long as it's contiguous we only hold metadata reservation for one extent. The problem is we limit our extents to 128mb, so we'll end up with at least 800 extents so our enospc accounting is quite a bit lower than what we need. To keep track of this make sure we increase outstanding_extents for every multiple of the max extent size so we can be sure to have enough reserved metadata space. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/extent-tree.c | 16 +++++++++---- fs/btrfs/extent_io.c | 2 +- fs/btrfs/inode.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 76 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d3562dd43c66..b3dd55f52f71 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -198,6 +198,8 @@ static int btrfs_csum_sizes[] = { 4, 0 }; #define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024) +#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024) + /* * The key defines the order in the tree, and so it also defines (optimal) * block layout. diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 50de1fa6fc9e..0f6737063142 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4963,19 +4963,25 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root, /** * drop_outstanding_extent - drop an outstanding extent * @inode: the inode we're dropping the extent for + * @num_bytes: the number of bytes we're relaseing. * * This is called when we are freeing up an outstanding extent, either called * after an error or after an extent is written. This will return the number of * reserved extents that need to be freed. This must be called with * BTRFS_I(inode)->lock held. */ -static unsigned drop_outstanding_extent(struct inode *inode) +static unsigned drop_outstanding_extent(struct inode *inode, u64 num_bytes) { unsigned drop_inode_space = 0; unsigned dropped_extents = 0; + unsigned num_extents = 0; - BUG_ON(!BTRFS_I(inode)->outstanding_extents); - BTRFS_I(inode)->outstanding_extents--; + num_extents = (unsigned)div64_u64(num_bytes + + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + ASSERT(num_extents); + ASSERT(BTRFS_I(inode)->outstanding_extents >= num_extents); + BTRFS_I(inode)->outstanding_extents -= num_extents; if (BTRFS_I(inode)->outstanding_extents == 0 && test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, @@ -5146,7 +5152,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) out_fail: spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(inode); + dropped = drop_outstanding_extent(inode, num_bytes); /* * If the inodes csum_bytes is the same as the original * csum_bytes then we know we haven't raced with any free()ers @@ -5225,7 +5231,7 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, root->sectorsize); spin_lock(&BTRFS_I(inode)->lock); - dropped = drop_outstanding_extent(inode); + dropped = drop_outstanding_extent(inode, num_bytes); if (num_bytes) to_free = calc_csum_metadata_size(inode, num_bytes, 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a7f66009519a..29850d4a3827 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3242,7 +3242,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode, page, &delalloc_start, &delalloc_end, - 128 * 1024 * 1024); + BTRFS_MAX_EXTENT_SIZE); if (nr_delalloc == 0) { delalloc_start = delalloc_end + 1; continue; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b957921ba59..8564d8ce03de 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1530,10 +1530,45 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, static void btrfs_split_extent_hook(struct inode *inode, struct extent_state *orig, u64 split) { + u64 size; + /* not delalloc, ignore it */ if (!(orig->state & EXTENT_DELALLOC)) return; + size = orig->end - orig->start + 1; + if (size > BTRFS_MAX_EXTENT_SIZE) { + u64 num_extents; + u64 new_size; + + /* + * We need the largest size of the remaining extent to see if we + * need to add a new outstanding extent. Think of the following + * case + * + * [MEAX_EXTENT_SIZEx2 - 4k][4k] + * + * The new_size would just be 4k and we'd think we had enough + * outstanding extents for this if we only took one side of the + * split, same goes for the other direction. We need to see if + * the larger size still is the same amount of extents as the + * original size, because if it is we need to add a new + * outstanding extent. But if we split up and the larger size + * is less than the original then we are good to go since we've + * already accounted for the extra extent in our original + * accounting. + */ + new_size = orig->end - split + 1; + if ((split - orig->start) > new_size) + new_size = split - orig->start; + + num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE) < num_extents) + return; + } + spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); @@ -1549,10 +1584,34 @@ static void btrfs_merge_extent_hook(struct inode *inode, struct extent_state *new, struct extent_state *other) { + u64 new_size, old_size; + u64 num_extents; + /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) return; + old_size = other->end - other->start + 1; + new_size = old_size + (new->end - new->start + 1); + + /* we're not bigger than the max, unreserve the space and go */ + if (new_size <= BTRFS_MAX_EXTENT_SIZE) { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->lock); + return; + } + + /* + * If we grew by another max_extent, just return, we want to keep that + * reserved amount. + */ + num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE); + if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, + BTRFS_MAX_EXTENT_SIZE) > num_extents) + return; + spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents--; spin_unlock(&BTRFS_I(inode)->lock); @@ -1648,6 +1707,8 @@ static void btrfs_clear_bit_hook(struct inode *inode, unsigned *bits) { u64 len = state->end + 1 - state->start; + u64 num_extents = div64_u64(len + BTRFS_MAX_EXTENT_SIZE -1, + BTRFS_MAX_EXTENT_SIZE); spin_lock(&BTRFS_I(inode)->lock); if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) @@ -1667,7 +1728,7 @@ static void btrfs_clear_bit_hook(struct inode *inode, *bits &= ~EXTENT_FIRST_DELALLOC; } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->outstanding_extents--; + BTRFS_I(inode)->outstanding_extents -= num_extents; spin_unlock(&BTRFS_I(inode)->lock); } -- cgit v1.2.3