diff options
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- | fs/btrfs/backref.c | 8 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 2 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 6 | ||||
-rw-r--r-- | fs/btrfs/export.c | 10 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 16 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 80 | ||||
-rw-r--r-- | fs/btrfs/file.c | 2 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 45 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 8 | ||||
-rw-r--r-- | fs/btrfs/send.c | 8 | ||||
-rw-r--r-- | fs/btrfs/super.c | 2 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 33 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 6 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 8 |
14 files changed, 171 insertions, 63 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index ecbc63d3143e..9a2ec79e8cfb 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1828,7 +1828,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, int found = 0; struct extent_buffer *eb; struct btrfs_inode_extref *extref; - struct extent_buffer *leaf; u32 item_size; u32 cur_offset; unsigned long ptr; @@ -1856,9 +1855,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); - leaf = path->nodes[0]; - item_size = btrfs_item_size_nr(leaf, slot); - ptr = btrfs_item_ptr_offset(leaf, slot); + item_size = btrfs_item_size_nr(eb, slot); + ptr = btrfs_item_ptr_offset(eb, slot); cur_offset = 0; while (cur_offset < item_size) { @@ -1872,7 +1870,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root, if (ret) break; - cur_offset += btrfs_inode_extref_name_len(leaf, extref); + cur_offset += btrfs_inode_extref_name_len(eb, extref); cur_offset += sizeof(*extref); } btrfs_tree_read_unlock_blocking(eb); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 81220b2203c6..0ef5cc13fae2 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,8 +44,6 @@ #define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_READDIO_NEED_LOCK 10 #define BTRFS_INODE_HAS_PROPS 11 -/* DIO is ready to submit */ -#define BTRFS_INODE_DIO_READY 12 /* * The following 3 bits are meant only for the btree inode. * When any of them is set, it means an error happened while writing an diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0d98aee34fee..1e60d00d4ea7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2847,6 +2847,8 @@ int open_ctree(struct super_block *sb, !extent_buffer_uptodate(chunk_root->node)) { printk(KERN_ERR "BTRFS: failed to read chunk root on %s\n", sb->s_id); + if (!IS_ERR(chunk_root->node)) + free_extent_buffer(chunk_root->node); chunk_root->node = NULL; goto fail_tree_roots; } @@ -2885,6 +2887,8 @@ retry_root_backup: !extent_buffer_uptodate(tree_root->node)) { printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", sb->s_id); + if (!IS_ERR(tree_root->node)) + free_extent_buffer(tree_root->node); tree_root->node = NULL; goto recovery_tree_root; } @@ -3765,9 +3769,7 @@ void close_ctree(struct btrfs_root *root) * block groups queued for removal, the deletion will be * skipped when we quit the cleaner thread. */ - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_delete_unused_bgs(root->fs_info); - mutex_unlock(&root->fs_info->cleaner_mutex); ret = btrfs_commit_super(root); if (ret) diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index 8d052209f473..2513a7f53334 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -112,11 +112,11 @@ static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh, u32 generation; if (fh_type == FILEID_BTRFS_WITH_PARENT) { - if (fh_len != BTRFS_FID_SIZE_CONNECTABLE) + if (fh_len < BTRFS_FID_SIZE_CONNECTABLE) return NULL; root_objectid = fid->root_objectid; } else if (fh_type == FILEID_BTRFS_WITH_PARENT_ROOT) { - if (fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) + if (fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT) return NULL; root_objectid = fid->parent_root_objectid; } else @@ -136,11 +136,11 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh, u32 generation; if ((fh_type != FILEID_BTRFS_WITH_PARENT || - fh_len != BTRFS_FID_SIZE_CONNECTABLE) && + fh_len < BTRFS_FID_SIZE_CONNECTABLE) && (fh_type != FILEID_BTRFS_WITH_PARENT_ROOT || - fh_len != BTRFS_FID_SIZE_CONNECTABLE_ROOT) && + fh_len < BTRFS_FID_SIZE_CONNECTABLE_ROOT) && (fh_type != FILEID_BTRFS_WITHOUT_PARENT || - fh_len != BTRFS_FID_SIZE_NON_CONNECTABLE)) + fh_len < BTRFS_FID_SIZE_NON_CONNECTABLE)) return NULL; objectid = fid->objectid; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5411f0ab5683..601d7d45d164 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2828,6 +2828,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *head; int ret; int run_all = count == (unsigned long)-1; + bool can_flush_pending_bgs = trans->can_flush_pending_bgs; /* We'll clean this up in btrfs_cleanup_transaction */ if (trans->aborted) @@ -2844,6 +2845,7 @@ again: #ifdef SCRAMBLE_DELAYED_REFS delayed_refs->run_delayed_start = find_middle(&delayed_refs->root); #endif + trans->can_flush_pending_bgs = false; ret = __btrfs_run_delayed_refs(trans, root, count); if (ret < 0) { btrfs_abort_transaction(trans, root, ret); @@ -2893,6 +2895,7 @@ again: } out: assert_qgroups_uptodate(trans); + trans->can_flush_pending_bgs = can_flush_pending_bgs; return 0; } @@ -3742,10 +3745,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->bytes_reserved = 0; found->bytes_readonly = 0; found->bytes_may_use = 0; - if (total_bytes > 0) - found->full = 0; - else - found->full = 1; + found->full = 0; found->force_alloc = CHUNK_ALLOC_NO_FORCE; found->chunk_alloc = 0; found->flush = 0; @@ -4309,7 +4309,8 @@ out: * the block groups that were made dirty during the lifetime of the * transaction. */ - if (trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) { + if (trans->can_flush_pending_bgs && + trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) { btrfs_create_pending_block_groups(trans, trans->root); btrfs_trans_release_chunk_metadata(trans); } @@ -8668,7 +8669,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, } if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) { - btrfs_drop_and_free_fs_root(tree_root->fs_info, root); + btrfs_add_dropped_root(trans, root); } else { free_extent_buffer(root->node); free_extent_buffer(root->commit_root); @@ -9563,7 +9564,9 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, struct btrfs_block_group_item item; struct btrfs_key key; int ret = 0; + bool can_flush_pending_bgs = trans->can_flush_pending_bgs; + trans->can_flush_pending_bgs = false; list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) { if (ret) goto next; @@ -9584,6 +9587,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, next: list_del_init(&block_group->bg_list); } + trans->can_flush_pending_bgs = can_flush_pending_bgs; } int btrfs_make_block_group(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f1018cfbfefa..3915c9473e94 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, bio_end_io_t end_io_func, int mirror_num, unsigned long prev_bio_flags, - unsigned long bio_flags) + unsigned long bio_flags, + bool force_bio_submit) { int ret = 0; struct bio *bio; @@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, contig = bio_end_sector(bio) == sector; if (prev_bio_flags != bio_flags || !contig || + force_bio_submit || merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) || bio_add_page(bio, page, page_size, offset) < page_size) { ret = submit_one_bio(rw, bio, mirror_num, @@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { struct inode *inode = page->mapping->host; u64 start = page_offset(page); @@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree, } while (cur <= end) { unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; + bool force_bio_submit = false; if (cur >= last_byte) { char *userpage; @@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree, block_start = em->block_start; if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) block_start = EXTENT_MAP_HOLE; + + /* + * If we have a file range that points to a compressed extent + * and it's followed by a consecutive file range that points to + * to the same compressed extent (possibly with a different + * offset and/or length, so it either points to the whole extent + * or only part of it), we must make sure we do not submit a + * single bio to populate the pages for the 2 ranges because + * this makes the compressed extent read zero out the pages + * belonging to the 2nd range. Imagine the following scenario: + * + * File layout + * [0 - 8K] [8K - 24K] + * | | + * | | + * points to extent X, points to extent X, + * offset 4K, length of 8K offset 0, length 16K + * + * [extent X, compressed length = 4K uncompressed length = 16K] + * + * If the bio to read the compressed extent covers both ranges, + * it will decompress extent X into the pages belonging to the + * first range and then it will stop, zeroing out the remaining + * pages that belong to the other range that points to extent X. + * So here we make sure we submit 2 bios, one for the first + * range and another one for the third range. Both will target + * the same physical extent from disk, but we can't currently + * make the compressed bio endio callback populate the pages + * for both ranges because each compressed bio is tightly + * coupled with a single extent map, and each range can have + * an extent map with a different offset value relative to the + * uncompressed data of our extent and different lengths. This + * is a corner case so we prioritize correctness over + * non-optimal behavior (submitting 2 bios for the same extent). + */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && + prev_em_start && *prev_em_start != (u64)-1 && + *prev_em_start != em->orig_start) + force_bio_submit = true; + + if (prev_em_start) + *prev_em_start = em->orig_start; + free_extent_map(em); em = NULL; @@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree, bdev, bio, pnr, end_bio_extent_readpage, mirror_num, *bio_flags, - this_bio_flag); + this_bio_flag, + force_bio_submit); if (!ret) { nr++; *bio_flags = this_bio_flag; @@ -3084,7 +3132,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { struct inode *inode; struct btrfs_ordered_extent *ordered; @@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree, for (index = 0; index < nr_pages; index++) { __do_readpage(tree, pages[index], get_extent, em_cached, bio, - mirror_num, bio_flags, rw); + mirror_num, bio_flags, rw, prev_em_start); page_cache_release(pages[index]); } } @@ -3114,7 +3163,8 @@ static void __extent_readpages(struct extent_io_tree *tree, int nr_pages, get_extent_t *get_extent, struct extent_map **em_cached, struct bio **bio, int mirror_num, - unsigned long *bio_flags, int rw) + unsigned long *bio_flags, int rw, + u64 *prev_em_start) { u64 start = 0; u64 end = 0; @@ -3135,7 +3185,7 @@ static void __extent_readpages(struct extent_io_tree *tree, index - first_index, start, end, get_extent, em_cached, bio, mirror_num, bio_flags, - rw); + rw, prev_em_start); start = page_start; end = start + PAGE_CACHE_SIZE - 1; first_index = index; @@ -3146,7 +3196,8 @@ static void __extent_readpages(struct extent_io_tree *tree, __do_contiguous_readpages(tree, &pages[first_index], index - first_index, start, end, get_extent, em_cached, bio, - mirror_num, bio_flags, rw); + mirror_num, bio_flags, rw, + prev_em_start); } static int __extent_read_full_page(struct extent_io_tree *tree, @@ -3172,7 +3223,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, } ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num, - bio_flags, rw); + bio_flags, rw, NULL); return ret; } @@ -3198,7 +3249,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page, int ret; ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num, - &bio_flags, READ); + &bio_flags, READ, NULL); if (bio) ret = submit_one_bio(READ, bio, mirror_num, bio_flags); return ret; @@ -3451,7 +3502,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, sector, iosize, pg_offset, bdev, &epd->bio, max_nr, end_bio_extent_writepage, - 0, 0, 0); + 0, 0, 0, false); if (ret) SetPageError(page); } @@ -3754,7 +3805,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, ret = submit_extent_page(rw, tree, wbc, p, offset >> 9, PAGE_CACHE_SIZE, 0, bdev, &epd->bio, -1, end_bio_extent_buffer_writepage, - 0, epd->bio_flags, bio_flags); + 0, epd->bio_flags, bio_flags, false); epd->bio_flags = bio_flags; if (ret) { set_btree_ioerr(p); @@ -4158,6 +4209,7 @@ int extent_readpages(struct extent_io_tree *tree, struct page *page; struct extent_map *em_cached = NULL; int nr = 0; + u64 prev_em_start = (u64)-1; for (page_idx = 0; page_idx < nr_pages; page_idx++) { page = list_entry(pages->prev, struct page, lru); @@ -4174,12 +4226,12 @@ int extent_readpages(struct extent_io_tree *tree, if (nr < ARRAY_SIZE(pagepool)) continue; __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ); + &bio, 0, &bio_flags, READ, &prev_em_start); nr = 0; } if (nr) __extent_readpages(tree, pagepool, nr, get_extent, &em_cached, - &bio, 0, &bio_flags, READ); + &bio, 0, &bio_flags, READ, &prev_em_start); if (em_cached) free_extent_map(em_cached); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b823fac91c92..8c6f247ba81d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2584,7 +2584,7 @@ static long btrfs_fallocate(struct file *file, int mode, alloc_start); if (ret) goto out; - } else { + } else if (offset + len > inode->i_size) { /* * If we are fallocating from the end of the file onward we * need to zero out the end of the page if i_size lands in the diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a0fa7253a2d7..611b66d73e80 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5084,7 +5084,8 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (!special_file(inode->i_mode)) + btrfs_wait_ordered_range(inode, 0, (u64)-1); btrfs_free_io_failure_record(inode, 0, (u64)-1); @@ -7408,6 +7409,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, return em; } +struct btrfs_dio_data { + u64 outstanding_extents; + u64 reserve; +}; static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -7415,10 +7420,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, struct extent_map *em; struct btrfs_root *root = BTRFS_I(inode)->root; struct extent_state *cached_state = NULL; + struct btrfs_dio_data *dio_data = NULL; u64 start = iblock << inode->i_blkbits; u64 lockstart, lockend; u64 len = bh_result->b_size; - u64 *outstanding_extents = NULL; int unlock_bits = EXTENT_LOCKED; int ret = 0; @@ -7436,7 +7441,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, * that anything that needs to check if there's a transction doesn't get * confused. */ - outstanding_extents = current->journal_info; + dio_data = current->journal_info; current->journal_info = NULL; } @@ -7568,17 +7573,18 @@ unlock: * within our reservation, otherwise we need to adjust our inode * counter appropriately. */ - if (*outstanding_extents) { - (*outstanding_extents)--; + if (dio_data->outstanding_extents) { + (dio_data->outstanding_extents)--; } else { spin_lock(&BTRFS_I(inode)->lock); BTRFS_I(inode)->outstanding_extents++; spin_unlock(&BTRFS_I(inode)->lock); } - current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); - set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); + WARN_ON(dio_data->reserve < len); + dio_data->reserve -= len; + current->journal_info = dio_data; } /* @@ -7601,8 +7607,8 @@ unlock: unlock_err: clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, unlock_bits, 1, 0, &cached_state, GFP_NOFS); - if (outstanding_extents) - current->journal_info = outstanding_extents; + if (dio_data) + current->journal_info = dio_data; return ret; } @@ -8329,7 +8335,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - u64 outstanding_extents = 0; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_dio_data dio_data = { 0 }; size_t count = 0; int flags = 0; bool wakeup = true; @@ -8367,7 +8374,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, ret = btrfs_delalloc_reserve_space(inode, count); if (ret) goto out; - outstanding_extents = div64_u64(count + + dio_data.outstanding_extents = div64_u64(count + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); @@ -8376,7 +8383,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * do the accounting properly if we go over the number we * originally calculated. Abuse current->journal_info for this. */ - current->journal_info = &outstanding_extents; + dio_data.reserve = round_up(count, root->sectorsize); + current->journal_info = &dio_data; } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { inode_dio_end(inode); @@ -8391,16 +8399,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; if (ret < 0 && ret != -EIOCBQUEUED) { - /* - * If the error comes from submitting stage, - * btrfs_get_blocsk_direct() has free'd data space, - * and metadata space will be handled by - * finish_ordered_fn, don't do that again to make - * sure bytes_may_use is correct. - */ - if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, - &BTRFS_I(inode)->runtime_flags)) - btrfs_delalloc_release_space(inode, count); + if (dio_data.reserve) + btrfs_delalloc_release_space(inode, + dio_data.reserve); } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, count - (size_t)ret); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0adf5422fce9..8d20f3b1cab0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4639,6 +4639,11 @@ locked: bctl->flags |= BTRFS_BALANCE_TYPE_MASK; } + if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) { + ret = -EINVAL; + goto out_bctl; + } + do_balance: /* * Ownership of bctl and mutually_exclusive_operation_running @@ -4650,12 +4655,15 @@ do_balance: need_unlock = false; ret = btrfs_balance(bctl, bargs); + bctl = NULL; if (arg) { if (copy_to_user(arg, bargs, sizeof(*bargs))) ret = -EFAULT; } +out_bctl: + kfree(bctl); out_bargs: kfree(bargs); out_unlock: diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index aa72bfd28f7d..a739b825bdd3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1920,10 +1920,12 @@ static int did_overwrite_ref(struct send_ctx *sctx, /* * We know that it is or will be overwritten. Check this now. * The current inode being processed might have been the one that caused - * inode 'ino' to be orphanized, therefore ow_inode can actually be the - * same as sctx->send_progress. + * inode 'ino' to be orphanized, therefore check if ow_inode matches + * the current inode being processed. */ - if (ow_inode <= sctx->send_progress) + if ((ow_inode < sctx->send_progress) || + (ino != sctx->cur_ino && ow_inode == sctx->cur_ino && + gen == sctx->cur_inode_gen)) ret = 1; else ret = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2b07b3581781..11d1eab9234d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) * groups on disk until we're mounted read-write again * unless we clean them up here. */ - mutex_lock(&root->fs_info->cleaner_mutex); btrfs_delete_unused_bgs(fs_info); - mutex_unlock(&root->fs_info->cleaner_mutex); btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8f259b3a66b3..a5b06442f0bf 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans, btrfs_unpin_free_ino(root); clear_btree_io_tree(&root->dirty_log_pages); } + + /* We can free old roots now. */ + spin_lock(&trans->dropped_roots_lock); + while (!list_empty(&trans->dropped_roots)) { + root = list_first_entry(&trans->dropped_roots, + struct btrfs_root, root_list); + list_del_init(&root->root_list); + spin_unlock(&trans->dropped_roots_lock); + btrfs_drop_and_free_fs_root(fs_info, root); + spin_lock(&trans->dropped_roots_lock); + } + spin_unlock(&trans->dropped_roots_lock); up_write(&fs_info->commit_root_sem); } @@ -255,11 +267,13 @@ loop: INIT_LIST_HEAD(&cur_trans->pending_ordered); INIT_LIST_HEAD(&cur_trans->dirty_bgs); INIT_LIST_HEAD(&cur_trans->io_bgs); + INIT_LIST_HEAD(&cur_trans->dropped_roots); mutex_init(&cur_trans->cache_write_mutex); cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); INIT_LIST_HEAD(&cur_trans->deleted_bgs); spin_lock_init(&cur_trans->deleted_bgs_lock); + spin_lock_init(&cur_trans->dropped_roots_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, fs_info->btree_inode->i_mapping); @@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans, } +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_transaction *cur_trans = trans->transaction; + + /* Add ourselves to the transaction dropped list */ + spin_lock(&cur_trans->dropped_roots_lock); + list_add_tail(&root->root_list, &cur_trans->dropped_roots); + spin_unlock(&cur_trans->dropped_roots_lock); + + /* Make sure we don't try to update the root at commit time */ + spin_lock(&root->fs_info->fs_roots_radix_lock); + radix_tree_tag_clear(&root->fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + BTRFS_ROOT_TRANS_TAG); + spin_unlock(&root->fs_info->fs_roots_radix_lock); +} + int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -525,6 +557,7 @@ again: h->delayed_ref_elem.seq = 0; h->type = type; h->allocating_chunk = false; + h->can_flush_pending_bgs = true; h->reloc_reserved = false; h->sync = false; INIT_LIST_HEAD(&h->qgroup_ref_list); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index edc2fbc262d7..a994bb097ee5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -65,6 +65,7 @@ struct btrfs_transaction { struct list_head switch_commits; struct list_head dirty_bgs; struct list_head io_bgs; + struct list_head dropped_roots; u64 num_dirty_bgs; /* @@ -76,6 +77,7 @@ struct btrfs_transaction { spinlock_t dirty_bgs_lock; struct list_head deleted_bgs; spinlock_t deleted_bgs_lock; + spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; int dirty_bg_run; @@ -116,6 +118,7 @@ struct btrfs_trans_handle { short aborted; short adding_csums; bool allocating_chunk; + bool can_flush_pending_bgs; bool reloc_reserved; bool sync; unsigned int type; @@ -216,5 +219,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info); int btrfs_transaction_in_commit(struct btrfs_fs_info *info); void btrfs_put_transaction(struct btrfs_transaction *transaction); void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info); - +void btrfs_add_dropped_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root); #endif diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2ca784a14e84..595279a8b99f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -376,6 +376,14 @@ struct map_lookup { #define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4) #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) +#define BTRFS_BALANCE_ARGS_MASK \ + (BTRFS_BALANCE_ARGS_PROFILES | \ + BTRFS_BALANCE_ARGS_USAGE | \ + BTRFS_BALANCE_ARGS_DEVID | \ + BTRFS_BALANCE_ARGS_DRANGE | \ + BTRFS_BALANCE_ARGS_VRANGE | \ + BTRFS_BALANCE_ARGS_LIMIT) + /* * Profile changing flags. When SOFT is set we won't relocate chunk if * it already has the target profile (even though it may be |