From 41a2ee75aab0290a5899677437736ec715dcd1b6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Jan 2020 09:02:21 -0500 Subject: btrfs: introduce per-inode file extent tree In order to keep track of where we have file extents on disk, and thus where it is safe to adjust the i_size to, we need to have a tree in place to keep track of the contiguous areas we have file extents for. Add helpers to use this tree, as it's not required for NO_HOLES file systems. We will use this by setting DIRTY for areas we know we have file extent item's set, and clearing it when we remove file extent items for truncation. Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/trace') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 17088a112ed0..16ade35e8170 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -88,6 +88,7 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS); { IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \ { IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \ { IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \ + { IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT" }, \ { IO_TREE_SELFTEST, "SELFTEST" }) #define BTRFS_GROUP_FLAGS \ -- cgit v1.2.3 From 3f1c64ce04387773d2b0d8ef6a7e573ff80e4436 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 17 Jan 2020 09:02:24 -0500 Subject: btrfs: delete the ordered isize update code Now that we have a safe way to update the isize, remove all of this code as it's no longer needed. Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 128 ------------------------------------------- fs/btrfs/ordered-data.h | 7 --- include/trace/events/btrfs.h | 1 - 3 files changed, 136 deletions(-) (limited to 'include/trace') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a65f189a5b94..66170c8cea6f 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -785,134 +785,6 @@ out: return entry; } -/* - * After an extent is done, call this to conditionally update the on disk - * i_size. i_size is updated to cover any fully written part of the file. - */ -int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, - struct btrfs_ordered_extent *ordered) -{ - struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; - u64 disk_i_size; - u64 new_i_size; - u64 i_size = i_size_read(inode); - struct rb_node *node; - struct rb_node *prev = NULL; - struct btrfs_ordered_extent *test; - int ret = 1; - u64 orig_offset = offset; - - spin_lock_irq(&tree->lock); - if (ordered) { - offset = entry_end(ordered); - if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) - offset = min(offset, - ordered->file_offset + - ordered->truncated_len); - } else { - offset = ALIGN(offset, btrfs_inode_sectorsize(inode)); - } - disk_i_size = BTRFS_I(inode)->disk_i_size; - - /* - * truncate file. - * If ordered is not NULL, then this is called from endio and - * disk_i_size will be updated by either truncate itself or any - * in-flight IOs which are inside the disk_i_size. - * - * Because btrfs_setsize() may set i_size with disk_i_size if truncate - * fails somehow, we need to make sure we have a precise disk_i_size by - * updating it as usual. - * - */ - if (!ordered && disk_i_size > i_size) { - BTRFS_I(inode)->disk_i_size = orig_offset; - ret = 0; - goto out; - } - - /* - * if the disk i_size is already at the inode->i_size, or - * this ordered extent is inside the disk i_size, we're done - */ - if (disk_i_size == i_size) - goto out; - - /* - * We still need to update disk_i_size if outstanding_isize is greater - * than disk_i_size. - */ - if (offset <= disk_i_size && - (!ordered || ordered->outstanding_isize <= disk_i_size)) - goto out; - - /* - * walk backward from this ordered extent to disk_i_size. - * if we find an ordered extent then we can't update disk i_size - * yet - */ - if (ordered) { - node = rb_prev(&ordered->rb_node); - } else { - prev = tree_search(tree, offset); - /* - * we insert file extents without involving ordered struct, - * so there should be no ordered struct cover this offset - */ - if (prev) { - test = rb_entry(prev, struct btrfs_ordered_extent, - rb_node); - BUG_ON(offset_in_entry(test, offset)); - } - node = prev; - } - for (; node; node = rb_prev(node)) { - test = rb_entry(node, struct btrfs_ordered_extent, rb_node); - - /* We treat this entry as if it doesn't exist */ - if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags)) - continue; - - if (entry_end(test) <= disk_i_size) - break; - if (test->file_offset >= i_size) - break; - - /* - * We don't update disk_i_size now, so record this undealt - * i_size. Or we will not know the real i_size. - */ - if (test->outstanding_isize < offset) - test->outstanding_isize = offset; - if (ordered && - ordered->outstanding_isize > test->outstanding_isize) - test->outstanding_isize = ordered->outstanding_isize; - goto out; - } - new_i_size = min_t(u64, offset, i_size); - - /* - * Some ordered extents may completed before the current one, and - * we hold the real i_size in ->outstanding_isize. - */ - if (ordered && ordered->outstanding_isize > new_i_size) - new_i_size = min_t(u64, ordered->outstanding_isize, i_size); - BTRFS_I(inode)->disk_i_size = new_i_size; - ret = 0; -out: - /* - * We need to do this because we can't remove ordered extents until - * after the i_disk_size has been updated and then the inode has been - * updated to reflect the change, so we need to tell anybody who finds - * this ordered extent that we've already done all the real work, we - * just haven't completed all the other work. - */ - if (ordered) - set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags); - spin_unlock_irq(&tree->lock); - return ret; -} - /* * search the ordered extents for one corresponding to 'offset' and * try to find a checksum. This is used because we allow pages to diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 3beb4da4ab41..a46f319d9ae0 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -52,11 +52,6 @@ enum { BTRFS_ORDERED_DIRECT, /* We had an io error when writing this out */ BTRFS_ORDERED_IOERR, - /* - * indicates whether this ordered extent has done its due diligence in - * updating the isize - */ - BTRFS_ORDERED_UPDATED_ISIZE, /* Set when we have to truncate an extent */ BTRFS_ORDERED_TRUNCATED, /* Regular IO for COW */ @@ -182,8 +177,6 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range( struct btrfs_inode *inode, u64 file_offset, u64 len); -int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, - struct btrfs_ordered_extent *ordered); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u8 *sum, int len); u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr, diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 16ade35e8170..f1f2b6a04052 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -469,7 +469,6 @@ DEFINE_EVENT( { (1 << BTRFS_ORDERED_PREALLOC), "PREALLOC" }, \ { (1 << BTRFS_ORDERED_DIRECT), "DIRECT" }, \ { (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \ - { (1 << BTRFS_ORDERED_UPDATED_ISIZE), "UPDATED_ISIZE" }, \ { (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" }) -- cgit v1.2.3 From fe119a6eeb670585e29dbe3932e00ad29ae8f5f9 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 20 Jan 2020 16:09:18 +0200 Subject: btrfs: switch to per-transaction pinned extents This commit flips the switch to start tracking/processing pinned extents on a per-transaction basis. It mostly replaces all references from btrfs_fs_info::(pinned_extents|freed_extents[]) to btrfs_transaction::pinned_extents. Two notable modifications that warrant explicit mention are changing clean_pinned_extents to get a reference to the previously running transaction. The other one is removal of call to btrfs_destroy_pinned_extent since transactions are going to be cleaned in btrfs_cleanup_one_transaction. Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 38 +++++++++++++++++++++++++------------- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/disk-io.c | 30 +++++------------------------- fs/btrfs/extent-io-tree.h | 4 ++-- fs/btrfs/extent-tree.c | 31 ++++++++----------------------- fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/transaction.c | 2 ++ fs/btrfs/transaction.h | 1 + include/trace/events/btrfs.h | 4 ++-- 9 files changed, 48 insertions(+), 68 deletions(-) (limited to 'include/trace') diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9fec78a8c759..b8f39a679064 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -460,7 +460,7 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end int ret; while (start < end) { - ret = find_first_extent_bit(info->pinned_extents, start, + ret = find_first_extent_bit(&info->excluded_extents, start, &extent_start, &extent_end, EXTENT_DIRTY | EXTENT_UPTODATE, NULL); @@ -1248,30 +1248,42 @@ out: return ret; } -static bool clean_pinned_extents(struct btrfs_block_group *bg) +static bool clean_pinned_extents(struct btrfs_trans_handle *trans, + struct btrfs_block_group *bg) { struct btrfs_fs_info *fs_info = bg->fs_info; + struct btrfs_transaction *prev_trans = NULL; const u64 start = bg->start; const u64 end = start + bg->length - 1; int ret; + spin_lock(&fs_info->trans_lock); + if (trans->transaction->list.prev != &fs_info->trans_list) { + prev_trans = list_last_entry(&trans->transaction->list, + struct btrfs_transaction, list); + refcount_inc(&prev_trans->use_count); + } + spin_unlock(&fs_info->trans_lock); + /* * Hold the unused_bg_unpin_mutex lock to avoid racing with * btrfs_finish_extent_commit(). If we are at transaction N, another * task might be running finish_extent_commit() for the previous * transaction N - 1, and have seen a range belonging to the block - * group in freed_extents[] before we were able to clear the whole - * block group range from freed_extents[]. This means that task can - * lookup for the block group after we unpinned it from freed_extents - * and removed it, leading to a BUG_ON() at unpin_extent_range(). + * group in pinned_extents before we were able to clear the whole block + * group range from pinned_extents. This means that task can lookup for + * the block group after we unpinned it from pinned_extents and removed + * it, leading to a BUG_ON() at unpin_extent_range(). */ mutex_lock(&fs_info->unused_bg_unpin_mutex); - ret = clear_extent_bits(&fs_info->freed_extents[0], start, end, - EXTENT_DIRTY); - if (ret) - goto err; + if (prev_trans) { + ret = clear_extent_bits(&prev_trans->pinned_extents, start, end, + EXTENT_DIRTY); + if (ret) + goto err; + } - ret = clear_extent_bits(&fs_info->freed_extents[1], start, end, + ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end, EXTENT_DIRTY); if (ret) goto err; @@ -1380,7 +1392,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * We could have pending pinned extents for this block group, * just delete them, we don't care about them anymore. */ - if (!clean_pinned_extents(block_group)) + if (!clean_pinned_extents(trans, block_group)) goto end_trans; /* @@ -2890,7 +2902,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, &cache->space_info->total_bytes_pinned, num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); - set_extent_dirty(info->pinned_extents, + set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 22d0cb0019d1..bb237d577725 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -596,8 +596,8 @@ struct btrfs_fs_info { /* keep track of unallocated space */ atomic64_t free_chunk_space; - struct extent_io_tree freed_extents[2]; - struct extent_io_tree *pinned_extents; + /* Track ranges which are used by log trees blocks/logged data extents */ + struct extent_io_tree excluded_extents; /* logical->physical extent mapping */ struct extent_map_tree mapping_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 194c98a61095..e1e111c8b08b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2075,10 +2075,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) btrfs_drop_and_free_fs_root(fs_info, gang[i]); } - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) btrfs_free_log_root_tree(NULL, fs_info); - btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents); - } } static void btrfs_init_scrub(struct btrfs_fs_info *fs_info) @@ -2749,11 +2747,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) fs_info->block_group_cache_tree = RB_ROOT; fs_info->first_logical_byte = (u64)-1; - extent_io_tree_init(fs_info, &fs_info->freed_extents[0], - IO_TREE_FS_INFO_FREED_EXTENTS0, NULL); - extent_io_tree_init(fs_info, &fs_info->freed_extents[1], - IO_TREE_FS_INFO_FREED_EXTENTS1, NULL); - fs_info->pinned_extents = &fs_info->freed_extents[0]; + extent_io_tree_init(fs_info, &fs_info->excluded_extents, + IO_TREE_FS_EXCLUDED_EXTENTS, NULL); set_bit(BTRFS_FS_BARRIER, &fs_info->flags); mutex_init(&fs_info->ordered_operations_mutex); @@ -4434,16 +4429,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, } static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info, - struct extent_io_tree *pinned_extents) + struct extent_io_tree *unpin) { - struct extent_io_tree *unpin; u64 start; u64 end; int ret; - bool loop = true; - unpin = pinned_extents; -again: while (1) { struct extent_state *cached_state = NULL; @@ -4468,15 +4459,6 @@ again: cond_resched(); } - if (loop) { - if (unpin == &fs_info->freed_extents[0]) - unpin = &fs_info->freed_extents[1]; - else - unpin = &fs_info->freed_extents[0]; - loop = false; - goto again; - } - return 0; } @@ -4567,8 +4549,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); - btrfs_destroy_pinned_extent(fs_info, - fs_info->pinned_extents); + btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents); cur_trans->state =TRANS_STATE_COMPLETED; wake_up(&cur_trans->commit_wait); @@ -4620,7 +4601,6 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info) btrfs_destroy_all_ordered_extents(fs_info); btrfs_destroy_delayed_inodes(fs_info); btrfs_assert_delayed_root_empty(fs_info); - btrfs_destroy_pinned_extent(fs_info, fs_info->pinned_extents); btrfs_destroy_all_delalloc_inodes(fs_info); mutex_unlock(&fs_info->transaction_kthread_mutex); diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h index cc3037f9765e..b4a7bad3e82e 100644 --- a/fs/btrfs/extent-io-tree.h +++ b/fs/btrfs/extent-io-tree.h @@ -36,8 +36,8 @@ struct io_failure_record; #define CHUNK_TRIMMED EXTENT_DEFRAG enum { - IO_TREE_FS_INFO_FREED_EXTENTS0, - IO_TREE_FS_INFO_FREED_EXTENTS1, + IO_TREE_FS_PINNED_EXTENTS, + IO_TREE_FS_EXCLUDED_EXTENTS, IO_TREE_INODE_IO, IO_TREE_INODE_IO_FAILURE, IO_TREE_RELOC_BLOCKS, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f97e631aaca5..136fffb76428 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -64,10 +64,8 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info, u64 start, u64 num_bytes) { u64 end = start + num_bytes - 1; - set_extent_bits(&fs_info->freed_extents[0], - start, end, EXTENT_UPTODATE); - set_extent_bits(&fs_info->freed_extents[1], - start, end, EXTENT_UPTODATE); + set_extent_bits(&fs_info->excluded_extents, start, end, + EXTENT_UPTODATE); return 0; } @@ -79,10 +77,8 @@ void btrfs_free_excluded_extents(struct btrfs_block_group *cache) start = cache->start; end = start + cache->length - 1; - clear_extent_bits(&fs_info->freed_extents[0], - start, end, EXTENT_UPTODATE); - clear_extent_bits(&fs_info->freed_extents[1], - start, end, EXTENT_UPTODATE); + clear_extent_bits(&fs_info->excluded_extents, start, end, + EXTENT_UPTODATE); } static u64 generic_ref_to_space_flags(struct btrfs_ref *ref) @@ -2605,7 +2601,7 @@ static int pin_down_extent(struct btrfs_trans_handle *trans, percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); - set_extent_dirty(fs_info->pinned_extents, bytenr, + set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); return 0; } @@ -2761,11 +2757,6 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info) } } - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) - fs_info->pinned_extents = &fs_info->freed_extents[1]; - else - fs_info->pinned_extents = &fs_info->freed_extents[0]; - up_write(&fs_info->commit_root_sem); btrfs_update_global_block_rsv(fs_info); @@ -2906,10 +2897,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) u64 end; int ret; - if (fs_info->pinned_extents == &fs_info->freed_extents[0]) - unpin = &fs_info->freed_extents[1]; - else - unpin = &fs_info->freed_extents[0]; + unpin = &trans->transaction->pinned_extents; while (!TRANS_ABORTED(trans)) { struct extent_state *cached_state = NULL; @@ -2921,12 +2909,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans) mutex_unlock(&fs_info->unused_bg_unpin_mutex); break; } - if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) { - clear_extent_bits(&fs_info->freed_extents[0], start, + if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags)) + clear_extent_bits(&fs_info->excluded_extents, start, end, EXTENT_UPTODATE); - clear_extent_bits(&fs_info->freed_extents[1], start, - end, EXTENT_UPTODATE); - } if (btrfs_test_opt(fs_info, DISCARD_SYNC)) ret = btrfs_discard_extent(fs_info, start, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9d6372139547..bd9c4b5da549 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1086,7 +1086,7 @@ static noinline_for_stack int write_pinned_extent_entries( * We shouldn't have switched the pinned extents yet so this is the * right one */ - unpin = block_group->fs_info->pinned_extents; + unpin = &trans->transaction->pinned_extents; start = block_group->start; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 37680351b7c3..fdfdfc426539 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -336,6 +336,8 @@ loop: list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(fs_info, &cur_trans->dirty_pages, IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode); + extent_io_tree_init(fs_info, &cur_trans->pinned_extents, + IO_TREE_FS_PINNED_EXTENTS, NULL); fs_info->generation++; cur_trans->transid = fs_info->generation; fs_info->running_transaction = cur_trans; diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 453cea7c7a72..31ae8d273065 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -71,6 +71,7 @@ struct btrfs_transaction { */ struct list_head io_bgs; struct list_head dropped_roots; + struct extent_io_tree pinned_extents; /* * we need to make sure block group deletion doesn't race with diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index f1f2b6a04052..bcbc763b8814 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -81,8 +81,8 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS); #define show_extent_io_tree_owner(owner) \ __print_symbolic(owner, \ - { IO_TREE_FS_INFO_FREED_EXTENTS0, "FREED_EXTENTS0" }, \ - { IO_TREE_FS_INFO_FREED_EXTENTS1, "FREED_EXTENTS1" }, \ + { IO_TREE_FS_PINNED_EXTENTS, "PINNED_EXTENTS" }, \ + { IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS" }, \ { IO_TREE_INODE_IO, "INODE_IO" }, \ { IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE" }, \ { IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \ -- cgit v1.2.3