From 6e4b2479ab38b3f949a85964da212295d32102f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:06 +0200 Subject: btrfs: mark the len field in struct btrfs_ordered_sum as unsigned len can't ever be negative, so mark it as an u32 instead of int. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ordered-data.h') diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index f0f1138d23c3..2e54820a5e6f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -20,7 +20,7 @@ struct btrfs_ordered_sum { /* * this is the length in bytes covered by the sums array below. */ - int len; + u32 len; struct list_head list; /* last field is a variable length array of csums */ u8 sums[]; -- cgit v1.2.3 From 5cfe76f846d5034058c0c4813259d5d831757c36 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:07 +0200 Subject: btrfs: rename the bytenr field in struct btrfs_ordered_sum to logical btrfs_ordered_sum::bytendr stores a logical address. Make that clear by renaming it to ->logical. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file-item.c | 8 ++++---- fs/btrfs/inode.c | 2 +- fs/btrfs/ordered-data.h | 8 ++++---- fs/btrfs/relocation.c | 4 ++-- fs/btrfs/tree-log.c | 12 ++++++------ fs/btrfs/zoned.c | 4 ++-- 6 files changed, 19 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/ordered-data.h') diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 24b974bb828a..415e50904db3 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -560,7 +560,7 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end, goto fail; } - sums->bytenr = start; + sums->logical = start; sums->len = size; offset = bytes_to_csum_size(fs_info, start - key.offset); @@ -749,7 +749,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio) sums->len = bio->bi_iter.bi_size; INIT_LIST_HEAD(&sums->list); - sums->bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT; + sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT; index = 0; shash->tfm = fs_info->csum_shash; @@ -799,7 +799,7 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio) ordered = btrfs_lookup_ordered_extent(inode, offset); ASSERT(ordered); /* Logic error */ - sums->bytenr = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + sums->logical = (bio->bi_iter.bi_sector << SECTOR_SHIFT) + total_bytes; index = 0; } @@ -1086,7 +1086,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, again: next_offset = (u64)-1; found_next = 0; - bytenr = sums->bytenr + total_bytes; + bytenr = sums->logical + total_bytes; file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; file_key.offset = bytenr; file_key.type = BTRFS_EXTENT_CSUM_KEY; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 40b5f2df9ecc..ad8196f31cdb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2850,7 +2850,7 @@ static int add_pending_csums(struct btrfs_trans_handle *trans, trans->adding_csums = true; if (!csum_root) csum_root = btrfs_csum_root(trans->fs_info, - sum->bytenr); + sum->logical); ret = btrfs_csum_file_blocks(trans, csum_root, sum); trans->adding_csums = false; if (ret) diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 2e54820a5e6f..ebc980ac967a 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -14,13 +14,13 @@ struct btrfs_ordered_inode_tree { }; struct btrfs_ordered_sum { - /* bytenr is the start of this extent on disk */ - u64 bytenr; - /* - * this is the length in bytes covered by the sums array below. + * Logical start address and length for of the blocks covered by + * the sums array. */ + u64 logical; u32 len; + struct list_head list; /* last field is a variable length array of csums */ u8 sums[]; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0bda67ad349e..1333c8e2ddad 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4379,8 +4379,8 @@ int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len) * disk_len vs real len like with real inodes since it's all * disk length. */ - new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr; - sums->bytenr = new_bytenr; + new_bytenr = ordered->disk_bytenr + sums->logical - disk_bytenr; + sums->logical = new_bytenr; btrfs_add_ordered_sum(ordered, sums); } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ecb73da5d25a..f91a6175fd11 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -859,10 +859,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, struct btrfs_ordered_sum, list); csum_root = btrfs_csum_root(fs_info, - sums->bytenr); + sums->logical); if (!ret) ret = btrfs_del_csums(trans, csum_root, - sums->bytenr, + sums->logical, sums->len); if (!ret) ret = btrfs_csum_file_blocks(trans, @@ -4221,7 +4221,7 @@ static int log_csums(struct btrfs_trans_handle *trans, struct btrfs_root *log_root, struct btrfs_ordered_sum *sums) { - const u64 lock_end = sums->bytenr + sums->len - 1; + const u64 lock_end = sums->logical + sums->len - 1; struct extent_state *cached_state = NULL; int ret; @@ -4239,7 +4239,7 @@ static int log_csums(struct btrfs_trans_handle *trans, * file which happens to refer to the same extent as well. Such races * can leave checksum items in the log with overlapping ranges. */ - ret = lock_extent(&log_root->log_csum_range, sums->bytenr, lock_end, + ret = lock_extent(&log_root->log_csum_range, sums->logical, lock_end, &cached_state); if (ret) return ret; @@ -4252,11 +4252,11 @@ static int log_csums(struct btrfs_trans_handle *trans, * some checksums missing in the fs/subvolume tree. So just delete (or * trim and adjust) any existing csum items in the log for this range. */ - ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len); + ret = btrfs_del_csums(trans, log_root, sums->logical, sums->len); if (!ret) ret = btrfs_csum_file_blocks(trans, log_root, sums); - unlock_extent(&log_root->log_csum_range, sums->bytenr, lock_end, + unlock_extent(&log_root->log_csum_range, sums->logical, lock_end, &cached_state); return ret; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 98d6b8cc3874..eca49e6e0e5f 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1711,9 +1711,9 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) list_for_each_entry(sum, &ordered->list, list) { if (logical < orig_logical) - sum->bytenr -= orig_logical - logical; + sum->logical -= orig_logical - logical; else - sum->bytenr += logical - orig_logical; + sum->logical += logical - orig_logical; } } -- cgit v1.2.3 From cbfce4c7fbde23cc8bcba44822a58c728caf6ec9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:08 +0200 Subject: btrfs: optimize the logical to physical mapping for zoned writes The current code to store the final logical to physical mapping for a zone append write in the extent tree is rather inefficient. It first has to split the ordered extent so that there is one ordered extent per bio, so that it can look up the ordered extent on I/O completion in btrfs_record_physical_zoned and store the physical LBA returned by the block driver in the ordered extent. btrfs_rewrite_logical_zoned then has to do a lookup in the chunk tree to see what physical address the logical address for this bio / ordered extent is mapped to, and then rewrite it in the extent tree. To optimize this process, we can store the physical address assigned in the chunk tree to the original logical address and a pointer to btrfs_ordered_sum structure the in the btrfs_bio structure, and then use this information to rewrite the logical address in the btrfs_ordered_sum structure directly at I/O completion time in btrfs_record_physical_zoned. btrfs_rewrite_logical_zoned then simply updates the logical address in the extent tree and the ordered_extent itself. The code in btrfs_rewrite_logical_zoned now runs for all data I/O completions in zoned file systems, which is fine as there is no remapping to do for non-append writes to conventional zones or for relocation, and the overhead for quickly breaking out of the loop is very low. Because zoned file systems now need the ordered_sums structure to record the actual write location returned by zone append, allocate dummy structures without the csum array for them when the I/O doesn't use checksums, and free them when completing the ordered_extent. Note that the btrfs_bio doesn't grow as the new field are places into a union that is so far not used for data writes and has plenty of space left in it. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/bio.c | 5 ++++ fs/btrfs/bio.h | 17 +++++++++++--- fs/btrfs/file-item.c | 30 ++++++++++++++++++++++++ fs/btrfs/file-item.h | 1 + fs/btrfs/inode.c | 6 +---- fs/btrfs/ordered-data.c | 1 - fs/btrfs/ordered-data.h | 6 ----- fs/btrfs/zoned.c | 61 ++++++++++++++++++------------------------------- 8 files changed, 73 insertions(+), 54 deletions(-) (limited to 'fs/btrfs/ordered-data.h') diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index ea6c81f9d1a3..54cfab7394d3 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -431,6 +431,7 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio) u64 zone_start = round_down(physical, dev->fs_info->zone_size); ASSERT(btrfs_dev_is_sequential(dev, physical)); + btrfs_bio(bio)->orig_physical = physical; bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT; } btrfs_debug_in_rcu(dev->fs_info, @@ -685,6 +686,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) ret = btrfs_bio_csum(bbio); if (ret) goto fail_put_bio; + } else if (use_append) { + ret = btrfs_alloc_dummy_sum(bbio); + if (ret) + goto fail_put_bio; } } diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h index a8eca3a65673..8a29980159b4 100644 --- a/fs/btrfs/bio.h +++ b/fs/btrfs/bio.h @@ -39,8 +39,8 @@ struct btrfs_bio { union { /* - * Data checksumming and original I/O information for internal - * use in the btrfs_submit_bio machinery. + * For data reads: checksumming and original I/O information. + * (for internal use in the btrfs_submit_bio machinery only) */ struct { u8 *csum; @@ -48,7 +48,18 @@ struct btrfs_bio { struct bvec_iter saved_iter; }; - /* For metadata parentness verification. */ + /* + * For data writes: + * - pointer to the checksums for this bio + * - original physical address from the allocator + * (for zone append only) + */ + struct { + struct btrfs_ordered_sum *sums; + u64 orig_physical; + }; + + /* For metadata reads: parentness verification. */ struct btrfs_tree_parent_check parent_check; }; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 415e50904db3..0cb4a9921d21 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -818,11 +818,41 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio) } this_sum_bytes = 0; + + /* + * The ->sums assignment is for zoned writes, where a bio never spans + * ordered extents and is only done unconditionally because that's cheaper + * than a branch. + */ + bbio->sums = sums; btrfs_add_ordered_sum(ordered, sums); btrfs_put_ordered_extent(ordered); return 0; } +/* + * Nodatasum I/O on zoned file systems still requires an btrfs_ordered_sum to + * record the updated logical address on Zone Append completion. + * Allocate just the structure with an empty sums array here for that case. + */ +blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio) +{ + struct btrfs_ordered_extent *ordered = + btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset); + + if (WARN_ON_ONCE(!ordered)) + return BLK_STS_IOERR; + + bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS); + if (!bbio->sums) + return BLK_STS_RESOURCE; + bbio->sums->len = bbio->bio.bi_iter.bi_size; + bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; + btrfs_add_ordered_sum(ordered, bbio->sums); + btrfs_put_ordered_extent(ordered); + return 0; +} + /* * Remove one checksum overlapping a range. * diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h index 6be8725cd574..4ec669b69008 100644 --- a/fs/btrfs/file-item.h +++ b/fs/btrfs/file-item.h @@ -50,6 +50,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_ordered_sum *sums); blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio); +blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio); int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit, bool nowait); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ad8196f31cdb..31c5b7c176d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3301,14 +3301,10 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - /* A valid ->physical implies a write on a sequential zone. */ - if (ordered_extent->physical != (u64)-1) { + if (btrfs_is_zoned(fs_info)) { btrfs_rewrite_logical_zoned(ordered_extent); btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr, ordered_extent->disk_num_bytes); - } else if (btrfs_is_data_reloc_root(inode->root)) { - btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr, - ordered_extent->disk_num_bytes); } if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a9778a91511e..324a5a8c844a 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -209,7 +209,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( entry->compress_type = compress_type; entry->truncated_len = (u64)-1; entry->qgroup_rsv = ret; - entry->physical = (u64)-1; ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0); entry->flags = flags; diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index ebc980ac967a..dc700aa515b5 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -151,12 +151,6 @@ struct btrfs_ordered_extent { struct completion completion; struct btrfs_work flush_work; struct list_head work_list; - - /* - * Used to reverse-map physical address returned from ZONE_APPEND write - * command in a workqueue context - */ - u64 physical; }; static inline void diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index eca49e6e0e5f..b55b0d4ee86f 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1657,51 +1657,28 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio) void btrfs_record_physical_zoned(struct btrfs_bio *bbio) { const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; - struct btrfs_ordered_extent *ordered; + struct btrfs_ordered_sum *sum = bbio->sums; - ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset); - if (WARN_ON(!ordered)) - return; - - ordered->physical = physical; - btrfs_put_ordered_extent(ordered); + if (physical < bbio->orig_physical) + sum->logical -= bbio->orig_physical - physical; + else + sum->logical += physical - bbio->orig_physical; } void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) { struct btrfs_inode *inode = BTRFS_I(ordered->inode); - struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct extent_map_tree *em_tree; + struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; - struct btrfs_ordered_sum *sum; - u64 orig_logical = ordered->disk_bytenr; - struct map_lookup *map; - u64 physical = ordered->physical; - u64 chunk_start_phys; - u64 logical; + struct btrfs_ordered_sum *sum = + list_first_entry(&ordered->list, typeof(*sum), list); + u64 logical = sum->logical; - em = btrfs_get_chunk_map(fs_info, orig_logical, 1); - if (IS_ERR(em)) - return; - map = em->map_lookup; - chunk_start_phys = map->stripes[0].physical; - - if (WARN_ON_ONCE(map->num_stripes > 1) || - WARN_ON_ONCE((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) || - WARN_ON_ONCE(physical < chunk_start_phys) || - WARN_ON_ONCE(physical > chunk_start_phys + em->orig_block_len)) { - free_extent_map(em); - return; - } - logical = em->start + (physical - map->stripes[0].physical); - free_extent_map(em); - - if (orig_logical == logical) - return; + if (ordered->disk_bytenr == logical) + goto out; ordered->disk_bytenr = logical; - em_tree = &inode->extent_tree; write_lock(&em_tree->lock); em = search_extent_mapping(em_tree, ordered->file_offset, ordered->num_bytes); @@ -1709,11 +1686,17 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) free_extent_map(em); write_unlock(&em_tree->lock); - list_for_each_entry(sum, &ordered->list, list) { - if (logical < orig_logical) - sum->logical -= orig_logical - logical; - else - sum->logical += logical - orig_logical; +out: + /* + * If we end up here for nodatasum I/O, the btrfs_ordered_sum structures + * were allocated by btrfs_alloc_dummy_sum only to record the logical + * addresses and don't contain actual checksums. We thus must free them + * here so that we don't attempt to log the csums later. + */ + if ((inode->flags & BTRFS_INODE_NODATASUM) || + test_bit(BTRFS_FS_STATE_NO_CSUMS, &inode->root->fs_info->fs_state)) { + list_del(&sum->list); + kfree(sum); } } -- cgit v1.2.3 From b0307e28642efa3bcc6353b9af213711f6d3848e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:11 +0200 Subject: btrfs: return the new ordered_extent from btrfs_split_ordered_extent Return the ordered_extent split from the passed in one. This will be needed to be able to store an ordered_extent in the btrfs_bio. Reviewed-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 8 +++++++- fs/btrfs/ordered-data.c | 19 ++++++++++--------- fs/btrfs/ordered-data.h | 3 ++- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/ordered-data.h') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f7e06839a645..9f026e632bf7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2719,6 +2719,7 @@ int btrfs_extract_ordered_extent(struct btrfs_bio *bbio, { u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; u64 len = bbio->bio.bi_iter.bi_size; + struct btrfs_ordered_extent *new; int ret; /* Must always be called for the beginning of an ordered extent. */ @@ -2740,7 +2741,12 @@ int btrfs_extract_ordered_extent(struct btrfs_bio *bbio, return ret; } - return btrfs_split_ordered_extent(ordered, len); + new = btrfs_split_ordered_extent(ordered, len); + if (IS_ERR(new)) + return PTR_ERR(new); + btrfs_put_ordered_extent(new); + + return 0; } /* diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 324a5a8c844a..15a410bbbe70 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1116,7 +1116,8 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end, } /* Split out a new ordered extent for this first @len bytes of @ordered. */ -int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len) +struct btrfs_ordered_extent *btrfs_split_ordered_extent( + struct btrfs_ordered_extent *ordered, u64 len) { struct inode *inode = ordered->inode; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; @@ -1135,16 +1136,16 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len) * reduce the original extent to a zero length either. */ if (WARN_ON_ONCE(len >= ordered->num_bytes)) - return -EINVAL; + return ERR_PTR(-EINVAL); /* We cannot split once ordered extent is past end_bio. */ if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) - return -EINVAL; + return ERR_PTR(-EINVAL); /* We cannot split a compressed ordered extent. */ if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) - return -EINVAL; + return ERR_PTR(-EINVAL); /* Checksum list should be empty. */ if (WARN_ON_ONCE(!list_empty(&ordered->list))) - return -EINVAL; + return ERR_PTR(-EINVAL); spin_lock_irq(&tree->lock); /* Remove from tree once */ @@ -1171,13 +1172,13 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len) /* * The splitting extent is already counted and will be added again in - * btrfs_add_ordered_extent(). Subtract len to avoid double counting. + * btrfs_alloc_ordered_extent(). Subtract len to avoid double counting. */ percpu_counter_add_batch(&fs_info->ordered_bytes, -len, fs_info->delalloc_batch); - return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len, - disk_bytenr, len, 0, flags, - ordered->compress_type); + return btrfs_alloc_ordered_extent(BTRFS_I(inode), file_offset, len, len, + disk_bytenr, len, 0, flags, + ordered->compress_type); } int __init ordered_data_init(void) diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index dc700aa515b5..0ae0f52c148f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -206,7 +206,8 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start, struct extent_state **cached_state); bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end, struct extent_state **cached_state); -int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len); +struct btrfs_ordered_extent *btrfs_split_ordered_extent( + struct btrfs_ordered_extent *ordered, u64 len); int __init ordered_data_init(void); void __cold ordered_data_exit(void); -- cgit v1.2.3 From 71df088c1cc090d232eb691d8f42284a2c6409eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:16 +0200 Subject: btrfs: defer splitting of ordered extents until I/O completion The btrfs zoned completion code currently needs an ordered_extent and extent_map per bio so that it can account for the non-predictable write location from Zone Append. To archive that it currently splits the ordered_extent and extent_map at I/O submission time, and then records the actual physical address in the ->physical field of the ordered_extent. This patch instead switches to record the "original" physical address that the btrfs allocator assigned in spare space in the btrfs_bio, and then rewrites the logical address in the btrfs_ordered_sum structure at I/O completion time. This allows the ordered extent completion handler to simply walk the list of ordered csums and split the ordered extent as needed. This removes an extra ordered extent and extent_map lookup and manipulation during the I/O submission path, and instead batches it in the I/O completion path where we need to touch these anyway. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/bio.c | 17 ------------ fs/btrfs/btrfs_inode.h | 2 -- fs/btrfs/inode.c | 18 ++++++++----- fs/btrfs/ordered-data.h | 1 + fs/btrfs/zoned.c | 70 ++++++++++++++++++++++++++++++++++++++++--------- fs/btrfs/zoned.h | 6 ++--- 6 files changed, 73 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/ordered-data.h') diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index b1cdd145aada..1b4d8d60c66f 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -61,20 +61,6 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf, return bbio; } -static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio) -{ - struct btrfs_ordered_extent *ordered; - int ret; - - ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset); - if (WARN_ON_ONCE(!ordered)) - return BLK_STS_IOERR; - ret = btrfs_extract_ordered_extent(bbio, ordered); - btrfs_put_ordered_extent(ordered); - - return errno_to_blk_status(ret); -} - static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info, struct btrfs_bio *orig_bbio, u64 map_length, bool use_append) @@ -668,9 +654,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) if (use_append) { bio->bi_opf &= ~REQ_OP_WRITE; bio->bi_opf |= REQ_OP_ZONE_APPEND; - ret = btrfs_bio_extract_ordered_extent(bbio); - if (ret) - goto fail_put_bio; } /* diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 08c996023394..8abf96cfea8f 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -410,8 +410,6 @@ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode) int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page, u32 pgoff, u8 *csum, const u8 * const csum_expected); -int btrfs_extract_ordered_extent(struct btrfs_bio *bbio, - struct btrfs_ordered_extent *ordered); bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev, u32 bio_offset, struct bio_vec *bv); noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9f026e632bf7..ff3110a2b4e6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2714,8 +2714,8 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode, } } -int btrfs_extract_ordered_extent(struct btrfs_bio *bbio, - struct btrfs_ordered_extent *ordered) +static int btrfs_extract_ordered_extent(struct btrfs_bio *bbio, + struct btrfs_ordered_extent *ordered) { u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; u64 len = bbio->bio.bi_iter.bi_size; @@ -3180,7 +3180,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans, * an ordered extent if the range of bytes in the file it covers are * fully written. */ -int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) +int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent) { struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode); struct btrfs_root *root = inode->root; @@ -3215,11 +3215,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - if (btrfs_is_zoned(fs_info)) { - btrfs_rewrite_logical_zoned(ordered_extent); + if (btrfs_is_zoned(fs_info)) btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr, ordered_extent->disk_num_bytes); - } if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) { truncated = true; @@ -3387,6 +3385,14 @@ out: return ret; } +int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered) +{ + if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) && + !test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) + btrfs_finish_ordered_zoned(ordered); + return btrfs_finish_one_ordered(ordered); +} + void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode, struct page *page, u64 start, u64 end, bool uptodate) diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 0ae0f52c148f..828bb039634a 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -161,6 +161,7 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) t->last = NULL; } +int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent); int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index b55b0d4ee86f..b4dd018ba332 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -15,6 +15,7 @@ #include "transaction.h" #include "dev-replace.h" #include "space-info.h" +#include "super.h" #include "fs.h" #include "accessors.h" #include "bio.h" @@ -1665,17 +1666,11 @@ void btrfs_record_physical_zoned(struct btrfs_bio *bbio) sum->logical += physical - bbio->orig_physical; } -void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) +static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered, + u64 logical) { - struct btrfs_inode *inode = BTRFS_I(ordered->inode); - struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_map_tree *em_tree = &BTRFS_I(ordered->inode)->extent_tree; struct extent_map *em; - struct btrfs_ordered_sum *sum = - list_first_entry(&ordered->list, typeof(*sum), list); - u64 logical = sum->logical; - - if (ordered->disk_bytenr == logical) - goto out; ordered->disk_bytenr = logical; @@ -1685,6 +1680,54 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) em->block_start = logical; free_extent_map(em); write_unlock(&em_tree->lock); +} + +static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered, + u64 logical, u64 len) +{ + struct btrfs_ordered_extent *new; + + if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && + split_extent_map(BTRFS_I(ordered->inode), ordered->file_offset, + ordered->num_bytes, len)) + return false; + + new = btrfs_split_ordered_extent(ordered, len); + if (IS_ERR(new)) + return false; + + if (new->disk_bytenr != logical) + btrfs_rewrite_logical_zoned(new, logical); + btrfs_finish_one_ordered(new); + return true; +} + +void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered) +{ + struct btrfs_inode *inode = BTRFS_I(ordered->inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct btrfs_ordered_sum *sum = + list_first_entry(&ordered->list, typeof(*sum), list); + u64 logical = sum->logical; + u64 len = sum->len; + + while (len < ordered->disk_num_bytes) { + sum = list_next_entry(sum, list); + if (sum->logical == logical + len) { + len += sum->len; + continue; + } + if (!btrfs_zoned_split_ordered(ordered, logical, len)) { + set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); + btrfs_err(fs_info, "failed to split ordered extent"); + goto out; + } + logical = sum->logical; + len = sum->len; + } + + if (ordered->disk_bytenr != logical) + btrfs_rewrite_logical_zoned(ordered, logical); out: /* @@ -1694,9 +1737,12 @@ out: * here so that we don't attempt to log the csums later. */ if ((inode->flags & BTRFS_INODE_NODATASUM) || - test_bit(BTRFS_FS_STATE_NO_CSUMS, &inode->root->fs_info->fs_state)) { - list_del(&sum->list); - kfree(sum); + test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) { + while ((sum = list_first_entry_or_null(&ordered->list, + typeof(*sum), list))) { + list_del(&sum->list); + kfree(sum); + } } } diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 3058ef559c98..27322b926038 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -30,6 +30,8 @@ struct btrfs_zoned_device_info { struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX]; }; +void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered); + #ifdef CONFIG_BLK_DEV_ZONED int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone); @@ -56,7 +58,6 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans, struct extent_buffer *eb); bool btrfs_use_zone_append(struct btrfs_bio *bbio); void btrfs_record_physical_zoned(struct btrfs_bio *bbio); -void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered); bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, struct btrfs_block_group **cache_ret); @@ -188,9 +189,6 @@ static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio) { } -static inline void btrfs_rewrite_logical_zoned( - struct btrfs_ordered_extent *ordered) { } - static inline bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, struct btrfs_block_group **cache_ret) -- cgit v1.2.3 From ebfe4d4eb6ef6bd0f80293936808b77c1fc931b4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 09:53:59 +0200 Subject: btrfs: remove btrfs_add_ordered_extent All callers are gone now. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 25 +------------------------ fs/btrfs/ordered-data.h | 4 ---- 2 files changed, 1 insertion(+), 28 deletions(-) (limited to 'fs/btrfs/ordered-data.h') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index bb76bf01a332..3aee77f40f01 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -279,29 +279,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( return entry; } -/* - * Add a new btrfs_ordered_extent for the range, but drop the reference instead - * of returning it to the caller. - */ -int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset, - u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, - u64 disk_num_bytes, u64 offset, unsigned long flags, - int compress_type) -{ - struct btrfs_ordered_extent *ordered; - - ordered = btrfs_alloc_ordered_extent(inode, file_offset, num_bytes, - ram_bytes, disk_bytenr, - disk_num_bytes, offset, flags, - compress_type); - - if (IS_ERR(ordered)) - return PTR_ERR(ordered); - btrfs_put_ordered_extent(ordered); - - return 0; -} - /* * Add a struct btrfs_ordered_sum into the list of checksums to be inserted * when an ordered extent is finished. If the list covers more than one @@ -579,7 +556,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, freespace_inode = btrfs_is_free_space_inode(btrfs_inode); btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered); - /* This is paired with btrfs_add_ordered_extent. */ + /* This is paired with btrfs_alloc_ordered_extent. */ spin_lock(&btrfs_inode->lock); btrfs_mod_outstanding_extents(btrfs_inode, -1); spin_unlock(&btrfs_inode->lock); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 828bb039634a..0bd0f0368132 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -178,10 +178,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes, u64 offset, unsigned long flags, int compress_type); -int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset, - u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, - u64 disk_num_bytes, u64 offset, unsigned long flags, - int compress_type); void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode, -- cgit v1.2.3 From 122e9ede5355071359c10a8ebca138b162ef176b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 09:54:06 +0200 Subject: btrfs: add a btrfs_finish_ordered_extent helper Add a helper to complete an ordered_extent without first doing a lookup. The tracepoint cannot use the ordered_extent class as we also want to print the range. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 19 +++++++++++++++++++ fs/btrfs/ordered-data.h | 3 +++ include/trace/events/btrfs.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) (limited to 'fs/btrfs/ordered-data.h') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index aa4c203a1695..a629532283bc 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -368,6 +368,25 @@ static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered) btrfs_queue_work(wq, &ordered->work); } +bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered, + struct page *page, u64 file_offset, u64 len, + bool uptodate) +{ + struct btrfs_inode *inode = BTRFS_I(ordered->inode); + unsigned long flags; + bool ret; + + trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate); + + spin_lock_irqsave(&inode->ordered_tree.lock, flags); + ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate); + spin_unlock_irqrestore(&inode->ordered_tree.lock, flags); + + if (ret) + btrfs_queue_ordered_fn(ordered); + return ret; +} + /* * Mark all ordered extents io inside the specified range finished. * diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 0bd0f0368132..173bd5c5df26 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -167,6 +167,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, struct btrfs_ordered_extent *entry); +bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered, + struct page *page, u64 file_offset, u64 len, + bool uptodate); void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, struct page *page, u64 file_offset, u64 num_bytes, bool uptodate); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 8ea9cea9bfeb..c6eee9b414cf 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -661,6 +661,35 @@ DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_mark_finished, TP_ARGS(inode, ordered) ); +TRACE_EVENT(btrfs_finish_ordered_extent, + + TP_PROTO(const struct btrfs_inode *inode, u64 start, u64 len, + bool uptodate), + + TP_ARGS(inode, start, len, uptodate), + + TP_STRUCT__entry_btrfs( + __field( u64, ino ) + __field( u64, start ) + __field( u64, len ) + __field( bool, uptodate ) + __field( u64, root_objectid ) + ), + + TP_fast_assign_btrfs(inode->root->fs_info, + __entry->ino = btrfs_ino(inode); + __entry->start = start; + __entry->len = len; + __entry->uptodate = uptodate; + __entry->root_objectid = inode->root->root_key.objectid; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d", + show_root_type(__entry->root_objectid), + __entry->ino, __entry->start, + __entry->len, !!__entry->uptodate) +); + DECLARE_EVENT_CLASS(btrfs__writepage, TP_PROTO(const struct page *page, const struct inode *inode, -- cgit v1.2.3