From cbfce4c7fbde23cc8bcba44822a58c728caf6ec9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:08 +0200 Subject: btrfs: optimize the logical to physical mapping for zoned writes The current code to store the final logical to physical mapping for a zone append write in the extent tree is rather inefficient. It first has to split the ordered extent so that there is one ordered extent per bio, so that it can look up the ordered extent on I/O completion in btrfs_record_physical_zoned and store the physical LBA returned by the block driver in the ordered extent. btrfs_rewrite_logical_zoned then has to do a lookup in the chunk tree to see what physical address the logical address for this bio / ordered extent is mapped to, and then rewrite it in the extent tree. To optimize this process, we can store the physical address assigned in the chunk tree to the original logical address and a pointer to the btrfs_ordered_sum structure in the btrfs_bio structure, and then use this information to rewrite the logical address in the btrfs_ordered_sum structure directly at I/O completion time in btrfs_record_physical_zoned. btrfs_rewrite_logical_zoned then simply updates the logical address in the extent tree and the ordered_extent itself. The code in btrfs_rewrite_logical_zoned now runs for all data I/O completions in zoned file systems, which is fine as there is no remapping to do for non-append writes to conventional zones or for relocation, and the overhead for quickly breaking out of the loop is very low. Because zoned file systems now need the ordered_sums structure to record the actual write location returned by zone append, allocate dummy structures without the csum array for them when the I/O doesn't use checksums, and free them when completing the ordered_extent. Note that the btrfs_bio doesn't grow as the new fields are placed into a union that is so far not used for data writes and has plenty of space left in it. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a9778a91511e..324a5a8c844a 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -209,7 +209,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( entry->compress_type = compress_type; entry->truncated_len = (u64)-1; entry->qgroup_rsv = ret; - entry->physical = (u64)-1; ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0); entry->flags = flags; -- cgit v1.2.3 From b0307e28642efa3bcc6353b9af213711f6d3848e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:11 +0200 Subject: btrfs: return the new ordered_extent from btrfs_split_ordered_extent Return the ordered_extent split from the passed in one. This will be needed to be able to store an ordered_extent in the btrfs_bio.
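As a rough sketch of the completion path this enables (the diff below only covers ordered-data.c, so the btrfs_bio and btrfs_ordered_sum field names used here are illustrative assumptions, not the exact upstream code):

	/*
	 * Sketch: with the originally assigned physical address and a pointer
	 * to the btrfs_ordered_sum stashed in the btrfs_bio, zone append
	 * completion only has to shift the recorded logical address by the
	 * difference between the requested and the actual write position.
	 */
	static void record_physical_zoned_sketch(struct btrfs_bio *bbio)
	{
		u64 physical = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;

		if (physical == bbio->orig_physical)
			return;		/* device wrote exactly where we asked */
		bbio->sums->logical += physical - bbio->orig_physical;
	}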
Reviewed-by: Naohiro Aota Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 8 +++++++- fs/btrfs/ordered-data.c | 19 ++++++++++--------- fs/btrfs/ordered-data.h | 3 ++- 3 files changed, 19 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f7e06839a645..9f026e632bf7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2719,6 +2719,7 @@ int btrfs_extract_ordered_extent(struct btrfs_bio *bbio, { u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; u64 len = bbio->bio.bi_iter.bi_size; + struct btrfs_ordered_extent *new; int ret; /* Must always be called for the beginning of an ordered extent. */ @@ -2740,7 +2741,12 @@ int btrfs_extract_ordered_extent(struct btrfs_bio *bbio, return ret; } - return btrfs_split_ordered_extent(ordered, len); + new = btrfs_split_ordered_extent(ordered, len); + if (IS_ERR(new)) + return PTR_ERR(new); + btrfs_put_ordered_extent(new); + + return 0; } /* diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 324a5a8c844a..15a410bbbe70 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1116,7 +1116,8 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end, } /* Split out a new ordered extent for this first @len bytes of @ordered. */ -int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len) +struct btrfs_ordered_extent *btrfs_split_ordered_extent( + struct btrfs_ordered_extent *ordered, u64 len) { struct inode *inode = ordered->inode; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; @@ -1135,16 +1136,16 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len) * reduce the original extent to a zero length either. */ if (WARN_ON_ONCE(len >= ordered->num_bytes)) - return -EINVAL; + return ERR_PTR(-EINVAL); /* We cannot split once ordered extent is past end_bio. */ if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) - return -EINVAL; + return ERR_PTR(-EINVAL); /* We cannot split a compressed ordered extent. */ if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) - return -EINVAL; + return ERR_PTR(-EINVAL); /* Checksum list should be empty. */ if (WARN_ON_ONCE(!list_empty(&ordered->list))) - return -EINVAL; + return ERR_PTR(-EINVAL); spin_lock_irq(&tree->lock); /* Remove from tree once */ @@ -1171,13 +1172,13 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len) /* * The splitting extent is already counted and will be added again in - * btrfs_add_ordered_extent(). Subtract len to avoid double counting. + * btrfs_alloc_ordered_extent(). Subtract len to avoid double counting. 
*/ percpu_counter_add_batch(&fs_info->ordered_bytes, -len, fs_info->delalloc_batch); - return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len, - disk_bytenr, len, 0, flags, - ordered->compress_type); + return btrfs_alloc_ordered_extent(BTRFS_I(inode), file_offset, len, len, + disk_bytenr, len, 0, flags, + ordered->compress_type); } int __init ordered_data_init(void) diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index dc700aa515b5..0ae0f52c148f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -206,7 +206,8 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start, struct extent_state **cached_state); bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end, struct extent_state **cached_state); -int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len); +struct btrfs_ordered_extent *btrfs_split_ordered_extent( + struct btrfs_ordered_extent *ordered, u64 len); int __init ordered_data_init(void); void __cold ordered_data_exit(void); -- cgit v1.2.3 From 53d9981ca20efcded0f6cf6b480d713f490ca9f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:13 +0200 Subject: btrfs: split btrfs_alloc_ordered_extent to allocation and insertion helpers Split two low-level helpers out of btrfs_alloc_ordered_extent to allocate and insert the ordered extent. The pure alloc helper will be used to improve btrfs_split_ordered_extent. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 114 +++++++++++++++++++++++++++--------------------- 1 file changed, 65 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 15a410bbbe70..ea1c2ec10573 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -146,35 +146,11 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, return ret; } -/* - * Add an ordered extent to the per-inode tree. - * - * @inode: Inode that this extent is for. - * @file_offset: Logical offset in file where the extent starts. - * @num_bytes: Logical length of extent in file. - * @ram_bytes: Full length of unencoded data. - * @disk_bytenr: Offset of extent on disk. - * @disk_num_bytes: Size of extent on disk. - * @offset: Offset into unencoded data where file data starts. - * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*). - * @compress_type: Compression algorithm used for data. - * - * Most of these parameters correspond to &struct btrfs_file_extent_item. The - * tree is given a single reference on the ordered extent that was inserted, and - * the returned pointer is given a second reference. - * - * Return: the new ordered extent or error pointer.
- */ -struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( - struct btrfs_inode *inode, u64 file_offset, - u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, - u64 disk_num_bytes, u64 offset, unsigned long flags, - int compress_type) +static struct btrfs_ordered_extent *alloc_ordered_extent( + struct btrfs_inode *inode, u64 file_offset, u64 num_bytes, + u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes, + u64 offset, unsigned long flags, int compress_type) { - struct btrfs_root *root = inode->root; - struct btrfs_fs_info *fs_info = root->fs_info; - struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; - struct rb_node *node; struct btrfs_ordered_extent *entry; int ret; @@ -184,7 +160,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes); if (ret < 0) return ERR_PTR(ret); - ret = 0; } else { /* * The ordered extent has reserved qgroup space, release now @@ -209,14 +184,7 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( entry->compress_type = compress_type; entry->truncated_len = (u64)-1; entry->qgroup_rsv = ret; - - ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0); entry->flags = flags; - - percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes, - fs_info->delalloc_batch); - - /* one ref for the tree */ refcount_set(&entry->refs, 1); init_waitqueue_head(&entry->wait); INIT_LIST_HEAD(&entry->list); @@ -225,15 +193,40 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( INIT_LIST_HEAD(&entry->work_list); init_completion(&entry->completion); + /* + * We don't need the count_max_extents here, we can assume that all of + * that work has been done at higher layers, so this is truly the + * smallest the extent is going to get. + */ + spin_lock(&inode->lock); + btrfs_mod_outstanding_extents(inode, 1); + spin_unlock(&inode->lock); + + return entry; +} + +static void insert_ordered_extent(struct btrfs_ordered_extent *entry) +{ + struct btrfs_inode *inode = BTRFS_I(entry->inode); + struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; + struct rb_node *node; + trace_btrfs_ordered_extent_add(inode, entry); + percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes, + fs_info->delalloc_batch); + + /* One ref for the tree. */ + refcount_inc(&entry->refs); + spin_lock_irq(&tree->lock); - node = tree_insert(&tree->tree, file_offset, - &entry->rb_node); + node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node); if (node) btrfs_panic(fs_info, -EEXIST, "inconsistency in ordered tree at offset %llu", - file_offset); + entry->file_offset); spin_unlock_irq(&tree->lock); spin_lock(&root->ordered_extent_lock); @@ -247,19 +240,42 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( spin_unlock(&fs_info->ordered_root_lock); } spin_unlock(&root->ordered_extent_lock); +} - /* - * We don't need the count_max_extents here, we can assume that all of - * that work has been done at higher layers, so this is truly the - * smallest the extent is going to get. - */ - spin_lock(&inode->lock); - btrfs_mod_outstanding_extents(inode, 1); - spin_unlock(&inode->lock); +/* + * Add an ordered extent to the per-inode tree. + * + * @inode: Inode that this extent is for. + * @file_offset: Logical offset in file where the extent starts. + * @num_bytes: Logical length of extent in file. + * @ram_bytes: Full length of unencoded data. + * @disk_bytenr: Offset of extent on disk. 
+ * @disk_num_bytes: Size of extent on disk. + * @offset: Offset into unencoded data where file data starts. + * @flags: Flags specifying type of extent (1 << BTRFS_ORDERED_*). + * @compress_type: Compression algorithm used for data. + * + * Most of these parameters correspond to &struct btrfs_file_extent_item. The + * tree is given a single reference on the ordered extent that was inserted, and + * the returned pointer is given a second reference. + * + * Return: the new ordered extent or error pointer. + */ +struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( + struct btrfs_inode *inode, u64 file_offset, + u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, + u64 disk_num_bytes, u64 offset, unsigned long flags, + int compress_type) +{ + struct btrfs_ordered_extent *entry; - /* One ref for the returned entry to match semantics of lookup. */ - refcount_inc(&entry->refs); + ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0); + entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes, + disk_bytenr, disk_num_bytes, offset, flags, + compress_type); + if (!IS_ERR(entry)) + insert_ordered_extent(entry); return entry; } -- cgit v1.2.3 From 816f589b8d43f7c258b0c10a5ce29daf01614651 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:14 +0200 Subject: btrfs: atomically insert the new extent in btrfs_split_ordered_extent Currently there is a small race window in btrfs_split_ordered_extent, where the reduced old extent can be looked up on the per-inode rbtree or the per-root list while the newly split out one isn't visible yet. Fix this by open coding btrfs_alloc_ordered_extent in btrfs_split_ordered_extent, and holding the tree lock and root->ordered_extent_lock over the entire tree and extent manipulation. Note that this introduces new lock ordering because previously ordered_extent_lock was never held over the tree lock. 
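The resulting lock nesting, reduced to its shape (a condensed sketch of what the diff below does, not a drop-in replacement):

	/*
	 * Sketch: the per-root ordered_extent_lock is now the outer lock and
	 * the per-inode tree lock the inner one, so the rbtree and the root
	 * list are updated in a single critical section.
	 */
	spin_lock_irq(&root->ordered_extent_lock);
	spin_lock(&tree->lock);
	/* ... shrink @ordered, re-insert it and insert the new split extent ... */
	spin_unlock(&tree->lock);
	spin_unlock_irq(&root->ordered_extent_lock);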
Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index ea1c2ec10573..0dc2bd0bd675 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1135,15 +1135,17 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end, struct btrfs_ordered_extent *btrfs_split_ordered_extent( struct btrfs_ordered_extent *ordered, u64 len) { - struct inode *inode = ordered->inode; - struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_inode *inode = BTRFS_I(ordered->inode); + struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; + struct btrfs_root *root = inode->root; + struct btrfs_fs_info *fs_info = root->fs_info; u64 file_offset = ordered->file_offset; u64 disk_bytenr = ordered->disk_bytenr; unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS; + struct btrfs_ordered_extent *new; struct rb_node *node; - trace_btrfs_ordered_extent_split(BTRFS_I(inode), ordered); + trace_btrfs_ordered_extent_split(inode, ordered); ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED))); @@ -1163,7 +1165,16 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( if (WARN_ON_ONCE(!list_empty(&ordered->list))) return ERR_PTR(-EINVAL); - spin_lock_irq(&tree->lock); + new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr, + len, 0, flags, ordered->compress_type); + if (IS_ERR(new)) + return new; + + /* One ref for the tree. */ + refcount_inc(&new->refs); + + spin_lock_irq(&root->ordered_extent_lock); + spin_lock(&tree->lock); /* Remove from tree once */ node = &ordered->rb_node; rb_erase(node, &tree->tree); @@ -1182,19 +1193,19 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( if (node) btrfs_panic(fs_info, -EEXIST, "zoned: inconsistency in ordered tree at offset %llu", - ordered->file_offset); + ordered->file_offset); - spin_unlock_irq(&tree->lock); - - /* - * The splitting extent is already counted and will be added again in - * btrfs_alloc_ordered_extent(). Subtract len to avoid double counting. - */ - percpu_counter_add_batch(&fs_info->ordered_bytes, -len, fs_info->delalloc_batch); + node = tree_insert(&tree->tree, new->file_offset, &new->rb_node); + if (node) + btrfs_panic(fs_info, -EEXIST, + "zoned: inconsistency in ordered tree at offset %llu", + new->file_offset); + spin_unlock(&tree->lock); - return btrfs_alloc_ordered_extent(BTRFS_I(inode), file_offset, len, len, - disk_bytenr, len, 0, flags, - ordered->compress_type); + list_add_tail(&new->root_extent_list, &root->ordered_extents); + root->nr_ordered_extents++; + spin_unlock_irq(&root->ordered_extent_lock); + return new; } int __init ordered_data_init(void) -- cgit v1.2.3 From 52b1fdca23ac0fbcad363a1a5b426bf0d56b715a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 24 May 2023 17:03:15 +0200 Subject: btrfs: handle completed ordered extents in btrfs_split_ordered_extent To delay splitting ordered_extents to I/O completion time we need to be able to handle fully completed ordered extents in btrfs_split_ordered_extent. 
Besides a bit of accounting this primarily involved moving over the csums to the split bio for the range that it covers, which is simple enough because we always have one btrfs_ordered_sum per bio. Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 0dc2bd0bd675..bb76bf01a332 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1141,9 +1141,11 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( struct btrfs_fs_info *fs_info = root->fs_info; u64 file_offset = ordered->file_offset; u64 disk_bytenr = ordered->disk_bytenr; - unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS; + unsigned long flags = ordered->flags; + struct btrfs_ordered_sum *sum, *tmpsum; struct btrfs_ordered_extent *new; struct rb_node *node; + u64 offset = 0; trace_btrfs_ordered_extent_split(inode, ordered); @@ -1155,15 +1157,15 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( */ if (WARN_ON_ONCE(len >= ordered->num_bytes)) return ERR_PTR(-EINVAL); - /* We cannot split once ordered extent is past end_bio. */ - if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) - return ERR_PTR(-EINVAL); + /* We cannot split partially completed ordered extents. */ + if (ordered->bytes_left) { + ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS)); + if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes)) + return ERR_PTR(-EINVAL); + } /* We cannot split a compressed ordered extent. */ if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes)) return ERR_PTR(-EINVAL); - /* Checksum list should be empty. */ - if (WARN_ON_ONCE(!list_empty(&ordered->list))) - return ERR_PTR(-EINVAL); new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr, len, 0, flags, ordered->compress_type); @@ -1186,7 +1188,29 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( ordered->disk_bytenr += len; ordered->num_bytes -= len; ordered->disk_num_bytes -= len; - ordered->bytes_left -= len; + + if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { + ASSERT(ordered->bytes_left == 0); + new->bytes_left = 0; + } else { + ordered->bytes_left -= len; + } + + if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) { + if (ordered->truncated_len > len) { + ordered->truncated_len -= len; + } else { + new->truncated_len = ordered->truncated_len; + ordered->truncated_len = 0; + } + } + + list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) { + if (offset == len) + break; + list_move_tail(&sum->list, &new->list); + offset += sum->len; + } /* Re-insert the node */ node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node); -- cgit v1.2.3 From ebfe4d4eb6ef6bd0f80293936808b77c1fc931b4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 09:53:59 +0200 Subject: btrfs: remove btrfs_add_ordered_extent All callers are gone now. 
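For reference, former callers now use the allocating variant directly and drop the extra reference themselves; roughly (a sketch of the replacement pattern, error handling trimmed):

	struct btrfs_ordered_extent *ordered;

	ordered = btrfs_alloc_ordered_extent(inode, file_offset, num_bytes,
					     ram_bytes, disk_bytenr,
					     disk_num_bytes, offset, flags,
					     compress_type);
	if (IS_ERR(ordered))
		return PTR_ERR(ordered);
	/* Keep only the tree's reference; drop the one returned to us. */
	btrfs_put_ordered_extent(ordered);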
Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 25 +------------------------ fs/btrfs/ordered-data.h | 4 ---- 2 files changed, 1 insertion(+), 28 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index bb76bf01a332..3aee77f40f01 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -279,29 +279,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( return entry; } -/* - * Add a new btrfs_ordered_extent for the range, but drop the reference instead - * of returning it to the caller. - */ -int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset, - u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, - u64 disk_num_bytes, u64 offset, unsigned long flags, - int compress_type) -{ - struct btrfs_ordered_extent *ordered; - - ordered = btrfs_alloc_ordered_extent(inode, file_offset, num_bytes, - ram_bytes, disk_bytenr, - disk_num_bytes, offset, flags, - compress_type); - - if (IS_ERR(ordered)) - return PTR_ERR(ordered); - btrfs_put_ordered_extent(ordered); - - return 0; -} - /* * Add a struct btrfs_ordered_sum into the list of checksums to be inserted * when an ordered extent is finished. If the list covers more than one @@ -579,7 +556,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, freespace_inode = btrfs_is_free_space_inode(btrfs_inode); btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered); - /* This is paired with btrfs_add_ordered_extent. */ + /* This is paired with btrfs_alloc_ordered_extent. */ spin_lock(&btrfs_inode->lock); btrfs_mod_outstanding_extents(btrfs_inode, -1); spin_unlock(&btrfs_inode->lock); diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 828bb039634a..0bd0f0368132 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -178,10 +178,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent( u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes, u64 offset, unsigned long flags, int compress_type); -int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset, - u64 num_bytes, u64 ram_bytes, u64 disk_bytenr, - u64 disk_num_bytes, u64 offset, unsigned long flags, - int compress_type); void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry, struct btrfs_ordered_sum *sum); struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode, -- cgit v1.2.3 From 53df25869a5659506cb35185997176eed49e125e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 09:54:04 +0200 Subject: btrfs: factor out a can_finish_ordered_extent helper Factor out a helper from btrfs_mark_ordered_io_finished that does the actual per-ordered_extent work to check if we want to schedule an I/O completion. This new helper will later be used to complete an ordered_extent without first doing a lookup.
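The intended calling shape, roughly (a simplified sketch; the queueing half uses the helper added later in this series, not something this patch introduces):

	bool queue;

	spin_lock_irqsave(&inode->ordered_tree.lock, flags);
	/* Bookkeeping under the tree lock only decides whether to queue work. */
	queue = can_finish_ordered_extent(ordered, page, file_offset, len,
					  uptodate);
	spin_unlock_irqrestore(&inode->ordered_tree.lock, flags);

	if (queue)
		btrfs_queue_ordered_fn(ordered);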
Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 100 ++++++++++++++++++++++++++---------------------- 1 file changed, 55 insertions(+), 45 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 3aee77f40f01..2e99fa5d73d9 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -303,6 +303,60 @@ static void finish_ordered_fn(struct btrfs_work *work) btrfs_finish_ordered_io(ordered_extent); } +static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered, + struct page *page, u64 file_offset, + u64 len, bool uptodate) +{ + struct btrfs_inode *inode = BTRFS_I(ordered->inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + + lockdep_assert_held(&inode->ordered_tree.lock); + + if (page) { + ASSERT(page->mapping); + ASSERT(page_offset(page) <= file_offset); + ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE); + + /* + * Ordered (Private2) bit indicates whether we still have + * pending io unfinished for the ordered extent. + * + * If there's no such bit, we need to skip to next range. + */ + if (!btrfs_page_test_ordered(fs_info, page, file_offset, len)) + return false; + btrfs_page_clear_ordered(fs_info, page, file_offset, len); + } + + /* Now we're fine to update the accounting. */ + if (WARN_ON_ONCE(len > ordered->bytes_left)) { + btrfs_crit(fs_info, +"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu", + inode->root->root_key.objectid, btrfs_ino(inode), + ordered->file_offset, ordered->num_bytes, + len, ordered->bytes_left); + ordered->bytes_left = 0; + } else { + ordered->bytes_left -= len; + } + + if (!uptodate) + set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); + + if (ordered->bytes_left) + return false; + + /* + * All the IO of the ordered extent is finished, we need to queue + * the finish_func to be executed. + */ + set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags); + cond_wake_up(&ordered->wait); + refcount_inc(&ordered->refs); + trace_btrfs_ordered_extent_mark_finished(inode, ordered); + return true; +} + /* * Mark all ordered extents io inside the specified range finished. * @@ -333,10 +387,6 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, else wq = fs_info->endio_write_workers; - if (page) - ASSERT(page->mapping && page_offset(page) <= file_offset && - file_offset + num_bytes <= page_offset(page) + PAGE_SIZE); - spin_lock_irqsave(&tree->lock, flags); while (cur < file_offset + num_bytes) { u64 entry_end; @@ -389,47 +439,7 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, ASSERT(end + 1 - cur < U32_MAX); len = end + 1 - cur; - if (page) { - /* - * Ordered (Private2) bit indicates whether we still - * have pending io unfinished for the ordered extent. - * - * If there's no such bit, we need to skip to next range. 
- */ - if (!btrfs_page_test_ordered(fs_info, page, cur, len)) { - cur += len; - continue; - } - btrfs_page_clear_ordered(fs_info, page, cur, len); - } - - /* Now we're fine to update the accounting */ - if (unlikely(len > entry->bytes_left)) { - WARN_ON(1); - btrfs_crit(fs_info, -"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%u left=%llu", - inode->root->root_key.objectid, - btrfs_ino(inode), - entry->file_offset, - entry->num_bytes, - len, entry->bytes_left); - entry->bytes_left = 0; - } else { - entry->bytes_left -= len; - } - - if (!uptodate) - set_bit(BTRFS_ORDERED_IOERR, &entry->flags); - - /* - * All the IO of the ordered extent is finished, we need to queue - * the finish_func to be executed. - */ - if (entry->bytes_left == 0) { - set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); - cond_wake_up(&entry->wait); - refcount_inc(&entry->refs); - trace_btrfs_ordered_extent_mark_finished(inode, entry); + if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) { spin_unlock_irqrestore(&tree->lock, flags); btrfs_init_work(&entry->work, finish_ordered_fn, NULL, NULL); btrfs_queue_work(wq, &entry->work); -- cgit v1.2.3 From 2d6f107ea6875792607fbc40313a990fe3ea522b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 09:54:05 +0200 Subject: btrfs: factor out a btrfs_queue_ordered_fn helper Factor out a helper to queue up an ordered_extent completion in a work queue. This new helper will later be used to complete an ordered_extent without first doing a lookup. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 2e99fa5d73d9..aa4c203a1695 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -357,6 +357,17 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered, return true; } +static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered) +{ + struct btrfs_inode *inode = BTRFS_I(ordered->inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ? + fs_info->endio_freespace_worker : fs_info->endio_write_workers; + + btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); + btrfs_queue_work(wq, &ordered->work); +} + /* * Mark all ordered extents io inside the specified range finished.
* @@ -375,18 +386,11 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, u64 num_bytes, bool uptodate) { struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree; - struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct btrfs_workqueue *wq; struct rb_node *node; struct btrfs_ordered_extent *entry = NULL; unsigned long flags; u64 cur = file_offset; - if (btrfs_is_free_space_inode(inode)) - wq = fs_info->endio_freespace_worker; - else - wq = fs_info->endio_write_workers; - spin_lock_irqsave(&tree->lock, flags); while (cur < file_offset + num_bytes) { u64 entry_end; @@ -441,8 +445,7 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) { spin_unlock_irqrestore(&tree->lock, flags); - btrfs_init_work(&entry->work, finish_ordered_fn, NULL, NULL); - btrfs_queue_work(wq, &entry->work); + btrfs_queue_ordered_fn(entry); spin_lock_irqsave(&tree->lock, flags); } cur += len; -- cgit v1.2.3 From 122e9ede5355071359c10a8ebca138b162ef176b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 31 May 2023 09:54:06 +0200 Subject: btrfs: add a btrfs_finish_ordered_extent helper Add a helper to complete an ordered_extent without first doing a lookup. The tracepoint cannot use the ordered_extent class as we also want to print the range. Reviewed-by: Johannes Thumshirn Reviewed-by: Josef Bacik Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 19 +++++++++++++++++++ fs/btrfs/ordered-data.h | 3 +++ include/trace/events/btrfs.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) (limited to 'fs/btrfs/ordered-data.c') diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index aa4c203a1695..a629532283bc 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -368,6 +368,25 @@ static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered) btrfs_queue_work(wq, &ordered->work); } +bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered, + struct page *page, u64 file_offset, u64 len, + bool uptodate) +{ + struct btrfs_inode *inode = BTRFS_I(ordered->inode); + unsigned long flags; + bool ret; + + trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate); + + spin_lock_irqsave(&inode->ordered_tree.lock, flags); + ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate); + spin_unlock_irqrestore(&inode->ordered_tree.lock, flags); + + if (ret) + btrfs_queue_ordered_fn(ordered); + return ret; +} + /* * Mark all ordered extents io inside the specified range finished. 
* diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 0bd0f0368132..173bd5c5df26 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -167,6 +167,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry); void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode, struct btrfs_ordered_extent *entry); +bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered, + struct page *page, u64 file_offset, u64 len, + bool uptodate); void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode, struct page *page, u64 file_offset, u64 num_bytes, bool uptodate); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 8ea9cea9bfeb..c6eee9b414cf 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -661,6 +661,35 @@ DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_mark_finished, TP_ARGS(inode, ordered) ); +TRACE_EVENT(btrfs_finish_ordered_extent, + + TP_PROTO(const struct btrfs_inode *inode, u64 start, u64 len, + bool uptodate), + + TP_ARGS(inode, start, len, uptodate), + + TP_STRUCT__entry_btrfs( + __field( u64, ino ) + __field( u64, start ) + __field( u64, len ) + __field( bool, uptodate ) + __field( u64, root_objectid ) + ), + + TP_fast_assign_btrfs(inode->root->fs_info, + __entry->ino = btrfs_ino(inode); + __entry->start = start; + __entry->len = len; + __entry->uptodate = uptodate; + __entry->root_objectid = inode->root->root_key.objectid; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d", + show_root_type(__entry->root_objectid), + __entry->ino, __entry->start, + __entry->len, !!__entry->uptodate) +); + DECLARE_EVENT_CLASS(btrfs__writepage, TP_PROTO(const struct page *page, const struct inode *inode, -- cgit v1.2.3
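A usage sketch of the new helper from a write completion handler; the bbio->ordered pointer and the passed-in length are assumptions for illustration (carrying the ordered extent in the btrfs_bio is what the rest of the series builds towards), not part of this patch:

	static void end_ordered_write_sketch(struct btrfs_bio *bbio, u64 len)
	{
		bool uptodate = !bbio->bio.bi_status;

		/* Complete the range covered by this bio, no rbtree lookup needed. */
		btrfs_finish_ordered_extent(bbio->ordered, NULL,
					    bbio->file_offset, len, uptodate);
	}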