diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 23:54:27 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-02-20 23:54:27 +0300 |
commit | 885ce48739189fac6645ff42d736ee0de0b5917d (patch) | |
tree | d69300909ff5b4a0291a7311cd25cf90801a650a /fs/btrfs/disk-io.c | |
parent | 274978f173276c5720a3cd8d0b6047d2c0d3a684 (diff) | |
parent | 964a54e5e1a0d70cd80bd5a0885a1938463625b1 (diff) | |
download | linux-885ce48739189fac6645ff42d736ee0de0b5917d.tar.xz |
Merge tag 'for-6.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"The usual mix of performance improvements and new features.
The core change is reworking how checksums are processed, with
followup cleanups and simplifications. There are two minor changes in
block layer and iomap code.
Features:
- block group allocation class heuristics:
- pack files by size (up to 128k, up to 8M, more) to avoid
fragmentation in block groups, assuming that file size and life
time is correlated, in particular this may help during balance
- with tracepoints and extensible in the future
Performance:
- send: cache directory utimes and only emit the command when
necessary
- speedup up to 10x
- smaller final stream produced (no redundant utimes commands
issued)
- compatibility not affected
- fiemap: skip backref checks for shared leaves
- speedup 3x on sample filesystem with all leaves shared (e.g. on
snapshots)
- micro optimized b-tree key lookup, speedup in metadata operations
(sample benchmark: fs_mark +10% of files/sec)
Core changes:
- change where checksumming is done in the io path:
- checksum and read repair does verification at lower layer
- cascaded cleanups and simplifications
- raid56 refactoring and cleanups
Fixes:
- sysfs: make sure that a run-time change of a feature is correctly
tracked by the feature files
- scrub: better reporting of tree block errors
Other:
- locally enable -Wmaybe-uninitialized after fixing all warnings
- misc cleanups, spelling fixes
Other code:
- block: export bio_split_rw
- iomap: remove IOMAP_F_ZONE_APPEND"
* tag 'for-6.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (109 commits)
btrfs: make kobj_type structures constant
btrfs: remove the bdev argument to btrfs_rmap_block
btrfs: don't rely on unchanging ->bi_bdev for zone append remaps
btrfs: never return true for reads in btrfs_use_zone_append
btrfs: pass a btrfs_bio to btrfs_use_append
btrfs: set bbio->file_offset in alloc_new_bio
btrfs: use file_offset to limit bios size in calc_bio_boundaries
btrfs: do unsigned integer division in the extent buffer binary search loop
btrfs: eliminate extra call when doing binary search on extent buffer
btrfs: raid56: handle endio in scrub_rbio
btrfs: raid56: handle endio in recover_rbio
btrfs: raid56: handle endio in rmw_rbio
btrfs: raid56: submit the read bios from scrub_assemble_read_bios
btrfs: raid56: fold rmw_read_wait_recover into rmw_read_bios
btrfs: raid56: fold recover_assemble_read_bios into recover_rbio
btrfs: raid56: add a bio_list_put helper
btrfs: raid56: wait for I/O completion in submit_read_bios
btrfs: raid56: simplify code flow in rmw_rbio
btrfs: raid56: simplify error handling and code flow in raid56_parity_write
btrfs: replace btrfs_wait_tree_block_writeback by wait_on_extent_buffer_writeback
...
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 225 |
1 files changed, 23 insertions, 202 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3aa04224315e..b53f0e30ce2b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -79,23 +79,6 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) } /* - * async submit bios are used to offload expensive checksumming - * onto the worker threads. They checksum file and metadata bios - * just before they are sent down the IO stack. - */ -struct async_submit_bio { - struct btrfs_inode *inode; - struct bio *bio; - enum btrfs_wq_submit_cmd submit_cmd; - int mirror_num; - - /* Optional parameter for used by direct io */ - u64 dio_file_offset; - struct btrfs_work work; - blk_status_t status; -}; - -/* * Compute the csum of a btree block and store the result to provided buffer. */ static void csum_tree_block(struct extent_buffer *buf, u8 *result) @@ -455,6 +438,22 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec return csum_one_extent_buffer(eb); } +blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio) +{ + struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info; + struct bvec_iter iter; + struct bio_vec bv; + int ret = 0; + + bio_for_each_segment(bv, &bbio->bio, iter) { + ret = csum_dirty_buffer(fs_info, &bv); + if (ret) + break; + } + + return errno_to_blk_status(ret); +} + static int check_tree_block_fsid(struct extent_buffer *eb) { struct btrfs_fs_info *fs_info = eb->fs_info; @@ -700,172 +699,6 @@ err: return ret; } -static void run_one_async_start(struct btrfs_work *work) -{ - struct async_submit_bio *async; - blk_status_t ret; - - async = container_of(work, struct async_submit_bio, work); - switch (async->submit_cmd) { - case WQ_SUBMIT_METADATA: - ret = btree_submit_bio_start(async->bio); - break; - case WQ_SUBMIT_DATA: - ret = btrfs_submit_bio_start(async->inode, async->bio); - break; - case WQ_SUBMIT_DATA_DIO: - ret = btrfs_submit_bio_start_direct_io(async->inode, - async->bio, async->dio_file_offset); - break; - } - if (ret) - async->status = ret; -} - -/* - * In order to insert checksums into the metadata in large chunks, we wait - * until bio submission time. All the pages in the bio are checksummed and - * sums are attached onto the ordered extent record. - * - * At IO completion time the csums attached on the ordered extent record are - * inserted into the tree. - */ -static void run_one_async_done(struct btrfs_work *work) -{ - struct async_submit_bio *async = - container_of(work, struct async_submit_bio, work); - struct btrfs_inode *inode = async->inode; - struct btrfs_bio *bbio = btrfs_bio(async->bio); - - /* If an error occurred we just want to clean up the bio and move on */ - if (async->status) { - btrfs_bio_end_io(bbio, async->status); - return; - } - - /* - * All of the bios that pass through here are from async helpers. - * Use REQ_CGROUP_PUNT to issue them from the owning cgroup's context. - * This changes nothing when cgroups aren't in use. - */ - async->bio->bi_opf |= REQ_CGROUP_PUNT; - btrfs_submit_bio(inode->root->fs_info, async->bio, async->mirror_num); -} - -static void run_one_async_free(struct btrfs_work *work) -{ - struct async_submit_bio *async; - - async = container_of(work, struct async_submit_bio, work); - kfree(async); -} - -/* - * Submit bio to an async queue. - * - * Retrun: - * - true if the work has been succesfuly submitted - * - false in case of error - */ -bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num, - u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct async_submit_bio *async; - - async = kmalloc(sizeof(*async), GFP_NOFS); - if (!async) - return false; - - async->inode = inode; - async->bio = bio; - async->mirror_num = mirror_num; - async->submit_cmd = cmd; - - btrfs_init_work(&async->work, run_one_async_start, run_one_async_done, - run_one_async_free); - - async->dio_file_offset = dio_file_offset; - - async->status = 0; - - if (op_is_sync(bio->bi_opf)) - btrfs_queue_work(fs_info->hipri_workers, &async->work); - else - btrfs_queue_work(fs_info->workers, &async->work); - return true; -} - -static blk_status_t btree_csum_one_bio(struct bio *bio) -{ - struct bio_vec *bvec; - struct btrfs_root *root; - int ret = 0; - struct bvec_iter_all iter_all; - - ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment_all(bvec, bio, iter_all) { - root = BTRFS_I(bvec->bv_page->mapping->host)->root; - ret = csum_dirty_buffer(root->fs_info, bvec); - if (ret) - break; - } - - return errno_to_blk_status(ret); -} - -blk_status_t btree_submit_bio_start(struct bio *bio) -{ - /* - * when we're called for a write, we're already in the async - * submission context. Just jump into btrfs_submit_bio. - */ - return btree_csum_one_bio(bio); -} - -static bool should_async_write(struct btrfs_fs_info *fs_info, - struct btrfs_inode *bi) -{ - if (btrfs_is_zoned(fs_info)) - return false; - if (atomic_read(&bi->sync_writers)) - return false; - if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags)) - return false; - return true; -} - -void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) -{ - struct btrfs_fs_info *fs_info = inode->root->fs_info; - struct btrfs_bio *bbio = btrfs_bio(bio); - blk_status_t ret; - - bio->bi_opf |= REQ_META; - bbio->is_metadata = 1; - - if (btrfs_op(bio) != BTRFS_MAP_WRITE) { - btrfs_submit_bio(fs_info, bio, mirror_num); - return; - } - - /* - * Kthread helpers are used to submit writes so that checksumming can - * happen in parallel across all CPUs. - */ - if (should_async_write(fs_info, inode) && - btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_METADATA)) - return; - - ret = btree_csum_one_bio(bio); - if (ret) { - btrfs_bio_end_io(bbio, ret); - return; - } - - btrfs_submit_bio(fs_info, bio, mirror_num); -} - #ifdef CONFIG_MIGRATION static int btree_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode) @@ -1035,22 +868,6 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, } -void btrfs_clean_tree_block(struct extent_buffer *buf) -{ - struct btrfs_fs_info *fs_info = buf->fs_info; - if (btrfs_header_generation(buf) == - fs_info->running_transaction->transid) { - btrfs_assert_tree_write_locked(buf); - - if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { - percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, - -buf->len, - fs_info->dirty_metadata_batch); - clear_extent_buffer_dirty(buf); - } - } -} - static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { @@ -1910,6 +1727,9 @@ static int cleaner_kthread(void *arg) goto sleep; } + if (test_and_clear_bit(BTRFS_FS_FEATURE_CHANGED, &fs_info->flags)) + btrfs_sysfs_feature_update(fs_info); + btrfs_run_delayed_iputs(fs_info); again = btrfs_clean_one_deleted_snapshot(fs_info); @@ -5159,11 +4979,12 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info, start += fs_info->nodesize; if (!eb) continue; + + btrfs_tree_lock(eb); wait_on_extent_buffer_writeback(eb); + btrfs_clear_buffer_dirty(NULL, eb); + btrfs_tree_unlock(eb); - if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, - &eb->bflags)) - clear_extent_buffer_dirty(eb); free_extent_buffer_stale(eb); } } |