diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 22:48:25 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 22:48:25 +0300 |
commit | 037c50bfbeb33b4c74e120eef5b8b99d8f025418 (patch) | |
tree | f9f31655f03f980a097ccc5594ddec428d65ed22 /fs/btrfs/disk-io.c | |
parent | 2cf3f8133bda2a0945cc4c70e681ecb25b52b913 (diff) | |
parent | d1ed82f3559e151804743df0594f45d7ff6e55fa (diff) | |
download | linux-037c50bfbeb33b4c74e120eef5b8b99d8f025418.tar.xz |
Merge tag 'for-5.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"The updates this time are more under the hood and enhancing existing
features (subpage with compression and zoned namespaces).
Performance related:
- misc small inode logging improvements (+3% throughput, -11% latency
on sample dbench workload)
- more efficient directory logging: bulk item insertion, less tree
searches and locking
- speed up bulk insertion of items into a b-tree, which is used when
logging directories, when running delayed items for directories
(fsync and transaction commits) and when running the slow path
(full sync) of an fsync (bulk creation run time -4%, deletion -12%)
Core:
- continued subpage support
- make defragmentation work
- make compression write work
- zoned mode
- support ZNS (zoned namespaces), zone capacity is number of
usable blocks in each zone
- add dedicated block group (zoned) for relocation, to prevent
out of order writes in some cases
- greedy block group reclaim, pick the ones with least usable
space first
- preparatory work for send protocol updates
- error handling improvements
- cleanups and refactoring
Fixes:
- lockdep warnings
- in show_devname callback, on seeding device
- device delete on loop device due to conversions to workqueues
- fix deadlock between chunk allocation and chunk btree modifications
- fix tracking of missing device count and status"
* tag 'for-5.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (140 commits)
btrfs: remove root argument from check_item_in_log()
btrfs: remove root argument from add_link()
btrfs: remove root argument from btrfs_unlink_inode()
btrfs: remove root argument from drop_one_dir_item()
btrfs: clear MISSING device status bit in btrfs_close_one_device
btrfs: call btrfs_check_rw_degradable only if there is a missing device
btrfs: send: prepare for v2 protocol
btrfs: fix comment about sector sizes supported in 64K systems
btrfs: update device path inode time instead of bd_inode
fs: export an inode_update_time helper
btrfs: fix deadlock when defragging transparent huge pages
btrfs: sysfs: convert scnprintf and snprintf to sysfs_emit
btrfs: make btrfs_super_block size match BTRFS_SUPER_INFO_SIZE
btrfs: update comments for chunk allocation -ENOSPC cases
btrfs: fix deadlock between chunk allocation and chunk btree modifications
btrfs: zoned: use greedy gc for auto reclaim
btrfs: check-integrity: stop storing the block device name in btrfsic_dev_state
btrfs: use btrfs_get_dev_args_from_path in dev removal ioctls
btrfs: add a btrfs_get_dev_args_from_path helper
btrfs: handle device lookup with btrfs_dev_lookup_args
...
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 51 |
1 files changed, 30 insertions, 21 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 29e7598584c4..59c3be8c1f4c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -683,7 +683,7 @@ err: return ret; } -int btrfs_validate_metadata_buffer(struct btrfs_io_bio *io_bio, +int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, struct page *page, u64 start, u64 end, int mirror) { @@ -1036,7 +1036,7 @@ static int btree_set_page_dirty(struct page *page) BUG_ON(!eb); BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); BUG_ON(!atomic_read(&eb->refs)); - btrfs_assert_tree_locked(eb); + btrfs_assert_tree_write_locked(eb); return __set_page_dirty_nobuffers(page); } ASSERT(PagePrivate(page) && page->private); @@ -1061,7 +1061,7 @@ static int btree_set_page_dirty(struct page *page) ASSERT(eb); ASSERT(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); ASSERT(atomic_read(&eb->refs)); - btrfs_assert_tree_locked(eb); + btrfs_assert_tree_write_locked(eb); free_extent_buffer(eb); cur_bit += (fs_info->nodesize >> fs_info->sectorsize_bits); @@ -1125,7 +1125,7 @@ void btrfs_clean_tree_block(struct extent_buffer *buf) struct btrfs_fs_info *fs_info = buf->fs_info; if (btrfs_header_generation(buf) == fs_info->running_transaction->transid) { - btrfs_assert_tree_locked(buf); + btrfs_assert_tree_write_locked(buf); if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { percpu_counter_add_batch(&fs_info->dirty_metadata_bytes, @@ -1500,7 +1500,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev) goto fail; if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID && - root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + !btrfs_is_data_reloc_root(root)) { set_bit(BTRFS_ROOT_SHAREABLE, &root->state); btrfs_check_and_init_root_item(&root->root_item); } @@ -1644,6 +1644,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info) btrfs_extent_buffer_leak_debug_check(fs_info); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); + kfree(fs_info->subpage_info); kvfree(fs_info); } @@ -1953,8 +1954,7 @@ sleep: wake_up_process(fs_info->cleaner_kthread); mutex_unlock(&fs_info->transaction_kthread_mutex); - if (unlikely(test_bit(BTRFS_FS_STATE_ERROR, - &fs_info->fs_state))) + if (BTRFS_FS_ERROR(fs_info)) btrfs_cleanup_transaction(fs_info); if (!kthread_should_stop() && (!btrfs_transaction_blocked(fs_info) || @@ -2592,8 +2592,7 @@ static int validate_super(struct btrfs_fs_info *fs_info, /* * For 4K page size, we only support 4K sector size. - * For 64K page size, we support read-write for 64K sector size, and - * read-only for 4K sector size. + * For 64K page size, we support 64K and 4K sector sizes. */ if ((PAGE_SIZE == SZ_4K && sectorsize != PAGE_SIZE) || (PAGE_SIZE == SZ_64K && (sectorsize != SZ_4K && @@ -2883,6 +2882,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) spin_lock_init(&fs_info->buffer_lock); spin_lock_init(&fs_info->unused_bgs_lock); spin_lock_init(&fs_info->treelog_bg_lock); + spin_lock_init(&fs_info->zone_active_bgs_lock); + spin_lock_init(&fs_info->relocation_bg_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); mutex_init(&fs_info->reclaim_bgs_lock); @@ -2896,6 +2897,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->unused_bgs); INIT_LIST_HEAD(&fs_info->reclaim_bgs); + INIT_LIST_HEAD(&fs_info->zone_active_bgs); #ifdef CONFIG_BTRFS_DEBUG INIT_LIST_HEAD(&fs_info->allocated_roots); INIT_LIST_HEAD(&fs_info->allocated_ebs); @@ -3228,12 +3230,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); btrfs_init_btree_inode(fs_info); - invalidate_bdev(fs_devices->latest_bdev); + invalidate_bdev(fs_devices->latest_dev->bdev); /* * Read super block and check the signature bytes only */ - disk_super = btrfs_read_dev_super(fs_devices->latest_bdev); + disk_super = btrfs_read_dev_super(fs_devices->latest_dev->bdev); if (IS_ERR(disk_super)) { err = PTR_ERR(disk_super); goto fail_alloc; @@ -3392,12 +3394,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } - if (sectorsize != PAGE_SIZE) { + if (sectorsize < PAGE_SIZE) { + struct btrfs_subpage_info *subpage_info; + btrfs_warn(fs_info, "read-write for sector size %u with page size %lu is experimental", sectorsize, PAGE_SIZE); - } - if (sectorsize != PAGE_SIZE) { if (btrfs_super_incompat_flags(fs_info->super_copy) & BTRFS_FEATURE_INCOMPAT_RAID56) { btrfs_err(fs_info, @@ -3406,6 +3408,11 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device err = -EINVAL; goto fail_alloc; } + subpage_info = kzalloc(sizeof(*subpage_info), GFP_KERNEL); + if (!subpage_info) + goto fail_alloc; + btrfs_init_subpage_info(subpage_info, sectorsize); + fs_info->subpage_info = subpage_info; } ret = btrfs_init_workqueues(fs_info, fs_devices); @@ -3465,7 +3472,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device * below in btrfs_init_dev_replace(). */ btrfs_free_extra_devids(fs_devices); - if (!fs_devices->latest_bdev) { + if (!fs_devices->latest_dev->bdev) { btrfs_err(fs_info, "failed to read devices"); goto fail_tree_roots; } @@ -3556,7 +3563,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_sysfs; } - if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) { + if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices && + !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writable mount is not allowed due to too many missing devices"); goto fail_sysfs; @@ -3881,7 +3889,9 @@ static int write_dev_supers(struct btrfs_device *device, bio->bi_opf |= REQ_FUA; btrfsic_submit_bio(bio); - btrfs_advance_sb_log(device, i); + + if (btrfs_advance_sb_log(device, i)) + errors++; } return errors < i ? 0 : -1; } @@ -4221,7 +4231,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, drop_ref = true; spin_unlock(&fs_info->fs_roots_radix_lock); - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { + if (BTRFS_FS_ERROR(fs_info)) { ASSERT(root->log_root == NULL); if (root->reloc_root) { btrfs_put_root(root->reloc_root); @@ -4372,8 +4382,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) btrfs_err(fs_info, "commit super ret %d", ret); } - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state) || - test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) + if (BTRFS_FS_ERROR(fs_info)) btrfs_error_commit_super(fs_info); kthread_stop(fs_info->transaction_kthread); @@ -4470,7 +4479,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) if (unlikely(test_bit(EXTENT_BUFFER_UNMAPPED, &buf->bflags))) return; #endif - btrfs_assert_tree_locked(buf); + btrfs_assert_tree_write_locked(buf); if (transid != fs_info->generation) WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n", buf->start, transid, fs_info->generation); |