diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-12 22:28:34 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-03-12 22:28:34 +0300 |
commit | 43a7548e28a6df12a6170421d9d016c576010baa (patch) | |
tree | f956b1fc70acceca74f4c69502e592fcbad7ec77 /fs/btrfs/volumes.c | |
parent | 35d4aeea10558d12022d752b20be371aced557da (diff) | |
parent | 1cab1375ba6d5337a25acb346996106c12bb2dd0 (diff) | |
download | linux-43a7548e28a6df12a6170421d9d016c576010baa.tar.xz |
Merge tag 'for-6.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"Mostly stabilization, refactoring and cleanup changes. There rest are
minor performance optimizations due to caching or lock contention
reduction and a few notable fixes.
Performance improvements:
- minor speedup in logging when repeatedly allocated structure is
preallocated only once, improves latency and decreases lock
contention
- minor throughput increase (+6%), reduced lock contention after
clearing delayed allocation bits, applies to several common
workload types
- skip full quota rescan if a new relation is added in the same
transaction
Fixes:
- zstd fix for inline compressed file in subpage mode, updated
version from the 6.8 time
- proper qgroup inheritance ioctl parameter validation
- more fiemap followup fixes after reduced locking done in 6.8:
- fix race when detecting delalloc ranges
Core changes:
- more debugging code:
- added assertions for a very rare crash in raid56 calculation
- tree-checker dumps page state to give more insights into
possible reference counting issues
- add checksum calculation offloading sysfs knob, for now enabled
under DEBUG only to determine a good heuristic for deciding the
offload or synchronous, depends on various factors (block group
profile, device speed) and is not as clear as initially thought
(checksum type)
- error handling improvements, added assertions
- more page to folio conversion (defrag, truncate), cached size and
shift
- preparation for more fine grained locking of sectors in subpage
mode
- cleanups and refactoring:
- include cleanups, forward declarations
- pointer-to-structure helpers
- redundant argument removals
- removed unused code
- slab cache updates, last use of SLAB_MEM_SPREAD removed"
* tag 'for-6.9-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (114 commits)
btrfs: reuse cloned extent buffer during fiemap to avoid re-allocations
btrfs: fix race when detecting delalloc ranges during fiemap
btrfs: fix off-by-one chunk length calculation at contains_pending_extent()
btrfs: qgroup: allow quick inherit if snapshot is created and added to the same parent
btrfs: qgroup: validate btrfs_qgroup_inherit parameter
btrfs: include device major and minor numbers in the device scan notice
btrfs: mark btrfs_put_caching_control() static
btrfs: remove SLAB_MEM_SPREAD flag use
btrfs: qgroup: always free reserved space for extent records
btrfs: tree-checker: dump the page status if hit something wrong
btrfs: compression: remove dead comments in btrfs_compress_heuristic()
btrfs: subpage: make writer lock utilize bitmap
btrfs: subpage: make reader lock utilize bitmap
btrfs: unexport btrfs_subpage_start_writer() and btrfs_subpage_end_and_test_writer()
btrfs: pass a valid extent map cache pointer to __get_extent_map()
btrfs: merge btrfs_del_delalloc_inode() helpers
btrfs: pass btrfs_device to btrfs_scratch_superblocks()
btrfs: handle transaction commit errors in flush_reservations()
btrfs: use KMEM_CACHE() to create btrfs_free_space cache
btrfs: use KMEM_CACHE() to create delayed ref caches
...
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 98 |
1 files changed, 73 insertions, 25 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e180da4cc227..a2d07fa3cfdf 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -14,10 +14,8 @@ #include <linux/namei.h> #include "misc.h" #include "ctree.h" -#include "extent_map.h" #include "disk-io.h" #include "transaction.h" -#include "print-tree.h" #include "volumes.h" #include "raid56.h" #include "rcu-string.h" @@ -769,8 +767,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (same_fsid_diff_dev) { generate_random_uuid(fs_devices->fsid); fs_devices->temp_fsid = true; - pr_info("BTRFS: device %s using temp-fsid %pU\n", - path, fs_devices->fsid); + pr_info("BTRFS: device %s (%d:%d) using temp-fsid %pU\n", + path, MAJOR(path_devt), MINOR(path_devt), + fs_devices->fsid); } mutex_lock(&fs_devices->device_list_mutex); @@ -799,8 +798,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (fs_devices->opened) { btrfs_err(NULL, -"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)", - path, fs_devices->fsid, current->comm, +"device %s (%d:%d) belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)", + path, MAJOR(path_devt), MINOR(path_devt), + fs_devices->fsid, current->comm, task_pid_nr(current)); mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); @@ -826,13 +826,15 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (disk_super->label[0]) pr_info( - "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n", +"BTRFS: device label %s devid %llu transid %llu %s (%d:%d) scanned by %s (%d)\n", disk_super->label, devid, found_transid, path, + MAJOR(path_devt), MINOR(path_devt), current->comm, task_pid_nr(current)); else pr_info( - "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n", +"BTRFS: device fsid %pU devid %llu transid %llu %s (%d:%d) scanned by %s (%d)\n", disk_super->fsid, devid, found_transid, path, + MAJOR(path_devt), MINOR(path_devt), current->comm, task_pid_nr(current)); } else if (!device->name || strcmp(device->name->str, path)) { @@ -1368,7 +1370,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, else btrfs_free_stale_devices(devt, NULL); - pr_debug("BTRFS: skip registering single non-seed device %s\n", path); + pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n", + path, MAJOR(devt), MINOR(devt)); device = NULL; goto free_disk_super; } @@ -1403,7 +1406,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start, if (in_range(physical_start, *start, len) || in_range(*start, physical_start, - physical_end - physical_start)) { + physical_end + 1 - physical_start)) { *start = physical_end + 1; return true; } @@ -2032,11 +2035,10 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info, copy_num, ret); } -void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, - struct block_device *bdev, - const char *device_path) +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device) { int copy_num; + struct block_device *bdev = device->bdev; if (!bdev) return; @@ -2052,7 +2054,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, btrfs_kobject_uevent(bdev, KOBJ_CHANGE); /* Update ctime/mtime for device path for libblkid */ - update_dev_time(device_path); + update_dev_time(device->name->str); } int btrfs_rm_device(struct btrfs_fs_info *fs_info, @@ -2187,8 +2189,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, * just flush the device and let the caller do the final bdev_release. */ if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { - btrfs_scratch_superblocks(fs_info, device->bdev, - device->name->str); + btrfs_scratch_superblocks(fs_info, device); if (device->bdev) { sync_blockdev(device->bdev); invalidate_bdev(device->bdev); @@ -2301,8 +2302,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) mutex_unlock(&fs_devices->device_list_mutex); - btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev, - tgtdev->name->str); + btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev); btrfs_close_bdev(tgtdev); synchronize_rcu(); @@ -3393,7 +3393,17 @@ again: mutex_unlock(&fs_info->reclaim_bgs_lock); goto error; } - BUG_ON(ret == 0); /* Corruption */ + if (ret == 0) { + /* + * On the first search we would find chunk tree with + * offset -1, which is not possible. On subsequent + * loops this would find an existing item on an invalid + * offset (one less than the previous one, wrong + * alignment and size). + */ + ret = -EUCLEAN; + goto error; + } ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); @@ -3480,6 +3490,44 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info, return 0; } +static void btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, + const struct btrfs_disk_balance_args *disk) +{ + memset(cpu, 0, sizeof(*cpu)); + + cpu->profiles = le64_to_cpu(disk->profiles); + cpu->usage = le64_to_cpu(disk->usage); + cpu->devid = le64_to_cpu(disk->devid); + cpu->pstart = le64_to_cpu(disk->pstart); + cpu->pend = le64_to_cpu(disk->pend); + cpu->vstart = le64_to_cpu(disk->vstart); + cpu->vend = le64_to_cpu(disk->vend); + cpu->target = le64_to_cpu(disk->target); + cpu->flags = le64_to_cpu(disk->flags); + cpu->limit = le64_to_cpu(disk->limit); + cpu->stripes_min = le32_to_cpu(disk->stripes_min); + cpu->stripes_max = le32_to_cpu(disk->stripes_max); +} + +static void btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, + const struct btrfs_balance_args *cpu) +{ + memset(disk, 0, sizeof(*disk)); + + disk->profiles = cpu_to_le64(cpu->profiles); + disk->usage = cpu_to_le64(cpu->usage); + disk->devid = cpu_to_le64(cpu->devid); + disk->pstart = cpu_to_le64(cpu->pstart); + disk->pend = cpu_to_le64(cpu->pend); + disk->vstart = cpu_to_le64(cpu->vstart); + disk->vend = cpu_to_le64(cpu->vend); + disk->target = cpu_to_le64(cpu->target); + disk->flags = cpu_to_le64(cpu->flags); + disk->limit = cpu_to_le64(cpu->limit); + disk->stripes_min = cpu_to_le32(cpu->stripes_min); + disk->stripes_max = cpu_to_le32(cpu->stripes_max); +} + static int insert_balance_item(struct btrfs_fs_info *fs_info, struct btrfs_balance_control *bctl) { @@ -3624,7 +3672,7 @@ static void reset_balance_state(struct btrfs_fs_info *fs_info) struct btrfs_balance_control *bctl = fs_info->balance_ctl; int ret; - BUG_ON(!fs_info->balance_ctl); + ASSERT(fs_info->balance_ctl); spin_lock(&fs_info->balance_lock); fs_info->balance_ctl = NULL; @@ -5944,6 +5992,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map, int first, int dev_replace_is_ongoing) { + const enum btrfs_read_policy policy = READ_ONCE(fs_info->fs_devices->read_policy); int i; int num_stripes; int preferred_mirror; @@ -5958,13 +6007,12 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, else num_stripes = map->num_stripes; - switch (fs_info->fs_devices->read_policy) { + switch (policy) { default: /* Shouldn't happen, just warn and use pid instead of failing */ - btrfs_warn_rl(fs_info, - "unknown read_policy type %u, reset to pid", - fs_info->fs_devices->read_policy); - fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID; + btrfs_warn_rl(fs_info, "unknown read_policy type %u, reset to pid", + policy); + WRITE_ONCE(fs_info->fs_devices->read_policy, BTRFS_READ_POLICY_PID); fallthrough; case BTRFS_READ_POLICY_PID: preferred_mirror = first + (current->pid % num_stripes); |