diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 1800 |
1 files changed, 945 insertions, 855 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b9a0b26d08e1..58e0cac5779d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -14,10 +14,8 @@ #include <linux/namei.h> #include "misc.h" #include "ctree.h" -#include "extent_map.h" #include "disk-io.h" #include "transaction.h" -#include "print-tree.h" #include "volumes.h" #include "raid56.h" #include "rcu-string.h" @@ -35,11 +33,23 @@ #include "relocation.h" #include "scrub.h" #include "super.h" +#include "raid-stripe-tree.h" #define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \ BTRFS_BLOCK_GROUP_RAID10 | \ BTRFS_BLOCK_GROUP_RAID56_MASK) +struct btrfs_io_geometry { + u32 stripe_index; + u32 stripe_nr; + int mirror_num; + int num_stripes; + u64 stripe_offset; + u64 raid56_full_stripe_start; + int max_errors; + enum btrfs_map_op op; +}; + const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = { .sub_stripes = 2, @@ -357,21 +367,19 @@ struct list_head * __attribute_const__ btrfs_get_fs_uuids(void) } /* - * alloc_fs_devices - allocate struct btrfs_fs_devices - * @fsid: if not NULL, copy the UUID to fs_devices::fsid - * @metadata_fsid: if not NULL, copy the UUID to fs_devices::metadata_fsid + * Allocate new btrfs_fs_devices structure identified by a fsid. + * + * @fsid: if not NULL, copy the UUID to fs_devices::fsid and to + * fs_devices::metadata_fsid * * Return a pointer to a new struct btrfs_fs_devices on success, or ERR_PTR(). * The returned struct is not linked onto any lists and can be destroyed with * kfree() right away. */ -static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid, - const u8 *metadata_fsid) +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) { struct btrfs_fs_devices *fs_devs; - ASSERT(fsid || !metadata_fsid); - fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL); if (!fs_devs) return ERR_PTR(-ENOMEM); @@ -385,8 +393,7 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid, if (fsid) { memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); - memcpy(fs_devs->metadata_uuid, - metadata_fsid ?: fsid, BTRFS_FSID_SIZE); + memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE); } return fs_devs; @@ -457,91 +464,46 @@ static noinline struct btrfs_fs_devices *find_fsid( return NULL; } -/* - * First check if the metadata_uuid is different from the fsid in the given - * fs_devices. Then check if the given fsid is the same as the metadata_uuid - * in the fs_devices. If it is, return true; otherwise, return false. - */ -static inline bool check_fsid_changed(const struct btrfs_fs_devices *fs_devices, - const u8 *fsid) -{ - return memcmp(fs_devices->fsid, fs_devices->metadata_uuid, - BTRFS_FSID_SIZE) != 0 && - memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE) == 0; -} - -static struct btrfs_fs_devices *find_fsid_with_metadata_uuid( - struct btrfs_super_block *disk_super) -{ - - struct btrfs_fs_devices *fs_devices; - - /* - * Handle scanned device having completed its fsid change but - * belonging to a fs_devices that was created by first scanning - * a device which didn't have its fsid/metadata_uuid changed - * at all and the CHANGING_FSID_V2 flag set. - */ - list_for_each_entry(fs_devices, &fs_uuids, fs_list) { - if (!fs_devices->fsid_change) - continue; - - if (match_fsid_fs_devices(fs_devices, disk_super->metadata_uuid, - fs_devices->fsid)) - return fs_devices; - } - - /* - * Handle scanned device having completed its fsid change but - * belonging to a fs_devices that was created by a device that - * has an outdated pair of fsid/metadata_uuid and - * CHANGING_FSID_V2 flag set. - */ - list_for_each_entry(fs_devices, &fs_uuids, fs_list) { - if (!fs_devices->fsid_change) - continue; - - if (check_fsid_changed(fs_devices, disk_super->metadata_uuid)) - return fs_devices; - } - - return find_fsid(disk_super->fsid, disk_super->metadata_uuid); -} - - static int btrfs_get_bdev_and_sb(const char *device_path, blk_mode_t flags, void *holder, - int flush, struct block_device **bdev, + int flush, struct file **bdev_file, struct btrfs_super_block **disk_super) { + struct block_device *bdev; int ret; - *bdev = blkdev_get_by_path(device_path, flags, holder, NULL); + *bdev_file = bdev_file_open_by_path(device_path, flags, holder, NULL); - if (IS_ERR(*bdev)) { - ret = PTR_ERR(*bdev); + if (IS_ERR(*bdev_file)) { + ret = PTR_ERR(*bdev_file); + btrfs_err(NULL, "failed to open device for path %s with flags 0x%x: %d", + device_path, flags, ret); goto error; } + bdev = file_bdev(*bdev_file); if (flush) - sync_blockdev(*bdev); - ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE); - if (ret) { - blkdev_put(*bdev, holder); - goto error; + sync_blockdev(bdev); + if (holder) { + ret = set_blocksize(*bdev_file, BTRFS_BDEV_BLOCKSIZE); + if (ret) { + fput(*bdev_file); + goto error; + } } - invalidate_bdev(*bdev); - *disk_super = btrfs_read_dev_super(*bdev); + invalidate_bdev(bdev); + *disk_super = btrfs_read_dev_super(bdev); if (IS_ERR(*disk_super)) { ret = PTR_ERR(*disk_super); - blkdev_put(*bdev, holder); + fput(*bdev_file); goto error; } return 0; error: - *bdev = NULL; + *disk_super = NULL; + *bdev_file = NULL; return ret; } @@ -562,13 +524,13 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device { struct btrfs_fs_devices *fs_devices, *tmp_fs_devices; struct btrfs_device *device, *tmp_device; - int ret = 0; + int ret; + bool freed = false; lockdep_assert_held(&uuid_mutex); - if (devt) - ret = -ENOENT; - + /* Return good status if there is no instance of devt. */ + ret = 0; list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) { mutex_lock(&fs_devices->device_list_mutex); @@ -579,8 +541,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device if (devt && devt != device->devt) continue; if (fs_devices->opened) { - /* for an already deleted device return 0 */ - if (devt && ret != 0) + if (devt) ret = -EBUSY; break; } @@ -590,7 +551,7 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device list_del(&device->dev_list); btrfs_free_device(device); - ret = 0; + freed = true; } mutex_unlock(&fs_devices->device_list_mutex); @@ -601,9 +562,81 @@ static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device } } + /* If there is at least one freed device return 0. */ + if (freed) + return 0; + return ret; } +static struct btrfs_fs_devices *find_fsid_by_device( + struct btrfs_super_block *disk_super, + dev_t devt, bool *same_fsid_diff_dev) +{ + struct btrfs_fs_devices *fsid_fs_devices; + struct btrfs_fs_devices *devt_fs_devices; + const bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) & + BTRFS_FEATURE_INCOMPAT_METADATA_UUID); + bool found_by_devt = false; + + /* Find the fs_device by the usual method, if found use it. */ + fsid_fs_devices = find_fsid(disk_super->fsid, + has_metadata_uuid ? disk_super->metadata_uuid : NULL); + + /* The temp_fsid feature is supported only with single device filesystem. */ + if (btrfs_super_num_devices(disk_super) != 1) + return fsid_fs_devices; + + /* + * A seed device is an integral component of the sprout device, which + * functions as a multi-device filesystem. So, temp-fsid feature is + * not supported. + */ + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) + return fsid_fs_devices; + + /* Try to find a fs_devices by matching devt. */ + list_for_each_entry(devt_fs_devices, &fs_uuids, fs_list) { + struct btrfs_device *device; + + list_for_each_entry(device, &devt_fs_devices->devices, dev_list) { + if (device->devt == devt) { + found_by_devt = true; + break; + } + } + if (found_by_devt) + break; + } + + if (found_by_devt) { + /* Existing device. */ + if (fsid_fs_devices == NULL) { + if (devt_fs_devices->opened == 0) { + /* Stale device. */ + return NULL; + } else { + /* temp_fsid is mounting a subvol. */ + return devt_fs_devices; + } + } else { + /* Regular or temp_fsid device mounting a subvol. */ + return devt_fs_devices; + } + } else { + /* New device. */ + if (fsid_fs_devices == NULL) { + return NULL; + } else { + /* sb::fsid is already used create a new temp_fsid. */ + *same_fsid_diff_dev = true; + return NULL; + } + } + + /* Not reached. */ +} + /* * This is only used on mount, and we are protected from competing things * messing with our fs_devices by the uuid_mutex, thus we do not need the @@ -613,7 +646,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device, blk_mode_t flags, void *holder) { - struct block_device *bdev; + struct file *bdev_file; struct btrfs_super_block *disk_super; u64 devid; int ret; @@ -624,7 +657,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, return -EINVAL; ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, - &bdev, &disk_super); + &bdev_file, &disk_super); if (ret) return ret; @@ -648,21 +681,31 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); fs_devices->seeding = true; } else { - if (bdev_read_only(bdev)) + if (bdev_read_only(file_bdev(bdev_file))) clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); else set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); } - if (!bdev_nonrot(bdev)) + if (!bdev_nonrot(file_bdev(bdev_file))) fs_devices->rotating = true; - if (bdev_max_discard_sectors(bdev)) + if (bdev_max_discard_sectors(file_bdev(bdev_file))) fs_devices->discardable = true; - device->bdev = bdev; + device->bdev_file = bdev_file; + device->bdev = file_bdev(bdev_file); clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); - device->holder = holder; + + if (device->devt != device->bdev->bd_dev) { + btrfs_warn(NULL, + "device %s maj:min changed from %d:%d to %d:%d", + device->name->str, MAJOR(device->devt), + MINOR(device->devt), MAJOR(device->bdev->bd_dev), + MINOR(device->bdev->bd_dev)); + + device->devt = device->bdev->bd_dev; + } fs_devices->open_devices++; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && @@ -676,12 +719,12 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices, error_free_page: btrfs_release_disk_super(disk_super); - blkdev_put(bdev, holder); + fput(bdev_file); return -EINVAL; } -u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb) +const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb) { bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID); @@ -726,84 +769,6 @@ out: } /* - * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices - * being created with a disk that has already completed its fsid change. Such - * disk can belong to an fs which has its FSID changed or to one which doesn't. - * Handle both cases here. - */ -static struct btrfs_fs_devices *find_fsid_inprogress( - struct btrfs_super_block *disk_super) -{ - struct btrfs_fs_devices *fs_devices; - - list_for_each_entry(fs_devices, &fs_uuids, fs_list) { - if (fs_devices->fsid_change) - continue; - - if (check_fsid_changed(fs_devices, disk_super->fsid)) - return fs_devices; - } - - return find_fsid(disk_super->fsid, NULL); -} - -static struct btrfs_fs_devices *find_fsid_changed( - struct btrfs_super_block *disk_super) -{ - struct btrfs_fs_devices *fs_devices; - - /* - * Handles the case where scanned device is part of an fs that had - * multiple successful changes of FSID but currently device didn't - * observe it. Meaning our fsid will be different than theirs. We need - * to handle two subcases : - * 1 - The fs still continues to have different METADATA/FSID uuids. - * 2 - The fs is switched back to its original FSID (METADATA/FSID - * are equal). - */ - list_for_each_entry(fs_devices, &fs_uuids, fs_list) { - /* Changed UUIDs */ - if (check_fsid_changed(fs_devices, disk_super->metadata_uuid) && - memcmp(fs_devices->fsid, disk_super->fsid, - BTRFS_FSID_SIZE) != 0) - return fs_devices; - - /* Unchanged UUIDs */ - if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, - BTRFS_FSID_SIZE) == 0 && - memcmp(fs_devices->fsid, disk_super->metadata_uuid, - BTRFS_FSID_SIZE) == 0) - return fs_devices; - } - - return NULL; -} - -static struct btrfs_fs_devices *find_fsid_reverted_metadata( - struct btrfs_super_block *disk_super) -{ - struct btrfs_fs_devices *fs_devices; - - /* - * Handle the case where the scanned device is part of an fs whose last - * metadata UUID change reverted it to the original FSID. At the same - * time fs_devices was first created by another constituent device - * which didn't fully observe the operation. This results in an - * btrfs_fs_devices created with metadata/fsid different AND - * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the - * fs_devices equal to the FSID of the disk. - */ - list_for_each_entry(fs_devices, &fs_uuids, fs_list) { - if (!fs_devices->fsid_change) - continue; - - if (check_fsid_changed(fs_devices, disk_super->fsid)) - return fs_devices; - } - - return NULL; -} -/* * Add new device to list of registered devices * * Returns: @@ -821,10 +786,16 @@ static noinline struct btrfs_device *device_list_add(const char *path, u64 devid = btrfs_stack_device_id(&disk_super->dev_item); dev_t path_devt; int error; + bool same_fsid_diff_dev = false; bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID); - bool fsid_change_in_progress = (btrfs_super_flags(disk_super) & - BTRFS_SUPER_FLAG_CHANGING_FSID_V2); + + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID_V2) { + btrfs_err(NULL, +"device %s has incomplete metadata_uuid change, please use btrfstune to complete", + path); + return ERR_PTR(-EAGAIN); + } error = lookup_bdev(path, &path_devt); if (error) { @@ -833,27 +804,24 @@ static noinline struct btrfs_device *device_list_add(const char *path, return ERR_PTR(error); } - if (fsid_change_in_progress) { - if (!has_metadata_uuid) - fs_devices = find_fsid_inprogress(disk_super); - else - fs_devices = find_fsid_changed(disk_super); - } else if (has_metadata_uuid) { - fs_devices = find_fsid_with_metadata_uuid(disk_super); - } else { - fs_devices = find_fsid_reverted_metadata(disk_super); - if (!fs_devices) - fs_devices = find_fsid(disk_super->fsid, NULL); - } - + fs_devices = find_fsid_by_device(disk_super, path_devt, &same_fsid_diff_dev); if (!fs_devices) { - fs_devices = alloc_fs_devices(disk_super->fsid, - has_metadata_uuid ? disk_super->metadata_uuid : NULL); + fs_devices = alloc_fs_devices(disk_super->fsid); if (IS_ERR(fs_devices)) return ERR_CAST(fs_devices); - fs_devices->fsid_change = fsid_change_in_progress; + if (has_metadata_uuid) + memcpy(fs_devices->metadata_uuid, + disk_super->metadata_uuid, BTRFS_FSID_SIZE); + + if (same_fsid_diff_dev) { + generate_random_uuid(fs_devices->fsid); + fs_devices->temp_fsid = true; + pr_info("BTRFS: device %s (%d:%d) using temp-fsid %pU\n", + path, MAJOR(path_devt), MINOR(path_devt), + fs_devices->fsid); + } mutex_lock(&fs_devices->device_list_mutex); list_add(&fs_devices->fs_list, &fs_uuids); @@ -868,18 +836,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, mutex_lock(&fs_devices->device_list_mutex); device = btrfs_find_device(fs_devices, &args); - /* - * If this disk has been pulled into an fs devices created by - * a device which had the CHANGING_FSID_V2 flag then replace the - * metadata_uuid/fsid values of the fs_devices. - */ - if (fs_devices->fsid_change && - found_transid > fs_devices->latest_generation) { + if (found_transid > fs_devices->latest_generation) { memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); memcpy(fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(disk_super), BTRFS_FSID_SIZE); - fs_devices->fsid_change = false; } } @@ -888,8 +849,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (fs_devices->opened) { btrfs_err(NULL, -"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)", - path, fs_devices->fsid, current->comm, +"device %s (%d:%d) belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)", + path, MAJOR(path_devt), MINOR(path_devt), + fs_devices->fsid, current->comm, task_pid_nr(current)); mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); @@ -915,13 +877,15 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (disk_super->label[0]) pr_info( - "BTRFS: device label %s devid %llu transid %llu %s scanned by %s (%d)\n", +"BTRFS: device label %s devid %llu transid %llu %s (%d:%d) scanned by %s (%d)\n", disk_super->label, devid, found_transid, path, + MAJOR(path_devt), MINOR(path_devt), current->comm, task_pid_nr(current)); else pr_info( - "BTRFS: device fsid %pU devid %llu transid %llu %s scanned by %s (%d)\n", +"BTRFS: device fsid %pU devid %llu transid %llu %s (%d:%d) scanned by %s (%d)\n", disk_super->fsid, devid, found_transid, path, + MAJOR(path_devt), MINOR(path_devt), current->comm, task_pid_nr(current)); } else if (!device->name || !is_same_device(device, path)) { @@ -1033,7 +997,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) lockdep_assert_held(&uuid_mutex); - fs_devices = alloc_fs_devices(orig->fsid, NULL); + fs_devices = alloc_fs_devices(orig->fsid); if (IS_ERR(fs_devices)) return fs_devices; @@ -1104,9 +1068,10 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, if (device->devid == BTRFS_DEV_REPLACE_DEVID) continue; - if (device->bdev) { - blkdev_put(device->bdev, device->holder); + if (device->bdev_file) { + fput(device->bdev_file); device->bdev = NULL; + device->bdev_file = NULL; fs_devices->open_devices--; } if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { @@ -1151,7 +1116,7 @@ static void btrfs_close_bdev(struct btrfs_device *device) invalidate_bdev(device->bdev); } - blkdev_put(device->bdev, device->holder); + fput(device->bdev_file); } static void btrfs_close_one_device(struct btrfs_device *device) @@ -1176,6 +1141,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) if (device->bdev) { fs_devices->open_devices--; device->bdev = NULL; + device->bdev_file = NULL; } clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); btrfs_destroy_dev_zone_info(device); @@ -1363,7 +1329,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev return ERR_PTR(-EINVAL); /* pull in the page with our super */ - page = read_cache_page_gfp(bdev->bd_inode->i_mapping, index, GFP_KERNEL); + page = read_cache_page_gfp(bdev->bd_mapping, index, GFP_KERNEL); if (IS_ERR(page)) return ERR_CAST(page); @@ -1396,30 +1362,70 @@ int btrfs_forget_devices(dev_t devt) return ret; } +static bool btrfs_skip_registration(struct btrfs_super_block *disk_super, + const char *path, dev_t devt, + bool mount_arg_dev) +{ + struct btrfs_fs_devices *fs_devices; + + /* + * Do not skip device registration for mounted devices with matching + * maj:min but different paths. Booting without initrd relies on + * /dev/root initially, later replaced with the actual root device. + * A successful scan ensures grub2-probe selects the correct device. + */ + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { + struct btrfs_device *device; + + mutex_lock(&fs_devices->device_list_mutex); + + if (!fs_devices->opened) { + mutex_unlock(&fs_devices->device_list_mutex); + continue; + } + + list_for_each_entry(device, &fs_devices->devices, dev_list) { + if (device->bdev && (device->bdev->bd_dev == devt) && + strcmp(device->name->str, path) != 0) { + mutex_unlock(&fs_devices->device_list_mutex); + + /* Do not skip registration. */ + return false; + } + } + mutex_unlock(&fs_devices->device_list_mutex); + } + + if (!mount_arg_dev && btrfs_super_num_devices(disk_super) == 1 && + !(btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING)) + return true; + + return false; +} + /* * Look for a btrfs signature on a device. This may be called out of the mount path * and we are not allowed to call set_blocksize during the scan. The superblock - * is read via pagecache + * is read via pagecache. + * + * With @mount_arg_dev it's a scan during mount time that will always register + * the device or return an error. Multi-device and seeding devices are registered + * in both cases. */ -struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags) +struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags, + bool mount_arg_dev) { struct btrfs_super_block *disk_super; bool new_device_added = false; struct btrfs_device *device = NULL; - struct block_device *bdev; - u64 bytenr, bytenr_orig; + struct file *bdev_file; + u64 bytenr; + dev_t devt; int ret; lockdep_assert_held(&uuid_mutex); /* - * we would like to check all the supers, but that would make - * a btrfs mount succeed after a mkfs from a different FS. - * So, we need to add a special mount option to scan for - * later supers, using BTRFS_SUPER_MIRROR_MAX instead - */ - - /* * Avoid an exclusive open here, as the systemd-udev may initiate the * device scan which may race with the user's mount or mkfs command, * resulting in failure. @@ -1429,31 +1435,49 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags) * values temporarily, as the device paths of the fsid are the only * required information for assembling the volume. */ - bdev = blkdev_get_by_path(path, flags, NULL, NULL); - if (IS_ERR(bdev)) - return ERR_CAST(bdev); + bdev_file = bdev_file_open_by_path(path, flags, NULL, NULL); + if (IS_ERR(bdev_file)) + return ERR_CAST(bdev_file); - bytenr_orig = btrfs_sb_offset(0); - ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr); + /* + * We would like to check all the super blocks, but doing so would + * allow a mount to succeed after a mkfs from a different filesystem. + * Currently, recovery from a bad primary btrfs superblock is done + * using the userspace command 'btrfs check --super'. + */ + ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr); if (ret) { device = ERR_PTR(ret); goto error_bdev_put; } - disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig); + disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr, + btrfs_sb_offset(0)); if (IS_ERR(disk_super)) { device = ERR_CAST(disk_super); goto error_bdev_put; } + devt = file_bdev(bdev_file)->bd_dev; + if (btrfs_skip_registration(disk_super, path, devt, mount_arg_dev)) { + pr_debug("BTRFS: skip registering single non-seed device %s (%d:%d)\n", + path, MAJOR(devt), MINOR(devt)); + + btrfs_free_stale_devices(devt, NULL); + + device = NULL; + goto free_disk_super; + } + device = device_list_add(path, disk_super, &new_device_added); if (!IS_ERR(device) && new_device_added) btrfs_free_stale_devices(device->devt, device); +free_disk_super: btrfs_release_disk_super(disk_super); error_bdev_put: - blkdev_put(bdev, NULL); + fput(bdev_file); return device; } @@ -1825,19 +1849,18 @@ out: static u64 find_next_chunk(struct btrfs_fs_info *fs_info) { - struct extent_map_tree *em_tree; - struct extent_map *em; struct rb_node *n; u64 ret = 0; - em_tree = &fs_info->mapping_tree; - read_lock(&em_tree->lock); - n = rb_last(&em_tree->map.rb_root); + read_lock(&fs_info->mapping_tree_lock); + n = rb_last(&fs_info->mapping_tree.rb_root); if (n) { - em = rb_entry(n, struct extent_map, rb_node); - ret = em->start + em->len; + struct btrfs_chunk_map *map; + + map = rb_entry(n, struct btrfs_chunk_map, rb_node); + ret = map->start + map->chunk_len; } - read_unlock(&em_tree->lock); + read_unlock(&fs_info->mapping_tree_lock); return ret; } @@ -2105,11 +2128,10 @@ static void btrfs_scratch_superblock(struct btrfs_fs_info *fs_info, copy_num, ret); } -void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, - struct block_device *bdev, - const char *device_path) +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device) { int copy_num; + struct block_device *bdev = device->bdev; if (!bdev) return; @@ -2125,12 +2147,12 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, btrfs_kobject_uevent(bdev, KOBJ_CHANGE); /* Update ctime/mtime for device path for libblkid */ - update_dev_time(device_path); + update_dev_time(device->name->str); } int btrfs_rm_device(struct btrfs_fs_info *fs_info, struct btrfs_dev_lookup_args *args, - struct block_device **bdev, void **holder) + struct file **bdev_file) { struct btrfs_trans_handle *trans; struct btrfs_device *device; @@ -2239,7 +2261,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, btrfs_assign_next_active_device(device, NULL); - if (device->bdev) { + if (device->bdev_file) { cur_devices->open_devices--; /* remove sysfs entry */ btrfs_sysfs_remove_device(device); @@ -2255,21 +2277,19 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, * free the device. * * We cannot call btrfs_close_bdev() here because we're holding the sb - * write lock, and blkdev_put() will pull in the ->open_mutex on the - * block device and it's dependencies. Instead just flush the device - * and let the caller do the final blkdev_put. + * write lock, and fput() on the block device will pull in the + * ->open_mutex on the block device and it's dependencies. Instead + * just flush the device and let the caller do the final bdev_release. */ if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { - btrfs_scratch_superblocks(fs_info, device->bdev, - device->name->str); + btrfs_scratch_superblocks(fs_info, device); if (device->bdev) { sync_blockdev(device->bdev); invalidate_bdev(device->bdev); } } - *bdev = device->bdev; - *holder = device->holder; + *bdev_file = device->bdev_file; synchronize_rcu(); btrfs_free_device(device); @@ -2375,8 +2395,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) mutex_unlock(&fs_devices->device_list_mutex); - btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev->bdev, - tgtdev->name->str); + btrfs_scratch_superblocks(tgtdev->fs_info, tgtdev); btrfs_close_bdev(tgtdev); synchronize_rcu(); @@ -2406,7 +2425,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, const char *path) { struct btrfs_super_block *disk_super; - struct block_device *bdev; + struct file *bdev_file; int ret; if (!path || !path[0]) @@ -2424,7 +2443,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, } ret = btrfs_get_bdev_and_sb(path, BLK_OPEN_READ, NULL, 0, - &bdev, &disk_super); + &bdev_file, &disk_super); if (ret) { btrfs_put_dev_args_from_path(args); return ret; @@ -2437,7 +2456,7 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info, else memcpy(args->fsid, disk_super->fsid, BTRFS_FSID_SIZE); btrfs_release_disk_super(disk_super); - blkdev_put(bdev, NULL); + fput(bdev_file); return 0; } @@ -2494,7 +2513,7 @@ static struct btrfs_fs_devices *btrfs_init_sprout(struct btrfs_fs_info *fs_info) * Private copy of the seed devices, anchored at * fs_info->fs_devices->seed_list */ - seed_devices = alloc_fs_devices(NULL, NULL); + seed_devices = alloc_fs_devices(NULL); if (IS_ERR(seed_devices)) return seed_devices; @@ -2657,7 +2676,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path struct btrfs_root *root = fs_info->dev_root; struct btrfs_trans_handle *trans; struct btrfs_device *device; - struct block_device *bdev; + struct file *bdev_file; struct super_block *sb = fs_info->sb; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_fs_devices *seed_devices = NULL; @@ -2670,12 +2689,12 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path if (sb_rdonly(sb) && !fs_devices->seeding) return -EROFS; - bdev = blkdev_get_by_path(device_path, BLK_OPEN_WRITE, - fs_info->bdev_holder, NULL); - if (IS_ERR(bdev)) - return PTR_ERR(bdev); + bdev_file = bdev_file_open_by_path(device_path, BLK_OPEN_WRITE, + fs_info->bdev_holder, NULL); + if (IS_ERR(bdev_file)) + return PTR_ERR(bdev_file); - if (!btrfs_check_device_zone_type(fs_info, bdev)) { + if (!btrfs_check_device_zone_type(fs_info, file_bdev(bdev_file))) { ret = -EINVAL; goto error; } @@ -2687,11 +2706,11 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path locked = true; } - sync_blockdev(bdev); + sync_blockdev(file_bdev(bdev_file)); rcu_read_lock(); list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { - if (device->bdev == bdev) { + if (device->bdev == file_bdev(bdev_file)) { ret = -EEXIST; rcu_read_unlock(); goto error; @@ -2707,7 +2726,8 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path } device->fs_info = fs_info; - device->bdev = bdev; + device->bdev_file = bdev_file; + device->bdev = file_bdev(bdev_file); ret = lookup_bdev(device_path, &device->devt); if (ret) goto error_free_device; @@ -2728,14 +2748,13 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path device->io_align = fs_info->sectorsize; device->sector_size = fs_info->sectorsize; device->total_bytes = - round_down(bdev_nr_bytes(bdev), fs_info->sectorsize); + round_down(bdev_nr_bytes(device->bdev), fs_info->sectorsize); device->disk_total_bytes = device->total_bytes; device->commit_total_bytes = device->total_bytes; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - device->holder = fs_info->bdev_holder; device->dev_stats_valid = 1; - set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); + set_blocksize(device->bdev_file, BTRFS_BDEV_BLOCKSIZE); if (seeding_dev) { /* GFP_KERNEL allocation must not be under device_list_mutex */ @@ -2767,7 +2786,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path atomic64_add(device->total_bytes, &fs_info->free_chunk_space); - if (!bdev_nonrot(bdev)) + if (!bdev_nonrot(device->bdev)) fs_devices->rotating = true; orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy); @@ -2887,7 +2906,7 @@ error_free_zone: error_free_device: btrfs_free_device(device); error: - blkdev_put(bdev, fs_info->bdev_holder); + fput(bdev_file); if (locked) { mutex_unlock(&uuid_mutex); up_write(&sb->s_umount); @@ -2968,6 +2987,7 @@ int btrfs_grow_device(struct btrfs_trans_handle *trans, btrfs_set_super_total_bytes(super_copy, round_down(old_total + diff, fs_info->sectorsize)); device->fs_devices->total_rw_bytes += diff; + atomic64_add(diff, &fs_info->free_chunk_space); btrfs_device_set_total_bytes(device, new_size); btrfs_device_set_disk_total_bytes(device, new_size); @@ -3004,16 +3024,19 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) if (ret < 0) goto out; else if (ret > 0) { /* Logic error or corruption */ - btrfs_handle_fs_error(fs_info, -ENOENT, - "Failed lookup while freeing chunk."); - ret = -ENOENT; + btrfs_err(fs_info, "failed to lookup chunk %llu when freeing", + chunk_offset); + btrfs_abort_transaction(trans, -ENOENT); + ret = -EUCLEAN; goto out; } ret = btrfs_del_item(trans, root, path); - if (ret < 0) - btrfs_handle_fs_error(fs_info, ret, - "Failed to delete chunk item."); + if (ret < 0) { + btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset); + btrfs_abort_transaction(trans, ret); + goto out; + } out: btrfs_free_path(path); return ret; @@ -3065,45 +3088,118 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) return ret; } +struct btrfs_chunk_map *btrfs_find_chunk_map_nolock(struct btrfs_fs_info *fs_info, + u64 logical, u64 length) +{ + struct rb_node *node = fs_info->mapping_tree.rb_root.rb_node; + struct rb_node *prev = NULL; + struct rb_node *orig_prev; + struct btrfs_chunk_map *map; + struct btrfs_chunk_map *prev_map = NULL; + + while (node) { + map = rb_entry(node, struct btrfs_chunk_map, rb_node); + prev = node; + prev_map = map; + + if (logical < map->start) { + node = node->rb_left; + } else if (logical >= map->start + map->chunk_len) { + node = node->rb_right; + } else { + refcount_inc(&map->refs); + return map; + } + } + + if (!prev) + return NULL; + + orig_prev = prev; + while (prev && logical >= prev_map->start + prev_map->chunk_len) { + prev = rb_next(prev); + prev_map = rb_entry(prev, struct btrfs_chunk_map, rb_node); + } + + if (!prev) { + prev = orig_prev; + prev_map = rb_entry(prev, struct btrfs_chunk_map, rb_node); + while (prev && logical < prev_map->start) { + prev = rb_prev(prev); + prev_map = rb_entry(prev, struct btrfs_chunk_map, rb_node); + } + } + + if (prev) { + u64 end = logical + length; + + /* + * Caller can pass a U64_MAX length when it wants to get any + * chunk starting at an offset of 'logical' or higher, so deal + * with underflow by resetting the end offset to U64_MAX. + */ + if (end < logical) + end = U64_MAX; + + if (end > prev_map->start && + logical < prev_map->start + prev_map->chunk_len) { + refcount_inc(&prev_map->refs); + return prev_map; + } + } + + return NULL; +} + +struct btrfs_chunk_map *btrfs_find_chunk_map(struct btrfs_fs_info *fs_info, + u64 logical, u64 length) +{ + struct btrfs_chunk_map *map; + + read_lock(&fs_info->mapping_tree_lock); + map = btrfs_find_chunk_map_nolock(fs_info, logical, length); + read_unlock(&fs_info->mapping_tree_lock); + + return map; +} + /* - * btrfs_get_chunk_map() - Find the mapping containing the given logical extent. + * Find the mapping containing the given logical extent. + * * @logical: Logical block offset in bytes. * @length: Length of extent in bytes. * * Return: Chunk mapping or ERR_PTR. */ -struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, - u64 logical, u64 length) +struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, + u64 logical, u64 length) { - struct extent_map_tree *em_tree; - struct extent_map *em; + struct btrfs_chunk_map *map; - em_tree = &fs_info->mapping_tree; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, logical, length); - read_unlock(&em_tree->lock); + map = btrfs_find_chunk_map(fs_info, logical, length); - if (!em) { + if (unlikely(!map)) { btrfs_crit(fs_info, "unable to find chunk map for logical %llu length %llu", logical, length); return ERR_PTR(-EINVAL); } - if (em->start > logical || em->start + em->len <= logical) { + if (unlikely(map->start > logical || map->start + map->chunk_len <= logical)) { btrfs_crit(fs_info, "found a bad chunk map, wanted %llu-%llu, found %llu-%llu", - logical, logical + length, em->start, em->start + em->len); - free_extent_map(em); + logical, logical + length, map->start, + map->start + map->chunk_len); + btrfs_free_chunk_map(map); return ERR_PTR(-EINVAL); } - /* callers are responsible for dropping em's ref. */ - return em; + /* Callers are responsible for dropping the reference. */ + return map; } static int remove_chunk_item(struct btrfs_trans_handle *trans, - struct map_lookup *map, u64 chunk_offset) + struct btrfs_chunk_map *map, u64 chunk_offset) { int i; @@ -3128,23 +3224,21 @@ static int remove_chunk_item(struct btrfs_trans_handle *trans, int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) { struct btrfs_fs_info *fs_info = trans->fs_info; - struct extent_map *em; - struct map_lookup *map; + struct btrfs_chunk_map *map; u64 dev_extent_len = 0; int i, ret = 0; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; - em = btrfs_get_chunk_map(fs_info, chunk_offset, 1); - if (IS_ERR(em)) { + map = btrfs_get_chunk_map(fs_info, chunk_offset, 1); + if (IS_ERR(map)) { /* * This is a logic error, but we don't want to just rely on the * user having built with ASSERT enabled, so if ASSERT doesn't * do anything we still error out. */ ASSERT(0); - return PTR_ERR(em); + return PTR_ERR(map); } - map = em->map_lookup; /* * First delete the device extent items from the devices btree. @@ -3174,6 +3268,12 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) device->bytes_used - dev_extent_len); atomic64_add(dev_extent_len, &fs_info->free_chunk_space); btrfs_clear_space_info_full(fs_info); + + if (list_empty(&device->post_commit_list)) { + list_add_tail(&device->post_commit_list, + &trans->transaction->dev_update_list); + } + mutex_unlock(&fs_info->chunk_mutex); } } @@ -3247,7 +3347,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) goto out; } - trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len); + trace_btrfs_chunk_free(fs_info, map, chunk_offset, map->chunk_len); if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) { ret = btrfs_del_sys_chunk(fs_info, chunk_offset); @@ -3266,7 +3366,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset) */ btrfs_trans_release_chunk_metadata(trans); - ret = btrfs_remove_block_group(trans, chunk_offset, em); + ret = btrfs_remove_block_group(trans, map); if (ret) { btrfs_abort_transaction(trans, ret); goto out; @@ -3278,7 +3378,7 @@ out: trans->removing_chunk = false; } /* once for us */ - free_extent_map(em); + btrfs_free_chunk_map(map); return ret; } @@ -3489,6 +3589,44 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info, return 0; } +static void btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, + const struct btrfs_disk_balance_args *disk) +{ + memset(cpu, 0, sizeof(*cpu)); + + cpu->profiles = le64_to_cpu(disk->profiles); + cpu->usage = le64_to_cpu(disk->usage); + cpu->devid = le64_to_cpu(disk->devid); + cpu->pstart = le64_to_cpu(disk->pstart); + cpu->pend = le64_to_cpu(disk->pend); + cpu->vstart = le64_to_cpu(disk->vstart); + cpu->vend = le64_to_cpu(disk->vend); + cpu->target = le64_to_cpu(disk->target); + cpu->flags = le64_to_cpu(disk->flags); + cpu->limit = le64_to_cpu(disk->limit); + cpu->stripes_min = le32_to_cpu(disk->stripes_min); + cpu->stripes_max = le32_to_cpu(disk->stripes_max); +} + +static void btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, + const struct btrfs_balance_args *cpu) +{ + memset(disk, 0, sizeof(*disk)); + + disk->profiles = cpu_to_le64(cpu->profiles); + disk->usage = cpu_to_le64(cpu->usage); + disk->devid = cpu_to_le64(cpu->devid); + disk->pstart = cpu_to_le64(cpu->pstart); + disk->pend = cpu_to_le64(cpu->pend); + disk->vstart = cpu_to_le64(cpu->vstart); + disk->vend = cpu_to_le64(cpu->vend); + disk->target = cpu_to_le64(cpu->target); + disk->flags = cpu_to_le64(cpu->flags); + disk->limit = cpu_to_le64(cpu->limit); + disk->stripes_min = cpu_to_le32(cpu->stripes_min); + disk->stripes_max = cpu_to_le32(cpu->stripes_max); +} + static int insert_balance_item(struct btrfs_fs_info *fs_info, struct btrfs_balance_control *bctl) { @@ -3633,7 +3771,7 @@ static void reset_balance_state(struct btrfs_fs_info *fs_info) struct btrfs_balance_control *bctl = fs_info->balance_ctl; int ret; - BUG_ON(!fs_info->balance_ctl); + ASSERT(fs_info->balance_ctl); spin_lock(&fs_info->balance_lock); fs_info->balance_ctl = NULL; @@ -4687,183 +4825,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_uuid_scan_kthread(void *data) -{ - struct btrfs_fs_info *fs_info = data; - struct btrfs_root *root = fs_info->tree_root; - struct btrfs_key key; - struct btrfs_path *path = NULL; - int ret = 0; - struct extent_buffer *eb; - int slot; - struct btrfs_root_item root_item; - u32 item_size; - struct btrfs_trans_handle *trans = NULL; - bool closing = false; - - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; - goto out; - } - - key.objectid = 0; - key.type = BTRFS_ROOT_ITEM_KEY; - key.offset = 0; - - while (1) { - if (btrfs_fs_closing(fs_info)) { - closing = true; - break; - } - ret = btrfs_search_forward(root, &key, path, - BTRFS_OLDEST_GENERATION); - if (ret) { - if (ret > 0) - ret = 0; - break; - } - - if (key.type != BTRFS_ROOT_ITEM_KEY || - (key.objectid < BTRFS_FIRST_FREE_OBJECTID && - key.objectid != BTRFS_FS_TREE_OBJECTID) || - key.objectid > BTRFS_LAST_FREE_OBJECTID) - goto skip; - - eb = path->nodes[0]; - slot = path->slots[0]; - item_size = btrfs_item_size(eb, slot); - if (item_size < sizeof(root_item)) - goto skip; - - read_extent_buffer(eb, &root_item, - btrfs_item_ptr_offset(eb, slot), - (int)sizeof(root_item)); - if (btrfs_root_refs(&root_item) == 0) - goto skip; - - if (!btrfs_is_empty_uuid(root_item.uuid) || - !btrfs_is_empty_uuid(root_item.received_uuid)) { - if (trans) - goto update_tree; - - btrfs_release_path(path); - /* - * 1 - subvol uuid item - * 1 - received_subvol uuid item - */ - trans = btrfs_start_transaction(fs_info->uuid_root, 2); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } - continue; - } else { - goto skip; - } -update_tree: - btrfs_release_path(path); - if (!btrfs_is_empty_uuid(root_item.uuid)) { - ret = btrfs_uuid_tree_add(trans, root_item.uuid, - BTRFS_UUID_KEY_SUBVOL, - key.objectid); - if (ret < 0) { - btrfs_warn(fs_info, "uuid_tree_add failed %d", - ret); - break; - } - } - - if (!btrfs_is_empty_uuid(root_item.received_uuid)) { - ret = btrfs_uuid_tree_add(trans, - root_item.received_uuid, - BTRFS_UUID_KEY_RECEIVED_SUBVOL, - key.objectid); - if (ret < 0) { - btrfs_warn(fs_info, "uuid_tree_add failed %d", - ret); - break; - } - } - -skip: - btrfs_release_path(path); - if (trans) { - ret = btrfs_end_transaction(trans); - trans = NULL; - if (ret) - break; - } - - if (key.offset < (u64)-1) { - key.offset++; - } else if (key.type < BTRFS_ROOT_ITEM_KEY) { - key.offset = 0; - key.type = BTRFS_ROOT_ITEM_KEY; - } else if (key.objectid < (u64)-1) { - key.offset = 0; - key.type = BTRFS_ROOT_ITEM_KEY; - key.objectid++; - } else { - break; - } - cond_resched(); - } - -out: - btrfs_free_path(path); - if (trans && !IS_ERR(trans)) - btrfs_end_transaction(trans); - if (ret) - btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret); - else if (!closing) - set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags); - up(&fs_info->uuid_tree_rescan_sem); - return 0; -} - -int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) -{ - struct btrfs_trans_handle *trans; - struct btrfs_root *tree_root = fs_info->tree_root; - struct btrfs_root *uuid_root; - struct task_struct *task; - int ret; - - /* - * 1 - root node - * 1 - root item - */ - trans = btrfs_start_transaction(tree_root, 2); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - uuid_root = btrfs_create_tree(trans, BTRFS_UUID_TREE_OBJECTID); - if (IS_ERR(uuid_root)) { - ret = PTR_ERR(uuid_root); - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - return ret; - } - - fs_info->uuid_root = uuid_root; - - ret = btrfs_commit_transaction(trans); - if (ret) - return ret; - - down(&fs_info->uuid_tree_rescan_sem); - task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); - if (IS_ERR(task)) { - /* fs_info->update_uuid_tree_gen remains 0 in all error case */ - btrfs_warn(fs_info, "failed to start uuid_scan task"); - up(&fs_info->uuid_tree_rescan_sem); - return PTR_ERR(task); - } - - return 0; -} - /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. @@ -4889,6 +4850,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 old_size = btrfs_device_get_total_bytes(device); u64 diff; u64 start; + u64 free_diff = 0; new_size = round_down(new_size, fs_info->sectorsize); start = new_size; @@ -4914,7 +4876,19 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) btrfs_device_set_total_bytes(device, new_size); if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { device->fs_devices->total_rw_bytes -= diff; - atomic64_sub(diff, &fs_info->free_chunk_space); + + /* + * The new free_chunk_space is new_size - used, so we have to + * subtract the delta of the old free_chunk_space which included + * old_size - used. If used > new_size then just subtract this + * entire device's free space. + */ + if (device->bytes_used < new_size) + free_diff = (old_size - device->bytes_used) - + (new_size - device->bytes_used); + else + free_diff = old_size - device->bytes_used; + atomic64_sub(free_diff, &fs_info->free_chunk_space); } /* @@ -5049,9 +5023,10 @@ done: if (ret) { mutex_lock(&fs_info->chunk_mutex); btrfs_device_set_total_bytes(device, old_size); - if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { device->fs_devices->total_rw_bytes += diff; - atomic64_add(diff, &fs_info->free_chunk_space); + atomic64_add(free_diff, &fs_info->free_chunk_space); + } mutex_unlock(&fs_info->chunk_mutex); } return ret; @@ -5422,69 +5397,140 @@ static int decide_stripe_size(struct btrfs_fs_devices *fs_devices, } } +static void chunk_map_device_set_bits(struct btrfs_chunk_map *map, unsigned int bits) +{ + for (int i = 0; i < map->num_stripes; i++) { + struct btrfs_io_stripe *stripe = &map->stripes[i]; + struct btrfs_device *device = stripe->dev; + + set_extent_bit(&device->alloc_state, stripe->physical, + stripe->physical + map->stripe_size - 1, + bits | EXTENT_NOWAIT, NULL); + } +} + +static void chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned int bits) +{ + for (int i = 0; i < map->num_stripes; i++) { + struct btrfs_io_stripe *stripe = &map->stripes[i]; + struct btrfs_device *device = stripe->dev; + + __clear_extent_bit(&device->alloc_state, stripe->physical, + stripe->physical + map->stripe_size - 1, + bits | EXTENT_NOWAIT, + NULL, NULL); + } +} + +void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map) +{ + write_lock(&fs_info->mapping_tree_lock); + rb_erase_cached(&map->rb_node, &fs_info->mapping_tree); + RB_CLEAR_NODE(&map->rb_node); + chunk_map_device_clear_bits(map, CHUNK_ALLOCATED); + write_unlock(&fs_info->mapping_tree_lock); + + /* Once for the tree reference. */ + btrfs_free_chunk_map(map); +} + +EXPORT_FOR_TESTS +int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map) +{ + struct rb_node **p; + struct rb_node *parent = NULL; + bool leftmost = true; + + write_lock(&fs_info->mapping_tree_lock); + p = &fs_info->mapping_tree.rb_root.rb_node; + while (*p) { + struct btrfs_chunk_map *entry; + + parent = *p; + entry = rb_entry(parent, struct btrfs_chunk_map, rb_node); + + if (map->start < entry->start) { + p = &(*p)->rb_left; + } else if (map->start > entry->start) { + p = &(*p)->rb_right; + leftmost = false; + } else { + write_unlock(&fs_info->mapping_tree_lock); + return -EEXIST; + } + } + rb_link_node(&map->rb_node, parent, p); + rb_insert_color_cached(&map->rb_node, &fs_info->mapping_tree, leftmost); + chunk_map_device_set_bits(map, CHUNK_ALLOCATED); + chunk_map_device_clear_bits(map, CHUNK_TRIMMED); + write_unlock(&fs_info->mapping_tree_lock); + + return 0; +} + +EXPORT_FOR_TESTS +struct btrfs_chunk_map *btrfs_alloc_chunk_map(int num_stripes, gfp_t gfp) +{ + struct btrfs_chunk_map *map; + + map = kmalloc(btrfs_chunk_map_size(num_stripes), gfp); + if (!map) + return NULL; + + refcount_set(&map->refs, 1); + RB_CLEAR_NODE(&map->rb_node); + + return map; +} + static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans, struct alloc_chunk_ctl *ctl, struct btrfs_device_info *devices_info) { struct btrfs_fs_info *info = trans->fs_info; - struct map_lookup *map = NULL; - struct extent_map_tree *em_tree; + struct btrfs_chunk_map *map; struct btrfs_block_group *block_group; - struct extent_map *em; u64 start = ctl->start; u64 type = ctl->type; int ret; - int i; - int j; - map = kmalloc(map_lookup_size(ctl->num_stripes), GFP_NOFS); + map = btrfs_alloc_chunk_map(ctl->num_stripes, GFP_NOFS); if (!map) return ERR_PTR(-ENOMEM); + + map->start = start; + map->chunk_len = ctl->chunk_size; + map->stripe_size = ctl->stripe_size; + map->type = type; + map->io_align = BTRFS_STRIPE_LEN; + map->io_width = BTRFS_STRIPE_LEN; + map->sub_stripes = ctl->sub_stripes; map->num_stripes = ctl->num_stripes; - for (i = 0; i < ctl->ndevs; ++i) { - for (j = 0; j < ctl->dev_stripes; ++j) { + for (int i = 0; i < ctl->ndevs; i++) { + for (int j = 0; j < ctl->dev_stripes; j++) { int s = i * ctl->dev_stripes + j; map->stripes[s].dev = devices_info[i].dev; map->stripes[s].physical = devices_info[i].dev_offset + j * ctl->stripe_size; } } - map->io_align = BTRFS_STRIPE_LEN; - map->io_width = BTRFS_STRIPE_LEN; - map->type = type; - map->sub_stripes = ctl->sub_stripes; trace_btrfs_chunk_alloc(info, map, start, ctl->chunk_size); - em = alloc_extent_map(); - if (!em) { - kfree(map); - return ERR_PTR(-ENOMEM); - } - set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); - em->map_lookup = map; - em->start = start; - em->len = ctl->chunk_size; - em->block_start = 0; - em->block_len = em->len; - em->orig_block_len = ctl->stripe_size; - - em_tree = &info->mapping_tree; - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em, 0); + ret = btrfs_add_chunk_map(info, map); if (ret) { - write_unlock(&em_tree->lock); - free_extent_map(em); + btrfs_free_chunk_map(map); return ERR_PTR(ret); } - write_unlock(&em_tree->lock); block_group = btrfs_make_block_group(trans, type, start, ctl->chunk_size); - if (IS_ERR(block_group)) - goto error_del_extent; + if (IS_ERR(block_group)) { + btrfs_remove_chunk_map(info, map); + return block_group; + } - for (i = 0; i < map->num_stripes; i++) { + for (int i = 0; i < map->num_stripes; i++) { struct btrfs_device *dev = map->stripes[i].dev; btrfs_device_set_bytes_used(dev, @@ -5497,23 +5543,10 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans, atomic64_sub(ctl->stripe_size * map->num_stripes, &info->free_chunk_space); - free_extent_map(em); check_raid56_incompat_flag(info, type); check_raid1c34_incompat_flag(info, type); return block_group; - -error_del_extent: - write_lock(&em_tree->lock); - remove_extent_mapping(em_tree, em); - write_unlock(&em_tree->lock); - - /* One for our allocation */ - free_extent_map(em); - /* One for the tree reference */ - free_extent_map(em); - - return block_group; } struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans, @@ -5589,8 +5622,7 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans, struct btrfs_key key; struct btrfs_chunk *chunk; struct btrfs_stripe *stripe; - struct extent_map *em; - struct map_lookup *map; + struct btrfs_chunk_map *map; size_t item_size; int i; int ret; @@ -5619,14 +5651,13 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans, */ lockdep_assert_held(&fs_info->chunk_mutex); - em = btrfs_get_chunk_map(fs_info, bg->start, bg->length); - if (IS_ERR(em)) { - ret = PTR_ERR(em); + map = btrfs_get_chunk_map(fs_info, bg->start, bg->length); + if (IS_ERR(map)) { + ret = PTR_ERR(map); btrfs_abort_transaction(trans, ret); return ret; } - map = em->map_lookup; item_size = btrfs_chunk_item_size(map->num_stripes); chunk = kzalloc(item_size, GFP_NOFS); @@ -5683,7 +5714,7 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans, out: kfree(chunk); - free_extent_map(em); + btrfs_free_chunk_map(map); return ret; } @@ -5728,7 +5759,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans) return 0; } -static inline int btrfs_chunk_max_errors(struct map_lookup *map) +static inline int btrfs_chunk_max_errors(struct btrfs_chunk_map *map) { const int index = btrfs_bg_flags_to_raid_index(map->type); @@ -5737,17 +5768,15 @@ static inline int btrfs_chunk_max_errors(struct map_lookup *map) bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset) { - struct extent_map *em; - struct map_lookup *map; + struct btrfs_chunk_map *map; int miss_ndevs = 0; int i; bool ret = true; - em = btrfs_get_chunk_map(fs_info, chunk_offset, 1); - if (IS_ERR(em)) + map = btrfs_get_chunk_map(fs_info, chunk_offset, 1); + if (IS_ERR(map)) return false; - map = em->map_lookup; for (i = 0; i < map->num_stripes; i++) { if (test_bit(BTRFS_DEV_STATE_MISSING, &map->stripes[i].dev->dev_state)) { @@ -5768,38 +5797,57 @@ bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset) if (miss_ndevs > btrfs_chunk_max_errors(map)) ret = false; end: - free_extent_map(em); + btrfs_free_chunk_map(map); return ret; } -void btrfs_mapping_tree_free(struct extent_map_tree *tree) +void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info) { - struct extent_map *em; + write_lock(&fs_info->mapping_tree_lock); + while (!RB_EMPTY_ROOT(&fs_info->mapping_tree.rb_root)) { + struct btrfs_chunk_map *map; + struct rb_node *node; - while (1) { - write_lock(&tree->lock); - em = lookup_extent_mapping(tree, 0, (u64)-1); - if (em) - remove_extent_mapping(tree, em); - write_unlock(&tree->lock); - if (!em) - break; - /* once for us */ - free_extent_map(em); - /* once for the tree */ - free_extent_map(em); + node = rb_first_cached(&fs_info->mapping_tree); + map = rb_entry(node, struct btrfs_chunk_map, rb_node); + rb_erase_cached(&map->rb_node, &fs_info->mapping_tree); + RB_CLEAR_NODE(&map->rb_node); + chunk_map_device_clear_bits(map, CHUNK_ALLOCATED); + /* Once for the tree ref. */ + btrfs_free_chunk_map(map); + cond_resched_rwlock_write(&fs_info->mapping_tree_lock); } + write_unlock(&fs_info->mapping_tree_lock); +} + +static int btrfs_chunk_map_num_copies(const struct btrfs_chunk_map *map) +{ + enum btrfs_raid_types index = btrfs_bg_flags_to_raid_index(map->type); + + if (map->type & BTRFS_BLOCK_GROUP_RAID5) + return 2; + + /* + * There could be two corrupted data stripes, we need to loop retry in + * order to rebuild the correct data. + * + * Fail a stripe at a time on every retry except the stripe under + * reconstruction. + */ + if (map->type & BTRFS_BLOCK_GROUP_RAID6) + return map->num_stripes; + + /* Non-RAID56, use their ncopies from btrfs_raid_array. */ + return btrfs_raid_array[index].ncopies; } int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) { - struct extent_map *em; - struct map_lookup *map; - enum btrfs_raid_types index; - int ret = 1; + struct btrfs_chunk_map *map; + int ret; - em = btrfs_get_chunk_map(fs_info, logical, len); - if (IS_ERR(em)) + map = btrfs_get_chunk_map(fs_info, logical, len); + if (IS_ERR(map)) /* * We could return errors for these cases, but that could get * ugly and we'd probably do the same thing which is just not do @@ -5808,72 +5856,53 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) */ return 1; - map = em->map_lookup; - index = btrfs_bg_flags_to_raid_index(map->type); - - /* Non-RAID56, use their ncopies from btrfs_raid_array. */ - if (!(map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)) - ret = btrfs_raid_array[index].ncopies; - else if (map->type & BTRFS_BLOCK_GROUP_RAID5) - ret = 2; - else if (map->type & BTRFS_BLOCK_GROUP_RAID6) - /* - * There could be two corrupted data stripes, we need - * to loop retry in order to rebuild the correct data. - * - * Fail a stripe at a time on every retry except the - * stripe under reconstruction. - */ - ret = map->num_stripes; - free_extent_map(em); + ret = btrfs_chunk_map_num_copies(map); + btrfs_free_chunk_map(map); return ret; } unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info, u64 logical) { - struct extent_map *em; - struct map_lookup *map; + struct btrfs_chunk_map *map; unsigned long len = fs_info->sectorsize; if (!btrfs_fs_incompat(fs_info, RAID56)) return len; - em = btrfs_get_chunk_map(fs_info, logical, len); + map = btrfs_get_chunk_map(fs_info, logical, len); - if (!WARN_ON(IS_ERR(em))) { - map = em->map_lookup; + if (!WARN_ON(IS_ERR(map))) { if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) len = btrfs_stripe_nr_to_offset(nr_data_stripes(map)); - free_extent_map(em); + btrfs_free_chunk_map(map); } return len; } int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len) { - struct extent_map *em; - struct map_lookup *map; + struct btrfs_chunk_map *map; int ret = 0; if (!btrfs_fs_incompat(fs_info, RAID56)) return 0; - em = btrfs_get_chunk_map(fs_info, logical, len); + map = btrfs_get_chunk_map(fs_info, logical, len); - if(!WARN_ON(IS_ERR(em))) { - map = em->map_lookup; + if (!WARN_ON(IS_ERR(map))) { if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) ret = 1; - free_extent_map(em); + btrfs_free_chunk_map(map); } return ret; } static int find_live_mirror(struct btrfs_fs_info *fs_info, - struct map_lookup *map, int first, + struct btrfs_chunk_map *map, int first, int dev_replace_is_ongoing) { + const enum btrfs_read_policy policy = READ_ONCE(fs_info->fs_devices->read_policy); int i; int num_stripes; int preferred_mirror; @@ -5888,13 +5917,12 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, else num_stripes = map->num_stripes; - switch (fs_info->fs_devices->read_policy) { + switch (policy) { default: /* Shouldn't happen, just warn and use pid instead of failing */ - btrfs_warn_rl(fs_info, - "unknown read_policy type %u, reset to pid", - fs_info->fs_devices->read_policy); - fs_info->fs_devices->read_policy = BTRFS_READ_POLICY_PID; + btrfs_warn_rl(fs_info, "unknown read_policy type %u, reset to pid", + policy); + WRITE_ONCE(fs_info->fs_devices->read_policy, BTRFS_READ_POLICY_PID); fallthrough; case BTRFS_READ_POLICY_PID: preferred_mirror = first + (current->pid % num_stripes); @@ -5931,6 +5959,7 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, } static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info, + u64 logical, u16 total_stripes) { struct btrfs_io_context *bioc; @@ -5950,6 +5979,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ bioc->fs_info = fs_info; bioc->replace_stripe_src = -1; bioc->full_stripe_logical = (u64)-1; + bioc->logical = logical; return bioc; } @@ -5976,8 +6006,7 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, u64 logical, u64 *length_ret, u32 *num_stripes) { - struct extent_map *em; - struct map_lookup *map; + struct btrfs_chunk_map *map; struct btrfs_discard_stripe *stripes; u64 length = *length_ret; u64 offset; @@ -5995,11 +6024,9 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, int ret; int i; - em = btrfs_get_chunk_map(fs_info, logical, length); - if (IS_ERR(em)) - return ERR_CAST(em); - - map = em->map_lookup; + map = btrfs_get_chunk_map(fs_info, logical, length); + if (IS_ERR(map)) + return ERR_CAST(map); /* we don't discard raid56 yet */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { @@ -6007,8 +6034,8 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, goto out_free_map; } - offset = logical - em->start; - length = min_t(u64, em->start + em->len - logical, length); + offset = logical - map->start; + length = min_t(u64, map->start + map->chunk_len - logical, length); *length_ret = length; /* @@ -6105,10 +6132,10 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info, } } - free_extent_map(em); + btrfs_free_chunk_map(map); return stripes; out_free_map: - free_extent_map(em); + btrfs_free_chunk_map(map); return ERR_PTR(ret); } @@ -6129,20 +6156,19 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical) return ret; } -static void handle_ops_on_dev_replace(enum btrfs_map_op op, - struct btrfs_io_context *bioc, +static void handle_ops_on_dev_replace(struct btrfs_io_context *bioc, struct btrfs_dev_replace *dev_replace, u64 logical, - int *num_stripes_ret, int *max_errors_ret) + struct btrfs_io_geometry *io_geom) { u64 srcdev_devid = dev_replace->srcdev->devid; /* * At this stage, num_stripes is still the real number of stripes, * excluding the duplicated stripes. */ - int num_stripes = *num_stripes_ret; + int num_stripes = io_geom->num_stripes; + int max_errors = io_geom->max_errors; int nr_extra_stripes = 0; - int max_errors = *max_errors_ret; int i; /* @@ -6183,7 +6209,7 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, * replace. * If we have 2 extra stripes, only choose the one with smaller physical. */ - if (op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) { + if (io_geom->op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) { struct btrfs_io_stripe *first = &bioc->stripes[num_stripes]; struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1]; @@ -6201,22 +6227,21 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, } } - *num_stripes_ret = num_stripes + nr_extra_stripes; - *max_errors_ret = max_errors + nr_extra_stripes; + io_geom->num_stripes = num_stripes + nr_extra_stripes; + io_geom->max_errors = max_errors + nr_extra_stripes; bioc->replace_nr_stripes = nr_extra_stripes; } -static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, - u64 offset, u32 *stripe_nr, u64 *stripe_offset, - u64 *full_stripe_start) +static u64 btrfs_max_io_len(struct btrfs_chunk_map *map, u64 offset, + struct btrfs_io_geometry *io_geom) { /* * Stripe_nr is the stripe where this block falls. stripe_offset is * the offset of this block in its stripe. */ - *stripe_offset = offset & BTRFS_STRIPE_LEN_MASK; - *stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT; - ASSERT(*stripe_offset < U32_MAX); + io_geom->stripe_offset = offset & BTRFS_STRIPE_LEN_MASK; + io_geom->stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT; + ASSERT(io_geom->stripe_offset < U32_MAX); if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { unsigned long full_stripe_len = @@ -6231,18 +6256,17 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, * to go rounddown(), not round_down(), as nr_data_stripes is * not ensured to be power of 2. */ - *full_stripe_start = - btrfs_stripe_nr_to_offset( - rounddown(*stripe_nr, nr_data_stripes(map))); + io_geom->raid56_full_stripe_start = btrfs_stripe_nr_to_offset( + rounddown(io_geom->stripe_nr, nr_data_stripes(map))); - ASSERT(*full_stripe_start + full_stripe_len > offset); - ASSERT(*full_stripe_start <= offset); + ASSERT(io_geom->raid56_full_stripe_start + full_stripe_len > offset); + ASSERT(io_geom->raid56_full_stripe_start <= offset); /* * For writes to RAID56, allow to write a full stripe set, but * no straddling of stripe sets. */ - if (op == BTRFS_MAP_WRITE) - return full_stripe_len - (offset - *full_stripe_start); + if (io_geom->op == BTRFS_MAP_WRITE) + return full_stripe_len - (offset - io_geom->raid56_full_stripe_start); } /* @@ -6250,16 +6274,178 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op, * a single disk). */ if (map->type & BTRFS_BLOCK_GROUP_STRIPE_MASK) - return BTRFS_STRIPE_LEN - *stripe_offset; + return BTRFS_STRIPE_LEN - io_geom->stripe_offset; return U64_MAX; } -static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map, - u32 stripe_index, u64 stripe_offset, u32 stripe_nr) +static int set_io_stripe(struct btrfs_fs_info *fs_info, u64 logical, + u64 *length, struct btrfs_io_stripe *dst, + struct btrfs_chunk_map *map, + struct btrfs_io_geometry *io_geom) { - dst->dev = map->stripes[stripe_index].dev; - dst->physical = map->stripes[stripe_index].physical + - stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr); + dst->dev = map->stripes[io_geom->stripe_index].dev; + + if (io_geom->op == BTRFS_MAP_READ && + btrfs_need_stripe_tree_update(fs_info, map->type)) + return btrfs_get_raid_extent_offset(fs_info, logical, length, + map->type, + io_geom->stripe_index, dst); + + dst->physical = map->stripes[io_geom->stripe_index].physical + + io_geom->stripe_offset + + btrfs_stripe_nr_to_offset(io_geom->stripe_nr); + return 0; +} + +static bool is_single_device_io(struct btrfs_fs_info *fs_info, + const struct btrfs_io_stripe *smap, + const struct btrfs_chunk_map *map, + int num_alloc_stripes, + enum btrfs_map_op op, int mirror_num) +{ + if (!smap) + return false; + + if (num_alloc_stripes != 1) + return false; + + if (btrfs_need_stripe_tree_update(fs_info, map->type) && op != BTRFS_MAP_READ) + return false; + + if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) + return false; + + return true; +} + +static void map_blocks_raid0(const struct btrfs_chunk_map *map, + struct btrfs_io_geometry *io_geom) +{ + io_geom->stripe_index = io_geom->stripe_nr % map->num_stripes; + io_geom->stripe_nr /= map->num_stripes; + if (io_geom->op == BTRFS_MAP_READ) + io_geom->mirror_num = 1; +} + +static void map_blocks_raid1(struct btrfs_fs_info *fs_info, + struct btrfs_chunk_map *map, + struct btrfs_io_geometry *io_geom, + bool dev_replace_is_ongoing) +{ + if (io_geom->op != BTRFS_MAP_READ) { + io_geom->num_stripes = map->num_stripes; + return; + } + + if (io_geom->mirror_num) { + io_geom->stripe_index = io_geom->mirror_num - 1; + return; + } + + io_geom->stripe_index = find_live_mirror(fs_info, map, 0, + dev_replace_is_ongoing); + io_geom->mirror_num = io_geom->stripe_index + 1; +} + +static void map_blocks_dup(const struct btrfs_chunk_map *map, + struct btrfs_io_geometry *io_geom) +{ + if (io_geom->op != BTRFS_MAP_READ) { + io_geom->num_stripes = map->num_stripes; + return; + } + + if (io_geom->mirror_num) { + io_geom->stripe_index = io_geom->mirror_num - 1; + return; + } + + io_geom->mirror_num = 1; +} + +static void map_blocks_raid10(struct btrfs_fs_info *fs_info, + struct btrfs_chunk_map *map, + struct btrfs_io_geometry *io_geom, + bool dev_replace_is_ongoing) +{ + u32 factor = map->num_stripes / map->sub_stripes; + int old_stripe_index; + + io_geom->stripe_index = (io_geom->stripe_nr % factor) * map->sub_stripes; + io_geom->stripe_nr /= factor; + + if (io_geom->op != BTRFS_MAP_READ) { + io_geom->num_stripes = map->sub_stripes; + return; + } + + if (io_geom->mirror_num) { + io_geom->stripe_index += io_geom->mirror_num - 1; + return; + } + + old_stripe_index = io_geom->stripe_index; + io_geom->stripe_index = find_live_mirror(fs_info, map, + io_geom->stripe_index, + dev_replace_is_ongoing); + io_geom->mirror_num = io_geom->stripe_index - old_stripe_index + 1; +} + +static void map_blocks_raid56_write(struct btrfs_chunk_map *map, + struct btrfs_io_geometry *io_geom, + u64 logical, u64 *length) +{ + int data_stripes = nr_data_stripes(map); + + /* + * Needs full stripe mapping. + * + * Push stripe_nr back to the start of the full stripe For those cases + * needing a full stripe, @stripe_nr is the full stripe number. + * + * Originally we go raid56_full_stripe_start / full_stripe_len, but + * that can be expensive. Here we just divide @stripe_nr with + * @data_stripes. + */ + io_geom->stripe_nr /= data_stripes; + + /* RAID[56] write or recovery. Return all stripes */ + io_geom->num_stripes = map->num_stripes; + io_geom->max_errors = btrfs_chunk_max_errors(map); + + /* Return the length to the full stripe end. */ + *length = min(logical + *length, + io_geom->raid56_full_stripe_start + map->start + + btrfs_stripe_nr_to_offset(data_stripes)) - + logical; + io_geom->stripe_index = 0; + io_geom->stripe_offset = 0; +} + +static void map_blocks_raid56_read(struct btrfs_chunk_map *map, + struct btrfs_io_geometry *io_geom) +{ + int data_stripes = nr_data_stripes(map); + + ASSERT(io_geom->mirror_num <= 1); + /* Just grab the data stripe directly. */ + io_geom->stripe_index = io_geom->stripe_nr % data_stripes; + io_geom->stripe_nr /= data_stripes; + + /* We distribute the parity blocks across stripes. */ + io_geom->stripe_index = + (io_geom->stripe_nr + io_geom->stripe_index) % map->num_stripes; + + if (io_geom->op == BTRFS_MAP_READ && io_geom->mirror_num < 1) + io_geom->mirror_num = 1; +} + +static void map_blocks_single(const struct btrfs_chunk_map *map, + struct btrfs_io_geometry *io_geom) +{ + io_geom->stripe_index = io_geom->stripe_nr % map->num_stripes; + io_geom->stripe_nr /= map->num_stripes; + io_geom->mirror_num = io_geom->stripe_index + 1; } /* @@ -6296,54 +6482,42 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup * * For RAID6 profile, mirror > 2 means mark another * data/P stripe error and rebuild from the remaining * stripes.. - * - * @need_raid_map: (Used only for integrity checker) whether the map wants - * a full stripe map (including all data and P/Q stripes) - * for RAID56. Should always be 1 except integrity checker. */ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, struct btrfs_io_context **bioc_ret, - struct btrfs_io_stripe *smap, int *mirror_num_ret, - int need_raid_map) + struct btrfs_io_stripe *smap, int *mirror_num_ret) { - struct extent_map *em; - struct map_lookup *map; + struct btrfs_chunk_map *map; + struct btrfs_io_geometry io_geom = { 0 }; u64 map_offset; - u64 stripe_offset; - u32 stripe_nr; - u32 stripe_index; - int data_stripes; - int i; int ret = 0; - int mirror_num = (mirror_num_ret ? *mirror_num_ret : 0); - int num_stripes; int num_copies; - int max_errors = 0; struct btrfs_io_context *bioc = NULL; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int dev_replace_is_ongoing = 0; u16 num_alloc_stripes; - u64 raid56_full_stripe_start = (u64)-1; u64 max_len; ASSERT(bioc_ret); - num_copies = btrfs_num_copies(fs_info, logical, fs_info->sectorsize); - if (mirror_num > num_copies) - return -EINVAL; + io_geom.mirror_num = (mirror_num_ret ? *mirror_num_ret : 0); + io_geom.num_stripes = 1; + io_geom.stripe_index = 0; + io_geom.op = op; - em = btrfs_get_chunk_map(fs_info, logical, *length); - if (IS_ERR(em)) - return PTR_ERR(em); + map = btrfs_get_chunk_map(fs_info, logical, *length); + if (IS_ERR(map)) + return PTR_ERR(map); - map = em->map_lookup; - data_stripes = nr_data_stripes(map); + num_copies = btrfs_chunk_map_num_copies(map); + if (io_geom.mirror_num > num_copies) + return -EINVAL; - map_offset = logical - em->start; - max_len = btrfs_max_io_len(map, op, map_offset, &stripe_nr, - &stripe_offset, &raid56_full_stripe_start); - *length = min_t(u64, em->len - map_offset, max_len); + map_offset = logical - map->start; + io_geom.raid56_full_stripe_start = (u64)-1; + max_len = btrfs_max_io_len(map, map_offset, &io_geom); + *length = min_t(u64, map->chunk_len - map_offset, max_len); if (dev_replace->replace_task != current) down_read(&dev_replace->rwsem); @@ -6356,110 +6530,46 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, if (!dev_replace_is_ongoing && dev_replace->replace_task != current) up_read(&dev_replace->rwsem); - num_stripes = 1; - stripe_index = 0; - if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - stripe_index = stripe_nr % map->num_stripes; - stripe_nr /= map->num_stripes; - if (op == BTRFS_MAP_READ) - mirror_num = 1; - } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) { - if (op != BTRFS_MAP_READ) { - num_stripes = map->num_stripes; - } else if (mirror_num) { - stripe_index = mirror_num - 1; - } else { - stripe_index = find_live_mirror(fs_info, map, 0, - dev_replace_is_ongoing); - mirror_num = stripe_index + 1; - } - - } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (op != BTRFS_MAP_READ) { - num_stripes = map->num_stripes; - } else if (mirror_num) { - stripe_index = mirror_num - 1; - } else { - mirror_num = 1; - } - - } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { - u32 factor = map->num_stripes / map->sub_stripes; - - stripe_index = (stripe_nr % factor) * map->sub_stripes; - stripe_nr /= factor; - - if (op != BTRFS_MAP_READ) - num_stripes = map->sub_stripes; - else if (mirror_num) - stripe_index += mirror_num - 1; - else { - int old_stripe_index = stripe_index; - stripe_index = find_live_mirror(fs_info, map, - stripe_index, - dev_replace_is_ongoing); - mirror_num = stripe_index - old_stripe_index + 1; - } - - } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - if (need_raid_map && (op != BTRFS_MAP_READ || mirror_num > 1)) { - /* - * Push stripe_nr back to the start of the full stripe - * For those cases needing a full stripe, @stripe_nr - * is the full stripe number. - * - * Originally we go raid56_full_stripe_start / full_stripe_len, - * but that can be expensive. Here we just divide - * @stripe_nr with @data_stripes. - */ - stripe_nr /= data_stripes; - - /* RAID[56] write or recovery. Return all stripes */ - num_stripes = map->num_stripes; - max_errors = btrfs_chunk_max_errors(map); - - /* Return the length to the full stripe end */ - *length = min(logical + *length, - raid56_full_stripe_start + em->start + - btrfs_stripe_nr_to_offset(data_stripes)) - - logical; - stripe_index = 0; - stripe_offset = 0; - } else { - /* - * Mirror #0 or #1 means the original data block. - * Mirror #2 is RAID5 parity block. - * Mirror #3 is RAID6 Q block. - */ - stripe_index = stripe_nr % data_stripes; - stripe_nr /= data_stripes; - if (mirror_num > 1) - stripe_index = data_stripes + mirror_num - 2; - - /* We distribute the parity blocks across stripes */ - stripe_index = (stripe_nr + stripe_index) % map->num_stripes; - if (op == BTRFS_MAP_READ && mirror_num <= 1) - mirror_num = 1; - } - } else { + switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case BTRFS_BLOCK_GROUP_RAID0: + map_blocks_raid0(map, &io_geom); + break; + case BTRFS_BLOCK_GROUP_RAID1: + case BTRFS_BLOCK_GROUP_RAID1C3: + case BTRFS_BLOCK_GROUP_RAID1C4: + map_blocks_raid1(fs_info, map, &io_geom, dev_replace_is_ongoing); + break; + case BTRFS_BLOCK_GROUP_DUP: + map_blocks_dup(map, &io_geom); + break; + case BTRFS_BLOCK_GROUP_RAID10: + map_blocks_raid10(fs_info, map, &io_geom, dev_replace_is_ongoing); + break; + case BTRFS_BLOCK_GROUP_RAID5: + case BTRFS_BLOCK_GROUP_RAID6: + if (op != BTRFS_MAP_READ || io_geom.mirror_num > 1) + map_blocks_raid56_write(map, &io_geom, logical, length); + else + map_blocks_raid56_read(map, &io_geom); + break; + default: /* * After this, stripe_nr is the number of stripes on this * device we have to walk to find the data, and stripe_index is * the number of our device in the stripe array */ - stripe_index = stripe_nr % map->num_stripes; - stripe_nr /= map->num_stripes; - mirror_num = stripe_index + 1; + map_blocks_single(map, &io_geom); + break; } - if (stripe_index >= map->num_stripes) { + if (io_geom.stripe_index >= map->num_stripes) { btrfs_crit(fs_info, "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u", - stripe_index, map->num_stripes); + io_geom.stripe_index, map->num_stripes); ret = -EINVAL; goto out; } - num_alloc_stripes = num_stripes; + num_alloc_stripes = io_geom.num_stripes; if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && op != BTRFS_MAP_READ) /* @@ -6476,17 +6586,16 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, * physical block information on the stack instead of allocating an * I/O context structure. */ - if (smap && num_alloc_stripes == 1 && - !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) { - set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr); + if (is_single_device_io(fs_info, smap, map, num_alloc_stripes, op, + io_geom.mirror_num)) { + ret = set_io_stripe(fs_info, logical, length, smap, map, &io_geom); if (mirror_num_ret) - *mirror_num_ret = mirror_num; + *mirror_num_ret = io_geom.mirror_num; *bioc_ret = NULL; - ret = 0; goto out; } - bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes); + bioc = alloc_btrfs_io_context(fs_info, logical, num_alloc_stripes); if (!bioc) { ret = -ENOMEM; goto out; @@ -6500,8 +6609,8 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, * * It's still mostly the same as other profiles, just with extra rotation. */ - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && - (op != BTRFS_MAP_READ || mirror_num > 1)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && + (op != BTRFS_MAP_READ || io_geom.mirror_num > 1)) { /* * For RAID56 @stripe_nr is already the number of full stripes * before us, which is also the rotation value (needs to modulo @@ -6510,37 +6619,52 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, * In this case, we just add @stripe_nr with @i, then do the * modulo, to reduce one modulo call. */ - bioc->full_stripe_logical = em->start + - btrfs_stripe_nr_to_offset(stripe_nr * data_stripes); - for (i = 0; i < num_stripes; i++) - set_io_stripe(&bioc->stripes[i], map, - (i + stripe_nr) % num_stripes, - stripe_offset, stripe_nr); + bioc->full_stripe_logical = map->start + + btrfs_stripe_nr_to_offset(io_geom.stripe_nr * + nr_data_stripes(map)); + for (int i = 0; i < io_geom.num_stripes; i++) { + struct btrfs_io_stripe *dst = &bioc->stripes[i]; + u32 stripe_index; + + stripe_index = (i + io_geom.stripe_nr) % io_geom.num_stripes; + dst->dev = map->stripes[stripe_index].dev; + dst->physical = + map->stripes[stripe_index].physical + + io_geom.stripe_offset + + btrfs_stripe_nr_to_offset(io_geom.stripe_nr); + } } else { /* * For all other non-RAID56 profiles, just copy the target * stripe into the bioc. */ - for (i = 0; i < num_stripes; i++) { - set_io_stripe(&bioc->stripes[i], map, stripe_index, - stripe_offset, stripe_nr); - stripe_index++; + for (int i = 0; i < io_geom.num_stripes; i++) { + ret = set_io_stripe(fs_info, logical, length, + &bioc->stripes[i], map, &io_geom); + if (ret < 0) + break; + io_geom.stripe_index++; } } + if (ret) { + *bioc_ret = NULL; + btrfs_put_bioc(bioc); + goto out; + } + if (op != BTRFS_MAP_READ) - max_errors = btrfs_chunk_max_errors(map); + io_geom.max_errors = btrfs_chunk_max_errors(map); if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && op != BTRFS_MAP_READ) { - handle_ops_on_dev_replace(op, bioc, dev_replace, logical, - &num_stripes, &max_errors); + handle_ops_on_dev_replace(bioc, dev_replace, logical, &io_geom); } *bioc_ret = bioc; - bioc->num_stripes = num_stripes; - bioc->max_errors = max_errors; - bioc->mirror_num = mirror_num; + bioc->num_stripes = io_geom.num_stripes; + bioc->max_errors = io_geom.max_errors; + bioc->mirror_num = io_geom.mirror_num; out: if (dev_replace_is_ongoing && dev_replace->replace_task != current) { @@ -6548,7 +6672,7 @@ out: /* Unlock and let waiting writers proceed */ up_read(&dev_replace->rwsem); } - free_extent_map(em); + btrfs_free_chunk_map(map); return ret; } @@ -6720,12 +6844,11 @@ static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, devid, uuid); } -u64 btrfs_calc_stripe_length(const struct extent_map *em) +u64 btrfs_calc_stripe_length(const struct btrfs_chunk_map *map) { - const struct map_lookup *map = em->map_lookup; const int data_stripes = calc_data_stripes(map->type, map->num_stripes); - return div_u64(em->len, data_stripes); + return div_u64(map->chunk_len, data_stripes); } #if BITS_PER_LONG == 32 @@ -6794,9 +6917,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, { BTRFS_DEV_LOOKUP_ARGS(args); struct btrfs_fs_info *fs_info = leaf->fs_info; - struct extent_map_tree *map_tree = &fs_info->mapping_tree; - struct map_lookup *map; - struct extent_map *em; + struct btrfs_chunk_map *map; u64 logical; u64 length; u64 devid; @@ -6830,35 +6951,22 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, return ret; } - read_lock(&map_tree->lock); - em = lookup_extent_mapping(map_tree, logical, 1); - read_unlock(&map_tree->lock); + map = btrfs_find_chunk_map(fs_info, logical, 1); /* already mapped? */ - if (em && em->start <= logical && em->start + em->len > logical) { - free_extent_map(em); + if (map && map->start <= logical && map->start + map->chunk_len > logical) { + btrfs_free_chunk_map(map); return 0; - } else if (em) { - free_extent_map(em); + } else if (map) { + btrfs_free_chunk_map(map); } - em = alloc_extent_map(); - if (!em) - return -ENOMEM; - map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); - if (!map) { - free_extent_map(em); + map = btrfs_alloc_chunk_map(num_stripes, GFP_NOFS); + if (!map) return -ENOMEM; - } - - set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); - em->map_lookup = map; - em->start = logical; - em->len = length; - em->orig_start = 0; - em->block_start = 0; - em->block_len = em->len; + map->start = logical; + map->chunk_len = length; map->num_stripes = num_stripes; map->io_width = btrfs_chunk_io_width(leaf, chunk); map->io_align = btrfs_chunk_io_align(leaf, chunk); @@ -6873,7 +6981,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, */ map->sub_stripes = btrfs_raid_array[index].sub_stripes; map->verified_stripes = 0; - em->orig_block_len = btrfs_calc_stripe_length(em); + map->stripe_size = btrfs_calc_stripe_length(map); for (i = 0; i < num_stripes; i++) { map->stripes[i].physical = btrfs_stripe_offset_nr(leaf, chunk, i); @@ -6889,7 +6997,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, devid, uuid); if (IS_ERR(map->stripes[i].dev)) { ret = PTR_ERR(map->stripes[i].dev); - free_extent_map(em); + btrfs_free_chunk_map(map); return ret; } } @@ -6898,15 +7006,13 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, &(map->stripes[i].dev->dev_state)); } - write_lock(&map_tree->lock); - ret = add_extent_mapping(map_tree, em, 0); - write_unlock(&map_tree->lock); + ret = btrfs_add_chunk_map(fs_info, map); if (ret < 0) { btrfs_err(fs_info, "failed to add chunk map, start=%llu len=%llu: %d", - em->start, em->len, ret); + map->start, map->chunk_len, ret); + btrfs_free_chunk_map(map); } - free_extent_map(em); return ret; } @@ -6954,7 +7060,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, if (!btrfs_test_opt(fs_info, DEGRADED)) return ERR_PTR(-ENOENT); - fs_devices = alloc_fs_devices(fsid, NULL); + fs_devices = alloc_fs_devices(fsid); if (IS_ERR(fs_devices)) return fs_devices; @@ -7216,26 +7322,21 @@ out_short_read: bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev) { - struct extent_map_tree *map_tree = &fs_info->mapping_tree; - struct extent_map *em; - u64 next_start = 0; + struct btrfs_chunk_map *map; + u64 next_start; bool ret = true; - read_lock(&map_tree->lock); - em = lookup_extent_mapping(map_tree, 0, (u64)-1); - read_unlock(&map_tree->lock); + map = btrfs_find_chunk_map(fs_info, 0, U64_MAX); /* No chunk at all? Return false anyway */ - if (!em) { + if (!map) { ret = false; goto out; } - while (em) { - struct map_lookup *map; + while (map) { int missing = 0; int max_tolerated; int i; - map = em->map_lookup; max_tolerated = btrfs_get_num_tolerated_disk_barrier_failures( map->type); @@ -7253,18 +7354,15 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, if (!failing_dev) btrfs_warn(fs_info, "chunk %llu missing %d devices, max tolerance is %d for writable mount", - em->start, missing, max_tolerated); - free_extent_map(em); + map->start, missing, max_tolerated); + btrfs_free_chunk_map(map); ret = false; goto out; } - next_start = extent_map_end(em); - free_extent_map(em); + next_start = map->start + map->chunk_len; + btrfs_free_chunk_map(map); - read_lock(&map_tree->lock); - em = lookup_extent_mapping(map_tree, next_start, - (u64)(-1) - next_start); - read_unlock(&map_tree->lock); + map = btrfs_find_chunk_map(fs_info, next_start, U64_MAX - next_start); } out: return ret; @@ -7757,20 +7855,15 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, u64 physical_offset, u64 physical_len) { struct btrfs_dev_lookup_args args = { .devid = devid }; - struct extent_map_tree *em_tree = &fs_info->mapping_tree; - struct extent_map *em; - struct map_lookup *map; + struct btrfs_chunk_map *map; struct btrfs_device *dev; u64 stripe_len; bool found = false; int ret = 0; int i; - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, chunk_offset, 1); - read_unlock(&em_tree->lock); - - if (!em) { + map = btrfs_find_chunk_map(fs_info, chunk_offset, 1); + if (!map) { btrfs_err(fs_info, "dev extent physical offset %llu on devid %llu doesn't have corresponding chunk", physical_offset, devid); @@ -7778,12 +7871,11 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, goto out; } - map = em->map_lookup; - stripe_len = btrfs_calc_stripe_length(em); + stripe_len = btrfs_calc_stripe_length(map); if (physical_len != stripe_len) { btrfs_err(fs_info, "dev extent physical offset %llu on devid %llu length doesn't match chunk %llu, have %llu expect %llu", - physical_offset, devid, em->start, physical_len, + physical_offset, devid, map->start, physical_len, stripe_len); ret = -EUCLEAN; goto out; @@ -7806,7 +7898,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, if (map->verified_stripes >= map->num_stripes) { btrfs_err(fs_info, "too many dev extents for chunk %llu found", - em->start); + map->start); ret = -EUCLEAN; goto out; } @@ -7852,32 +7944,30 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, } out: - free_extent_map(em); + btrfs_free_chunk_map(map); return ret; } static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree; - struct extent_map *em; struct rb_node *node; int ret = 0; - read_lock(&em_tree->lock); - for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) { - em = rb_entry(node, struct extent_map, rb_node); - if (em->map_lookup->num_stripes != - em->map_lookup->verified_stripes) { + read_lock(&fs_info->mapping_tree_lock); + for (node = rb_first_cached(&fs_info->mapping_tree); node; node = rb_next(node)) { + struct btrfs_chunk_map *map; + + map = rb_entry(node, struct btrfs_chunk_map, rb_node); + if (map->num_stripes != map->verified_stripes) { btrfs_err(fs_info, "chunk %llu has missing dev extent, have %d expect %d", - em->start, em->map_lookup->verified_stripes, - em->map_lookup->num_stripes); + map->start, map->verified_stripes, map->num_stripes); ret = -EUCLEAN; goto out; } } out: - read_unlock(&em_tree->lock); + read_unlock(&fs_info->mapping_tree_lock); return ret; } @@ -8129,7 +8219,7 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info, ASSERT(mirror_num > 0); ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length, - &bioc, smap, &mirror_ret, true); + &bioc, smap, &mirror_ret); if (ret < 0) return ret; |