diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 290 |
1 files changed, 135 insertions, 155 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6d3f08bfff3..9cfc668f91f4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -30,6 +30,7 @@ #include "tree-checker.h" #include "space-info.h" #include "block-group.h" +#include "discard.h" const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { [BTRFS_RAID_RAID10] = { @@ -66,6 +67,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { .tolerated_failures = 2, .devs_increment = 3, .ncopies = 3, + .nparity = 0, .raid_name = "raid1c3", .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3, .mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET, @@ -78,6 +80,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { .tolerated_failures = 3, .devs_increment = 4, .ncopies = 4, + .nparity = 0, .raid_name = "raid1c4", .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4, .mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, @@ -438,39 +441,6 @@ static noinline struct btrfs_fs_devices *find_fsid( ASSERT(fsid); - if (metadata_fsid) { - /* - * Handle scanned device having completed its fsid change but - * belonging to a fs_devices that was created by first scanning - * a device which didn't have its fsid/metadata_uuid changed - * at all and the CHANGING_FSID_V2 flag set. - */ - list_for_each_entry(fs_devices, &fs_uuids, fs_list) { - if (fs_devices->fsid_change && - memcmp(metadata_fsid, fs_devices->fsid, - BTRFS_FSID_SIZE) == 0 && - memcmp(fs_devices->fsid, fs_devices->metadata_uuid, - BTRFS_FSID_SIZE) == 0) { - return fs_devices; - } - } - /* - * Handle scanned device having completed its fsid change but - * belonging to a fs_devices that was created by a device that - * has an outdated pair of fsid/metadata_uuid and - * CHANGING_FSID_V2 flag set. - */ - list_for_each_entry(fs_devices, &fs_uuids, fs_list) { - if (fs_devices->fsid_change && - memcmp(fs_devices->metadata_uuid, - fs_devices->fsid, BTRFS_FSID_SIZE) != 0 && - memcmp(metadata_fsid, fs_devices->metadata_uuid, - BTRFS_FSID_SIZE) == 0) { - return fs_devices; - } - } - } - /* Handle non-split brain cases */ list_for_each_entry(fs_devices, &fs_uuids, fs_list) { if (metadata_fsid) { @@ -486,6 +456,47 @@ static noinline struct btrfs_fs_devices *find_fsid( return NULL; } +static struct btrfs_fs_devices *find_fsid_with_metadata_uuid( + struct btrfs_super_block *disk_super) +{ + + struct btrfs_fs_devices *fs_devices; + + /* + * Handle scanned device having completed its fsid change but + * belonging to a fs_devices that was created by first scanning + * a device which didn't have its fsid/metadata_uuid changed + * at all and the CHANGING_FSID_V2 flag set. + */ + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { + if (fs_devices->fsid_change && + memcmp(disk_super->metadata_uuid, fs_devices->fsid, + BTRFS_FSID_SIZE) == 0 && + memcmp(fs_devices->fsid, fs_devices->metadata_uuid, + BTRFS_FSID_SIZE) == 0) { + return fs_devices; + } + } + /* + * Handle scanned device having completed its fsid change but + * belonging to a fs_devices that was created by a device that + * has an outdated pair of fsid/metadata_uuid and + * CHANGING_FSID_V2 flag set. + */ + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { + if (fs_devices->fsid_change && + memcmp(fs_devices->metadata_uuid, + fs_devices->fsid, BTRFS_FSID_SIZE) != 0 && + memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid, + BTRFS_FSID_SIZE) == 0) { + return fs_devices; + } + } + + return find_fsid(disk_super->fsid, disk_super->metadata_uuid); +} + + static int btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, int flush, struct block_device **bdev, @@ -669,7 +680,9 @@ error_brelse: /* * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices - * being created with a disk that has already completed its fsid change. + * being created with a disk that has already completed its fsid change. Such + * disk can belong to an fs which has its FSID changed or to one which doesn't. + * Handle both cases here. */ static struct btrfs_fs_devices *find_fsid_inprogress( struct btrfs_super_block *disk_super) @@ -685,7 +698,7 @@ static struct btrfs_fs_devices *find_fsid_inprogress( } } - return NULL; + return find_fsid(disk_super->fsid, NULL); } @@ -697,17 +710,54 @@ static struct btrfs_fs_devices *find_fsid_changed( /* * Handles the case where scanned device is part of an fs that had * multiple successful changes of FSID but curently device didn't - * observe it. Meaning our fsid will be different than theirs. + * observe it. Meaning our fsid will be different than theirs. We need + * to handle two subcases : + * 1 - The fs still continues to have different METADATA/FSID uuids. + * 2 - The fs is switched back to its original FSID (METADATA/FSID + * are equal). */ list_for_each_entry(fs_devices, &fs_uuids, fs_list) { + /* Changed UUIDs */ if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, BTRFS_FSID_SIZE) != 0 && memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid, BTRFS_FSID_SIZE) == 0 && memcmp(fs_devices->fsid, disk_super->fsid, - BTRFS_FSID_SIZE) != 0) { + BTRFS_FSID_SIZE) != 0) + return fs_devices; + + /* Unchanged UUIDs */ + if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid, + BTRFS_FSID_SIZE) == 0 && + memcmp(fs_devices->fsid, disk_super->metadata_uuid, + BTRFS_FSID_SIZE) == 0) + return fs_devices; + } + + return NULL; +} + +static struct btrfs_fs_devices *find_fsid_reverted_metadata( + struct btrfs_super_block *disk_super) +{ + struct btrfs_fs_devices *fs_devices; + + /* + * Handle the case where the scanned device is part of an fs whose last + * metadata UUID change reverted it to the original FSID. At the same + * time * fs_devices was first created by another constitutent device + * which didn't fully observe the operation. This results in an + * btrfs_fs_devices created with metadata/fsid different AND + * btrfs_fs_devices::fsid_change set AND the metadata_uuid of the + * fs_devices equal to the FSID of the disk. + */ + list_for_each_entry(fs_devices, &fs_uuids, fs_list) { + if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid, + BTRFS_FSID_SIZE) != 0 && + memcmp(fs_devices->metadata_uuid, disk_super->fsid, + BTRFS_FSID_SIZE) == 0 && + fs_devices->fsid_change) return fs_devices; - } } return NULL; @@ -734,24 +784,16 @@ static noinline struct btrfs_device *device_list_add(const char *path, BTRFS_SUPER_FLAG_CHANGING_FSID_V2); if (fsid_change_in_progress) { - if (!has_metadata_uuid) { - /* - * When we have an image which has CHANGING_FSID_V2 set - * it might belong to either a filesystem which has - * disks with completed fsid change or it might belong - * to fs with no UUID changes in effect, handle both. - */ + if (!has_metadata_uuid) fs_devices = find_fsid_inprogress(disk_super); - if (!fs_devices) - fs_devices = find_fsid(disk_super->fsid, NULL); - } else { + else fs_devices = find_fsid_changed(disk_super); - } } else if (has_metadata_uuid) { - fs_devices = find_fsid(disk_super->fsid, - disk_super->metadata_uuid); + fs_devices = find_fsid_with_metadata_uuid(disk_super); } else { - fs_devices = find_fsid(disk_super->fsid, NULL); + fs_devices = find_fsid_reverted_metadata(disk_super); + if (!fs_devices) + fs_devices = find_fsid(disk_super->fsid, NULL); } @@ -781,12 +823,18 @@ static noinline struct btrfs_device *device_list_add(const char *path, * a device which had the CHANGING_FSID_V2 flag then replace the * metadata_uuid/fsid values of the fs_devices. */ - if (has_metadata_uuid && fs_devices->fsid_change && + if (fs_devices->fsid_change && found_transid > fs_devices->latest_generation) { memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); - memcpy(fs_devices->metadata_uuid, - disk_super->metadata_uuid, BTRFS_FSID_SIZE); + + if (has_metadata_uuid) + memcpy(fs_devices->metadata_uuid, + disk_super->metadata_uuid, + BTRFS_FSID_SIZE); + else + memcpy(fs_devices->metadata_uuid, + disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->fsid_change = false; } @@ -1064,11 +1112,6 @@ static void btrfs_close_bdev(struct btrfs_device *device) static void btrfs_close_one_device(struct btrfs_device *device) { struct btrfs_fs_devices *fs_devices = device->fs_devices; - struct btrfs_device *new_device; - struct rcu_string *name; - - if (device->bdev) - fs_devices->open_devices--; if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) && device->devid != BTRFS_DEV_REPLACE_DEVID) { @@ -1080,23 +1123,22 @@ static void btrfs_close_one_device(struct btrfs_device *device) fs_devices->missing_devices--; btrfs_close_bdev(device); - - new_device = btrfs_alloc_device(NULL, &device->devid, - device->uuid); - BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ - - /* Safe because we are under uuid_mutex */ - if (device->name) { - name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(!name); /* -ENOMEM */ - rcu_assign_pointer(new_device->name, name); + if (device->bdev) { + fs_devices->open_devices--; + device->bdev = NULL; } + clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); - list_replace_rcu(&device->dev_list, &new_device->dev_list); - new_device->fs_devices = device->fs_devices; + device->fs_info = NULL; + atomic_set(&device->dev_stats_ccnt, 0); + extent_io_tree_release(&device->alloc_state); - synchronize_rcu(); - btrfs_free_device(device); + /* Verify the device is back in a pristine state */ + ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)); + ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)); + ASSERT(list_empty(&device->dev_alloc_list)); + ASSERT(list_empty(&device->post_commit_list)); + ASSERT(atomic_read(&device->reada_in_flight) == 0); } static int close_fs_devices(struct btrfs_fs_devices *fs_devices) @@ -2130,7 +2172,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) { struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices; - WARN_ON(!tgtdev); mutex_lock(&fs_devices->device_list_mutex); btrfs_sysfs_rm_device_link(fs_devices, tgtdev); @@ -2875,6 +2916,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) { struct btrfs_root *root = fs_info->chunk_root; struct btrfs_trans_handle *trans; + struct btrfs_block_group *block_group; int ret; /* @@ -2898,6 +2940,12 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset) if (ret) return ret; + block_group = btrfs_lookup_block_group(fs_info, chunk_offset); + if (!block_group) + return -ENOENT; + btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group); + btrfs_put_block_group(block_group); + trans = btrfs_start_trans_remove_block_group(root->fs_info, chunk_offset); if (IS_ERR(trans)) { @@ -3881,7 +3929,11 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, } } - num_devices = btrfs_num_devices(fs_info); + /* + * rw_devices will not change at the moment, device add/delete/replace + * are excluded by EXCL_OP + */ + num_devices = fs_info->fs_devices->rw_devices; /* * SINGLE profile on-disk has no profile bit, but in-memory we have a @@ -6107,75 +6159,6 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1); } -int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, - u64 physical, u64 **logical, int *naddrs, int *stripe_len) -{ - struct extent_map *em; - struct map_lookup *map; - u64 *buf; - u64 bytenr; - u64 length; - u64 stripe_nr; - u64 rmap_len; - int i, j, nr = 0; - - em = btrfs_get_chunk_map(fs_info, chunk_start, 1); - if (IS_ERR(em)) - return -EIO; - - map = em->map_lookup; - length = em->len; - rmap_len = map->stripe_len; - - if (map->type & BTRFS_BLOCK_GROUP_RAID10) - length = div_u64(length, map->num_stripes / map->sub_stripes); - else if (map->type & BTRFS_BLOCK_GROUP_RAID0) - length = div_u64(length, map->num_stripes); - else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - length = div_u64(length, nr_data_stripes(map)); - rmap_len = map->stripe_len * nr_data_stripes(map); - } - - buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS); - BUG_ON(!buf); /* -ENOMEM */ - - for (i = 0; i < map->num_stripes; i++) { - if (map->stripes[i].physical > physical || - map->stripes[i].physical + length <= physical) - continue; - - stripe_nr = physical - map->stripes[i].physical; - stripe_nr = div64_u64(stripe_nr, map->stripe_len); - - if (map->type & BTRFS_BLOCK_GROUP_RAID10) { - stripe_nr = stripe_nr * map->num_stripes + i; - stripe_nr = div_u64(stripe_nr, map->sub_stripes); - } else if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - stripe_nr = stripe_nr * map->num_stripes + i; - } /* else if RAID[56], multiply by nr_data_stripes(). - * Alternatively, just use rmap_len below instead of - * map->stripe_len */ - - bytenr = chunk_start + stripe_nr * rmap_len; - WARN_ON(nr >= map->num_stripes); - for (j = 0; j < nr; j++) { - if (buf[j] == bytenr) - break; - } - if (j == nr) { - WARN_ON(nr >= map->num_stripes); - buf[nr++] = bytenr; - } - } - - *logical = buf; - *naddrs = nr; - *stripe_len = rmap_len; - - free_extent_map(em); - return 0; -} - static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio) { bio->bi_private = bbio->private; @@ -6476,19 +6459,14 @@ static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes) { int index = btrfs_bg_flags_to_raid_index(type); int ncopies = btrfs_raid_array[index].ncopies; + const int nparity = btrfs_raid_array[index].nparity; int data_stripes; - switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { - case BTRFS_BLOCK_GROUP_RAID5: - data_stripes = num_stripes - 1; - break; - case BTRFS_BLOCK_GROUP_RAID6: - data_stripes = num_stripes - 2; - break; - default: + if (nparity) + data_stripes = num_stripes - nparity; + else data_stripes = num_stripes / ncopies; - break; - } + return div_u64(chunk_len, data_stripes); } @@ -7327,6 +7305,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, else btrfs_dev_stat_set(dev, i, 0); } + btrfs_info(fs_info, "device stats zeroed by %s (%d)", + current->comm, task_pid_nr(current)); } else { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) if (stats->nr_items > i) |