diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 249 |
1 files changed, 149 insertions, 100 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 50c5a8762aed..8222f6f74147 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1310,6 +1310,8 @@ again: if (ret) { btrfs_error(root->fs_info, ret, "Failed to remove dev extent item"); + } else { + trans->transaction->have_free_bgs = 1; } out: btrfs_free_path(path); @@ -4196,7 +4198,7 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) { - if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6))) + if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK)) return; btrfs_set_fs_incompat(info, RAID56); @@ -4803,10 +4805,8 @@ unsigned long btrfs_full_stripe_len(struct btrfs_root *root, BUG_ON(em->start > logical || em->start + em->len < logical); map = (struct map_lookup *)em->bdev; - if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) len = map->stripe_len * nr_data_stripes(map); - } free_extent_map(em); return len; } @@ -4826,8 +4826,7 @@ int btrfs_is_parity_mirror(struct btrfs_mapping_tree *map_tree, BUG_ON(em->start > logical || em->start + em->len < logical); map = (struct map_lookup *)em->bdev; - if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6)) + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) ret = 1; free_extent_map(em); return ret; @@ -4876,32 +4875,24 @@ static inline int parity_smaller(u64 a, u64 b) } /* Bubble-sort the stripe set to put the parity/syndrome stripes last */ -static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) +static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes) { struct btrfs_bio_stripe s; - int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; int i; u64 l; int again = 1; - int m; while (again) { again = 0; - for (i = 0; i < real_stripes - 1; i++) { - if (parity_smaller(raid_map[i], raid_map[i+1])) { + for (i = 0; i < num_stripes - 1; i++) { + if (parity_smaller(bbio->raid_map[i], + bbio->raid_map[i+1])) { s = bbio->stripes[i]; - l = raid_map[i]; + l = bbio->raid_map[i]; bbio->stripes[i] = bbio->stripes[i+1]; - raid_map[i] = raid_map[i+1]; + bbio->raid_map[i] = bbio->raid_map[i+1]; bbio->stripes[i+1] = s; - raid_map[i+1] = l; - - if (bbio->tgtdev_map) { - m = bbio->tgtdev_map[i]; - bbio->tgtdev_map[i] = - bbio->tgtdev_map[i + 1]; - bbio->tgtdev_map[i + 1] = m; - } + bbio->raid_map[i+1] = l; again = 1; } @@ -4909,10 +4900,48 @@ static void sort_parity_stripes(struct btrfs_bio *bbio, u64 *raid_map) } } +static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes) +{ + struct btrfs_bio *bbio = kzalloc( + /* the size of the btrfs_bio */ + sizeof(struct btrfs_bio) + + /* plus the variable array for the stripes */ + sizeof(struct btrfs_bio_stripe) * (total_stripes) + + /* plus the variable array for the tgt dev */ + sizeof(int) * (real_stripes) + + /* + * plus the raid_map, which includes both the tgt dev + * and the stripes + */ + sizeof(u64) * (total_stripes), + GFP_NOFS); + if (!bbio) + return NULL; + + atomic_set(&bbio->error, 0); + atomic_set(&bbio->refs, 1); + + return bbio; +} + +void btrfs_get_bbio(struct btrfs_bio *bbio) +{ + WARN_ON(!atomic_read(&bbio->refs)); + atomic_inc(&bbio->refs); +} + +void btrfs_put_bbio(struct btrfs_bio *bbio) +{ + if (!bbio) + return; + if (atomic_dec_and_test(&bbio->refs)) + kfree(bbio); +} + static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, - int mirror_num, u64 **raid_map_ret) + int mirror_num, int need_raid_map) { struct extent_map *em; struct map_lookup *map; @@ -4925,7 +4954,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 stripe_nr_orig; u64 stripe_nr_end; u64 stripe_len; - u64 *raid_map = NULL; int stripe_index; int i; int ret = 0; @@ -4976,7 +5004,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, stripe_offset = offset - stripe_offset; /* if we're here for raid56, we need to know the stripe aligned start */ - if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { unsigned long full_stripe_len = stripe_len * nr_data_stripes(map); raid56_full_stripe_start = offset; @@ -4989,8 +5017,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (rw & REQ_DISCARD) { /* we don't discard raid56 yet */ - if (map->type & - (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { ret = -EOPNOTSUPP; goto out; } @@ -5000,7 +5027,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, /* For writes to RAID[56], allow a full stripeset across all disks. For other RAID types and for RAID[56] reads, just allow a single stripe (on a single disk). */ - if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6) && + if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && (rw & REQ_WRITE)) { max_len = stripe_len * nr_data_stripes(map) - (offset - raid56_full_stripe_start); @@ -5047,7 +5074,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, u64 physical_of_found = 0; ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, - logical, &tmp_length, &tmp_bbio, 0, NULL); + logical, &tmp_length, &tmp_bbio, 0, 0); if (ret) { WARN_ON(tmp_bbio != NULL); goto out; @@ -5061,7 +5088,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, * is not left of the left cursor */ ret = -EIO; - kfree(tmp_bbio); + btrfs_put_bbio(tmp_bbio); goto out; } @@ -5096,11 +5123,11 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } else { WARN_ON(1); ret = -EIO; - kfree(tmp_bbio); + btrfs_put_bbio(tmp_bbio); goto out; } - kfree(tmp_bbio); + btrfs_put_bbio(tmp_bbio); } else if (mirror_num > map->num_stripes) { mirror_num = 0; } @@ -5166,15 +5193,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, mirror_num = stripe_index - old_stripe_index + 1; } - } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6)) { - u64 tmp; - - if (raid_map_ret && + } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { + if (need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || mirror_num > 1)) { - int i, rot; - /* push stripe_nr back to the start of the full stripe */ stripe_nr = raid56_full_stripe_start; do_div(stripe_nr, stripe_len * nr_data_stripes(map)); @@ -5183,32 +5205,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_stripes = map->num_stripes; max_errors = nr_parity_stripes(map); - raid_map = kmalloc_array(num_stripes, sizeof(u64), - GFP_NOFS); - if (!raid_map) { - ret = -ENOMEM; - goto out; - } - - /* Work out the disk rotation on this stripe-set */ - tmp = stripe_nr; - rot = do_div(tmp, num_stripes); - - /* Fill in the logical address of each stripe */ - tmp = stripe_nr * nr_data_stripes(map); - for (i = 0; i < nr_data_stripes(map); i++) - raid_map[(i+rot) % num_stripes] = - em->start + (tmp + i) * map->stripe_len; - - raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE; - if (map->type & BTRFS_BLOCK_GROUP_RAID6) - raid_map[(i+rot+1) % num_stripes] = - RAID6_Q_STRIPE; - *length = map->stripe_len; stripe_index = 0; stripe_offset = 0; } else { + u64 tmp; + /* * Mirror #0 or #1 means the original data block. * Mirror #2 is RAID5 parity block. @@ -5246,17 +5248,42 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, tgtdev_indexes = num_stripes; } - bbio = kzalloc(btrfs_bio_size(num_alloc_stripes, tgtdev_indexes), - GFP_NOFS); + bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes); if (!bbio) { - kfree(raid_map); ret = -ENOMEM; goto out; } - atomic_set(&bbio->error, 0); if (dev_replace_is_ongoing) bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes); + /* build raid_map */ + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && + need_raid_map && ((rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) || + mirror_num > 1)) { + u64 tmp; + int i, rot; + + bbio->raid_map = (u64 *)((void *)bbio->stripes + + sizeof(struct btrfs_bio_stripe) * + num_alloc_stripes + + sizeof(int) * tgtdev_indexes); + + /* Work out the disk rotation on this stripe-set */ + tmp = stripe_nr; + rot = do_div(tmp, num_stripes); + + /* Fill in the logical address of each stripe */ + tmp = stripe_nr * nr_data_stripes(map); + for (i = 0; i < nr_data_stripes(map); i++) + bbio->raid_map[(i+rot) % num_stripes] = + em->start + (tmp + i) * map->stripe_len; + + bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE; + if (map->type & BTRFS_BLOCK_GROUP_RAID6) + bbio->raid_map[(i+rot+1) % num_stripes] = + RAID6_Q_STRIPE; + } + if (rw & REQ_DISCARD) { int factor = 0; int sub_stripes = 0; @@ -5340,6 +5367,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) max_errors = btrfs_chunk_max_errors(map); + if (bbio->raid_map) + sort_parity_stripes(bbio, num_stripes); + tgtdev_indexes = 0; if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && dev_replace->tgtdev != NULL) { @@ -5427,6 +5457,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } *bbio_ret = bbio; + bbio->map_type = map->type; bbio->num_stripes = num_stripes; bbio->max_errors = max_errors; bbio->mirror_num = mirror_num; @@ -5443,10 +5474,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, bbio->stripes[0].physical = physical_to_patch_in_first_stripe; bbio->mirror_num = map->num_stripes + 1; } - if (raid_map) { - sort_parity_stripes(bbio, raid_map); - *raid_map_ret = raid_map; - } out: if (dev_replace_is_ongoing) btrfs_dev_replace_unlock(dev_replace); @@ -5459,17 +5486,17 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, struct btrfs_bio **bbio_ret, int mirror_num) { return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, - mirror_num, NULL); + mirror_num, 0); } /* For Scrub/replace */ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, int rw, u64 logical, u64 *length, struct btrfs_bio **bbio_ret, int mirror_num, - u64 **raid_map_ret) + int need_raid_map) { return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, - mirror_num, raid_map_ret); + mirror_num, need_raid_map); } int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, @@ -5511,8 +5538,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, do_div(length, map->num_stripes / map->sub_stripes); else if (map->type & BTRFS_BLOCK_GROUP_RAID0) do_div(length, map->num_stripes); - else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | - BTRFS_BLOCK_GROUP_RAID6)) { + else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { do_div(length, nr_data_stripes(map)); rmap_len = map->stripe_len * nr_data_stripes(map); } @@ -5565,7 +5591,7 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e bio_endio_nodec(bio, err); else bio_endio(bio, err); - kfree(bbio); + btrfs_put_bbio(bbio); } static void btrfs_end_bio(struct bio *bio, int err) @@ -5808,7 +5834,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, u64 logical = (u64)bio->bi_iter.bi_sector << 9; u64 length = 0; u64 map_length; - u64 *raid_map = NULL; int ret; int dev_nr = 0; int total_devs = 1; @@ -5819,7 +5844,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, btrfs_bio_counter_inc_blocked(root->fs_info); ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, - mirror_num, &raid_map); + mirror_num, 1); if (ret) { btrfs_bio_counter_dec(root->fs_info); return ret; @@ -5832,15 +5857,13 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, bbio->fs_info = root->fs_info; atomic_set(&bbio->stripes_pending, bbio->num_stripes); - if (raid_map) { + if (bbio->raid_map) { /* In this case, map_length has been set to the length of a single stripe; not the whole write */ if (rw & WRITE) { - ret = raid56_parity_write(root, bio, bbio, - raid_map, map_length); + ret = raid56_parity_write(root, bio, bbio, map_length); } else { - ret = raid56_parity_recover(root, bio, bbio, - raid_map, map_length, + ret = raid56_parity_recover(root, bio, bbio, map_length, mirror_num, 1); } @@ -6238,17 +6261,22 @@ int btrfs_read_sys_array(struct btrfs_root *root) struct extent_buffer *sb; struct btrfs_disk_key *disk_key; struct btrfs_chunk *chunk; - u8 *ptr; - unsigned long sb_ptr; + u8 *array_ptr; + unsigned long sb_array_offset; int ret = 0; u32 num_stripes; u32 array_size; u32 len = 0; - u32 cur; + u32 cur_offset; struct btrfs_key key; - sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET, - BTRFS_SUPER_INFO_SIZE); + ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize); + /* + * This will create extent buffer of nodesize, superblock size is + * fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size, this will + * overallocate but we can keep it as-is, only the first page is used. + */ + sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET); if (!sb) return -ENOMEM; btrfs_set_buffer_uptodate(sb); @@ -6271,35 +6299,56 @@ int btrfs_read_sys_array(struct btrfs_root *root) write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); - ptr = super_copy->sys_chunk_array; - sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); - cur = 0; + array_ptr = super_copy->sys_chunk_array; + sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array); + cur_offset = 0; + + while (cur_offset < array_size) { + disk_key = (struct btrfs_disk_key *)array_ptr; + len = sizeof(*disk_key); + if (cur_offset + len > array_size) + goto out_short_read; - while (cur < array_size) { - disk_key = (struct btrfs_disk_key *)ptr; btrfs_disk_key_to_cpu(&key, disk_key); - len = sizeof(*disk_key); ptr += len; - sb_ptr += len; - cur += len; + array_ptr += len; + sb_array_offset += len; + cur_offset += len; if (key.type == BTRFS_CHUNK_ITEM_KEY) { - chunk = (struct btrfs_chunk *)sb_ptr; + chunk = (struct btrfs_chunk *)sb_array_offset; + /* + * At least one btrfs_chunk with one stripe must be + * present, exact stripe count check comes afterwards + */ + len = btrfs_chunk_item_size(1); + if (cur_offset + len > array_size) + goto out_short_read; + + num_stripes = btrfs_chunk_num_stripes(sb, chunk); + len = btrfs_chunk_item_size(num_stripes); + if (cur_offset + len > array_size) + goto out_short_read; + ret = read_one_chunk(root, &key, sb, chunk); if (ret) break; - num_stripes = btrfs_chunk_num_stripes(sb, chunk); - len = btrfs_chunk_item_size(num_stripes); } else { ret = -EIO; break; } - ptr += len; - sb_ptr += len; - cur += len; + array_ptr += len; + sb_array_offset += len; + cur_offset += len; } free_extent_buffer(sb); return ret; + +out_short_read: + printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n", + len, cur_offset); + free_extent_buffer(sb); + return -EIO; } int btrfs_read_chunk_tree(struct btrfs_root *root) |