From 9b4e675a9978800995f83af0ed90e890ca501f31 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 16 May 2019 13:39:59 +0200 Subject: btrfs: detect fast implementation of crc32c on all architectures Currently, there's only a check for a fast crc32c implementation on X86, based on the CPU flags. This is used to decide if checksumming should be offloaded to worker threads or can be calculated by the caller. However, more architectures implement a faster version of crc32c (ARM, SPARC, s390, MIPS, PowerPC), and there are also specialized hw cards. The detection is based on the driver name: all generic C implementations contain 'generic', while the specialized versions do not. Alternatively the priority could be used, but this is not currently provided by the crypto API. The flag is set per-filesystem at mount time and used for the offloading decisions. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index deb74a8c191a..024c9fadeaeb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -40,10 +40,6 @@ #include "tree-checker.h" #include "ref-verify.h" -#ifdef CONFIG_X86 -#include -#endif - #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ BTRFS_SUPER_FLAG_ERROR |\ @@ -873,14 +869,13 @@ static blk_status_t btree_submit_bio_start(void *private_data, struct bio *bio, return btree_csum_one_bio(bio); } -static int check_async_write(struct btrfs_inode *bi) +static int check_async_write(struct btrfs_fs_info *fs_info, + struct btrfs_inode *bi) { if (atomic_read(&bi->sync_writers)) return 0; -#ifdef CONFIG_X86 - if (static_cpu_has(X86_FEATURE_XMM4_2)) + if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags)) return 0; -#endif return 1; } @@ -889,7 +884,7 @@ static blk_status_t btree_submit_bio_hook(struct inode *inode, struct bio *bio, unsigned long bio_flags) { struct btrfs_fs_info 
*fs_info = btrfs_sb(inode->i_sb); - int async = check_async_write(BTRFS_I(inode)); + int async = check_async_write(fs_info, BTRFS_I(inode)); blk_status_t ret; if (bio_op(bio) != REQ_OP_WRITE) { -- cgit v1.2.3 From c8bf1b67039556884d0532f7b06acd524c90ed87 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 17 May 2019 11:43:17 +0200 Subject: btrfs: remove mapping tree structures indirection fs_info::mapping_tree is the physical<->logical mapping tree and uses the same underlying structure as extents, but is embedded to another structure. There are no other members and this indirection is useless. No functional change. Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 6 +---- fs/btrfs/dev-replace.c | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/extent-tree.c | 14 ++++++------ fs/btrfs/free-space-cache.c | 2 +- fs/btrfs/scrub.c | 8 +++---- fs/btrfs/volumes.c | 53 ++++++++++++++++++++------------------------- fs/btrfs/volumes.h | 3 +-- 8 files changed, 40 insertions(+), 50 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 02870c1bb68a..1baa8cc39571 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -99,10 +99,6 @@ static inline u32 count_max_extents(u64 size) return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); } -struct btrfs_mapping_tree { - struct extent_map_tree map_tree; -}; - static inline unsigned long btrfs_chunk_item_size(int num_stripes) { BUG_ON(num_stripes == 0); @@ -830,7 +826,7 @@ struct btrfs_fs_info { struct extent_io_tree *pinned_extents; /* logical->physical extent mapping */ - struct btrfs_mapping_tree mapping_tree; + struct extent_map_tree mapping_tree; /* * block reservation for extent, checksum, root tree and diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index b0ba5839ec08..6b2e9aa83ffa 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -723,7 +723,7 @@ static void btrfs_dev_replace_update_device_in_mapping_tree( struct btrfs_device *srcdev, 
struct btrfs_device *tgtdev) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; u64 start = 0; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 024c9fadeaeb..03e703d70701 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2684,7 +2684,7 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_LIST_HEAD(&fs_info->unused_bgs); - btrfs_mapping_init(&fs_info->mapping_tree); + extent_map_tree_init(&fs_info->mapping_tree); btrfs_init_block_rsv(&fs_info->global_block_rsv, BTRFS_BLOCK_RSV_GLOBAL); btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index caaa79a3aa48..cbe6a7278008 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9960,7 +9960,7 @@ static int find_first_block_group(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree; struct extent_map *em; - em_tree = &root->fs_info->mapping_tree.map_tree; + em_tree = &root->fs_info->mapping_tree; read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, found_key.objectid, found_key.offset); @@ -10254,21 +10254,21 @@ btrfs_create_block_group_cache(struct btrfs_fs_info *fs_info, */ static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info) { - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct extent_map *em; struct btrfs_block_group_cache *bg; u64 start = 0; int ret = 0; while (1) { - read_lock(&map_tree->map_tree.lock); + read_lock(&map_tree->lock); /* * lookup_extent_mapping will return the first extent map * intersecting the range, so setting @len to 1 is enough to * get the first chunk. 
*/ - em = lookup_extent_mapping(&map_tree->map_tree, start, 1); - read_unlock(&map_tree->map_tree.lock); + em = lookup_extent_mapping(map_tree, start, 1); + read_unlock(&map_tree->lock); if (!em) break; @@ -10864,7 +10864,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, if (remove_em) { struct extent_map_tree *em_tree; - em_tree = &fs_info->mapping_tree.map_tree; + em_tree = &fs_info->mapping_tree; write_lock(&em_tree->lock); remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); @@ -10882,7 +10882,7 @@ struct btrfs_trans_handle * btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, const u64 chunk_offset) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; unsigned int num_items; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9a76e7671938..db53ac88e159 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3358,7 +3358,7 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group) if (cleanup) { mutex_lock(&fs_info->chunk_mutex); - em_tree = &fs_info->mapping_tree.map_tree; + em_tree = &fs_info->mapping_tree; write_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, block_group->key.objectid, 1); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index f7b29f9db5e2..0827bdf4faf1 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3410,15 +3410,15 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, struct btrfs_block_group_cache *cache) { struct btrfs_fs_info *fs_info = sctx->fs_info; - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct map_lookup *map; struct extent_map *em; int i; int ret = 0; - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); - 
read_unlock(&map_tree->map_tree.lock); + read_lock(&map_tree->lock); + em = lookup_extent_mapping(map_tree, chunk_offset, 1); + read_unlock(&map_tree->lock); if (!em) { /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 10f7de0cc7e6..a3fa741c8534 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1818,7 +1818,7 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info) struct rb_node *n; u64 ret = 0; - em_tree = &fs_info->mapping_tree.map_tree; + em_tree = &fs_info->mapping_tree; read_lock(&em_tree->lock); n = rb_last(&em_tree->map.rb_root); if (n) { @@ -2941,7 +2941,7 @@ struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree; struct extent_map *em; - em_tree = &fs_info->mapping_tree.map_tree; + em_tree = &fs_info->mapping_tree; read_lock(&em_tree->lock); em = lookup_extent_mapping(em_tree, logical, length); read_unlock(&em_tree->lock); @@ -5144,7 +5144,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, em->block_len = em->len; em->orig_block_len = stripe_size; - em_tree = &info->mapping_tree.map_tree; + em_tree = &info->mapping_tree; write_lock(&em_tree->lock); ret = add_extent_mapping(em_tree, em, 0); if (ret) { @@ -5378,21 +5378,16 @@ end: return readonly; } -void btrfs_mapping_init(struct btrfs_mapping_tree *tree) -{ - extent_map_tree_init(&tree->map_tree); -} - -void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) +void btrfs_mapping_tree_free(struct extent_map_tree *tree) { struct extent_map *em; while (1) { - write_lock(&tree->map_tree.lock); - em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); + write_lock(&tree->lock); + em = lookup_extent_mapping(tree, 0, (u64)-1); if (em) - remove_extent_mapping(&tree->map_tree, em); - write_unlock(&tree->map_tree.lock); + remove_extent_mapping(tree, em); + write_unlock(&tree->lock); if (!em) break; /* once for us */ @@ -6687,7 +6682,7 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, 
struct btrfs_chunk *chunk) { struct btrfs_fs_info *fs_info = leaf->fs_info; - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct map_lookup *map; struct extent_map *em; u64 logical; @@ -6712,9 +6707,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, return ret; } - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); - read_unlock(&map_tree->map_tree.lock); + read_lock(&map_tree->lock); + em = lookup_extent_mapping(map_tree, logical, 1); + read_unlock(&map_tree->lock); /* already mapped? */ if (em && em->start <= logical && em->start + em->len > logical) { @@ -6783,9 +6778,9 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf, } - write_lock(&map_tree->map_tree.lock); - ret = add_extent_mapping(&map_tree->map_tree, em, 0); - write_unlock(&map_tree->map_tree.lock); + write_lock(&map_tree->lock); + ret = add_extent_mapping(map_tree, em, 0); + write_unlock(&map_tree->lock); if (ret < 0) { btrfs_err(fs_info, "failed to add chunk map, start=%llu len=%llu: %d", @@ -7103,14 +7098,14 @@ out_short_read: bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev) { - struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; + struct extent_map_tree *map_tree = &fs_info->mapping_tree; struct extent_map *em; u64 next_start = 0; bool ret = true; - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1); - read_unlock(&map_tree->map_tree.lock); + read_lock(&map_tree->lock); + em = lookup_extent_mapping(map_tree, 0, (u64)-1); + read_unlock(&map_tree->lock); /* No chunk at all? 
Return false anyway */ if (!em) { ret = false; @@ -7148,10 +7143,10 @@ bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, next_start = extent_map_end(em); free_extent_map(em); - read_lock(&map_tree->map_tree.lock); - em = lookup_extent_mapping(&map_tree->map_tree, next_start, + read_lock(&map_tree->lock); + em = lookup_extent_mapping(map_tree, next_start, (u64)(-1) - next_start); - read_unlock(&map_tree->map_tree.lock); + read_unlock(&map_tree->lock); } out: return ret; @@ -7612,7 +7607,7 @@ static int verify_one_dev_extent(struct btrfs_fs_info *fs_info, u64 chunk_offset, u64 devid, u64 physical_offset, u64 physical_len) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; struct btrfs_device *dev; @@ -7701,7 +7696,7 @@ out: static int verify_chunk_dev_extent_mapping(struct btrfs_fs_info *fs_info) { - struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct rb_node *node; int ret = 0; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 136a3eb64604..07156d974ac4 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -413,8 +413,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, int btrfs_read_sys_array(struct btrfs_fs_info *fs_info); int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info); int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type); -void btrfs_mapping_init(struct btrfs_mapping_tree *tree); -void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree); +void btrfs_mapping_tree_free(struct extent_map_tree *tree); blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num, int async_submit); int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, -- cgit v1.2.3 From 8c3e3582a4f0dbdaea49cfd71640a626c8bb7134 Mon Sep 17 00:00:00 2001 From: David 
Sterba Date: Fri, 17 May 2019 11:43:36 +0200 Subject: btrfs: use u8 for raid_array members The raid_attr table is now 7 * 56 = 392 bytes long, consisting of just small numbers so we don't have to use ints. New size is 7 * 32 = 224, saving 3 cachelines. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/volumes.h | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 03e703d70701..fe14b971440f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3704,7 +3704,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 || (flags & BTRFS_AVAIL_ALLOC_BIT_SINGLE)) - min_tolerated = min(min_tolerated, + min_tolerated = min_t(int, min_tolerated, btrfs_raid_array[BTRFS_RAID_SINGLE]. tolerated_failures); @@ -3713,7 +3713,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags) continue; if (!(flags & btrfs_raid_array[raid_type].bg_flag)) continue; - min_tolerated = min(min_tolerated, + min_tolerated = min_t(int, min_tolerated, btrfs_raid_array[raid_type]. 
tolerated_failures); } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 07156d974ac4..73520a6ed90a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -336,16 +336,16 @@ struct btrfs_device_info { }; struct btrfs_raid_attr { - int sub_stripes; /* sub_stripes info for map */ - int dev_stripes; /* stripes per dev */ - int devs_max; /* max devs to use */ - int devs_min; /* min devs needed */ - int tolerated_failures; /* max tolerated fail devs */ - int devs_increment; /* ndevs has to be a multiple of this */ - int ncopies; /* how many copies to data has */ - int nparity; /* number of stripes worth of bytes to store + u8 sub_stripes; /* sub_stripes info for map */ + u8 dev_stripes; /* stripes per dev */ + u8 devs_max; /* max devs to use */ + u8 devs_min; /* min devs needed */ + u8 tolerated_failures; /* max tolerated fail devs */ + u8 devs_increment; /* ndevs has to be a multiple of this */ + u8 ncopies; /* how many copies to data has */ + u8 nparity; /* number of stripes worth of bytes to store * parity information */ - int mindev_error; /* error code if min devs requisite is unmet */ + u8 mindev_error; /* error code if min devs requisite is unmet */ const char raid_name[8]; /* name of the raid */ u64 bg_flag; /* block group flag of the raid */ }; -- cgit v1.2.3 From e7e16f4882edb5935ff0ba81a0df25ae0b80b549 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 3 Jun 2019 16:58:53 +0200 Subject: btrfs: add common checksum type validation Currently btrfs is only supporting CRC32C as checksumming algorithm. As this is about to change provide a function to validate the checksum type in the superblock against all possible algorithms. This makes adding new algorithms easier as there are fewer places to adjust when adding new algorithms. 
Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index fe14b971440f..3f3bb70ca437 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -352,6 +352,16 @@ out: return ret; } +static bool btrfs_supported_super_csum(u16 csum_type) +{ + switch (csum_type) { + case BTRFS_CSUM_TYPE_CRC32: + return true; + default: + return false; + } +} + /* * Return 0 if the superblock checksum type matches the checksum value of that * algorithm. Pass the raw disk superblock data. @@ -362,7 +372,12 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, struct btrfs_super_block *disk_sb = (struct btrfs_super_block *)raw_disk_sb; u16 csum_type = btrfs_super_csum_type(disk_sb); - int ret = 0; + + if (!btrfs_supported_super_csum(csum_type)) { + btrfs_err(fs_info, "unsupported checksum algorithm %u", + csum_type); + return 1; + } if (csum_type == BTRFS_CSUM_TYPE_CRC32) { u32 crc = ~(u32)0; @@ -378,16 +393,10 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, btrfs_csum_final(crc, result); if (memcmp(raw_disk_sb, result, sizeof(result))) - ret = 1; + return 1; } - if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) { - btrfs_err(fs_info, "unsupported checksum algorithm %u", - csum_type); - ret = 1; - } - - return ret; + return 0; } int btrfs_verify_level_key(struct extent_buffer *eb, int level, @@ -2572,7 +2581,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info, ret = validate_super(fs_info, sb, -1); if (ret < 0) goto out; - if (btrfs_super_csum_type(sb) != BTRFS_CSUM_TYPE_CRC32) { + if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) { ret = -EUCLEAN; btrfs_err(fs_info, "invalid csum type, has %u want %u", btrfs_super_csum_type(sb), BTRFS_CSUM_TYPE_CRC32); -- cgit v1.2.3 
From 8dc3f22c8ba02c5a5b889406259c50b3eaa61c65 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 3 Jun 2019 16:58:54 +0200 Subject: btrfs: check for supported superblock checksum type before checksum validation Now that we have factored out the superblock checksum type validation, we can check for supported superblock checksum types before doing the actual validation of the superblock read from disk. This paves the way for further simplifications of btrfs_check_super_csum() later on. Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba [ add comment ] Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3f3bb70ca437..ce6dc95effae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2816,6 +2816,20 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } + /* + * Verify the type first, if that or the the checksum value are + * corrupted, we'll find out + */ + if (!btrfs_supported_super_csum(btrfs_super_csum_type( + (struct btrfs_super_block *) bh->b_data))) { + btrfs_err(fs_info, "unsupported checksum algorithm: %u", + btrfs_super_csum_type((struct btrfs_super_block *) + bh->b_data)); + err = -EINVAL; + brelse(bh); + goto fail_alloc; + } + /* * We want to check superblock checksum, the type is stored inside. * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). -- cgit v1.2.3 From 51bce6c9b97729835bb55ceb37febd5c8fa962dd Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 3 Jun 2019 16:58:55 +0200 Subject: btrfs: Simplify btrfs_check_super_csum() and get rid of size assumptions Now that we have already checked for a valid checksum type before calling btrfs_check_super_csum(), it can be simplified even further. While at it get rid of the implicit size assumption of the resulting checksum as well. 
This is a preparation for changing all checksum functionality to use the crypto layer later. Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 42 ++++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 26 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ce6dc95effae..c9ce0b002008 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -371,30 +371,20 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, { struct btrfs_super_block *disk_sb = (struct btrfs_super_block *)raw_disk_sb; - u16 csum_type = btrfs_super_csum_type(disk_sb); - - if (!btrfs_supported_super_csum(csum_type)) { - btrfs_err(fs_info, "unsupported checksum algorithm %u", - csum_type); - return 1; - } - - if (csum_type == BTRFS_CSUM_TYPE_CRC32) { - u32 crc = ~(u32)0; - char result[sizeof(crc)]; + u32 crc = ~(u32)0; + char result[BTRFS_CSUM_SIZE]; - /* - * The super_block structure does not span the whole - * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space - * is filled with zeros and is included in the checksum. - */ - crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, - crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, result); + /* + * The super_block structure does not span the whole + * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is + * filled with zeros and is included in the checksum. 
+ */ + crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, + crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + btrfs_csum_final(crc, result); - if (memcmp(raw_disk_sb, result, sizeof(result))) - return 1; - } + if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb))) + return 1; return 0; } @@ -2611,6 +2601,7 @@ int open_ctree(struct super_block *sb, u32 stripesize; u64 generation; u64 features; + u16 csum_type; struct btrfs_key location; struct buffer_head *bh; struct btrfs_super_block *disk_super; @@ -2820,11 +2811,10 @@ int open_ctree(struct super_block *sb, * Verify the type first, if that or the the checksum value are * corrupted, we'll find out */ - if (!btrfs_supported_super_csum(btrfs_super_csum_type( - (struct btrfs_super_block *) bh->b_data))) { + csum_type = btrfs_super_csum_type((struct btrfs_super_block *)bh->b_data); + if (!btrfs_supported_super_csum(csum_type)) { btrfs_err(fs_info, "unsupported checksum algorithm: %u", - btrfs_super_csum_type((struct btrfs_super_block *) - bh->b_data)); + csum_type); err = -EINVAL; brelse(bh); goto fail_alloc; -- cgit v1.2.3 From 6d97c6e31b553bc9f58b83ac3c4c79c17affbda8 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 3 Jun 2019 16:58:56 +0200 Subject: btrfs: add boilerplate code for directly including the crypto framework Add boilerplate code for directly including the crypto framework. This helps us flipping the switch for new algorithms. 
Reviewed-by: Nikolay Borisov Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 8 ++++++++ fs/btrfs/disk-io.c | 46 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a66ed58058d9..2e908c557fb2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -73,6 +73,7 @@ struct btrfs_ref; /* four bytes for CRC32 */ static const int btrfs_csum_sizes[] = { 4 }; +static const char *btrfs_csum_names[] = { "crc32c" }; #define BTRFS_EMPTY_DIR_SIZE 0 @@ -1163,6 +1164,8 @@ struct btrfs_fs_info { spinlock_t swapfile_pins_lock; struct rb_root swapfile_pins; + struct crypto_shash *csum_shash; + #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; struct rb_root block_tree; @@ -2454,6 +2457,11 @@ static inline int btrfs_super_csum_size(const struct btrfs_super_block *s) return btrfs_csum_sizes[t]; } +static inline const char *btrfs_super_csum_name(u16 csum_type) +{ + /* csum type is validated at mount time */ + return btrfs_csum_names[csum_type]; +} /* * The leaf data grows from end-to-front in the node. 
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c9ce0b002008..34222bbe4b48 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -2256,6 +2257,29 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, return 0; } +static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type) +{ + struct crypto_shash *csum_shash; + const char *csum_name = btrfs_super_csum_name(csum_type); + + csum_shash = crypto_alloc_shash(csum_name, 0, 0); + + if (IS_ERR(csum_shash)) { + btrfs_err(fs_info, "error allocating %s hash for checksum", + csum_name); + return PTR_ERR(csum_shash); + } + + fs_info->csum_shash = csum_shash; + + return 0; +} + +static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) +{ + crypto_free_shash(fs_info->csum_shash); +} + static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices) { @@ -2820,6 +2844,12 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } + ret = btrfs_init_csum_hash(fs_info, csum_type); + if (ret) { + err = ret; + goto fail_alloc; + } + /* * We want to check superblock checksum, the type is stored inside. * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k). @@ -2828,7 +2858,7 @@ int open_ctree(struct super_block *sb, btrfs_err(fs_info, "superblock checksum mismatch"); err = -EINVAL; brelse(bh); - goto fail_alloc; + goto fail_csum; } /* @@ -2865,11 +2895,11 @@ int open_ctree(struct super_block *sb, if (ret) { btrfs_err(fs_info, "superblock contains fatal errors"); err = -EINVAL; - goto fail_alloc; + goto fail_csum; } if (!btrfs_super_root(disk_super)) - goto fail_alloc; + goto fail_csum; /* check FS state, whether FS is broken. 
*/ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) @@ -2891,7 +2921,7 @@ int open_ctree(struct super_block *sb, ret = btrfs_parse_options(fs_info, options, sb->s_flags); if (ret) { err = ret; - goto fail_alloc; + goto fail_csum; } features = btrfs_super_incompat_flags(disk_super) & @@ -2901,7 +2931,7 @@ int open_ctree(struct super_block *sb, "cannot mount because of unsupported optional features (%llx)", features); err = -EINVAL; - goto fail_alloc; + goto fail_csum; } features = btrfs_super_incompat_flags(disk_super); @@ -2945,7 +2975,7 @@ int open_ctree(struct super_block *sb, btrfs_err(fs_info, "unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups", nodesize, sectorsize); - goto fail_alloc; + goto fail_csum; } /* @@ -2961,7 +2991,7 @@ int open_ctree(struct super_block *sb, "cannot mount read-write because of unsupported optional features (%llx)", features); err = -EINVAL; - goto fail_alloc; + goto fail_csum; } ret = btrfs_init_workqueues(fs_info, fs_devices); @@ -3339,6 +3369,8 @@ fail_tree_roots: fail_sb_buffer: btrfs_stop_all_workers(fs_info); btrfs_free_block_groups(fs_info); +fail_csum: + btrfs_free_csum_hash(fs_info); fail_alloc: fail_iput: btrfs_mapping_tree_free(&fs_info->mapping_tree); -- cgit v1.2.3 From d5178578bcd461cc79118c7a139882350fe505aa Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 3 Jun 2019 16:58:57 +0200 Subject: btrfs: directly call into crypto framework for checksumming Currently btrfs_csum_data() relies on the crc32c() wrapper around the crypto framework for calculating the CRCs. As we have our own crypto_shash structure in the fs_info now, we can directly call into the crypto framework without going through the wrapper. This way we can even remove the btrfs_csum_data() and btrfs_csum_final() wrappers. The module dependency on crc32c is preserved via MODULE_SOFTDEP("pre: crc32c"), which was previously provided by LIBCRC32C config option doing the same. 
Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/Kconfig | 3 ++- fs/btrfs/check-integrity.c | 11 +++++++---- fs/btrfs/compression.c | 17 +++++++++++------ fs/btrfs/disk-io.c | 46 +++++++++++++++++++++++----------------------- fs/btrfs/disk-io.h | 2 -- fs/btrfs/file-item.c | 18 +++++++++--------- fs/btrfs/inode.c | 23 +++++++++++++++-------- fs/btrfs/scrub.c | 30 +++++++++++++++++++++--------- fs/btrfs/super.c | 1 + 9 files changed, 89 insertions(+), 62 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 23537bc8c827..212b4a854f2c 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -2,7 +2,8 @@ config BTRFS_FS tristate "Btrfs filesystem support" - select LIBCRC32C + select CRYPTO + select CRYPTO_CRC32C select ZLIB_INFLATE select ZLIB_DEFLATE select LZO_COMPRESS diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 85774e2fa3e5..81a9731959a9 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -83,7 +83,7 @@ #include #include #include -#include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1710,9 +1710,9 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, char **datav, unsigned int num_pages) { struct btrfs_fs_info *fs_info = state->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct btrfs_header *h; u8 csum[BTRFS_CSUM_SIZE]; - u32 crc = ~(u32)0; unsigned int i; if (num_pages * PAGE_SIZE < state->metablock_size) @@ -1723,14 +1723,17 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state, if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE)) return 1; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + for (i = 0; i < num_pages; i++) { u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); size_t sublen = i ? 
PAGE_SIZE : (PAGE_SIZE - BTRFS_CSUM_SIZE); - crc = btrfs_csum_data(data, crc, sublen); + crypto_shash_update(shash, data, sublen); } - btrfs_csum_final(crc, csum); + crypto_shash_final(shash, csum); if (memcmp(csum, h->csum, state->csum_size)) return 1; diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 92291f266324..935c0c564c02 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -58,29 +59,33 @@ static int check_compressed_csum(struct btrfs_inode *inode, u64 disk_start) { struct btrfs_fs_info *fs_info = inode->root->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); int ret; struct page *page; unsigned long i; char *kaddr; - u32 csum; + u8 csum[BTRFS_CSUM_SIZE]; u8 *cb_sum = cb->sums; if (inode->flags & BTRFS_INODE_NODATASUM) return 0; + shash->tfm = fs_info->csum_shash; + for (i = 0; i < cb->nr_pages; i++) { page = cb->compressed_pages[i]; - csum = ~(u32)0; + crypto_shash_init(shash); kaddr = kmap_atomic(page); - csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE); - btrfs_csum_final(csum, (u8 *)&csum); + crypto_shash_update(shash, kaddr, PAGE_SIZE); kunmap_atomic(kaddr); + crypto_shash_final(shash, (u8 *)&csum); if (memcmp(&csum, cb_sum, csum_size)) { - btrfs_print_data_csum_error(inode, disk_start, csum, - *(u32 *)cb_sum, cb->mirror_num); + btrfs_print_data_csum_error(inode, disk_start, + *(u32 *)csum, *(u32 *)cb_sum, + cb->mirror_num); ret = -EIO; goto fail; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 34222bbe4b48..6c7dc24d4031 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -246,16 +246,6 @@ out: return em; } -u32 btrfs_csum_data(const char *data, u32 seed, size_t len) -{ - return crc32c(seed, data, len); -} - -void btrfs_csum_final(u32 crc, u8 *result) -{ - put_unaligned_le32(~crc, result); -} - /* * Compute 
the csum of a btree block and store the result to provided buffer. * @@ -263,6 +253,8 @@ void btrfs_csum_final(u32 crc, u8 *result) */ static int csum_tree_block(struct extent_buffer *buf, u8 *result) { + struct btrfs_fs_info *fs_info = buf->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; @@ -270,9 +262,12 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result) unsigned long map_start; unsigned long map_len; int err; - u32 crc = ~(u32)0; + + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); len = buf->len - offset; + while (len > 0) { /* * Note: we don't need to check for the err == 1 case here, as @@ -285,14 +280,13 @@ static int csum_tree_block(struct extent_buffer *buf, u8 *result) if (WARN_ON(err)) return err; cur_len = min(len, map_len - (offset - map_start)); - crc = btrfs_csum_data(kaddr + offset - map_start, - crc, cur_len); + crypto_shash_update(shash, kaddr + offset - map_start, cur_len); len -= cur_len; offset += cur_len; } memset(result, 0, BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, result); + crypto_shash_final(shash, result); return 0; } @@ -372,17 +366,20 @@ static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, { struct btrfs_super_block *disk_sb = (struct btrfs_super_block *)raw_disk_sb; - u32 crc = ~(u32)0; char result[BTRFS_CSUM_SIZE]; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); + + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); /* * The super_block structure does not span the whole * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space is * filled with zeros and is included in the checksum. 
*/ - crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, - crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, result); + crypto_shash_update(shash, raw_disk_sb + BTRFS_CSUM_SIZE, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + crypto_shash_final(shash, result); if (memcmp(disk_sb->csum, result, btrfs_super_csum_size(disk_sb))) return 1; @@ -3512,17 +3509,20 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) static int write_dev_supers(struct btrfs_device *device, struct btrfs_super_block *sb, int max_mirrors) { + struct btrfs_fs_info *fs_info = device->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct buffer_head *bh; int i; int ret; int errors = 0; - u32 crc; u64 bytenr; int op_flags; if (max_mirrors == 0) max_mirrors = BTRFS_SUPER_MIRROR_MAX; + shash->tfm = fs_info->csum_shash; + for (i = 0; i < max_mirrors; i++) { bytenr = btrfs_sb_offset(i); if (bytenr + BTRFS_SUPER_INFO_SIZE >= @@ -3531,10 +3531,10 @@ static int write_dev_supers(struct btrfs_device *device, btrfs_set_super_bytenr(sb, bytenr); - crc = ~(u32)0; - crc = btrfs_csum_data((const char *)sb + BTRFS_CSUM_SIZE, crc, - BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); - btrfs_csum_final(crc, sb->csum); + crypto_shash_init(shash); + crypto_shash_update(shash, (const char *)sb + BTRFS_CSUM_SIZE, + BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE); + crypto_shash_final(shash, sb->csum); /* One reference for us, and we leave it for the caller */ bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE, diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a0161aa1ea0b..e80f7c45a307 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -115,8 +115,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid, int level, struct btrfs_key *first_key); -u32 btrfs_csum_data(const char *data, u32 seed, size_t len); -void btrfs_csum_final(u32 crc, u8 *result); 
blk_status_t btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata); blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index de89fd1310a6..1a599f50837b 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -432,6 +433,7 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, u64 file_start, int contig) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); struct btrfs_ordered_sum *sums; struct btrfs_ordered_extent *ordered = NULL; char *data; @@ -465,6 +467,8 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, sums->bytenr = (u64)bio->bi_iter.bi_sector << 9; index = 0; + shash->tfm = fs_info->csum_shash; + bio_for_each_segment(bvec, bio, iter) { if (!contig) offset = page_offset(bvec.bv_page) + bvec.bv_offset; @@ -479,8 +483,6 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, - 1); for (i = 0; i < nr_sectors; i++) { - u32 tmp; - if (offset >= ordered->file_offset + ordered->len || offset < ordered->file_offset) { unsigned long bytes_left; @@ -506,15 +508,13 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio, index = 0; } - memset(&sums->sums[index], 0xff, csum_size); + crypto_shash_init(shash); data = kmap_atomic(bvec.bv_page); - tmp = btrfs_csum_data(data + bvec.bv_offset - + (i * fs_info->sectorsize), - *(u32 *)&sums->sums[index], - fs_info->sectorsize); + crypto_shash_update(shash, data + bvec.bv_offset + + (i * fs_info->sectorsize), + fs_info->sectorsize); kunmap_atomic(data); - btrfs_csum_final(tmp, - (char *)(sums->sums + index)); + crypto_shash_final(shash, (char *)(sums->sums + index)); index += csum_size; offset += fs_info->sectorsize; this_sum_bytes += 
fs_info->sectorsize; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9137bafc9376..3d356a0f8990 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3203,23 +3203,30 @@ static int __readpage_endio_check(struct inode *inode, int icsum, struct page *page, int pgoff, u64 start, size_t len) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; - u32 csum_expected; - u32 csum = ~(u32)0; + u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); + u8 *csum_expected; + u8 csum[BTRFS_CSUM_SIZE]; - csum_expected = *(((u32 *)io_bio->csum) + icsum); + csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size; kaddr = kmap_atomic(page); - csum = btrfs_csum_data(kaddr + pgoff, csum, len); - btrfs_csum_final(csum, (u8 *)&csum); - if (csum != csum_expected) + shash->tfm = fs_info->csum_shash; + + crypto_shash_init(shash); + crypto_shash_update(shash, kaddr + pgoff, len); + crypto_shash_final(shash, csum); + + if (memcmp(csum, csum_expected, csum_size)) goto zeroit; kunmap_atomic(kaddr); return 0; zeroit: - btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, - io_bio->mirror_num); + btrfs_print_data_csum_error(BTRFS_I(inode), start, *(u32 *)csum, + *(u32 *)csum_expected, io_bio->mirror_num); memset(kaddr + pgoff, 1, len); flush_dcache_page(page); kunmap_atomic(kaddr); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 0e77bffd2a5a..9f0297d529d4 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "ctree.h" #include "volumes.h" #include "disk-io.h" @@ -1787,11 +1788,12 @@ static int scrub_checksum(struct scrub_block *sblock) static int scrub_checksum_data(struct scrub_block *sblock) { struct scrub_ctx *sctx = sblock->sctx; + struct btrfs_fs_info *fs_info = sctx->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); u8 csum[BTRFS_CSUM_SIZE]; u8 *on_disk_csum; struct page *page; void *buffer; - u32 crc = ~(u32)0; 
u64 len; int index; @@ -1799,6 +1801,9 @@ static int scrub_checksum_data(struct scrub_block *sblock) if (!sblock->pagev[0]->have_csum) return 0; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + on_disk_csum = sblock->pagev[0]->csum; page = sblock->pagev[0]->page; buffer = kmap_atomic(page); @@ -1808,7 +1813,7 @@ static int scrub_checksum_data(struct scrub_block *sblock) for (;;) { u64 l = min_t(u64, len, PAGE_SIZE); - crc = btrfs_csum_data(buffer, crc, l); + crypto_shash_update(shash, buffer, l); kunmap_atomic(buffer); len -= l; if (len == 0) @@ -1820,7 +1825,7 @@ static int scrub_checksum_data(struct scrub_block *sblock) buffer = kmap_atomic(page); } - btrfs_csum_final(crc, csum); + crypto_shash_final(shash, csum); if (memcmp(csum, on_disk_csum, sctx->csum_size)) sblock->checksum_error = 1; @@ -1832,16 +1837,19 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) struct scrub_ctx *sctx = sblock->sctx; struct btrfs_header *h; struct btrfs_fs_info *fs_info = sctx->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); u8 calculated_csum[BTRFS_CSUM_SIZE]; u8 on_disk_csum[BTRFS_CSUM_SIZE]; struct page *page; void *mapped_buffer; u64 mapped_size; void *p; - u32 crc = ~(u32)0; u64 len; int index; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + BUG_ON(sblock->page_count < 1); page = sblock->pagev[0]->page; mapped_buffer = kmap_atomic(page); @@ -1875,7 +1883,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) for (;;) { u64 l = min_t(u64, len, mapped_size); - crc = btrfs_csum_data(p, crc, l); + crypto_shash_update(shash, p, l); kunmap_atomic(mapped_buffer); len -= l; if (len == 0) @@ -1889,7 +1897,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) p = mapped_buffer; } - btrfs_csum_final(crc, calculated_csum); + crypto_shash_final(shash, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) sblock->checksum_error = 1; @@ -1900,18 +1908,22 @@ static int 
scrub_checksum_super(struct scrub_block *sblock) { struct btrfs_super_block *s; struct scrub_ctx *sctx = sblock->sctx; + struct btrfs_fs_info *fs_info = sctx->fs_info; + SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); u8 calculated_csum[BTRFS_CSUM_SIZE]; u8 on_disk_csum[BTRFS_CSUM_SIZE]; struct page *page; void *mapped_buffer; u64 mapped_size; void *p; - u32 crc = ~(u32)0; int fail_gen = 0; int fail_cor = 0; u64 len; int index; + shash->tfm = fs_info->csum_shash; + crypto_shash_init(shash); + BUG_ON(sblock->page_count < 1); page = sblock->pagev[0]->page; mapped_buffer = kmap_atomic(page); @@ -1934,7 +1946,7 @@ static int scrub_checksum_super(struct scrub_block *sblock) for (;;) { u64 l = min_t(u64, len, mapped_size); - crc = btrfs_csum_data(p, crc, l); + crypto_shash_update(shash, p, l); kunmap_atomic(mapped_buffer); len -= l; if (len == 0) @@ -1948,7 +1960,7 @@ static int scrub_checksum_super(struct scrub_block *sblock) p = mapped_buffer; } - btrfs_csum_final(crc, calculated_csum); + crypto_shash_final(shash, calculated_csum); if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) ++fail_cor; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 526dbae5c4cf..6e196b8a0820 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2464,3 +2464,4 @@ late_initcall(init_btrfs_fs); module_exit(exit_btrfs_fs) MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crc32c"); -- cgit v1.2.3 From 9e967495e0e0ae8bb08f52aa71b29affc7075d31 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 22 Apr 2019 16:44:09 +0100 Subject: Btrfs: prevent send failures and crashes due to concurrent relocation Send always operates on read-only trees and always expected that while it is in progress, nothing changes in those trees. Due to that expectation and the fact that send is a read-only operation, it operates on commit roots and does not hold transaction handles. 
However, relocation can COW nodes and leafs from read-only trees, which can cause unexpected failures and crashes (hitting BUG_ONs). While send is using a node/leaf, it gets COWed, the transaction used to COW it is committed, a new transaction starts, the extent previously used for that node/leaf gets allocated, possibly for another tree, and the respective extent buffer's content changes while send is still using it. When this happens, send normally fails with EIO being returned to user space and messages like the following are found in dmesg/syslog: [ 3408.699121] BTRFS error (device sdc): parent transid verify failed on 58703872 wanted 250 found 253 [ 3441.523123] BTRFS error (device sdc): did not find backref in send_root. inode=63211, offset=0, disk_byte=5222825984 found extent=5222825984 Other times, less often, we hit a BUG_ON() because an extent buffer that send is using used to be a node, and while send is still using it, it got COWed and got reused as a leaf while send is still using it, producing the following trace: [ 3478.466280] ------------[ cut here ]------------ [ 3478.466282] kernel BUG at fs/btrfs/ctree.c:1806! [ 3478.466965] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI [ 3478.467635] CPU: 0 PID: 2165 Comm: btrfs Not tainted 5.0.0-btrfs-next-46 #1 [ 3478.468311] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [ 3478.469681] RIP: 0010:read_node_slot+0x122/0x130 [btrfs] (...) 
[ 3478.471758] RSP: 0018:ffffa437826bfaa0 EFLAGS: 00010246 [ 3478.472457] RAX: ffff961416ed7000 RBX: 000000000000003d RCX: 0000000000000002 [ 3478.473151] RDX: 000000000000003d RSI: ffff96141e387408 RDI: ffff961599b30000 [ 3478.473837] RBP: ffffa437826bfb8e R08: 0000000000000001 R09: ffffa437826bfb8e [ 3478.474515] R10: ffffa437826bfa70 R11: 0000000000000000 R12: ffff9614385c8708 [ 3478.475186] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 [ 3478.475840] FS: 00007f8e0e9cc8c0(0000) GS:ffff9615b6a00000(0000) knlGS:0000000000000000 [ 3478.476489] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3478.477127] CR2: 00007f98b67a056e CR3: 0000000005df6005 CR4: 00000000003606f0 [ 3478.477762] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 3478.478385] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 3478.479003] Call Trace: [ 3478.479600] ? do_raw_spin_unlock+0x49/0xc0 [ 3478.480202] tree_advance+0x173/0x1d0 [btrfs] [ 3478.480810] btrfs_compare_trees+0x30c/0x690 [btrfs] [ 3478.481388] ? process_extent+0x1280/0x1280 [btrfs] [ 3478.481954] btrfs_ioctl_send+0x1037/0x1270 [btrfs] [ 3478.482510] _btrfs_ioctl_send+0x80/0x110 [btrfs] [ 3478.483062] btrfs_ioctl+0x13fe/0x3120 [btrfs] [ 3478.483581] ? rq_clock_task+0x2e/0x60 [ 3478.484086] ? wake_up_new_task+0x1f3/0x370 [ 3478.484582] ? do_vfs_ioctl+0xa2/0x6f0 [ 3478.485075] ? btrfs_ioctl_get_supported_features+0x30/0x30 [btrfs] [ 3478.485552] do_vfs_ioctl+0xa2/0x6f0 [ 3478.486016] ? __fget+0x113/0x200 [ 3478.486467] ksys_ioctl+0x70/0x80 [ 3478.486911] __x64_sys_ioctl+0x16/0x20 [ 3478.487337] do_syscall_64+0x60/0x1b0 [ 3478.487751] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 3478.488159] RIP: 0033:0x7f8e0d7d4dd7 (...) 
[ 3478.489349] RSP: 002b:00007ffcf6fb4908 EFLAGS: 00000202 ORIG_RAX: 0000000000000010 [ 3478.489742] RAX: ffffffffffffffda RBX: 0000000000000105 RCX: 00007f8e0d7d4dd7 [ 3478.490142] RDX: 00007ffcf6fb4990 RSI: 0000000040489426 RDI: 0000000000000005 [ 3478.490548] RBP: 0000000000000005 R08: 00007f8e0d6f3700 R09: 00007f8e0d6f3700 [ 3478.490953] R10: 00007f8e0d6f39d0 R11: 0000000000000202 R12: 0000000000000005 [ 3478.491343] R13: 00005624e0780020 R14: 0000000000000000 R15: 0000000000000001 (...) [ 3478.493352] ---[ end trace d5f537302be4f8c8 ]--- Another possibility, much less likely to happen, is that send will not fail but the contents of the stream it produces may not be correct. To avoid this, do not allow send and relocation (balance) to run in parallel. In the long term the goal is to allow for both to be able to run concurrently without any problems, but that will take a significant effort in development and testing. Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 7 +++++++ fs/btrfs/disk-io.c | 2 ++ fs/btrfs/send.c | 14 ++++++++++++++ fs/btrfs/volumes.c | 8 ++++++++ 4 files changed, 31 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 31198499f175..02a29516dacf 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -793,6 +793,7 @@ enum { /* * Indicate that balance has been set up from the ioctl and is in the * main phase. The fs_info::balance_ctl is initialized. + * Set and cleared while holding fs_info::balance_mutex. */ BTRFS_FS_BALANCE_RUNNING, @@ -1175,6 +1176,12 @@ struct btrfs_fs_info { struct crypto_shash *csum_shash; + /* + * Number of send operations in progress. + * Updated while holding fs_info::balance_mutex. 
+ */ + int send_in_progress; + #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; struct rb_root block_tree; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6c7dc24d4031..41a2bd2e0c56 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2809,6 +2809,8 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->swapfile_pins_lock); fs_info->swapfile_pins = RB_ROOT; + fs_info->send_in_progress = 0; + ret = btrfs_alloc_stripe_hash_table(fs_info); if (ret) { err = ret; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 49edcc709a99..69b59bf75882 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -6929,9 +6929,23 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) if (ret) goto out; + mutex_lock(&fs_info->balance_mutex); + if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) { + mutex_unlock(&fs_info->balance_mutex); + btrfs_warn_rl(fs_info, + "cannot run send because a balance operation is in progress"); + ret = -EAGAIN; + goto out; + } + fs_info->send_in_progress++; + mutex_unlock(&fs_info->balance_mutex); + current->journal_info = BTRFS_SEND_TRANS_STUB; ret = send_subvol(sctx); current->journal_info = NULL; + mutex_lock(&fs_info->balance_mutex); + fs_info->send_in_progress--; + mutex_unlock(&fs_info->balance_mutex); if (ret < 0) goto out; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9e5167a0e406..41813813f840 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4161,6 +4161,14 @@ int btrfs_balance(struct btrfs_fs_info *fs_info, btrfs_bg_type_to_raid_name(data_target)); } + if (fs_info->send_in_progress) { + btrfs_warn_rl(fs_info, +"cannot run balance while send operations are in progress (%d in progress)", + fs_info->send_in_progress); + ret = -EAGAIN; + goto out; + } + ret = insert_balance_item(fs_info, bctl); if (ret && ret != -EEXIST) goto out; -- cgit v1.2.3