Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r-- | fs/btrfs/scrub.c | 230
1 file changed, 99 insertions, 131 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 8f6ceea33969..2e9a322773f2 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -39,21 +39,20 @@ struct scrub_block;
 struct scrub_ctx;
 
 /*
- * the following three values only influence the performance.
+ * The following three values only influence the performance.
+ *
  * The last one configures the number of parallel and outstanding I/O
- * operations. The first two values configure an upper limit for the number
+ * operations. The first one configures an upper limit for the number
  * of (dynamically allocated) pages that are added to a bio.
  */
-#define SCRUB_PAGES_PER_RD_BIO      32    /* 128k per bio */
-#define SCRUB_PAGES_PER_WR_BIO      32    /* 128k per bio */
-#define SCRUB_BIOS_PER_SCTX         64    /* 8MB per device in flight */
+#define SCRUB_PAGES_PER_BIO         32    /* 128KiB per bio for x86 */
+#define SCRUB_BIOS_PER_SCTX         64    /* 8MiB per device in flight for x86 */
 
 /*
- * the following value times PAGE_SIZE needs to be large enough to match the
+ * The following value times PAGE_SIZE needs to be large enough to match the
  * largest node/leaf/sector size that shall be supported.
- * Values larger than BTRFS_STRIPE_LEN are not supported.
  */
-#define SCRUB_MAX_PAGES_PER_BLOCK   16    /* 64k per node/leaf/sector */
+#define SCRUB_MAX_PAGES_PER_BLOCK   (BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K)
 
 struct scrub_recover {
         refcount_t              refs;
@@ -88,11 +87,7 @@ struct scrub_bio {
         blk_status_t            status;
         u64                     logical;
         u64                     physical;
-#if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO
-        struct scrub_page       *pagev[SCRUB_PAGES_PER_WR_BIO];
-#else
-        struct scrub_page       *pagev[SCRUB_PAGES_PER_RD_BIO];
-#endif
+        struct scrub_page       *pagev[SCRUB_PAGES_PER_BIO];
         int                     page_count;
         int                     next_free;
         struct btrfs_work       work;
@@ -163,7 +158,7 @@ struct scrub_ctx {
         struct list_head        csum_list;
         atomic_t                cancel_req;
         int                     readonly;
-        int                     pages_per_rd_bio;
+        int                     pages_per_bio;
 
         /* State of IO submission throttling affecting the associated device */
         ktime_t                 throttle_deadline;
@@ -174,7 +169,6 @@ struct scrub_ctx {
 
         struct scrub_bio        *wr_curr_bio;
         struct mutex            wr_lock;
-        int                     pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */
         struct btrfs_device     *wr_tgtdev;
         bool                    flush_all_writes;
 
@@ -578,7 +572,7 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
                 goto nomem;
         refcount_set(&sctx->refs, 1);
         sctx->is_dev_replace = is_dev_replace;
-        sctx->pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO;
+        sctx->pages_per_bio = SCRUB_PAGES_PER_BIO;
         sctx->curr = -1;
         sctx->fs_info = fs_info;
         INIT_LIST_HEAD(&sctx->csum_list);
@@ -616,7 +610,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
         sctx->wr_curr_bio = NULL;
         if (is_dev_replace) {
                 WARN_ON(!fs_info->dev_replace.tgtdev);
-                sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO;
                 sctx->wr_tgtdev = fs_info->dev_replace.tgtdev;
                 sctx->flush_all_writes = false;
         }
@@ -758,7 +751,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
 
         eb = path->nodes[0];
         ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
-        item_size = btrfs_item_size_nr(eb, path->slots[0]);
+        item_size = btrfs_item_size(eb, path->slots[0]);
 
         if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                 do {
@@ -852,8 +845,8 @@
         have_csum = sblock_to_check->pagev[0]->have_csum;
         dev = sblock_to_check->pagev[0]->dev;
 
-        if (btrfs_is_zoned(fs_info) && !sctx->is_dev_replace)
-                return btrfs_repair_one_zone(fs_info, logical);
+        if (!sctx->is_dev_replace && btrfs_repair_one_zone(fs_info, logical))
+                return 0;
 
         /*
          * We must use GFP_NOFS because the scrub task might be waiting for a
@@ -1313,7 +1306,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
                 recover->bioc = bioc;
                 recover->map_length = mapped_length;
 
-                BUG_ON(page_index >= SCRUB_MAX_PAGES_PER_BLOCK);
+                ASSERT(page_index < SCRUB_MAX_PAGES_PER_BLOCK);
 
                 nmirrors = min(scrub_nr_raid_mirrors(bioc), BTRFS_MAX_MIRRORS);
@@ -1675,7 +1668,7 @@ again:
                 sbio->dev = sctx->wr_tgtdev;
                 bio = sbio->bio;
                 if (!bio) {
-                        bio = btrfs_bio_alloc(sctx->pages_per_wr_bio);
+                        bio = btrfs_bio_alloc(sctx->pages_per_bio);
                         sbio->bio = bio;
                 }
 
@@ -1708,7 +1701,7 @@ again:
         sbio->pagev[sbio->page_count] = spage;
         scrub_page_get(spage);
         sbio->page_count++;
-        if (sbio->page_count == sctx->pages_per_wr_bio)
+        if (sbio->page_count == sctx->pages_per_bio)
                 scrub_wr_submit(sctx);
         mutex_unlock(&sctx->wr_lock);
 
@@ -1755,7 +1748,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
         struct scrub_ctx *sctx = sbio->sctx;
         int i;
 
-        WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO);
+        ASSERT(sbio->page_count <= SCRUB_PAGES_PER_BIO);
         if (sbio->status) {
                 struct btrfs_dev_replace *dev_replace =
                         &sbio->sctx->fs_info->dev_replace;
@@ -2101,7 +2094,7 @@ again:
                 sbio->dev = spage->dev;
                 bio = sbio->bio;
                 if (!bio) {
-                        bio = btrfs_bio_alloc(sctx->pages_per_rd_bio);
+                        bio = btrfs_bio_alloc(sctx->pages_per_bio);
                         sbio->bio = bio;
                 }
 
@@ -2135,7 +2128,7 @@ again:
         scrub_block_get(sblock); /* one for the page added to the bio */
         atomic_inc(&sblock->outstanding_pages);
         sbio->page_count++;
-        if (sbio->page_count == sctx->pages_per_rd_bio)
+        if (sbio->page_count == sctx->pages_per_bio)
                 scrub_submit(sctx);
 
         return 0;
@@ -2297,7 +2290,7 @@ leave_nomem:
                         scrub_block_put(sblock);
                         return -ENOMEM;
                 }
-                BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+                ASSERT(index < SCRUB_MAX_PAGES_PER_BLOCK);
                 scrub_page_get(spage);
                 sblock->pagev[index] = spage;
                 spage->sblock = sblock;
@@ -2369,7 +2362,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work)
         struct scrub_ctx *sctx = sbio->sctx;
         int i;
 
-        BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO);
+        ASSERT(sbio->page_count <= SCRUB_PAGES_PER_BIO);
         if (sbio->status) {
                 for (i = 0; i < sbio->page_count; i++) {
                         struct scrub_page *spage = sbio->pagev[i];
@@ -2631,7 +2624,7 @@ leave_nomem:
                         scrub_block_put(sblock);
                         return -ENOMEM;
                 }
-                BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK);
+                ASSERT(index < SCRUB_MAX_PAGES_PER_BLOCK);
                 /* For scrub block */
                 scrub_page_get(spage);
                 sblock->pagev[index] = spage;
@@ -2892,15 +2885,15 @@ static void scrub_parity_put(struct scrub_parity *sparity)
 
 static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
                                                   struct map_lookup *map,
                                                   struct btrfs_device *sdev,
-                                                  struct btrfs_path *path,
                                                   u64 logic_start, u64 logic_end)
 {
         struct btrfs_fs_info *fs_info = sctx->fs_info;
-        struct btrfs_root *root = fs_info->extent_root;
-        struct btrfs_root *csum_root = fs_info->csum_root;
+        struct btrfs_root *root = btrfs_extent_root(fs_info, logic_start);
+        struct btrfs_root *csum_root;
         struct btrfs_extent_item *extent;
         struct btrfs_io_context *bioc = NULL;
+        struct btrfs_path *path;
         u64 flags;
         int ret;
         int slot;
@@ -2919,6 +2912,16 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
         int extent_mirror_num;
         int stop_loop = 0;
 
+        path = btrfs_alloc_path();
+        if (!path) {
+                spin_lock(&sctx->stat_lock);
+                sctx->stat.malloc_errors++;
+                spin_unlock(&sctx->stat_lock);
+                return -ENOMEM;
+        }
+        path->search_commit_root = 1;
+        path->skip_locking = 1;
+
         ASSERT(map->stripe_len <= U32_MAX);
         nsectors = map->stripe_len >> fs_info->sectorsize_bits;
         bitmap_len = scrub_calc_parity_bitmap_len(nsectors);
@@ -2928,6 +2931,7 @@ static noinline_for_stack int scrub_raid56_parity(struct scrub_ctx *sctx,
                 spin_lock(&sctx->stat_lock);
                 sctx->stat.malloc_errors++;
                 spin_unlock(&sctx->stat_lock);
+                btrfs_free_path(path);
                 return -ENOMEM;
         }
 
@@ -3060,6 +3064,7 @@ again:
                         extent_dev = bioc->stripes[0].dev;
                         btrfs_put_bioc(bioc);
 
+                        csum_root = btrfs_csum_root(fs_info, extent_logical);
                         ret = btrfs_lookup_csums_range(csum_root,
                                                 extent_logical,
                                                 extent_logical + extent_len - 1,
@@ -3116,7 +3121,7 @@ out:
         scrub_wr_submit(sctx);
         mutex_unlock(&sctx->wr_lock);
 
-        btrfs_release_path(path);
+        btrfs_free_path(path);
         return ret < 0 ? ret : 0;
 }
 
@@ -3161,17 +3166,18 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical,
 }
 
 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
+                                           struct btrfs_block_group *bg,
                                            struct map_lookup *map,
                                            struct btrfs_device *scrub_dev,
-                                           int num, u64 base, u64 length,
-                                           struct btrfs_block_group *cache)
+                                           int stripe_index, u64 dev_extent_len)
 {
-        struct btrfs_path *path, *ppath;
+        struct btrfs_path *path;
         struct btrfs_fs_info *fs_info = sctx->fs_info;
-        struct btrfs_root *root = fs_info->extent_root;
-        struct btrfs_root *csum_root = fs_info->csum_root;
+        struct btrfs_root *root;
+        struct btrfs_root *csum_root;
         struct btrfs_extent_item *extent;
         struct blk_plug plug;
+        const u64 chunk_logical = bg->start;
         u64 flags;
         int ret;
         int slot;
@@ -3183,10 +3189,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         u64 physical_end;
         u64 generation;
         int mirror_num;
-        struct reada_control *reada1;
-        struct reada_control *reada2;
         struct btrfs_key key;
-        struct btrfs_key key_end;
         u64 increment = map->stripe_len;
         u64 offset;
         u64 extent_logical;
@@ -3202,25 +3205,26 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         int extent_mirror_num;
         int stop_loop = 0;
 
-        physical = map->stripes[num].physical;
+        physical = map->stripes[stripe_index].physical;
         offset = 0;
-        nstripes = div64_u64(length, map->stripe_len);
+        nstripes = div64_u64(dev_extent_len, map->stripe_len);
         mirror_num = 1;
         increment = map->stripe_len;
         if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-                offset = map->stripe_len * num;
+                offset = map->stripe_len * stripe_index;
                 increment = map->stripe_len * map->num_stripes;
         } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                 int factor = map->num_stripes / map->sub_stripes;
-                offset = map->stripe_len * (num / map->sub_stripes);
+                offset = map->stripe_len * (stripe_index / map->sub_stripes);
                 increment = map->stripe_len * factor;
-                mirror_num = num % map->sub_stripes + 1;
+                mirror_num = stripe_index % map->sub_stripes + 1;
         } else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
-                mirror_num = num % map->num_stripes + 1;
+                mirror_num = stripe_index % map->num_stripes + 1;
         } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-                mirror_num = num % map->num_stripes + 1;
+                mirror_num = stripe_index % map->num_stripes + 1;
         } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-                get_raid56_logic_offset(physical, num, map, &offset, NULL);
+                get_raid56_logic_offset(physical, stripe_index, map, &offset,
+                                        NULL);
                 increment = map->stripe_len * nr_data_stripes(map);
         }
 
@@ -3228,12 +3232,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         if (!path)
                 return -ENOMEM;
 
-        ppath = btrfs_alloc_path();
-        if (!ppath) {
-                btrfs_free_path(path);
-                return -ENOMEM;
-        }
-
         /*
          * work on commit root. The related disk blocks are static as
          * long as COW is applied. This means, it is save to rewrite
@@ -3241,20 +3239,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
          */
         path->search_commit_root = 1;
         path->skip_locking = 1;
+        path->reada = READA_FORWARD;
 
-        ppath->search_commit_root = 1;
-        ppath->skip_locking = 1;
-        /*
-         * trigger the readahead for extent tree csum tree and wait for
-         * completion. During readahead, the scrub is officially paused
-         * to not hold off transaction commits
-         */
-        logical = base + offset;
+        logical = chunk_logical + offset;
         physical_end = physical + nstripes * map->stripe_len;
         if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-                get_raid56_logic_offset(physical_end, num,
+                get_raid56_logic_offset(physical_end, stripe_index,
                                         map, &logic_end, NULL);
-                logic_end += base;
+                logic_end += chunk_logical;
         } else {
                 logic_end = logical + increment * nstripes;
         }
@@ -3262,32 +3254,8 @@
                    atomic_read(&sctx->bios_in_flight) == 0);
         scrub_blocked_if_needed(fs_info);
 
-        /* FIXME it might be better to start readahead at commit root */
-        key.objectid = logical;
-        key.type = BTRFS_EXTENT_ITEM_KEY;
-        key.offset = (u64)0;
-        key_end.objectid = logic_end;
-        key_end.type = BTRFS_METADATA_ITEM_KEY;
-        key_end.offset = (u64)-1;
-        reada1 = btrfs_reada_add(root, &key, &key_end);
-
-        if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
-                key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
-                key.type = BTRFS_EXTENT_CSUM_KEY;
-                key.offset = logical;
-                key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
-                key_end.type = BTRFS_EXTENT_CSUM_KEY;
-                key_end.offset = logic_end;
-                reada2 = btrfs_reada_add(csum_root, &key, &key_end);
-        } else {
-                reada2 = NULL;
-        }
-
-        if (!IS_ERR(reada1))
-                btrfs_reada_wait(reada1);
-        if (!IS_ERR_OR_NULL(reada2))
-                btrfs_reada_wait(reada2);
-
+        root = btrfs_extent_root(fs_info, logical);
+        csum_root = btrfs_csum_root(fs_info, logical);
 
         /*
          * collect all data csums for the stripe to avoid seeking during
@@ -3333,16 +3301,16 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                 }
 
                 if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
-                        ret = get_raid56_logic_offset(physical, num, map,
-                                                      &logical,
+                        ret = get_raid56_logic_offset(physical, stripe_index,
                                                       map, &logical,
                                                       &stripe_logical);
-                        logical += base;
+                        logical += chunk_logical;
                         if (ret) {
                                 /* it is parity strip */
-                                stripe_logical += base;
+                                stripe_logical += chunk_logical;
                                 stripe_end = stripe_logical + increment;
                                 ret = scrub_raid56_parity(sctx, map, scrub_dev,
-                                                          ppath, stripe_logical,
+                                                          stripe_logical,
                                                           stripe_end);
                                 if (ret)
                                         goto out;
@@ -3419,13 +3387,13 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                          * Continuing would prevent reusing its device extents
                          * for new block groups for a long time.
                          */
-                        spin_lock(&cache->lock);
-                        if (cache->removed) {
-                                spin_unlock(&cache->lock);
+                        spin_lock(&bg->lock);
+                        if (bg->removed) {
+                                spin_unlock(&bg->lock);
                                 ret = 0;
                                 goto out;
                         }
-                        spin_unlock(&cache->lock);
+                        spin_unlock(&bg->lock);
 
                         extent = btrfs_item_ptr(l, slot,
                                                 struct btrfs_extent_item);
@@ -3504,16 +3472,16 @@ again:
 loop:
                         physical += map->stripe_len;
                         ret = get_raid56_logic_offset(physical,
-                                                      num, map, &logical,
-                                                      &stripe_logical);
-                        logical += base;
+                                                      stripe_index, map,
+                                                      &logical, &stripe_logical);
+                        logical += chunk_logical;
                         if (ret && physical < physical_end) {
-                                stripe_logical += base;
+                                stripe_logical += chunk_logical;
                                 stripe_end = stripe_logical + increment;
                                 ret = scrub_raid56_parity(sctx,
-                                                map, scrub_dev, ppath,
+                                                map, scrub_dev,
                                                 stripe_logical,
                                                 stripe_end);
                                 if (ret)
@@ -3543,8 +3511,8 @@ skip:
                 physical += map->stripe_len;
                 spin_lock(&sctx->stat_lock);
                 if (stop_loop)
-                        sctx->stat.last_physical = map->stripes[num].physical +
-                                                   length;
+                        sctx->stat.last_physical = map->stripes[stripe_index].physical +
+                                                   dev_extent_len;
                 else
                         sctx->stat.last_physical = physical;
                 spin_unlock(&sctx->stat_lock);
@@ -3560,14 +3528,14 @@ out:
         blk_finish_plug(&plug);
         btrfs_free_path(path);
-        btrfs_free_path(ppath);
 
         if (sctx->is_dev_replace && ret >= 0) {
                 int ret2;
 
-                ret2 = sync_write_pointer_for_zoned(sctx, base + offset,
-                                                    map->stripes[num].physical,
-                                                    physical_end);
+                ret2 = sync_write_pointer_for_zoned(sctx,
+                                chunk_logical + offset,
+                                map->stripes[stripe_index].physical,
+                                physical_end);
                 if (ret2)
                         ret = ret2;
         }
@@ -3576,10 +3544,10 @@ out:
 }
 
 static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
+                                          struct btrfs_block_group *bg,
                                           struct btrfs_device *scrub_dev,
-                                          u64 chunk_offset, u64 length,
                                           u64 dev_offset,
-                                          struct btrfs_block_group *cache)
+                                          u64 dev_extent_len)
 {
         struct btrfs_fs_info *fs_info = sctx->fs_info;
         struct extent_map_tree *map_tree = &fs_info->mapping_tree;
@@ -3589,7 +3557,7 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx,
         int ret = 0;
 
         read_lock(&map_tree->lock);
-        em = lookup_extent_mapping(map_tree, chunk_offset, 1);
+        em = lookup_extent_mapping(map_tree, bg->start, bg->length);
         read_unlock(&map_tree->lock);
 
         if (!em) {
@@ -3597,26 +3565,24 @@
                 /*
                  * Might have been an unused block group deleted by the cleaner
                  * kthread or relocation.
                  */
-                spin_lock(&cache->lock);
-                if (!cache->removed)
+                spin_lock(&bg->lock);
+                if (!bg->removed)
                         ret = -EINVAL;
-                spin_unlock(&cache->lock);
+                spin_unlock(&bg->lock);
 
                 return ret;
         }
-
-        map = em->map_lookup;
-        if (em->start != chunk_offset)
+        if (em->start != bg->start)
                 goto out;
-
-        if (em->len < length)
+        if (em->len < dev_extent_len)
                 goto out;
 
+        map = em->map_lookup;
         for (i = 0; i < map->num_stripes; ++i) {
                 if (map->stripes[i].dev->bdev == scrub_dev->bdev &&
                     map->stripes[i].physical == dev_offset) {
-                        ret = scrub_stripe(sctx, map, scrub_dev, i,
-                                           chunk_offset, length, cache);
+                        ret = scrub_stripe(sctx, bg, map, scrub_dev, i,
                                            dev_extent_len);
                         if (ret)
                                 goto out;
                 }
         }
@@ -3654,7 +3620,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
         struct btrfs_path *path;
         struct btrfs_fs_info *fs_info = sctx->fs_info;
         struct btrfs_root *root = fs_info->dev_root;
-        u64 length;
         u64 chunk_offset;
         int ret = 0;
         int ro_set;
@@ -3678,6 +3643,8 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
         key.type = BTRFS_DEV_EXTENT_KEY;
 
         while (1) {
+                u64 dev_extent_len;
+
                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                 if (ret < 0)
                         break;
@@ -3714,9 +3681,9 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                         break;
 
                 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
-                length = btrfs_dev_extent_length(l, dev_extent);
+                dev_extent_len = btrfs_dev_extent_length(l, dev_extent);
 
-                if (found_key.offset + length <= start)
+                if (found_key.offset + dev_extent_len <= start)
                         goto skip;
 
                 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
@@ -3850,13 +3817,14 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                 scrub_pause_off(fs_info);
 
                 down_write(&dev_replace->rwsem);
-                dev_replace->cursor_right = found_key.offset + length;
+                dev_replace->cursor_right = found_key.offset + dev_extent_len;
                 dev_replace->cursor_left = found_key.offset;
                 dev_replace->item_needs_writeback = 1;
                 up_write(&dev_replace->rwsem);
 
-                ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
-                                  found_key.offset, cache);
+                ASSERT(cache->start == chunk_offset);
+                ret = scrub_chunk(sctx, cache, scrub_dev, found_key.offset,
+                                  dev_extent_len);
 
                 /*
                  * flush, submit all pending read and write bios, afterwards
@@ -3937,7 +3905,7 @@ skip_unfreeze:
                         break;
                 }
 skip:
-                key.offset = found_key.offset + length;
+                key.offset = found_key.offset + dev_extent_len;
                 btrfs_release_path(path);
         }