diff options
author | Qu Wenruo <wqu@suse.com> | 2022-08-08 08:45:40 +0300 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2022-09-26 13:27:55 +0300 |
commit | f3e01e0e3c1f929d2ac39c335b024054b55e445a (patch) | |
tree | 13cc5efd995b60d75cae4fd5d04fcf9023281335 | |
parent | 5dd3d8e4680be763bdb34300d507a55cca0ec86e (diff) | |
download | linux-f3e01e0e3c1f929d2ac39c335b024054b55e445a.tar.xz |
btrfs: scrub: introduce scrub_block::pages for more efficient memory usage for subpage
[BACKGROUND]
Currently for scrub, we allocate one page for one sector, this is fine
for PAGE_SIZE == sectorsize support, but can waste extra memory for
subpage support.
[CODE CHANGE]
Make scrub_block contain all the pages, so if we're scrubbing an extent
sized 64K, and our page size is also 64K, we only need to allocate one
page.
[LIFESPAN CHANGE]
Since now scrub_sector no longer holds a page, but is using
scrub_block::pages[] instead, we have to ensure scrub_block has a longer
lifespan for write bio. The lifespan for read bio is already large
enough.
Now scrub_block will only be released after the write bio finished.
[COMING NEXT]
Currently we only added scrub_block::pages[] for this purpose, but
scrub_sector is still utilizing the old scrub_sector::page.
The switch will happen in the next patch.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- | fs/btrfs/scrub.c | 138 |
1 files changed, 116 insertions, 22 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2b3bf798730c..ee4ba7e54efd 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -54,6 +54,8 @@ struct scrub_ctx; */ #define SCRUB_MAX_SECTORS_PER_BLOCK (BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K) +#define SCRUB_MAX_PAGES (DIV_ROUND_UP(BTRFS_MAX_METADATA_BLOCKSIZE, PAGE_SIZE)) + struct scrub_recover { refcount_t refs; struct btrfs_io_context *bioc; @@ -94,8 +96,18 @@ struct scrub_bio { }; struct scrub_block { + /* + * Each page will have its page::private used to record the logical + * bytenr. + */ + struct page *pages[SCRUB_MAX_PAGES]; struct scrub_sector *sectors[SCRUB_MAX_SECTORS_PER_BLOCK]; + /* Logical bytenr of the sblock */ + u64 logical; + /* Length of sblock in bytes */ + u32 len; int sector_count; + atomic_t outstanding_sectors; refcount_t refs; /* free mem on transition to zero */ struct scrub_ctx *sctx; @@ -202,7 +214,45 @@ struct full_stripe_lock { struct mutex mutex; }; -static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx) +#ifndef CONFIG_64BIT +/* This structure is for archtectures whose (void *) is smaller than u64 */ +struct scrub_page_private { + u64 logical; +}; +#endif + +static int attach_scrub_page_private(struct page *page, u64 logical) +{ +#ifdef CONFIG_64BIT + attach_page_private(page, (void *)logical); + return 0; +#else + struct scrub_page_private *spp; + + spp = kmalloc(sizeof(*spp), GFP_KERNEL); + if (!spp) + return -ENOMEM; + spp->logical = logical; + attach_page_private(page, (void *)spp); + return 0; +#endif +} + +static void detach_scrub_page_private(struct page *page) +{ +#ifdef CONFIG_64BIT + detach_page_private(page); + return; +#else + struct scrub_page_private *spp; + + spp = detach_page_private(page); + kfree(spp); + return; +#endif +} + +static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx, u64 logical) { struct scrub_block *sblock; @@ -211,27 +261,55 @@ static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx) return NULL; refcount_set(&sblock->refs, 1); sblock->sctx = sctx; + sblock->logical = logical; sblock->no_io_error_seen = 1; + /* + * Scrub_block::pages will be allocated at alloc_scrub_sector() when + * the corresponding page is not allocated. + */ return sblock; } -/* Allocate a new scrub sector and attach it to @sblock */ -static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, gfp_t gfp) +/* + * Allocate a new scrub sector and attach it to @sblock. + * + * Will also allocate new pages for @sblock if needed. + */ +static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, + u64 logical, gfp_t gfp) { + const pgoff_t page_index = (logical - sblock->logical) >> PAGE_SHIFT; struct scrub_sector *ssector; ssector = kzalloc(sizeof(*ssector), gfp); if (!ssector) return NULL; - ssector->page = alloc_page(gfp); - if (!ssector->page) { - kfree(ssector); - return NULL; + + /* Allocate a new page if the slot is not allocated */ + if (!sblock->pages[page_index]) { + int ret; + + sblock->pages[page_index] = alloc_page(gfp); + if (!sblock->pages[page_index]) { + kfree(ssector); + return NULL; + } + ret = attach_scrub_page_private(sblock->pages[page_index], + sblock->logical + (page_index << PAGE_SHIFT)); + if (ret < 0) { + kfree(ssector); + __free_page(sblock->pages[page_index]); + sblock->pages[page_index] = NULL; + return NULL; + } } + atomic_set(&ssector->refs, 1); ssector->sblock = sblock; /* The sector to be added should not be used */ ASSERT(sblock->sectors[sblock->sector_count] == NULL); + ssector->logical = logical; + /* The sector count must be smaller than the limit */ ASSERT(sblock->sector_count < SCRUB_MAX_SECTORS_PER_BLOCK); @@ -958,7 +1036,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) * But alloc_scrub_block() will initialize sblock::ref anyway, * so we can use scrub_block_put() to clean them up. */ - sblocks_for_recheck[mirror_index] = alloc_scrub_block(sctx); + sblocks_for_recheck[mirror_index] = alloc_scrub_block(sctx, logical); if (!sblocks_for_recheck[mirror_index]) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -1362,7 +1440,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, sblock = sblocks_for_recheck[mirror_index]; sblock->sctx = sctx; - sector = alloc_scrub_sector(sblock, GFP_NOFS); + sector = alloc_scrub_sector(sblock, logical, GFP_NOFS); if (!sector) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -1372,7 +1450,6 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, } sector->flags = flags; sector->generation = generation; - sector->logical = logical; sector->have_csum = have_csum; if (have_csum) memcpy(sector->csum, @@ -1651,6 +1728,11 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical) return ret; } +static void scrub_block_get(struct scrub_block *sblock) +{ + refcount_inc(&sblock->refs); +} + static int scrub_add_sector_to_wr_bio(struct scrub_ctx *sctx, struct scrub_sector *sector) { @@ -1711,6 +1793,13 @@ again: sbio->sectors[sbio->sector_count] = sector; scrub_sector_get(sector); + /* + * Since ssector no longer holds a page, but uses sblock::pages, we + * have to ensure the sblock had not been freed before our write bio + * finished. + */ + scrub_block_get(sector->sblock); + sbio->sector_count++; if (sbio->sector_count == sctx->sectors_per_bio) scrub_wr_submit(sctx); @@ -1772,8 +1861,14 @@ static void scrub_wr_bio_end_io_worker(struct work_struct *work) } } - for (i = 0; i < sbio->sector_count; i++) + /* + * In scrub_add_sector_to_wr_bio() we grab extra ref for sblock, now in + * endio we should put the sblock. + */ + for (i = 0; i < sbio->sector_count; i++) { + scrub_block_put(sbio->sectors[i]->sblock); scrub_sector_put(sbio->sectors[i]); + } bio_put(sbio->bio); kfree(sbio); @@ -1947,11 +2042,6 @@ static int scrub_checksum_super(struct scrub_block *sblock) return fail_cor + fail_gen; } -static void scrub_block_get(struct scrub_block *sblock) -{ - refcount_inc(&sblock->refs); -} - static void scrub_block_put(struct scrub_block *sblock) { if (refcount_dec_and_test(&sblock->refs)) { @@ -1962,6 +2052,12 @@ static void scrub_block_put(struct scrub_block *sblock) for (i = 0; i < sblock->sector_count; i++) scrub_sector_put(sblock->sectors[i]); + for (i = 0; i < DIV_ROUND_UP(sblock->len, PAGE_SIZE); i++) { + if (sblock->pages[i]) { + detach_scrub_page_private(sblock->pages[i]); + __free_page(sblock->pages[i]); + } + } kfree(sblock); } } @@ -2251,7 +2347,7 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len, const u32 sectorsize = sctx->fs_info->sectorsize; int index; - sblock = alloc_scrub_block(sctx); + sblock = alloc_scrub_block(sctx, logical); if (!sblock) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2268,7 +2364,7 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len, */ u32 l = min(sectorsize, len); - sector = alloc_scrub_sector(sblock, GFP_KERNEL); + sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL); if (!sector) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2279,7 +2375,6 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len, sector->dev = dev; sector->flags = flags; sector->generation = gen; - sector->logical = logical; sector->physical = physical; sector->physical_for_dev_replace = physical_for_dev_replace; sector->mirror_num = mirror_num; @@ -2589,7 +2684,7 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity, ASSERT(IS_ALIGNED(len, sectorsize)); - sblock = alloc_scrub_block(sctx); + sblock = alloc_scrub_block(sctx, logical); if (!sblock) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2603,7 +2698,7 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity, for (index = 0; len > 0; index++) { struct scrub_sector *sector; - sector = alloc_scrub_sector(sblock, GFP_KERNEL); + sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL); if (!sector) { spin_lock(&sctx->stat_lock); sctx->stat.malloc_errors++; @@ -2618,7 +2713,6 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity, sector->dev = dev; sector->flags = flags; sector->generation = gen; - sector->logical = logical; sector->physical = physical; sector->mirror_num = mirror_num; if (csum) { |