summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Qu Wenruo <wqu@suse.com> 2022-08-08 08:45:40 +0300
committer: David Sterba <dsterba@suse.com> 2022-09-26 13:27:55 +0300
commit: f3e01e0e3c1f929d2ac39c335b024054b55e445a (patch)
tree: 13cc5efd995b60d75cae4fd5d04fcf9023281335
parent: 5dd3d8e4680be763bdb34300d507a55cca0ec86e (diff)
download: linux-f3e01e0e3c1f929d2ac39c335b024054b55e445a.tar.xz
btrfs: scrub: introduce scrub_block::pages for more efficient memory usage for subpage
[BACKGROUND] Currently for scrub, we allocate one page for one sector. This is fine for PAGE_SIZE == sectorsize support, but can waste extra memory for subpage support. [CODE CHANGE] Make scrub_block contain all the pages, so if we're scrubbing an extent sized 64K, and our page size is also 64K, we only need to allocate one page. [LIFESPAN CHANGE] Since scrub_sector now no longer holds a page, but uses scrub_block::pages[] instead, we have to ensure scrub_block has a longer lifespan for write bio. The lifespan for read bio is already long enough. Now scrub_block will only be released after the write bio has finished. [COMING NEXT] So far we have only added scrub_block::pages[] for this purpose, but scrub_sector is still utilizing the old scrub_sector::page. The switch will happen in the next patch. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- fs/btrfs/scrub.c 138
1 files changed, 116 insertions, 22 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 2b3bf798730c..ee4ba7e54efd 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -54,6 +54,8 @@ struct scrub_ctx;
*/
#define SCRUB_MAX_SECTORS_PER_BLOCK (BTRFS_MAX_METADATA_BLOCKSIZE / SZ_4K)
+#define SCRUB_MAX_PAGES (DIV_ROUND_UP(BTRFS_MAX_METADATA_BLOCKSIZE, PAGE_SIZE))
+
struct scrub_recover {
refcount_t refs;
struct btrfs_io_context *bioc;
@@ -94,8 +96,18 @@ struct scrub_bio {
};
struct scrub_block {
+ /*
+ * Each page will have its page::private used to record the logical
+ * bytenr.
+ */
+ struct page *pages[SCRUB_MAX_PAGES];
struct scrub_sector *sectors[SCRUB_MAX_SECTORS_PER_BLOCK];
+ /* Logical bytenr of the sblock */
+ u64 logical;
+ /* Length of sblock in bytes */
+ u32 len;
int sector_count;
+
atomic_t outstanding_sectors;
refcount_t refs; /* free mem on transition to zero */
struct scrub_ctx *sctx;
@@ -202,7 +214,45 @@ struct full_stripe_lock {
struct mutex mutex;
};
-static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx)
+#ifndef CONFIG_64BIT
+/* This structure is for architectures whose (void *) is smaller than u64 */
+struct scrub_page_private {
+ u64 logical;
+};
+#endif
+
+static int attach_scrub_page_private(struct page *page, u64 logical)
+{
+#ifdef CONFIG_64BIT
+ attach_page_private(page, (void *)logical);
+ return 0;
+#else
+ struct scrub_page_private *spp;
+
+ spp = kmalloc(sizeof(*spp), GFP_KERNEL);
+ if (!spp)
+ return -ENOMEM;
+ spp->logical = logical;
+ attach_page_private(page, (void *)spp);
+ return 0;
+#endif
+}
+
+static void detach_scrub_page_private(struct page *page)
+{
+#ifdef CONFIG_64BIT
+ detach_page_private(page);
+ return;
+#else
+ struct scrub_page_private *spp;
+
+ spp = detach_page_private(page);
+ kfree(spp);
+ return;
+#endif
+}
+
+static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx, u64 logical)
{
struct scrub_block *sblock;
@@ -211,27 +261,55 @@ static struct scrub_block *alloc_scrub_block(struct scrub_ctx *sctx)
return NULL;
refcount_set(&sblock->refs, 1);
sblock->sctx = sctx;
+ sblock->logical = logical;
sblock->no_io_error_seen = 1;
+ /*
+ * Scrub_block::pages will be allocated at alloc_scrub_sector() when
+ * the corresponding page is not allocated.
+ */
return sblock;
}
-/* Allocate a new scrub sector and attach it to @sblock */
-static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock, gfp_t gfp)
+/*
+ * Allocate a new scrub sector and attach it to @sblock.
+ *
+ * Will also allocate new pages for @sblock if needed.
+ */
+static struct scrub_sector *alloc_scrub_sector(struct scrub_block *sblock,
+ u64 logical, gfp_t gfp)
{
+ const pgoff_t page_index = (logical - sblock->logical) >> PAGE_SHIFT;
struct scrub_sector *ssector;
ssector = kzalloc(sizeof(*ssector), gfp);
if (!ssector)
return NULL;
- ssector->page = alloc_page(gfp);
- if (!ssector->page) {
- kfree(ssector);
- return NULL;
+
+ /* Allocate a new page if the slot is not allocated */
+ if (!sblock->pages[page_index]) {
+ int ret;
+
+ sblock->pages[page_index] = alloc_page(gfp);
+ if (!sblock->pages[page_index]) {
+ kfree(ssector);
+ return NULL;
+ }
+ ret = attach_scrub_page_private(sblock->pages[page_index],
+ sblock->logical + (page_index << PAGE_SHIFT));
+ if (ret < 0) {
+ kfree(ssector);
+ __free_page(sblock->pages[page_index]);
+ sblock->pages[page_index] = NULL;
+ return NULL;
+ }
}
+
atomic_set(&ssector->refs, 1);
ssector->sblock = sblock;
/* The sector to be added should not be used */
ASSERT(sblock->sectors[sblock->sector_count] == NULL);
+ ssector->logical = logical;
+
/* The sector count must be smaller than the limit */
ASSERT(sblock->sector_count < SCRUB_MAX_SECTORS_PER_BLOCK);
@@ -958,7 +1036,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check)
* But alloc_scrub_block() will initialize sblock::ref anyway,
* so we can use scrub_block_put() to clean them up.
*/
- sblocks_for_recheck[mirror_index] = alloc_scrub_block(sctx);
+ sblocks_for_recheck[mirror_index] = alloc_scrub_block(sctx, logical);
if (!sblocks_for_recheck[mirror_index]) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -1362,7 +1440,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
sblock = sblocks_for_recheck[mirror_index];
sblock->sctx = sctx;
- sector = alloc_scrub_sector(sblock, GFP_NOFS);
+ sector = alloc_scrub_sector(sblock, logical, GFP_NOFS);
if (!sector) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -1372,7 +1450,6 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock,
}
sector->flags = flags;
sector->generation = generation;
- sector->logical = logical;
sector->have_csum = have_csum;
if (have_csum)
memcpy(sector->csum,
@@ -1651,6 +1728,11 @@ static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
return ret;
}
+static void scrub_block_get(struct scrub_block *sblock)
+{
+ refcount_inc(&sblock->refs);
+}
+
static int scrub_add_sector_to_wr_bio(struct scrub_ctx *sctx,
struct scrub_sector *sector)
{
@@ -1711,6 +1793,13 @@ again:
sbio->sectors[sbio->sector_count] = sector;
scrub_sector_get(sector);
+ /*
+ * Since ssector no longer holds a page, but uses sblock::pages, we
+ * have to ensure the sblock is not freed before our write bio
+ * finishes.
+ */
+ scrub_block_get(sector->sblock);
+
sbio->sector_count++;
if (sbio->sector_count == sctx->sectors_per_bio)
scrub_wr_submit(sctx);
@@ -1772,8 +1861,14 @@ static void scrub_wr_bio_end_io_worker(struct work_struct *work)
}
}
- for (i = 0; i < sbio->sector_count; i++)
+ /*
+ * In scrub_add_sector_to_wr_bio() we grab extra ref for sblock, now in
+ * endio we should put the sblock.
+ */
+ for (i = 0; i < sbio->sector_count; i++) {
+ scrub_block_put(sbio->sectors[i]->sblock);
scrub_sector_put(sbio->sectors[i]);
+ }
bio_put(sbio->bio);
kfree(sbio);
@@ -1947,11 +2042,6 @@ static int scrub_checksum_super(struct scrub_block *sblock)
return fail_cor + fail_gen;
}
-static void scrub_block_get(struct scrub_block *sblock)
-{
- refcount_inc(&sblock->refs);
-}
-
static void scrub_block_put(struct scrub_block *sblock)
{
if (refcount_dec_and_test(&sblock->refs)) {
@@ -1962,6 +2052,12 @@ static void scrub_block_put(struct scrub_block *sblock)
for (i = 0; i < sblock->sector_count; i++)
scrub_sector_put(sblock->sectors[i]);
+ for (i = 0; i < DIV_ROUND_UP(sblock->len, PAGE_SIZE); i++) {
+ if (sblock->pages[i]) {
+ detach_scrub_page_private(sblock->pages[i]);
+ __free_page(sblock->pages[i]);
+ }
+ }
kfree(sblock);
}
}
@@ -2251,7 +2347,7 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
const u32 sectorsize = sctx->fs_info->sectorsize;
int index;
- sblock = alloc_scrub_block(sctx);
+ sblock = alloc_scrub_block(sctx, logical);
if (!sblock) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2268,7 +2364,7 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
*/
u32 l = min(sectorsize, len);
- sector = alloc_scrub_sector(sblock, GFP_KERNEL);
+ sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
if (!sector) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2279,7 +2375,6 @@ static int scrub_sectors(struct scrub_ctx *sctx, u64 logical, u32 len,
sector->dev = dev;
sector->flags = flags;
sector->generation = gen;
- sector->logical = logical;
sector->physical = physical;
sector->physical_for_dev_replace = physical_for_dev_replace;
sector->mirror_num = mirror_num;
@@ -2589,7 +2684,7 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity,
ASSERT(IS_ALIGNED(len, sectorsize));
- sblock = alloc_scrub_block(sctx);
+ sblock = alloc_scrub_block(sctx, logical);
if (!sblock) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2603,7 +2698,7 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity,
for (index = 0; len > 0; index++) {
struct scrub_sector *sector;
- sector = alloc_scrub_sector(sblock, GFP_KERNEL);
+ sector = alloc_scrub_sector(sblock, logical, GFP_KERNEL);
if (!sector) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2618,7 +2713,6 @@ static int scrub_sectors_for_parity(struct scrub_parity *sparity,
sector->dev = dev;
sector->flags = flags;
sector->generation = gen;
- sector->logical = logical;
sector->physical = physical;
sector->mirror_num = mirror_num;
if (csum) {