summary refs log tree commit diff
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r-- fs/btrfs/scrub.c 205
1 files changed, 151 insertions, 54 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index efba5d1282ee..0be77993378e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -315,6 +315,16 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
atomic_inc(&fs_info->scrubs_running);
atomic_inc(&fs_info->scrubs_paused);
mutex_unlock(&fs_info->scrub_lock);
+
+ /*
+ * The @scrubs_running == @scrubs_paused check inside
+ * wait_event() is not an atomic operation, which means
+ * @scrubs_running/@scrubs_paused may be incremented or
+ * decremented at any time. Wake up @scrub_pause_wait as often
+ * as we can so that transaction commit is blocked less.
+ */
+ wake_up(&fs_info->scrub_pause_wait);
+
atomic_inc(&sctx->workers_pending);
}
@@ -418,7 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
sbio->index = i;
sbio->sctx = sctx;
sbio->page_count = 0;
- sbio->work.func = scrub_bio_end_io_worker;
+ btrfs_init_work(&sbio->work, scrub_bio_end_io_worker,
+ NULL, NULL);
if (i != SCRUB_BIOS_PER_SCTX - 1)
sctx->bios[i]->next_free = i + 1;
@@ -987,9 +998,10 @@ nodatasum_case:
fixup_nodatasum->root = fs_info->extent_root;
fixup_nodatasum->mirror_num = failed_mirror_index + 1;
scrub_pending_trans_workers_inc(sctx);
- fixup_nodatasum->work.func = scrub_fixup_nodatasum;
- btrfs_queue_worker(&fs_info->scrub_workers,
- &fixup_nodatasum->work);
+ btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum,
+ NULL, NULL);
+ btrfs_queue_work(fs_info->scrub_workers,
+ &fixup_nodatasum->work);
goto out;
}
@@ -1603,8 +1615,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err)
sbio->err = err;
sbio->bio = bio;
- sbio->work.func = scrub_wr_bio_end_io_worker;
- btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work);
+ btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL);
+ btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work);
}
static void scrub_wr_bio_end_io_worker(struct btrfs_work *work)
@@ -2072,7 +2084,7 @@ static void scrub_bio_end_io(struct bio *bio, int err)
sbio->err = err;
sbio->bio = bio;
- btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
+ btrfs_queue_work(fs_info->scrub_workers, &sbio->work);
}
static void scrub_bio_end_io_worker(struct btrfs_work *work)
@@ -2223,6 +2235,47 @@ behind_scrub_pages:
return 0;
}
+/*
+ * Given a physical address on stripe @num of @map, calculate its
+ * logical offset into *@offset (callers add their 'base' to it).
+ * If it is a parity stripe, *@offset is the i == 0 data offset advanced
+ * by one stripe_len for each data stripe indexed below @num.
+ * Return 0 if it is a data stripe, 1 if it is a parity stripe.
+ */
+static int get_raid56_logic_offset(u64 physical, int num,
+ struct map_lookup *map, u64 *offset)
+{
+ int i;
+ int j = 0;
+ u64 stripe_nr;
+ u64 last_offset;
+ int stripe_index;
+ int rot;
+
+ last_offset = (physical - map->stripes[num].physical) *
+ nr_data_stripes(map);
+ *offset = last_offset;
+ for (i = 0; i < nr_data_stripes(map); i++) {
+ *offset = last_offset + i * map->stripe_len;
+
+ stripe_nr = *offset;
+ do_div(stripe_nr, map->stripe_len);
+ do_div(stripe_nr, nr_data_stripes(map));
+
+ /* Work out the disk rotation on this stripe-set */
+ rot = do_div(stripe_nr, map->num_stripes);
+ /* work out which stripe index holds data stripe @i */
+ rot += i;
+ stripe_index = rot % map->num_stripes;
+ if (stripe_index == num)
+ return 0;
+ if (stripe_index < num)
+ j++;
+ }
+ *offset = last_offset + j * map->stripe_len;
+ return 1;
+}
+
static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
struct map_lookup *map,
struct btrfs_device *scrub_dev,
@@ -2244,6 +2297,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 physical;
u64 logical;
u64 logic_end;
+ u64 physical_end;
u64 generation;
int mirror_num;
struct reada_control *reada1;
@@ -2257,16 +2311,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 extent_len;
struct btrfs_device *extent_dev;
int extent_mirror_num;
- int stop_loop;
-
- if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
- BTRFS_BLOCK_GROUP_RAID6)) {
- if (num >= nr_data_stripes(map)) {
- return 0;
- }
- }
+ int stop_loop = 0;
nstripes = length;
+ physical = map->stripes[num].physical;
offset = 0;
do_div(nstripes, map->stripe_len);
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
@@ -2284,6 +2332,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
increment = map->stripe_len;
mirror_num = num % map->num_stripes + 1;
+ } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+ get_raid56_logic_offset(physical, num, map, &offset);
+ increment = map->stripe_len * nr_data_stripes(map);
+ mirror_num = 1;
} else {
increment = map->stripe_len;
mirror_num = 1;
@@ -2307,7 +2360,15 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
* to not hold off transaction commits
*/
logical = base + offset;
-
+ physical_end = physical + nstripes * map->stripe_len;
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+ get_raid56_logic_offset(physical_end, num,
+ map, &logic_end);
+ logic_end += base;
+ } else {
+ logic_end = logical + increment * nstripes;
+ }
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
scrub_blocked_if_needed(fs_info);
@@ -2316,7 +2377,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
key_start.objectid = logical;
key_start.type = BTRFS_EXTENT_ITEM_KEY;
key_start.offset = (u64)0;
- key_end.objectid = base + offset + nstripes * increment;
+ key_end.objectid = logic_end;
key_end.type = BTRFS_METADATA_ITEM_KEY;
key_end.offset = (u64)-1;
reada1 = btrfs_reada_add(root, &key_start, &key_end);
@@ -2326,7 +2387,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
key_start.offset = logical;
key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key_end.type = BTRFS_EXTENT_CSUM_KEY;
- key_end.offset = base + offset + nstripes * increment;
+ key_end.offset = logic_end;
reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
if (!IS_ERR(reada1))
@@ -2344,11 +2405,17 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
/*
* now find all extents for each stripe and scrub them
*/
- logical = base + offset;
- physical = map->stripes[num].physical;
- logic_end = logical + increment * nstripes;
ret = 0;
- while (logical < logic_end) {
+ while (physical < physical_end) {
+ /* for raid56, we skip parity stripe */
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+ ret = get_raid56_logic_offset(physical, num,
+ map, &logical);
+ logical += base;
+ if (ret)
+ goto skip;
+ }
/*
* canceled?
*/
@@ -2492,15 +2559,29 @@ again:
scrub_free_csums(sctx);
if (extent_logical + extent_len <
key.objectid + bytes) {
- logical += increment;
- physical += map->stripe_len;
-
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+ /*
+ * loop until we find next data stripe
+ * or we have finished all stripes.
+ */
+ do {
+ physical += map->stripe_len;
+ ret = get_raid56_logic_offset(
+ physical, num,
+ map, &logical);
+ logical += base;
+ } while (physical < physical_end && ret);
+ } else {
+ physical += map->stripe_len;
+ logical += increment;
+ }
if (logical < key.objectid + bytes) {
cond_resched();
goto again;
}
- if (logical >= logic_end) {
+ if (physical >= physical_end) {
stop_loop = 1;
break;
}
@@ -2509,6 +2590,7 @@ next:
path->slots[0]++;
}
btrfs_release_path(path);
+skip:
logical += increment;
physical += map->stripe_len;
spin_lock(&sctx->stat_lock);
@@ -2686,10 +2768,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
wait_event(sctx->list_wait,
atomic_read(&sctx->bios_in_flight) == 0);
- atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
+ atomic_inc(&fs_info->scrubs_paused);
+ wake_up(&fs_info->scrub_pause_wait);
+
+ /*
+ * Must be called before we decrease @scrubs_paused.
+ * Make sure we don't block transaction commit while
+ * we are waiting for pending workers to finish.
+ */
wait_event(sctx->list_wait,
atomic_read(&sctx->workers_pending) == 0);
- scrub_blocked_if_needed(fs_info);
+ atomic_set(&sctx->wr_ctx.flush_all_writes, 0);
+
+ mutex_lock(&fs_info->scrub_lock);
+ __scrub_blocked_if_needed(fs_info);
+ atomic_dec(&fs_info->scrubs_paused);
+ mutex_unlock(&fs_info->scrub_lock);
+ wake_up(&fs_info->scrub_pause_wait);
btrfs_put_block_group(cache);
if (ret)
@@ -2757,33 +2852,35 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
int ret = 0;
+ int flags = WQ_FREEZABLE | WQ_UNBOUND;
+ int max_active = fs_info->thread_pool_size;
if (fs_info->scrub_workers_refcnt == 0) {
if (is_dev_replace)
- btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1,
- &fs_info->generic_worker);
+ fs_info->scrub_workers =
+ btrfs_alloc_workqueue("btrfs-scrub", flags,
+ 1, 4);
else
- btrfs_init_workers(&fs_info->scrub_workers, "scrub",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
- fs_info->scrub_workers.idle_thresh = 4;
- ret = btrfs_start_workers(&fs_info->scrub_workers);
- if (ret)
+ fs_info->scrub_workers =
+ btrfs_alloc_workqueue("btrfs-scrub", flags,
+ max_active, 4);
+ if (!fs_info->scrub_workers) {
+ ret = -ENOMEM;
goto out;
- btrfs_init_workers(&fs_info->scrub_wr_completion_workers,
- "scrubwrc",
- fs_info->thread_pool_size,
- &fs_info->generic_worker);
- fs_info->scrub_wr_completion_workers.idle_thresh = 2;
- ret = btrfs_start_workers(
- &fs_info->scrub_wr_completion_workers);
- if (ret)
+ }
+ fs_info->scrub_wr_completion_workers =
+ btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
+ max_active, 2);
+ if (!fs_info->scrub_wr_completion_workers) {
+ ret = -ENOMEM;
goto out;
- btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1,
- &fs_info->generic_worker);
- ret = btrfs_start_workers(&fs_info->scrub_nocow_workers);
- if (ret)
+ }
+ fs_info->scrub_nocow_workers =
+ btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
+ if (!fs_info->scrub_nocow_workers) {
+ ret = -ENOMEM;
goto out;
+ }
}
++fs_info->scrub_workers_refcnt;
out:
@@ -2793,9 +2890,9 @@ out:
static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info)
{
if (--fs_info->scrub_workers_refcnt == 0) {
- btrfs_stop_workers(&fs_info->scrub_workers);
- btrfs_stop_workers(&fs_info->scrub_wr_completion_workers);
- btrfs_stop_workers(&fs_info->scrub_nocow_workers);
+ btrfs_destroy_workqueue(fs_info->scrub_workers);
+ btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers);
+ btrfs_destroy_workqueue(fs_info->scrub_nocow_workers);
}
WARN_ON(fs_info->scrub_workers_refcnt < 0);
}
@@ -3106,10 +3203,10 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
nocow_ctx->len = len;
nocow_ctx->mirror_num = mirror_num;
nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
- nocow_ctx->work.func = copy_nocow_pages_worker;
+ btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL);
INIT_LIST_HEAD(&nocow_ctx->inodes);
- btrfs_queue_worker(&fs_info->scrub_nocow_workers,
- &nocow_ctx->work);
+ btrfs_queue_work(fs_info->scrub_nocow_workers,
+ &nocow_ctx->work);
return 0;
}