-rw-r--r--	fs/btrfs/block-group.h      |  15
-rw-r--r--	fs/btrfs/discard.c          |  85
-rw-r--r--	fs/btrfs/free-space-cache.c | 131
-rw-r--r--	fs/btrfs/free-space-cache.h |   6
4 files changed, 191 insertions(+), 46 deletions(-)
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 884defd61dcd..a8d2edcd8760 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -13,6 +13,19 @@ enum btrfs_disk_cache_state {
 };
 
 /*
+ * This describes the state of the block_group for async discard. This is due
+ * to the two pass nature of it where extent discarding is prioritized over
+ * bitmap discarding. BTRFS_DISCARD_RESET_CURSOR is set when we are resetting
+ * between lists to prevent contention for discard state variables
+ * (eg. discard_cursor).
+ */
+enum btrfs_discard_state {
+	BTRFS_DISCARD_EXTENTS,
+	BTRFS_DISCARD_BITMAPS,
+	BTRFS_DISCARD_RESET_CURSOR,
+};
+
+/*
  * Control flags for do_chunk_alloc's force field CHUNK_ALLOC_NO_FORCE means to
  * only allocate a chunk if we really need one.
  *
@@ -121,6 +134,8 @@ struct btrfs_block_group {
 	struct list_head discard_list;
 	int discard_index;
 	u64 discard_eligible_time;
+	u64 discard_cursor;
+	enum btrfs_discard_state discard_state;
 
 	/* For dirty block groups */
 	struct list_head dirty_list;
diff --git a/fs/btrfs/discard.c b/fs/btrfs/discard.c
index 0f1c6d01aab0..cb9a23217ec6 100644
--- a/fs/btrfs/discard.c
+++ b/fs/btrfs/discard.c
@@ -21,15 +21,11 @@ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
 	return &discard_ctl->discard_list[block_group->discard_index];
 }
 
-static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
-				struct btrfs_block_group *block_group)
+static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+				  struct btrfs_block_group *block_group)
 {
-	spin_lock(&discard_ctl->lock);
-
-	if (!btrfs_run_discard_work(discard_ctl)) {
-		spin_unlock(&discard_ctl->lock);
+	if (!btrfs_run_discard_work(discard_ctl))
 		return;
-	}
 
 	if (list_empty(&block_group->discard_list) ||
 	    block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
@@ -37,11 +33,18 @@ static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
 		block_group->discard_index = BTRFS_DISCARD_INDEX_START;
 		block_group->discard_eligible_time = (ktime_get_ns() +
 						      BTRFS_DISCARD_DELAY);
+		block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
 	}
 
 	list_move_tail(&block_group->discard_list,
 		       get_discard_list(discard_ctl, block_group));
+}
 
+static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
+				struct btrfs_block_group *block_group)
+{
+	spin_lock(&discard_ctl->lock);
+	__add_to_discard_list(discard_ctl, block_group);
 	spin_unlock(&discard_ctl->lock);
 }
 
@@ -60,6 +63,7 @@ static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
 	block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
 	block_group->discard_eligible_time = (ktime_get_ns() +
 					      BTRFS_DISCARD_UNUSED_DELAY);
+	block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
 	list_add_tail(&block_group->discard_list,
 		      &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
 
@@ -127,23 +131,40 @@ static struct btrfs_block_group *find_next_block_group(
 /**
  * peek_discard_list - wrap find_next_block_group()
  * @discard_ctl: discard control
+ * @discard_state: the discard_state of the block_group after state management
  *
  * This wraps find_next_block_group() and sets the block_group to be in use.
+ * discard_state's control flow is managed here. Variables related to
+ * discard_state are reset here as needed (eg. discard_cursor). @discard_state
+ * is remembered as it may change while we're discarding, but we want the
+ * discard to execute in the context determined here.
  */
 static struct btrfs_block_group *peek_discard_list(
-					struct btrfs_discard_ctl *discard_ctl)
+					struct btrfs_discard_ctl *discard_ctl,
+					enum btrfs_discard_state *discard_state)
 {
 	struct btrfs_block_group *block_group;
 	const u64 now = ktime_get_ns();
 
 	spin_lock(&discard_ctl->lock);
-
+again:
 	block_group = find_next_block_group(discard_ctl, now);
-	if (block_group && now < block_group->discard_eligible_time)
+	if (block_group && now > block_group->discard_eligible_time) {
+		if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
+		    block_group->used != 0) {
+			__add_to_discard_list(discard_ctl, block_group);
+			goto again;
+		}
+		if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
+			block_group->discard_cursor = block_group->start;
+			block_group->discard_state = BTRFS_DISCARD_EXTENTS;
+		}
+		discard_ctl->block_group = block_group;
+		*discard_state = block_group->discard_state;
+	} else {
 		block_group = NULL;
-
-	discard_ctl->block_group = block_group;
+	}
 
 	spin_unlock(&discard_ctl->lock);
 
 	return block_group;
@@ -254,24 +275,54 @@ static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
  * btrfs_discard_workfn - discard work function
  * @work: work
  *
- * This finds the next block_group to start discarding and then discards it.
+ * This finds the next block_group to start discarding and then discards a
+ * single region. It does this in a two-pass fashion: first extents and second
+ * bitmaps. Completely discarded block groups are sent to the unused_bgs path.
  */
 static void btrfs_discard_workfn(struct work_struct *work)
 {
 	struct btrfs_discard_ctl *discard_ctl;
 	struct btrfs_block_group *block_group;
+	enum btrfs_discard_state discard_state;
 	u64 trimmed = 0;
 
 	discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
 
-	block_group = peek_discard_list(discard_ctl);
+	block_group = peek_discard_list(discard_ctl, &discard_state);
 	if (!block_group || !btrfs_run_discard_work(discard_ctl))
 		return;
 
-	btrfs_trim_block_group(block_group, &trimmed, block_group->start,
-			       btrfs_block_group_end(block_group), 0);
+	/* Perform discarding */
+	if (discard_state == BTRFS_DISCARD_BITMAPS)
+		btrfs_trim_block_group_bitmaps(block_group, &trimmed,
+					       block_group->discard_cursor,
+					       btrfs_block_group_end(block_group),
+					       0, true);
+	else
+		btrfs_trim_block_group_extents(block_group, &trimmed,
+					       block_group->discard_cursor,
+					       btrfs_block_group_end(block_group),
+					       0, true);
+
+	/* Determine next steps for a block_group */
+	if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
+		if (discard_state == BTRFS_DISCARD_BITMAPS) {
+			btrfs_finish_discard_pass(discard_ctl, block_group);
+		} else {
+			block_group->discard_cursor = block_group->start;
+			spin_lock(&discard_ctl->lock);
+			if (block_group->discard_state !=
+			    BTRFS_DISCARD_RESET_CURSOR)
+				block_group->discard_state =
+							BTRFS_DISCARD_BITMAPS;
+			spin_unlock(&discard_ctl->lock);
+		}
+	}
+
+	spin_lock(&discard_ctl->lock);
+	discard_ctl->block_group = NULL;
+	spin_unlock(&discard_ctl->lock);
 
-	btrfs_finish_discard_pass(discard_ctl, block_group);
 	btrfs_discard_schedule_work(discard_ctl, false);
 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3c2796bb6498..2e8fbd67ec9b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3322,8 +3322,12 @@ static int do_trimming(struct btrfs_block_group *block_group,
 	return ret;
 }
 
+/*
+ * If @async is set, then we will trim 1 region and return.
+ */
 static int trim_no_bitmap(struct btrfs_block_group *block_group,
-			  u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+			  u64 *total_trimmed, u64 start, u64 end, u64 minlen,
+			  bool async)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *entry;
@@ -3340,36 +3344,25 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group,
 		mutex_lock(&ctl->cache_writeout_mutex);
 		spin_lock(&ctl->tree_lock);
 
-		if (ctl->free_space < minlen) {
-			spin_unlock(&ctl->tree_lock);
-			mutex_unlock(&ctl->cache_writeout_mutex);
-			break;
-		}
+		if (ctl->free_space < minlen)
+			goto out_unlock;
 
 		entry = tree_search_offset(ctl, start, 0, 1);
-		if (!entry) {
-			spin_unlock(&ctl->tree_lock);
-			mutex_unlock(&ctl->cache_writeout_mutex);
-			break;
-		}
+		if (!entry)
+			goto out_unlock;
 
-		/* skip bitmaps */
-		while (entry->bitmap) {
+		/* Skip bitmaps and if async, already trimmed entries */
+		while (entry->bitmap ||
+		       (async && btrfs_free_space_trimmed(entry))) {
 			node = rb_next(&entry->offset_index);
-			if (!node) {
-				spin_unlock(&ctl->tree_lock);
-				mutex_unlock(&ctl->cache_writeout_mutex);
-				goto out;
-			}
+			if (!node)
+				goto out_unlock;
 			entry = rb_entry(node, struct btrfs_free_space,
 					 offset_index);
 		}
 
-		if (entry->offset >= end) {
-			spin_unlock(&ctl->tree_lock);
-			mutex_unlock(&ctl->cache_writeout_mutex);
-			break;
-		}
+		if (entry->offset >= end)
+			goto out_unlock;
 
 		extent_start = entry->offset;
 		extent_bytes = entry->bytes;
@@ -3394,10 +3387,15 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group,
 		ret = do_trimming(block_group, total_trimmed, start, bytes,
 				  extent_start, extent_bytes, extent_trim_state,
 				  &trim_entry);
-		if (ret)
+		if (ret) {
+			block_group->discard_cursor = start + bytes;
 			break;
+		}
 next:
 		start += bytes;
+		block_group->discard_cursor = start;
+		if (async && *total_trimmed)
+			break;
 
 		if (fatal_signal_pending(current)) {
 			ret = -ERESTARTSYS;
@@ -3406,7 +3404,14 @@ next:
 
 		cond_resched();
 	}
-out:
+
+	return ret;
+
+out_unlock:
+	block_group->discard_cursor = btrfs_block_group_end(block_group);
+	spin_unlock(&ctl->tree_lock);
+	mutex_unlock(&ctl->cache_writeout_mutex);
+
 	return ret;
 }
 
@@ -3441,8 +3446,12 @@ static void end_trimming_bitmap(struct btrfs_free_space *entry)
 	entry->trim_state = BTRFS_TRIM_STATE_TRIMMED;
 }
 
+/*
+ * If @async is set, then we will trim 1 region and return.
+ */
 static int trim_bitmaps(struct btrfs_block_group *block_group,
-			u64 *total_trimmed, u64 start, u64 end, u64 minlen)
+			u64 *total_trimmed, u64 start, u64 end, u64 minlen,
+			bool async)
 {
 	struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 	struct btrfs_free_space *entry;
@@ -3459,13 +3468,16 @@ static int trim_bitmaps(struct btrfs_block_group *block_group,
 		spin_lock(&ctl->tree_lock);
 
 		if (ctl->free_space < minlen) {
+			block_group->discard_cursor =
+				btrfs_block_group_end(block_group);
 			spin_unlock(&ctl->tree_lock);
 			mutex_unlock(&ctl->cache_writeout_mutex);
 			break;
 		}
 
 		entry = tree_search_offset(ctl, offset, 1, 0);
-		if (!entry || btrfs_free_space_trimmed(entry)) {
+		if (!entry || (async && start == offset &&
+			       btrfs_free_space_trimmed(entry))) {
 			spin_unlock(&ctl->tree_lock);
 			mutex_unlock(&ctl->cache_writeout_mutex);
 			next_bitmap = true;
@@ -3498,6 +3510,16 @@
 			goto next;
 		}
 
+		/*
+		 * We already trimmed a region, but are using the locking above
+		 * to reset the trim_state.
+		 */
+		if (async && *total_trimmed) {
+			spin_unlock(&ctl->tree_lock);
+			mutex_unlock(&ctl->cache_writeout_mutex);
+			goto out;
+		}
+
 		bytes = min(bytes, end - start);
 		if (bytes < minlen) {
 			entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED;
@@ -3520,6 +3542,8 @@
 				  start, bytes, 0, &trim_entry);
 		if (ret) {
 			reset_trimming_bitmap(ctl, offset);
+			block_group->discard_cursor =
+				btrfs_block_group_end(block_group);
 			break;
 		}
 next:
@@ -3529,6 +3553,7 @@ next:
 		} else {
 			start += bytes;
 		}
+		block_group->discard_cursor = start;
 
 		if (fatal_signal_pending(current)) {
 			if (start != offset)
@@ -3540,6 +3565,10 @@ next:
 		cond_resched();
 	}
 
+	if (offset >= end)
+		block_group->discard_cursor = end;
+
+out:
 	return ret;
 }
 
@@ -3600,11 +3629,11 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
 	btrfs_get_block_group_trimming(block_group);
 	spin_unlock(&block_group->lock);
 
-	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
+	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false);
 	if (ret)
 		goto out;
 
-	ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
+	ret = trim_bitmaps(block_group, trimmed, start, end, minlen, false);
 	div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem);
 	/* If we ended in the middle of a bitmap, reset the trimming flag */
 	if (rem)
@@ -3614,6 +3643,50 @@ out:
 	return ret;
 }
 
+int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
+				   u64 *trimmed, u64 start, u64 end, u64 minlen,
+				   bool async)
+{
+	int ret;
+
+	*trimmed = 0;
+
+	spin_lock(&block_group->lock);
+	if (block_group->removed) {
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+	btrfs_get_block_group_trimming(block_group);
+	spin_unlock(&block_group->lock);
+
+	ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async);
+	btrfs_put_block_group_trimming(block_group);
+
+	return ret;
+}
+
+int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
+				   u64 *trimmed, u64 start, u64 end, u64 minlen,
+				   bool async)
+{
+	int ret;
+
+	*trimmed = 0;
+
+	spin_lock(&block_group->lock);
+	if (block_group->removed) {
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+	btrfs_get_block_group_trimming(block_group);
+	spin_unlock(&block_group->lock);
+
+	ret = trim_bitmaps(block_group, trimmed, start, end, minlen, async);
+	btrfs_put_block_group_trimming(block_group);
+
+	return ret;
+}
+
 /*
  * Find the left-most item in the cache tree, and then return the
  * smallest inode number in the item.
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index f799eb491410..765c5cbd99a5 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -138,6 +138,12 @@ int btrfs_return_cluster_to_free_space(
 			     struct btrfs_free_cluster *cluster);
 int btrfs_trim_block_group(struct btrfs_block_group *block_group,
 			   u64 *trimmed, u64 start, u64 end, u64 minlen);
+int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
+				   u64 *trimmed, u64 start, u64 end, u64 minlen,
+				   bool async);
+int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
+				   u64 *trimmed, u64 start, u64 end, u64 minlen,
+				   bool async);
 
 /* Support functions for running our sanity tests */
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS