diff options
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 274 |
1 files changed, 257 insertions, 17 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 2b0a627cb5f9..030847bf7cec 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -27,10 +27,17 @@ #include "disk-io.h" #include "extent_io.h" #include "inode-map.h" +#include "volumes.h" #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) +struct btrfs_trim_range { + u64 start; + u64 bytes; + struct list_head list; +}; + static int link_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info); static void unlink_free_space(struct btrfs_free_space_ctl *ctl, @@ -279,8 +286,7 @@ static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, int num_pages; int check_crcs = 0; - num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) check_crcs = 1; @@ -882,6 +888,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, int ret; struct btrfs_free_cluster *cluster = NULL; struct rb_node *node = rb_first(&ctl->free_space_offset); + struct btrfs_trim_range *trim_entry; /* Get the cluster for this block_group if it exists */ if (block_group && !list_empty(&block_group->cluster_list)) { @@ -917,6 +924,21 @@ int write_cache_extent_entries(struct io_ctl *io_ctl, cluster = NULL; } } + + /* + * Make sure we don't miss any range that was removed from our rbtree + * because trimming is running. Otherwise after a umount+mount (or crash + * after committing the transaction) we would leak free space and get + * an inconsistent free space cache report from fsck. + */ + list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) { + ret = io_ctl_add_entry(io_ctl, trim_entry->start, + trim_entry->bytes, NULL); + if (ret) + goto fail; + *entries += 1; + } + return 0; fail: return -ENOSPC; @@ -1136,12 +1158,15 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, io_ctl_set_generation(&io_ctl, trans->transid); + mutex_lock(&ctl->cache_writeout_mutex); /* Write out the extent entries in the free space cache */ ret = write_cache_extent_entries(&io_ctl, ctl, block_group, &entries, &bitmaps, &bitmap_list); - if (ret) + if (ret) { + mutex_unlock(&ctl->cache_writeout_mutex); goto out_nospc; + } /* * Some spaces that are freed in the current transaction are pinned, @@ -1149,11 +1174,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, * committed, we shouldn't lose them. */ ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); - if (ret) + if (ret) { + mutex_unlock(&ctl->cache_writeout_mutex); goto out_nospc; + } - /* At last, we write out all the bitmaps. */ + /* + * At last, we write out all the bitmaps and keep cache_writeout_mutex + * locked while doing it because a concurrent trim can be manipulating + * or freeing the bitmap. + */ ret = write_bitmap_entries(&io_ctl, &bitmap_list); + mutex_unlock(&ctl->cache_writeout_mutex); if (ret) goto out_nospc; @@ -1998,6 +2030,128 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, return merged; } +static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, + bool update_stat) +{ + struct btrfs_free_space *bitmap; + unsigned long i; + unsigned long j; + const u64 end = info->offset + info->bytes; + const u64 bitmap_offset = offset_to_bitmap(ctl, end); + u64 bytes; + + bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); + if (!bitmap) + return false; + + i = offset_to_bit(bitmap->offset, ctl->unit, end); + j = find_next_zero_bit(bitmap->bitmap, BITS_PER_BITMAP, i); + if (j == i) + return false; + bytes = (j - i) * ctl->unit; + info->bytes += bytes; + + if (update_stat) + bitmap_clear_bits(ctl, bitmap, end, bytes); + else + __bitmap_clear_bits(ctl, bitmap, end, bytes); + + if (!bitmap->bytes) + free_bitmap(ctl, bitmap); + + return true; +} + +static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, + bool update_stat) +{ + struct btrfs_free_space *bitmap; + u64 bitmap_offset; + unsigned long i; + unsigned long j; + unsigned long prev_j; + u64 bytes; + + bitmap_offset = offset_to_bitmap(ctl, info->offset); + /* If we're on a boundary, try the previous logical bitmap. */ + if (bitmap_offset == info->offset) { + if (info->offset == 0) + return false; + bitmap_offset = offset_to_bitmap(ctl, info->offset - 1); + } + + bitmap = tree_search_offset(ctl, bitmap_offset, 1, 0); + if (!bitmap) + return false; + + i = offset_to_bit(bitmap->offset, ctl->unit, info->offset) - 1; + j = 0; + prev_j = (unsigned long)-1; + for_each_clear_bit_from(j, bitmap->bitmap, BITS_PER_BITMAP) { + if (j > i) + break; + prev_j = j; + } + if (prev_j == i) + return false; + + if (prev_j == (unsigned long)-1) + bytes = (i + 1) * ctl->unit; + else + bytes = (i - prev_j) * ctl->unit; + + info->offset -= bytes; + info->bytes += bytes; + + if (update_stat) + bitmap_clear_bits(ctl, bitmap, info->offset, bytes); + else + __bitmap_clear_bits(ctl, bitmap, info->offset, bytes); + + if (!bitmap->bytes) + free_bitmap(ctl, bitmap); + + return true; +} + +/* + * We prefer always to allocate from extent entries, both for clustered and + * non-clustered allocation requests. So when attempting to add a new extent + * entry, try to see if there's adjacent free space in bitmap entries, and if + * there is, migrate that space from the bitmaps to the extent. + * Like this we get better chances of satisfying space allocation requests + * because we attempt to satisfy them based on a single cache entry, and never + * on 2 or more entries - even if the entries represent a contiguous free space + * region (e.g. 1 extent entry + 1 bitmap entry starting where the extent entry + * ends). + */ +static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, + bool update_stat) +{ + /* + * Only work with disconnected entries, as we can change their offset, + * and must be extent entries. + */ + ASSERT(!info->bitmap); + ASSERT(RB_EMPTY_NODE(&info->offset_index)); + + if (ctl->total_bitmaps > 0) { + bool stole_end; + bool stole_front = false; + + stole_end = steal_from_bitmap_to_end(ctl, info, update_stat); + if (ctl->total_bitmaps > 0) + stole_front = steal_from_bitmap_to_front(ctl, info, + update_stat); + + if (stole_end || stole_front) + try_merge_free_space(ctl, info, update_stat); + } +} + int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, u64 offset, u64 bytes) { @@ -2010,6 +2164,7 @@ int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, info->offset = offset; info->bytes = bytes; + RB_CLEAR_NODE(&info->offset_index); spin_lock(&ctl->tree_lock); @@ -2029,6 +2184,14 @@ int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl, goto out; } link: + /* + * Only steal free space from adjacent bitmaps if we're sure we're not + * going to add the new free space to existing bitmap entries - because + * that would mean unnecessary work that would be reverted. Therefore + * attempt to steal space from bitmaps if we're adding an extent entry. + */ + steal_from_bitmap(ctl, info, true); + ret = link_free_space(ctl, info); if (ret) kmem_cache_free(btrfs_free_space_cachep, info); @@ -2165,6 +2328,8 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) ctl->start = block_group->key.objectid; ctl->private = block_group; ctl->op = &free_space_op; + INIT_LIST_HEAD(&ctl->trimming_ranges); + mutex_init(&ctl->cache_writeout_mutex); /* * we only want to have 32k of ram per block group for keeping @@ -2205,10 +2370,13 @@ __btrfs_return_cluster_to_free_space( entry = rb_entry(node, struct btrfs_free_space, offset_index); node = rb_next(&entry->offset_index); rb_erase(&entry->offset_index, &cluster->root); + RB_CLEAR_NODE(&entry->offset_index); bitmap = (entry->bitmap != NULL); - if (!bitmap) + if (!bitmap) { try_merge_free_space(ctl, entry, false); + steal_from_bitmap(ctl, entry, false); + } tree_insert_offset(&ctl->free_space_offset, entry->offset, &entry->offset_index, bitmap); } @@ -2778,10 +2946,12 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) static int do_trimming(struct btrfs_block_group_cache *block_group, u64 *total_trimmed, u64 start, u64 bytes, - u64 reserved_start, u64 reserved_bytes) + u64 reserved_start, u64 reserved_bytes, + struct btrfs_trim_range *trim_entry) { struct btrfs_space_info *space_info = block_group->space_info; struct btrfs_fs_info *fs_info = block_group->fs_info; + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; int ret; int update = 0; u64 trimmed = 0; @@ -2801,7 +2971,10 @@ static int do_trimming(struct btrfs_block_group_cache *block_group, if (!ret) *total_trimmed += trimmed; + mutex_lock(&ctl->cache_writeout_mutex); btrfs_add_free_space(block_group, reserved_start, reserved_bytes); + list_del(&trim_entry->list); + mutex_unlock(&ctl->cache_writeout_mutex); if (update) { spin_lock(&space_info->lock); @@ -2829,16 +3002,21 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, u64 bytes; while (start < end) { + struct btrfs_trim_range trim_entry; + + mutex_lock(&ctl->cache_writeout_mutex); spin_lock(&ctl->tree_lock); if (ctl->free_space < minlen) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); break; } entry = tree_search_offset(ctl, start, 0, 1); if (!entry) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); break; } @@ -2847,6 +3025,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, node = rb_next(&entry->offset_index); if (!node) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); goto out; } entry = rb_entry(node, struct btrfs_free_space, @@ -2855,6 +3034,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, if (entry->offset >= end) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); break; } @@ -2864,6 +3044,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, bytes = min(extent_start + extent_bytes, end) - start; if (bytes < minlen) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); goto next; } @@ -2871,9 +3052,13 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group, kmem_cache_free(btrfs_free_space_cachep, entry); spin_unlock(&ctl->tree_lock); + trim_entry.start = extent_start; + trim_entry.bytes = extent_bytes; + list_add_tail(&trim_entry.list, &ctl->trimming_ranges); + mutex_unlock(&ctl->cache_writeout_mutex); ret = do_trimming(block_group, total_trimmed, start, bytes, - extent_start, extent_bytes); + extent_start, extent_bytes, &trim_entry); if (ret) break; next: @@ -2902,17 +3087,21 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, while (offset < end) { bool next_bitmap = false; + struct btrfs_trim_range trim_entry; + mutex_lock(&ctl->cache_writeout_mutex); spin_lock(&ctl->tree_lock); if (ctl->free_space < minlen) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); break; } entry = tree_search_offset(ctl, offset, 1, 0); if (!entry) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); next_bitmap = true; goto next; } @@ -2921,6 +3110,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, ret2 = search_bitmap(ctl, entry, &start, &bytes); if (ret2 || start >= end) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); next_bitmap = true; goto next; } @@ -2928,6 +3118,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, bytes = min(bytes, end - start); if (bytes < minlen) { spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); goto next; } @@ -2936,9 +3127,13 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group, free_bitmap(ctl, entry); spin_unlock(&ctl->tree_lock); + trim_entry.start = start; + trim_entry.bytes = bytes; + list_add_tail(&trim_entry.list, &ctl->trimming_ranges); + mutex_unlock(&ctl->cache_writeout_mutex); ret = do_trimming(block_group, total_trimmed, start, bytes, - start, bytes); + start, bytes, &trim_entry); if (ret) break; next: @@ -2968,11 +3163,52 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, *trimmed = 0; + spin_lock(&block_group->lock); + if (block_group->removed) { + spin_unlock(&block_group->lock); + return 0; + } + atomic_inc(&block_group->trimming); + spin_unlock(&block_group->lock); + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); if (ret) - return ret; + goto out; ret = trim_bitmaps(block_group, trimmed, start, end, minlen); +out: + spin_lock(&block_group->lock); + if (atomic_dec_and_test(&block_group->trimming) && + block_group->removed) { + struct extent_map_tree *em_tree; + struct extent_map *em; + + spin_unlock(&block_group->lock); + + em_tree = &block_group->fs_info->mapping_tree.map_tree; + write_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, block_group->key.objectid, + 1); + BUG_ON(!em); /* logic error, can't happen */ + remove_extent_mapping(em_tree, em); + write_unlock(&em_tree->lock); + + lock_chunks(block_group->fs_info->chunk_root); + list_del_init(&em->list); + unlock_chunks(block_group->fs_info->chunk_root); + + /* once for us and once for the tree */ + free_extent_map(em); + free_extent_map(em); + + /* + * We've left one free space entry and other tasks trimming + * this block group have left 1 entry each one. Free them. + */ + __btrfs_remove_free_space_cache(block_group->free_space_ctl); + } else { + spin_unlock(&block_group->lock); + } return ret; } @@ -3033,10 +3269,10 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, { struct inode *inode = NULL; - spin_lock(&root->cache_lock); - if (root->cache_inode) - inode = igrab(root->cache_inode); - spin_unlock(&root->cache_lock); + spin_lock(&root->ino_cache_lock); + if (root->ino_cache_inode) + inode = igrab(root->ino_cache_inode); + spin_unlock(&root->ino_cache_lock); if (inode) return inode; @@ -3044,10 +3280,10 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root, if (IS_ERR(inode)) return inode; - spin_lock(&root->cache_lock); + spin_lock(&root->ino_cache_lock); if (!btrfs_fs_closing(root->fs_info)) - root->cache_inode = igrab(inode); - spin_unlock(&root->cache_lock); + root->ino_cache_inode = igrab(inode); + spin_unlock(&root->ino_cache_lock); return inode; } @@ -3176,6 +3412,7 @@ again: map = NULL; add_new_bitmap(ctl, info, offset); bitmap_info = info; + info = NULL; } bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); @@ -3186,6 +3423,8 @@ again: if (bytes) goto again; + if (info) + kmem_cache_free(btrfs_free_space_cachep, info); if (map) kfree(map); return 0; @@ -3260,6 +3499,7 @@ have_info: goto have_info; } + ret = 0; goto out; } |