From dc89e9824464e91fa0b06267864ceabe3186fd8b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 28 Jan 2011 17:05:48 -0500 Subject: Btrfs: use a slab for the free space entries Since we alloc/free free space entries a whole lot, lets use a slab to keep track of them. This makes some of my tests slightly faster. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a0390657451b..0282033041e1 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -393,7 +393,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, break; need_loop = 1; - e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); + e = kmem_cache_zalloc(btrfs_free_space_cachep, + GFP_NOFS); if (!e) { kunmap(page); unlock_page(page); @@ -405,7 +406,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, e->bytes = le64_to_cpu(entry->bytes); if (!e->bytes) { kunmap(page); - kfree(e); + kmem_cache_free(btrfs_free_space_cachep, e); unlock_page(page); page_cache_release(page); goto free_cache; @@ -420,7 +421,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); if (!e->bitmap) { kunmap(page); - kfree(e); + kmem_cache_free( + btrfs_free_space_cachep, e); unlock_page(page); page_cache_release(page); goto free_cache; @@ -1187,7 +1189,7 @@ static void free_bitmap(struct btrfs_block_group_cache *block_group, { unlink_free_space(block_group, bitmap_info); kfree(bitmap_info->bitmap); - kfree(bitmap_info); + kmem_cache_free(btrfs_free_space_cachep, bitmap_info); block_group->total_bitmaps--; recalculate_thresholds(block_group); } @@ -1342,8 +1344,8 @@ new_bitmap: /* no pre-allocated info, allocate a new one */ if (!info) { - info = kzalloc(sizeof(struct btrfs_free_space), - GFP_NOFS); + info = kmem_cache_zalloc(btrfs_free_space_cachep, + GFP_NOFS); if (!info) { spin_lock(&block_group->tree_lock); ret = -ENOMEM; @@ -1365,7 +1367,7 @@ out: if (info) { if (info->bitmap) kfree(info->bitmap); - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); } return ret; @@ -1398,7 +1400,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, else __unlink_free_space(block_group, right_info); info->bytes += right_info->bytes; - kfree(right_info); + kmem_cache_free(btrfs_free_space_cachep, right_info); merged = true; } @@ -1410,7 +1412,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group, __unlink_free_space(block_group, left_info); info->offset = left_info->offset; info->bytes += left_info->bytes; - kfree(left_info); + kmem_cache_free(btrfs_free_space_cachep, left_info); merged = true; } @@ -1423,7 +1425,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, struct btrfs_free_space *info; int ret = 0; - info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); + info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); if (!info) return -ENOMEM; @@ -1450,7 +1452,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, link: ret = link_free_space(block_group, info); if (ret) - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); out: spin_unlock(&block_group->tree_lock); @@ -1520,7 +1522,7 @@ again: kfree(info->bitmap); block_group->total_bitmaps--; } - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); goto out_lock; } @@ -1556,7 +1558,7 @@ again: /* the hole we're creating ends at the end * of the info struct, just free the info */ - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); } spin_unlock(&block_group->tree_lock); @@ -1689,7 +1691,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) unlink_free_space(block_group, info); if (info->bitmap) kfree(info->bitmap); - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); if (need_resched()) { spin_unlock(&block_group->tree_lock); cond_resched(); @@ -1722,7 +1724,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, entry->offset += bytes; entry->bytes -= bytes; if (!entry->bytes) - kfree(entry); + kmem_cache_free(btrfs_free_space_cachep, entry); else link_free_space(block_group, entry); } @@ -1884,7 +1886,7 @@ out: block_group->free_space -= bytes; if (entry->bytes == 0) { block_group->free_extents--; - kfree(entry); + kmem_cache_free(btrfs_free_space_cachep, entry); } spin_unlock(&block_group->tree_lock); -- cgit v1.2.3 From 7d0d2e8e6b6f7da221a25238cf490a095c8c4788 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 18 Mar 2011 15:13:42 -0400 Subject: Btrfs: check free space in block group before searching for a cluster The free space cluster stuff is heavy duty, so there is no sense in going through the entire song and dance if there isn't enough space in the block group to begin with. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0282033041e1..f631df870f64 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1999,6 +1999,16 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, min_bytes = max(bytes, (bytes + empty_size) >> 2); spin_lock(&block_group->tree_lock); + + /* + * If we know we don't have enough space to make a cluster don't even + * bother doing all the work to try and find one. + */ + if (block_group->free_space < min_bytes) { + spin_unlock(&block_group->tree_lock); + return -ENOSPC; + } + spin_lock(&cluster->lock); /* someone already found a cluster, hooray */ -- cgit v1.2.3 From d0a365e84a886ce6b5b7f7a76be0bb24934ec8f0 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 18 Mar 2011 15:27:43 -0400 Subject: Btrfs: deal with min_bytes appropriately when looking for a cluster We do all this fun stuff with min_bytes, but either don't use it in the case of just normal extents, or use it completely wrong in the case of bitmaps. So fix this for both cases 1) In the extent case, stop looking for space with window_free >= min_bytes instead of bytes + empty_size. 2) In the bitmap case, we were looking for streches of free space that was at least min_bytes in size, which was not right at all. So instead search for stretches of free space that are at least bytes in size (this will make a difference when we have > page size blocks) and then only search for min_bytes amount of free space. Thanks, Reviewed-by: Li Zefan Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f631df870f64..63776ae72f9e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1910,8 +1910,8 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, i = offset_to_bit(entry->offset, block_group->sectorsize, max_t(u64, offset, entry->offset)); - search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); - total_bits = bytes_to_bits(bytes, block_group->sectorsize); + search_bits = bytes_to_bits(bytes, block_group->sectorsize); + total_bits = bytes_to_bits(min_bytes, block_group->sectorsize); again: found_bits = 0; @@ -2034,8 +2034,7 @@ again: if (entry->bitmap && entry->bytes > bytes + empty_size) { ret = btrfs_bitmap_cluster(block_group, entry, cluster, - offset, bytes + empty_size, - min_bytes); + offset, bytes, min_bytes); if (!ret) goto got_it; } @@ -2065,7 +2064,7 @@ again: while (1) { /* out window is just right, lets fill it */ - if (window_free >= bytes + empty_size) + if (window_free >= min_bytes) break; node = rb_next(&last->offset_index); -- cgit v1.2.3 From 32cb0840ce8e13901fe71a9a8e834a531802ffc4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 18 Mar 2011 16:16:21 -0400 Subject: Btrfs: don't be as aggressive about using bitmaps We have been creating bitmaps for small extents unconditionally forever. This was great when testing to make sure the bitmap stuff was working, but is overkill normally. So instead of always adding small chunks of free space to bitmaps, only start doing it if we go past half of our extent threshold. This will keeps us from creating a bitmap for just one small free extent at the front of the block group, and will make the allocator a little faster as a result. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 63776ae72f9e..4ab35ea0443f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1287,9 +1287,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, * If we are below the extents threshold then we can add this as an * extent, and don't have to deal with the bitmap */ - if (block_group->free_extents < block_group->extents_thresh && - info->bytes > block_group->sectorsize * 4) - return 0; + if (block_group->free_extents < block_group->extents_thresh) { + /* + * If this block group has some small extents we don't want to + * use up all of our free slots in the cache with them, we want + * to reserve them to larger extents, however if we have plent + * of cache left then go ahead an dadd them, no sense in adding + * the overhead of a bitmap if we don't have to. + */ + if (info->bytes <= block_group->sectorsize * 4) { + if (block_group->free_extents * 2 <= + block_group->extents_thresh) + return 0; + } else { + return 0; + } + } /* * some block groups are so tiny they can't be enveloped by a bitmap, so -- cgit v1.2.3 From 4e69b598f6cfb0940b75abf7e179d6020e94ad1e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 21 Mar 2011 10:11:24 -0400 Subject: Btrfs: cleanup how we setup free space clusters This patch makes the free space cluster refilling code a little easier to understand, and fixes some things with the bitmap part of it. Currently we either want to refill a cluster with 1) All normal extent entries (those without bitmaps) 2) A bitmap entry with enough space The current code has this ugly jump around logic that will first try and fill up the cluster with extent entries and then if it can't do that it will try and find a bitmap to use. So instead split this out into two functions, one that tries to find only normal entries, and one that tries to find bitmaps. This also fixes a suboptimal thing we would do with bitmaps. If we used a bitmap we would just tell the cluster that we were pointing at a bitmap and it would do the tree search in the block group for that entry every time we tried to make an allocation. Instead of doing that now we just add it to the clusters group. I tested this with my ENOSPC tests and xfstests and it survived. Signed-off-by: Josef Bacik --- fs/btrfs/ctree.h | 3 - fs/btrfs/free-space-cache.c | 364 ++++++++++++++++++++++---------------------- 2 files changed, 182 insertions(+), 185 deletions(-) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 6036fdb88c53..0ee679b6c1b7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -783,9 +783,6 @@ struct btrfs_free_cluster { /* first extent starting offset */ u64 window_start; - /* if this cluster simply points at a bitmap in the block group */ - bool points_to_bitmap; - struct btrfs_block_group_cache *block_group; /* * when a cluster is allocated from a block group, we put the diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4ab35ea0443f..f03ef97c3b21 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1644,30 +1644,28 @@ __btrfs_return_cluster_to_free_space( { struct btrfs_free_space *entry; struct rb_node *node; - bool bitmap; spin_lock(&cluster->lock); if (cluster->block_group != block_group) goto out; - bitmap = cluster->points_to_bitmap; cluster->block_group = NULL; cluster->window_start = 0; list_del_init(&cluster->block_group_list); - cluster->points_to_bitmap = false; - - if (bitmap) - goto out; node = rb_first(&cluster->root); while (node) { + bool bitmap; + entry = rb_entry(node, struct btrfs_free_space, offset_index); node = rb_next(&entry->offset_index); rb_erase(&entry->offset_index, &cluster->root); - BUG_ON(entry->bitmap); - try_merge_free_space(block_group, entry, false); + + bitmap = (entry->bitmap != NULL); + if (!bitmap) + try_merge_free_space(block_group, entry, false); tree_insert_offset(&block_group->free_space_offset, - entry->offset, &entry->offset_index, 0); + entry->offset, &entry->offset_index, bitmap); } cluster->root = RB_ROOT; @@ -1790,50 +1788,24 @@ int btrfs_return_cluster_to_free_space( static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster, + struct btrfs_free_space *entry, u64 bytes, u64 min_start) { - struct btrfs_free_space *entry; int err; u64 search_start = cluster->window_start; u64 search_bytes = bytes; u64 ret = 0; - spin_lock(&block_group->tree_lock); - spin_lock(&cluster->lock); - - if (!cluster->points_to_bitmap) - goto out; - - if (cluster->block_group != block_group) - goto out; - - /* - * search_start is the beginning of the bitmap, but at some point it may - * be a good idea to point to the actual start of the free area in the - * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only - * to 1 to make sure we get the bitmap entry - */ - entry = tree_search_offset(block_group, - offset_to_bitmap(block_group, search_start), - 1, 0); - if (!entry || !entry->bitmap) - goto out; - search_start = min_start; search_bytes = bytes; err = search_bitmap(block_group, entry, &search_start, &search_bytes); if (err) - goto out; + return 0; ret = search_start; bitmap_clear_bits(block_group, entry, ret, bytes); - if (entry->bytes == 0) - free_bitmap(block_group, entry); -out: - spin_unlock(&cluster->lock); - spin_unlock(&block_group->tree_lock); return ret; } @@ -1851,10 +1823,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, struct rb_node *node; u64 ret = 0; - if (cluster->points_to_bitmap) - return btrfs_alloc_from_bitmap(block_group, cluster, bytes, - min_start); - spin_lock(&cluster->lock); if (bytes > cluster->max_size) goto out; @@ -1867,9 +1835,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, goto out; entry = rb_entry(node, struct btrfs_free_space, offset_index); - while(1) { - if (entry->bytes < bytes || entry->offset < min_start) { + if (entry->bytes < bytes || + (!entry->bitmap && entry->offset < min_start)) { struct rb_node *node; node = rb_next(&entry->offset_index); @@ -1879,10 +1847,27 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, offset_index); continue; } - ret = entry->offset; - entry->offset += bytes; - entry->bytes -= bytes; + if (entry->bitmap) { + ret = btrfs_alloc_from_bitmap(block_group, + cluster, entry, bytes, + min_start); + if (ret == 0) { + struct rb_node *node; + node = rb_next(&entry->offset_index); + if (!node) + break; + entry = rb_entry(node, struct btrfs_free_space, + offset_index); + continue; + } + } else { + + ret = entry->offset; + + entry->offset += bytes; + entry->bytes -= bytes; + } if (entry->bytes == 0) rb_erase(&entry->offset_index, &cluster->root); @@ -1899,6 +1884,11 @@ out: block_group->free_space -= bytes; if (entry->bytes == 0) { block_group->free_extents--; + if (entry->bitmap) { + kfree(entry->bitmap); + block_group->total_bitmaps--; + recalculate_thresholds(block_group); + } kmem_cache_free(btrfs_free_space_cachep, entry); } @@ -1919,6 +1909,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, unsigned long found_bits; unsigned long start = 0; unsigned long total_found = 0; + int ret; bool found = false; i = offset_to_bit(entry->offset, block_group->sectorsize, @@ -1941,7 +1932,7 @@ again: } if (!found_bits) - return -1; + return -ENOSPC; if (!found) { start = i; @@ -1965,11 +1956,144 @@ again: cluster->window_start = start * block_group->sectorsize + entry->offset; - cluster->points_to_bitmap = true; + rb_erase(&entry->offset_index, &block_group->free_space_offset); + ret = tree_insert_offset(&cluster->root, entry->offset, + &entry->offset_index, 1); + BUG_ON(ret); return 0; } +/* + * This searches the block group for just extents to fill the cluster with. + */ +static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + u64 offset, u64 bytes, u64 min_bytes) +{ + struct btrfs_free_space *first = NULL; + struct btrfs_free_space *entry = NULL; + struct btrfs_free_space *prev = NULL; + struct btrfs_free_space *last; + struct rb_node *node; + u64 window_start; + u64 window_free; + u64 max_extent; + u64 max_gap = 128 * 1024; + + entry = tree_search_offset(block_group, offset, 0, 1); + if (!entry) + return -ENOSPC; + + /* + * We don't want bitmaps, so just move along until we find a normal + * extent entry. + */ + while (entry->bitmap) { + node = rb_next(&entry->offset_index); + if (!node) + return -ENOSPC; + entry = rb_entry(node, struct btrfs_free_space, offset_index); + } + + window_start = entry->offset; + window_free = entry->bytes; + max_extent = entry->bytes; + first = entry; + last = entry; + prev = entry; + + while (window_free <= min_bytes) { + node = rb_next(&entry->offset_index); + if (!node) + return -ENOSPC; + entry = rb_entry(node, struct btrfs_free_space, offset_index); + + if (entry->bitmap) + continue; + /* + * we haven't filled the empty size and the window is + * very large. reset and try again + */ + if (entry->offset - (prev->offset + prev->bytes) > max_gap || + entry->offset - window_start > (min_bytes * 2)) { + first = entry; + window_start = entry->offset; + window_free = entry->bytes; + last = entry; + max_extent = entry->bytes; + } else { + last = entry; + window_free += entry->bytes; + if (entry->bytes > max_extent) + max_extent = entry->bytes; + } + prev = entry; + } + + cluster->window_start = first->offset; + + node = &first->offset_index; + + /* + * now we've found our entries, pull them out of the free space + * cache and put them into the cluster rbtree + */ + do { + int ret; + + entry = rb_entry(node, struct btrfs_free_space, offset_index); + node = rb_next(&entry->offset_index); + if (entry->bitmap) + continue; + + rb_erase(&entry->offset_index, &block_group->free_space_offset); + ret = tree_insert_offset(&cluster->root, entry->offset, + &entry->offset_index, 0); + BUG_ON(ret); + } while (node && entry != last); + + cluster->max_size = max_extent; + + return 0; +} + +/* + * This specifically looks for bitmaps that may work in the cluster, we assume + * that we have already failed to find extents that will work. + */ +static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, + struct btrfs_free_cluster *cluster, + u64 offset, u64 bytes, u64 min_bytes) +{ + struct btrfs_free_space *entry; + struct rb_node *node; + int ret = -ENOSPC; + + if (block_group->total_bitmaps == 0) + return -ENOSPC; + + entry = tree_search_offset(block_group, + offset_to_bitmap(block_group, offset), + 0, 1); + if (!entry) + return -ENOSPC; + + node = &entry->offset_index; + do { + entry = rb_entry(node, struct btrfs_free_space, offset_index); + node = rb_next(&entry->offset_index); + if (!entry->bitmap) + continue; + if (entry->bytes < min_bytes) + continue; + ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset, + bytes, min_bytes); + } while (ret && node); + + return ret; +} + /* * here we try to find a cluster of blocks in a block group. The goal * is to find at least bytes free and up to empty_size + bytes free. @@ -1984,15 +2108,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, struct btrfs_free_cluster *cluster, u64 offset, u64 bytes, u64 empty_size) { - struct btrfs_free_space *entry = NULL; - struct rb_node *node; - struct btrfs_free_space *next; - struct btrfs_free_space *last = NULL; u64 min_bytes; - u64 window_start; - u64 window_free; - u64 max_extent = 0; - bool found_bitmap = false; int ret; /* for metadata, allow allocates with more holes */ @@ -2029,134 +2145,19 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, ret = 0; goto out; } -again: - entry = tree_search_offset(block_group, offset, found_bitmap, 1); - if (!entry) { - ret = -ENOSPC; - goto out; - } - - /* - * If found_bitmap is true, we exhausted our search for extent entries, - * and we just want to search all of the bitmaps that we can find, and - * ignore any extent entries we find. - */ - while (entry->bitmap || found_bitmap || - (!entry->bitmap && entry->bytes < min_bytes)) { - struct rb_node *node = rb_next(&entry->offset_index); - - if (entry->bitmap && entry->bytes > bytes + empty_size) { - ret = btrfs_bitmap_cluster(block_group, entry, cluster, - offset, bytes, min_bytes); - if (!ret) - goto got_it; - } - - if (!node) { - ret = -ENOSPC; - goto out; - } - entry = rb_entry(node, struct btrfs_free_space, offset_index); - } - - /* - * We already searched all the extent entries from the passed in offset - * to the end and didn't find enough space for the cluster, and we also - * didn't find any bitmaps that met our criteria, just go ahead and exit - */ - if (found_bitmap) { - ret = -ENOSPC; - goto out; - } - - cluster->points_to_bitmap = false; - window_start = entry->offset; - window_free = entry->bytes; - last = entry; - max_extent = entry->bytes; - - while (1) { - /* out window is just right, lets fill it */ - if (window_free >= min_bytes) - break; - - node = rb_next(&last->offset_index); - if (!node) { - if (found_bitmap) - goto again; - ret = -ENOSPC; - goto out; - } - next = rb_entry(node, struct btrfs_free_space, offset_index); - - /* - * we found a bitmap, so if this search doesn't result in a - * cluster, we know to go and search again for the bitmaps and - * start looking for space there - */ - if (next->bitmap) { - if (!found_bitmap) - offset = next->offset; - found_bitmap = true; - last = next; - continue; - } - - /* - * we haven't filled the empty size and the window is - * very large. reset and try again - */ - if (next->offset - (last->offset + last->bytes) > 128 * 1024 || - next->offset - window_start > (bytes + empty_size) * 2) { - entry = next; - window_start = entry->offset; - window_free = entry->bytes; - last = entry; - max_extent = entry->bytes; - } else { - last = next; - window_free += next->bytes; - if (entry->bytes > max_extent) - max_extent = entry->bytes; - } - } - - cluster->window_start = entry->offset; - - /* - * now we've found our entries, pull them out of the free space - * cache and put them into the cluster rbtree - * - * The cluster includes an rbtree, but only uses the offset index - * of each free space cache entry. - */ - while (1) { - node = rb_next(&entry->offset_index); - if (entry->bitmap && node) { - entry = rb_entry(node, struct btrfs_free_space, - offset_index); - continue; - } else if (entry->bitmap && !node) { - break; - } - - rb_erase(&entry->offset_index, &block_group->free_space_offset); - ret = tree_insert_offset(&cluster->root, entry->offset, - &entry->offset_index, 0); - BUG_ON(ret); - if (!node || entry == last) - break; + ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes, + min_bytes); + if (ret) + ret = setup_cluster_bitmap(block_group, cluster, offset, + bytes, min_bytes); - entry = rb_entry(node, struct btrfs_free_space, offset_index); + if (!ret) { + atomic_inc(&block_group->count); + list_add_tail(&cluster->block_group_list, + &block_group->cluster_list); + cluster->block_group = block_group; } - - cluster->max_size = max_extent; -got_it: - ret = 0; - atomic_inc(&block_group->count); - list_add_tail(&cluster->block_group_list, &block_group->cluster_list); - cluster->block_group = block_group; out: spin_unlock(&cluster->lock); spin_unlock(&block_group->tree_lock); @@ -2173,7 +2174,6 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) spin_lock_init(&cluster->refill_lock); cluster->root = RB_ROOT; cluster->max_size = 0; - cluster->points_to_bitmap = false; INIT_LIST_HEAD(&cluster->block_group_list); cluster->block_group = NULL; } -- cgit v1.2.3 From f7039b1d5c32241f87a513e33120db36bf30264d Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Thu, 24 Mar 2011 10:24:28 +0000 Subject: Btrfs: add btrfs_trim_fs() to handle FITRIM We take an free extent out from allocator, trim it, then put it back, but before we trim the block group, we should make sure the block group is cached, so plus a little change to make cache_block_group() run without a transaction. Signed-off-by: Li Dongyang Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 50 +++++++++++++++++++++++- fs/btrfs/free-space-cache.c | 92 +++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/free-space-cache.h | 2 + fs/btrfs/ioctl.c | 46 +++++++++++++++++++++++ 5 files changed, 190 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a18b7bc2b22c..93a0191aded6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2232,6 +2232,7 @@ int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 *actual_bytes); int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 type); +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e990d2d1ba4a..1efeda3b2f6f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -440,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * allocate blocks for the tree root we can't do the fast caching since * we likely hold important locks. */ - if (!trans->transaction->in_commit && + if (trans && (!trans->transaction->in_commit) && (root && root != root->fs_info->tree_root)) { spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { @@ -8778,3 +8778,51 @@ int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, { return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); } + +int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_block_group_cache *cache = NULL; + u64 group_trimmed; + u64 start; + u64 end; + u64 trimmed = 0; + int ret = 0; + + cache = btrfs_lookup_block_group(fs_info, range->start); + + while (cache) { + if (cache->key.objectid >= (range->start + range->len)) { + btrfs_put_block_group(cache); + break; + } + + start = max(range->start, cache->key.objectid); + end = min(range->start + range->len, + cache->key.objectid + cache->key.offset); + + if (end - start >= range->minlen) { + if (!block_group_cache_done(cache)) { + ret = cache_block_group(cache, NULL, root, 0); + if (!ret) + wait_block_group_cache_done(cache); + } + ret = btrfs_trim_block_group(cache, + &group_trimmed, + start, + end, + range->minlen); + + trimmed += group_trimmed; + if (ret) { + btrfs_put_block_group(cache); + break; + } + } + + cache = next_block_group(fs_info->tree_root, cache); + } + + range->len = trimmed; + return ret; +} diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f03ef97c3b21..0037427d8a9d 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2178,3 +2178,95 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) cluster->block_group = NULL; } +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen) +{ + struct btrfs_free_space *entry = NULL; + struct btrfs_fs_info *fs_info = block_group->fs_info; + u64 bytes = 0; + u64 actually_trimmed; + int ret = 0; + + *trimmed = 0; + + while (start < end) { + spin_lock(&block_group->tree_lock); + + if (block_group->free_space < minlen) { + spin_unlock(&block_group->tree_lock); + break; + } + + entry = tree_search_offset(block_group, start, 0, 1); + if (!entry) + entry = tree_search_offset(block_group, + offset_to_bitmap(block_group, + start), + 1, 1); + + if (!entry || entry->offset >= end) { + spin_unlock(&block_group->tree_lock); + break; + } + + if (entry->bitmap) { + ret = search_bitmap(block_group, entry, &start, &bytes); + if (!ret) { + if (start >= end) { + spin_unlock(&block_group->tree_lock); + break; + } + bytes = min(bytes, end - start); + bitmap_clear_bits(block_group, entry, + start, bytes); + if (entry->bytes == 0) + free_bitmap(block_group, entry); + } else { + start = entry->offset + BITS_PER_BITMAP * + block_group->sectorsize; + spin_unlock(&block_group->tree_lock); + ret = 0; + continue; + } + } else { + start = entry->offset; + bytes = min(entry->bytes, end - start); + unlink_free_space(block_group, entry); + kfree(entry); + } + + spin_unlock(&block_group->tree_lock); + + if (bytes >= minlen) { + int update_ret; + update_ret = btrfs_update_reserved_bytes(block_group, + bytes, 1, 1); + + ret = btrfs_error_discard_extent(fs_info->extent_root, + start, + bytes, + &actually_trimmed); + + btrfs_add_free_space(block_group, + start, bytes); + if (!update_ret) + btrfs_update_reserved_bytes(block_group, + bytes, 0, 1); + + if (ret) + break; + *trimmed += actually_trimmed; + } + start += bytes; + bytes = 0; + + if (fatal_signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + cond_resched(); + } + + return ret; +} diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index e49ca5c321b5..65c3b935289f 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, int btrfs_return_cluster_to_free_space( struct btrfs_block_group_cache *block_group, struct btrfs_free_cluster *cluster); +int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen); #endif diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 32c980ae0f1c..649f47d2afb4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -258,6 +259,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg) return put_user(inode->i_generation, arg); } +static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) +{ + struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info; + struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_device *device; + struct request_queue *q; + struct fstrim_range range; + u64 minlen = ULLONG_MAX; + u64 num_devices = 0; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&fs_info->fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { + if (!device->bdev) + continue; + q = bdev_get_queue(device->bdev); + if (blk_queue_discard(q)) { + num_devices++; + minlen = min((u64)q->limits.discard_granularity, + minlen); + } + } + mutex_unlock(&fs_info->fs_devices->device_list_mutex); + if (!num_devices) + return -EOPNOTSUPP; + + if (copy_from_user(&range, arg, sizeof(range))) + return -EFAULT; + + range.minlen = max(range.minlen, minlen); + ret = btrfs_trim_fs(root, &range); + if (ret < 0) + return ret; + + if (copy_to_user(arg, &range, sizeof(range))) + return -EFAULT; + + return 0; +} + static noinline int create_subvol(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen, @@ -2426,6 +2470,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_setflags(file, argp); case FS_IOC_GETVERSION: return btrfs_ioctl_getversion(file, argp); + case FITRIM: + return btrfs_ioctl_fitrim(file, argp); case BTRFS_IOC_SNAP_CREATE: return btrfs_ioctl_snap_create(file, argp, 0); case BTRFS_IOC_SNAP_CREATE_V2: -- cgit v1.2.3