summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@toxicpanda.com>2020-01-17 17:02:22 +0300
committerDavid Sterba <dsterba@suse.com>2020-03-23 19:01:24 +0300
commit9ddc959e802bf7555a0be543205ddcba2bae98bf (patch)
treea9cb223bb5f9665fa51bbbf414e4da4158d0c078 /fs/btrfs/inode.c
parent41a2ee75aab0290a5899677437736ec715dcd1b6 (diff)
downloadlinux-9ddc959e802bf7555a0be543205ddcba2bae98bf.tar.xz
btrfs: use the file extent tree infrastructure
We want to use this everywhere we modify the file extent items permanently. These include: 1) Inserting new file extents for writes and prealloc extents. 2) Truncating inode items. 3) btrfs_cont_expand(). 4) Insert inline extents. 5) Insert new extents from log replay. 6) Insert a new extent for clone, as it could be past i_size. 7) Hole punching. For hole punching in particular it might seem it's not necessary because anybody extending would use btrfs_cont_expand(); however, there is a corner case that can still give us trouble. Start with an empty file and fallocate KEEP_SIZE 1M-2M. We now have a 0 length file, and a hole file extent from 0-1M, and a prealloc extent from 1M-2M. Now punch 1M-1.5M. Because this is past i_size we have [HOLE EXTENT][ NOTHING ][PREALLOC] [0 1M][1M 1.5M][1.5M 2M] with an i_size of 0. Now if we pwrite 0-1.5M we'll increase our i_size to 1.5M, but our disk_i_size is still 0 until the ordered extent completes. However, if we now immediately truncate 2M on the file we'll just call btrfs_cont_expand(inode, 1.5M, 2M), since our old i_size is 1.5M. If we commit the transaction here and crash we'll expose the gap. To fix this we need to clear the file extent mapping for the range that we punched but didn't insert a corresponding file extent for. This will mean the truncate will only get a disk_i_size set to 1M if we crash before the finish ordered io happens. I've written an xfstest to reproduce the problem and validate this fix. Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c55
1 files changed, 54 insertions, 1 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c481450dc76e..2865de52dc22 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -242,6 +242,15 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
/*
+ * We align size to sectorsize for inline extents just for simplicity
+ * sake.
+ */
+ size = ALIGN(size, root->fs_info->sectorsize);
+ ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size);
+ if (ret)
+ goto fail;
+
+ /*
* we're an inline extent, so nobody can
* extend the file past i_size without locking
* a page we already have locked.
@@ -2446,6 +2455,11 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
ins.offset = disk_num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;
+ ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), file_pos,
+ ram_bytes);
+ if (ret)
+ goto out;
+
/*
* Release the reserved range from inode dirty range map, as it is
* already moved into delayed_ref_head
@@ -4160,6 +4174,8 @@ search_again:
}
while (1) {
+ u64 clear_start = 0, clear_len = 0;
+
fi = NULL;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
@@ -4210,6 +4226,8 @@ search_again:
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
u64 num_dec;
+
+ clear_start = found_key.offset;
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
if (!del_item) {
u64 orig_num_bytes =
@@ -4217,6 +4235,7 @@ search_again:
extent_num_bytes = ALIGN(new_size -
found_key.offset,
fs_info->sectorsize);
+ clear_start = ALIGN(new_size, fs_info->sectorsize);
btrfs_set_file_extent_num_bytes(leaf, fi,
extent_num_bytes);
num_dec = (orig_num_bytes -
@@ -4242,6 +4261,7 @@ search_again:
inode_sub_bytes(inode, num_dec);
}
}
+ clear_len = num_dec;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
/*
* we can't truncate inline items that have had
@@ -4263,12 +4283,33 @@ search_again:
*/
ret = NEED_TRUNCATE_BLOCK;
break;
+ } else {
+ /*
+ * Inline extents are special, we just treat
+ * them as a full sector worth in the file
+ * extent tree just for simplicity sake.
+ */
+ clear_len = fs_info->sectorsize;
}
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
inode_sub_bytes(inode, item_end + 1 - new_size);
}
delete:
+ /*
+ * We use btrfs_truncate_inode_items() to clean up log trees for
+ * multiple fsyncs, and in this case we don't want to clear the
+ * file extent range because it's just the log.
+ */
+ if (root == BTRFS_I(inode)->root) {
+ ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
+ clear_start, clear_len);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ break;
+ }
+ }
+
if (del_item)
last_size = found_key.offset;
else
@@ -4591,14 +4632,21 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
}
last_byte = min(extent_map_end(em), block_end);
last_byte = ALIGN(last_byte, fs_info->sectorsize);
+ hole_size = last_byte - cur_offset;
+
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
struct extent_map *hole_em;
- hole_size = last_byte - cur_offset;
err = maybe_insert_hole(root, inode, cur_offset,
hole_size);
if (err)
break;
+
+ err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+ cur_offset, hole_size);
+ if (err)
+ break;
+
btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
cur_offset + hole_size - 1, 0);
hole_em = alloc_extent_map();
@@ -4630,6 +4678,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_size - 1, 0);
}
free_extent_map(hole_em);
+ } else {
+ err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
+ cur_offset, hole_size);
+ if (err)
+ break;
}
next:
free_extent_map(em);