summaryrefslogtreecommitdiff
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--fs/btrfs/ordered-data.c224
1 files changed, 164 insertions, 60 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 79d366a36223..985a21558437 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -199,14 +199,21 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
- if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
- set_bit(type, &entry->flags);
+ entry->physical = (u64)-1;
+ entry->disk = NULL;
+ entry->partno = (u8)-1;
- if (dio) {
- percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes,
- fs_info->delalloc_batch);
+ ASSERT(type == BTRFS_ORDERED_REGULAR ||
+ type == BTRFS_ORDERED_NOCOW ||
+ type == BTRFS_ORDERED_PREALLOC ||
+ type == BTRFS_ORDERED_COMPRESSED);
+ set_bit(type, &entry->flags);
+
+ percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
+ fs_info->delalloc_batch);
+
+ if (dio)
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
- }
/* one ref for the tree */
refcount_set(&entry->refs, 1);
@@ -256,6 +263,9 @@ int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
int type)
{
+ ASSERT(type == BTRFS_ORDERED_REGULAR ||
+ type == BTRFS_ORDERED_NOCOW ||
+ type == BTRFS_ORDERED_PREALLOC);
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
num_bytes, disk_num_bytes, type, 0,
BTRFS_COMPRESS_NONE);
@@ -265,6 +275,9 @@ int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
u64 disk_num_bytes, int type)
{
+ ASSERT(type == BTRFS_ORDERED_REGULAR ||
+ type == BTRFS_ORDERED_NOCOW ||
+ type == BTRFS_ORDERED_PREALLOC);
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
num_bytes, disk_num_bytes, type, 1,
BTRFS_COMPRESS_NONE);
@@ -272,11 +285,12 @@ int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
u64 disk_bytenr, u64 num_bytes,
- u64 disk_num_bytes, int type,
- int compress_type)
+ u64 disk_num_bytes, int compress_type)
{
+ ASSERT(compress_type != BTRFS_COMPRESS_NONE);
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
- num_bytes, disk_num_bytes, type, 0,
+ num_bytes, disk_num_bytes,
+ BTRFS_ORDERED_COMPRESSED, 0,
compress_type);
}
@@ -297,26 +311,33 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
}
/*
- * this is used to account for finished IO across a given range
- * of the file. The IO may span ordered extents. If
- * a given ordered_extent is completely done, 1 is returned, otherwise
- * 0.
+ * Finish IO for one ordered extent across a given range. The range can
+ * contain several ordered extents.
+ *
+ * @found_ret: Return the finished ordered extent
+ * @file_offset: File offset for the finished IO
+ * Will also be updated to one byte past the range that is
+ * recordered as finished. This allows caller to walk forward.
+ * @io_size: Length of the finish IO range
+ * @uptodate: If the IO finished without problem
*
- * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
- * to make sure this function only returns 1 once for a given ordered extent.
+ * Return true if any ordered extent is finished in the range, and update
+ * @found_ret and @file_offset.
+ * Return false otherwise.
*
- * file_offset is updated to one byte past the range that is recorded as
- * complete. This allows you to walk forward in the file.
+ * NOTE: Although The range can cross multiple ordered extents, only one
+ * ordered extent will be updated during one call. The caller is responsible to
+ * iterate all ordered extents in the range.
*/
-int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
- struct btrfs_ordered_extent **cached,
+bool btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
+ struct btrfs_ordered_extent **finished_ret,
u64 *file_offset, u64 io_size, int uptodate)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
- int ret;
+ bool finished = false;
unsigned long flags;
u64 dec_end;
u64 dec_start;
@@ -324,16 +345,12 @@ int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
spin_lock_irqsave(&tree->lock, flags);
node = tree_search(tree, *file_offset);
- if (!node) {
- ret = 1;
+ if (!node)
goto out;
- }
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (!offset_in_entry(entry, *file_offset)) {
- ret = 1;
+ if (!offset_in_entry(entry, *file_offset))
goto out;
- }
dec_start = max(*file_offset, entry->file_offset);
dec_end = min(*file_offset + io_size,
@@ -354,39 +371,50 @@ int btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
if (entry->bytes_left == 0) {
- ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+ /*
+ * Ensure only one caller can set the flag and finished_ret
+ * accordingly
+ */
+ finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
/* test_and_set_bit implies a barrier */
cond_wake_up_nomb(&entry->wait);
- } else {
- ret = 1;
}
out:
- if (!ret && cached && entry) {
- *cached = entry;
+ if (finished && finished_ret && entry) {
+ *finished_ret = entry;
refcount_inc(&entry->refs);
}
spin_unlock_irqrestore(&tree->lock, flags);
- return ret == 0;
+ return finished;
}
/*
- * this is used to account for finished IO across a given range
- * of the file. The IO should not span ordered extents. If
- * a given ordered_extent is completely done, 1 is returned, otherwise
- * 0.
+ * Finish IO for one ordered extent across a given range. The range can only
+ * contain one ordered extent.
+ *
+ * @cached: The cached ordered extent. If not NULL, we can skip the tree
+ * search and use the ordered extent directly.
+ * Will be also used to store the finished ordered extent.
+ * @file_offset: File offset for the finished IO
+ * @io_size: Length of the finish IO range
+ * @uptodate: If the IO finishes without problem
*
- * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
- * to make sure this function only returns 1 once for a given ordered extent.
+ * Return true if the ordered extent is finished in the range, and update
+ * @cached.
+ * Return false otherwise.
+ *
+ * NOTE: The range can NOT cross multiple ordered extents.
+ * Thus caller should ensure the range doesn't cross ordered extents.
*/
-int btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
- struct btrfs_ordered_extent **cached,
- u64 file_offset, u64 io_size, int uptodate)
+bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
+ struct btrfs_ordered_extent **cached,
+ u64 file_offset, u64 io_size, int uptodate)
{
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
unsigned long flags;
- int ret;
+ bool finished = false;
spin_lock_irqsave(&tree->lock, flags);
if (cached && *cached) {
@@ -395,41 +423,39 @@ int btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
}
node = tree_search(tree, file_offset);
- if (!node) {
- ret = 1;
+ if (!node)
goto out;
- }
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
have_entry:
- if (!offset_in_entry(entry, file_offset)) {
- ret = 1;
+ if (!offset_in_entry(entry, file_offset))
goto out;
- }
- if (io_size > entry->bytes_left) {
+ if (io_size > entry->bytes_left)
btrfs_crit(inode->root->fs_info,
"bad ordered accounting left %llu size %llu",
entry->bytes_left, io_size);
- }
+
entry->bytes_left -= io_size;
if (!uptodate)
set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
if (entry->bytes_left == 0) {
- ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+ /*
+ * Ensure only one caller can set the flag and finished_ret
+ * accordingly
+ */
+ finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
/* test_and_set_bit implies a barrier */
cond_wake_up_nomb(&entry->wait);
- } else {
- ret = 1;
}
out:
- if (!ret && cached && entry) {
+ if (finished && cached && entry) {
*cached = entry;
refcount_inc(&entry->refs);
}
spin_unlock_irqrestore(&tree->lock, flags);
- return ret == 0;
+ return finished;
}
/*
@@ -480,9 +506,8 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
false);
- if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
- percpu_counter_add_batch(&fs_info->dio_bytes, -entry->num_bytes,
- fs_info->delalloc_batch);
+ percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
+ fs_info->delalloc_batch);
tree = &btrfs_inode->ordered_tree;
spin_lock_irq(&tree->lock);
@@ -745,9 +770,10 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *ino
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
+ unsigned long flags;
tree = &inode->ordered_tree;
- spin_lock_irq(&tree->lock);
+ spin_lock_irqsave(&tree->lock, flags);
node = tree_search(tree, file_offset);
if (!node)
goto out;
@@ -758,7 +784,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *ino
if (entry)
refcount_inc(&entry->refs);
out:
- spin_unlock_irq(&tree->lock);
+ spin_unlock_irqrestore(&tree->lock, flags);
return entry;
}
@@ -898,6 +924,84 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
}
}
+static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
+ u64 len)
+{
+ struct inode *inode = ordered->inode;
+ u64 file_offset = ordered->file_offset + pos;
+ u64 disk_bytenr = ordered->disk_bytenr + pos;
+ u64 num_bytes = len;
+ u64 disk_num_bytes = len;
+ int type;
+ unsigned long flags_masked = ordered->flags & ~(1 << BTRFS_ORDERED_DIRECT);
+ int compress_type = ordered->compress_type;
+ unsigned long weight;
+ int ret;
+
+ weight = hweight_long(flags_masked);
+ WARN_ON_ONCE(weight > 1);
+ if (!weight)
+ type = 0;
+ else
+ type = __ffs(flags_masked);
+
+ if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
+ WARN_ON_ONCE(1);
+ ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
+ file_offset, disk_bytenr, num_bytes,
+ disk_num_bytes, compress_type);
+ } else if (test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
+ ret = btrfs_add_ordered_extent_dio(BTRFS_I(inode), file_offset,
+ disk_bytenr, num_bytes, disk_num_bytes, type);
+ } else {
+ ret = btrfs_add_ordered_extent(BTRFS_I(inode), file_offset,
+ disk_bytenr, num_bytes, disk_num_bytes, type);
+ }
+
+ return ret;
+}
+
+int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
+ u64 post)
+{
+ struct inode *inode = ordered->inode;
+ struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
+ struct rb_node *node;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ int ret = 0;
+
+ spin_lock_irq(&tree->lock);
+ /* Remove from tree once */
+ node = &ordered->rb_node;
+ rb_erase(node, &tree->tree);
+ RB_CLEAR_NODE(node);
+ if (tree->last == node)
+ tree->last = NULL;
+
+ ordered->file_offset += pre;
+ ordered->disk_bytenr += pre;
+ ordered->num_bytes -= (pre + post);
+ ordered->disk_num_bytes -= (pre + post);
+ ordered->bytes_left -= (pre + post);
+
+ /* Re-insert the node */
+ node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node);
+ if (node)
+ btrfs_panic(fs_info, -EEXIST,
+ "zoned: inconsistency in ordered tree at offset %llu",
+ ordered->file_offset);
+
+ spin_unlock_irq(&tree->lock);
+
+ if (pre)
+ ret = clone_ordered_extent(ordered, 0, pre);
+ if (post)
+ ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes,
+ post);
+
+ return ret;
+}
+
int __init ordered_data_init(void)
{
btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",