Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r-- | fs/btrfs/ordered-data.c | 253 |
1 file changed, 195 insertions, 58 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 6c413bb451a3..6eb41b7c0c84 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -16,6 +16,7 @@
 #include "compression.h"
 #include "delalloc-space.h"
 #include "qgroup.h"
+#include "subpage.h"
 
 static struct kmem_cache *btrfs_ordered_extent_cache;
 
@@ -300,81 +301,142 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
 }
 
 /*
- * Finish IO for one ordered extent across a given range. The range can
- * contain several ordered extents.
+ * Mark all ordered extents io inside the specified range finished.
  *
- * @found_ret:   Return the finished ordered extent
- * @file_offset: File offset for the finished IO
- *               Will also be updated to one byte past the range that is
- *               recordered as finished. This allows caller to walk forward.
- * @io_size:     Length of the finish IO range
- * @uptodate:    If the IO finished without problem
- *
- * Return true if any ordered extent is finished in the range, and update
- * @found_ret and @file_offset.
- * Return false otherwise.
+ * @page:        The involved page for the operation.
+ *               For uncompressed buffered IO, the page status also needs to be
+ *               updated to indicate whether the pending ordered io is finished.
+ *               Can be NULL for direct IO and compressed write.
+ *               For these cases, callers are ensured they won't execute the
+ *               endio function twice.
+ * @finish_func: The function to be executed when all the IO of an ordered
+ *               extent are finished.
  *
- * NOTE: Although The range can cross multiple ordered extents, only one
- * ordered extent will be updated during one call. The caller is responsible to
- * iterate all ordered extents in the range.
+ * This function is called for endio, thus the range must have ordered
+ * extent(s) covering it.
  */
-bool btrfs_dec_test_first_ordered_pending(struct btrfs_inode *inode,
-                                   struct btrfs_ordered_extent **finished_ret,
-                                   u64 *file_offset, u64 io_size, int uptodate)
+void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
+                                    struct page *page, u64 file_offset,
+                                    u64 num_bytes, btrfs_func_t finish_func,
+                                    bool uptodate)
 {
-        struct btrfs_fs_info *fs_info = inode->root->fs_info;
         struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+        struct btrfs_fs_info *fs_info = inode->root->fs_info;
+        struct btrfs_workqueue *wq;
         struct rb_node *node;
         struct btrfs_ordered_extent *entry = NULL;
-        bool finished = false;
         unsigned long flags;
-        u64 dec_end;
-        u64 dec_start;
-        u64 to_dec;
+        u64 cur = file_offset;
+
+        if (btrfs_is_free_space_inode(inode))
+                wq = fs_info->endio_freespace_worker;
+        else
+                wq = fs_info->endio_write_workers;
+
+        if (page)
+                ASSERT(page->mapping && page_offset(page) <= file_offset &&
+                       file_offset + num_bytes <= page_offset(page) + PAGE_SIZE);
 
         spin_lock_irqsave(&tree->lock, flags);
-        node = tree_search(tree, *file_offset);
-        if (!node)
-                goto out;
+        while (cur < file_offset + num_bytes) {
+                u64 entry_end;
+                u64 end;
+                u32 len;
 
-        entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
-        if (!in_range(*file_offset, entry->file_offset, entry->num_bytes))
-                goto out;
+                node = tree_search(tree, cur);
+                /* No ordered extents at all */
+                if (!node)
+                        break;
 
-        dec_start = max(*file_offset, entry->file_offset);
-        dec_end = min(*file_offset + io_size,
-                      entry->file_offset + entry->num_bytes);
-        *file_offset = dec_end;
-        if (dec_start > dec_end) {
-                btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu",
-                           dec_start, dec_end);
-        }
-        to_dec = dec_end - dec_start;
-        if (to_dec > entry->bytes_left) {
-                btrfs_crit(fs_info,
-                           "bad ordered accounting left %llu size %llu",
-                           entry->bytes_left, to_dec);
-        }
-        entry->bytes_left -= to_dec;
-        if (!uptodate)
-                set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
+                entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+                entry_end = entry->file_offset + entry->num_bytes;
+                /*
+                 * |<-- OE --->|  |
+                 *                cur
+                 * Go to next OE.
+                 */
+                if (cur >= entry_end) {
+                        node = rb_next(node);
+                        /* No more ordered extents, exit */
+                        if (!node)
+                                break;
+                        entry = rb_entry(node, struct btrfs_ordered_extent,
+                                         rb_node);
+
+                        /* Go to next ordered extent and continue */
+                        cur = entry->file_offset;
+                        continue;
+                }
+                /*
+                 * |    |<--- OE --->|
+                 * cur
+                 * Go to the start of OE.
+                 */
+                if (cur < entry->file_offset) {
+                        cur = entry->file_offset;
+                        continue;
+                }
 
-        if (entry->bytes_left == 0) {
                 /*
-                 * Ensure only one caller can set the flag and finished_ret
-                 * accordingly
+                 * Now we are definitely inside one ordered extent.
+                 *
+                 * |<--- OE --->|
+                 *      |
+                 *      cur
                  */
-                finished = !test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
-                /* test_and_set_bit implies a barrier */
-                cond_wake_up_nomb(&entry->wait);
-        }
-out:
-        if (finished && finished_ret && entry) {
-                *finished_ret = entry;
-                refcount_inc(&entry->refs);
+                end = min(entry->file_offset + entry->num_bytes,
+                          file_offset + num_bytes) - 1;
+                ASSERT(end + 1 - cur < U32_MAX);
+                len = end + 1 - cur;
+
+                if (page) {
+                        /*
+                         * Ordered (Private2) bit indicates whether we still
+                         * have pending io unfinished for the ordered extent.
+                         *
+                         * If there's no such bit, we need to skip to next range.
+                         */
+                        if (!btrfs_page_test_ordered(fs_info, page, cur, len)) {
+                                cur += len;
+                                continue;
+                        }
+                        btrfs_page_clear_ordered(fs_info, page, cur, len);
+                }
+
+                /* Now we're fine to update the accounting */
+                if (unlikely(len > entry->bytes_left)) {
+                        WARN_ON(1);
+                        btrfs_crit(fs_info,
+"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%u left=%llu",
+                                   inode->root->root_key.objectid,
+                                   btrfs_ino(inode),
+                                   entry->file_offset,
+                                   entry->num_bytes,
+                                   len, entry->bytes_left);
+                        entry->bytes_left = 0;
+                } else {
+                        entry->bytes_left -= len;
+                }
+
+                if (!uptodate)
+                        set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
+
+                /*
+                 * All the IO of the ordered extent is finished, we need to queue
+                 * the finish_func to be executed.
+                 */
+                if (entry->bytes_left == 0) {
+                        set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+                        cond_wake_up(&entry->wait);
+                        refcount_inc(&entry->refs);
+                        spin_unlock_irqrestore(&tree->lock, flags);
+                        btrfs_init_work(&entry->work, finish_func, NULL, NULL);
+                        btrfs_queue_work(wq, &entry->work);
+                        spin_lock_irqsave(&tree->lock, flags);
+                }
+                cur += len;
         }
         spin_unlock_irqrestore(&tree->lock, flags);
-        return finished;
 }
 
 /*
@@ -870,6 +932,81 @@ out:
 }
 
 /*
+ * Lookup the first ordered extent that overlaps the range
+ * [@file_offset, @file_offset + @len).
+ *
+ * The difference between this and btrfs_lookup_first_ordered_extent() is
+ * that this one won't return any ordered extent that does not overlap the range.
+ * And the difference against btrfs_lookup_ordered_extent() is, this function
+ * ensures the first ordered extent gets returned.
+ */
+struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
+                        struct btrfs_inode *inode, u64 file_offset, u64 len)
+{
+        struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
+        struct rb_node *node;
+        struct rb_node *cur;
+        struct rb_node *prev;
+        struct rb_node *next;
+        struct btrfs_ordered_extent *entry = NULL;
+
+        spin_lock_irq(&tree->lock);
+        node = tree->tree.rb_node;
+        /*
+         * Here we don't want to use tree_search() which will use tree->last
+         * and screw up the search order.
+         * And __tree_search() can't return the adjacent ordered extents
+         * either, thus here we do our own search.
+         */
+        while (node) {
+                entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+
+                if (file_offset < entry->file_offset) {
+                        node = node->rb_left;
+                } else if (file_offset >= entry_end(entry)) {
+                        node = node->rb_right;
+                } else {
+                        /*
+                         * Direct hit, got an ordered extent that starts at
+                         * @file_offset
+                         */
+                        goto out;
+                }
+        }
+        if (!entry) {
+                /* Empty tree */
+                goto out;
+        }
+
+        cur = &entry->rb_node;
+        /* We got an entry around @file_offset, check adjacent entries */
+        if (entry->file_offset < file_offset) {
+                prev = cur;
+                next = rb_next(cur);
+        } else {
+                prev = rb_prev(cur);
+                next = cur;
+        }
+        if (prev) {
+                entry = rb_entry(prev, struct btrfs_ordered_extent, rb_node);
+                if (range_overlaps(entry, file_offset, len))
+                        goto out;
+        }
+        if (next) {
+                entry = rb_entry(next, struct btrfs_ordered_extent, rb_node);
+                if (range_overlaps(entry, file_offset, len))
+                        goto out;
+        }
+        /* No ordered extent in the range */
+        entry = NULL;
+out:
+        if (entry)
+                refcount_inc(&entry->refs);
+        spin_unlock_irq(&tree->lock);
+        return entry;
+}
+
+/*
  * btrfs_flush_ordered_range - Lock the passed range and ensures all pending
  * ordered extents in it are run to completion.
  *
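For orientation, the sketch below shows roughly how an endio path would be expected to drive the two helpers introduced above: the whole completed range is handed to btrfs_mark_ordered_io_finished() in one call, which walks every ordered extent inside it and queues @finish_func once an extent's bytes_left reaches zero, while btrfs_lookup_first_ordered_range() lets a caller ask whether any ordered extent still overlaps a range. This is a minimal sketch, assuming it sits inside fs/btrfs; the example_* function names are illustrative placeholders and are not part of this diff, whereas btrfs_mark_ordered_io_finished(), btrfs_lookup_first_ordered_range(), btrfs_finish_ordered_io() and btrfs_put_ordered_extent() are the existing btrfs functions being exercised.

/*
 * Illustrative sketch only, not part of this diff: intended calling
 * convention of the helpers added above.
 */
#include "ctree.h"
#include "btrfs_inode.h"
#include "ordered-data.h"

/* Work callback run once all the IO of one ordered extent has finished. */
static void example_finish_ordered_fn(struct btrfs_work *work)
{
        struct btrfs_ordered_extent *ordered;

        ordered = container_of(work, struct btrfs_ordered_extent, work);
        btrfs_finish_ordered_io(ordered);
}

/* Endio for one written range, assumed to stay within a single page. */
static void example_writepage_endio(struct btrfs_inode *inode,
                                    struct page *page, u64 start, u64 end,
                                    bool uptodate)
{
        /* Hand over the whole range; the helper finds every OE inside it. */
        btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start,
                                       example_finish_ordered_fn, uptodate);
}

/*
 * Check whether any ordered extent still overlaps [file_offset, +len).
 * The lookup returns a referenced extent, so the reference is dropped here.
 */
static bool example_range_has_ordered(struct btrfs_inode *inode,
                                      u64 file_offset, u64 len)
{
        struct btrfs_ordered_extent *ordered;

        ordered = btrfs_lookup_first_ordered_range(inode, file_offset, len);
        if (!ordered)
                return false;
        btrfs_put_ordered_extent(ordered);
        return true;
}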