summaryrefslogtreecommitdiff
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-17 01:12:56 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-17 01:12:56 +0300
commita18f8775419d3df282dd83efdb51c5a64d092f31 (patch)
tree1e0abc5c1d30e8bc58dc23099017eca496992fd2 /fs/btrfs/extent_io.c
parent3eb514866f20c5eb74637279774b6d73b855480a (diff)
parente02d48eaaed77f6c36916a7aa65c451e1f9d9aab (diff)
downloadlinux-a18f8775419d3df282dd83efdb51c5a64d092f31.tar.xz
Merge tag 'for-5.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Highlights: - chunks that have been trimmed and unchanged since last mount are tracked and skipped on repeated trims - use hw assissed crc32c on more arches, speedups if native instructions or optimized implementation is available - the RAID56 incompat bit is automatically removed when the last block group of that type is removed Fixes: - fsync fix for reflink on NODATACOW files that could lead to ENOSPC - fix data loss after inode eviction, renaming it, and fsync it - fix fsync not persisting dentry deletions due to inode evictions - update ctime/mtime/iversion after hole punching - fix compression type validation (reported by KASAN) - send won't be allowed to start when relocation is in progress, this can cause spurious errors or produce incorrect send stream Core: - new tracepoints for space update - tree-checker: better check for end of extents for some tree items - preparatory work for more checksum algorithms - run delayed iput at unlink time and don't push the work to cleaner thread where it's not properly throttled - wrap block mapping to structures and helpers, base for further refactoring - split large files, part 1: - space info handling - block group reservations - delayed refs - delayed allocation - other cleanups and refactoring" * tag 'for-5.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (103 commits) btrfs: fix memory leak of path on error return path btrfs: move the subvolume reservation stuff out of extent-tree.c btrfs: migrate the delalloc space stuff to it's own home btrfs: migrate btrfs_trans_release_chunk_metadata btrfs: migrate the delayed refs rsv code btrfs: Evaluate io_tree in find_lock_delalloc_range() btrfs: migrate the global_block_rsv helpers to block-rsv.c btrfs: migrate the block-rsv code to block-rsv.c btrfs: stop using block_rsv_release_bytes everywhere btrfs: cleanup the target logic in __btrfs_block_rsv_release btrfs: export __btrfs_block_rsv_release btrfs: export btrfs_block_rsv_add_bytes btrfs: move btrfs_block_rsv definitions into it's own header btrfs: Simplify update of space_info in __reserve_metadata_bytes() btrfs: unexport can_overcommit btrfs: move reserve_metadata_bytes and supporting code to space-info.c btrfs: move dump_space_info to space-info.c btrfs: export block_rsv_use_bytes btrfs: move btrfs_space_info_add_*_bytes to space-info.c btrfs: move the space info update macro to space-info.h ...
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c149
1 files changed, 99 insertions, 50 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5106008f5e28..1ff438fd5bc2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -359,6 +359,24 @@ do_insert:
return NULL;
}
+/**
+ * __etree_search - searche @tree for an entry that contains @offset. Such
+ * entry would have entry->start <= offset && entry->end >= offset.
+ *
+ * @tree - the tree to search
+ * @offset - offset that should fall within an entry in @tree
+ * @next_ret - pointer to the first entry whose range ends after @offset
+ * @prev - pointer to the first entry whose range begins before @offset
+ * @p_ret - pointer where new node should be anchored (used when inserting an
+ * entry in the tree)
+ * @parent_ret - points to entry which would have been the parent of the entry,
+ * containing @offset
+ *
+ * This function returns a pointer to the entry that contains @offset byte
+ * address. If no such entry exists, then NULL is returned and the other
+ * pointer arguments to the function are filled, otherwise the found entry is
+ * returned and other pointers are left untouched.
+ */
static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
struct rb_node **next_ret,
struct rb_node **prev_ret,
@@ -504,9 +522,11 @@ static int insert_state(struct extent_io_tree *tree,
{
struct rb_node *node;
- if (end < start)
- WARN(1, KERN_ERR "BTRFS: end < start %llu %llu\n",
- end, start);
+ if (end < start) {
+ btrfs_err(tree->fs_info,
+ "insert state: end < start %llu %llu", end, start);
+ WARN_ON(1);
+ }
state->start = start;
state->end = end;
@@ -516,7 +536,8 @@ static int insert_state(struct extent_io_tree *tree,
if (node) {
struct extent_state *found;
found = rb_entry(node, struct extent_state, rb_node);
- pr_err("BTRFS: found node %llu %llu on insert of %llu %llu\n",
+ btrfs_err(tree->fs_info,
+ "found node %llu %llu on insert of %llu %llu",
found->start, found->end, start, end);
return -EEXIST;
}
@@ -1537,8 +1558,8 @@ out:
}
/**
- * find_first_clear_extent_bit - finds the first range that has @bits not set
- * and that starts after @start
+ * find_first_clear_extent_bit - find the first range that has @bits not set.
+ * This range could start before @start.
*
* @tree - the tree to search
* @start - the offset at/after which the found extent should start
@@ -1578,12 +1599,52 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
goto out;
}
}
+ /*
+ * At this point 'node' either contains 'start' or start is
+ * before 'node'
+ */
state = rb_entry(node, struct extent_state, rb_node);
- if (in_range(start, state->start, state->end - state->start + 1) &&
- (state->state & bits)) {
- start = state->end + 1;
+
+ if (in_range(start, state->start, state->end - state->start + 1)) {
+ if (state->state & bits) {
+ /*
+ * |--range with bits sets--|
+ * |
+ * start
+ */
+ start = state->end + 1;
+ } else {
+ /*
+ * 'start' falls within a range that doesn't
+ * have the bits set, so take its start as
+ * the beginning of the desired range
+ *
+ * |--range with bits cleared----|
+ * |
+ * start
+ */
+ *start_ret = state->start;
+ break;
+ }
} else {
- *start_ret = start;
+ /*
+ * |---prev range---|---hole/unset---|---node range---|
+ * |
+ * start
+ *
+ * or
+ *
+ * |---hole/unset--||--first node--|
+ * 0 |
+ * start
+ */
+ if (prev) {
+ state = rb_entry(prev, struct extent_state,
+ rb_node);
+ *start_ret = state->end + 1;
+ } else {
+ *start_ret = 0;
+ }
break;
}
}
@@ -1719,10 +1780,10 @@ static noinline int lock_delalloc_pages(struct inode *inode,
*/
EXPORT_FOR_TESTS
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
- struct extent_io_tree *tree,
struct page *locked_page, u64 *start,
u64 *end)
{
+ struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
u64 delalloc_start;
u64 delalloc_end;
@@ -2800,12 +2861,11 @@ static inline void btrfs_io_bio_init(struct btrfs_io_bio *btrfs_bio)
* never fail. We're returning a bio right now but you can call btrfs_io_bio
* for the appropriate container_of magic
*/
-struct bio *btrfs_bio_alloc(struct block_device *bdev, u64 first_byte)
+struct bio *btrfs_bio_alloc(u64 first_byte)
{
struct bio *bio;
bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
- bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = first_byte >> 9;
btrfs_io_bio_init(btrfs_io_bio(bio));
return bio;
@@ -2916,7 +2976,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
}
}
- bio = btrfs_bio_alloc(bdev, offset);
+ bio = btrfs_bio_alloc(offset);
+ bio_set_dev(bio, bdev);
bio_add_page(bio, page, page_size, pg_offset);
bio->bi_end_io = end_io_func;
bio->bi_private = tree;
@@ -3204,21 +3265,10 @@ static inline void contiguous_readpages(struct extent_io_tree *tree,
unsigned long *bio_flags,
u64 *prev_em_start)
{
- struct inode *inode;
- struct btrfs_ordered_extent *ordered;
+ struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
int index;
- inode = pages[0]->mapping->host;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
- end - start + 1);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
+ btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
@@ -3234,22 +3284,12 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
unsigned long *bio_flags,
unsigned int read_flags)
{
- struct inode *inode = page->mapping->host;
- struct btrfs_ordered_extent *ordered;
+ struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
int ret;
- while (1) {
- lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), start,
- PAGE_SIZE);
- if (!ordered)
- break;
- unlock_extent(tree, start, end);
- btrfs_start_ordered_extent(inode, ordered, 1);
- btrfs_put_ordered_extent(ordered);
- }
+ btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
bio_flags, read_flags, NULL);
@@ -3290,7 +3330,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
struct page *page, struct writeback_control *wbc,
u64 delalloc_start, unsigned long *nr_written)
{
- struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
u64 page_end = delalloc_start + PAGE_SIZE - 1;
bool found;
u64 delalloc_to_write = 0;
@@ -3300,8 +3339,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
while (delalloc_end < page_end) {
- found = find_lock_delalloc_range(inode, tree,
- page,
+ found = find_lock_delalloc_range(inode, page,
&delalloc_start,
&delalloc_end);
if (!found) {
@@ -3310,7 +3348,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
}
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
delalloc_end, &page_started, nr_written, wbc);
- /* File system has been set read-only */
if (ret) {
SetPageError(page);
/*
@@ -4542,6 +4579,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
struct btrfs_path *path;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct fiemap_cache cache = { 0 };
+ struct ulist *roots;
+ struct ulist *tmp_ulist;
int end = 0;
u64 em_start = 0;
u64 em_len = 0;
@@ -4555,6 +4594,13 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
return -ENOMEM;
path->leave_spinning = 1;
+ roots = ulist_alloc(GFP_KERNEL);
+ tmp_ulist = ulist_alloc(GFP_KERNEL);
+ if (!roots || !tmp_ulist) {
+ ret = -ENOMEM;
+ goto out_free_ulist;
+ }
+
start = round_down(start, btrfs_inode_sectorsize(inode));
len = round_up(max, btrfs_inode_sectorsize(inode)) - start;
@@ -4565,8 +4611,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
ret = btrfs_lookup_file_extent(NULL, root, path,
btrfs_ino(BTRFS_I(inode)), -1, 0);
if (ret < 0) {
- btrfs_free_path(path);
- return ret;
+ goto out_free_ulist;
} else {
WARN_ON(!ret);
if (ret == 1)
@@ -4675,7 +4720,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
*/
ret = btrfs_check_shared(root,
btrfs_ino(BTRFS_I(inode)),
- bytenr);
+ bytenr, roots, tmp_ulist);
if (ret < 0)
goto out_free;
if (ret)
@@ -4718,9 +4763,13 @@ out_free:
ret = emit_last_fiemap_cache(fieinfo, &cache);
free_extent_map(em);
out:
- btrfs_free_path(path);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len - 1,
&cached_state);
+
+out_free_ulist:
+ btrfs_free_path(path);
+ ulist_free(roots);
+ ulist_free(tmp_ulist);
return ret;
}
@@ -4808,7 +4857,7 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
eb->bflags = 0;
rwlock_init(&eb->lock);
atomic_set(&eb->blocking_readers, 0);
- atomic_set(&eb->blocking_writers, 0);
+ eb->blocking_writers = 0;
eb->lock_nested = false;
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);
@@ -4827,10 +4876,10 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
BUG_ON(len > MAX_INLINE_EXTENT_BUFFER_SIZE);
#ifdef CONFIG_BTRFS_DEBUG
- atomic_set(&eb->spinning_writers, 0);
+ eb->spinning_writers = 0;
atomic_set(&eb->spinning_readers, 0);
atomic_set(&eb->read_locks, 0);
- atomic_set(&eb->write_locks, 0);
+ eb->write_locks = 0;
#endif
return eb;