diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-03 05:59:25 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-03 05:59:25 +0300 |
commit | f3cdc8ae116e27d84e1f33c7a2995960cebb73ac (patch) | |
tree | db3dbbbbf82b76590f601b5caee5de3bef151c4b /fs/btrfs/backref.h | |
parent | 8eeae5bae1239c030ba0b34cac97ebd5e7ec1886 (diff) | |
parent | 2166e5edce9ac1edf3b113d6091ef72fcac2d6c4 (diff) | |
download | linux-f3cdc8ae116e27d84e1f33c7a2995960cebb73ac.tar.xz |
Merge tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"Highlights:
- speedup dead root detection during orphan cleanup, eg. when there
are many deleted subvolumes waiting to be cleaned, the trees are
now looked up in radix tree instead of a O(N^2) search
- snapshot creation with inherited qgroup will mark the qgroup
inconsistent, requires a rescan
- send will emit file capabilities after chown, this produces a
stream that does not need postprocessing to set the capabilities
again
- direct io ported to iomap infrastructure, cleaned up and simplified
code, notably removing last use of struct buffer_head in btrfs code
Core changes:
- factor out backreference iteration, to be used by ordinary
backreferences and relocation code
- improved global block reserve utilization
* better logic to serialize requests
* increased maximum available for unlink
* improved handling on large pages (64K)
- direct io cleanups and fixes
* simplify layering, where cloned bios were unnecessarily created
for some cases
* error handling fixes (submit, endio)
* remove repair worker thread, used to avoid deadlocks during
repair
- refactored block group reading code, preparatory work for new type
of block group storage that should improve mount time on large
filesystems
Cleanups:
- cleaned up (and slightly sped up) set/get helpers for metadata data
structure members
- root bit REF_COWS got renamed to SHAREABLE to reflect the that the
blocks of the tree get shared either among subvolumes or with the
relocation trees
Fixes:
- when subvolume deletion fails due to ENOSPC, the filesystem is not
turned read-only
- device scan deals with devices from other filesystems that changed
ownership due to overwrite (mkfs)
- fix a race between scrub and block group removal/allocation
- fix long standing bug of a runaway balance operation, printing the
same line to the syslog, caused by a stale status bit on a reloc
tree that prevented progress
- fix corrupt log due to concurrent fsync of inodes with shared
extents
- fix space underflow for NODATACOW and buffered writes when it for
some reason needs to fallback to COW mode"
* tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (133 commits)
btrfs: fix space_info bytes_may_use underflow during space cache writeout
btrfs: fix space_info bytes_may_use underflow after nocow buffered write
btrfs: fix wrong file range cleanup after an error filling dealloc range
btrfs: remove redundant local variable in read_block_for_search
btrfs: open code key_search
btrfs: split btrfs_direct_IO to read and write part
btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK
fs: remove dio_end_io()
btrfs: switch to iomap_dio_rw() for dio
iomap: remove lockdep_assert_held()
iomap: add a filesystem hook for direct I/O bio submission
fs: export generic_file_buffered_read()
btrfs: turn space cache writeout failure messages into debug messages
btrfs: include error on messages about failure to write space/inode caches
btrfs: remove useless 'fail_unlock' label from btrfs_csum_file_blocks()
btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums
btrfs: make checksum item extension more efficient
btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents
btrfs: unexport btrfs_compress_set_level()
btrfs: simplify iget helpers
...
Diffstat (limited to 'fs/btrfs/backref.h')
-rw-r--r-- | fs/btrfs/backref.h | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 723d6da99114..ff705cc564a9 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -8,6 +8,7 @@ #include <linux/btrfs.h> #include "ulist.h" +#include "disk-io.h" #include "extent_io.h" struct inode_fs_paths { @@ -78,4 +79,300 @@ struct prelim_ref { u64 wanted_disk_byte; }; +/* + * Iterate backrefs of one extent. + * + * Now it only supports iteration of tree block in commit root. + */ +struct btrfs_backref_iter { + u64 bytenr; + struct btrfs_path *path; + struct btrfs_fs_info *fs_info; + struct btrfs_key cur_key; + u32 item_ptr; + u32 cur_ptr; + u32 end_ptr; +}; + +struct btrfs_backref_iter *btrfs_backref_iter_alloc( + struct btrfs_fs_info *fs_info, gfp_t gfp_flag); + +static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter) +{ + if (!iter) + return; + btrfs_free_path(iter->path); + kfree(iter); +} + +static inline struct extent_buffer *btrfs_backref_get_eb( + struct btrfs_backref_iter *iter) +{ + if (!iter) + return NULL; + return iter->path->nodes[0]; +} + +/* + * For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data + * is btrfs_tree_block_info, without a btrfs_extent_inline_ref header. + * + * This helper determines if that's the case. + */ +static inline bool btrfs_backref_has_tree_block_info( + struct btrfs_backref_iter *iter) +{ + if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY && + iter->cur_ptr - iter->item_ptr == sizeof(struct btrfs_extent_item)) + return true; + return false; +} + +int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr); + +int btrfs_backref_iter_next(struct btrfs_backref_iter *iter); + +static inline bool btrfs_backref_iter_is_inline_ref( + struct btrfs_backref_iter *iter) +{ + if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY || + iter->cur_key.type == BTRFS_METADATA_ITEM_KEY) + return true; + return false; +} + +static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter) +{ + iter->bytenr = 0; + iter->item_ptr = 0; + iter->cur_ptr = 0; + iter->end_ptr = 0; + btrfs_release_path(iter->path); + memset(&iter->cur_key, 0, sizeof(iter->cur_key)); +} + +/* + * Backref cache related structures + * + * The whole objective of backref_cache is to build a bi-directional map + * of tree blocks (represented by backref_node) and all their parents. + */ + +/* + * Represent a tree block in the backref cache + */ +struct btrfs_backref_node { + struct { + struct rb_node rb_node; + u64 bytenr; + }; /* Use rb_simple_node for search/insert */ + + u64 new_bytenr; + /* Objectid of tree block owner, can be not uptodate */ + u64 owner; + /* Link to pending, changed or detached list */ + struct list_head list; + + /* List of upper level edges, which link this node to its parents */ + struct list_head upper; + /* List of lower level edges, which link this node to its children */ + struct list_head lower; + + /* NULL if this node is not tree root */ + struct btrfs_root *root; + /* Extent buffer got by COWing the block */ + struct extent_buffer *eb; + /* Level of the tree block */ + unsigned int level:8; + /* Is the block in a non-shareable tree */ + unsigned int cowonly:1; + /* 1 if no child node is in the cache */ + unsigned int lowest:1; + /* Is the extent buffer locked */ + unsigned int locked:1; + /* Has the block been processed */ + unsigned int processed:1; + /* Have backrefs of this block been checked */ + unsigned int checked:1; + /* + * 1 if corresponding block has been COWed but some upper level block + * pointers may not point to the new location + */ + unsigned int pending:1; + /* 1 if the backref node isn't connected to any other backref node */ + unsigned int detached:1; + + /* + * For generic purpose backref cache, where we only care if it's a reloc + * root, doesn't care the source subvolid. + */ + unsigned int is_reloc_root:1; +}; + +#define LOWER 0 +#define UPPER 1 + +/* + * Represent an edge connecting upper and lower backref nodes. + */ +struct btrfs_backref_edge { + /* + * list[LOWER] is linked to btrfs_backref_node::upper of lower level + * node, and list[UPPER] is linked to btrfs_backref_node::lower of + * upper level node. + * + * Also, build_backref_tree() uses list[UPPER] for pending edges, before + * linking list[UPPER] to its upper level nodes. + */ + struct list_head list[2]; + + /* Two related nodes */ + struct btrfs_backref_node *node[2]; +}; + +struct btrfs_backref_cache { + /* Red black tree of all backref nodes in the cache */ + struct rb_root rb_root; + /* For passing backref nodes to btrfs_reloc_cow_block */ + struct btrfs_backref_node *path[BTRFS_MAX_LEVEL]; + /* + * List of blocks that have been COWed but some block pointers in upper + * level blocks may not reflect the new location + */ + struct list_head pending[BTRFS_MAX_LEVEL]; + /* List of backref nodes with no child node */ + struct list_head leaves; + /* List of blocks that have been COWed in current transaction */ + struct list_head changed; + /* List of detached backref node. */ + struct list_head detached; + + u64 last_trans; + + int nr_nodes; + int nr_edges; + + /* List of unchecked backref edges during backref cache build */ + struct list_head pending_edge; + + /* List of useless backref nodes during backref cache build */ + struct list_head useless_node; + + struct btrfs_fs_info *fs_info; + + /* + * Whether this cache is for relocation + * + * Reloction backref cache require more info for reloc root compared + * to generic backref cache. + */ + unsigned int is_reloc; +}; + +void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info, + struct btrfs_backref_cache *cache, int is_reloc); +struct btrfs_backref_node *btrfs_backref_alloc_node( + struct btrfs_backref_cache *cache, u64 bytenr, int level); +struct btrfs_backref_edge *btrfs_backref_alloc_edge( + struct btrfs_backref_cache *cache); + +#define LINK_LOWER (1 << 0) +#define LINK_UPPER (1 << 1) +static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge, + struct btrfs_backref_node *lower, + struct btrfs_backref_node *upper, + int link_which) +{ + ASSERT(upper && lower && upper->level == lower->level + 1); + edge->node[LOWER] = lower; + edge->node[UPPER] = upper; + if (link_which & LINK_LOWER) + list_add_tail(&edge->list[LOWER], &lower->upper); + if (link_which & LINK_UPPER) + list_add_tail(&edge->list[UPPER], &upper->lower); +} + +static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache, + struct btrfs_backref_node *node) +{ + if (node) { + cache->nr_nodes--; + btrfs_put_root(node->root); + kfree(node); + } +} + +static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache, + struct btrfs_backref_edge *edge) +{ + if (edge) { + cache->nr_edges--; + kfree(edge); + } +} + +static inline void btrfs_backref_unlock_node_buffer( + struct btrfs_backref_node *node) +{ + if (node->locked) { + btrfs_tree_unlock(node->eb); + node->locked = 0; + } +} + +static inline void btrfs_backref_drop_node_buffer( + struct btrfs_backref_node *node) +{ + if (node->eb) { + btrfs_backref_unlock_node_buffer(node); + free_extent_buffer(node->eb); + node->eb = NULL; + } +} + +/* + * Drop the backref node from cache without cleaning up its children + * edges. + * + * This can only be called on node without parent edges. + * The children edges are still kept as is. + */ +static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree, + struct btrfs_backref_node *node) +{ + BUG_ON(!list_empty(&node->upper)); + + btrfs_backref_drop_node_buffer(node); + list_del(&node->list); + list_del(&node->lower); + if (!RB_EMPTY_NODE(&node->rb_node)) + rb_erase(&node->rb_node, &tree->rb_root); + btrfs_backref_free_node(tree, node); +} + +void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache, + struct btrfs_backref_node *node); + +void btrfs_backref_release_cache(struct btrfs_backref_cache *cache); + +static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info, + u64 bytenr, int errno) +{ + btrfs_panic(fs_info, errno, + "Inconsistency in backref cache found at offset %llu", + bytenr); +} + +int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, + struct btrfs_path *path, + struct btrfs_backref_iter *iter, + struct btrfs_key *node_key, + struct btrfs_backref_node *cur); + +int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache, + struct btrfs_backref_node *start); + +void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache, + struct btrfs_backref_node *node); + #endif |