summaryrefslogtreecommitdiff
path: root/fs/btrfs/backref.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 05:59:25 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-03 05:59:25 +0300
commitf3cdc8ae116e27d84e1f33c7a2995960cebb73ac (patch)
treedb3dbbbbf82b76590f601b5caee5de3bef151c4b /fs/btrfs/backref.h
parent8eeae5bae1239c030ba0b34cac97ebd5e7ec1886 (diff)
parent2166e5edce9ac1edf3b113d6091ef72fcac2d6c4 (diff)
downloadlinux-f3cdc8ae116e27d84e1f33c7a2995960cebb73ac.tar.xz
Merge tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Highlights: - speedup dead root detection during orphan cleanup, eg. when there are many deleted subvolumes waiting to be cleaned, the trees are now looked up in radix tree instead of a O(N^2) search - snapshot creation with inherited qgroup will mark the qgroup inconsistent, requires a rescan - send will emit file capabilities after chown, this produces a stream that does not need postprocessing to set the capabilities again - direct io ported to iomap infrastructure, cleaned up and simplified code, notably removing last use of struct buffer_head in btrfs code Core changes: - factor out backreference iteration, to be used by ordinary backreferences and relocation code - improved global block reserve utilization * better logic to serialize requests * increased maximum available for unlink * improved handling on large pages (64K) - direct io cleanups and fixes * simplify layering, where cloned bios were unnecessarily created for some cases * error handling fixes (submit, endio) * remove repair worker thread, used to avoid deadlocks during repair - refactored block group reading code, preparatory work for new type of block group storage that should improve mount time on large filesystems Cleanups: - cleaned up (and slightly sped up) set/get helpers for metadata data structure members - root bit REF_COWS got renamed to SHAREABLE to reflect the that the blocks of the tree get shared either among subvolumes or with the relocation trees Fixes: - when subvolume deletion fails due to ENOSPC, the filesystem is not turned read-only - device scan deals with devices from other filesystems that changed ownership due to overwrite (mkfs) - fix a race between scrub and block group removal/allocation - fix long standing bug of a runaway balance operation, printing the same line to the syslog, caused by a stale status bit on a reloc tree that prevented progress - fix corrupt log due to concurrent fsync of inodes with shared extents - fix space 
underflow for NODATACOW and buffered writes when it for some reason needs to fallback to COW mode" * tag 'for-5.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (133 commits) btrfs: fix space_info bytes_may_use underflow during space cache writeout btrfs: fix space_info bytes_may_use underflow after nocow buffered write btrfs: fix wrong file range cleanup after an error filling dealloc range btrfs: remove redundant local variable in read_block_for_search btrfs: open code key_search btrfs: split btrfs_direct_IO to read and write part btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK fs: remove dio_end_io() btrfs: switch to iomap_dio_rw() for dio iomap: remove lockdep_assert_held() iomap: add a filesystem hook for direct I/O bio submission fs: export generic_file_buffered_read() btrfs: turn space cache writeout failure messages into debug messages btrfs: include error on messages about failure to write space/inode caches btrfs: remove useless 'fail_unlock' label from btrfs_csum_file_blocks() btrfs: do not ignore error from btrfs_next_leaf() when inserting checksums btrfs: make checksum item extension more efficient btrfs: fix corrupt log due to concurrent fsync of inodes with shared extents btrfs: unexport btrfs_compress_set_level() btrfs: simplify iget helpers ...
Diffstat (limited to 'fs/btrfs/backref.h')
-rw-r--r--fs/btrfs/backref.h297
1 files changed, 297 insertions, 0 deletions
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 723d6da99114..ff705cc564a9 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -8,6 +8,7 @@
#include <linux/btrfs.h>
#include "ulist.h"
+#include "disk-io.h"
#include "extent_io.h"
struct inode_fs_paths {
@@ -78,4 +79,300 @@ struct prelim_ref {
u64 wanted_disk_byte;
};
+/*
+ * Iterate backrefs of one extent.
+ *
+ * Currently only iteration over the backrefs of a tree block in the commit
+ * root is supported.
+ *
+ * NOTE(review): item_ptr, cur_ptr and end_ptr look like offsets of the item
+ * start, the current position and the item end -- this matches how
+ * btrfs_backref_has_tree_block_info() compares cur_ptr - item_ptr against
+ * sizeof(struct btrfs_extent_item), but confirm against the iterator code.
+ */
+struct btrfs_backref_iter {
+ u64 bytenr; /* Reset to 0 by btrfs_backref_iter_release() */
+ struct btrfs_path *path; /* nodes[0] is returned by btrfs_backref_get_eb() */
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_key cur_key; /* Its type selects inline vs. non-inline refs */
+ u32 item_ptr;
+ u32 cur_ptr;
+ u32 end_ptr;
+};
+
+/*
+ * Allocate a backref iterator for @fs_info using @gfp_flag.
+ *
+ * Defined outside this header.  NOTE(review): presumably returns NULL on
+ * allocation failure and the result is meant to be released with
+ * btrfs_backref_iter_free() -- confirm against the definition.
+ */
+struct btrfs_backref_iter *btrfs_backref_iter_alloc(
+ struct btrfs_fs_info *fs_info, gfp_t gfp_flag);
+
+/*
+ * Free @iter together with the path it holds.
+ *
+ * A NULL @iter is accepted and silently ignored.
+ */
+static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter)
+{
+	if (iter) {
+		btrfs_free_path(iter->path);
+		kfree(iter);
+	}
+}
+
+/*
+ * Return the extent buffer stored at level 0 of the iterator's path, or NULL
+ * when @iter itself is NULL.
+ */
+static inline struct extent_buffer *btrfs_backref_get_eb(
+		struct btrfs_backref_iter *iter)
+{
+	return iter ? iter->path->nodes[0] : NULL;
+}
+
+/*
+ * A non-skinny metadata extent (EXTENT_ITEM key) starts its inline data with a
+ * btrfs_tree_block_info, which carries no btrfs_extent_inline_ref header.
+ *
+ * Return true when the iterator is in exactly that situation: the current key
+ * is an EXTENT_ITEM and cur_ptr is one struct btrfs_extent_item past item_ptr.
+ */
+static inline bool btrfs_backref_has_tree_block_info(
+		struct btrfs_backref_iter *iter)
+{
+	return iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY &&
+	       iter->cur_ptr - iter->item_ptr ==
+			sizeof(struct btrfs_extent_item);
+}
+
+/*
+ * Start iterating the backrefs of the extent at @bytenr with @iter.
+ *
+ * Defined outside this header.  NOTE(review): the return convention
+ * (presumably 0 on success, negative errno on failure) is not visible here --
+ * confirm against the definition.
+ */
+int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr);
+
+/*
+ * Move @iter to its next backref.
+ *
+ * Defined outside this header.  NOTE(review): the meaning of the return value
+ * (error vs. end of iteration) is not visible here -- confirm.
+ */
+int btrfs_backref_iter_next(struct btrfs_backref_iter *iter);
+
+/*
+ * Tell whether the iterator currently points into an extent item, i.e. its
+ * current key is of type EXTENT_ITEM or METADATA_ITEM.
+ */
+static inline bool btrfs_backref_iter_is_inline_ref(
+		struct btrfs_backref_iter *iter)
+{
+	switch (iter->cur_key.type) {
+	case BTRFS_EXTENT_ITEM_KEY:
+	case BTRFS_METADATA_ITEM_KEY:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Reset @iter: release its path and zero the extent bytenr, the three
+ * position fields and the cached key.  The iterator itself is not freed.
+ */
+static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter)
+{
+	btrfs_release_path(iter->path);
+
+	memset(&iter->cur_key, 0, sizeof(iter->cur_key));
+	iter->bytenr = 0;
+	iter->item_ptr = iter->cur_ptr = iter->end_ptr = 0;
+}
+
+/*
+ * Backref cache related structures
+ *
+ * The whole objective of backref_cache is to build a bi-directional map
+ * of tree blocks (represented by backref_node) and all their parents.
+ */
+
+/*
+ * Represent a tree block in the backref cache.
+ *
+ * A node is linked to its parents through the edges on ::upper and to its
+ * children through the edges on ::lower (see struct btrfs_backref_edge).
+ */
+struct btrfs_backref_node {
+ struct {
+ struct rb_node rb_node;
+ u64 bytenr;
+ }; /* Use rb_simple_node for search/insert */
+
+ u64 new_bytenr; /* NOTE(review): presumably the bytenr after COW -- confirm */
+ /* Objectid of the tree block owner, may not be up to date */
+ u64 owner;
+ /* Link to pending, changed or detached list */
+ struct list_head list;
+
+ /* List of upper level edges, which link this node to its parents */
+ struct list_head upper;
+ /* List of lower level edges, which link this node to its children */
+ struct list_head lower;
+
+ /* NULL if this node is not a tree root */
+ struct btrfs_root *root;
+ /* Extent buffer obtained by COWing the block */
+ struct extent_buffer *eb;
+ /* Level of the tree block */
+ unsigned int level:8;
+ /* Is the block in a non-shareable tree */
+ unsigned int cowonly:1;
+ /* 1 if no child node is in the cache */
+ unsigned int lowest:1;
+ /* Is the extent buffer locked */
+ unsigned int locked:1;
+ /* Has the block been processed */
+ unsigned int processed:1;
+ /* Have backrefs of this block been checked */
+ unsigned int checked:1;
+ /*
+ * 1 if the corresponding block has been COWed but some upper level
+ * block pointers may not point to the new location yet
+ */
+ unsigned int pending:1;
+ /* 1 if the backref node isn't connected to any other backref node */
+ unsigned int detached:1;
+
+ /*
+ * For the generic purpose backref cache, where we only care whether
+ * this is a reloc root and not about the source subvolume id.
+ */
+ unsigned int is_reloc_root:1;
+};
+
+#define LOWER 0 /* Index of the lower (child) side in edge->list[]/node[] */
+#define UPPER 1 /* Index of the upper (parent) side in edge->list[]/node[] */
+
+/*
+ * Represent an edge connecting an upper and a lower backref node.
+ */
+struct btrfs_backref_edge {
+ /*
+ * list[LOWER] is linked to btrfs_backref_node::upper of the lower level
+ * node, and list[UPPER] is linked to btrfs_backref_node::lower of the
+ * upper level node.
+ *
+ * Also, build_backref_tree() uses list[UPPER] for pending edges, before
+ * linking list[UPPER] to its upper level nodes.
+ */
+ struct list_head list[2];
+
+ /* The two nodes joined by this edge: node[LOWER] and node[UPPER] */
+ struct btrfs_backref_node *node[2];
+};
+
+struct btrfs_backref_cache {
+ /* Red black tree of all backref nodes in the cache */
+ struct rb_root rb_root;
+ /* For passing backref nodes to btrfs_reloc_cow_block */
+ struct btrfs_backref_node *path[BTRFS_MAX_LEVEL];
+ /*
+ * List of blocks that have been COWed but some block pointers in upper
+ * level blocks may not reflect the new location
+ */
+ struct list_head pending[BTRFS_MAX_LEVEL];
+ /* List of backref nodes with no child node */
+ struct list_head leaves;
+ /* List of blocks that have been COWed in current transaction */
+ struct list_head changed;
+ /* List of detached backref nodes */
+ struct list_head detached;
+
+ u64 last_trans;
+
+ int nr_nodes; /* Decremented by btrfs_backref_free_node() */
+ int nr_edges; /* Decremented by btrfs_backref_free_edge() */
+
+ /* List of unchecked backref edges during backref cache build */
+ struct list_head pending_edge;
+
+ /* List of useless backref nodes during backref cache build */
+ struct list_head useless_node;
+
+ struct btrfs_fs_info *fs_info;
+
+ /*
+ * Whether this cache is for relocation
+ *
+ * A relocation backref cache requires more info about the reloc root
+ * than a generic backref cache does.
+ */
+ unsigned int is_reloc;
+};
+
+/*
+ * Cache setup and allocation helpers, all defined outside this header.
+ *
+ * NOTE(review): presumably btrfs_backref_init_cache() stores @fs_info and
+ * @is_reloc in @cache, and the alloc helpers bump cache->nr_nodes /
+ * cache->nr_edges (their free counterparts below decrement them) and return
+ * NULL on allocation failure -- confirm against the definitions.
+ */
+void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_backref_cache *cache, int is_reloc);
+struct btrfs_backref_node *btrfs_backref_alloc_node(
+ struct btrfs_backref_cache *cache, u64 bytenr, int level);
+struct btrfs_backref_edge *btrfs_backref_alloc_edge(
+ struct btrfs_backref_cache *cache);
+
+/* Bits accepted in @link_which of btrfs_backref_link_edge() */
+#define LINK_LOWER (1 << 0)
+#define LINK_UPPER (1 << 1)
+
+/*
+ * Make @edge connect @lower and @upper.
+ *
+ * Both node pointers of the edge are always recorded.  The edge is appended
+ * to lower->upper only if LINK_LOWER is set in @link_which, and to
+ * upper->lower only if LINK_UPPER is set.
+ *
+ * @upper must sit exactly one level above @lower; this is asserted.
+ */
+static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge,
+					   struct btrfs_backref_node *lower,
+					   struct btrfs_backref_node *upper,
+					   int link_which)
+{
+	ASSERT(upper && lower && upper->level == lower->level + 1);
+
+	edge->node[UPPER] = upper;
+	edge->node[LOWER] = lower;
+
+	if (link_which & LINK_LOWER)
+		list_add_tail(&edge->list[LOWER], &lower->upper);
+
+	if (link_which & LINK_UPPER)
+		list_add_tail(&edge->list[UPPER], &upper->lower);
+}
+
+/*
+ * Free @node and take it out of the node count of @cache.
+ *
+ * The root referenced by the node is handed to btrfs_put_root() before the
+ * node memory is released.  A NULL @node is a no-op.
+ */
+static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache,
+					   struct btrfs_backref_node *node)
+{
+	if (!node)
+		return;
+
+	cache->nr_nodes--;
+	btrfs_put_root(node->root);
+	kfree(node);
+}
+
+/*
+ * Free @edge and take it out of the edge count of @cache.
+ *
+ * A NULL @edge is a no-op.
+ */
+static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache,
+					   struct btrfs_backref_edge *edge)
+{
+	if (!edge)
+		return;
+
+	cache->nr_edges--;
+	kfree(edge);
+}
+
+/*
+ * Unlock the extent buffer of @node if the node is marked as locked, and
+ * clear that mark.  Nothing happens for a node that is not locked.
+ */
+static inline void btrfs_backref_unlock_node_buffer(
+		struct btrfs_backref_node *node)
+{
+	if (!node->locked)
+		return;
+
+	btrfs_tree_unlock(node->eb);
+	node->locked = 0;
+}
+
+/*
+ * Let go of the extent buffer attached to @node, if there is one: unlock it
+ * when it is locked, drop it with free_extent_buffer() and clear node->eb.
+ */
+static inline void btrfs_backref_drop_node_buffer(
+		struct btrfs_backref_node *node)
+{
+	if (!node->eb)
+		return;
+
+	btrfs_backref_unlock_node_buffer(node);
+	free_extent_buffer(node->eb);
+	node->eb = NULL;
+}
+
+/*
+ * Drop the backref node from cache without cleaning up its children
+ * edges.
+ *
+ * This can only be called on a node without parent edges; a non-empty
+ * node->upper list triggers BUG_ON().
+ * The children edges are still kept as is.
+ *
+ * The node's extent buffer is dropped, the node is unlinked from the list it
+ * is on via node->list, erased from the cache rb-tree if it is in one, and
+ * finally freed through btrfs_backref_free_node().
+ *
+ * NOTE(review): list_del() is also applied to node->lower, which is the head
+ * of the child edge list rather than an entry on another list -- confirm this
+ * is intended and that the remaining child edges are not walked afterwards.
+ */
+static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
+ struct btrfs_backref_node *node)
+{
+ BUG_ON(!list_empty(&node->upper));
+
+ btrfs_backref_drop_node_buffer(node);
+ list_del(&node->list);
+ list_del(&node->lower);
+ if (!RB_EMPTY_NODE(&node->rb_node))
+ rb_erase(&node->rb_node, &tree->rb_root);
+ btrfs_backref_free_node(tree, node);
+}
+
+/*
+ * Defined outside this header.  NOTE(review): judging by the name, this
+ * removes @node from @cache including its edges (in contrast to
+ * btrfs_backref_drop_node(), which keeps child edges) -- confirm.
+ */
+void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node);
+
+/*
+ * Defined outside this header.  NOTE(review): presumably releases every node
+ * and edge still held by @cache -- confirm.
+ */
+void btrfs_backref_release_cache(struct btrfs_backref_cache *cache);
+
+/*
+ * Report an inconsistency found in the backref cache through btrfs_panic().
+ *
+ * @fs_info: passed through to btrfs_panic()
+ * @bytenr:  printed as the offset at which the inconsistency was found
+ * @error:   error code passed through to btrfs_panic()
+ *
+ * The error code parameter is deliberately not named "errno": the C library
+ * defines errno as a macro, so that identifier stops being usable as a
+ * parameter name as soon as <errno.h> is in scope (e.g. when this header is
+ * shared with userspace tooling).
+ */
+static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info,
+				       u64 bytenr, int error)
+{
+	btrfs_panic(fs_info, error,
+		    "Inconsistency in backref cache found at offset %llu",
+		    bytenr);
+}
+
+/*
+ * Backref cache build helpers, all defined outside this header.
+ *
+ * NOTE(review): the following is inferred from names and signatures only and
+ * must be confirmed against the definitions: btrfs_backref_add_tree_node()
+ * appears to add the parents of @cur to @cache using @iter and @path,
+ * btrfs_backref_finish_upper_links() appears to complete the pending upper
+ * links starting from @start, and btrfs_backref_error_cleanup() appears to
+ * undo a partially built cache.  The int returns are presumably 0 or a
+ * negative errno.
+ */
+int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
+ struct btrfs_path *path,
+ struct btrfs_backref_iter *iter,
+ struct btrfs_key *node_key,
+ struct btrfs_backref_node *cur);
+
+int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *start);
+
+void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache,
+ struct btrfs_backref_node *node);
+
#endif