author    Linus Torvalds <torvalds@linux-foundation.org>  2020-12-16 05:40:42 +0300
committer Linus Torvalds <torvalds@linux-foundation.org>  2020-12-16 05:40:42 +0300
commit    f1ee3b8829006b3fda999f00f0059aa327e3f3d0 (patch)
tree      63c68a2568bc3d28f7b4d4199a5032f8eac748c5 /fs/btrfs/extent-io-tree.h
parent    a725cb4d708e5ac8bc76a70b3002ff64c07312d8 (diff)
parent    b42fe98c92698d2a10094997e5f4d2dd968fd44f (diff)
download  linux-f1ee3b8829006b3fda999f00f0059aa327e3f3d0.tar.xz
Merge tag 'for-5.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
 "We have a mix of all kinds of changes, feature updates, core stuff,
  performance improvements and lots of cleanups and preparatory changes.

  User visible:

   - export filesystem generation in sysfs

   - new features for mount option 'rescue':
       - what's currently supported is exported in sysfs
       - 'ignorebadroots'/'ibadroots' - continue even if some essential
         tree roots are not usable (extent, uuid, data reloc, device,
         csum, free space)
       - 'ignoredatacsums'/'idatacsums' - skip checksum verification on data
       - 'all' - now enables 'ignorebadroots' + 'ignoredatacsums' +
         'nologreplay'

   - export read mirror policy settings to sysfs, new policies will be
     added in the future

   - remove inode number cache feature (mount -o inode_cache), obsoleted in 5.9

  User visible fixes:

   - async discard scheduling fixes on high loads

   - update inode byte counter atomically so stat() does not report wrong
     value in some cases

   - free space tree fixes:
       - correctly report status of v2 after remount
       - clear v1 cache inodes when v2 is newly enabled after remount

  Core:

   - switch own tree lock implementation to standard rw semaphore:
       - one-level lock nesting is not required anymore, the last use of
         this was in free space that's now loaded asynchronously
       - own implementation of adaptive spinning before taking mutex has
         been part of rwsem
       - performance seems to be better in general, much better (+tens of
         percents) for some workloads
       - lockdep does not complain

   - finish direct IO conversion to iomap infrastructure, remove temporary
     workaround for DSYNC after iomap API updates

   - preparatory work to support data and metadata blocks smaller than page:
       - generalize code that assumes sectorsize == PAGE_SIZE, lots of
         refactoring
       - planned namely for 64K pages (eg. arm64, ppc64)
       - scrub read-only support

   - preparatory work for zoned allocation mode (SMR/ZBC/ZNS friendly):
       - disable incompatible features
       - round-robin superblock write

   - free space cache (v1) is loaded asynchronously, remove tree path
     recursion

   - slightly improved time tracking for transaction kthread wake ups

  Performance improvements (note that the numbers depend on load type or
  other features and weren't run on the same machine):

   - skip unnecessary work:
       - do not start readahead for csum tree when scrubbing non-data
         block groups
       - do not start and wait for delalloc on snapshot roots on
         transaction commit
       - fix race when defragmenting leads to unnecessary IO

   - dbench speedups (+throughput%/-max latency%):
       - skip unnecessary searches for xattrs when logging an inode
         (+10.8/-8.2)
       - stop incrementing log batch when joining log transaction (1-2)
       - unlock path before checking if extent is shared during nocow
         writeback (+5.0/-20.5), on fio load +9.7% throughput/-9.8% runtime
       - several tree log improvements, eg. removing unnecessary operations,
         fixing races that lead to additional work (+12.7/-8.2)

   - tree-checker error branches annotated with unlikely() (+3% throughput)

  Other:

   - cleanups
   - lockdep fixes
   - more btrfs_inode conversions
   - error variable cleanups"

* tag 'for-5.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (198 commits)
  btrfs: scrub: allow scrub to work with subpage sectorsize
  btrfs: scrub: support subpage data scrub
  btrfs: scrub: support subpage tree block scrub
  btrfs: scrub: always allocate one full page for one sector for RAID56
  btrfs: scrub: reduce width of extent_len/stripe_len from 64 to 32 bits
  btrfs: refactor btrfs_lookup_bio_sums to handle out-of-order bvecs
  btrfs: remove btrfs_find_ordered_sum call from btrfs_lookup_bio_sums
  btrfs: handle sectorsize < PAGE_SIZE case for extent buffer accessors
  btrfs: update num_extent_pages to support subpage sized extent buffer
  btrfs: don't allow tree block to cross page boundary for subpage support
  btrfs: calculate inline extent buffer page size based on page size
  btrfs: factor out btree page submission code to a helper
  btrfs: make btrfs_verify_data_csum follow sector size
  btrfs: pass bio_offset to check_data_csum() directly
  btrfs: rename bio_offset of extent_submit_bio_start_t to dio_file_offset
  btrfs: fix lockdep warning when creating free space tree
  btrfs: skip space_cache v1 setup when not using it
  btrfs: remove free space items when disabling space cache v1
  btrfs: warn when remount will not change the free space tree
  btrfs: use superblock state to print space_cache mount option
  ...
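The new 'rescue' mount options listed in the log above are passed as rescue=<name> in the mount data string. Below is a minimal, illustrative C sketch (not part of this pull) of mounting a damaged filesystem read-only with rescue=all; the device and mount point paths are placeholders, and the read-only flag is an assumption about how such salvage mounts are typically done, not something stated in the log.

	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/*
		 * Illustrative only: per the log above, "rescue=all" now enables
		 * ignorebadroots, ignoredatacsums and nologreplay.  The device and
		 * mount point are placeholders; MS_RDONLY is an assumption about
		 * the intended recovery use case.
		 */
		if (mount("/dev/sdX", "/mnt/recovery", "btrfs", MS_RDONLY,
			  "rescue=all") != 0) {
			perror("mount");
			return 1;
		}
		return 0;
	}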
Diffstat (limited to 'fs/btrfs/extent-io-tree.h')
-rw-r--r--  fs/btrfs/extent-io-tree.h  71
1 file changed, 43 insertions, 28 deletions
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index 9800a8306368..04083ee5ae6e 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -21,10 +21,24 @@ struct io_failure_record;
#define EXTENT_NORESERVE (1U << 11)
#define EXTENT_QGROUP_RESERVED (1U << 12)
#define EXTENT_CLEAR_DATA_RESV (1U << 13)
+/*
+ * Must be cleared only during ordered extent completion or on error paths if we
+ * did not manage to submit bios and create the ordered extents for the range.
+ * Should not be cleared during page release and page invalidation (if there is
+ * an ordered extent in flight), that is left for the ordered extent completion.
+ */
#define EXTENT_DELALLOC_NEW (1U << 14)
+/*
+ * When an ordered extent successfully completes for a region marked as a new
+ * delalloc range, use this flag when clearing a new delalloc range to indicate
+ * that the VFS' inode number of bytes should be incremented and the inode's new
+ * delalloc bytes decremented, in an atomic way to prevent races with stat(2).
+ */
+#define EXTENT_ADD_INODE_BYTES (1U << 15)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
-#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING)
+#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | \
+ EXTENT_ADD_INODE_BYTES)
/*
* Redefined bits above which are used only in the device allocation tree,
@@ -73,7 +87,7 @@ struct extent_state {
/* ADD NEW ELEMENTS AFTER THIS */
wait_queue_head_t wq;
refcount_t refs;
- unsigned state;
+ u32 state;
struct io_failure_record *failrec;
@@ -105,19 +119,18 @@ void __cold extent_io_exit(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
- u64 max_bytes, unsigned bits, int contig);
+ u64 max_bytes, u32 bits, int contig);
void free_extent_state(struct extent_state *state);
int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, int filled,
- struct extent_state *cached_state);
+ u32 bits, int filled, struct extent_state *cached_state);
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, struct extent_changeset *changeset);
+ u32 bits, struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, int wake, int delete,
+ u32 bits, int wake, int delete,
struct extent_state **cached);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, int wake, int delete,
+ u32 bits, int wake, int delete,
struct extent_state **cached, gfp_t mask,
struct extent_changeset *changeset);
@@ -141,7 +154,7 @@ static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
- u64 end, unsigned bits)
+ u64 end, u32 bits)
{
int wake = 0;
@@ -152,17 +165,19 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
}
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, struct extent_changeset *changeset);
+ u32 bits, struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, u64 *failed_start,
- struct extent_state **cached_state, gfp_t mask);
+ u32 bits, unsigned exclusive_bits, u64 *failed_start,
+ struct extent_state **cached_state, gfp_t mask,
+ struct extent_changeset *changeset);
int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits);
+ u32 bits);
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
- u64 end, unsigned bits)
+ u64 end, u32 bits)
{
- return set_extent_bit(tree, start, end, bits, NULL, NULL, GFP_NOFS);
+ return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
+ NULL);
}
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -175,8 +190,8 @@ static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
- return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
- NULL, mask);
+ return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, NULL,
+ mask, NULL);
}
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
@@ -188,16 +203,16 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
}
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
- unsigned bits, unsigned clear_bits,
+ u32 bits, u32 clear_bits,
struct extent_state **cached_state);
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
- u64 end, unsigned int extra_bits,
+ u64 end, u32 extra_bits,
struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
- NULL, cached_state, GFP_NOFS);
+ 0, NULL, cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
@@ -205,30 +220,30 @@ static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
- NULL, cached_state, GFP_NOFS);
+ 0, NULL, cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
u64 end)
{
- return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL,
- GFP_NOFS);
+ return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, NULL,
+ GFP_NOFS, NULL);
}
static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state, gfp_t mask)
{
- return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
- cached_state, mask);
+ return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
+ cached_state, mask, NULL);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, unsigned bits,
+ u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, unsigned bits);
+ u64 *start_ret, u64 *end_ret, u32 bits);
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, unsigned bits);
+ u64 *start_ret, u64 *end_ret, u32 bits);
int extent_invalidatepage(struct extent_io_tree *tree,
struct page *page, unsigned long offset);
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
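As a reading aid for the API change above (not part of this diff): set_extent_bit() now takes an explicit exclusive_bits argument plus an optional extent_changeset, and EXTENT_ADD_INODE_BYTES is meant to be passed when clearing a completed new-delalloc range so the inode's byte counters are updated atomically with the bit clearing. A hedged sketch of a hypothetical caller follows; the function name and surrounding context are invented for illustration, and locking and full error handling are omitted.

	/* Hypothetical caller sketch, not from this patch set. */
	#include "ctree.h"
	#include "btrfs_inode.h"
	#include "extent-io-tree.h"

	static int example_mark_and_finish_range(struct btrfs_inode *inode,
						 u64 start, u64 end,
						 struct extent_state **cached)
	{
		struct extent_io_tree *tree = &inode->io_tree;
		int ret;

		/* Mark the range as new delalloc; no exclusive bits, no changeset. */
		ret = set_extent_bit(tree, start, end,
				     EXTENT_DELALLOC | EXTENT_DELALLOC_NEW,
				     0, NULL, cached, GFP_NOFS, NULL);
		if (ret)
			return ret;

		/*
		 * On ordered extent completion, clear the bits and ask the tree
		 * code to add the range's bytes to the VFS inode byte counter
		 * atomically (EXTENT_ADD_INODE_BYTES), per the comment added to
		 * this header.
		 */
		return clear_extent_bit(tree, start, end,
					EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
					EXTENT_ADD_INODE_BYTES,
					0, 0, cached);
	}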