author     Linus Torvalds <torvalds@linux-foundation.org>  2023-02-20 23:54:27 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2023-02-20 23:54:27 +0300
commit     885ce48739189fac6645ff42d736ee0de0b5917d (patch)
tree       d69300909ff5b4a0291a7311cd25cf90801a650a /fs/btrfs/compression.c
parent     274978f173276c5720a3cd8d0b6047d2c0d3a684 (diff)
parent     964a54e5e1a0d70cd80bd5a0885a1938463625b1 (diff)
Merge tag 'for-6.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
 "The usual mix of performance improvements and new features.

  The core change is reworking how checksums are processed, with
  followup cleanups and simplifications. There are two minor changes in
  block layer and iomap code.

  Features:

   - block group allocation class heuristics:
      - pack files by size (up to 128k, up to 8M, more) to avoid
        fragmentation in block groups, assuming that file size and life
        time is correlated, in particular this may help during balance
      - with tracepoints and extensible in the future

  Performance:

   - send: cache directory utimes and only emit the command when
     necessary
      - speedup up to 10x
      - smaller final stream produced (no redundant utimes commands
        issued)
      - compatibility not affected

   - fiemap: skip backref checks for shared leaves
      - speedup 3x on sample filesystem with all leaves shared (e.g. on
        snapshots)

   - micro optimized b-tree key lookup, speedup in metadata operations
     (sample benchmark: fs_mark +10% of files/sec)

  Core changes:

   - change where checksumming is done in the io path:
      - checksum and read repair does verification at lower layer
      - cascaded cleanups and simplifications

   - raid56 refactoring and cleanups

  Fixes:

   - sysfs: make sure that a run-time change of a feature is correctly
     tracked by the feature files

   - scrub: better reporting of tree block errors

  Other:

   - locally enable -Wmaybe-uninitialized after fixing all warnings

   - misc cleanups, spelling fixes

  Other code:

   - block: export bio_split_rw

   - iomap: remove IOMAP_F_ZONE_APPEND"

* tag 'for-6.3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (109 commits)
  btrfs: make kobj_type structures constant
  btrfs: remove the bdev argument to btrfs_rmap_block
  btrfs: don't rely on unchanging ->bi_bdev for zone append remaps
  btrfs: never return true for reads in btrfs_use_zone_append
  btrfs: pass a btrfs_bio to btrfs_use_append
  btrfs: set bbio->file_offset in alloc_new_bio
  btrfs: use file_offset to limit bios size in calc_bio_boundaries
  btrfs: do unsigned integer division in the extent buffer binary search loop
  btrfs: eliminate extra call when doing binary search on extent buffer
  btrfs: raid56: handle endio in scrub_rbio
  btrfs: raid56: handle endio in recover_rbio
  btrfs: raid56: handle endio in rmw_rbio
  btrfs: raid56: submit the read bios from scrub_assemble_read_bios
  btrfs: raid56: fold rmw_read_wait_recover into rmw_read_bios
  btrfs: raid56: fold recover_assemble_read_bios into recover_rbio
  btrfs: raid56: add a bio_list_put helper
  btrfs: raid56: wait for I/O completion in submit_read_bios
  btrfs: raid56: simplify code flow in rmw_rbio
  btrfs: raid56: simplify error handling and code flow in raid56_parity_write
  btrfs: replace btrfs_wait_tree_block_writeback by wait_on_extent_buffer_writeback
  ...
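To illustrate the size-class heuristic mentioned under Features, the following is a minimal standalone C sketch of bucketing an allocation size into the three classes named in the commit message (up to 128k, up to 8M, larger). The thresholds come from the message above; the enum and function names are hypothetical and are not the kernel's actual identifiers.

#include <stdint.h>

/*
 * Hypothetical size classes mirroring the thresholds named in the pull
 * request message (<= 128 KiB, <= 8 MiB, larger). Illustrative only,
 * not the btrfs implementation.
 */
enum alloc_size_class {
	ALLOC_CLASS_SMALL,	/* up to 128 KiB */
	ALLOC_CLASS_MEDIUM,	/* up to 8 MiB */
	ALLOC_CLASS_LARGE,	/* anything bigger */
};

/*
 * Map an allocation size to a class so that extents of similar size
 * (and presumably similar lifetime) can be packed into the same block
 * groups, reducing fragmentation and easing later balance operations.
 */
enum alloc_size_class classify_alloc_size(uint64_t bytes)
{
	if (bytes <= 128 * 1024)
		return ALLOC_CLASS_SMALL;
	if (bytes <= 8 * 1024 * 1024)
		return ALLOC_CLASS_MEDIUM;
	return ALLOC_CLASS_LARGE;
}

An allocator built around this idea would keep separate block groups per class and pick the group matching classify_alloc_size() of the request; the tracepoints mentioned above would then expose which class each allocation landed in.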
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r--   fs/btrfs/compression.c   276
1 file changed, 45 insertions(+), 231 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 5122ca79f7ea..f42f31f22d13 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -141,12 +141,15 @@ static int compression_decompress(int type, struct list_head *ws,
static int btrfs_decompress_bio(struct compressed_bio *cb);
-static void finish_compressed_bio_read(struct compressed_bio *cb)
+static void end_compressed_bio_read(struct btrfs_bio *bbio)
{
+ struct compressed_bio *cb = bbio->private;
unsigned int index;
struct page *page;
- if (cb->status == BLK_STS_OK)
+ if (bbio->bio.bi_status)
+ cb->status = bbio->bio.bi_status;
+ else
cb->status = errno_to_blk_status(btrfs_decompress_bio(cb));
/* Release the compressed pages */
@@ -162,54 +165,6 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
/* Finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
-}
-
-/*
- * Verify the checksums and kick off repair if needed on the uncompressed data
- * before decompressing it into the original bio and freeing the uncompressed
- * pages.
- */
-static void end_compressed_bio_read(struct btrfs_bio *bbio)
-{
- struct compressed_bio *cb = bbio->private;
- struct inode *inode = cb->inode;
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
- struct btrfs_inode *bi = BTRFS_I(inode);
- bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
- !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
- blk_status_t status = bbio->bio.bi_status;
- struct bvec_iter iter;
- struct bio_vec bv;
- u32 offset;
-
- btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
- u64 start = bbio->file_offset + offset;
-
- if (!status &&
- (!csum || !btrfs_check_data_csum(bi, bbio, offset,
- bv.bv_page, bv.bv_offset))) {
- btrfs_clean_io_failure(bi, start, bv.bv_page,
- bv.bv_offset);
- } else {
- int ret;
-
- refcount_inc(&cb->pending_ios);
- ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset,
- bv.bv_page, bv.bv_offset,
- true);
- if (ret) {
- refcount_dec(&cb->pending_ios);
- status = errno_to_blk_status(ret);
- }
- }
- }
-
- if (status)
- cb->status = status;
-
- if (refcount_dec_and_test(&cb->pending_ios))
- finish_compressed_bio_read(cb);
- btrfs_bio_free_csum(bbio);
bio_put(&bbio->bio);
}
@@ -303,68 +258,12 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
static void end_compressed_bio_write(struct btrfs_bio *bbio)
{
struct compressed_bio *cb = bbio->private;
-
- if (bbio->bio.bi_status)
- cb->status = bbio->bio.bi_status;
-
- if (refcount_dec_and_test(&cb->pending_ios)) {
- struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
-
- btrfs_record_physical_zoned(cb->inode, cb->start, &bbio->bio);
- queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
- }
- bio_put(&bbio->bio);
-}
-
-/*
- * Allocate a compressed_bio, which will be used to read/write on-disk
- * (aka, compressed) * data.
- *
- * @cb: The compressed_bio structure, which records all the needed
- * information to bind the compressed data to the uncompressed
- * page cache.
- * @disk_byten: The logical bytenr where the compressed data will be read
- * from or written to.
- * @endio_func: The endio function to call after the IO for compressed data
- * is finished.
- * @next_stripe_start: Return value of logical bytenr of where next stripe starts.
- * Let the caller know to only fill the bio up to the stripe
- * boundary.
- */
-
-
-static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
- blk_opf_t opf,
- btrfs_bio_end_io_t endio_func,
- u64 *next_stripe_start)
-{
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
- struct btrfs_io_geometry geom;
- struct extent_map *em;
- struct bio *bio;
- int ret;
- bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, endio_func, cb);
- bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
+ cb->status = bbio->bio.bi_status;
+ queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
- em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
- if (IS_ERR(em)) {
- bio_put(bio);
- return ERR_CAST(em);
- }
-
- if (bio_op(bio) == REQ_OP_ZONE_APPEND)
- bio_set_dev(bio, em->map_lookup->stripes[0].dev->bdev);
-
- ret = btrfs_get_io_geometry(fs_info, em, btrfs_op(bio), disk_bytenr, &geom);
- free_extent_map(em);
- if (ret < 0) {
- bio_put(bio);
- return ERR_PTR(ret);
- }
- *next_stripe_start = disk_bytenr + geom.len;
- refcount_inc(&cb->pending_ios);
- return bio;
+ bio_put(&bbio->bio);
}
/*
@@ -389,18 +288,13 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct bio *bio = NULL;
struct compressed_bio *cb;
u64 cur_disk_bytenr = disk_start;
- u64 next_stripe_start;
blk_status_t ret = BLK_STS_OK;
- int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
- const bool use_append = btrfs_use_zone_append(inode, disk_start);
- const enum req_op bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(len, fs_info->sectorsize));
cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
if (!cb)
return BLK_STS_RESOURCE;
- refcount_set(&cb->pending_ios, 1);
cb->status = BLK_STS_OK;
cb->inode = &inode->vfs_inode;
cb->start = start;
@@ -411,8 +305,16 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_pages = nr_pages;
- if (blkcg_css)
+ if (blkcg_css) {
kthread_associate_blkcg(blkcg_css);
+ write_flags |= REQ_CGROUP_PUNT;
+ }
+
+ write_flags |= REQ_BTRFS_ONE_ORDERED;
+ bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_WRITE | write_flags,
+ BTRFS_I(cb->inode), end_compressed_bio_write, cb);
+ bio->bi_iter.bi_sector = cur_disk_bytenr >> SECTOR_SHIFT;
+ btrfs_bio(bio)->file_offset = start;
while (cur_disk_bytenr < disk_start + compressed_len) {
u64 offset = cur_disk_bytenr - disk_start;
@@ -420,77 +322,30 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int real_size;
unsigned int added;
struct page *page = compressed_pages[index];
- bool submit = false;
-
- /* Allocate new bio if submitted or not yet allocated */
- if (!bio) {
- bio = alloc_compressed_bio(cb, cur_disk_bytenr,
- bio_op | write_flags, end_compressed_bio_write,
- &next_stripe_start);
- if (IS_ERR(bio)) {
- ret = errno_to_blk_status(PTR_ERR(bio));
- break;
- }
- if (blkcg_css)
- bio->bi_opf |= REQ_CGROUP_PUNT;
- }
- /*
- * We should never reach next_stripe_start start as we will
- * submit comp_bio when reach the boundary immediately.
- */
- ASSERT(cur_disk_bytenr != next_stripe_start);
/*
* We have various limits on the real read size:
- * - stripe boundary
* - page boundary
* - compressed length boundary
*/
- real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_bytenr);
- real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
+ real_size = min_t(u64, U32_MAX, PAGE_SIZE - offset_in_page(offset));
real_size = min_t(u64, real_size, compressed_len - offset);
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
- if (use_append)
- added = bio_add_zone_append_page(bio, page, real_size,
- offset_in_page(offset));
- else
- added = bio_add_page(bio, page, real_size,
- offset_in_page(offset));
- /* Reached zoned boundary */
- if (added == 0)
- submit = true;
-
+ added = bio_add_page(bio, page, real_size, offset_in_page(offset));
+ /*
+ * Maximum compressed extent is smaller than bio size limit,
* thus bio_add_page() should always succeed.
+ */
+ ASSERT(added == real_size);
cur_disk_bytenr += added;
- /* Reached stripe boundary */
- if (cur_disk_bytenr == next_stripe_start)
- submit = true;
-
- /* Finished the range */
- if (cur_disk_bytenr == disk_start + compressed_len)
- submit = true;
-
- if (submit) {
- if (!skip_sum) {
- ret = btrfs_csum_one_bio(inode, bio, start, true);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(bio), ret);
- break;
- }
- }
-
- ASSERT(bio->bi_iter.bi_size);
- btrfs_submit_bio(fs_info, bio, 0);
- bio = NULL;
- }
- cond_resched();
}
+ /* Finished the range. */
+ ASSERT(bio->bi_iter.bi_size);
+ btrfs_submit_bio(bio, 0);
if (blkcg_css)
kthread_associate_blkcg(NULL);
-
- if (refcount_dec_and_test(&cb->pending_ios))
- finish_compressed_bio_write(cb);
return ret;
}
@@ -667,10 +522,9 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
struct extent_map_tree *em_tree;
struct compressed_bio *cb;
unsigned int compressed_len;
- struct bio *comp_bio = NULL;
+ struct bio *comp_bio;
const u64 disk_bytenr = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 cur_disk_byte = disk_bytenr;
- u64 next_stripe_start;
u64 file_offset;
u64 em_len;
u64 em_start;
@@ -703,7 +557,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
goto out;
}
- refcount_set(&cb->pending_ios, 1);
cb->status = BLK_STS_OK;
cb->inode = inode;
@@ -737,37 +590,23 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
/* include any pages we added in add_ra-bio_pages */
cb->len = bio->bi_iter.bi_size;
+ comp_bio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, BTRFS_I(cb->inode),
+ end_compressed_bio_read, cb);
+ comp_bio->bi_iter.bi_sector = (cur_disk_byte >> SECTOR_SHIFT);
+
while (cur_disk_byte < disk_bytenr + compressed_len) {
u64 offset = cur_disk_byte - disk_bytenr;
unsigned int index = offset >> PAGE_SHIFT;
unsigned int real_size;
unsigned int added;
struct page *page = cb->compressed_pages[index];
- bool submit = false;
-
- /* Allocate new bio if submitted or not yet allocated */
- if (!comp_bio) {
- comp_bio = alloc_compressed_bio(cb, cur_disk_byte,
- REQ_OP_READ, end_compressed_bio_read,
- &next_stripe_start);
- if (IS_ERR(comp_bio)) {
- cb->status = errno_to_blk_status(PTR_ERR(comp_bio));
- break;
- }
- }
- /*
- * We should never reach next_stripe_start start as we will
- * submit comp_bio when reach the boundary immediately.
- */
- ASSERT(cur_disk_byte != next_stripe_start);
+
/*
* We have various limits on the real read size:
- * - stripe boundary
* - page boundary
* - compressed length boundary
*/
- real_size = min_t(u64, U32_MAX, next_stripe_start - cur_disk_byte);
- real_size = min_t(u64, real_size, PAGE_SIZE - offset_in_page(offset));
+ real_size = min_t(u64, U32_MAX, PAGE_SIZE - offset_in_page(offset));
real_size = min_t(u64, real_size, compressed_len - offset);
ASSERT(IS_ALIGNED(real_size, fs_info->sectorsize));
@@ -778,45 +617,20 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
*/
ASSERT(added == real_size);
cur_disk_byte += added;
-
- /* Reached stripe boundary, need to submit */
- if (cur_disk_byte == next_stripe_start)
- submit = true;
-
- /* Has finished the range, need to submit */
- if (cur_disk_byte == disk_bytenr + compressed_len)
- submit = true;
-
- if (submit) {
- /* Save the original iter for read repair */
- if (bio_op(comp_bio) == REQ_OP_READ)
- btrfs_bio(comp_bio)->iter = comp_bio->bi_iter;
-
- /*
- * Save the initial offset of this chunk, as there
- * is no direct correlation between compressed pages and
- * the original file offset. The field is only used for
- * priting error messages.
- */
- btrfs_bio(comp_bio)->file_offset = file_offset;
-
- ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
- if (ret) {
- btrfs_bio_end_io(btrfs_bio(comp_bio), ret);
- break;
- }
-
- ASSERT(comp_bio->bi_iter.bi_size);
- btrfs_submit_bio(fs_info, comp_bio, mirror_num);
- comp_bio = NULL;
- }
}
if (memstall)
psi_memstall_leave(&pflags);
- if (refcount_dec_and_test(&cb->pending_ios))
- finish_compressed_bio_read(cb);
+ /*
+ * Stash the initial offset of this chunk, as there is no direct
+ * correlation between compressed pages and the original file offset.
+ * The field is only used for printing error messages anyway.
+ */
+ btrfs_bio(comp_bio)->file_offset = file_offset;
+
+ ASSERT(comp_bio->bi_iter.bi_size);
+ btrfs_submit_bio(comp_bio, mirror_num);
return;
fail:
@@ -1609,7 +1423,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
index_end = end >> PAGE_SHIFT;
/* Don't miss unaligned end */
- if (!IS_ALIGNED(end, PAGE_SIZE))
+ if (!PAGE_ALIGNED(end))
index_end++;
curr_sample_pos = 0;
@@ -1642,7 +1456,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
*
* For now it's a naive and optimistic 'return true', we'll extend the logic to
* quickly (compared to direct compression) detect data characteristics
- * (compressible/uncompressible) to avoid wasting CPU time on uncompressible
+ * (compressible/incompressible) to avoid wasting CPU time on incompressible
* data.
*
* The following types of analysis can be performed: