From 7b62e66cbbfb463a39bf83e30bdbbb4b9e83fa03 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 11 Dec 2019 16:07:06 -0800 Subject: btrfs: punt all bios created in btrfs_submit_compressed_write() Compressed writes happen in the background via kworkers. However, this causes bios to be attributed to root bypassing any cgroup limits from the actual writer. We tag the first bio with REQ_CGROUP_PUNT, which will punt the bio to an appropriate cgroup specific workqueue and attribute the IO properly. However, if btrfs_submit_compressed_write() creates a new bio, we don't tag it the same way. Add the appropriate tagging for subsequent bios. Fixes: ec39f7696ccfa ("Btrfs: use REQ_CGROUP_PUNT for worker thread submitted bios") Reviewed-by: Chris Mason Signed-off-by: Dennis Zhou Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ee834ef7beb4..b08e16b8cebb 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -491,6 +491,10 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; + if (blkcg_css) { + bio->bi_opf |= REQ_CGROUP_PUNT; + bio_associate_blkg_from_css(bio, blkcg_css); + } bio_add_page(bio, page, PAGE_SIZE, 0); } if (bytes_left < PAGE_SIZE) { -- cgit v1.2.3 From 46bcff2bfc5e6a8c638d3a32e4f6f6fa4bd01461 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 11 Dec 2019 15:20:15 -0800 Subject: btrfs: fix compressed write bio blkcg attribution Bio attribution is handled at bio_set_dev() as once we have a device, we have a corresponding request_queue and then can derive the current css. In special cases, we want to attribute to bio to someone else. This can be done by calling bio_associate_blkg_from_css() or kthread_associate_blkcg() depending on the scenario. Btrfs does this for compressed writeback as they are handled by kworkers, so the latter can be done here. Commit 1a41802701ec ("btrfs: drop bio_set_dev where not needed") removes early bio_set_dev() calls prior to submit_stripe_bio(). This breaks the above assumption that we'll have a request_queue when we are doing association. To fix this, switch to using kthread_associate_blkcg(). Without this, we crash in btrfs/024: [ 3052.093088] BUG: kernel NULL pointer dereference, address: 0000000000000510 [ 3052.107013] #PF: supervisor read access in kernel mode [ 3052.107014] #PF: error_code(0x0000) - not-present page [ 3052.107015] PGD 0 P4D 0 [ 3052.107021] Oops: 0000 [#1] SMP [ 3052.138904] CPU: 42 PID: 201270 Comm: kworker/u161:0 Kdump: loaded Not tainted 5.5.0-rc1-00062-g4852d8ac90a9 #712 [ 3052.138905] Hardware name: Quanta Tioga Pass Single Side 01-0032211004/Tioga Pass Single Side, BIOS F08_3A18 12/20/2018 [ 3052.138912] Workqueue: btrfs-delalloc btrfs_work_helper [ 3052.191375] RIP: 0010:bio_associate_blkg_from_css+0x1e/0x3c0 [ 3052.191379] RSP: 0018:ffffc900210cfc90 EFLAGS: 00010282 [ 3052.191380] RAX: 0000000000000000 RBX: ffff88bfe5573c00 RCX: 0000000000000000 [ 3052.191382] RDX: ffff889db48ec2f0 RSI: ffff88bfe5573c00 RDI: ffff889db48ec2f0 [ 3052.191386] RBP: 0000000000000800 R08: 0000000000203bb0 R09: ffff889db16b2400 [ 3052.293364] R10: 0000000000000000 R11: ffff88a07fffde80 R12: ffff889db48ec2f0 [ 3052.293365] R13: 0000000000001000 R14: ffff889de82bc000 R15: ffff889e2b7bdcc8 [ 3052.293367] FS: 0000000000000000(0000) GS:ffff889ffba00000(0000) knlGS:0000000000000000 [ 3052.293368] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3052.293369] CR2: 0000000000000510 CR3: 0000000002611001 CR4: 00000000007606e0 [ 3052.293370] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 3052.293371] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 3052.293372] PKRU: 55555554 [ 3052.293376] Call Trace: [ 3052.402552] btrfs_submit_compressed_write+0x137/0x390 [ 3052.402558] submit_compressed_extents+0x40f/0x4c0 [ 3052.422401] btrfs_work_helper+0x246/0x5a0 [ 3052.422408] process_one_work+0x200/0x570 [ 3052.438601] ? process_one_work+0x180/0x570 [ 3052.438605] worker_thread+0x4c/0x3e0 [ 3052.438614] kthread+0x103/0x140 [ 3052.460735] ? process_one_work+0x570/0x570 [ 3052.460737] ? kthread_mod_delayed_work+0xc0/0xc0 [ 3052.460744] ret_from_fork+0x24/0x30 Fixes: 1a41802701ec ("btrfs: drop bio_set_dev where not needed") Reported-by: Chris Murphy Signed-off-by: Dennis Zhou Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/compression.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b08e16b8cebb..43e1660f450f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -447,7 +447,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, if (blkcg_css) { bio->bi_opf |= REQ_CGROUP_PUNT; - bio_associate_blkg_from_css(bio, blkcg_css); + kthread_associate_blkcg(blkcg_css); } refcount_set(&cb->pending_bios, 1); @@ -491,10 +491,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio->bi_opf = REQ_OP_WRITE | write_flags; bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; - if (blkcg_css) { + if (blkcg_css) bio->bi_opf |= REQ_CGROUP_PUNT; - bio_associate_blkg_from_css(bio, blkcg_css); - } bio_add_page(bio, page, PAGE_SIZE, 0); } if (bytes_left < PAGE_SIZE) { @@ -521,6 +519,9 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_endio(bio); } + if (blkcg_css) + kthread_associate_blkcg(NULL); + return 0; } -- cgit v1.2.3 From e62958fce94b30ef6aab99bb1bc49fde7fac73ea Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 2 Dec 2019 17:34:17 -0800 Subject: btrfs: get rid of trivial __btrfs_lookup_bio_sums() wrappers Currently, we have two wrappers for __btrfs_lookup_bio_sums(): btrfs_lookup_bio_sums_dio(), which is used for direct I/O, and btrfs_lookup_bio_sums(), which is used everywhere else. The only difference is that the _dio variant looks up csums starting at the given offset instead of using the page index, which isn't actually direct I/O-specific. Let's clean up the signature and return value of __btrfs_lookup_bio_sums(), rename it to btrfs_lookup_bio_sums(), and get rid of the trivial helpers. Reviewed-by: Nikolay Borisov Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- fs/btrfs/compression.c | 4 ++-- fs/btrfs/ctree.h | 4 +--- fs/btrfs/file-item.c | 35 +++++++++++++++++------------------ fs/btrfs/inode.c | 6 +++--- 4 files changed, 23 insertions(+), 26 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 43e1660f450f..b6373de176f8 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -763,7 +763,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_lookup_bio_sums(inode, comp_bio, - sums); + false, 0, sums); BUG_ON(ret); /* -ENOMEM */ } @@ -791,7 +791,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, BUG_ON(ret); /* -ENOMEM */ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { - ret = btrfs_lookup_bio_sums(inode, comp_bio, sums); + ret = btrfs_lookup_bio_sums(inode, comp_bio, false, 0, sums); BUG_ON(ret); /* -ENOMEM */ } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ea49e4b52cd2..b1c971cea33a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2789,9 +2789,7 @@ struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len); blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - u8 *dst); -blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, - u64 logical_offset); + bool at_offset, u64 offset, u8 *dst); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b1bfdc5c1387..b7f5394c37a1 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -148,8 +148,21 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, return ret; } -static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - u64 logical_offset, u8 *dst, int dio) +/** + * btrfs_lookup_bio_sums - Look up checksums for a bio. + * @inode: inode that the bio is for. + * @bio: bio embedded in btrfs_io_bio. + * @at_offset: If true, look up checksums for the extent at @offset. + * If false, use the page offsets from the bio. + * @offset: If @at_offset is true, offset in file to look up checksums for. + * Ignored otherwise. + * @dst: Buffer of size btrfs_super_csum_size() used to return checksum. If + * NULL, the checksum is returned in btrfs_io_bio(bio)->csum instead. + * + * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. + */ +blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, + bool at_offset, u64 offset, u8 *dst) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct bio_vec bvec; @@ -159,7 +172,6 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_path *path; u8 *csum; - u64 offset = 0; u64 item_start_offset = 0; u64 item_last_offset = 0; u64 disk_bytenr; @@ -205,15 +217,13 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio } disk_bytenr = (u64)bio->bi_iter.bi_sector << 9; - if (dio) - offset = logical_offset; bio_for_each_segment(bvec, bio, iter) { page_bytes_left = bvec.bv_len; if (count) goto next; - if (!dio) + if (!at_offset) offset = page_offset(bvec.bv_page) + bvec.bv_offset; count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, csum, nblocks); @@ -285,18 +295,7 @@ next: WARN_ON_ONCE(count); btrfs_free_path(path); - return 0; -} - -blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - u8 *dst) -{ - return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0); -} - -blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset) -{ - return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1); + return BLK_STS_OK; } int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4b2d151b6d7b..43858122b36e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2127,7 +2127,7 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, bio_flags); goto out; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums(inode, bio, NULL); + ret = btrfs_lookup_bio_sums(inode, bio, false, 0, NULL); if (ret) goto out; } @@ -8387,8 +8387,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, * contention. */ if (dip->logical_offset == file_offset) { - ret = btrfs_lookup_bio_sums_dio(inode, dip->orig_bio, - file_offset); + ret = btrfs_lookup_bio_sums(inode, dip->orig_bio, true, + file_offset, NULL); if (ret) return ret; } -- cgit v1.2.3 From db72e47f79c5dbd95611edd453328d46c1eae93a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 10 Dec 2019 10:37:35 -0800 Subject: btrfs: get rid of at_offset parameter to btrfs_lookup_bio_sums() We can encode this in the offset parameter: -1 means use the page offsets, anything else is a valid offset. Signed-off-by: Omar Sandoval Signed-off-by: David Sterba --- fs/btrfs/compression.c | 4 ++-- fs/btrfs/ctree.h | 2 +- fs/btrfs/file-item.c | 11 +++++------ fs/btrfs/inode.c | 6 +++--- 4 files changed, 11 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b6373de176f8..de95ad27722f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -763,7 +763,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_lookup_bio_sums(inode, comp_bio, - false, 0, sums); + (u64)-1, sums); BUG_ON(ret); /* -ENOMEM */ } @@ -791,7 +791,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, BUG_ON(ret); /* -ENOMEM */ if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { - ret = btrfs_lookup_bio_sums(inode, comp_bio, false, 0, sums); + ret = btrfs_lookup_bio_sums(inode, comp_bio, (u64)-1, sums); BUG_ON(ret); /* -ENOMEM */ } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b1c971cea33a..f895fb490b75 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2789,7 +2789,7 @@ struct btrfs_dio_private; int btrfs_del_csums(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 len); blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - bool at_offset, u64 offset, u8 *dst); + u64 offset, u8 *dst); int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 objectid, u64 pos, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b7f5394c37a1..b670014bfc1c 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -152,17 +152,15 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, * btrfs_lookup_bio_sums - Look up checksums for a bio. * @inode: inode that the bio is for. * @bio: bio embedded in btrfs_io_bio. - * @at_offset: If true, look up checksums for the extent at @offset. - * If false, use the page offsets from the bio. - * @offset: If @at_offset is true, offset in file to look up checksums for. - * Ignored otherwise. + * @offset: Unless (u64)-1, look up checksums for this offset in the file. + * If (u64)-1, use the page offsets from the bio instead. * @dst: Buffer of size btrfs_super_csum_size() used to return checksum. If * NULL, the checksum is returned in btrfs_io_bio(bio)->csum instead. * * Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise. */ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, - bool at_offset, u64 offset, u8 *dst) + u64 offset, u8 *dst) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct bio_vec bvec; @@ -171,6 +169,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, struct btrfs_csum_item *item = NULL; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_path *path; + const bool page_offsets = (offset == (u64)-1); u8 *csum; u64 item_start_offset = 0; u64 item_last_offset = 0; @@ -223,7 +222,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, if (count) goto next; - if (!at_offset) + if (page_offsets) offset = page_offset(bvec.bv_page) + bvec.bv_offset; count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, csum, nblocks); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 43858122b36e..4d1b3e8449d1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2127,7 +2127,7 @@ static blk_status_t btrfs_submit_bio_hook(struct inode *inode, struct bio *bio, bio_flags); goto out; } else if (!skip_sum) { - ret = btrfs_lookup_bio_sums(inode, bio, false, 0, NULL); + ret = btrfs_lookup_bio_sums(inode, bio, (u64)-1, NULL); if (ret) goto out; } @@ -8387,8 +8387,8 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode, * contention. */ if (dip->logical_offset == file_offset) { - ret = btrfs_lookup_bio_sums(inode, dip->orig_bio, true, - file_offset, NULL); + ret = btrfs_lookup_bio_sums(inode, dip->orig_bio, file_offset, + NULL); if (ret) return ret; } -- cgit v1.2.3 From 3fd396afc05fc90097276c6b7a70c406ad4df5bb Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Thu, 30 Jan 2020 22:16:33 -0800 Subject: btrfs: use larger zlib buffer for s390 hardware compression In order to benefit from s390 zlib hardware compression support, increase the btrfs zlib workspace buffer size from 1 to 4 pages (if s390 zlib hardware support is enabled on the machine). This brings up to 60% better performance in hardware on s390 compared to the PAGE_SIZE buffer and much more compared to the software zlib processing in btrfs. In case of memory pressure, fall back to a single page buffer during workspace allocation. The data compressed with larger input buffers will still conform to zlib standard and thus can be decompressed also on a systems that uses only PAGE_SIZE buffer for btrfs zlib. Link: http://lkml.kernel.org/r/20200108105103.29028-1-zaslonko@linux.ibm.com Signed-off-by: Mikhail Zaslonko Reviewed-by: David Sterba Cc: Chris Mason Cc: Josef Bacik Cc: David Sterba Cc: Richard Purdie Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Christian Borntraeger Cc: Eduard Shishkin Cc: Ilya Leoshkevich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/compression.c | 2 +- fs/btrfs/zlib.c | 135 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 101 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/compression.c') diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index de95ad27722f..9ab610cc9114 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -1290,7 +1290,7 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, /* copy bytes from the working buffer into the pages */ while (working_bytes > 0) { bytes = min_t(unsigned long, bvec.bv_len, - PAGE_SIZE - buf_offset); + PAGE_SIZE - (buf_offset % PAGE_SIZE)); bytes = min(bytes, working_bytes); kaddr = kmap_atomic(bvec.bv_page); diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index a6c90a003c12..05615a1099db 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c @@ -20,9 +20,13 @@ #include #include "compression.h" +/* workspace buffer size for s390 zlib hardware support */ +#define ZLIB_DFLTCC_BUF_SIZE (4 * PAGE_SIZE) + struct workspace { z_stream strm; char *buf; + unsigned int buf_size; struct list_head list; int level; }; @@ -61,7 +65,21 @@ struct list_head *zlib_alloc_workspace(unsigned int level) zlib_inflate_workspacesize()); workspace->strm.workspace = kvmalloc(workspacesize, GFP_KERNEL); workspace->level = level; - workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + workspace->buf = NULL; + /* + * In case of s390 zlib hardware support, allocate lager workspace + * buffer. If allocator fails, fall back to a single page buffer. + */ + if (zlib_deflate_dfltcc_enabled()) { + workspace->buf = kmalloc(ZLIB_DFLTCC_BUF_SIZE, + __GFP_NOMEMALLOC | __GFP_NORETRY | + __GFP_NOWARN | GFP_NOIO); + workspace->buf_size = ZLIB_DFLTCC_BUF_SIZE; + } + if (!workspace->buf) { + workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + workspace->buf_size = PAGE_SIZE; + } if (!workspace->strm.workspace || !workspace->buf) goto fail; @@ -85,6 +103,7 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, struct page *in_page = NULL; struct page *out_page = NULL; unsigned long bytes_left; + unsigned int in_buf_pages; unsigned long len = *total_out; unsigned long nr_dest_pages = *out_pages; const unsigned long max_out = nr_dest_pages * PAGE_SIZE; @@ -102,9 +121,6 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, workspace->strm.total_in = 0; workspace->strm.total_out = 0; - in_page = find_get_page(mapping, start >> PAGE_SHIFT); - data_in = kmap(in_page); - out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (out_page == NULL) { ret = -ENOMEM; @@ -114,12 +130,51 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, pages[0] = out_page; nr_pages = 1; - workspace->strm.next_in = data_in; + workspace->strm.next_in = workspace->buf; + workspace->strm.avail_in = 0; workspace->strm.next_out = cpage_out; workspace->strm.avail_out = PAGE_SIZE; - workspace->strm.avail_in = min(len, PAGE_SIZE); while (workspace->strm.total_in < len) { + /* + * Get next input pages and copy the contents to + * the workspace buffer if required. + */ + if (workspace->strm.avail_in == 0) { + bytes_left = len - workspace->strm.total_in; + in_buf_pages = min(DIV_ROUND_UP(bytes_left, PAGE_SIZE), + workspace->buf_size / PAGE_SIZE); + if (in_buf_pages > 1) { + int i; + + for (i = 0; i < in_buf_pages; i++) { + if (in_page) { + kunmap(in_page); + put_page(in_page); + } + in_page = find_get_page(mapping, + start >> PAGE_SHIFT); + data_in = kmap(in_page); + memcpy(workspace->buf + i * PAGE_SIZE, + data_in, PAGE_SIZE); + start += PAGE_SIZE; + } + workspace->strm.next_in = workspace->buf; + } else { + if (in_page) { + kunmap(in_page); + put_page(in_page); + } + in_page = find_get_page(mapping, + start >> PAGE_SHIFT); + data_in = kmap(in_page); + start += PAGE_SIZE; + workspace->strm.next_in = data_in; + } + workspace->strm.avail_in = min(bytes_left, + (unsigned long) workspace->buf_size); + } + ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH); if (ret != Z_OK) { pr_debug("BTRFS: deflate in loop returned %d\n", @@ -161,33 +216,43 @@ int zlib_compress_pages(struct list_head *ws, struct address_space *mapping, /* we're all done */ if (workspace->strm.total_in >= len) break; - - /* we've read in a full page, get a new one */ - if (workspace->strm.avail_in == 0) { - if (workspace->strm.total_out > max_out) - break; - - bytes_left = len - workspace->strm.total_in; - kunmap(in_page); - put_page(in_page); - - start += PAGE_SIZE; - in_page = find_get_page(mapping, - start >> PAGE_SHIFT); - data_in = kmap(in_page); - workspace->strm.avail_in = min(bytes_left, - PAGE_SIZE); - workspace->strm.next_in = data_in; - } + if (workspace->strm.total_out > max_out) + break; } workspace->strm.avail_in = 0; - ret = zlib_deflate(&workspace->strm, Z_FINISH); - zlib_deflateEnd(&workspace->strm); - - if (ret != Z_STREAM_END) { - ret = -EIO; - goto out; + /* + * Call deflate with Z_FINISH flush parameter providing more output + * space but no more input data, until it returns with Z_STREAM_END. + */ + while (ret != Z_STREAM_END) { + ret = zlib_deflate(&workspace->strm, Z_FINISH); + if (ret == Z_STREAM_END) + break; + if (ret != Z_OK && ret != Z_BUF_ERROR) { + zlib_deflateEnd(&workspace->strm); + ret = -EIO; + goto out; + } else if (workspace->strm.avail_out == 0) { + /* get another page for the stream end */ + kunmap(out_page); + if (nr_pages == nr_dest_pages) { + out_page = NULL; + ret = -E2BIG; + goto out; + } + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); + if (out_page == NULL) { + ret = -ENOMEM; + goto out; + } + cpage_out = kmap(out_page); + pages[nr_pages] = out_page; + nr_pages++; + workspace->strm.avail_out = PAGE_SIZE; + workspace->strm.next_out = cpage_out; + } } + zlib_deflateEnd(&workspace->strm); if (workspace->strm.total_out >= workspace->strm.total_in) { ret = -E2BIG; @@ -231,7 +296,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) workspace->strm.total_out = 0; workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_SIZE; + workspace->strm.avail_out = workspace->buf_size; /* If it's deflate, and it's got no preset dictionary, then we can tell zlib to skip the adler32 check. */ @@ -270,7 +335,7 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb) } workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_SIZE; + workspace->strm.avail_out = workspace->buf_size; if (workspace->strm.avail_in == 0) { unsigned long tmp; @@ -320,7 +385,7 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in, workspace->strm.total_in = 0; workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_SIZE; + workspace->strm.avail_out = workspace->buf_size; workspace->strm.total_out = 0; /* If it's deflate, and it's got no preset dictionary, then we can tell zlib to skip the adler32 check. */ @@ -364,7 +429,7 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in, buf_offset = 0; bytes = min(PAGE_SIZE - pg_offset, - PAGE_SIZE - buf_offset); + PAGE_SIZE - (buf_offset % PAGE_SIZE)); bytes = min(bytes, bytes_left); kaddr = kmap_atomic(dest_page); @@ -375,7 +440,7 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in, bytes_left -= bytes; next: workspace->strm.next_out = workspace->buf; - workspace->strm.avail_out = PAGE_SIZE; + workspace->strm.avail_out = workspace->buf_size; } if (ret != Z_STREAM_END && bytes_left != 0) -- cgit v1.2.3