summaryrefslogtreecommitdiff
path: root/fs/bcachefs/compress.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs/compress.c')
-rw-r--r--fs/bcachefs/compress.c710
1 files changed, 710 insertions, 0 deletions
diff --git a/fs/bcachefs/compress.c b/fs/bcachefs/compress.c
new file mode 100644
index 000000000000..1480b64547b0
--- /dev/null
+++ b/fs/bcachefs/compress.c
@@ -0,0 +1,710 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bcachefs.h"
+#include "checksum.h"
+#include "compress.h"
+#include "extents.h"
+#include "super-io.h"
+
+#include <linux/lz4.h>
+#include <linux/zlib.h>
+#include <linux/zstd.h>
+
+/* Bounce buffer: */
+struct bbuf {
+ void *b;
+ enum {
+ BB_NONE,
+ BB_VMAP,
+ BB_KMALLOC,
+ BB_MEMPOOL,
+ } type;
+ int rw;
+};
+
+static struct bbuf __bounce_alloc(struct bch_fs *c, unsigned size, int rw)
+{
+ void *b;
+
+ BUG_ON(size > c->opts.encoded_extent_max);
+
+ b = kmalloc(size, GFP_NOFS|__GFP_NOWARN);
+ if (b)
+ return (struct bbuf) { .b = b, .type = BB_KMALLOC, .rw = rw };
+
+ b = mempool_alloc(&c->compression_bounce[rw], GFP_NOFS);
+ if (b)
+ return (struct bbuf) { .b = b, .type = BB_MEMPOOL, .rw = rw };
+
+ BUG();
+}
+
+static bool bio_phys_contig(struct bio *bio, struct bvec_iter start)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ void *expected_start = NULL;
+
+ __bio_for_each_bvec(bv, bio, iter, start) {
+ if (expected_start &&
+ expected_start != page_address(bv.bv_page) + bv.bv_offset)
+ return false;
+
+ expected_start = page_address(bv.bv_page) +
+ bv.bv_offset + bv.bv_len;
+ }
+
+ return true;
+}
+
+static struct bbuf __bio_map_or_bounce(struct bch_fs *c, struct bio *bio,
+ struct bvec_iter start, int rw)
+{
+ struct bbuf ret;
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ unsigned nr_pages = 0;
+ struct page *stack_pages[16];
+ struct page **pages = NULL;
+ void *data;
+
+ BUG_ON(start.bi_size > c->opts.encoded_extent_max);
+
+ if (!PageHighMem(bio_iter_page(bio, start)) &&
+ bio_phys_contig(bio, start))
+ return (struct bbuf) {
+ .b = page_address(bio_iter_page(bio, start)) +
+ bio_iter_offset(bio, start),
+ .type = BB_NONE, .rw = rw
+ };
+
+ /* check if we can map the pages contiguously: */
+ __bio_for_each_segment(bv, bio, iter, start) {
+ if (iter.bi_size != start.bi_size &&
+ bv.bv_offset)
+ goto bounce;
+
+ if (bv.bv_len < iter.bi_size &&
+ bv.bv_offset + bv.bv_len < PAGE_SIZE)
+ goto bounce;
+
+ nr_pages++;
+ }
+
+ BUG_ON(DIV_ROUND_UP(start.bi_size, PAGE_SIZE) > nr_pages);
+
+ pages = nr_pages > ARRAY_SIZE(stack_pages)
+ ? kmalloc_array(nr_pages, sizeof(struct page *), GFP_NOFS)
+ : stack_pages;
+ if (!pages)
+ goto bounce;
+
+ nr_pages = 0;
+ __bio_for_each_segment(bv, bio, iter, start)
+ pages[nr_pages++] = bv.bv_page;
+
+ data = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+ if (pages != stack_pages)
+ kfree(pages);
+
+ if (data)
+ return (struct bbuf) {
+ .b = data + bio_iter_offset(bio, start),
+ .type = BB_VMAP, .rw = rw
+ };
+bounce:
+ ret = __bounce_alloc(c, start.bi_size, rw);
+
+ if (rw == READ)
+ memcpy_from_bio(ret.b, bio, start);
+
+ return ret;
+}
+
+static struct bbuf bio_map_or_bounce(struct bch_fs *c, struct bio *bio, int rw)
+{
+ return __bio_map_or_bounce(c, bio, bio->bi_iter, rw);
+}
+
+static void bio_unmap_or_unbounce(struct bch_fs *c, struct bbuf buf)
+{
+ switch (buf.type) {
+ case BB_NONE:
+ break;
+ case BB_VMAP:
+ vunmap((void *) ((unsigned long) buf.b & PAGE_MASK));
+ break;
+ case BB_KMALLOC:
+ kfree(buf.b);
+ break;
+ case BB_MEMPOOL:
+ mempool_free(buf.b, &c->compression_bounce[buf.rw]);
+ break;
+ }
+}
+
+static inline void zlib_set_workspace(z_stream *strm, void *workspace)
+{
+#ifdef __KERNEL__
+ strm->workspace = workspace;
+#endif
+}
+
+static int __bio_uncompress(struct bch_fs *c, struct bio *src,
+ void *dst_data, struct bch_extent_crc_unpacked crc)
+{
+ struct bbuf src_data = { NULL };
+ size_t src_len = src->bi_iter.bi_size;
+ size_t dst_len = crc.uncompressed_size << 9;
+ void *workspace;
+ int ret;
+
+ src_data = bio_map_or_bounce(c, src, READ);
+
+ switch (crc.compression_type) {
+ case BCH_COMPRESSION_TYPE_lz4_old:
+ case BCH_COMPRESSION_TYPE_lz4:
+ ret = LZ4_decompress_safe_partial(src_data.b, dst_data,
+ src_len, dst_len, dst_len);
+ if (ret != dst_len)
+ goto err;
+ break;
+ case BCH_COMPRESSION_TYPE_gzip: {
+ z_stream strm = {
+ .next_in = src_data.b,
+ .avail_in = src_len,
+ .next_out = dst_data,
+ .avail_out = dst_len,
+ };
+
+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
+
+ zlib_set_workspace(&strm, workspace);
+ zlib_inflateInit2(&strm, -MAX_WBITS);
+ ret = zlib_inflate(&strm, Z_FINISH);
+
+ mempool_free(workspace, &c->decompress_workspace);
+
+ if (ret != Z_STREAM_END)
+ goto err;
+ break;
+ }
+ case BCH_COMPRESSION_TYPE_zstd: {
+ ZSTD_DCtx *ctx;
+ size_t real_src_len = le32_to_cpup(src_data.b);
+
+ if (real_src_len > src_len - 4)
+ goto err;
+
+ workspace = mempool_alloc(&c->decompress_workspace, GFP_NOFS);
+ ctx = zstd_init_dctx(workspace, zstd_dctx_workspace_bound());
+
+ ret = zstd_decompress_dctx(ctx,
+ dst_data, dst_len,
+ src_data.b + 4, real_src_len);
+
+ mempool_free(workspace, &c->decompress_workspace);
+
+ if (ret != dst_len)
+ goto err;
+ break;
+ }
+ default:
+ BUG();
+ }
+ ret = 0;
+out:
+ bio_unmap_or_unbounce(c, src_data);
+ return ret;
+err:
+ ret = -EIO;
+ goto out;
+}
+
+int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
+ struct bch_extent_crc_unpacked *crc)
+{
+ struct bbuf data = { NULL };
+ size_t dst_len = crc->uncompressed_size << 9;
+
+ /* bio must own its pages: */
+ BUG_ON(!bio->bi_vcnt);
+ BUG_ON(DIV_ROUND_UP(crc->live_size, PAGE_SECTORS) > bio->bi_max_vecs);
+
+ if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
+ crc->compressed_size << 9 > c->opts.encoded_extent_max) {
+ bch_err(c, "error rewriting existing data: extent too big");
+ return -EIO;
+ }
+
+ data = __bounce_alloc(c, dst_len, WRITE);
+
+ if (__bio_uncompress(c, bio, data.b, *crc)) {
+ if (!c->opts.no_data_io)
+ bch_err(c, "error rewriting existing data: decompression error");
+ bio_unmap_or_unbounce(c, data);
+ return -EIO;
+ }
+
+ /*
+ * XXX: don't have a good way to assert that the bio was allocated with
+ * enough space, we depend on bch2_move_extent doing the right thing
+ */
+ bio->bi_iter.bi_size = crc->live_size << 9;
+
+ memcpy_to_bio(bio, bio->bi_iter, data.b + (crc->offset << 9));
+
+ crc->csum_type = 0;
+ crc->compression_type = 0;
+ crc->compressed_size = crc->live_size;
+ crc->uncompressed_size = crc->live_size;
+ crc->offset = 0;
+ crc->csum = (struct bch_csum) { 0, 0 };
+
+ bio_unmap_or_unbounce(c, data);
+ return 0;
+}
+
+int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,
+ struct bio *dst, struct bvec_iter dst_iter,
+ struct bch_extent_crc_unpacked crc)
+{
+ struct bbuf dst_data = { NULL };
+ size_t dst_len = crc.uncompressed_size << 9;
+ int ret;
+
+ if (crc.uncompressed_size << 9 > c->opts.encoded_extent_max ||
+ crc.compressed_size << 9 > c->opts.encoded_extent_max)
+ return -EIO;
+
+ dst_data = dst_len == dst_iter.bi_size
+ ? __bio_map_or_bounce(c, dst, dst_iter, WRITE)
+ : __bounce_alloc(c, dst_len, WRITE);
+
+ ret = __bio_uncompress(c, src, dst_data.b, crc);
+ if (ret)
+ goto err;
+
+ if (dst_data.type != BB_NONE &&
+ dst_data.type != BB_VMAP)
+ memcpy_to_bio(dst, dst_iter, dst_data.b + (crc.offset << 9));
+err:
+ bio_unmap_or_unbounce(c, dst_data);
+ return ret;
+}
+
+static int attempt_compress(struct bch_fs *c,
+ void *workspace,
+ void *dst, size_t dst_len,
+ void *src, size_t src_len,
+ struct bch_compression_opt compression)
+{
+ enum bch_compression_type compression_type =
+ __bch2_compression_opt_to_type[compression.type];
+
+ switch (compression_type) {
+ case BCH_COMPRESSION_TYPE_lz4:
+ if (compression.level < LZ4HC_MIN_CLEVEL) {
+ int len = src_len;
+ int ret = LZ4_compress_destSize(
+ src, dst,
+ &len, dst_len,
+ workspace);
+ if (len < src_len)
+ return -len;
+
+ return ret;
+ } else {
+ int ret = LZ4_compress_HC(
+ src, dst,
+ src_len, dst_len,
+ compression.level,
+ workspace);
+
+ return ret ?: -1;
+ }
+ case BCH_COMPRESSION_TYPE_gzip: {
+ z_stream strm = {
+ .next_in = src,
+ .avail_in = src_len,
+ .next_out = dst,
+ .avail_out = dst_len,
+ };
+
+ zlib_set_workspace(&strm, workspace);
+ zlib_deflateInit2(&strm,
+ compression.level
+ ? clamp_t(unsigned, compression.level,
+ Z_BEST_SPEED, Z_BEST_COMPRESSION)
+ : Z_DEFAULT_COMPRESSION,
+ Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
+ Z_DEFAULT_STRATEGY);
+
+ if (zlib_deflate(&strm, Z_FINISH) != Z_STREAM_END)
+ return 0;
+
+ if (zlib_deflateEnd(&strm) != Z_OK)
+ return 0;
+
+ return strm.total_out;
+ }
+ case BCH_COMPRESSION_TYPE_zstd: {
+ /*
+ * rescale:
+ * zstd max compression level is 22, our max level is 15
+ */
+ unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
+ ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
+ ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
+ zstd_cctx_workspace_bound(&params.cParams));
+
+ /*
+ * ZSTD requires that when we decompress we pass in the exact
+ * compressed size - rounding it up to the nearest sector
+ * doesn't work, so we use the first 4 bytes of the buffer for
+ * that.
+ *
+ * Additionally, the ZSTD code seems to have a bug where it will
+ * write just past the end of the buffer - so subtract a fudge
+ * factor (7 bytes) from the dst buffer size to account for
+ * that.
+ */
+ size_t len = zstd_compress_cctx(ctx,
+ dst + 4, dst_len - 4 - 7,
+ src, src_len,
+ &c->zstd_params);
+ if (zstd_is_error(len))
+ return 0;
+
+ *((__le32 *) dst) = cpu_to_le32(len);
+ return len + 4;
+ }
+ default:
+ BUG();
+ }
+}
+
+static unsigned __bio_compress(struct bch_fs *c,
+ struct bio *dst, size_t *dst_len,
+ struct bio *src, size_t *src_len,
+ struct bch_compression_opt compression)
+{
+ struct bbuf src_data = { NULL }, dst_data = { NULL };
+ void *workspace;
+ enum bch_compression_type compression_type =
+ __bch2_compression_opt_to_type[compression.type];
+ unsigned pad;
+ int ret = 0;
+
+ BUG_ON(compression_type >= BCH_COMPRESSION_TYPE_NR);
+ BUG_ON(!mempool_initialized(&c->compress_workspace[compression_type]));
+
+ /* If it's only one block, don't bother trying to compress: */
+ if (src->bi_iter.bi_size <= c->opts.block_size)
+ return BCH_COMPRESSION_TYPE_incompressible;
+
+ dst_data = bio_map_or_bounce(c, dst, WRITE);
+ src_data = bio_map_or_bounce(c, src, READ);
+
+ workspace = mempool_alloc(&c->compress_workspace[compression_type], GFP_NOFS);
+
+ *src_len = src->bi_iter.bi_size;
+ *dst_len = dst->bi_iter.bi_size;
+
+ /*
+ * XXX: this algorithm sucks when the compression code doesn't tell us
+ * how much would fit, like LZ4 does:
+ */
+ while (1) {
+ if (*src_len <= block_bytes(c)) {
+ ret = -1;
+ break;
+ }
+
+ ret = attempt_compress(c, workspace,
+ dst_data.b, *dst_len,
+ src_data.b, *src_len,
+ compression);
+ if (ret > 0) {
+ *dst_len = ret;
+ ret = 0;
+ break;
+ }
+
+ /* Didn't fit: should we retry with a smaller amount? */
+ if (*src_len <= *dst_len) {
+ ret = -1;
+ break;
+ }
+
+ /*
+ * If ret is negative, it's a hint as to how much data would fit
+ */
+ BUG_ON(-ret >= *src_len);
+
+ if (ret < 0)
+ *src_len = -ret;
+ else
+ *src_len -= (*src_len - *dst_len) / 2;
+ *src_len = round_down(*src_len, block_bytes(c));
+ }
+
+ mempool_free(workspace, &c->compress_workspace[compression_type]);
+
+ if (ret)
+ goto err;
+
+ /* Didn't get smaller: */
+ if (round_up(*dst_len, block_bytes(c)) >= *src_len)
+ goto err;
+
+ pad = round_up(*dst_len, block_bytes(c)) - *dst_len;
+
+ memset(dst_data.b + *dst_len, 0, pad);
+ *dst_len += pad;
+
+ if (dst_data.type != BB_NONE &&
+ dst_data.type != BB_VMAP)
+ memcpy_to_bio(dst, dst->bi_iter, dst_data.b);
+
+ BUG_ON(!*dst_len || *dst_len > dst->bi_iter.bi_size);
+ BUG_ON(!*src_len || *src_len > src->bi_iter.bi_size);
+ BUG_ON(*dst_len & (block_bytes(c) - 1));
+ BUG_ON(*src_len & (block_bytes(c) - 1));
+ ret = compression_type;
+out:
+ bio_unmap_or_unbounce(c, src_data);
+ bio_unmap_or_unbounce(c, dst_data);
+ return ret;
+err:
+ ret = BCH_COMPRESSION_TYPE_incompressible;
+ goto out;
+}
+
+unsigned bch2_bio_compress(struct bch_fs *c,
+ struct bio *dst, size_t *dst_len,
+ struct bio *src, size_t *src_len,
+ unsigned compression_opt)
+{
+ unsigned orig_dst = dst->bi_iter.bi_size;
+ unsigned orig_src = src->bi_iter.bi_size;
+ unsigned compression_type;
+
+ /* Don't consume more than BCH_ENCODED_EXTENT_MAX from @src: */
+ src->bi_iter.bi_size = min_t(unsigned, src->bi_iter.bi_size,
+ c->opts.encoded_extent_max);
+ /* Don't generate a bigger output than input: */
+ dst->bi_iter.bi_size = min(dst->bi_iter.bi_size, src->bi_iter.bi_size);
+
+ compression_type =
+ __bio_compress(c, dst, dst_len, src, src_len,
+ bch2_compression_decode(compression_opt));
+
+ dst->bi_iter.bi_size = orig_dst;
+ src->bi_iter.bi_size = orig_src;
+ return compression_type;
+}
+
+static int __bch2_fs_compress_init(struct bch_fs *, u64);
+
+#define BCH_FEATURE_none 0
+
+static const unsigned bch2_compression_opt_to_feature[] = {
+#define x(t, n) [BCH_COMPRESSION_OPT_##t] = BCH_FEATURE_##t,
+ BCH_COMPRESSION_OPTS()
+#undef x
+};
+
+#undef BCH_FEATURE_none
+
+static int __bch2_check_set_has_compressed_data(struct bch_fs *c, u64 f)
+{
+ int ret = 0;
+
+ if ((c->sb.features & f) == f)
+ return 0;
+
+ mutex_lock(&c->sb_lock);
+
+ if ((c->sb.features & f) == f) {
+ mutex_unlock(&c->sb_lock);
+ return 0;
+ }
+
+ ret = __bch2_fs_compress_init(c, c->sb.features|f);
+ if (ret) {
+ mutex_unlock(&c->sb_lock);
+ return ret;
+ }
+
+ c->disk_sb.sb->features[0] |= cpu_to_le64(f);
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+
+ return 0;
+}
+
+int bch2_check_set_has_compressed_data(struct bch_fs *c,
+ unsigned compression_opt)
+{
+ unsigned compression_type = bch2_compression_decode(compression_opt).type;
+
+ BUG_ON(compression_type >= ARRAY_SIZE(bch2_compression_opt_to_feature));
+
+ return compression_type
+ ? __bch2_check_set_has_compressed_data(c,
+ 1ULL << bch2_compression_opt_to_feature[compression_type])
+ : 0;
+}
+
+void bch2_fs_compress_exit(struct bch_fs *c)
+{
+ unsigned i;
+
+ mempool_exit(&c->decompress_workspace);
+ for (i = 0; i < ARRAY_SIZE(c->compress_workspace); i++)
+ mempool_exit(&c->compress_workspace[i]);
+ mempool_exit(&c->compression_bounce[WRITE]);
+ mempool_exit(&c->compression_bounce[READ]);
+}
+
+static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
+{
+ size_t decompress_workspace_size = 0;
+ ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
+ c->opts.encoded_extent_max);
+ struct {
+ unsigned feature;
+ enum bch_compression_type type;
+ size_t compress_workspace;
+ size_t decompress_workspace;
+ } compression_types[] = {
+ { BCH_FEATURE_lz4, BCH_COMPRESSION_TYPE_lz4,
+ max_t(size_t, LZ4_MEM_COMPRESS, LZ4HC_MEM_COMPRESS),
+ 0 },
+ { BCH_FEATURE_gzip, BCH_COMPRESSION_TYPE_gzip,
+ zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
+ zlib_inflate_workspacesize(), },
+ { BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
+ zstd_cctx_workspace_bound(&params.cParams),
+ zstd_dctx_workspace_bound() },
+ }, *i;
+ bool have_compressed = false;
+
+ c->zstd_params = params;
+
+ for (i = compression_types;
+ i < compression_types + ARRAY_SIZE(compression_types);
+ i++)
+ have_compressed |= (features & (1 << i->feature)) != 0;
+
+ if (!have_compressed)
+ return 0;
+
+ if (!mempool_initialized(&c->compression_bounce[READ]) &&
+ mempool_init_kvpmalloc_pool(&c->compression_bounce[READ],
+ 1, c->opts.encoded_extent_max))
+ return -BCH_ERR_ENOMEM_compression_bounce_read_init;
+
+ if (!mempool_initialized(&c->compression_bounce[WRITE]) &&
+ mempool_init_kvpmalloc_pool(&c->compression_bounce[WRITE],
+ 1, c->opts.encoded_extent_max))
+ return -BCH_ERR_ENOMEM_compression_bounce_write_init;
+
+ for (i = compression_types;
+ i < compression_types + ARRAY_SIZE(compression_types);
+ i++) {
+ decompress_workspace_size =
+ max(decompress_workspace_size, i->decompress_workspace);
+
+ if (!(features & (1 << i->feature)))
+ continue;
+
+ if (mempool_initialized(&c->compress_workspace[i->type]))
+ continue;
+
+ if (mempool_init_kvpmalloc_pool(
+ &c->compress_workspace[i->type],
+ 1, i->compress_workspace))
+ return -BCH_ERR_ENOMEM_compression_workspace_init;
+ }
+
+ if (!mempool_initialized(&c->decompress_workspace) &&
+ mempool_init_kvpmalloc_pool(&c->decompress_workspace,
+ 1, decompress_workspace_size))
+ return -BCH_ERR_ENOMEM_decompression_workspace_init;
+
+ return 0;
+}
+
+static u64 compression_opt_to_feature(unsigned v)
+{
+ unsigned type = bch2_compression_decode(v).type;
+
+ return BIT_ULL(bch2_compression_opt_to_feature[type]);
+}
+
+int bch2_fs_compress_init(struct bch_fs *c)
+{
+ u64 f = c->sb.features;
+
+ f |= compression_opt_to_feature(c->opts.compression);
+ f |= compression_opt_to_feature(c->opts.background_compression);
+
+ return __bch2_fs_compress_init(c, f);
+}
+
+int bch2_opt_compression_parse(struct bch_fs *c, const char *_val, u64 *res,
+ struct printbuf *err)
+{
+ char *val = kstrdup(_val, GFP_KERNEL);
+ char *p = val, *type_str, *level_str;
+ struct bch_compression_opt opt = { 0 };
+ int ret;
+
+ if (!val)
+ return -ENOMEM;
+
+ type_str = strsep(&p, ":");
+ level_str = p;
+
+ ret = match_string(bch2_compression_opts, -1, type_str);
+ if (ret < 0 && err)
+ prt_str(err, "invalid compression type");
+ if (ret < 0)
+ goto err;
+
+ opt.type = ret;
+
+ if (level_str) {
+ unsigned level;
+
+ ret = kstrtouint(level_str, 10, &level);
+ if (!ret && !opt.type && level)
+ ret = -EINVAL;
+ if (!ret && level > 15)
+ ret = -EINVAL;
+ if (ret < 0 && err)
+ prt_str(err, "invalid compression level");
+ if (ret < 0)
+ goto err;
+
+ opt.level = level;
+ }
+
+ *res = bch2_compression_encode(opt);
+err:
+ kfree(val);
+ return ret;
+}
+
+void bch2_opt_compression_to_text(struct printbuf *out,
+ struct bch_fs *c,
+ struct bch_sb *sb,
+ u64 v)
+{
+ struct bch_compression_opt opt = bch2_compression_decode(v);
+
+ prt_str(out, bch2_compression_opts[opt.type]);
+ if (opt.level)
+ prt_printf(out, ":%u", opt.level);
+}