diff options
Diffstat (limited to 'fs/bcachefs/super-io.c')
-rw-r--r-- | fs/bcachefs/super-io.c | 207 |
1 files changed, 158 insertions, 49 deletions
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 7c71594f6a8b..cb5d960aed92 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -23,9 +23,7 @@ #include <linux/backing-dev.h> #include <linux/sort.h> - -static const struct blk_holder_ops bch2_sb_handle_bdev_ops = { -}; +#include <linux/string_choices.h> struct bch2_metadata_version { u16 version; @@ -41,7 +39,7 @@ static const struct bch2_metadata_version bch2_metadata_versions[] = { #undef x }; -void bch2_version_to_text(struct printbuf *out, unsigned v) +void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v) { const char *str = "(unknown version)"; @@ -54,7 +52,7 @@ void bch2_version_to_text(struct printbuf *out, unsigned v) prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str); } -unsigned bch2_latest_compatible_version(unsigned v) +enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v) { if (!BCH_VERSION_MAJOR(v)) return v; @@ -68,6 +66,40 @@ unsigned bch2_latest_compatible_version(unsigned v) return v; } +int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version) +{ + int ret = ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) && + version <= c->sb.version_incompat_allowed) + ? 0 + : -BCH_ERR_may_not_use_incompat_feature; + + mutex_lock(&c->sb_lock); + if (!ret) { + SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version)); + bch2_write_super(c); + } else { + darray_for_each(c->incompat_versions_requested, i) + if (version == *i) + goto out; + + darray_push(&c->incompat_versions_requested, version); + struct printbuf buf = PRINTBUF; + prt_str(&buf, "requested incompat feature "); + bch2_version_to_text(&buf, version); + prt_str(&buf, " currently not enabled"); + prt_printf(&buf, "\n set version_upgrade=incompat to enable"); + + bch_notice(c, "%s", buf.buf); + printbuf_exit(&buf); + } + +out: + mutex_unlock(&c->sb_lock); + + return ret; +} + const char * const bch2_sb_fields[] = { #define x(name, nr) #name, BCH_SB_FIELDS() @@ -232,7 +264,7 @@ struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb, struct bch_sb_handle *dev_sb = &ca->disk_sb; if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); return NULL; } } @@ -349,31 +381,39 @@ static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out) return 0; } -static int bch2_sb_validate(struct bch_sb_handle *disk_sb, - enum bch_validate_flags flags, struct printbuf *out) +int bch2_sb_validate(struct bch_sb *sb, u64 read_offset, + enum bch_validate_flags flags, struct printbuf *out) { - struct bch_sb *sb = disk_sb->sb; struct bch_sb_field_members_v1 *mi; enum bch_opt_id opt_id; - u16 block_size; int ret; ret = bch2_sb_compatible(sb, out); if (ret) return ret; - if (sb->features[1] || - (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR))) { - prt_printf(out, "Filesystem has incompatible features"); + u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR); + unsigned incompat_bit = 0; + if (incompat) + incompat_bit = __ffs64(incompat); + else if (sb->features[1]) + incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1])); + + if (incompat_bit) { + prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)", + incompat_bit, + bch2_sb_features[BCH_FEATURE_NR - 1], + BCH_FEATURE_NR - 1); return -BCH_ERR_invalid_sb_features; } - block_size = le16_to_cpu(sb->block_size); - - if (block_size > PAGE_SECTORS) { - prt_printf(out, "Block size too big (got %u, max %u)", - block_size, PAGE_SECTORS); - return -BCH_ERR_invalid_sb_block_size; + if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) || + BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) { + prt_str(out, "Filesystem has incompatible version "); + bch2_version_to_text(out, le16_to_cpu(sb->version)); + prt_str(out, ", current version "); + bch2_version_to_text(out, bcachefs_metadata_version_current); + return -BCH_ERR_invalid_sb_features; } if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) { @@ -386,6 +426,13 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, return -BCH_ERR_invalid_sb_uuid; } + if (!(flags & BCH_VALIDATE_write) && + le64_to_cpu(sb->offset) != read_offset) { + prt_printf(out, "Bad sb offset (got %llu, read from %llu)", + le64_to_cpu(sb->offset), read_offset); + return -BCH_ERR_invalid_sb_offset; + } + if (!sb->nr_devices || sb->nr_devices > BCH_SB_MEMBERS_MAX) { prt_printf(out, "Bad number of member devices %u (max %u)", @@ -406,6 +453,21 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, return -BCH_ERR_invalid_sb_time_precision; } + /* old versions didn't know to downgrade this field */ + if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version)) + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version)); + + if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) { + prt_printf(out, "Invalid version_incompat "); + bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); + prt_str(out, " > incompat_allowed "); + bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); + if (flags & BCH_VALIDATE_write) + return -BCH_ERR_invalid_sb_version; + else + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb)); + } + if (!flags) { /* * Been seeing a bug where these are getting inexplicably @@ -426,13 +488,25 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2) SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true); + + if (!BCH_SB_WRITE_ERROR_TIMEOUT(sb)) + SET_BCH_SB_WRITE_ERROR_TIMEOUT(sb, 30); + + if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_extent_flags && + !BCH_SB_CSUM_ERR_RETRY_NR(sb)) + SET_BCH_SB_CSUM_ERR_RETRY_NR(sb, 3); } +#ifdef __KERNEL__ + if (!BCH_SB_SHARD_INUMS_NBITS(sb)) + SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus()))); +#endif + for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { const struct bch_option *opt = bch2_opt_table + opt_id; - if (opt->get_sb != BCH2_NO_SB_OPT) { - u64 v = bch2_opt_from_sb(sb, opt_id); + if (opt->get_sb) { + u64 v = bch2_opt_from_sb(sb, opt_id, -1); prt_printf(out, "Invalid option "); ret = bch2_opt_validate(opt, v, out); @@ -519,6 +593,9 @@ static void bch2_sb_update(struct bch_fs *c) c->sb.uuid = src->uuid; c->sb.user_uuid = src->user_uuid; c->sb.version = le16_to_cpu(src->version); + c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src); + c->sb.version_incompat_allowed + = BCH_SB_VERSION_INCOMPAT_ALLOWED(src); c->sb.version_min = le16_to_cpu(src->version_min); c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src); c->sb.nr_devices = src->nr_devices; @@ -676,7 +753,8 @@ reread: } enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb); - if (csum_type >= BCH_CSUM_NR) { + if (csum_type >= BCH_CSUM_NR || + bch2_csum_type_is_encryption(csum_type)) { prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb)); return -BCH_ERR_invalid_sb_csum_type; } @@ -708,7 +786,7 @@ retry: memset(sb, 0, sizeof(*sb)); sb->mode = BLK_OPEN_READ; sb->have_bio = true; - sb->holder = kmalloc(1, GFP_KERNEL); + sb->holder = kzalloc(sizeof(*sb->holder), GFP_KERNEL); if (!sb->holder) return -ENOMEM; @@ -834,7 +912,7 @@ got_super: sb->have_layout = true; - ret = bch2_sb_validate(sb, 0, &err); + ret = bch2_sb_validate(sb->sb, offset, 0, &err); if (ret) { bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n", path, err.buf); @@ -871,19 +949,19 @@ static void write_super_endio(struct bio *bio) { struct bch_dev *ca = bio->bi_private; + bch2_account_io_success_fail(ca, bio_data_dir(bio), !bio->bi_status); + /* XXX: return errors directly */ - if (bch2_dev_io_err_on(bio->bi_status, ca, - bio_data_dir(bio) - ? BCH_MEMBER_ERROR_write - : BCH_MEMBER_ERROR_read, - "superblock %s error: %s", - bio_data_dir(bio) ? "write" : "read", - bch2_blk_status_to_str(bio->bi_status))) + if (bio->bi_status) { + bch_err_dev_ratelimited(ca, "superblock %s error: %s", + str_write_read(bio_data_dir(bio)), + bch2_blk_status_to_str(bio->bi_status)); ca->sb_write_error = 1; + } closure_put(&ca->fs->sb_write); - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); } static void read_back_super(struct bch_fs *c, struct bch_dev *ca) @@ -891,16 +969,17 @@ static void read_back_super(struct bch_fs *c, struct bch_dev *ca) struct bch_sb *sb = ca->disk_sb.sb; struct bio *bio = ca->disk_sb.bio; + memset(ca->sb_read_scratch, 0, BCH_SB_READ_SCRATCH_BUF_SIZE); + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META); bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]); bio->bi_end_io = write_super_endio; bio->bi_private = ca; - bch2_bio_map(bio, ca->sb_read_scratch, PAGE_SIZE); + bch2_bio_map(bio, ca->sb_read_scratch, BCH_SB_READ_SCRATCH_BUF_SIZE); - this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], - bio_sectors(bio)); + this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio)); - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[READ]); closure_bio_submit(bio, &c->sb_write); } @@ -926,7 +1005,7 @@ static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx) this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb], bio_sectors(bio)); - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[READ]); closure_bio_submit(bio, &c->sb_write); } @@ -951,13 +1030,20 @@ int bch2_write_super(struct bch_fs *c) closure_init_stack(cl); memset(&sb_written, 0, sizeof(sb_written)); + /* + * Note: we do writes to RO devices here, and we might want to change + * that in the future. + * + * For now, we expect to be able to call write_super() when we're not + * yet RW: + */ for_each_online_member(c, ca) { ret = darray_push(&online_devices, ca); if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) { - percpu_ref_put(&ca->io_ref); + percpu_ref_put(&ca->io_ref[READ]); goto out; } - percpu_ref_get(&ca->io_ref); + percpu_ref_get(&ca->io_ref[READ]); } /* Make sure we're using the new magic numbers: */ @@ -990,7 +1076,7 @@ int bch2_write_super(struct bch_fs *c) darray_for_each(online_devices, ca) { printbuf_reset(&err); - ret = bch2_sb_validate(&(*ca)->disk_sb, BCH_VALIDATE_write, &err); + ret = bch2_sb_validate((*ca)->disk_sb.sb, 0, BCH_VALIDATE_write, &err); if (ret) { bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf); goto out; @@ -1016,7 +1102,8 @@ int bch2_write_super(struct bch_fs *c) prt_str(&buf, ")"); bch2_fs_fatal_error(c, ": %s", buf.buf); printbuf_exit(&buf); - return -BCH_ERR_sb_not_downgraded; + ret = -BCH_ERR_sb_not_downgraded; + goto out; } darray_for_each(online_devices, ca) { @@ -1042,9 +1129,16 @@ int bch2_write_super(struct bch_fs *c) ": Superblock write was silently dropped! (seq %llu expected %llu)", le64_to_cpu(ca->sb_read_scratch->seq), ca->disk_sb.seq); - bch2_fs_fatal_error(c, "%s", buf.buf); + + if (c->opts.errors != BCH_ON_ERROR_continue && + c->opts.errors != BCH_ON_ERROR_fix_safe) { + ret = -BCH_ERR_erofs_sb_err; + bch2_fs_fatal_error(c, "%s", buf.buf); + } else { + bch_err(c, "%s", buf.buf); + } + printbuf_exit(&buf); - ret = -BCH_ERR_erofs_sb_err; } if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) { @@ -1111,12 +1205,12 @@ int bch2_write_super(struct bch_fs *c) !can_mount_with_written), c, ": Unable to write superblock to sufficient devices (from %ps)", (void *) _RET_IP_)) - ret = -1; + ret = -BCH_ERR_erofs_sb_err; out: /* Make new options visible after they're persistent: */ bch2_sb_update(c); darray_for_each(online_devices, ca) - percpu_ref_put(&(*ca)->io_ref); + percpu_ref_put(&(*ca)->io_ref[READ]); darray_exit(&online_devices); printbuf_exit(&err); return ret; @@ -1149,6 +1243,8 @@ bool bch2_check_version_downgrade(struct bch_fs *c) */ if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); + if (BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current) + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current); if (c->sb.version > bcachefs_metadata_version_current) c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); if (c->sb.version_min > bcachefs_metadata_version_current) @@ -1157,7 +1253,7 @@ bool bch2_check_version_downgrade(struct bch_fs *c) return ret; } -void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) +void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat) { lockdep_assert_held(&c->sb_lock); @@ -1166,7 +1262,12 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version) bch2_sb_field_resize(&c->disk_sb, downgrade, 0); c->disk_sb.sb->version = cpu_to_le16(new_version); - c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); + + if (incompat) { + c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL); + SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, + max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version)); + } } static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f, @@ -1331,6 +1432,14 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, bch2_version_to_text(out, le16_to_cpu(sb->version)); prt_newline(out); + prt_printf(out, "Incompatible features allowed:\t"); + bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)); + prt_newline(out); + + prt_printf(out, "Incompatible features in use:\t"); + bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb)); + prt_newline(out); + prt_printf(out, "Version upgrade complete:\t"); bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb)); prt_newline(out); @@ -1388,8 +1497,8 @@ void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb, for (id = 0; id < bch2_opts_nr; id++) { const struct bch_option *opt = bch2_opt_table + id; - if (opt->get_sb != BCH2_NO_SB_OPT) { - u64 v = bch2_opt_from_sb(sb, id); + if (opt->get_sb) { + u64 v = bch2_opt_from_sb(sb, id, -1); prt_printf(out, "%s:\t", opt->attr.name); bch2_opt_to_text(out, NULL, sb, opt, v, |