diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-19 03:27:43 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-07-19 03:27:43 +0300 |
commit | 720261cfc7329406a50c2a8536e0039b9dd9a4e5 (patch) | |
tree | 660152d34a1e0fdfd747aae4b9c9485b826654e1 /fs/bcachefs/fs.c | |
parent | 4f40c636b291deeae7d1f4c9fb5db5f0aac54267 (diff) | |
parent | a97b43fac5b9b3ffca71b8a917a249789902fce9 (diff) | |
download | linux-720261cfc7329406a50c2a8536e0039b9dd9a4e5.tar.xz |
Merge tag 'bcachefs-2024-07-18.2' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs updates from Kent Overstreet:
- Metadata version 1.8: Stripe sectors accounting, BCH_DATA_unstriped
This splits out the accounting of dirty sectors and stripe sectors in
alloc keys; this lets us see stripe buckets that still have unstriped
data in them.
This is needed for ensuring that erasure coding is working correctly,
as well as completing stripe creation after a crash.
- Metadata version 1.9: Disk accounting rewrite
The previous disk accounting scheme relied heavily on percpu counters
that were also sharded by outstanding journal buffer; it was fast but
not extensible or scalable, and meant that all accounting counters
were recorded in every journal entry.
The new disk accounting scheme stores accounting as normal btree
keys; updates are deltas until they are flushed by the btree write
buffer.
This means we have no practical limit on the number of counters, and
a new tagged union format that's easy to extend.
We now have counters for compression type/ratio, per-snapshot-id
usage, per-btree-id usage, and pending rebalance work.
- Self healing on read IO/checksum error
Data is now automatically rewritten if we get a read error and then a
successful retry
- Mount API conversion (thanks to Thomas Bertschinger)
- Better lockdep coverage
Previously, btree node locks were tracked individually by lockdep,
like any other lock. But we may take _many_ btree node locks
simultaneously, we easily blow through the limit of 48 locks that
lockdep can track, leading to lockdep turning itself off.
Tracking each btree node lock individually isn't really necessary
since we have our own cycle detector for deadlock avoidance and
centralized tracking of btree node locks, so we now have a single
lockdep_map in btree_trans for "any btree nodes are locked".
- Some more small incremental work towards online check_allocations
- Lots more debugging improvements
- Fixes, including:
- undefined behaviour fixes, originally noted as breaking userspace
LTO builds
- fix a spurious warning in fsck_err, reported by Marcin
- fix an integer overflow on trans->nr_updates, also reported by
Marcin; this broke during deletion of highly fragmented indirect
extents
* tag 'bcachefs-2024-07-18.2' of https://evilpiepirate.org/git/bcachefs: (120 commits)
lockdep: Add comments for lockdep_set_no{validate,track}_class()
bcachefs: Fix integer overflow on trans->nr_updates
bcachefs: silence silly kdoc warning
bcachefs: Fix fsck warning about btree_trans not passed to fsck error
bcachefs: Add an error message for insufficient rw journal devs
bcachefs: varint: Avoid left-shift of a negative value
bcachefs: darray: Don't pass NULL to memcpy()
bcachefs: Kill bch2_assert_btree_nodes_not_locked()
bcachefs: Rename BCH_WRITE_DONE -> BCH_WRITE_SUBMITTED
bcachefs: __bch2_read(): call trans_begin() on every loop iter
bcachefs: show none if label is not set
bcachefs: drop packed, aligned from bkey_inode_buf
bcachefs: btree node scan: fall back to comparing by journal seq
bcachefs: Add lockdep support for btree node locks
lockdep: lockdep_set_notrack_class()
bcachefs: Improve copygc_wait_to_text()
bcachefs: Convert clock code to u64s
bcachefs: Improve startup message
bcachefs: Self healing on read IO error
bcachefs: Make read_only a mount option again, but hidden
...
Diffstat (limited to 'fs/bcachefs/fs.c')
-rw-r--r-- | fs/bcachefs/fs.c | 209 |
1 files changed, 138 insertions, 71 deletions
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 0c7d1bc0548a..3a5f49affa0a 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -26,11 +26,13 @@ #include "snapshot.h" #include "super.h" #include "xattr.h" +#include "trace.h" #include <linux/aio.h> #include <linux/backing-dev.h> #include <linux/exportfs.h> #include <linux/fiemap.h> +#include <linux/fs_context.h> #include <linux/module.h> #include <linux/pagemap.h> #include <linux/posix_acl.h> @@ -56,9 +58,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans, BUG_ON(bi->bi_inum != inode->v.i_ino); - bch2_assert_pos_locked(trans, BTREE_ID_inodes, - POS(0, bi->bi_inum), - c->opts.inodes_use_key_cache); + bch2_assert_pos_locked(trans, BTREE_ID_inodes, POS(0, bi->bi_inum)); set_nlink(&inode->v, bch2_inode_nlink_get(bi)); i_uid_write(&inode->v, bi->bi_uid); @@ -516,11 +516,11 @@ static int __bch2_link(struct bch_fs *c, struct bch_inode_info *dir, struct dentry *dentry) { - struct btree_trans *trans = bch2_trans_get(c); struct bch_inode_unpacked dir_u, inode_u; int ret; mutex_lock(&inode->ei_update_lock); + struct btree_trans *trans = bch2_trans_get(c); ret = commit_do(trans, NULL, NULL, 0, bch2_link_trans(trans, @@ -567,11 +567,12 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, struct bch_inode_info *dir = to_bch_ei(vdir); struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); struct bch_inode_unpacked dir_u, inode_u; - struct btree_trans *trans = bch2_trans_get(c); int ret; bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode); + struct btree_trans *trans = bch2_trans_get(c); + ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_unlink_trans(trans, @@ -594,8 +595,8 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry, set_nlink(&inode->v, 0); } err: - bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); bch2_trans_put(trans); + bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode); return ret; } @@ -680,14 +681,14 @@ static int bch2_rename2(struct mnt_idmap *idmap, return ret; } - trans = bch2_trans_get(c); - bch2_lock_inodes(INODE_UPDATE_LOCK, src_dir, dst_dir, src_inode, dst_inode); + trans = bch2_trans_get(c); + ret = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?: bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol); if (ret) @@ -893,6 +894,16 @@ static int bch2_getattr(struct mnt_idmap *idmap, stat->subvol = inode->ei_subvol; stat->result_mask |= STATX_SUBVOL; + if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) { + stat->result_mask |= STATX_DIOALIGN; + /* + * this is incorrect; we should be tracking this in superblock, + * and checking the alignment of open devices + */ + stat->dio_mem_align = SECTOR_SIZE; + stat->dio_offset_align = block_bytes(c); + } + if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime); @@ -1694,6 +1705,8 @@ static int bch2_sync_fs(struct super_block *sb, int wait) struct bch_fs *c = sb->s_fs_info; int ret; + trace_bch2_sync_fs(sb, wait); + if (c->opts.journal_flush_disabled) return 0; @@ -1722,15 +1735,11 @@ static struct bch_fs *bch2_path_to_fs(const char *path) return c ?: ERR_PTR(-ENOENT); } -static int bch2_remount(struct super_block *sb, int *flags, char *data) +static int bch2_remount(struct super_block *sb, int *flags, + struct bch_opts opts) { struct bch_fs *c = sb->s_fs_info; - struct bch_opts opts = bch2_opts_empty(); - int ret; - - ret = bch2_parse_mount_opts(c, &opts, data); - if (ret) - goto err; + int ret = 0; opt_set(opts, read_only, (*flags & SB_RDONLY) != 0); @@ -1790,7 +1799,8 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root) const struct bch_option *opt = &bch2_opt_table[i]; u64 v = bch2_opt_get_by_id(&c->opts, i); - if (!(opt->flags & OPT_MOUNT)) + if ((opt->flags & OPT_HIDDEN) || + !(opt->flags & OPT_MOUNT)) continue; if (v == bch2_opt_get_by_id(&bch2_opts_default, i)) @@ -1857,7 +1867,6 @@ static const struct super_operations bch_super_operations = { .statfs = bch2_statfs, .show_devname = bch2_show_devname, .show_options = bch2_show_options, - .remount_fs = bch2_remount, .put_super = bch2_put_super, .freeze_fs = bch2_freeze, .unfreeze_fs = bch2_unfreeze, @@ -1890,76 +1899,63 @@ static int bch2_test_super(struct super_block *s, void *data) return true; } -static struct dentry *bch2_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int bch2_fs_get_tree(struct fs_context *fc) { struct bch_fs *c; struct super_block *sb; struct inode *vinode; - struct bch_opts opts = bch2_opts_empty(); + struct bch2_opts_parse *opts_parse = fc->fs_private; + struct bch_opts opts = opts_parse->opts; + darray_str devs; + darray_fs devs_to_fs = {}; int ret; - opt_set(opts, read_only, (flags & SB_RDONLY) != 0); + opt_set(opts, read_only, (fc->sb_flags & SB_RDONLY) != 0); + opt_set(opts, nostart, true); - ret = bch2_parse_mount_opts(NULL, &opts, data); - if (ret) { - ret = bch2_err_class(ret); - return ERR_PTR(ret); - } - - if (!dev_name || strlen(dev_name) == 0) - return ERR_PTR(-EINVAL); + if (!fc->source || strlen(fc->source) == 0) + return -EINVAL; - darray_str devs; - ret = bch2_split_devs(dev_name, &devs); + ret = bch2_split_devs(fc->source, &devs); if (ret) - return ERR_PTR(ret); + return ret; - darray_fs devs_to_fs = {}; darray_for_each(devs, i) { ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i)); - if (ret) { - sb = ERR_PTR(ret); - goto got_sb; - } + if (ret) + goto err; } - sb = sget(fs_type, bch2_test_super, bch2_noset_super, flags|SB_NOSEC, &devs_to_fs); + sb = sget(fc->fs_type, bch2_test_super, bch2_noset_super, fc->sb_flags|SB_NOSEC, &devs_to_fs); if (!IS_ERR(sb)) goto got_sb; c = bch2_fs_open(devs.data, devs.nr, opts); - if (IS_ERR(c)) { - sb = ERR_CAST(c); - goto got_sb; - } + ret = PTR_ERR_OR_ZERO(c); + if (ret) + goto err; /* Some options can't be parsed until after the fs is started: */ - ret = bch2_parse_mount_opts(c, &opts, data); - if (ret) { - bch2_fs_stop(c); - sb = ERR_PTR(ret); - goto got_sb; - } + opts = bch2_opts_empty(); + ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf); + if (ret) + goto err_stop_fs; bch2_opts_apply(&c->opts, opts); - sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c); - if (IS_ERR(sb)) - bch2_fs_stop(c); -got_sb: - darray_exit(&devs_to_fs); - bch2_darray_str_exit(&devs); - - if (IS_ERR(sb)) { - ret = PTR_ERR(sb); - goto err; - } + ret = bch2_fs_start(c); + if (ret) + goto err_stop_fs; + sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c); + ret = PTR_ERR_OR_ZERO(sb); + if (ret) + goto err_stop_fs; +got_sb: c = sb->s_fs_info; if (sb->s_root) { - if ((flags ^ sb->s_flags) & SB_RDONLY) { + if ((fc->sb_flags ^ sb->s_flags) & SB_RDONLY) { ret = -EBUSY; goto err_put_super; } @@ -2025,12 +2021,10 @@ got_sb: sb->s_flags |= SB_ACTIVE; out: - return dget(sb->s_root); - -err_put_super: - __bch2_fs_stop(c); - deactivate_locked_super(sb); + fc->root = dget(sb->s_root); err: + darray_exit(&devs_to_fs); + bch2_darray_str_exit(&devs); if (ret) pr_err("error: %s", bch2_err_str(ret)); /* @@ -2041,7 +2035,16 @@ err: */ if (bch2_err_matches(ret, EROFS) && ret != -EROFS) ret = -EIO; - return ERR_PTR(bch2_err_class(ret)); + return bch2_err_class(ret); + +err_stop_fs: + bch2_fs_stop(c); + goto err; + +err_put_super: + __bch2_fs_stop(c); + deactivate_locked_super(sb); + goto err; } static void bch2_kill_sb(struct super_block *sb) @@ -2052,12 +2055,76 @@ static void bch2_kill_sb(struct super_block *sb) bch2_fs_free(c); } +static void bch2_fs_context_free(struct fs_context *fc) +{ + struct bch2_opts_parse *opts = fc->fs_private; + + if (opts) { + printbuf_exit(&opts->parse_later); + kfree(opts); + } +} + +static int bch2_fs_parse_param(struct fs_context *fc, + struct fs_parameter *param) +{ + /* + * the "source" param, i.e., the name of the device(s) to mount, + * is handled by the VFS layer. + */ + if (!strcmp(param->key, "source")) + return -ENOPARAM; + + struct bch2_opts_parse *opts = fc->fs_private; + struct bch_fs *c = NULL; + + /* for reconfigure, we already have a struct bch_fs */ + if (fc->root) + c = fc->root->d_sb->s_fs_info; + + int ret = bch2_parse_one_mount_opt(c, &opts->opts, + &opts->parse_later, param->key, + param->string); + + return bch2_err_class(ret); +} + +static int bch2_fs_reconfigure(struct fs_context *fc) +{ + struct super_block *sb = fc->root->d_sb; + struct bch2_opts_parse *opts = fc->fs_private; + + return bch2_remount(sb, &fc->sb_flags, opts->opts); +} + +static const struct fs_context_operations bch2_context_ops = { + .free = bch2_fs_context_free, + .parse_param = bch2_fs_parse_param, + .get_tree = bch2_fs_get_tree, + .reconfigure = bch2_fs_reconfigure, +}; + +static int bch2_init_fs_context(struct fs_context *fc) +{ + struct bch2_opts_parse *opts = kzalloc(sizeof(*opts), GFP_KERNEL); + + if (!opts) + return -ENOMEM; + + opts->parse_later = PRINTBUF; + + fc->ops = &bch2_context_ops; + fc->fs_private = opts; + + return 0; +} + static struct file_system_type bcache_fs_type = { - .owner = THIS_MODULE, - .name = "bcachefs", - .mount = bch2_mount, - .kill_sb = bch2_kill_sb, - .fs_flags = FS_REQUIRES_DEV, + .owner = THIS_MODULE, + .name = "bcachefs", + .init_fs_context = bch2_init_fs_context, + .kill_sb = bch2_kill_sb, + .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("bcachefs"); |