Merge tag 'bcachefs-2024-07-18.2' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs updates from Kent Overstreet: - Metadata version 1.8: Stripe sectors accounting, BCH_DATA_unstriped This splits out the accounting of dirty sectors and stripe sectors in alloc keys; this lets us see stripe buckets that still have unstriped data in them. This is needed for ensuring that erasure coding is working correctly, as well as completing stripe creation after a crash. - Metadata version 1.9: Disk accounting rewrite The previous disk accounting scheme relied heavily on percpu counters that were also sharded by outstanding journal buffer; it was fast but not extensible or scalable, and meant that all accounting counters were recorded in every journal entry. The new disk accounting scheme stores accounting as normal btree keys; updates are deltas until they are flushed by the btree write buffer. This means we have no practical limit on the number of counters, and a new tagged union format that's easy to extend. We now have counters for compression type/ratio, per-snapshot-id usage, per-btree-id usage, and pending rebalance work. - Self healing on read IO/checksum error Data is now automatically rewritten if we get a read error and then a successful retry - Mount API conversion (thanks to Thomas Bertschinger) - Better lockdep coverage Previously, btree node locks were tracked individually by lockdep, like any other lock. But we may take _many_ btree node locks simultaneously, we easily blow through the limit of 48 locks that lockdep can track, leading to lockdep turning itself off. Tracking each btree node lock individually isn't really necessary since we have our own cycle detector for deadlock avoidance and centralized tracking of btree node locks, so we now have a single lockdep_map in btree_trans for "any btree nodes are locked". - Some more small incremental work towards online check_allocations - Lots more debugging improvements - Fixes, including: - undefined behaviour fixes, originally noted as breaking userspace LTO builds - fix a spurious warning in fsck_err, reported by Marcin - fix an integer overflow on trans->nr_updates, also reported by Marcin; this broke during deletion of highly fragmented indirect extents * tag 'bcachefs-2024-07-18.2' of https://evilpiepirate.org/git/bcachefs: (120 commits) lockdep: Add comments for lockdep_set_no{validate,track}_class() bcachefs: Fix integer overflow on trans->nr_updates bcachefs: silence silly kdoc warning bcachefs: Fix fsck warning about btree_trans not passed to fsck error bcachefs: Add an error message for insufficient rw journal devs bcachefs: varint: Avoid left-shift of a negative value bcachefs: darray: Don't pass NULL to memcpy() bcachefs: Kill bch2_assert_btree_nodes_not_locked() bcachefs: Rename BCH_WRITE_DONE -> BCH_WRITE_SUBMITTED bcachefs: __bch2_read(): call trans_begin() on every loop iter bcachefs: show none if label is not set bcachefs: drop packed, aligned from bkey_inode_buf bcachefs: btree node scan: fall back to comparing by journal seq bcachefs: Add lockdep support for btree node locks lockdep: lockdep_set_notrack_class() bcachefs: Improve copygc_wait_to_text() bcachefs: Convert clock code to u64s bcachefs: Improve startup message bcachefs: Self healing on read IO error bcachefs: Make read_only a mount option again, but hidden ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-07-19 03:27:43 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-07-19 03:27:43 +0300
commit: 720261cfc7329406a50c2a8536e0039b9dd9a4e5 (patch)
tree: 660152d34a1e0fdfd747aae4b9c9485b826654e1 /fs/bcachefs/fs.c
parent: 4f40c636b291deeae7d1f4c9fb5db5f0aac54267 (diff)
parent: a97b43fac5b9b3ffca71b8a917a249789902fce9 (diff)
download: linux-720261cfc7329406a50c2a8536e0039b9dd9a4e5.tar.xz
1 files changed, 138 insertions, 71 deletions
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 0c7d1bc0548a..3a5f49affa0a 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -26,11 +26,13 @@
 #include "snapshot.h"
 #include "super.h"
 #include "xattr.h"
+#include "trace.h"
 
 #include <linux/aio.h>
 #include <linux/backing-dev.h>
 #include <linux/exportfs.h>
 #include <linux/fiemap.h>
+#include <linux/fs_context.h>
 #include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/posix_acl.h>
@@ -56,9 +58,7 @@ void bch2_inode_update_after_write(struct btree_trans *trans,
 
 	BUG_ON(bi->bi_inum != inode->v.i_ino);
 
-	bch2_assert_pos_locked(trans, BTREE_ID_inodes,
-			       POS(0, bi->bi_inum),
-			       c->opts.inodes_use_key_cache);
+	bch2_assert_pos_locked(trans, BTREE_ID_inodes, POS(0, bi->bi_inum));
 
 	set_nlink(&inode->v, bch2_inode_nlink_get(bi));
 	i_uid_write(&inode->v, bi->bi_uid);
@@ -516,11 +516,11 @@ static int __bch2_link(struct bch_fs *c,
 		       struct bch_inode_info *dir,
 		       struct dentry *dentry)
 {
-	struct btree_trans *trans = bch2_trans_get(c);
 	struct bch_inode_unpacked dir_u, inode_u;
 	int ret;
 
 	mutex_lock(&inode->ei_update_lock);
+	struct btree_trans *trans = bch2_trans_get(c);
 
 	ret = commit_do(trans, NULL, NULL, 0,
 			bch2_link_trans(trans,
@@ -567,11 +567,12 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
 	struct bch_inode_info *dir = to_bch_ei(vdir);
 	struct bch_inode_info *inode = to_bch_ei(dentry->d_inode);
 	struct bch_inode_unpacked dir_u, inode_u;
-	struct btree_trans *trans = bch2_trans_get(c);
 	int ret;
 
 	bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
 
+	struct btree_trans *trans = bch2_trans_get(c);
+
 	ret = commit_do(trans, NULL, NULL,
 			BCH_TRANS_COMMIT_no_enospc,
 		bch2_unlink_trans(trans,
@@ -594,8 +595,8 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
 		set_nlink(&inode->v, 0);
 	}
 err:
-	bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
 	bch2_trans_put(trans);
+	bch2_unlock_inodes(INODE_UPDATE_LOCK, dir, inode);
 
 	return ret;
 }
@@ -680,14 +681,14 @@ static int bch2_rename2(struct mnt_idmap *idmap,
 			return ret;
 	}
 
-	trans = bch2_trans_get(c);
-
 	bch2_lock_inodes(INODE_UPDATE_LOCK,
 			 src_dir,
 			 dst_dir,
 			 src_inode,
 			 dst_inode);
 
+	trans = bch2_trans_get(c);
+
 	ret   = bch2_subvol_is_ro_trans(trans, src_dir->ei_subvol) ?:
 		bch2_subvol_is_ro_trans(trans, dst_dir->ei_subvol);
 	if (ret)
@@ -893,6 +894,16 @@ static int bch2_getattr(struct mnt_idmap *idmap,
 	stat->subvol	= inode->ei_subvol;
 	stat->result_mask |= STATX_SUBVOL;
 
+	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->v.i_mode)) {
+		stat->result_mask |= STATX_DIOALIGN;
+		/*
+		 * this is incorrect; we should be tracking this in superblock,
+		 * and checking the alignment of open devices
+		 */
+		stat->dio_mem_align = SECTOR_SIZE;
+		stat->dio_offset_align = block_bytes(c);
+	}
+
 	if (request_mask & STATX_BTIME) {
 		stat->result_mask |= STATX_BTIME;
 		stat->btime = bch2_time_to_timespec(c, inode->ei_inode.bi_otime);
@@ -1694,6 +1705,8 @@ static int bch2_sync_fs(struct super_block *sb, int wait)
 	struct bch_fs *c = sb->s_fs_info;
 	int ret;
 
+	trace_bch2_sync_fs(sb, wait);
+
 	if (c->opts.journal_flush_disabled)
 		return 0;
 
@@ -1722,15 +1735,11 @@ static struct bch_fs *bch2_path_to_fs(const char *path)
 	return c ?: ERR_PTR(-ENOENT);
 }
 
-static int bch2_remount(struct super_block *sb, int *flags, char *data)
+static int bch2_remount(struct super_block *sb, int *flags,
+			struct bch_opts opts)
 {
 	struct bch_fs *c = sb->s_fs_info;
-	struct bch_opts opts = bch2_opts_empty();
-	int ret;
-
-	ret = bch2_parse_mount_opts(c, &opts, data);
-	if (ret)
-		goto err;
+	int ret = 0;
 
 	opt_set(opts, read_only, (*flags & SB_RDONLY) != 0);
 
@@ -1790,7 +1799,8 @@ static int bch2_show_options(struct seq_file *seq, struct dentry *root)
 		const struct bch_option *opt = &bch2_opt_table[i];
 		u64 v = bch2_opt_get_by_id(&c->opts, i);
 
-		if (!(opt->flags & OPT_MOUNT))
+		if ((opt->flags & OPT_HIDDEN) ||
+		    !(opt->flags & OPT_MOUNT))
 			continue;
 
 		if (v == bch2_opt_get_by_id(&bch2_opts_default, i))
@@ -1857,7 +1867,6 @@ static const struct super_operations bch_super_operations = {
 	.statfs		= bch2_statfs,
 	.show_devname	= bch2_show_devname,
 	.show_options	= bch2_show_options,
-	.remount_fs	= bch2_remount,
 	.put_super	= bch2_put_super,
 	.freeze_fs	= bch2_freeze,
 	.unfreeze_fs	= bch2_unfreeze,
@@ -1890,76 +1899,63 @@ static int bch2_test_super(struct super_block *s, void *data)
 	return true;
 }
 
-static struct dentry *bch2_mount(struct file_system_type *fs_type,
-				 int flags, const char *dev_name, void *data)
+static int bch2_fs_get_tree(struct fs_context *fc)
 {
 	struct bch_fs *c;
 	struct super_block *sb;
 	struct inode *vinode;
-	struct bch_opts opts = bch2_opts_empty();
+	struct bch2_opts_parse *opts_parse = fc->fs_private;
+	struct bch_opts opts = opts_parse->opts;
+	darray_str devs;
+	darray_fs devs_to_fs = {};
 	int ret;
 
-	opt_set(opts, read_only, (flags & SB_RDONLY) != 0);
+	opt_set(opts, read_only, (fc->sb_flags & SB_RDONLY) != 0);
+	opt_set(opts, nostart, true);
 
-	ret = bch2_parse_mount_opts(NULL, &opts, data);
-	if (ret) {
-		ret = bch2_err_class(ret);
-		return ERR_PTR(ret);
-	}
-
-	if (!dev_name || strlen(dev_name) == 0)
-		return ERR_PTR(-EINVAL);
+	if (!fc->source || strlen(fc->source) == 0)
+		return -EINVAL;
 
-	darray_str devs;
-	ret = bch2_split_devs(dev_name, &devs);
+	ret = bch2_split_devs(fc->source, &devs);
 	if (ret)
-		return ERR_PTR(ret);
+		return ret;
 
-	darray_fs devs_to_fs = {};
 	darray_for_each(devs, i) {
 		ret = darray_push(&devs_to_fs, bch2_path_to_fs(*i));
-		if (ret) {
-			sb = ERR_PTR(ret);
-			goto got_sb;
-		}
+		if (ret)
+			goto err;
 	}
 
-	sb = sget(fs_type, bch2_test_super, bch2_noset_super, flags|SB_NOSEC, &devs_to_fs);
+	sb = sget(fc->fs_type, bch2_test_super, bch2_noset_super, fc->sb_flags|SB_NOSEC, &devs_to_fs);
 	if (!IS_ERR(sb))
 		goto got_sb;
 
 	c = bch2_fs_open(devs.data, devs.nr, opts);
-	if (IS_ERR(c)) {
-		sb = ERR_CAST(c);
-		goto got_sb;
-	}
+	ret = PTR_ERR_OR_ZERO(c);
+	if (ret)
+		goto err;
 
 	/* Some options can't be parsed until after the fs is started: */
-	ret = bch2_parse_mount_opts(c, &opts, data);
-	if (ret) {
-		bch2_fs_stop(c);
-		sb = ERR_PTR(ret);
-		goto got_sb;
-	}
+	opts = bch2_opts_empty();
+	ret = bch2_parse_mount_opts(c, &opts, NULL, opts_parse->parse_later.buf);
+	if (ret)
+		goto err_stop_fs;
 
 	bch2_opts_apply(&c->opts, opts);
 
-	sb = sget(fs_type, NULL, bch2_set_super, flags|SB_NOSEC, c);
-	if (IS_ERR(sb))
-		bch2_fs_stop(c);
-got_sb:
-	darray_exit(&devs_to_fs);
-	bch2_darray_str_exit(&devs);
-
-	if (IS_ERR(sb)) {
-		ret = PTR_ERR(sb);
-		goto err;
-	}
+	ret = bch2_fs_start(c);
+	if (ret)
+		goto err_stop_fs;
 
+	sb = sget(fc->fs_type, NULL, bch2_set_super, fc->sb_flags|SB_NOSEC, c);
+	ret = PTR_ERR_OR_ZERO(sb);
+	if (ret)
+		goto err_stop_fs;
+got_sb:
 	c = sb->s_fs_info;
 
 	if (sb->s_root) {
-		if ((flags ^ sb->s_flags) & SB_RDONLY) {
+		if ((fc->sb_flags ^ sb->s_flags) & SB_RDONLY) {
 			ret = -EBUSY;
 			goto err_put_super;
 		}
@@ -2025,12 +2021,10 @@ got_sb:
 
 	sb->s_flags |= SB_ACTIVE;
 out:
-	return dget(sb->s_root);
-
-err_put_super:
-	__bch2_fs_stop(c);
-	deactivate_locked_super(sb);
+	fc->root = dget(sb->s_root);
 err:
+	darray_exit(&devs_to_fs);
+	bch2_darray_str_exit(&devs);
 	if (ret)
 		pr_err("error: %s", bch2_err_str(ret));
 	/*
@@ -2041,7 +2035,16 @@ err:
 	 */
 	if (bch2_err_matches(ret, EROFS) && ret != -EROFS)
 		ret = -EIO;
-	return ERR_PTR(bch2_err_class(ret));
+	return bch2_err_class(ret);
+
+err_stop_fs:
+	bch2_fs_stop(c);
+	goto err;
+
+err_put_super:
+	__bch2_fs_stop(c);
+	deactivate_locked_super(sb);
+	goto err;
 }
 
 static void bch2_kill_sb(struct super_block *sb)
@@ -2052,12 +2055,76 @@ static void bch2_kill_sb(struct super_block *sb)
 	bch2_fs_free(c);
 }
 
+static void bch2_fs_context_free(struct fs_context *fc)
+{
+	struct bch2_opts_parse *opts = fc->fs_private;
+
+	if (opts) {
+		printbuf_exit(&opts->parse_later);
+		kfree(opts);
+	}
+}
+
+static int bch2_fs_parse_param(struct fs_context *fc,
+			       struct fs_parameter *param)
+{
+	/*
+	 * the "source" param, i.e., the name of the device(s) to mount,
+	 * is handled by the VFS layer.
+	 */
+	if (!strcmp(param->key, "source"))
+		return -ENOPARAM;
+
+	struct bch2_opts_parse *opts = fc->fs_private;
+	struct bch_fs *c = NULL;
+
+	/* for reconfigure, we already have a struct bch_fs */
+	if (fc->root)
+		c = fc->root->d_sb->s_fs_info;
+
+	int ret = bch2_parse_one_mount_opt(c, &opts->opts,
+					   &opts->parse_later, param->key,
+					   param->string);
+
+	return bch2_err_class(ret);
+}
+
+static int bch2_fs_reconfigure(struct fs_context *fc)
+{
+	struct super_block *sb = fc->root->d_sb;
+	struct bch2_opts_parse *opts = fc->fs_private;
+
+	return bch2_remount(sb, &fc->sb_flags, opts->opts);
+}
+
+static const struct fs_context_operations bch2_context_ops = {
+	.free        = bch2_fs_context_free,
+	.parse_param = bch2_fs_parse_param,
+	.get_tree    = bch2_fs_get_tree,
+	.reconfigure = bch2_fs_reconfigure,
+};
+
+static int bch2_init_fs_context(struct fs_context *fc)
+{
+	struct bch2_opts_parse *opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+
+	if (!opts)
+		return -ENOMEM;
+
+	opts->parse_later = PRINTBUF;
+
+	fc->ops = &bch2_context_ops;
+	fc->fs_private = opts;
+
+	return 0;
+}
+
 static struct file_system_type bcache_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "bcachefs",
-	.mount		= bch2_mount,
-	.kill_sb	= bch2_kill_sb,
-	.fs_flags	= FS_REQUIRES_DEV,
+	.owner			= THIS_MODULE,
+	.name			= "bcachefs",
+	.init_fs_context	= bch2_init_fs_context,
+	.kill_sb		= bch2_kill_sb,
+	.fs_flags		= FS_REQUIRES_DEV,
 };
 
 MODULE_ALIAS_FS("bcachefs");
author	Linus Torvalds <torvalds@linux-foundation.org>	2024-07-19 03:27:43 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-07-19 03:27:43 +0300
commit	720261cfc7329406a50c2a8536e0039b9dd9a4e5 (patch)
tree	660152d34a1e0fdfd747aae4b9c9485b826654e1 /fs/bcachefs/fs.c
parent	4f40c636b291deeae7d1f4c9fb5db5f0aac54267 (diff)
parent	a97b43fac5b9b3ffca71b8a917a249789902fce9 (diff)
download	linux-720261cfc7329406a50c2a8536e0039b9dd9a4e5.tar.xz