summaryrefslogtreecommitdiff
path: root/fs/btrfs/sysfs.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-21 00:09:30 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-21 00:09:30 +0300
commit0eb4aaa230d725fa9b1cd758c0f17abca5597af6 (patch)
tree2ccf5473b8e7386dd082907c3aab693480783654 /fs/btrfs/sysfs.c
parent1851bccf608a28ac5ec9410764dda9a46828213b (diff)
parent9d0c23db26cb58c9fc6ee8817e8f9ebeb25776e5 (diff)
downloadlinux-0eb4aaa230d725fa9b1cd758c0f17abca5597af6.tar.xz
Merge tag 'for-6.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "User visible changes, features: - rebuilding of the free space tree at mount time is done in more transactions, fix potential hangs when the transaction thread is blocked due to large amount of block groups - more read IO balancing strategies (experimental config), add two new ways how to select a device for read if the profiles allow that (all RAID1*), the current default selects the device by pid which is good on average but less performant for single reader workloads - select preferred device for all reads (namely for testing) - round-robin, balance reads across devices relevant for the requested IO range - add encoded write ioctl support to io_uring (read was added in 6.12), basis for writing send stream using that instead of syscalls, non-blocking mode is not yet implemented - support FS_IOC_READ_VERITY_METADATA, applications can use the metadata to do their own verification - pass inode's i_write_hint to bios, for parity with other filesystems, ioctls F_GET_RW_HINT/F_SET_RW_HINT Core: - in zoned mode: allow to directly reclaim a block group by simply resetting it, then it can be reused and another block group does not need to be allocated - super block validation now also does more comprehensive sys array validation, adding it to the points where superblock is validated (post-read, pre-write) - subpage mode fixes: - fix double accounting of blocks due to some races - improved or fixed error handling in a few cases (compression, delalloc) - raid stripe tree: - fix various cases with extent range splitting or deleting - implement hole punching to extent range - reduce number of stripe tree lookups during bio submission - more self-tests - updated self-tests (delayed refs) - error handling improvements - cleanups, refactoring - remove rest of backref caching infrastructure from relocation, not needed anymore - error message updates - remove unnecessary calls when extent buffer was marked dirty - unused parameter removal - code moved to new files Other code changes: add rb_find_add_cached() to the rb-tree API" * tag 'for-6.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (127 commits) btrfs: selftests: add a selftest for deleting two out of three extents btrfs: selftests: add test for punching a hole into 3 RAID stripe-extents btrfs: selftests: add selftest for punching holes into the RAID stripe extents btrfs: selftests: test RAID stripe-tree deletion spanning two items btrfs: selftests: don't split RAID extents in half btrfs: selftests: check for correct return value of failed lookup btrfs: don't use btrfs_set_item_key_safe on RAID stripe-extents btrfs: implement hole punching for RAID stripe extents btrfs: fix deletion of a range spanning parts two RAID stripe extents btrfs: fix tail delete of RAID stripe-extents btrfs: fix front delete range calculation for RAID stripe extents btrfs: assert RAID stripe-extent length is always greater than 0 btrfs: don't try to delete RAID stripe-extents if we don't need to btrfs: selftests: correct RAID stripe-tree feature flag setting btrfs: add io_uring interface for encoded writes btrfs: remove the unused locked_folio parameter from btrfs_cleanup_ordered_extents() btrfs: add extra error messages for delalloc range related errors btrfs: subpage: dump the involved bitmap when ASSERT() failed btrfs: subpage: fix the bitmap dump of the locked flags btrfs: do proper folio cleanup when run_delalloc_nocow() failed ...
Diffstat (limited to 'fs/btrfs/sysfs.c')
-rw-r--r--fs/btrfs/sysfs.c174
1 files changed, 155 insertions, 19 deletions
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 7f09b6c9cc2d..53b846d99ece 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1305,7 +1305,73 @@ static ssize_t btrfs_temp_fsid_show(struct kobject *kobj,
}
BTRFS_ATTR(, temp_fsid, btrfs_temp_fsid_show);
-static const char * const btrfs_read_policy_name[] = { "pid" };
+static const char *btrfs_read_policy_name[] = {
+ "pid",
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ "round-robin",
+ "devid",
+#endif
+};
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+
+/* Global module configuration parameters. */
+static char *read_policy;
+char *btrfs_get_mod_read_policy(void)
+{
+ return read_policy;
+}
+
+/* Set perms to 0, disable /sys/module/btrfs/parameter/read_policy interface. */
+module_param(read_policy, charp, 0);
+MODULE_PARM_DESC(read_policy,
+"Global read policy: pid (default), round-robin[:<min_contig_read>], devid[:<devid>]");
+#endif
+
+int btrfs_read_policy_to_enum(const char *str, s64 *value_ret)
+{
+ char param[32] = { 0 };
+ char __maybe_unused *value_str;
+
+ if (!str || strlen(str) == 0)
+ return 0;
+
+ strncpy(param, str, sizeof(param) - 1);
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ /* Separate value from input in policy:value format. */
+ value_str = strchr(param, ':');
+ if (value_str) {
+ int ret;
+
+ *value_str = 0;
+ value_str++;
+ if (!value_ret)
+ return -EINVAL;
+ ret = kstrtos64(value_str, 10, value_ret);
+ if (ret)
+ return -EINVAL;
+ if (*value_ret < 0)
+ return -ERANGE;
+ }
+#endif
+
+ return sysfs_match_string(btrfs_read_policy_name, param);
+}
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+int __init btrfs_read_policy_init(void)
+{
+ s64 value;
+
+ if (btrfs_read_policy_to_enum(read_policy, &value) == -EINVAL) {
+ btrfs_err(NULL, "invalid read policy or value %s", read_policy);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif
static ssize_t btrfs_read_policy_show(struct kobject *kobj,
struct kobj_attribute *a, char *buf)
@@ -1316,14 +1382,25 @@ static ssize_t btrfs_read_policy_show(struct kobject *kobj,
int i;
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
- if (policy == i)
- ret += sysfs_emit_at(buf, ret, "%s[%s]",
- (ret == 0 ? "" : " "),
- btrfs_read_policy_name[i]);
- else
- ret += sysfs_emit_at(buf, ret, "%s%s",
- (ret == 0 ? "" : " "),
- btrfs_read_policy_name[i]);
+ if (ret != 0)
+ ret += sysfs_emit_at(buf, ret, " ");
+
+ if (i == policy)
+ ret += sysfs_emit_at(buf, ret, "[");
+
+ ret += sysfs_emit_at(buf, ret, "%s", btrfs_read_policy_name[i]);
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ if (i == BTRFS_READ_POLICY_RR)
+ ret += sysfs_emit_at(buf, ret, ":%u",
+ READ_ONCE(fs_devices->rr_min_contig_read));
+
+ if (i == BTRFS_READ_POLICY_DEVID)
+ ret += sysfs_emit_at(buf, ret, ":%llu",
+ READ_ONCE(fs_devices->read_devid));
+#endif
+ if (i == policy)
+ ret += sysfs_emit_at(buf, ret, "]");
}
ret += sysfs_emit_at(buf, ret, "\n");
@@ -1336,21 +1413,80 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
const char *buf, size_t len)
{
struct btrfs_fs_devices *fs_devices = to_fs_devs(kobj);
- int i;
+ int index;
+ s64 value = -1;
- for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
- if (sysfs_streq(buf, btrfs_read_policy_name[i])) {
- if (i != READ_ONCE(fs_devices->read_policy)) {
- WRITE_ONCE(fs_devices->read_policy, i);
- btrfs_info(fs_devices->fs_info,
- "read policy set to '%s'",
- btrfs_read_policy_name[i]);
+ index = btrfs_read_policy_to_enum(buf, &value);
+ if (index < 0)
+ return -EINVAL;
+
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+ /* If moving from RR then disable collecting fs stats. */
+ if (fs_devices->read_policy == BTRFS_READ_POLICY_RR && index != BTRFS_READ_POLICY_RR)
+ fs_devices->collect_fs_stats = false;
+
+ if (index == BTRFS_READ_POLICY_RR) {
+ if (value != -1) {
+ const u32 sectorsize = fs_devices->fs_info->sectorsize;
+
+ if (!IS_ALIGNED(value, sectorsize)) {
+ u64 temp_value = round_up(value, sectorsize);
+
+ btrfs_debug(fs_devices->fs_info,
+"read_policy: min contig read %lld should be multiple of sectorsize %u, rounded to %llu",
+ value, sectorsize, temp_value);
+ value = temp_value;
}
- return len;
+ } else {
+ value = BTRFS_DEFAULT_RR_MIN_CONTIG_READ;
+ }
+
+ if (index != READ_ONCE(fs_devices->read_policy) ||
+ value != READ_ONCE(fs_devices->rr_min_contig_read)) {
+ WRITE_ONCE(fs_devices->read_policy, index);
+ WRITE_ONCE(fs_devices->rr_min_contig_read, value);
+
+ btrfs_info(fs_devices->fs_info, "read policy set to '%s:%lld'",
+ btrfs_read_policy_name[index], value);
+ }
+
+ fs_devices->collect_fs_stats = true;
+
+ return len;
+ }
+
+ if (index == BTRFS_READ_POLICY_DEVID) {
+ if (value != -1) {
+ BTRFS_DEV_LOOKUP_ARGS(args);
+
+ /* Validate input devid. */
+ args.devid = value;
+ if (btrfs_find_device(fs_devices, &args) == NULL)
+ return -EINVAL;
+ } else {
+ /* Set default devid to the devid of the latest device. */
+ value = fs_devices->latest_dev->devid;
}
+
+ if (index != READ_ONCE(fs_devices->read_policy) ||
+ value != READ_ONCE(fs_devices->read_devid)) {
+ WRITE_ONCE(fs_devices->read_policy, index);
+ WRITE_ONCE(fs_devices->read_devid, value);
+
+ btrfs_info(fs_devices->fs_info, "read policy set to '%s:%llu'",
+ btrfs_read_policy_name[index], value);
+ }
+
+ return len;
+ }
+#endif
+ if (index != READ_ONCE(fs_devices->read_policy)) {
+ WRITE_ONCE(fs_devices->read_policy, index);
+ btrfs_info(fs_devices->fs_info, "read policy set to '%s'",
+ btrfs_read_policy_name[index]);
}
- return -EINVAL;
+ return len;
}
BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);