summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2026-06-16 03:05:40 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2026-06-16 03:05:40 +0300
commit: 3c26a6bc40fac7051b002411e771a8a5faed028f (patch)
tree: 2062e630a7cad362202f735d591da92ded15cd98
parent: 4b5713ef2f929dd8720fcdb66c063643ef9e3bcb (diff)
parent: 1cfa74c683ea82d37156ccd7ab4f4659056dc701 (diff)
download: linux-3c26a6bc40fac7051b002411e771a8a5faed028f.tar.xz
Merge tag 'x86_cache_for_v7.2_rc1' of gitolite.kernel.org:pub/scm/linux/kernel/git/tip/tip
Pull x86 resource control updates from Borislav Petkov:

 "Preparatory work for MPAM counter assignment:

   - Simplify the error handling path when creating monitor group event
     configuration directories

   - Make the MBM event filter configurable only on architectures that
     support it and expose this with the respective file modes in the
     event config

   - Disallow the MBA software controller on systems where MBM counters
     are assignable, as it requires continuous bandwidth measurement
     that assignable counters do not guarantee

   - Replace a compile-time Kconfig option for fixed counter assignment
     with a per-architecture runtime property, and expose whether the
     counter assignment mode is changeable to userspace

   - Continue counter allocation across all domains instead of aborting
     at the first failure

   - Document that automatic MBM counter assignment is best effort and
     may not assign counters to all domains

   - Document the behavior of task ID 0 and idle tasks in the resctrl
     tasks file"

* tag 'x86_cache_for_v7.2_rc1' of gitolite.kernel.org:pub/scm/linux/kernel/git/tip/tip:
  fs/resctrl: Document tasks file behaviour for task id 0 and idle tasks
  fs/resctrl: Document that automatic counter assignment is best effort
  fs/resctrl: Continue counter allocation after failure
  fs/resctrl: Add monitor property 'mbm_cntr_assign_fixed'
  fs/resctrl: Disallow the software controller when MBM counters are assignable
  x86,fs/resctrl: Create 'event_filter' files read only if they're not configurable
  fs/resctrl: Tidy up the error path in resctrl_mkdir_event_configs()
-rw-r--r-- Documentation/filesystems/resctrl.rst | 22
-rw-r--r-- arch/x86/kernel/cpu/resctrl/monitor.c | 1
-rw-r--r-- fs/resctrl/internal.h | 2
-rw-r--r-- fs/resctrl/monitor.c | 30
-rw-r--r-- fs/resctrl/rdtgroup.c | 40
-rw-r--r-- include/linux/resctrl.h | 18
6 files changed, 78 insertions, 35 deletions
diff --git a/Documentation/filesystems/resctrl.rst b/Documentation/filesystems/resctrl.rst
index b003bed339fd..e4b66af55ffb 100644
--- a/Documentation/filesystems/resctrl.rst
+++ b/Documentation/filesystems/resctrl.rst
@@ -427,9 +427,9 @@ with the following files:
Two MBM events are supported by default: mbm_local_bytes and mbm_total_bytes.
Each MBM event's sub-directory contains a file named "event_filter" that is
- used to view and modify which memory transactions the MBM event is configured
- with. The file is accessible only when "mbm_event" counter assignment mode is
- enabled.
+ used to view and (if writable) modify which memory transactions the MBM event
+ is configured with. The file is accessible only when "mbm_event" counter
+ assignment mode is enabled.
List of memory transaction types supported:
@@ -454,9 +454,8 @@ with the following files:
# cat /sys/fs/resctrl/info/L3_MON/event_configs/mbm_local_bytes/event_filter
local_reads,local_non_temporal_writes,local_reads_slow_memory
- Modify the event configuration by writing to the "event_filter" file within
- the "event_configs" directory. The read/write "event_filter" file contains the
- configuration of the event that reflects which memory transactions are counted by it.
+ The memory transactions the MBM event is configured with can be changed
+ if "event_filter" is writable.
For example::
@@ -480,6 +479,12 @@ with the following files:
"1":
Auto assignment is enabled.
+ Automatic counter assignment is done with best effort. If auto
+ assignment is enabled but there are not enough available counters then
+ monitor group creation could succeed while one or more events belonging
+ to the group may not have a counter assigned in all domains. Consult
+ mbm_L3_assignments for counter assignment states of the new groups.
+
Example::
# echo 0 > /sys/fs/resctrl/info/L3_MON/mbm_assign_on_mkdir
@@ -570,6 +575,11 @@ All groups contain the following files:
then the task must already belong to the CTRL_MON parent of this
group. The task is removed from any previous MON group.
+ When writing to this file, a task id of 0 is interpreted as the
+ task id of the currently running task. On reading the file, a task
+ id of 0 will never be shown and there is no representation of the
+ idle tasks. Instead, a CPU's idle task is always considered as a
+ member of the group owning the CPU.
"cpus":
Reading this file shows a bitmask of the logical CPUs owned by
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index c102c5ef9340..03ee6102ab07 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -460,6 +460,7 @@ int __init rdt_get_l3_mon_config(struct rdt_resource *r)
(rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL) ||
rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))) {
r->mon.mbm_cntr_assignable = true;
+ r->mon.mbm_cntr_configurable = true;
cpuid_count(0x80000020, 5, &eax, &ebx, &ecx, &edx);
r->mon.num_mbm_cntrs = (ebx & GENMASK(15, 0)) + 1;
hw_res->mbm_cntr_assign_enabled = true;
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 1a9b29119f88..48af75b9dc85 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -408,6 +408,8 @@ void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free);
void resctrl_file_fflags_init(const char *config, unsigned long fflags);
+void resctrl_file_mode_init(const char *config, umode_t mode);
+
void rdt_staged_configs_clear(void);
bool closid_allocated(unsigned int closid);
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index 9fd901c78dc6..0e6a389a16bf 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -1211,9 +1211,10 @@ static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_
* NULL; otherwise, assign the counter to the specified domain @d.
*
* If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
- * will fail. The assignment process will abort at the first failure encountered
- * during domain traversal, which may result in the event being only partially
- * assigned.
+ * will fail. When attempting to assign counters to all domains, carry on trying
+ * to assign counters after a failure since only some domains may have counters
+ * and the goal is to assign counters where possible. If any counter assignment
+ * fails, return the error from the last failing assignment.
*
* Return:
* 0 on success, < 0 on failure.
@@ -1226,9 +1227,11 @@ static int rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgro
if (!d) {
list_for_each_entry(d, &r->mon_domains, hdr.list) {
- ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
- if (ret)
- return ret;
+ int err;
+
+ err = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
+ if (err)
+ ret = err;
}
} else {
ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
@@ -1422,6 +1425,11 @@ ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes
ret = -EINVAL;
goto out_unlock;
}
+ if (!r->mon.mbm_cntr_configurable) {
+ rdt_last_cmd_puts("event_filter is not configurable\n");
+ ret = -EPERM;
+ goto out_unlock;
+ }
ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
if (!ret && mevt->evt_cfg != evt_cfg) {
@@ -1451,7 +1459,7 @@ int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
else
seq_puts(s, "[default]\n");
- if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
+ if (!r->mon.mbm_cntr_assign_fixed) {
if (enabled)
seq_puts(s, "default\n");
else
@@ -1502,6 +1510,12 @@ ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
}
if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
+ if (r->mon.mbm_cntr_assign_fixed) {
+ ret = -EINVAL;
+ rdt_last_cmd_puts("Counter assignment mode is not configurable\n");
+ goto out_unlock;
+ }
+
ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
if (ret)
goto out_unlock;
@@ -1886,6 +1900,8 @@ int resctrl_l3_mon_resource_init(void)
resctrl_file_fflags_init("available_mbm_cntrs",
RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
+ if (r->mon.mbm_cntr_configurable)
+ resctrl_file_mode_init("event_filter", 0644);
resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
RFTYPE_RES_CACHE);
resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 5dfdaa6f9d8f..af2cbab14497 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -2022,7 +2022,7 @@ static struct rftype res_common_files[] = {
},
{
.name = "event_filter",
- .mode = 0644,
+ .mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = event_filter_show,
.write = event_filter_write,
@@ -2215,6 +2215,15 @@ void resctrl_file_fflags_init(const char *config, unsigned long fflags)
rft->fflags = fflags;
}
+void resctrl_file_mode_init(const char *config, umode_t mode)
+{
+ struct rftype *rft;
+
+ rft = rdtgroup_get_rftype_by_name(config);
+ if (rft)
+ rft->mode = mode;
+}
+
/**
* rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
* @r: The resource group with which the file is associated.
@@ -2331,22 +2340,19 @@ static int resctrl_mkdir_event_configs(struct rdt_resource *r, struct kernfs_nod
continue;
kn_subdir2 = kernfs_create_dir(kn_subdir, mevt->name, kn_subdir->mode, mevt);
- if (IS_ERR(kn_subdir2)) {
- ret = PTR_ERR(kn_subdir2);
- goto out;
- }
+ if (IS_ERR(kn_subdir2))
+ return PTR_ERR(kn_subdir2);
ret = rdtgroup_kn_set_ugid(kn_subdir2);
if (ret)
- goto out;
+ return ret;
ret = rdtgroup_add_files(kn_subdir2, RFTYPE_ASSIGN_CONFIG);
if (ret)
- break;
+ return ret;
}
-out:
- return ret;
+ return 0;
}
static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
@@ -2510,10 +2516,13 @@ static void mba_sc_domain_destroy(struct rdt_resource *r,
}
/*
- * MBA software controller is supported only if
- * MBM is supported and MBA is in linear scale,
- * and the MBM monitor scope is the same as MBA
- * control scope.
+ * The MBA software controller is supported only if MBM is supported and MBA is
+ * in linear scale, and the MBM monitor scope is the same as MBA control scope.
+ *
+ * The software controller cannot be supported when the MBM counters are
+ * assignable. There is no guarantee that MBM counters are assigned to the
+ * event backing the software controller in all monitoring domains of all
+ * monitoring groups.
*/
static bool supports_mba_mbps(void)
{
@@ -2522,7 +2531,8 @@ static bool supports_mba_mbps(void)
return (resctrl_is_mbm_enabled() &&
r->alloc_capable && is_mba_linear() &&
- r->ctrl_scope == rmbm->mon_scope);
+ r->ctrl_scope == rmbm->mon_scope &&
+ !rmbm->mon.mbm_cntr_assignable);
}
/*
@@ -2937,7 +2947,7 @@ static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
ctx->enable_cdpl2 = true;
return 0;
case Opt_mba_mbps:
- msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
+ msg = "mba_MBps requires MBM (mbm_event mode not supported) and linear scale MBA at L3 scope";
if (!supports_mba_mbps())
return invalfc(fc, msg);
ctx->enable_mba_mbps = true;
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 006e57fd7ca5..73ff522448a0 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -286,13 +286,15 @@ enum resctrl_schema_fmt {
/**
* struct resctrl_mon - Monitoring related data of a resctrl resource.
- * @num_rmid: Number of RMIDs available.
- * @mbm_cfg_mask: Memory transactions that can be tracked when bandwidth
- * monitoring events can be configured.
- * @num_mbm_cntrs: Number of assignable counters.
- * @mbm_cntr_assignable:Is system capable of supporting counter assignment?
- * @mbm_assign_on_mkdir:True if counters should automatically be assigned to MBM
- * events of monitor groups created via mkdir.
+ * @num_rmid: Number of RMIDs available.
+ * @mbm_cfg_mask: Memory transactions that can be tracked when
+ * bandwidth monitoring events can be configured.
+ * @num_mbm_cntrs: Number of assignable counters.
+ * @mbm_cntr_assignable: Is system capable of supporting counter assignment?
+ * @mbm_assign_on_mkdir: True if counters should automatically be assigned to MBM
+ * events of monitor groups created via mkdir.
+ * @mbm_cntr_configurable: True if assignable counters are configurable.
+ * @mbm_cntr_assign_fixed: True if the counter assignment mode is fixed.
*/
struct resctrl_mon {
u32 num_rmid;
@@ -300,6 +302,8 @@ struct resctrl_mon {
int num_mbm_cntrs;
bool mbm_cntr_assignable;
bool mbm_assign_on_mkdir;
+ bool mbm_cntr_configurable;
+ bool mbm_cntr_assign_fixed;
};
/**