From 4d269ed485298e8a09485a664e7b35b370ab4ada Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:09 +0000 Subject: x86/resctrl: Kill off alloc_enabled rdt_resources_all[] used to have extra entries for L2CODE/L2DATA. These were hidden from resctrl by the alloc_enabled value. Now that the L2/L2CODE/L2DATA resources have been merged together, alloc_enabled doesn't mean anything, it always has the same value as alloc_capable which indicates allocation is supported by this resource. Remove alloc_enabled and its helpers. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-2-james.morse@arm.com --- include/linux/resctrl.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 21deb5212bbd..386ab3a41500 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -130,7 +130,6 @@ struct resctrl_schema; /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource - * @alloc_enabled: Is allocation enabled on this machine * @mon_enabled: Is monitoring enabled for this feature * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine @@ -150,7 +149,6 @@ struct resctrl_schema; */ struct rdt_resource { int rid; - bool alloc_enabled; bool mon_enabled; bool alloc_capable; bool mon_capable; -- cgit v1.2.3 From bab6ee736873becc0216ba5fd159394e272d01b2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:10 +0000 Subject: x86/resctrl: Merge mon_capable and mon_enabled mon_enabled and mon_capable are always set as a pair by rdt_get_mon_l3_config(). There is no point having two values. Merge them together. 
Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-3-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/internal.h | 4 ---- arch/x86/kernel/cpu/resctrl/monitor.c | 1 - arch/x86/kernel/cpu/resctrl/rdtgroup.c | 8 ++++---- include/linux/resctrl.h | 2 -- 4 files changed, 4 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 53f3d275a98f..8828b5c1b6d2 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -459,10 +459,6 @@ int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); for_each_rdt_resource(r) \ if (r->mon_capable) -#define for_each_mon_enabled_rdt_resource(r) \ - for_each_rdt_resource(r) \ - if (r->mon_enabled) - /* CPUID.(EAX=10H, ECX=ResID=1).EAX */ union cpuid_0x10_1_eax { struct { diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index eaf25a234ff5..497cadf3285d 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -717,7 +717,6 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) l3_mon_evt_init(r); r->mon_capable = true; - r->mon_enabled = true; return 0; } diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 526eb933333b..def7c6681f8b 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1765,7 +1765,7 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) goto out_destroy; } - for_each_mon_enabled_rdt_resource(r) { + for_each_mon_capable_rdt_resource(r) { fflags = r->fflags | RF_MON_INFO; sprintf(name, "%s_MON", r->name); ret = rdtgroup_mkdir_info_resdir(r, name, fflags); @@ -2504,7 +2504,7 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id) struct rdtgroup *prgrp, *crgrp; char name[32]; - if (!r->mon_enabled) + if (!r->mon_capable) return; list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { @@ -2572,7 +2572,7 @@ void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, struct rdtgroup *prgrp, *crgrp; struct list_head *head; - if (!r->mon_enabled) + if (!r->mon_capable) return; list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { @@ -2642,7 +2642,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, * Create the subdirectories for each domain. 
Note that all events * in a domain like L3 are grouped into a resource whose domain is L3 */ - for_each_mon_enabled_rdt_resource(r) { + for_each_mon_capable_rdt_resource(r) { ret = mkdir_mondata_subdir_alldom(kn, r, prgrp); if (ret) goto out_destroy; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 386ab3a41500..8180c539800d 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -130,7 +130,6 @@ struct resctrl_schema; /** * struct rdt_resource - attributes of a resctrl resource * @rid: The index of the resource - * @mon_enabled: Is monitoring enabled for this feature * @alloc_capable: Is allocation available on this machine * @mon_capable: Is monitor feature available on this machine * @num_rmid: Number of RMIDs available @@ -149,7 +148,6 @@ struct resctrl_schema; */ struct rdt_resource { int rid; - bool mon_enabled; bool alloc_capable; bool mon_capable; int num_rmid; -- cgit v1.2.3 From 3a7232cdf19e39e7f24c493117b373788b348af2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:11 +0000 Subject: x86/resctrl: Add domain online callback for resctrl work Because domains are exposed to user-space via resctrl, the filesystem must update its state when CPU hotplug callbacks are triggered. Some of this work is common to any architecture that would support resctrl, but the work is tied up with the architecture code to allocate the memory. Move domain_setup_mon_state(), the monitor subdir creation call and the mbm/limbo workers into a new resctrl_online_domain() call. These bits are not specific to the architecture. Grouping them in one function allows that code to be moved to /fs/ and re-used by another architecture. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-4-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 53 ++++----------------------- arch/x86/kernel/cpu/resctrl/internal.h | 2 -- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 65 +++++++++++++++++++++++++++++++--- include/linux/resctrl.h | 1 + 4 files changed, 67 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index 2f87177f1f69..25f30148478b 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -443,42 +443,6 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) return 0; } -static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) -{ - size_t tsize; - - if (is_llc_occupancy_enabled()) { - d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL); - if (!d->rmid_busy_llc) - return -ENOMEM; - INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); - } - if (is_mbm_total_enabled()) { - tsize = sizeof(*d->mbm_total); - d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL); - if (!d->mbm_total) { - bitmap_free(d->rmid_busy_llc); - return -ENOMEM; - } - } - if (is_mbm_local_enabled()) { - tsize = sizeof(*d->mbm_local); - d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL); - if (!d->mbm_local) { - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - return -ENOMEM; - } - } - - if (is_mbm_enabled()) { - INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); - mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL); - } - - return 0; -} - /* * domain_add_cpu - Add a cpu to a resource's domain list. 
* @@ -498,6 +462,7 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) struct list_head *add_pos = NULL; struct rdt_hw_domain *hw_dom; struct rdt_domain *d; + int err; d = rdt_find_domain(r, id, &add_pos); if (IS_ERR(d)) { @@ -527,21 +492,15 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) return; } - if (r->mon_capable && domain_setup_mon_state(r, d)) { + list_add_tail(&d->list, add_pos); + + err = resctrl_online_domain(r, d); + if (err) { + list_del(&d->list); kfree(hw_dom->ctrl_val); kfree(hw_dom->mbps_val); kfree(hw_dom); - return; } - - list_add_tail(&d->list, add_pos); - - /* - * If resctrl is mounted, add - * per domain monitor data directories. - */ - if (static_branch_unlikely(&rdt_mon_enable_key)) - mkdir_mondata_subdir_allrdtgrp(r, d); } static void domain_remove_cpu(int cpu, struct rdt_resource *r) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 8828b5c1b6d2..be48a682dbdb 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -524,8 +524,6 @@ void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id); -void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_domain *d, struct rdtgroup *rdtgrp, int evtid, int first); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index def7c6681f8b..030a70326ccc 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2565,16 +2565,13 @@ out_destroy: * Add all subdirectories of mon_data for "ctrl_mon" groups * and "monitor" groups with given domain id. 
*/ -void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - struct rdt_domain *d) +static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + struct rdt_domain *d) { struct kernfs_node *parent_kn; struct rdtgroup *prgrp, *crgrp; struct list_head *head; - if (!r->mon_capable) - return; - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { parent_kn = prgrp->mon.mon_data_kn; mkdir_mondata_subdir(parent_kn, d, r, prgrp); @@ -3236,6 +3233,64 @@ out: return ret; } +static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) +{ + size_t tsize; + + if (is_llc_occupancy_enabled()) { + d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL); + if (!d->rmid_busy_llc) + return -ENOMEM; + } + if (is_mbm_total_enabled()) { + tsize = sizeof(*d->mbm_total); + d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL); + if (!d->mbm_total) { + bitmap_free(d->rmid_busy_llc); + return -ENOMEM; + } + } + if (is_mbm_local_enabled()) { + tsize = sizeof(*d->mbm_local); + d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL); + if (!d->mbm_local) { + bitmap_free(d->rmid_busy_llc); + kfree(d->mbm_total); + return -ENOMEM; + } + } + + return 0; +} + +int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + int err; + + lockdep_assert_held(&rdtgroup_mutex); + + if (!r->mon_capable) + return 0; + + err = domain_setup_mon_state(r, d); + if (err) + return err; + + if (is_mbm_enabled()) { + INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow); + mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL); + } + + if (is_llc_occupancy_enabled()) + INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo); + + /* If resctrl is mounted, add per domain monitor data directories. */ + if (static_branch_unlikely(&rdt_mon_enable_key)) + mkdir_mondata_subdir_allrdtgrp(r, d); + + return 0; +} + /* * rdtgroup_init - rdtgroup initialization * diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 8180c539800d..d512455b4c3a 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -192,5 +192,6 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); +int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); #endif /* _RESCTRL_H */ -- cgit v1.2.3 From 798fd4b9ac37fec571f55fb8592497b0dd5f7a73 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:13 +0000 Subject: x86/resctrl: Add domain offline callback for resctrl work Because domains are exposed to user-space via resctrl, the filesystem must update its state when CPU hotplug callbacks are triggered. Some of this work is common to any architecture that would support resctrl, but the work is tied up with the architecture code to free the memory. Move the monitor subdir removal and the cancelling of the mbm/limbo works into a new resctrl_offline_domain() call. These bits are not specific to the architecture. Grouping them in one function allows that code to be moved to /fs/ and re-used by another architecture. 
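Editorial note, not part of the patch: a minimal sketch of how an architecture's domain hotplug path is expected to pair the two filesystem callbacks once both exist. The arch_domain_online()/arch_domain_offline() wrappers and the simplified error handling are assumptions for illustration only.

    /* Sketch: architecture side of domain hotplug, pairing the callbacks. */
    static int arch_domain_online(struct rdt_resource *r, struct rdt_domain *d)
    {
            int err;

            /* The architecture allocates the domain and its hardware state first. */
            list_add_tail(&d->list, &r->domains);

            /* Filesystem side: monitor state, mbm/limbo workers, mon_data dirs. */
            err = resctrl_online_domain(r, d);
            if (err)
                    list_del(&d->list);

            return err;
    }

    static void arch_domain_offline(struct rdt_resource *r, struct rdt_domain *d)
    {
            /* Filesystem side: remove mon_data dirs, cancel pending work. */
            resctrl_offline_domain(r, d);
            list_del(&d->list);
            /* The architecture frees the domain's hardware state afterwards. */
    }
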
Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-6-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 26 ++------------------ arch/x86/kernel/cpu/resctrl/internal.h | 2 -- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 45 +++++++++++++++++++++++++++++++--- include/linux/resctrl.h | 1 + 4 files changed, 44 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e37889f7a1a5..f69182973175 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -523,27 +523,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) cpumask_clear_cpu(cpu, &d->cpu_mask); if (cpumask_empty(&d->cpu_mask)) { - /* - * If resctrl is mounted, remove all the - * per domain monitor data directories. - */ - if (static_branch_unlikely(&rdt_mon_enable_key)) - rmdir_mondata_subdir_allrdtgrp(r, d->id); + resctrl_offline_domain(r, d); list_del(&d->list); - if (r->mon_capable && is_mbm_enabled()) - cancel_delayed_work(&d->mbm_over); - if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { - /* - * When a package is going down, forcefully - * decrement rmid->ebusy. There is no way to know - * that the L3 was flushed and hence may lead to - * incorrect counts in rare scenarios, but leaving - * the RMID as busy creates RMID leaks if the - * package never comes back. - */ - __check_limbo(d, true); - cancel_delayed_work(&d->cqm_limbo); - } /* * rdt_domain "d" is going to be freed below, so clear @@ -551,11 +532,8 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) */ if (d->plr) d->plr->d = NULL; - - bitmap_free(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); domain_free(hw_dom); + return; } diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index be48a682dbdb..e12b55f815bf 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -522,8 +522,6 @@ void free_rmid(u32 rmid); int rdt_get_mon_l3_config(struct rdt_resource *r); void mon_event_count(void *info); int rdtgroup_mondata_show(struct seq_file *m, void *arg); -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, - unsigned int dom_id); void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, struct rdt_domain *d, struct rdtgroup *rdtgrp, int evtid, int first); diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 030a70326ccc..5830905a92d2 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -2499,14 +2499,12 @@ static int mon_addfile(struct kernfs_node *parent_kn, const char *name, * Remove all subdirectories of mon_data of ctrl_mon groups * and monitor groups with given domain id. 
*/ -void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id) +static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, + unsigned int dom_id) { struct rdtgroup *prgrp, *crgrp; char name[32]; - if (!r->mon_capable) - return; - list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { sprintf(name, "mon_%s_%02d", r->name, dom_id); kernfs_remove_by_name(prgrp->mon.mon_data_kn, name); @@ -3233,6 +3231,45 @@ out: return ret; } +static void domain_destroy_mon_state(struct rdt_domain *d) +{ + bitmap_free(d->rmid_busy_llc); + kfree(d->mbm_total); + kfree(d->mbm_local); +} + +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) +{ + lockdep_assert_held(&rdtgroup_mutex); + + if (!r->mon_capable) + return; + + /* + * If resctrl is mounted, remove all the + * per domain monitor data directories. + */ + if (static_branch_unlikely(&rdt_mon_enable_key)) + rmdir_mondata_subdir_allrdtgrp(r, d->id); + + if (is_mbm_enabled()) + cancel_delayed_work(&d->mbm_over); + if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { + /* + * When a package is going down, forcefully + * decrement rmid->ebusy. There is no way to know + * that the L3 was flushed and hence may lead to + * incorrect counts in rare scenarios, but leaving + * the RMID as busy creates RMID leaks if the + * package never comes back. + */ + __check_limbo(d, true); + cancel_delayed_work(&d->cqm_limbo); + } + + domain_destroy_mon_state(d); +} + static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d) { size_t tsize; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index d512455b4c3a..5d283bdd6162 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -193,5 +193,6 @@ int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); +void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); #endif /* _RESCTRL_H */ -- cgit v1.2.3 From 781096d971dfe3c5f9401a300bdb0b148a600584 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:16 +0000 Subject: x86/resctrl: Create mba_sc configuration in the rdt_domain To support resctrl's MBA software controller, the architecture must provide a second configuration array to hold the mbps_val[] from user-space. This complicates the interface between the architecture specific code and the filesystem portions of resctrl that will move to /fs/, to allow multiple architectures to support resctrl. Make the filesystem parts of resctrl create an array for the mba_sc values. The software controller can be changed to use this, allowing the architecture code to only consider the values configured in hardware. 
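Editorial note, not part of the patch: a sketch of the ownership and indexing convention for the new per-domain array. mba_sc_is_throttled() is a hypothetical helper used only to illustrate that convention.

    /*
     * d->mbps_val[] is created by the filesystem, one u32 per closid,
     * holding the user's target bandwidth in MBps. MBA_MAX_MBPS (U32_MAX)
     * means no target was set, so the software controller leaves that
     * group's hardware control value alone.
     */
    static bool mba_sc_is_throttled(struct rdt_domain *d, u32 closid)
    {
            return d->mbps_val[closid] != MBA_MAX_MBPS;
    }
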
Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-9-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/internal.h | 1 - arch/x86/kernel/cpu/resctrl/rdtgroup.c | 39 ++++++++++++++++++++++++++++++++++ include/linux/resctrl.h | 7 ++++++ 3 files changed, 46 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index e12b55f815bf..a7e2cbce29d5 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -36,7 +36,6 @@ #define MBM_OVERFLOW_INTERVAL 1000 #define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 -#define MBA_MAX_MBPS U32_MAX #define MAX_MBA_BW_AMD 0x800 #define MBM_CNTR_WIDTH_OFFSET_AMD 20 diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 4ee26264ecfc..f7ebd019e7a5 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1889,6 +1889,30 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r) l3_qos_cfg_update(&hw_res->cdp_enabled); } +static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d) +{ + u32 num_closid = resctrl_arch_get_num_closid(r); + int cpu = cpumask_any(&d->cpu_mask); + int i; + + d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val), + GFP_KERNEL, cpu_to_node(cpu)); + if (!d->mbps_val) + return -ENOMEM; + + for (i = 0; i < num_closid; i++) + d->mbps_val[i] = MBA_MAX_MBPS; + + return 0; +} + +static void mba_sc_domain_destroy(struct rdt_resource *r, + struct rdt_domain *d) +{ + kfree(d->mbps_val); + d->mbps_val = NULL; +} + /* * MBA software controller is supported only if * MBM is supported and MBA is in linear scale. @@ -1908,12 +1932,20 @@ static bool supports_mba_mbps(void) static int set_mba_sc(bool mba_sc) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + u32 num_closid = resctrl_arch_get_num_closid(r); + struct rdt_domain *d; + int i; if (!supports_mba_mbps() || mba_sc == is_mba_sc(r)) return -EINVAL; r->membw.mba_sc = mba_sc; + list_for_each_entry(d, &r->domains, list) { + for (i = 0; i < num_closid; i++) + d->mbps_val[i] = MBA_MAX_MBPS; + } + return 0; } @@ -3247,6 +3279,9 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d) { lockdep_assert_held(&rdtgroup_mutex); + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) + mba_sc_domain_destroy(r, d); + if (!r->mon_capable) return; @@ -3311,6 +3346,10 @@ int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d) lockdep_assert_held(&rdtgroup_mutex); + if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) + /* RDT_RESOURCE_MBA is never mon_capable */ + return mba_sc_domain_allocate(r, d); + if (!r->mon_capable) return 0; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 5d283bdd6162..93dfe553b364 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -15,6 +15,9 @@ int proc_resctrl_show(struct seq_file *m, #endif +/* max value for struct rdt_domain's mbps_val */ +#define MBA_MAX_MBPS U32_MAX + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. 
@@ -53,6 +56,9 @@ struct resctrl_staged_config { * @cqm_work_cpu: worker CPU for CQM h/w counters * @plr: pseudo-locked region (if any) associated with domain * @staged_config: parsed configuration to be applied + * @mbps_val: When mba_sc is enabled, this holds the array of user + * specified control values for mba_sc in MBps, indexed + * by closid */ struct rdt_domain { struct list_head list; @@ -67,6 +73,7 @@ struct rdt_domain { int cqm_work_cpu; struct pseudo_lock_region *plr; struct resctrl_staged_config staged_config[CDP_NUM_TYPES]; + u32 *mbps_val; }; /** -- cgit v1.2.3 From ff6357bb50023af2a1dc8f113930082c5252c753 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:19 +0000 Subject: x86/resctrl: Allow update_mba_bw() to update controls directly update_mba_bw() calculates a new control value for the MBA resource based on the user provided mbps_val and the current measured bandwidth. Some control values need remapping by delay_bw_map(). It does this by calling wrmsrl() directly. This needs splitting up to be done by an architecture specific helper, so that the remainder can eventually be moved to /fs/. Add resctrl_arch_update_one() to apply one configuration value to the provided resource and domain. This avoids the staging and cross-calling that is only needed with changes made by user-space. delay_bw_map() moves to be part of the arch code, to maintain the 'percentage control' view of MBA resources in resctrl. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-12-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/core.c | 2 +- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 21 +++++++++++++++++++++ arch/x86/kernel/cpu/resctrl/internal.h | 1 - arch/x86/kernel/cpu/resctrl/monitor.c | 13 ++++--------- include/linux/resctrl.h | 8 ++++++++ 5 files changed, 34 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index f0e2820af475..90ebb7d71af2 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -296,7 +296,7 @@ mba_wrmsr_amd(struct rdt_domain *d, struct msr_param *m, struct rdt_resource *r) * that can be written to QOS_MSRs. * There are currently no SKUs which support non linear delay values. 
*/ -u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) +static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) { if (r->membw.delay_linear) return MAX_MBA_BW - bw; diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index bf9d73c5be14..0ab92320de71 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -282,6 +282,27 @@ static bool apply_config(struct rdt_hw_domain *hw_dom, return false; } +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, enum resctrl_conf_type t, u32 cfg_val) +{ + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + u32 idx = get_config_index(closid, t); + struct msr_param msr_param; + + if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + return -EINVAL; + + hw_dom->ctrl_val[idx] = cfg_val; + + msr_param.res = r; + msr_param.low = idx; + msr_param.high = idx + 1; + hw_res->msr_update(d, &msr_param, r); + + return 0; +} + int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid) { struct resctrl_staged_config *cfg; diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 373aaba53ecd..3b9e43ba7590 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -527,7 +527,6 @@ void mbm_setup_overflow_handler(struct rdt_domain *dom, void mbm_handle_overflow(struct work_struct *work); void __init intel_rdt_mbm_apply_quirk(void); bool is_mba_sc(struct rdt_resource *r); -u32 delay_bw_map(unsigned long bw, struct rdt_resource *r); void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 16028b2f756a..3e69386cfe00 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -420,10 +420,8 @@ void mon_event_count(void *info) */ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) { - u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val; + u32 closid, rmid, cur_msr_val, new_msr_val; struct mbm_state *pmbm_data, *cmbm_data; - struct rdt_hw_resource *hw_r_mba; - struct rdt_hw_domain *hw_dom_mba; u32 cur_bw, delta_bw, user_bw; struct rdt_resource *r_mba; struct rdt_domain *dom_mba; @@ -433,8 +431,8 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) if (!is_mbm_local_enabled()) return; - hw_r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; - r_mba = &hw_r_mba->r_resctrl; + r_mba = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl; + closid = rgrp->closid; rmid = rgrp->mon.rmid; pmbm_data = &dom_mbm->mbm_local[rmid]; @@ -444,7 +442,6 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) pr_warn_once("Failure to get domain for MBA update\n"); return; } - hw_dom_mba = resctrl_to_arch_dom(dom_mba); cur_bw = pmbm_data->prev_bw; user_bw = dom_mba->mbps_val[closid]; @@ -486,9 +483,7 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) return; } - cur_msr = hw_r_mba->msr_base + closid; - wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba)); - hw_dom_mba->ctrl_val[closid] = new_msr_val; + resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val); /* * Delta values are updated dynamically package wise for each diff 
--git a/include/linux/resctrl.h b/include/linux/resctrl.h index 93dfe553b364..f4c9101df461 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -197,6 +197,14 @@ struct resctrl_schema { /* The number of closid supported by this resource regardless of CDP */ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); + +/* + * Update the ctrl_val and apply this config right now. + * Must be called on one of the domain's CPUs. + */ +int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d, + u32 closid, enum resctrl_conf_type t, u32 cfg_val); + u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); -- cgit v1.2.3 From fea62d370d7a1ba288d71d0cae7ad47c2a02b839 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:22 +0000 Subject: x86/resctrl: Allow per-rmid arch private storage to be reset To abstract the rmid counters into a helper that returns the number of bytes counted, architecture specific per-rmid state is needed. It needs to be possible to reset this hidden state, as the values may outlive the life of an rmid, or the mount time of the filesystem. mon_event_read() is called with first = true when an rmid is first allocated in mkdir_mondata_subdir(). Add resctrl_arch_reset_rmid() and call it from __mon_event_count()'s rr->first check. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-15-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/internal.h | 18 +++++------------ arch/x86/kernel/cpu/resctrl/monitor.c | 35 +++++++++++++++++++++++++++++++++- include/linux/resctrl.h | 23 ++++++++++++++++++++++ 3 files changed, 62 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 4de8e5bb93e1..b34a1403f033 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -22,14 +22,6 @@ #define L2_QOS_CDP_ENABLE 0x01ULL -/* - * Event IDs are used to program IA32_QM_EVTSEL before reading event - * counter from IA32_QM_CTR - */ -#define QOS_L3_OCCUP_EVENT_ID 0x01 -#define QOS_L3_MBM_TOTAL_EVENT_ID 0x02 -#define QOS_L3_MBM_LOCAL_EVENT_ID 0x03 - #define CQM_LIMBOCHECK_INTERVAL 1000 #define MBM_CNTR_WIDTH_BASE 24 @@ -73,7 +65,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); * @list: entry in &rdt_resource->evt_list */ struct mon_evt { - u32 evtid; + enum resctrl_event_id evtid; char *name; struct list_head list; }; @@ -90,9 +82,9 @@ struct mon_evt { union mon_data_bits { void *priv; struct { - unsigned int rid : 10; - unsigned int evtid : 8; - unsigned int domid : 14; + unsigned int rid : 10; + enum resctrl_event_id evtid : 8; + unsigned int domid : 14; } u; }; @@ -100,7 +92,7 @@ struct rmid_read { struct rdtgroup *rgrp; struct rdt_resource *r; struct rdt_domain *d; - int evtid; + enum resctrl_event_id evtid; bool first; u64 val; }; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 2d81b6cd9632..e9755143492b 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -137,7 +137,37 @@ static inline struct rmid_entry 
*__rmid_entry(u32 rmid) return entry; } -static u64 __rmid_read(u32 rmid, u32 eventid) +static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, + u32 rmid, + enum resctrl_event_id eventid) +{ + switch (eventid) { + case QOS_L3_OCCUP_EVENT_ID: + return NULL; + case QOS_L3_MBM_TOTAL_EVENT_ID: + return &hw_dom->arch_mbm_total[rmid]; + case QOS_L3_MBM_LOCAL_EVENT_ID: + return &hw_dom->arch_mbm_local[rmid]; + } + + /* Never expect to get here */ + WARN_ON_ONCE(1); + + return NULL; +} + +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid) +{ + struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); + struct arch_mbm_state *am; + + am = get_arch_mbm_state(hw_dom, rmid, eventid); + if (am) + memset(am, 0, sizeof(*am)); +} + +static u64 __rmid_read(u32 rmid, enum resctrl_event_id eventid) { u64 val; @@ -291,6 +321,9 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) struct mbm_state *m; u64 chunks, tval; + if (rr->first) + resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid); + tval = __rmid_read(rmid, rr->evtid); if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { return tval; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index f4c9101df461..818456770176 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -32,6 +32,16 @@ enum resctrl_conf_type { #define CDP_NUM_TYPES (CDP_DATA + 1) +/* + * Event IDs, the values match those used to program IA32_QM_EVTSEL before + * reading IA32_QM_CTR on RDT systems. + */ +enum resctrl_event_id { + QOS_L3_OCCUP_EVENT_ID = 0x01, + QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, + QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, +}; + /** * struct resctrl_staged_config - parsed configuration to be applied * @new_ctrl: new ctrl value to be loaded @@ -210,4 +220,17 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +/** + * resctrl_arch_reset_rmid() - Reset any private state associated with rmid + * and eventid. + * @r: The domain's resource. + * @d: The rmid's domain. + * @rmid: The rmid whose counter values should be reset. + * @eventid: The eventid whose counter values should be reset. + * + * This can be called from any CPU. + */ +void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid); + #endif /* _RESCTRL_H */ -- cgit v1.2.3 From 4d044c521a63b2cd394ea6e3547012032145e47e Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:23 +0000 Subject: x86/resctrl: Abstract __rmid_read() __rmid_read() selects the specified eventid and returns the counter value from the MSR. The error handling is architecture specific, and handled by the callers, rdtgroup_mondata_show() and __mon_event_count(). Error handling should be handled by architecture specific code, as a different architecture may have different requirements. MPAM's counters can report that they are 'not ready', requiring a second read after a short delay. This should be hidden from resctrl. Make __rmid_read() the architecture specific function for reading a counter. Rename it resctrl_arch_rmid_read() and move the error handling into it. A read from a counter that hardware supports but resctrl does not now returns -EINVAL instead of -EIO from the default case in __mon_event_count(). 
It isn't possible for user-space to see this change as resctrl doesn't expose counters it doesn't support. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-16-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 4 +- arch/x86/kernel/cpu/resctrl/internal.h | 1 + arch/x86/kernel/cpu/resctrl/monitor.c | 62 +++++++++++++++++++------------ include/linux/resctrl.h | 1 + 4 files changed, 43 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 0ab92320de71..42a1abb378f0 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -579,9 +579,9 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) mon_event_read(&rr, r, d, rdtgrp, evtid, false); - if (rr.val & RMID_VAL_ERROR) + if (rr.err == -EIO) seq_puts(m, "Error\n"); - else if (rr.val & RMID_VAL_UNAVAIL) + else if (rr.err == -EINVAL) seq_puts(m, "Unavailable\n"); else seq_printf(m, "%llu\n", rr.val * hw_res->mon_scale); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index b34a1403f033..1d2e7bd6305f 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -94,6 +94,7 @@ struct rmid_read { struct rdt_domain *d; enum resctrl_event_id evtid; bool first; + int err; u64 val; }; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index e9755143492b..51ab76f2dfbc 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -167,9 +167,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, memset(am, 0, sizeof(*am)); } -static u64 __rmid_read(u32 rmid, enum resctrl_event_id eventid) +int resctrl_arch_rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) { - u64 val; + u64 msr_val; /* * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured @@ -180,14 +180,24 @@ static u64 __rmid_read(u32 rmid, enum resctrl_event_id eventid) * are error bits. 
*/ wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, val); + rdmsrl(MSR_IA32_QM_CTR, msr_val); - return val; + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + + return 0; } static bool rmid_dirty(struct rmid_entry *entry) { - u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID); + u64 val = 0; + + if (resctrl_arch_rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID, &val)) + return true; return val >= resctrl_cqm_threshold; } @@ -259,8 +269,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) { struct rdt_resource *r; struct rdt_domain *d; - int cpu; - u64 val; + int cpu, err; + u64 val = 0; r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; @@ -268,8 +278,10 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) cpu = get_cpu(); list_for_each_entry(d, &r->domains, list) { if (cpumask_test_cpu(cpu, &d->cpu_mask)) { - val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID); - if (val <= resctrl_cqm_threshold) + err = resctrl_arch_rmid_read(entry->rmid, + QOS_L3_OCCUP_EVENT_ID, + &val); + if (err || val <= resctrl_cqm_threshold) continue; } @@ -315,19 +327,19 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >> shift; } -static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) +static int __mon_event_count(u32 rmid, struct rmid_read *rr) { struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r); struct mbm_state *m; - u64 chunks, tval; + u64 chunks, tval = 0; if (rr->first) resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid); - tval = __rmid_read(rmid, rr->evtid); - if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { - return tval; - } + rr->err = resctrl_arch_rmid_read(rmid, rr->evtid, &tval); + if (rr->err) + return rr->err; + switch (rr->evtid) { case QOS_L3_OCCUP_EVENT_ID: rr->val += tval; @@ -341,9 +353,9 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) default: /* * Code would never reach here because an invalid - * event id would fail the __rmid_read. + * event id would fail in resctrl_arch_rmid_read(). */ - return RMID_VAL_ERROR; + return -EINVAL; } if (rr->first) { @@ -399,11 +411,11 @@ void mon_event_count(void *info) struct rdtgroup *rdtgrp, *entry; struct rmid_read *rr = info; struct list_head *head; - u64 ret_val; + int ret; rdtgrp = rr->rgrp; - ret_val = __mon_event_count(rdtgrp->mon.rmid, rr); + ret = __mon_event_count(rdtgrp->mon.rmid, rr); /* * For Ctrl groups read data from child monitor groups and @@ -415,13 +427,17 @@ void mon_event_count(void *info) if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { if (__mon_event_count(entry->mon.rmid, rr) == 0) - ret_val = 0; + ret = 0; } } - /* Report error if none of rmid_reads are successful */ - if (ret_val) - rr->val = ret_val; + /* + * __mon_event_count() calls for newly created monitor groups may + * report -EINVAL/Unavailable if the monitor hasn't seen any traffic. + * Discard error if any of the monitor event reads succeeded. 
+ */ + if (ret == 0) + rr->err = 0; } /* diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 818456770176..efe60dd7fd21 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -219,6 +219,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); +int resctrl_arch_rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *res); /** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid -- cgit v1.2.3 From 8286618aca331bf17323ff3023ca831ac6e4b86f Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:24 +0000 Subject: x86/resctrl: Pass the required parameters into resctrl_arch_rmid_read() resctrl_arch_rmid_read() is intended as the function that an architecture agnostic resctrl filesystem driver can use to read a value in bytes from a hardware register. Currently the function returns the MBM values in chunks directly from hardware. To convert this to bytes, some correction and overflow calculations are needed. These depend on the resource and domain structures. Overflow detection requires the old chunks value. None of this is available to resctrl_arch_rmid_read(). MPAM requires the resource and domain structures to find the MMIO device that holds the registers. Pass the resource and domain to resctrl_arch_rmid_read(). This makes rmid_dirty() too big. Instead merge it with its only caller, and the name is kept as a local variable. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-17-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 31 +++++++++++++++++-------------- include/linux/resctrl.h | 18 +++++++++++++++++- 2 files changed, 34 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 51ab76f2dfbc..262141bf4264 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -167,10 +167,14 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, memset(am, 0, sizeof(*am)); } -int resctrl_arch_rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid, u64 *val) { u64 msr_val; + if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) + return -EINVAL; + /* * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured * with a valid event code for supported resource type and the bits @@ -192,16 +196,6 @@ int resctrl_arch_rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) return 0; } -static bool rmid_dirty(struct rmid_entry *entry) -{ - u64 val = 0; - - if (resctrl_arch_rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID, &val)) - return true; - - return val >= resctrl_cqm_threshold; -} - /* * Check the RMIDs that are marked as busy for this domain. 
If the * reported LLC occupancy is below the threshold clear the busy bit and @@ -213,6 +207,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free) struct rmid_entry *entry; struct rdt_resource *r; u32 crmid = 1, nrmid; + bool rmid_dirty; + u64 val = 0; r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; @@ -228,7 +224,14 @@ void __check_limbo(struct rdt_domain *d, bool force_free) break; entry = __rmid_entry(nrmid); - if (force_free || !rmid_dirty(entry)) { + + if (resctrl_arch_rmid_read(r, d, entry->rmid, + QOS_L3_OCCUP_EVENT_ID, &val)) + rmid_dirty = true; + else + rmid_dirty = (val >= resctrl_cqm_threshold); + + if (force_free || !rmid_dirty) { clear_bit(entry->rmid, d->rmid_busy_llc); if (!--entry->busy) { rmid_limbo_count--; @@ -278,7 +281,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) cpu = get_cpu(); list_for_each_entry(d, &r->domains, list) { if (cpumask_test_cpu(cpu, &d->cpu_mask)) { - err = resctrl_arch_rmid_read(entry->rmid, + err = resctrl_arch_rmid_read(r, d, entry->rmid, QOS_L3_OCCUP_EVENT_ID, &val); if (err || val <= resctrl_cqm_threshold) @@ -336,7 +339,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) if (rr->first) resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid); - rr->err = resctrl_arch_rmid_read(rmid, rr->evtid, &tval); + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, rmid, rr->evtid, &tval); if (rr->err) return rr->err; diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index efe60dd7fd21..7ccfa0d1bb34 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -219,7 +219,23 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d, u32 closid, enum resctrl_conf_type type); int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d); void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); -int resctrl_arch_rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *res); + +/** + * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid + * for this resource and domain. + * @r: resource that the counter should be read from. + * @d: domain that the counter should be read from. + * @rmid: rmid of the counter to read. + * @eventid: eventid to read, e.g. L3 occupancy. + * @val: result of the counter read in chunks. + * + * Call from process context on a CPU that belongs to domain @d. + * + * Return: + * 0 on success, or -EIO, -EINVAL etc on error. + */ +int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, + u32 rmid, enum resctrl_event_id eventid, u64 *val); /** * resctrl_arch_reset_rmid() - Reset any private state associated with rmid -- cgit v1.2.3 From ae2328b52962531c2d7c6b531022a3eb2d680f17 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:27 +0000 Subject: x86/resctrl: Rename and change the units of resctrl_cqm_threshold resctrl_cqm_threshold is stored in a hardware specific chunk size, but exposed to user-space as bytes. This means the filesystem parts of resctrl need to know how the hardware counts, to convert the user provided byte value to chunks. The interface between the architecture's resctrl code and the filesystem ought to treat everything as bytes. Change the unit of resctrl_cqm_threshold to bytes. resctrl_arch_rmid_read() still returns its value in chunks, so this needs converting to bytes. As all the users have been touched, rename the variable to resctrl_rmid_realloc_threshold, which describes what the value is for. 
Neither r->num_rmid nor hw_res->mon_scale are guaranteed to be a power of 2, so the existing code introduces a rounding error from resctrl's theoretical fraction of the cache usage. This behaviour is kept as it ensures the user visible value matches the value read from hardware when the rmid will be reallocated. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-20-james.morse@arm.com --- arch/x86/include/asm/resctrl.h | 9 +++++++ arch/x86/kernel/cpu/resctrl/internal.h | 1 - arch/x86/kernel/cpu/resctrl/monitor.c | 43 ++++++++++++++++++++-------------- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 9 ++----- include/linux/resctrl.h | 2 ++ 5 files changed, 39 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h index d60ed0668a59..d24b04ebf950 100644 --- a/arch/x86/include/asm/resctrl.h +++ b/arch/x86/include/asm/resctrl.h @@ -81,6 +81,15 @@ static void __resctrl_sched_in(void) } } +static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) +{ + unsigned int scale = boot_cpu_data.x86_cache_occ_scale; + + /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ + val /= scale; + return val * scale; +} + static inline void resctrl_sched_in(void) { if (static_branch_likely(&rdt_enable_key)) diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index bdb55c2fbdd3..c05e9b7cf77a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -98,7 +98,6 @@ struct rmid_read { u64 val; }; -extern unsigned int resctrl_cqm_threshold; extern bool rdt_alloc_capable; extern bool rdt_mon_capable; extern unsigned int rdt_mon_features; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 27bb4947a176..e91afe99b763 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -17,7 +17,10 @@ #include #include + #include +#include + #include "internal.h" struct rmid_entry { @@ -37,8 +40,8 @@ static LIST_HEAD(rmid_free_lru); * @rmid_limbo_count count of currently unused but (potentially) * dirty RMIDs. * This counts RMIDs that no one is currently using but that - * may have a occupancy value > intel_cqm_threshold. User can change - * the threshold occupancy value. + * may have a occupancy value > resctrl_rmid_realloc_threshold. User can + * change the threshold occupancy value. */ static unsigned int rmid_limbo_count; @@ -59,10 +62,10 @@ bool rdt_mon_capable; unsigned int rdt_mon_features; /* - * This is the threshold cache occupancy at which we will consider an + * This is the threshold cache occupancy in bytes at which we will consider an * RMID available for re-allocation. 
*/ -unsigned int resctrl_cqm_threshold; +unsigned int resctrl_rmid_realloc_threshold; #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) @@ -223,14 +226,13 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, */ void __check_limbo(struct rdt_domain *d, bool force_free) { + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rmid_entry *entry; - struct rdt_resource *r; u32 crmid = 1, nrmid; bool rmid_dirty; u64 val = 0; - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - /* * Skip RMID 0 and start from RMID 1 and check all the RMIDs that * are marked as busy for occupancy < threshold. If the occupancy @@ -245,10 +247,12 @@ void __check_limbo(struct rdt_domain *d, bool force_free) entry = __rmid_entry(nrmid); if (resctrl_arch_rmid_read(r, d, entry->rmid, - QOS_L3_OCCUP_EVENT_ID, &val)) + QOS_L3_OCCUP_EVENT_ID, &val)) { rmid_dirty = true; - else - rmid_dirty = (val >= resctrl_cqm_threshold); + } else { + val *= hw_res->mon_scale; + rmid_dirty = (val >= resctrl_rmid_realloc_threshold); + } if (force_free || !rmid_dirty) { clear_bit(entry->rmid, d->rmid_busy_llc); @@ -289,13 +293,12 @@ int alloc_rmid(void) static void add_rmid_to_limbo(struct rmid_entry *entry) { - struct rdt_resource *r; + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; + struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_domain *d; int cpu, err; u64 val = 0; - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - entry->busy = 0; cpu = get_cpu(); list_for_each_entry(d, &r->domains, list) { @@ -303,7 +306,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) err = resctrl_arch_rmid_read(r, d, entry->rmid, QOS_L3_OCCUP_EVENT_ID, &val); - if (err || val <= resctrl_cqm_threshold) + val *= hw_res->mon_scale; + if (err || val <= resctrl_rmid_realloc_threshold) continue; } @@ -744,6 +748,7 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); unsigned int cl_size = boot_cpu_data.x86_cache_size; + unsigned int threshold; int ret; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; @@ -762,10 +767,14 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) * * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. */ - resctrl_cqm_threshold = cl_size * 1024 / r->num_rmid; + threshold = cl_size * 1024 / r->num_rmid; - /* h/w works in units of "boot_cpu_data.x86_cache_occ_scale" */ - resctrl_cqm_threshold /= hw_res->mon_scale; + /* + * Because num_rmid may not be a power of two, round the value + * to the nearest multiple of hw_res->mon_scale so it matches a + * value the hardware will measure. mon_scale may not be a power of 2. 
+ */ + resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(threshold); ret = dom_data_init(r); if (ret) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 6c33dfe7ea53..849bdec37217 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1030,10 +1030,7 @@ static int rdt_delay_linear_show(struct kernfs_open_file *of, static int max_threshold_occ_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { - struct rdt_resource *r = of->kn->parent->priv; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - - seq_printf(seq, "%u\n", resctrl_cqm_threshold * hw_res->mon_scale); + seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold); return 0; } @@ -1055,7 +1052,6 @@ static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of, static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct rdt_hw_resource *hw_res; unsigned int bytes; int ret; @@ -1066,8 +1062,7 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, if (bytes > (boot_cpu_data.x86_cache_size * 1024)) return -EINVAL; - hw_res = resctrl_to_arch_res(of->kn->parent->priv); - resctrl_cqm_threshold = bytes / hw_res->mon_scale; + resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes); return nbytes; } diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 7ccfa0d1bb34..9995d043650a 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -250,4 +250,6 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, u32 rmid, enum resctrl_event_id eventid); +extern unsigned int resctrl_rmid_realloc_threshold; + #endif /* _RESCTRL_H */ -- cgit v1.2.3 From d80975e264c8f01518890f3d91ab5bada8fa7f5e Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:28 +0000 Subject: x86/resctrl: Add resctrl_rmid_realloc_limit to abstract x86's boot_cpu_data resctrl_rmid_realloc_threshold can be set by user-space. The maximum value is specified by the architecture. Currently max_threshold_occ_write() reads the maximum value from boot_cpu_data.x86_cache_size, which is not portable to another architecture. Add resctrl_rmid_realloc_limit to describe the maximum size in bytes that user-space can set the threshold to. Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-21-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/monitor.c | 9 +++++++-- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 2 +- include/linux/resctrl.h | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index e91afe99b763..8d15568d7121 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -67,6 +67,11 @@ unsigned int rdt_mon_features; */ unsigned int resctrl_rmid_realloc_threshold; +/* + * This is the maximum value for the reallocation threshold, in bytes. 
+ */ +unsigned int resctrl_rmid_realloc_limit; + #define CF(cf) ((unsigned long)(1048576 * (cf) + 0.5)) /* @@ -747,10 +752,10 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) { unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); - unsigned int cl_size = boot_cpu_data.x86_cache_size; unsigned int threshold; int ret; + resctrl_rmid_realloc_limit = boot_cpu_data.x86_cache_size * 1024; hw_res->mon_scale = boot_cpu_data.x86_cache_occ_scale; r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; hw_res->mbm_width = MBM_CNTR_WIDTH_BASE; @@ -767,7 +772,7 @@ int rdt_get_mon_l3_config(struct rdt_resource *r) * * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC. */ - threshold = cl_size * 1024 / r->num_rmid; + threshold = resctrl_rmid_realloc_limit / r->num_rmid; /* * Because num_rmid may not be a power of two, round the value diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 849bdec37217..e5a48f05e787 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -1059,7 +1059,7 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, if (ret) return ret; - if (bytes > (boot_cpu_data.x86_cache_size * 1024)) + if (bytes > resctrl_rmid_realloc_limit) return -EINVAL; resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 9995d043650a..cb857f753322 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -251,5 +251,6 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, u32 rmid, enum resctrl_event_id eventid); extern unsigned int resctrl_rmid_realloc_threshold; +extern unsigned int resctrl_rmid_realloc_limit; #endif /* _RESCTRL_H */ -- cgit v1.2.3 From f7b1843eca6fe295ba0c71fc02a3291954078f2b Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 2 Sep 2022 15:48:29 +0000 Subject: x86/resctrl: Make resctrl_arch_rmid_read() return values in bytes resctrl_arch_rmid_read() returns a value in chunks, as read from the hardware. This needs scaling to bytes by mon_scale, as provided by the architecture code. Now that resctrl_arch_rmid_read() performs the overflow and corrections itself, it may as well return a value in bytes directly. This allows the accesses to the architecture specific 'hw' structure to be removed. Move the mon_scale conversion into resctrl_arch_rmid_read(). mbm_bw_count() is updated to calculate bandwidth from bytes. 
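Editorial note, not part of the patch: a worked example of the resulting unit conversion, using illustrative values; mon_scale is read from CPUID and varies by part.

    u64 chunks = 16384;            /* hardware count read this interval */
    u64 scale  = 65536;            /* hw_res->mon_scale: bytes per chunk */
    u64 bytes  = chunks * scale;   /* 1073741824; what resctrl_arch_rmid_read() now returns */
    u32 mbps   = bytes / SZ_1M;    /* 1024; mbm_bw_count() divides the byte delta over the
                                      1000ms MBM_OVERFLOW_INTERVAL, giving MBps */
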
Signed-off-by: James Morse Signed-off-by: Borislav Petkov Reviewed-by: Jamie Iles Reviewed-by: Shaopeng Tan Reviewed-by: Reinette Chatre Tested-by: Xin Hao Tested-by: Shaopeng Tan Tested-by: Cristian Marussi Link: https://lore.kernel.org/r/20220902154829.30399-22-james.morse@arm.com --- arch/x86/kernel/cpu/resctrl/ctrlmondata.c | 6 ++---- arch/x86/kernel/cpu/resctrl/internal.h | 4 ++-- arch/x86/kernel/cpu/resctrl/monitor.c | 24 +++++++++++------------- include/linux/resctrl.h | 2 +- 4 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 42a1abb378f0..1dafbdc5ac31 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -549,7 +549,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, int rdtgroup_mondata_show(struct seq_file *m, void *arg) { struct kernfs_open_file *of = m->private; - struct rdt_hw_resource *hw_res; u32 resid, evtid, domid; struct rdtgroup *rdtgrp; struct rdt_resource *r; @@ -569,8 +568,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) domid = md.u.domid; evtid = md.u.evtid; - hw_res = &rdt_resources_all[resid]; - r = &hw_res->r_resctrl; + r = &rdt_resources_all[resid].r_resctrl; d = rdt_find_domain(r, domid, NULL); if (IS_ERR_OR_NULL(d)) { ret = -ENOENT; @@ -584,7 +582,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) else if (rr.err == -EINVAL) seq_puts(m, "Unavailable\n"); else - seq_printf(m, "%llu\n", rr.val * hw_res->mon_scale); + seq_printf(m, "%llu\n", rr.val); out: rdtgroup_kn_unlock(of->kn); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c05e9b7cf77a..5f7128686cfd 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -279,13 +279,13 @@ struct rftype { /** * struct mbm_state - status for each MBM counter in each domain - * @prev_bw_chunks: Previous chunks value read for bandwidth calculation + * @prev_bw_bytes: Previous bytes value read for bandwidth calculation * @prev_bw: The most recent bandwidth in MBps * @delta_bw: Difference between the current and previous bandwidth * @delta_comp: Indicates whether to compute the delta_bw */ struct mbm_state { - u64 prev_bw_chunks; + u64 prev_bw_bytes; u32 prev_bw; u32 delta_bw; bool delta_comp; diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 8d15568d7121..efe0c30d3a12 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -16,6 +16,7 @@ */ #include +#include #include #include @@ -189,7 +190,7 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct arch_mbm_state *am; - u64 msr_val; + u64 msr_val, chunks; if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) return -EINVAL; @@ -214,12 +215,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, if (am) { am->chunks += mbm_overflow_count(am->prev_msr, msr_val, hw_res->mbm_width); - *val = get_corrected_mbm_count(rmid, am->chunks); + chunks = get_corrected_mbm_count(rmid, am->chunks); am->prev_msr = msr_val; } else { - *val = msr_val; + chunks = msr_val; } + *val = chunks * hw_res->mon_scale; + return 0; } @@ -232,7 +235,6 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, void 
__check_limbo(struct rdt_domain *d, bool force_free) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rmid_entry *entry; u32 crmid = 1, nrmid; bool rmid_dirty; @@ -255,7 +257,6 @@ void __check_limbo(struct rdt_domain *d, bool force_free) QOS_L3_OCCUP_EVENT_ID, &val)) { rmid_dirty = true; } else { - val *= hw_res->mon_scale; rmid_dirty = (val >= resctrl_rmid_realloc_threshold); } @@ -299,7 +300,6 @@ int alloc_rmid(void) static void add_rmid_to_limbo(struct rmid_entry *entry) { struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl; - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r); struct rdt_domain *d; int cpu, err; u64 val = 0; @@ -311,7 +311,6 @@ static void add_rmid_to_limbo(struct rmid_entry *entry) err = resctrl_arch_rmid_read(r, d, entry->rmid, QOS_L3_OCCUP_EVENT_ID, &val); - val *= hw_res->mon_scale; if (err || val <= resctrl_rmid_realloc_threshold) continue; } @@ -403,15 +402,14 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) */ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) { - struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r); struct mbm_state *m = &rr->d->mbm_local[rmid]; - u64 cur_bw, chunks, cur_chunks; + u64 cur_bw, bytes, cur_bytes; - cur_chunks = rr->val; - chunks = cur_chunks - m->prev_bw_chunks; - m->prev_bw_chunks = cur_chunks; + cur_bytes = rr->val; + bytes = cur_bytes - m->prev_bw_bytes; + m->prev_bw_bytes = cur_bytes; - cur_bw = (chunks * hw_res->mon_scale) >> 20; + cur_bw = bytes / SZ_1M; if (m->delta_comp) m->delta_bw = abs(cur_bw - m->prev_bw); diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index cb857f753322..0cf5b20c6ddf 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -227,7 +227,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d); * @d: domain that the counter should be read from. * @rmid: rmid of the counter to read. * @eventid: eventid to read, e.g. L3 occupancy. - * @val: result of the counter read in chunks. + * @val: result of the counter read in bytes. * * Call from process context on a CPU that belongs to domain @d. * -- cgit v1.2.3
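As a worked example of the resctrl_rmid_realloc_limit commit above (a sketch under the assumptions in the in-code comment; sketch_default_threshold() is not a kernel symbol): taking a 35MB LLC as 35840KB, resctrl_rmid_realloc_limit becomes 35840 * 1024 = 36700160 bytes, and dividing by 56 RMIDs gives a default threshold of 655360 bytes, roughly 1.8% of the cache, which rdt_get_mon_l3_config() then rounds with resctrl_arch_round_mon_val().

	/* Default-threshold arithmetic, illustration only. */
	static unsigned int sketch_default_threshold(unsigned int cache_size_kb,
						     unsigned int num_rmid)
	{
		unsigned int limit = cache_size_kb * 1024;	/* resctrl_rmid_realloc_limit, in bytes */

		/* 35MB LLC, 56 RMIDs: 36700160 / 56 = 655360 bytes, ~1.8% of the LLC. */
		return limit / num_rmid;
	}

User-space writes follow the same bound: max_threshold_occ_write() rejects any value larger than resctrl_rmid_realloc_limit and rounds accepted values with resctrl_arch_round_mon_val() before storing them.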