diff options
Diffstat (limited to 'arch/x86/events/rapl.c')
-rw-r--r-- | arch/x86/events/rapl.c | 515 |
1 files changed, 285 insertions, 230 deletions
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index a481a939862e..defd86137f12 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -39,6 +39,10 @@ * event: rapl_energy_psys * perf code: 0x5 * + * core counter: consumption of a single physical core + * event: rapl_energy_core (power_core PMU) + * perf code: 0x1 + * * We manage those counters as free running (read-only). They may be * use simultaneously by other tools, such as turbostat. * @@ -61,6 +65,7 @@ #include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> +#include <asm/msr.h> #include "perf_event.h" #include "probe.h" @@ -70,18 +75,22 @@ MODULE_LICENSE("GPL"); /* * RAPL energy status counters */ -enum perf_rapl_events { +enum perf_rapl_pkg_events { PERF_RAPL_PP0 = 0, /* all cores */ PERF_RAPL_PKG, /* entire package */ PERF_RAPL_RAM, /* DRAM */ PERF_RAPL_PP1, /* gpu */ PERF_RAPL_PSYS, /* psys */ - PERF_RAPL_MAX, - NR_RAPL_DOMAINS = PERF_RAPL_MAX, + PERF_RAPL_PKG_EVENTS_MAX, + NR_RAPL_PKG_DOMAINS = PERF_RAPL_PKG_EVENTS_MAX, }; -static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { +#define PERF_RAPL_CORE 0 /* single core */ +#define PERF_RAPL_CORE_EVENTS_MAX 1 +#define NR_RAPL_CORE_DOMAINS PERF_RAPL_CORE_EVENTS_MAX + +static const char *const rapl_pkg_domain_names[NR_RAPL_PKG_DOMAINS] __initconst = { "pp0-core", "package", "dram", @@ -89,6 +98,8 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = { "psys", }; +static const char *const rapl_core_domain_name __initconst = "core"; + /* * event code: LSB 8 bits, passed in attr->config * any other bit is reserved @@ -112,7 +123,7 @@ static struct perf_pmu_events_attr event_attr_##v = { \ * considered as either pkg-scope or die-scope, and we are considering * them as die-scope. */ -#define rapl_pmu_is_pkg_scope() \ +#define rapl_pkg_pmu_is_pkg_scope() \ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) @@ -129,7 +140,8 @@ struct rapl_pmu { struct rapl_pmus { struct pmu pmu; unsigned int nr_rapl_pmu; - struct rapl_pmu *pmus[] __counted_by(nr_rapl_pmu); + unsigned int cntr_mask; + struct rapl_pmu *rapl_pmu[] __counted_by(nr_rapl_pmu); }; enum rapl_unit_quirk { @@ -139,67 +151,66 @@ enum rapl_unit_quirk { }; struct rapl_model { - struct perf_msr *rapl_msrs; - unsigned long events; + struct perf_msr *rapl_pkg_msrs; + struct perf_msr *rapl_core_msrs; + unsigned long pkg_events; + unsigned long core_events; unsigned int msr_power_unit; enum rapl_unit_quirk unit_quirk; }; /* 1/2^hw_unit Joule */ -static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; -static struct rapl_pmus *rapl_pmus; -static cpumask_t rapl_cpu_mask; -static unsigned int rapl_cntr_mask; +static int rapl_pkg_hw_unit[NR_RAPL_PKG_DOMAINS] __read_mostly; +static int rapl_core_hw_unit __read_mostly; +static struct rapl_pmus *rapl_pmus_pkg; +static struct rapl_pmus *rapl_pmus_core; static u64 rapl_timer_ms; -static struct perf_msr *rapl_msrs; +static struct rapl_model *rapl_model; /* - * Helper functions to get the correct topology macros according to the + * Helper function to get the correct topology id according to the * RAPL PMU scope. */ -static inline unsigned int get_rapl_pmu_idx(int cpu) -{ - return rapl_pmu_is_pkg_scope() ? topology_logical_package_id(cpu) : - topology_logical_die_id(cpu); -} - -static inline const struct cpumask *get_rapl_pmu_cpumask(int cpu) +static inline unsigned int get_rapl_pmu_idx(int cpu, int scope) { - return rapl_pmu_is_pkg_scope() ? topology_core_cpumask(cpu) : - topology_die_cpumask(cpu); -} - -static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) -{ - unsigned int rapl_pmu_idx = get_rapl_pmu_idx(cpu); - /* - * The unsigned check also catches the '-1' return value for non - * existent mappings in the topology map. + * Returns unsigned int, which converts the '-1' return value + * (for non-existent mappings in topology map) to UINT_MAX, so + * the error check in the caller is simplified. */ - return rapl_pmu_idx < rapl_pmus->nr_rapl_pmu ? rapl_pmus->pmus[rapl_pmu_idx] : NULL; + switch (scope) { + case PERF_PMU_SCOPE_PKG: + return topology_logical_package_id(cpu); + case PERF_PMU_SCOPE_DIE: + return topology_logical_die_id(cpu); + case PERF_PMU_SCOPE_CORE: + return topology_logical_core_id(cpu); + default: + return -EINVAL; + } } static inline u64 rapl_read_counter(struct perf_event *event) { u64 raw; - rdmsrl(event->hw.event_base, raw); + rdmsrq(event->hw.event_base, raw); return raw; } -static inline u64 rapl_scale(u64 v, int cfg) +static inline u64 rapl_scale(u64 v, struct perf_event *event) { - if (cfg > NR_RAPL_DOMAINS) { - pr_warn("Invalid domain %d, failed to scale data\n", cfg); - return v; - } + int hw_unit = rapl_pkg_hw_unit[event->hw.config - 1]; + + if (event->pmu->scope == PERF_PMU_SCOPE_CORE) + hw_unit = rapl_core_hw_unit; + /* * scale delta to smallest unit (1/2^32) * users must then scale back: count * 1/(1e9*2^32) to get Joules * or use ldexp(count, -32). * Watts = Joules/Time delta */ - return v << (32 - rapl_hw_unit[cfg - 1]); + return v << (32 - hw_unit); } static u64 rapl_event_update(struct perf_event *event) @@ -211,7 +222,7 @@ static u64 rapl_event_update(struct perf_event *event) prev_raw_count = local64_read(&hwc->prev_count); do { - rdmsrl(event->hw.event_base, new_raw_count); + rdmsrq(event->hw.event_base, new_raw_count); } while (!local64_try_cmpxchg(&hwc->prev_count, &prev_raw_count, new_raw_count)); @@ -226,7 +237,7 @@ static u64 rapl_event_update(struct perf_event *event) delta = (new_raw_count << shift) - (prev_raw_count << shift); delta >>= shift; - sdelta = rapl_scale(delta, event->hw.config); + sdelta = rapl_scale(delta, event); local64_add(sdelta, &event->count); @@ -241,34 +252,33 @@ static void rapl_start_hrtimer(struct rapl_pmu *pmu) static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) { - struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); + struct rapl_pmu *rapl_pmu = container_of(hrtimer, struct rapl_pmu, hrtimer); struct perf_event *event; unsigned long flags; - if (!pmu->n_active) + if (!rapl_pmu->n_active) return HRTIMER_NORESTART; - raw_spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&rapl_pmu->lock, flags); - list_for_each_entry(event, &pmu->active_list, active_entry) + list_for_each_entry(event, &rapl_pmu->active_list, active_entry) rapl_event_update(event); - raw_spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); - hrtimer_forward_now(hrtimer, pmu->timer_interval); + hrtimer_forward_now(hrtimer, rapl_pmu->timer_interval); return HRTIMER_RESTART; } -static void rapl_hrtimer_init(struct rapl_pmu *pmu) +static void rapl_hrtimer_init(struct rapl_pmu *rapl_pmu) { - struct hrtimer *hr = &pmu->hrtimer; + struct hrtimer *hr = &rapl_pmu->hrtimer; - hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hr->function = rapl_hrtimer_handle; + hrtimer_setup(hr, rapl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL); } -static void __rapl_pmu_event_start(struct rapl_pmu *pmu, +static void __rapl_pmu_event_start(struct rapl_pmu *rapl_pmu, struct perf_event *event) { if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) @@ -276,39 +286,39 @@ static void __rapl_pmu_event_start(struct rapl_pmu *pmu, event->hw.state = 0; - list_add_tail(&event->active_entry, &pmu->active_list); + list_add_tail(&event->active_entry, &rapl_pmu->active_list); local64_set(&event->hw.prev_count, rapl_read_counter(event)); - pmu->n_active++; - if (pmu->n_active == 1) - rapl_start_hrtimer(pmu); + rapl_pmu->n_active++; + if (rapl_pmu->n_active == 1) + rapl_start_hrtimer(rapl_pmu); } static void rapl_pmu_event_start(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = event->pmu_private; + struct rapl_pmu *rapl_pmu = event->pmu_private; unsigned long flags; - raw_spin_lock_irqsave(&pmu->lock, flags); - __rapl_pmu_event_start(pmu, event); - raw_spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_lock_irqsave(&rapl_pmu->lock, flags); + __rapl_pmu_event_start(rapl_pmu, event); + raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); } static void rapl_pmu_event_stop(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = event->pmu_private; + struct rapl_pmu *rapl_pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - raw_spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&rapl_pmu->lock, flags); /* mark event as deactivated and stopped */ if (!(hwc->state & PERF_HES_STOPPED)) { - WARN_ON_ONCE(pmu->n_active <= 0); - pmu->n_active--; - if (pmu->n_active == 0) - hrtimer_cancel(&pmu->hrtimer); + WARN_ON_ONCE(rapl_pmu->n_active <= 0); + rapl_pmu->n_active--; + if (rapl_pmu->n_active == 0) + hrtimer_cancel(&rapl_pmu->hrtimer); list_del(&event->active_entry); @@ -326,23 +336,23 @@ static void rapl_pmu_event_stop(struct perf_event *event, int mode) hwc->state |= PERF_HES_UPTODATE; } - raw_spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); } static int rapl_pmu_event_add(struct perf_event *event, int mode) { - struct rapl_pmu *pmu = event->pmu_private; + struct rapl_pmu *rapl_pmu = event->pmu_private; struct hw_perf_event *hwc = &event->hw; unsigned long flags; - raw_spin_lock_irqsave(&pmu->lock, flags); + raw_spin_lock_irqsave(&rapl_pmu->lock, flags); hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; if (mode & PERF_EF_START) - __rapl_pmu_event_start(pmu, event); + __rapl_pmu_event_start(rapl_pmu, event); - raw_spin_unlock_irqrestore(&pmu->lock, flags); + raw_spin_unlock_irqrestore(&rapl_pmu->lock, flags); return 0; } @@ -355,13 +365,19 @@ static void rapl_pmu_event_del(struct perf_event *event, int flags) static int rapl_pmu_event_init(struct perf_event *event) { u64 cfg = event->attr.config & RAPL_EVENT_MASK; - int bit, ret = 0; - struct rapl_pmu *pmu; + int bit, rapl_pmus_scope, ret = 0; + struct rapl_pmu *rapl_pmu; + unsigned int rapl_pmu_idx; + struct rapl_pmus *rapl_pmus; /* only look at RAPL events */ - if (event->attr.type != rapl_pmus->pmu.type) + if (event->attr.type != event->pmu->type) return -ENOENT; + /* unsupported modes and filters */ + if (event->attr.sample_period) /* no sampling */ + return -EINVAL; + /* check only supported bits are set */ if (event->attr.config & ~RAPL_EVENT_MASK) return -EINVAL; @@ -369,29 +385,41 @@ static int rapl_pmu_event_init(struct perf_event *event) if (event->cpu < 0) return -EINVAL; - event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; - - if (!cfg || cfg >= NR_RAPL_DOMAINS + 1) + rapl_pmus = container_of(event->pmu, struct rapl_pmus, pmu); + if (!rapl_pmus) + return -EINVAL; + rapl_pmus_scope = rapl_pmus->pmu.scope; + + if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) { + cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1); + if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) + return -EINVAL; + + bit = cfg - 1; + event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr; + } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) { + cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1); + if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) + return -EINVAL; + + bit = cfg - 1; + event->hw.event_base = rapl_model->rapl_core_msrs[bit].msr; + } else return -EINVAL; - - cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1); - bit = cfg - 1; /* check event supported */ - if (!(rapl_cntr_mask & (1 << bit))) + if (!(rapl_pmus->cntr_mask & (1 << bit))) return -EINVAL; - /* unsupported modes and filters */ - if (event->attr.sample_period) /* no sampling */ + rapl_pmu_idx = get_rapl_pmu_idx(event->cpu, rapl_pmus_scope); + if (rapl_pmu_idx >= rapl_pmus->nr_rapl_pmu) return -EINVAL; - /* must be done before validate_group */ - pmu = cpu_to_rapl_pmu(event->cpu); - if (!pmu) + rapl_pmu = rapl_pmus->rapl_pmu[rapl_pmu_idx]; + if (!rapl_pmu) return -EINVAL; - event->cpu = pmu->cpu; - event->pmu_private = pmu; - event->hw.event_base = rapl_msrs[bit].msr; + + event->pmu_private = rapl_pmu; event->hw.config = cfg; event->hw.idx = bit; @@ -403,34 +431,19 @@ static void rapl_pmu_event_read(struct perf_event *event) rapl_event_update(event); } -static ssize_t rapl_get_attr_cpumask(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask); -} - -static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); - -static struct attribute *rapl_pmu_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group rapl_pmu_attr_group = { - .attrs = rapl_pmu_attrs, -}; - RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); RAPL_EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); RAPL_EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05"); +RAPL_EVENT_ATTR_STR(energy-core, rapl_core, "event=0x01"); RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_psys_unit, "Joules"); +RAPL_EVENT_ATTR_STR(energy-core.unit, rapl_core_unit, "Joules"); /* * we compute in 0.23 nJ increments regardless of MSR @@ -440,6 +453,7 @@ RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890 RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_psys_scale, "2.3283064365386962890625e-10"); +RAPL_EVENT_ATTR_STR(energy-core.scale, rapl_core_scale, "2.3283064365386962890625e-10"); /* * There are no default events, but we need to create @@ -467,7 +481,12 @@ static struct attribute_group rapl_pmu_format_group = { }; static const struct attribute_group *rapl_attr_groups[] = { - &rapl_pmu_attr_group, + &rapl_pmu_format_group, + &rapl_pmu_events_group, + NULL, +}; + +static const struct attribute_group *rapl_core_attr_groups[] = { &rapl_pmu_format_group, &rapl_pmu_events_group, NULL, @@ -533,6 +552,18 @@ static struct attribute_group rapl_events_psys_group = { .attrs = rapl_events_psys, }; +static struct attribute *rapl_events_core[] = { + EVENT_PTR(rapl_core), + EVENT_PTR(rapl_core_unit), + EVENT_PTR(rapl_core_scale), + NULL, +}; + +static struct attribute_group rapl_events_core_group = { + .name = "events", + .attrs = rapl_events_core, +}; + static bool test_msr(int idx, void *data) { return test_bit(idx, (unsigned long *) data); @@ -558,11 +589,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { }; /* - * Force to PERF_RAPL_MAX size due to: - * - perf_msr_probe(PERF_RAPL_MAX) + * Force to PERF_RAPL_PKG_EVENTS_MAX size due to: + * - perf_msr_probe(PERF_RAPL_PKG_EVENTS_MAX) * - want to use same event codes across both architectures */ -static struct perf_msr amd_rapl_msrs[] = { +static struct perf_msr amd_rapl_pkg_msrs[] = { [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 }, [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 }, @@ -570,77 +601,25 @@ static struct perf_msr amd_rapl_msrs[] = { [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, }; -static int rapl_cpu_offline(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - int target; - - /* Check if exiting cpu is used for collecting rapl events */ - if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) - return 0; - - pmu->cpu = -1; - /* Find a new cpu to collect rapl events */ - target = cpumask_any_but(get_rapl_pmu_cpumask(cpu), cpu); - - /* Migrate rapl events to the new target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &rapl_cpu_mask); - pmu->cpu = target; - perf_pmu_migrate_context(pmu->pmu, cpu, target); - } - return 0; -} - -static int rapl_cpu_online(unsigned int cpu) -{ - s32 rapl_pmu_idx = get_rapl_pmu_idx(cpu); - if (rapl_pmu_idx < 0) { - pr_err("topology_logical_(package/die)_id() returned a negative value"); - return -EINVAL; - } - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - int target; - - if (!pmu) { - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) - return -ENOMEM; - - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - rapl_hrtimer_init(pmu); - - rapl_pmus->pmus[rapl_pmu_idx] = pmu; - } - - /* - * Check if there is an online cpu in the package which collects rapl - * events already. - */ - target = cpumask_any_and(&rapl_cpu_mask, get_rapl_pmu_cpumask(cpu)); - if (target < nr_cpu_ids) - return 0; - - cpumask_set_cpu(cpu, &rapl_cpu_mask); - pmu->cpu = cpu; - return 0; -} +static struct perf_msr amd_rapl_core_msrs[] = { + [PERF_RAPL_CORE] = { MSR_AMD_CORE_ENERGY_STATUS, &rapl_events_core_group, + test_msr, false, RAPL_MSR_MASK }, +}; -static int rapl_check_hw_unit(struct rapl_model *rm) +static int rapl_check_hw_unit(void) { u64 msr_rapl_power_unit_bits; int i; - /* protect rdmsrl() to handle virtualization */ - if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits)) + /* protect rdmsrq() to handle virtualization */ + if (rdmsrq_safe(rapl_model->msr_power_unit, &msr_rapl_power_unit_bits)) return -1; - for (i = 0; i < NR_RAPL_DOMAINS; i++) - rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) + rapl_pkg_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; - switch (rm->unit_quirk) { + rapl_core_hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; + + switch (rapl_model->unit_quirk) { /* * DRAM domain on HSW server and KNL has fixed energy unit which can be * different than the unit from power unit MSR. See @@ -648,17 +627,16 @@ static int rapl_check_hw_unit(struct rapl_model *rm) * of 2. Datasheet, September 2014, Reference Number: 330784-001 " */ case RAPL_UNIT_QUIRK_INTEL_HSW: - rapl_hw_unit[PERF_RAPL_RAM] = 16; + rapl_pkg_hw_unit[PERF_RAPL_RAM] = 16; break; /* SPR uses a fixed energy unit for Psys domain. */ case RAPL_UNIT_QUIRK_INTEL_SPR: - rapl_hw_unit[PERF_RAPL_PSYS] = 0; + rapl_pkg_hw_unit[PERF_RAPL_PSYS] = 0; break; default: break; } - /* * Calculate the timer rate: * Use reference of 200W for scaling the timeout to avoid counter @@ -667,9 +645,9 @@ static int rapl_check_hw_unit(struct rapl_model *rm) * if hw unit is 32, then we use 2 ms 1/200/2 */ rapl_timer_ms = 2; - if (rapl_hw_unit[0] < 32) { + if (rapl_pkg_hw_unit[0] < 32) { rapl_timer_ms = (1000 / (2 * 100)); - rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1)); + rapl_timer_ms *= (1ULL << (32 - rapl_pkg_hw_unit[0] - 1)); } return 0; } @@ -677,24 +655,32 @@ static int rapl_check_hw_unit(struct rapl_model *rm) static void __init rapl_advertise(void) { int i; + int num_counters = hweight32(rapl_pmus_pkg->cntr_mask); + + if (rapl_pmus_core) + num_counters += hweight32(rapl_pmus_core->cntr_mask); pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n", - hweight32(rapl_cntr_mask), rapl_timer_ms); + num_counters, rapl_timer_ms); - for (i = 0; i < NR_RAPL_DOMAINS; i++) { - if (rapl_cntr_mask & (1 << i)) { + for (i = 0; i < NR_RAPL_PKG_DOMAINS; i++) { + if (rapl_pmus_pkg->cntr_mask & (1 << i)) { pr_info("hw unit of domain %s 2^-%d Joules\n", - rapl_domain_names[i], rapl_hw_unit[i]); + rapl_pkg_domain_names[i], rapl_pkg_hw_unit[i]); } } + + if (rapl_pmus_core && (rapl_pmus_core->cntr_mask & (1 << PERF_RAPL_CORE))) + pr_info("hw unit of domain %s 2^-%d Joules\n", + rapl_core_domain_name, rapl_core_hw_unit); } -static void cleanup_rapl_pmus(void) +static void cleanup_rapl_pmus(struct rapl_pmus *rapl_pmus) { int i; for (i = 0; i < rapl_pmus->nr_rapl_pmu; i++) - kfree(rapl_pmus->pmus[i]); + kfree(rapl_pmus->rapl_pmu[i]); kfree(rapl_pmus); } @@ -707,17 +693,61 @@ static const struct attribute_group *rapl_attr_update[] = { NULL, }; -static int __init init_rapl_pmus(void) +static const struct attribute_group *rapl_core_attr_update[] = { + &rapl_events_core_group, + NULL, +}; + +static int __init init_rapl_pmu(struct rapl_pmus *rapl_pmus) +{ + struct rapl_pmu *rapl_pmu; + int idx; + + for (idx = 0; idx < rapl_pmus->nr_rapl_pmu; idx++) { + rapl_pmu = kzalloc(sizeof(*rapl_pmu), GFP_KERNEL); + if (!rapl_pmu) + goto free; + + raw_spin_lock_init(&rapl_pmu->lock); + INIT_LIST_HEAD(&rapl_pmu->active_list); + rapl_pmu->pmu = &rapl_pmus->pmu; + rapl_pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(rapl_pmu); + + rapl_pmus->rapl_pmu[idx] = rapl_pmu; + } + + return 0; +free: + for (; idx > 0; idx--) + kfree(rapl_pmus->rapl_pmu[idx - 1]); + return -ENOMEM; +} + +static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_scope, + const struct attribute_group **rapl_attr_groups, + const struct attribute_group **rapl_attr_update) { int nr_rapl_pmu = topology_max_packages(); + struct rapl_pmus *rapl_pmus; + int ret; - if (!rapl_pmu_is_pkg_scope()) - nr_rapl_pmu *= topology_max_dies_per_package(); + /* + * rapl_pmu_scope must be either PKG, DIE or CORE + */ + if (rapl_pmu_scope == PERF_PMU_SCOPE_DIE) + nr_rapl_pmu *= topology_max_dies_per_package(); + else if (rapl_pmu_scope == PERF_PMU_SCOPE_CORE) + nr_rapl_pmu *= topology_num_cores_per_package(); + else if (rapl_pmu_scope != PERF_PMU_SCOPE_PKG) + return -EINVAL; - rapl_pmus = kzalloc(struct_size(rapl_pmus, pmus, nr_rapl_pmu), GFP_KERNEL); + rapl_pmus = kzalloc(struct_size(rapl_pmus, rapl_pmu, nr_rapl_pmu), GFP_KERNEL); if (!rapl_pmus) return -ENOMEM; + *rapl_pmus_ptr = rapl_pmus; + rapl_pmus->nr_rapl_pmu = nr_rapl_pmu; rapl_pmus->pmu.attr_groups = rapl_attr_groups; rapl_pmus->pmu.attr_update = rapl_attr_update; @@ -728,77 +758,85 @@ static int __init init_rapl_pmus(void) rapl_pmus->pmu.start = rapl_pmu_event_start; rapl_pmus->pmu.stop = rapl_pmu_event_stop; rapl_pmus->pmu.read = rapl_pmu_event_read; + rapl_pmus->pmu.scope = rapl_pmu_scope; rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; - return 0; + + ret = init_rapl_pmu(rapl_pmus); + if (ret) + kfree(rapl_pmus); + + return ret; } static struct rapl_model model_snb = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_PP1), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_snbep = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_hsw = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_hsx = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_knl = { - .events = BIT(PERF_RAPL_PKG) | + .pkg_events = BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .unit_quirk = RAPL_UNIT_QUIRK_INTEL_HSW, .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_skl = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1) | BIT(PERF_RAPL_PSYS), .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_msrs, + .rapl_pkg_msrs = intel_rapl_msrs, }; static struct rapl_model model_spr = { - .events = BIT(PERF_RAPL_PP0) | + .pkg_events = BIT(PERF_RAPL_PP0) | BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PSYS), .unit_quirk = RAPL_UNIT_QUIRK_INTEL_SPR, .msr_power_unit = MSR_RAPL_POWER_UNIT, - .rapl_msrs = intel_rapl_spr_msrs, + .rapl_pkg_msrs = intel_rapl_spr_msrs, }; static struct rapl_model model_amd_hygon = { - .events = BIT(PERF_RAPL_PKG), + .pkg_events = BIT(PERF_RAPL_PKG), + .core_events = BIT(PERF_RAPL_CORE), .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, - .rapl_msrs = amd_rapl_msrs, + .rapl_pkg_msrs = amd_rapl_pkg_msrs, + .rapl_core_msrs = amd_rapl_core_msrs, }; static const struct x86_cpu_id rapl_model_match[] __initconst = { @@ -846,6 +884,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl), {}, }; @@ -854,57 +893,73 @@ MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); static int __init rapl_pmu_init(void) { const struct x86_cpu_id *id; - struct rapl_model *rm; + int rapl_pkg_pmu_scope = PERF_PMU_SCOPE_DIE; int ret; + if (rapl_pkg_pmu_is_pkg_scope()) + rapl_pkg_pmu_scope = PERF_PMU_SCOPE_PKG; + id = x86_match_cpu(rapl_model_match); if (!id) return -ENODEV; - rm = (struct rapl_model *) id->driver_data; - - rapl_msrs = rm->rapl_msrs; + rapl_model = (struct rapl_model *) id->driver_data; - rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, - false, (void *) &rm->events); - - ret = rapl_check_hw_unit(rm); + ret = rapl_check_hw_unit(); if (ret) return ret; - ret = init_rapl_pmus(); + ret = init_rapl_pmus(&rapl_pmus_pkg, rapl_pkg_pmu_scope, rapl_attr_groups, + rapl_attr_update); if (ret) return ret; - /* - * Install callbacks. Core will call them for each online cpu. - */ - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, - "perf/x86/rapl:online", - rapl_cpu_online, rapl_cpu_offline); + rapl_pmus_pkg->cntr_mask = perf_msr_probe(rapl_model->rapl_pkg_msrs, + PERF_RAPL_PKG_EVENTS_MAX, false, + (void *) &rapl_model->pkg_events); + + ret = perf_pmu_register(&rapl_pmus_pkg->pmu, "power", -1); if (ret) goto out; - ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); - if (ret) - goto out1; + if (rapl_model->core_events) { + ret = init_rapl_pmus(&rapl_pmus_core, PERF_PMU_SCOPE_CORE, + rapl_core_attr_groups, + rapl_core_attr_update); + if (ret) { + pr_warn("power-core PMU initialization failed (%d)\n", ret); + goto core_init_failed; + } + + rapl_pmus_core->cntr_mask = perf_msr_probe(rapl_model->rapl_core_msrs, + PERF_RAPL_CORE_EVENTS_MAX, false, + (void *) &rapl_model->core_events); + + ret = perf_pmu_register(&rapl_pmus_core->pmu, "power_core", -1); + if (ret) { + pr_warn("power-core PMU registration failed (%d)\n", ret); + cleanup_rapl_pmus(rapl_pmus_core); + } + } +core_init_failed: rapl_advertise(); return 0; -out1: - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); - cleanup_rapl_pmus(); + cleanup_rapl_pmus(rapl_pmus_pkg); return ret; } module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - perf_pmu_unregister(&rapl_pmus->pmu); - cleanup_rapl_pmus(); + if (rapl_pmus_core) { + perf_pmu_unregister(&rapl_pmus_core->pmu); + cleanup_rapl_pmus(rapl_pmus_core); + } + perf_pmu_unregister(&rapl_pmus_pkg->pmu); + cleanup_rapl_pmus(rapl_pmus_pkg); } module_exit(intel_rapl_exit); |