summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorLen Brown <len.brown@intel.com>2017-06-24 08:11:52 +0300
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2017-06-27 02:47:32 +0300
commitf8475cef90082bf0902ddab106112de130d90395 (patch)
treec457befe40d6412c24fde174882524f1eac9b9c8 /arch/x86
parent1a4fe38add8be45eb3873ad756561b9baf6bcaef (diff)
downloadlinux-f8475cef90082bf0902ddab106112de130d90395.tar.xz
x86: use common aperfmperf_khz_on_cpu() to calculate KHz using APERF/MPERF
The goal of this change is to give users a uniform and meaningful result when they read /sys/...cpufreq/scaling_cur_freq on modern x86 hardware, as compared to what they get today. Modern x86 processors include the hardware needed to accurately calculate frequency over an interval -- APERF, MPERF, and the TSC. Here we provide an x86 routine to make this calculation on supported hardware, and use it in preference to any driver driver-specific cpufreq_driver.get() routine. MHz is computed like so: MHz = base_MHz * delta_APERF / delta_MPERF MHz is the average frequency of the busy processor over a measurement interval. The interval is defined to be the time between successive invocations of aperfmperf_khz_on_cpu(), which are expected to to happen on-demand when users read sysfs attribute cpufreq/scaling_cur_freq. As with previous methods of calculating MHz, idle time is excluded. base_MHz above is from TSC calibration global "cpu_khz". This x86 native method to calculate MHz returns a meaningful result no matter if P-states are controlled by hardware or firmware and/or if the Linux cpufreq sub-system is or is-not installed. When this routine is invoked more frequently, the measurement interval becomes shorter. However, the code limits re-computation to 10ms intervals so that average frequency remains meaningful. Discerning users are encouraged to take advantage of the turbostat(8) utility, which can gracefully handle concurrent measurement intervals of arbitrary length. Signed-off-by: Len Brown <len.brown@intel.com> Reviewed-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/kernel/cpu/Makefile1
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c79
2 files changed, 80 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 52000010c62e..cdf82492b770 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -21,6 +21,7 @@ obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
obj-y += bugs.o
+obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
new file mode 100644
index 000000000000..d869c8671e36
--- /dev/null
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -0,0 +1,79 @@
+/*
+ * x86 APERF/MPERF KHz calculation for
+ * /sys/.../cpufreq/scaling_cur_freq
+ *
+ * Copyright (C) 2017 Intel Corp.
+ * Author: Len Brown <len.brown@intel.com>
+ *
+ * This file is licensed under GPLv2.
+ */
+
+#include <linux/jiffies.h>
+#include <linux/math64.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+
+struct aperfmperf_sample {
+ unsigned int khz;
+ unsigned long jiffies;
+ u64 aperf;
+ u64 mperf;
+};
+
+static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
+
+/*
+ * aperfmperf_snapshot_khz()
+ * On the current CPU, snapshot APERF, MPERF, and jiffies
+ * unless we already did it within 10ms
+ * calculate kHz, save snapshot
+ */
+static void aperfmperf_snapshot_khz(void *dummy)
+{
+ u64 aperf, aperf_delta;
+ u64 mperf, mperf_delta;
+ struct aperfmperf_sample *s = this_cpu_ptr(&samples);
+
+ /* Don't bother re-computing within 10 ms */
+ if (time_before(jiffies, s->jiffies + HZ/100))
+ return;
+
+ rdmsrl(MSR_IA32_APERF, aperf);
+ rdmsrl(MSR_IA32_MPERF, mperf);
+
+ aperf_delta = aperf - s->aperf;
+ mperf_delta = mperf - s->mperf;
+
+ /*
+ * There is no architectural guarantee that MPERF
+ * increments faster than we can read it.
+ */
+ if (mperf_delta == 0)
+ return;
+
+ /*
+ * if (cpu_khz * aperf_delta) fits into ULLONG_MAX, then
+ * khz = (cpu_khz * aperf_delta) / mperf_delta
+ */
+ if (div64_u64(ULLONG_MAX, cpu_khz) > aperf_delta)
+ s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
+ else /* khz = aperf_delta / (mperf_delta / cpu_khz) */
+ s->khz = div64_u64(aperf_delta,
+ div64_u64(mperf_delta, cpu_khz));
+ s->jiffies = jiffies;
+ s->aperf = aperf;
+ s->mperf = mperf;
+}
+
+unsigned int arch_freq_get_on_cpu(int cpu)
+{
+ if (!cpu_khz)
+ return 0;
+
+ if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ return 0;
+
+ smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
+
+ return per_cpu(samples.khz, cpu);
+}