summaryrefslogtreecommitdiff
path: root/tools/power/x86/turbostat
diff options
context:
space:
mode:
Diffstat (limited to 'tools/power/x86/turbostat')
-rw-r--r--tools/power/x86/turbostat/Makefile3
-rw-r--r--tools/power/x86/turbostat/turbostat.c101
2 files changed, 71 insertions, 33 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 045f5f7d68ab..13f1e8b9ac52 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -9,9 +9,10 @@ ifeq ("$(origin O)", "command line")
endif
turbostat : turbostat.c
-override CFLAGS += -Wall -I../../../include
+override CFLAGS += -O2 -Wall -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
+override CFLAGS += -D_FORTIFY_SOURCE=2
%: %.c
@mkdir -p $(BUILD_OUTPUT)
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 75fc4fb9901c..b2a86438f074 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -39,7 +39,6 @@ FILE *outf;
int *fd_percpu;
struct timeval interval_tv = {5, 0};
struct timespec interval_ts = {5, 0};
-struct timespec one_msec = {0, 1000000};
unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
@@ -60,6 +59,7 @@ unsigned int do_irtl_hsw;
unsigned int units = 1000000; /* MHz etc */
unsigned int genuine_intel;
unsigned int authentic_amd;
+unsigned int hygon_genuine;
unsigned int max_level, max_extended_level;
unsigned int has_invariant_tsc;
unsigned int do_nhm_platform_info;
@@ -100,6 +100,7 @@ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
+int ignore_stdin;
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -166,6 +167,7 @@ size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
struct thread_data {
struct timeval tv_begin;
struct timeval tv_end;
+ struct timeval tv_delta;
unsigned long long tsc;
unsigned long long aperf;
unsigned long long mperf;
@@ -506,6 +508,7 @@ unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAU
unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
+#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
@@ -849,7 +852,6 @@ int dump_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
- outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
@@ -911,7 +913,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (DO_BIC(BIC_TOD))
outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
- interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+ interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0;
tsc = t->tsc * tsc_tweak;
@@ -1287,6 +1289,14 @@ delta_core(struct core_data *new, struct core_data *old)
}
}
+int soft_c1_residency_display(int bic)
+{
+ if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr)
+ return 0;
+
+ return DO_BIC_READ(bic);
+}
+
/*
* old = new - old
*/
@@ -1309,6 +1319,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
* over-write old w/ new so we can print end of interval values
*/
+ timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta);
old->tv_begin = new->tv_begin;
old->tv_end = new->tv_end;
@@ -1322,7 +1333,8 @@ delta_thread(struct thread_data *new, struct thread_data *old,
old->c1 = new->c1 - old->c1;
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
+ soft_c1_residency_display(BIC_Avg_MHz)) {
if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
old->aperf = new->aperf - old->aperf;
old->mperf = new->mperf - old->mperf;
@@ -1404,6 +1416,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->tv_begin.tv_usec = 0;
t->tv_end.tv_sec = 0;
t->tv_end.tv_usec = 0;
+ t->tv_delta.tv_sec = 0;
+ t->tv_delta.tv_usec = 0;
t->tsc = 0;
t->aperf = 0;
@@ -1573,6 +1587,9 @@ void compute_average(struct thread_data *t, struct core_data *c,
for_all_cpus(sum_counters, t, c, p);
+ /* Use the global time delta for the average. */
+ average.threads.tv_delta = tv_delta;
+
average.threads.tsc /= topo.num_cpus;
average.threads.aperf /= topo.num_cpus;
average.threads.mperf /= topo.num_cpus;
@@ -1714,7 +1731,7 @@ void get_apic_id(struct thread_data *t)
if (!DO_BIC(BIC_X2APIC))
return;
- if (authentic_amd) {
+ if (authentic_amd || hygon_genuine) {
unsigned int topology_extensions;
if (max_extended_level < 0x8000001e)
@@ -1762,19 +1779,20 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
struct msr_counter *mp;
int i;
- gettimeofday(&t->tv_begin, (struct timezone *)NULL);
-
if (cpu_migrate(cpu)) {
fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
}
+ gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
if (first_counter_read)
get_apic_id(t);
retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
+ soft_c1_residency_display(BIC_Avg_MHz)) {
unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
/*
@@ -1851,20 +1869,20 @@ retry:
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (DO_BIC(BIC_CPU_c3)) {
+ if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
}
- if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
+ if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
- } else if (do_knl_cstates) {
+ } else if (do_knl_cstates || soft_c1_residency_display(BIC_CPU_c6)) {
if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
return -7;
}
- if (DO_BIC(BIC_CPU_c7))
+ if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7))
if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
return -8;
@@ -2912,6 +2930,7 @@ int snapshot_cpu_lpi_us(void)
if (retval != 1) {
fprintf(stderr, "Disabling Low Power Idle CPU output\n");
BIC_NOT_PRESENT(BIC_CPU_LPI);
+ fclose(fp);
return -1;
}
@@ -2938,6 +2957,7 @@ int snapshot_sys_lpi_us(void)
if (retval != 1) {
fprintf(stderr, "Disabling Low Power Idle System output\n");
BIC_NOT_PRESENT(BIC_SYS_LPI);
+ fclose(fp);
return -1;
}
fclose(fp);
@@ -2985,8 +3005,6 @@ static void signal_handler (int signal)
fprintf(stderr, "SIGUSR1\n");
break;
}
- /* make sure this manually-invoked interval is at least 1ms long */
- nanosleep(&one_msec, NULL);
}
void setup_signal_handler(void)
@@ -3005,29 +3023,38 @@ void setup_signal_handler(void)
void do_sleep(void)
{
- struct timeval select_timeout;
+ struct timeval tout;
+ struct timespec rest;
fd_set readfds;
int retval;
FD_ZERO(&readfds);
FD_SET(0, &readfds);
- if (!isatty(fileno(stdin))) {
+ if (ignore_stdin) {
nanosleep(&interval_ts, NULL);
return;
}
- select_timeout = interval_tv;
- retval = select(1, &readfds, NULL, NULL, &select_timeout);
+ tout = interval_tv;
+ retval = select(1, &readfds, NULL, NULL, &tout);
if (retval == 1) {
switch (getc(stdin)) {
case 'q':
exit_requested = 1;
break;
+ case EOF:
+ /*
+ * 'stdin' is a pipe closed on the other end. There
+ * won't be any further input.
+ */
+ ignore_stdin = 1;
+ /* Sleep the rest of the time */
+ rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000);
+ rest.tv_nsec = (tout.tv_usec % 1000000) * 1000;
+ nanosleep(&rest, NULL);
}
- /* make sure this manually-invoked interval is at least 1ms long */
- nanosleep(&one_msec, NULL);
}
}
@@ -3209,6 +3236,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
break;
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_X: /* HSX */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
@@ -3405,6 +3433,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
case INTEL_FAM6_IVYBRIDGE: /* IVB */
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_X: /* HSX */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
@@ -3803,6 +3832,7 @@ double get_tdp_amd(unsigned int family)
{
switch (family) {
case 0x17:
+ case 0x18:
default:
/* This is the max stock TDP of HEDT/Server Fam17h chips */
return 250.0;
@@ -3841,6 +3871,7 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
case INTEL_FAM6_SANDYBRIDGE:
case INTEL_FAM6_IVYBRIDGE:
case INTEL_FAM6_HASWELL_CORE: /* HSW */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
@@ -3982,6 +4013,7 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
switch (family) {
case 0x17: /* Zen, Zen+ */
+ case 0x18: /* Hygon Dhyana */
do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
if (rapl_joules) {
BIC_PRESENT(BIC_Pkg_J);
@@ -4002,7 +4034,7 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f));
rapl_power_units = ldexp(1.0, -(msr & 0xf));
- tdp = get_tdp_amd(model);
+ tdp = get_tdp_amd(family);
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (!quiet)
@@ -4018,7 +4050,7 @@ void rapl_probe(unsigned int family, unsigned int model)
{
if (genuine_intel)
rapl_probe_intel(family, model);
- if (authentic_amd)
+ if (authentic_amd || hygon_genuine)
rapl_probe_amd(family, model);
}
@@ -4032,6 +4064,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
switch (model) {
case INTEL_FAM6_HASWELL_CORE: /* HSW */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
do_gfx_perf_limit_reasons = 1;
case INTEL_FAM6_HASWELL_X: /* HSX */
@@ -4251,6 +4284,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
case INTEL_FAM6_HASWELL_CORE: /* HSW */
case INTEL_FAM6_HASWELL_X: /* HSW */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_HASWELL_GT3E: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
@@ -4267,7 +4301,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
}
/*
- * HSW adds support for additional MSRs:
+ * HSW ULT added support for C8/C9/C10 MSRs:
*
* MSR_PKG_C8_RESIDENCY 0x00000630
* MSR_PKG_C9_RESIDENCY 0x00000631
@@ -4278,13 +4312,13 @@ int has_snb_msrs(unsigned int family, unsigned int model)
* MSR_PKGC10_IRTL 0x00000635
*
*/
-int has_hsw_msrs(unsigned int family, unsigned int model)
+int has_c8910_msrs(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
switch (model) {
- case INTEL_FAM6_HASWELL_CORE:
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
case INTEL_FAM6_BROADWELL_CORE: /* BDW */
case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
@@ -4568,9 +4602,6 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_XEON_PHI_KNM:
return INTEL_FAM6_XEON_PHI_KNL;
- case INTEL_FAM6_HASWELL_ULT:
- return INTEL_FAM6_HASWELL_CORE;
-
case INTEL_FAM6_BROADWELL_X:
case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
return INTEL_FAM6_BROADWELL_X;
@@ -4582,7 +4613,11 @@ unsigned int intel_model_duplicates(unsigned int model)
return INTEL_FAM6_SKYLAKE_MOBILE;
case INTEL_FAM6_ICELAKE_MOBILE:
+ case INTEL_FAM6_ICELAKE_NNPI:
return INTEL_FAM6_CANNONLAKE_MOBILE;
+
+ case INTEL_FAM6_ATOM_TREMONT_X:
+ return INTEL_FAM6_ATOM_GOLDMONT_X;
}
return model;
}
@@ -4600,6 +4635,8 @@ void process_cpuid()
genuine_intel = 1;
else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
authentic_amd = 1;
+ else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
+ hygon_genuine = 1;
if (!quiet)
fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
@@ -4820,12 +4857,12 @@ void process_cpuid()
BIC_NOT_PRESENT(BIC_CPU_c7);
BIC_NOT_PRESENT(BIC_Pkgpc7);
}
- if (has_hsw_msrs(family, model)) {
+ if (has_c8910_msrs(family, model)) {
BIC_PRESENT(BIC_Pkgpc8);
BIC_PRESENT(BIC_Pkgpc9);
BIC_PRESENT(BIC_Pkgpc10);
}
- do_irtl_hsw = has_hsw_msrs(family, model);
+ do_irtl_hsw = has_c8910_msrs(family, model);
if (has_skl_msrs(family, model)) {
BIC_PRESENT(BIC_Totl_c0);
BIC_PRESENT(BIC_Any_c0);
@@ -5123,7 +5160,7 @@ int initialize_counters(int cpu_id)
void allocate_output_buffer()
{
- output_buffer = calloc(1, (1 + topo.num_cpus) * 1024);
+ output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
outp = output_buffer;
if (outp == NULL)
err(-1, "calloc output buffer");
@@ -5269,7 +5306,7 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(outf, "turbostat version 19.03.20"
+ fprintf(outf, "turbostat version 19.08.31"
" - Len Brown <lenb@kernel.org>\n");
}