summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-record.txt6
-rw-r--r--tools/perf/builtin-probe.c2
-rw-r--r--tools/perf/builtin-record.c2
-rw-r--r--tools/perf/builtin-top.c4
-rw-r--r--tools/perf/builtin-trace.c10
-rw-r--r--tools/perf/config/Makefile14
-rw-r--r--tools/perf/config/feature-checks/Makefile3
-rw-r--r--tools/perf/config/feature-checks/test-all.c4
-rw-r--r--tools/perf/tests/parse-events.c3
-rw-r--r--tools/perf/ui/browser.c4
-rw-r--r--tools/perf/ui/browsers/hists.c11
-rw-r--r--tools/perf/util/event.c6
-rw-r--r--tools/perf/util/evlist.c6
-rw-r--r--tools/perf/util/evsel.c4
-rw-r--r--tools/perf/util/evsel.h5
-rw-r--r--tools/perf/util/machine.c2
-rw-r--r--tools/perf/util/target.h1
-rw-r--r--tools/perf/util/unwind.c9
-rw-r--r--tools/perf/util/unwind.h5
-rw-r--r--tools/power/cpupower/man/cpupower-idle-info.13
-rw-r--r--tools/power/cpupower/man/cpupower-idle-set.171
-rw-r--r--tools/power/cpupower/utils/helpers/sysfs.c4
-rw-r--r--tools/power/x86/turbostat/turbostat.c197
23 files changed, 279 insertions, 97 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 052f7c4dc00c..43b42c4f4a91 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -201,6 +201,12 @@ abort events and some memory events in precise mode on modern Intel CPUs.
--transaction::
Record transaction flags for transaction related events.
+--force-per-cpu::
+Force the use of per-cpu mmaps. By default, when tasks are specified (i.e. -p,
+-t or -u options) per-thread mmaps are created. This option overrides that and
+forces per-cpu mmaps. A side-effect of that is that inheritance is
+automatically enabled. Add the -i option also to disable inheritance.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 89acc17cf2a0..6ea9e85bdc00 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -325,6 +325,8 @@ int cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
opt_set_filter),
OPT_CALLBACK('x', "exec", NULL, "executable|path",
"target executable name or path", opt_set_target),
+ OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
+ "Disable symbol demangling"),
OPT_END()
};
int ret;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4d644fe2d5b7..7c8020a32784 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -888,6 +888,8 @@ const struct option record_options[] = {
"sample by weight (on special events only)"),
OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
"sample transaction flags (special events only)"),
+ OPT_BOOLEAN(0, "force-per-cpu", &record.opts.target.force_per_cpu,
+ "force the use of per-cpu mmaps"),
OPT_END()
};
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index b8f8e29db332..71e6402729a8 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1172,7 +1172,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
status = target__validate(target);
if (status) {
target__strerror(target, status, errbuf, BUFSIZ);
- ui__warning("%s", errbuf);
+ ui__warning("%s\n", errbuf);
}
status = target__parse_uid(target);
@@ -1180,7 +1180,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
int saved_errno = errno;
target__strerror(target, status, errbuf, BUFSIZ);
- ui__error("%s", errbuf);
+ ui__error("%s\n", errbuf);
status = -saved_errno;
goto out_delete_evlist;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 6b230af940e2..8be17fc462ba 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2112,9 +2112,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
printed += fprintf(fp, "\n");
- printed += fprintf(fp, " msec/call\n");
- printed += fprintf(fp, " syscall calls min avg max stddev\n");
- printed += fprintf(fp, " --------------- -------- -------- -------- -------- ------\n");
+ printed += fprintf(fp, " syscall calls min avg max stddev\n");
+ printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
+ printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
/* each int_node is a syscall */
while (inode) {
@@ -2131,9 +2131,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace,
sc = &trace->syscalls.table[inode->i];
printed += fprintf(fp, " %-15s", sc->name);
- printed += fprintf(fp, " %8" PRIu64 " %8.3f %8.3f",
+ printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
n, min, avg);
- printed += fprintf(fp, " %8.3f %6.2f\n", max, pct);
+ printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
}
inode = intlist__next(inode);
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index f5905f2b197d..f7d11a811c74 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -142,7 +142,8 @@ CORE_FEATURE_TESTS = \
libunwind \
on-exit \
stackprotector \
- stackprotector-all
+ stackprotector-all \
+ timerfd
#
# So here we detect whether test-all was rebuilt, to be able
@@ -328,8 +329,14 @@ ifndef NO_LIBUNWIND
msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1);
NO_LIBUNWIND := 1
else
- ifneq ($(feature-libunwind-debug-frame), 1)
- msg := $(warning No debug_frame support found in libunwind);
+ ifeq ($(ARCH),arm)
+ $(call feature_check,libunwind-debug-frame)
+ ifneq ($(feature-libunwind-debug-frame), 1)
+ msg := $(warning No debug_frame support found in libunwind);
+ CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
+ endif
+ else
+ # non-ARM has no dwarf_find_debug_frame() function:
CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
endif
endif
@@ -405,7 +412,6 @@ else
endif
endif
-$(call feature_check,timerfd)
ifeq ($(feature-timerfd), 1)
CFLAGS += -DHAVE_TIMERFD_SUPPORT
else
diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile
index e8e195f49a4e..87e790017c69 100644
--- a/tools/perf/config/feature-checks/Makefile
+++ b/tools/perf/config/feature-checks/Makefile
@@ -76,6 +76,9 @@ test-libnuma:
test-libunwind:
$(BUILD) $(LIBUNWIND_LIBS) -lelf
+test-libunwind-debug-frame:
+ $(BUILD) $(LIBUNWIND_LIBS) -lelf
+
test-libaudit:
$(BUILD) -laudit
diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c
index 799865b60772..59e7a705e146 100644
--- a/tools/perf/config/feature-checks/test-all.c
+++ b/tools/perf/config/feature-checks/test-all.c
@@ -49,10 +49,6 @@
# include "test-libunwind.c"
#undef main
-#define main main_test_libunwind_debug_frame
-# include "test-libunwind-debug-frame.c"
-#undef main
-
#define main main_test_libaudit
# include "test-libaudit.c"
#undef main
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index ef671cd41bb3..3cbd10496087 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -441,9 +441,8 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist)
static int test__checkevent_pmu_events(struct perf_evlist *evlist)
{
- struct perf_evsel *evsel;
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
- evsel = list_entry(evlist->entries.next, struct perf_evsel, node);
TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
TEST_ASSERT_VAL("wrong exclude_user",
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index bbc782e364b0..cbaa7af45513 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -569,7 +569,7 @@ void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence)
browser->top = browser->top + browser->top_idx + offset;
break;
case SEEK_END:
- browser->top = browser->top + browser->nr_entries + offset;
+ browser->top = browser->top + browser->nr_entries - 1 + offset;
break;
default:
return;
@@ -680,7 +680,7 @@ static void __ui_browser__line_arrow_down(struct ui_browser *browser,
if (end >= browser->top_idx + browser->height)
end_row = browser->height - 1;
else
- end_row = end - browser->top_idx;;
+ end_row = end - browser->top_idx;
ui_browser__gotorc(browser, row, column);
SLsmg_draw_vline(end_row - row + 1);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 16848bb4c418..a440e03cd8c2 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1847,15 +1847,15 @@ browse_hists:
switch (key) {
case K_TAB:
if (pos->node.next == &evlist->entries)
- pos = list_entry(evlist->entries.next, struct perf_evsel, node);
+ pos = perf_evlist__first(evlist);
else
- pos = list_entry(pos->node.next, struct perf_evsel, node);
+ pos = perf_evsel__next(pos);
goto browse_hists;
case K_UNTAB:
if (pos->node.prev == &evlist->entries)
- pos = list_entry(evlist->entries.prev, struct perf_evsel, node);
+ pos = perf_evlist__last(evlist);
else
- pos = list_entry(pos->node.prev, struct perf_evsel, node);
+ pos = perf_evsel__prev(pos);
goto browse_hists;
case K_ESC:
if (!ui_browser__dialog_yesno(&menu->b,
@@ -1943,8 +1943,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
single_entry:
if (nr_entries == 1) {
- struct perf_evsel *first = list_entry(evlist->entries.next,
- struct perf_evsel, node);
+ struct perf_evsel *first = perf_evlist__first(evlist);
const char *ev_name = perf_evsel__name(first);
return perf_evsel__hists_browse(first, nr_entries, help,
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 6e3a846aed0e..bb788c109fe6 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -209,8 +209,10 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
&event->mmap.start, &event->mmap.len, prot,
&event->mmap.pgoff,
execname);
-
- if (n != 5)
+ /*
+ * Anon maps don't have the execname.
+ */
+ if (n < 4)
continue;
/*
* Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index dc6fa3fbb180..bbc746aa5716 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -819,7 +819,9 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
if (evlist->threads == NULL)
return -1;
- if (target__has_task(target))
+ if (target->force_per_cpu)
+ evlist->cpus = cpu_map__new(target->cpu_list);
+ else if (target__has_task(target))
evlist->cpus = cpu_map__dummy_new();
else if (!target__has_cpu(target) && !target->uses_mmap)
evlist->cpus = cpu_map__dummy_new();
@@ -1148,7 +1150,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
perf_evsel__name(evsel));
}
- return printed + fprintf(fp, "\n");;
+ return printed + fprintf(fp, "\n");
}
int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 18f7c188ff63..46dd4c2a41ce 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -645,7 +645,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
}
}
- if (target__has_cpu(&opts->target))
+ if (target__has_cpu(&opts->target) || opts->target.force_per_cpu)
perf_evsel__set_sample_bit(evsel, CPU);
if (opts->period)
@@ -653,7 +653,7 @@ void perf_evsel__config(struct perf_evsel *evsel,
if (!perf_missing_features.sample_id_all &&
(opts->sample_time || !opts->no_inherit ||
- target__has_cpu(&opts->target)))
+ target__has_cpu(&opts->target) || opts->target.force_per_cpu))
perf_evsel__set_sample_bit(evsel, TIME);
if (opts->raw_samples) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index f5029653dcd7..1ea7c92e6e33 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -279,6 +279,11 @@ static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel)
return list_entry(evsel->node.next, struct perf_evsel, node);
}
+static inline struct perf_evsel *perf_evsel__prev(struct perf_evsel *evsel)
+{
+ return list_entry(evsel->node.prev, struct perf_evsel, node);
+}
+
/**
* perf_evsel__is_group_leader - Return whether given evsel is a leader event
*
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 0393912d8033..84cdb072ac83 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1368,7 +1368,7 @@ int machine__resolve_callchain(struct machine *machine,
return unwind__get_entries(unwind_entry, &callchain_cursor, machine,
thread, evsel->attr.sample_regs_user,
- sample);
+ sample, max_stack);
}
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 89bab7129de4..2d0c50690892 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -12,6 +12,7 @@ struct target {
uid_t uid;
bool system_wide;
bool uses_mmap;
+ bool force_per_cpu;
};
enum target_errno {
diff --git a/tools/perf/util/unwind.c b/tools/perf/util/unwind.c
index 5390d0b8862a..0efd5393de85 100644
--- a/tools/perf/util/unwind.c
+++ b/tools/perf/util/unwind.c
@@ -559,7 +559,7 @@ static unw_accessors_t accessors = {
};
static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
- void *arg)
+ void *arg, int max_stack)
{
unw_addr_space_t addr_space;
unw_cursor_t c;
@@ -575,7 +575,7 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
if (ret)
display_error(ret);
- while (!ret && (unw_step(&c) > 0)) {
+ while (!ret && (unw_step(&c) > 0) && max_stack--) {
unw_word_t ip;
unw_get_reg(&c, UNW_REG_IP, &ip);
@@ -588,7 +588,8 @@ static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct machine *machine, struct thread *thread,
- u64 sample_uregs, struct perf_sample *data)
+ u64 sample_uregs, struct perf_sample *data,
+ int max_stack)
{
unw_word_t ip;
struct unwind_info ui = {
@@ -610,5 +611,5 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
if (ret)
return -ENOMEM;
- return get_entries(&ui, cb, arg);
+ return get_entries(&ui, cb, arg, max_stack);
}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
index ec0c71a2ca2e..d5966f49e22c 100644
--- a/tools/perf/util/unwind.h
+++ b/tools/perf/util/unwind.h
@@ -18,7 +18,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
struct machine *machine,
struct thread *thread,
u64 sample_uregs,
- struct perf_sample *data);
+ struct perf_sample *data, int max_stack);
int unwind__arch_reg_id(int regnum);
#else
static inline int
@@ -27,7 +27,8 @@ unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
struct machine *machine __maybe_unused,
struct thread *thread __maybe_unused,
u64 sample_uregs __maybe_unused,
- struct perf_sample *data __maybe_unused)
+ struct perf_sample *data __maybe_unused,
+ int max_stack __maybe_unused)
{
return 0;
}
diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1
index 4178effd9e99..7b3646adb92f 100644
--- a/tools/power/cpupower/man/cpupower-idle-info.1
+++ b/tools/power/cpupower/man/cpupower-idle-info.1
@@ -87,4 +87,5 @@ Thomas Renninger <trenn@suse.de>
.fi
.SH "SEE ALSO"
.LP
-cpupower(1), cpupower\-monitor(1), cpupower\-info(1), cpupower\-set(1)
+cpupower(1), cpupower\-monitor(1), cpupower\-info(1), cpupower\-set(1),
+cpupower\-idle\-set(1)
diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1
new file mode 100644
index 000000000000..6b1607272a5b
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower-idle-set.1
@@ -0,0 +1,71 @@
+.TH "CPUPOWER-IDLE-SET" "1" "0.1" "" "cpupower Manual"
+.SH "NAME"
+.LP
+cpupower idle\-set \- Utility to set cpu idle state specific kernel options
+.SH "SYNTAX"
+.LP
+cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
+.SH "DESCRIPTION"
+.LP
+The cpupower idle\-set subcommand allows to set cpu idle, also called cpu
+sleep state, specific options offered by the kernel. One example is disabling
+sleep states. This can be handy for power vs performance tuning.
+.SH "OPTIONS"
+.LP
+.TP
+\fB\-d\fR \fB\-\-disable\fR
+Disable a specific processor sleep state.
+.TP
+\fB\-e\fR \fB\-\-enable\fR
+Enable a specific processor sleep state.
+
+.SH "REMARKS"
+.LP
+Cpuidle Governors Policy on Disabling Sleep States
+
+.RS 4
+Depending on the used cpuidle governor, implementing the kernel policy
+how to choose sleep states, subsequent sleep states on this core, might get
+disabled as well.
+
+There are two cpuidle governors ladder and menu. While the ladder
+governor is always available, if CONFIG_CPU_IDLE is selected, the
+menu governor additionally requires CONFIG_NO_HZ.
+
+The behavior and the effect of the disable variable depends on the
+implementation of a particular governor. In the ladder governor, for
+example, it is not coherent, i.e. if one is disabling a light state,
+then all deeper states are disabled as well. Likewise, if one enables a
+deep state but a lighter state still is disabled, then this has no effect.
+.RE
+.LP
+Disabling the Lightest Sleep State may not have any Affect
+
+.RS 4
+If criteria are not met to enter deeper sleep states and the lightest sleep
+state is chosen when idle, the kernel may still enter this sleep state,
+irrespective of whether it is disabled or not. This is also reflected in
+the usage count of the disabled sleep state when using the cpupower idle-info
+command.
+.RE
+.LP
+Selecting specific CPU Cores
+
+.RS 4
+By default processor sleep states of all CPU cores are set. Please refer
+to the cpupower(1) manpage in the \-\-cpu option section how to disable
+C-states of specific cores.
+.RE
+.SH "FILES"
+.nf
+\fI/sys/devices/system/cpu/cpu*/cpuidle/state*\fP
+\fI/sys/devices/system/cpu/cpuidle/*\fP
+.fi
+.SH "AUTHORS"
+.nf
+Thomas Renninger <trenn@suse.de>
+.fi
+.SH "SEE ALSO"
+.LP
+cpupower(1), cpupower\-monitor(1), cpupower\-info(1), cpupower\-set(1),
+cpupower\-idle\-info(1)
diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c
index 5cdc600e8152..851c7a16ca49 100644
--- a/tools/power/cpupower/utils/helpers/sysfs.c
+++ b/tools/power/cpupower/utils/helpers/sysfs.c
@@ -278,7 +278,7 @@ static char *sysfs_idlestate_get_one_string(unsigned int cpu,
int sysfs_is_idlestate_disabled(unsigned int cpu,
unsigned int idlestate)
{
- if (sysfs_get_idlestate_count(cpu) < idlestate)
+ if (sysfs_get_idlestate_count(cpu) <= idlestate)
return -1;
if (!sysfs_idlestate_file_exists(cpu, idlestate,
@@ -303,7 +303,7 @@ int sysfs_idlestate_disable(unsigned int cpu,
char value[SYSFS_PATH_MAX];
int bytes_written;
- if (sysfs_get_idlestate_count(cpu) < idlestate)
+ if (sysfs_get_idlestate_count(cpu) <= idlestate)
return -1;
if (!sysfs_idlestate_file_exists(cpu, idlestate,
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index fe702076ca46..9d77f13c2d25 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2,7 +2,7 @@
* turbostat -- show CPU frequency and C-state residency
* on modern Intel turbo-capable processors.
*
- * Copyright (c) 2012 Intel Corporation.
+ * Copyright (c) 2013 Intel Corporation.
* Len Brown <len.brown@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
@@ -47,6 +47,8 @@ unsigned int skip_c1;
unsigned int do_nhm_cstates;
unsigned int do_snb_cstates;
unsigned int do_c8_c9_c10;
+unsigned int do_slm_cstates;
+unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
unsigned int units = 1000000000; /* Ghz etc */
@@ -81,6 +83,8 @@ double rapl_joule_counter_range;
#define RAPL_DRAM (1 << 3)
#define RAPL_PKG_PERF_STATUS (1 << 4)
#define RAPL_DRAM_PERF_STATUS (1 << 5)
+#define RAPL_PKG_POWER_INFO (1 << 6)
+#define RAPL_CORE_POLICY (1 << 7)
#define TJMAX_DEFAULT 100
#define MAX(a, b) ((a) > (b) ? (a) : (b))
@@ -96,7 +100,7 @@ struct thread_data {
unsigned long long tsc;
unsigned long long aperf;
unsigned long long mperf;
- unsigned long long c1; /* derived */
+ unsigned long long c1;
unsigned long long extra_msr64;
unsigned long long extra_delta64;
unsigned long long extra_msr32;
@@ -266,7 +270,7 @@ void print_header(void)
outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
if (do_nhm_cstates)
outp += sprintf(outp, " %%c1");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%c3");
if (do_nhm_cstates)
outp += sprintf(outp, " %%c6");
@@ -280,9 +284,9 @@ void print_header(void)
if (do_snb_cstates)
outp += sprintf(outp, " %%pc2");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%pc3");
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %%pc6");
if (do_snb_cstates)
outp += sprintf(outp, " %%pc7");
@@ -480,7 +484,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * c->c3/t->tsc);
if (do_nhm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * c->c6/t->tsc);
@@ -499,9 +503,9 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (do_snb_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc3/t->tsc);
- if (do_nhm_cstates)
+ if (do_nhm_cstates && !do_slm_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
if (do_snb_cstates)
outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
@@ -648,17 +652,24 @@ delta_thread(struct thread_data *new, struct thread_data *old,
}
- /*
- * As counter collection is not atomic,
- * it is possible for mperf's non-halted cycles + idle states
- * to exceed TSC's all cycles: show c1 = 0% in that case.
- */
- if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
- old->c1 = 0;
- else {
- /* normal case, derive c1 */
- old->c1 = old->tsc - old->mperf - core_delta->c3
+ if (use_c1_residency_msr) {
+ /*
+ * Some models have a dedicated C1 residency MSR,
+ * which should be more accurate than the derivation below.
+ */
+ } else {
+ /*
+ * As counter collection is not atomic,
+ * it is possible for mperf's non-halted cycles + idle states
+ * to exceed TSC's all cycles: show c1 = 0% in that case.
+ */
+ if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc)
+ old->c1 = 0;
+ else {
+ /* normal case, derive c1 */
+ old->c1 = old->tsc - old->mperf - core_delta->c3
- core_delta->c6 - core_delta->c7;
+ }
}
if (old->mperf == 0) {
@@ -872,13 +883,21 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
return -5;
+ if (use_c1_residency_msr) {
+ if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+ return -6;
+ }
+
/* collect core counters only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates) {
if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
return -6;
+ }
+
+ if (do_nhm_cstates) {
if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
return -7;
}
@@ -898,7 +917,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
- if (do_nhm_cstates) {
+ if (do_nhm_cstates && !do_slm_cstates) {
if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
return -9;
if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
@@ -977,7 +996,7 @@ void print_verbose_header(void)
ratio, bclk, ratio * bclk);
get_msr(0, MSR_IA32_POWER_CTL, &msr);
- fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E: %sabled)\n",
+ fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
msr, msr & 0x2 ? "EN" : "DIS");
if (!do_ivt_turbo_ratio_limit)
@@ -1046,25 +1065,28 @@ print_nhm_turbo_ratio_limits:
switch(msr & 0x7) {
case 0:
- fprintf(stderr, "pc0");
+ fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0");
break;
case 1:
- fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
+ fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0");
break;
case 2:
- fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
+ fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3");
break;
case 3:
- fprintf(stderr, "pc6");
+ fprintf(stderr, do_slm_cstates ? "invalid" : "pc6");
break;
case 4:
- fprintf(stderr, "pc7");
+ fprintf(stderr, do_slm_cstates ? "pc4" : "pc7");
break;
case 5:
- fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
+ fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid");
+ break;
+ case 6:
+ fprintf(stderr, do_slm_cstates ? "pc6" : "invalid");
break;
case 7:
- fprintf(stderr, "unlimited");
+ fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited");
break;
default:
fprintf(stderr, "invalid");
@@ -1460,6 +1482,8 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
case 0x3F: /* HSW */
case 0x45: /* HSW */
case 0x46: /* HSW */
+ case 0x37: /* BYT */
+ case 0x4D: /* AVN */
return 1;
case 0x2E: /* Nehalem-EX Xeon - Beckton */
case 0x2F: /* Westmere-EX Xeon - Eagleton */
@@ -1532,14 +1556,33 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
+double get_tdp(model)
+{
+ unsigned long long msr;
+
+ if (do_rapl & RAPL_PKG_POWER_INFO)
+ if (!get_msr(0, MSR_PKG_POWER_INFO, &msr))
+ return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+
+ switch (model) {
+ case 0x37:
+ case 0x4D:
+ return 30.0;
+ default:
+ return 135.0;
+ }
+}
+
+
/*
* rapl_probe()
*
- * sets do_rapl
+ * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
*/
void rapl_probe(unsigned int family, unsigned int model)
{
unsigned long long msr;
+ unsigned int time_unit;
double tdp;
if (!genuine_intel)
@@ -1555,11 +1598,15 @@ void rapl_probe(unsigned int family, unsigned int model)
case 0x3F: /* HSW */
case 0x45: /* HSW */
case 0x46: /* HSW */
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
break;
case 0x2D:
case 0x3E:
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ break;
+ case 0x37: /* BYT */
+ case 0x4D: /* AVN */
+ do_rapl = RAPL_PKG | RAPL_CORES ;
break;
default:
return;
@@ -1570,19 +1617,22 @@ void rapl_probe(unsigned int family, unsigned int model)
return;
rapl_power_units = 1.0 / (1 << (msr & 0xF));
- rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
- rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
+ if (model == 0x37)
+ rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
+ else
+ rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
- /* get TDP to determine energy counter range */
- if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
- return;
+ time_unit = msr >> 16 & 0xF;
+ if (time_unit == 0)
+ time_unit = 0xA;
- tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+ rapl_time_units = 1.0 / (1 << (time_unit));
- rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
+ tdp = get_tdp(model);
+ rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (verbose)
- fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
+ fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
return;
}
@@ -1668,7 +1718,6 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
int cpu;
- double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;
if (!do_rapl)
return 0;
@@ -1686,23 +1735,13 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
return -1;
- local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
- local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
- local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
-
- if (local_rapl_power_units != rapl_power_units)
- fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu);
- if (local_rapl_energy_units != rapl_energy_units)
- fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu);
- if (local_rapl_time_units != rapl_time_units)
- fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu);
-
if (verbose) {
fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
- local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
+ rapl_power_units, rapl_energy_units, rapl_time_units);
}
- if (do_rapl & RAPL_PKG) {
+ if (do_rapl & RAPL_PKG_POWER_INFO) {
+
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
return -5;
@@ -1714,6 +1753,9 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+ }
+ if (do_rapl & RAPL_PKG) {
+
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
return -9;
@@ -1749,12 +1791,16 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
- if (do_rapl & RAPL_CORES) {
+ if (do_rapl & RAPL_CORE_POLICY) {
if (verbose) {
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7;
fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+ }
+ }
+ if (do_rapl & RAPL_CORES) {
+ if (verbose) {
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
@@ -1813,10 +1859,48 @@ int has_c8_c9_c10(unsigned int family, unsigned int model)
}
+int is_slm(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+ switch (model) {
+ case 0x37: /* BYT */
+ case 0x4D: /* AVN */
+ return 1;
+ }
+ return 0;
+}
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
+
+double slm_bclk(void)
+{
+ unsigned long long msr = 3;
+ unsigned int i;
+ double freq;
+
+ if (get_msr(0, MSR_FSB_FREQ, &msr))
+ fprintf(stderr, "SLM BCLK: unknown\n");
+
+ i = msr & 0xf;
+ if (i >= SLM_BCLK_FREQS) {
+ fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
+ msr = 3;
+ }
+ freq = slm_freq_table[i];
+
+ fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
+
+ return freq;
+}
+
double discover_bclk(unsigned int family, unsigned int model)
{
if (is_snb(family, model))
return 100.00;
+ else if (is_slm(family, model))
+ return slm_bclk();
else
return 133.33;
}
@@ -1873,7 +1957,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
cpu, msr, target_c_local);
- if (target_c_local < 85 || target_c_local > 120)
+ if (target_c_local < 85 || target_c_local > 127)
goto guess;
tcc_activation_temp = target_c_local;
@@ -1970,6 +2054,7 @@ void check_cpuid()
do_smi = do_nhm_cstates;
do_snb_cstates = is_snb(family, model);
do_c8_c9_c10 = has_c8_c9_c10(family, model);
+ do_slm_cstates = is_slm(family, model);
bclk = discover_bclk(family, model);
do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
@@ -2331,7 +2416,7 @@ int main(int argc, char **argv)
cmdline(argc, argv);
if (verbose)
- fprintf(stderr, "turbostat v3.4 April 17, 2013"
+ fprintf(stderr, "turbostat v3.5 April 26, 2013"
" - Len Brown <lenb@kernel.org>\n");
turbostat_init();