diff options
Diffstat (limited to 'tools')
428 files changed, 18773 insertions, 4314 deletions
diff --git a/tools/build/Build.include b/tools/build/Build.include new file mode 100644 index 000000000000..4c8daaccb82a --- /dev/null +++ b/tools/build/Build.include @@ -0,0 +1,81 @@ +### +# build: Generic definitions +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015 +# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015 +# + +### +# Convenient variables +comma := , +squote := ' + +### +# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o +dot-target = $(dir $@).$(notdir $@) + +### +# filename of target with directory and extension stripped +basetarget = $(basename $(notdir $@)) + +### +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(dot-target).d) + +### +# Check if both arguments has same arguments. Result is empty string if equal. +arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \ + $(filter-out $(cmd_$@), $(cmd_$(1))) ) + +### +# Escape single quote for use in echo statements +escsq = $(subst $(squote),'\$(squote)',$1) + +# Echo command +# Short version is used, if $(quiet) equals `quiet_', otherwise full one. +echo-cmd = $(if $($(quiet)cmd_$(1)),\ + echo ' $(call escsq,$($(quiet)cmd_$(1)))';) + +### +# Replace >$< with >$$< to preserve $ when reloading the .cmd file +# (needed for make) +# Replace >#< with >\#< to avoid starting a comment in the .cmd file +# (needed for make) +# Replace >'< with >'\''< to be able to enclose the whole string in '...' +# (needed for the shell) +make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) + +### +# Find any prerequisites that is newer than target or that does not exist. +# PHONY targets skipped in both cases. +any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) + +### +# if_changed_dep - execute command if any prerequisite is newer than +# target, or command line has changed and update +# dependencies in the cmd file +if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + cat $(depfile) > $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + +# if_changed - execute command if any prerequisite is newer than +# target, or command line has changed +if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd) + +### +# C flags to be used in rule definitions, includes: +# - depfile generation +# - global $(CFLAGS) +# - per target C flags +# - per object C flags +# - BUILD_STR macro to allow '-D"$(variable)"' constructs +c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt new file mode 100644 index 000000000000..00ad2d608727 --- /dev/null +++ b/tools/build/Documentation/Build.txt @@ -0,0 +1,139 @@ +Build Framework +=============== + +The perf build framework was adopted from the kernel build system, hence the +idea and the way how objects are built is the same. + +Basically the user provides set of 'Build' files that list objects and +directories to nest for specific target to be build. + +Unlike the kernel we don't have a single build object 'obj-y' list that where +we setup source objects, but we support more. This allows one 'Build' file to +carry a sources list for multiple build objects. + +a) Build framework makefiles +---------------------------- + +The build framework consists of 2 Makefiles: + + Build.include + Makefile.build + +While the 'Build.include' file contains just some generic definitions, the +'Makefile.build' file is the makefile used from the outside. It's +interface/usage is following: + + $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) + +where: + + KSRC - is the path to kernel sources + DIR - is the path to the project to be built + OBJECT - is the name of the build object + +When succefully finished the $(DIR) directory contains the final object file +called $(OBJECT)-in.o: + + $ ls $(DIR)/$(OBJECT)-in.o + +which includes all compiled sources described in 'Build' makefiles. + +a) Build makefiles +------------------ + +The user supplies 'Build' makefiles that contains a objects list, and connects +the build to nested directories. + +Assume we have the following project structure: + + ex/a.c + /b.c + /c.c + /d.c + /arch/e.c + /arch/f.c + +Out of which you build the 'ex' binary ' and the 'libex.a' library: + + 'ex' - consists of 'a.o', 'b.o' and libex.a + 'libex.a' - consists of 'c.o', 'd.o', 'e.o' and 'f.o' + +The build framework does not create the 'ex' and 'libex.a' binaries for you, it +only prepares proper objects to be compiled and grouped together. + +To follow the above example, the user provides following 'Build' files: + + ex/Build: + ex-y += a.o + ex-y += b.o + + libex-y += c.o + libex-y += d.o + libex-y += arch/ + + ex/arch/Build: + libex-y += e.o + libex-y += f.o + +and runs: + + $ make -f tools/build/Makefile.build dir=. obj=ex + $ make -f tools/build/Makefile.build dir=. obj=libex + +which creates the following objects: + + ex/ex-in.o + ex/libex-in.o + +that contain request objects names in Build files. + +It's only a matter of 2 single commands to create the final binaries: + + $ ar rcs libex.a libex-in.o + $ gcc -o ex ex-in.o libex.a + +You can check the 'ex' example in 'tools/build/tests/ex' for more details. + +b) Rules +-------- + +The build framework provides standard compilation rules to handle .S and .c +compilation. + +It's possible to include special rule if needed (like we do for flex or bison +code generation). + +c) CFLAGS +--------- + +It's possible to alter the standard object C flags in the following way: + + CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object + CFLAGS_gtk += '...' - alters CFLAGS for gtk build object + +This C flags changes has the scope of the Build makefile they are defined in. + + +d) Dependencies +--------------- + +For each built object file 'a.o' the '.a.cmd' is created and holds: + + - Command line used to built that object + (for each object) + + - Dependency rules generated by 'gcc -Wp,-MD,...' + (for compiled object) + +All existing '.cmd' files are included in the Build process to follow properly +the dependencies and trigger a rebuild when necessary. + + +e) Single rules +--------------- + +It's possible to build single object file by choice, like: + + $ make util/map.o # objects + $ make util/map.i # preprocessor + $ make util/map.s # assembly diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build new file mode 100644 index 000000000000..10df57237a66 --- /dev/null +++ b/tools/build/Makefile.build @@ -0,0 +1,130 @@ +### +# Main build makefile. +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015 +# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015 +# + +PHONY := __build +__build: + +ifeq ($(V),1) + quiet = + Q = +else + quiet=quiet_ + Q=@ +endif + +build-dir := $(srctree)/tools/build + +# Generic definitions +include $(build-dir)/Build.include + +# do not force detected configuration +-include .config-detected + +# Init all relevant variables used in build files so +# 1) they have correct type +# 2) they do not inherit any value from the environment +subdir-y := +obj-y := +subdir-y := +subdir-obj-y := + +# Build definitions +build-file := $(dir)/Build +include $(build-file) + +quiet_cmd_flex = FLEX $@ +quiet_cmd_bison = BISON $@ + +# Create directory unless it exists +quiet_cmd_mkdir = MKDIR $(dir $@) + cmd_mkdir = mkdir -p $(dir $@) + rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir)) + +# Compile command +quiet_cmd_cc_o_c = CC $@ + cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< + +quiet_cmd_cc_i_c = CPP $@ + cmd_cc_i_c = $(CC) $(c_flags) -E -o $@ $< + +quiet_cmd_cc_s_c = AS $@ + cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< + +# Link agregate command +# If there's nothing to link, create empty $@ object. +quiet_cmd_ld_multi = LD $@ + cmd_ld_multi = $(if $(strip $(obj-y)),\ + $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@) + +# Build rules +$(OUTPUT)%.o: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.o: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.i: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.i: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.s: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_s_c) + +# Gather build data: +# obj-y - list of build objects +# subdir-y - list of directories to nest +# subdir-obj-y - list of directories objects 'dir/$(obj)-in.o' +obj-y := $($(obj)-y) +subdir-y := $(patsubst %/,%,$(filter %/, $(obj-y))) +obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) +subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) + +# '$(OUTPUT)/dir' prefix to all objects +prefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(prefix),$(obj-y)) +subdir-obj-y := $(addprefix $(prefix),$(subdir-obj-y)) + +# Final '$(obj)-in.o' object +in-target := $(prefix)$(obj)-in.o + +PHONY += $(subdir-y) + +$(subdir-y): + $(Q)$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj) + +$(sort $(subdir-obj-y)): $(subdir-y) ; + +$(in-target): $(obj-y) FORCE + $(call rule_mkdir) + $(call if_changed,ld_multi) + +__build: $(in-target) + @: + +PHONY += FORCE +FORCE: + +# Include all cmd files to get all the dependency rules +# for all objects included +targets := $(wildcard $(sort $(obj-y) $(in-target) $(MAKECMDGOALS))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif + +.PHONY: $(PHONY) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature new file mode 100644 index 000000000000..3a0b0ca2a28c --- /dev/null +++ b/tools/build/Makefile.feature @@ -0,0 +1,171 @@ +feature_dir := $(srctree)/tools/build/feature + +ifneq ($(OUTPUT),) + OUTPUT_FEATURES = $(OUTPUT)feature/ + $(shell mkdir -p $(OUTPUT_FEATURES)) +endif + +feature_check = $(eval $(feature_check_code)) +define feature_check_code + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) +endef + +feature_set = $(eval $(feature_set_code)) +define feature_set_code + feature-$(1) := 1 +endef + +# +# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: +# + +# +# Note that this is not a complete list of all feature tests, just +# those that are typically built on a fully configured system. +# +# [ Feature tests not mentioned here have to be built explicitly in +# the rule that uses them - an example for that is the 'bionic' +# feature check. ] +# +FEATURE_TESTS = \ + backtrace \ + dwarf \ + fortify-source \ + sync-compare-and-swap \ + glibc \ + gtk2 \ + gtk2-infobar \ + libaudit \ + libbfd \ + libelf \ + libelf-getphdrnum \ + libelf-mmap \ + libnuma \ + libperl \ + libpython \ + libpython-version \ + libslang \ + libunwind \ + pthread-attr-setaffinity-np \ + stackprotector-all \ + timerfd \ + libdw-dwarf-unwind \ + zlib \ + lzma + +FEATURE_DISPLAY = \ + dwarf \ + glibc \ + gtk2 \ + libaudit \ + libbfd \ + libelf \ + libnuma \ + libperl \ + libpython \ + libslang \ + libunwind \ + libdw-dwarf-unwind \ + zlib \ + lzma + +# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. +# If in the future we need per-feature checks/flags for features not +# mentioned in this list we need to refactor this ;-). +set_test_all_flags = $(eval $(set_test_all_flags_code)) +define set_test_all_flags_code + FEATURE_CHECK_CFLAGS-all += $(FEATURE_CHECK_CFLAGS-$(1)) + FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1)) +endef + +$(foreach feat,$(FEATURE_TESTS),$(call set_test_all_flags,$(feat))) + +# +# Special fast-path for the 'all features are available' case: +# +$(call feature_check,all,$(MSG)) + +# +# Just in case the build freshly failed, make sure we print the +# feature matrix: +# +ifeq ($(feature-all), 1) + # + # test-all.c passed - just set all the core feature flags to 1: + # + $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat))) +else + $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C $(feature_dir) $(addsuffix .bin,$(FEATURE_TESTS)) >/dev/null 2>&1) + $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat))) +endif + +# +# Print the result of the feature test: +# +feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG)) + +define feature_print_status_code + ifeq ($(feature-$(1)), 1) + MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) + else + MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) + endif +endef + +feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG)) +define feature_print_text_code + MSG = $(shell printf '...%30s: %s' $(1) $(2)) +endef + +FEATURE_DUMP := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat)))) +FEATURE_DUMP_FILE := $(shell touch $(OUTPUT)FEATURE-DUMP; cat $(OUTPUT)FEATURE-DUMP) + +ifeq ($(dwarf-post-unwind),1) + FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text)) +endif + +# The $(feature_display) controls the default detection message +# output. It's set if: +# - detected features differes from stored features from +# last build (in FEATURE-DUMP file) +# - one of the $(FEATURE_DISPLAY) is not detected +# - VF is enabled + +ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)") + $(shell echo "$(FEATURE_DUMP)" > $(OUTPUT)FEATURE-DUMP) + feature_display := 1 +endif + +feature_display_check = $(eval $(feature_check_code)) +define feature_display_check_code + ifneq ($(feature-$(1)), 1) + feature_display := 1 + endif +endef + +$(foreach feat,$(FEATURE_DISPLAY),$(call feature_display_check,$(feat))) + +ifeq ($(VF),1) + feature_display := 1 + feature_verbose := 1 +endif + +ifeq ($(feature_display),1) + $(info ) + $(info Auto-detecting system features:) + $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),)) + + ifeq ($(dwarf-post-unwind),1) + $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) + endif + + ifneq ($(feature_verbose),1) + $(info ) + endif +endif + +ifeq ($(feature_verbose),1) + TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)) + $(foreach feat,$(TMP),$(call feature_print_status,$(feat),)) + $(info ) +endif diff --git a/tools/perf/config/feature-checks/.gitignore b/tools/build/feature/.gitignore index 80f3da0c3515..09b335b98842 100644 --- a/tools/perf/config/feature-checks/.gitignore +++ b/tools/build/feature/.gitignore @@ -1,2 +1,3 @@ *.d *.bin +*.output diff --git a/tools/perf/config/feature-checks/Makefile b/tools/build/feature/Makefile index 53f19b5dbc37..463ed8f2a267 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/build/feature/Makefile @@ -25,33 +25,40 @@ FILES= \ test-libslang.bin \ test-libunwind.bin \ test-libunwind-debug-frame.bin \ + test-pthread-attr-setaffinity-np.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ + test-libbabeltrace.bin \ test-compile-32.bin \ test-compile-x32.bin \ - test-zlib.bin + test-zlib.bin \ + test-lzma.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config all: $(FILES) -BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) +__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) + BUILD = $(__BUILD) > $(OUTPUT)$(@:.bin=.make.output) 2>&1 ############################### test-all.bin: - $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma test-hello.bin: $(BUILD) +test-pthread-attr-setaffinity-np.bin: + $(BUILD) -D_GNU_SOURCE -lpthread + test-stackprotector-all.bin: - $(BUILD) -Werror -fstack-protector-all + $(BUILD) -fstack-protector-all test-fortify-source.bin: - $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 + $(BUILD) -O2 -D_FORTIFY_SOURCE=2 test-bionic.bin: $(BUILD) @@ -114,10 +121,10 @@ test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl test-liberty.bin: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty + $(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty test-liberty-z.bin: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz + $(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz test-cplus-demangle.bin: $(BUILD) -liberty @@ -129,10 +136,13 @@ test-timerfd.bin: $(BUILD) test-libdw-dwarf-unwind.bin: - $(BUILD) + $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind) + +test-libbabeltrace.bin: + $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) test-sync-compare-and-swap.bin: - $(BUILD) -Werror + $(BUILD) test-compile-32.bin: $(CC) -m32 -o $(OUTPUT)$@ test-compile.c @@ -143,9 +153,12 @@ test-compile-x32.bin: test-zlib.bin: $(BUILD) -lz +test-lzma.bin: + $(BUILD) -llzma + -include *.d ############################### clean: - rm -f $(FILES) *.d + rm -f $(FILES) *.d $(FILES:.bin=.make.output) diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/build/feature/test-all.c index 652e0098eba6..84689a67814a 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/build/feature/test-all.c @@ -97,6 +97,26 @@ # include "test-zlib.c" #undef main +#define main main_test_pthread_attr_setaffinity_np +# include "test-pthread-attr-setaffinity-np.c" +#undef main + +# if 0 +/* + * Disable libbabeltrace check for test-all, because the requested + * library version is not released yet in most distributions. Will + * reenable later. + */ + +#define main main_test_libbabeltrace +# include "test-libbabeltrace.c" +#undef main +#endif + +#define main main_test_lzma +# include "test-lzma.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -121,6 +141,8 @@ int main(int argc, char *argv[]) main_test_libdw_dwarf_unwind(); main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); + main_test_pthread_attr_setaffinity_np(); + main_test_lzma(); return 0; } diff --git a/tools/perf/config/feature-checks/test-backtrace.c b/tools/build/feature/test-backtrace.c index 7124aa1dc8fb..7124aa1dc8fb 100644 --- a/tools/perf/config/feature-checks/test-backtrace.c +++ b/tools/build/feature/test-backtrace.c diff --git a/tools/perf/config/feature-checks/test-bionic.c b/tools/build/feature/test-bionic.c index eac24e9513eb..eac24e9513eb 100644 --- a/tools/perf/config/feature-checks/test-bionic.c +++ b/tools/build/feature/test-bionic.c diff --git a/tools/perf/config/feature-checks/test-compile.c b/tools/build/feature/test-compile.c index 31dbf45bf99c..31dbf45bf99c 100644 --- a/tools/perf/config/feature-checks/test-compile.c +++ b/tools/build/feature/test-compile.c diff --git a/tools/perf/config/feature-checks/test-cplus-demangle.c b/tools/build/feature/test-cplus-demangle.c index 610c686e0009..610c686e0009 100644 --- a/tools/perf/config/feature-checks/test-cplus-demangle.c +++ b/tools/build/feature/test-cplus-demangle.c diff --git a/tools/perf/config/feature-checks/test-dwarf.c b/tools/build/feature/test-dwarf.c index 3fc1801ce4a9..3fc1801ce4a9 100644 --- a/tools/perf/config/feature-checks/test-dwarf.c +++ b/tools/build/feature/test-dwarf.c diff --git a/tools/perf/config/feature-checks/test-fortify-source.c b/tools/build/feature/test-fortify-source.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-fortify-source.c +++ b/tools/build/feature/test-fortify-source.c diff --git a/tools/perf/config/feature-checks/test-glibc.c b/tools/build/feature/test-glibc.c index b0820345cd98..b0820345cd98 100644 --- a/tools/perf/config/feature-checks/test-glibc.c +++ b/tools/build/feature/test-glibc.c diff --git a/tools/perf/config/feature-checks/test-gtk2-infobar.c b/tools/build/feature/test-gtk2-infobar.c index 397b4646d066..397b4646d066 100644 --- a/tools/perf/config/feature-checks/test-gtk2-infobar.c +++ b/tools/build/feature/test-gtk2-infobar.c diff --git a/tools/perf/config/feature-checks/test-gtk2.c b/tools/build/feature/test-gtk2.c index 6bd80e509439..6bd80e509439 100644 --- a/tools/perf/config/feature-checks/test-gtk2.c +++ b/tools/build/feature/test-gtk2.c diff --git a/tools/perf/config/feature-checks/test-hello.c b/tools/build/feature/test-hello.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-hello.c +++ b/tools/build/feature/test-hello.c diff --git a/tools/perf/config/feature-checks/test-libaudit.c b/tools/build/feature/test-libaudit.c index afc019f08641..afc019f08641 100644 --- a/tools/perf/config/feature-checks/test-libaudit.c +++ b/tools/build/feature/test-libaudit.c diff --git a/tools/build/feature/test-libbabeltrace.c b/tools/build/feature/test-libbabeltrace.c new file mode 100644 index 000000000000..9cf802a04885 --- /dev/null +++ b/tools/build/feature/test-libbabeltrace.c @@ -0,0 +1,9 @@ + +#include <babeltrace/ctf-writer/writer.h> +#include <babeltrace/ctf-ir/stream-class.h> + +int main(void) +{ + bt_ctf_stream_class_get_packet_context_type((void *) 0); + return 0; +} diff --git a/tools/perf/config/feature-checks/test-libbfd.c b/tools/build/feature/test-libbfd.c index 24059907e990..24059907e990 100644 --- a/tools/perf/config/feature-checks/test-libbfd.c +++ b/tools/build/feature/test-libbfd.c diff --git a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c b/tools/build/feature/test-libdw-dwarf-unwind.c index f676a3ff442a..f676a3ff442a 100644 --- a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c +++ b/tools/build/feature/test-libdw-dwarf-unwind.c diff --git a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c b/tools/build/feature/test-libelf-getphdrnum.c index d710459306c3..d710459306c3 100644 --- a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c +++ b/tools/build/feature/test-libelf-getphdrnum.c diff --git a/tools/perf/config/feature-checks/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c index 564427d7ef18..564427d7ef18 100644 --- a/tools/perf/config/feature-checks/test-libelf-mmap.c +++ b/tools/build/feature/test-libelf-mmap.c diff --git a/tools/perf/config/feature-checks/test-libelf.c b/tools/build/feature/test-libelf.c index 08db322d8957..08db322d8957 100644 --- a/tools/perf/config/feature-checks/test-libelf.c +++ b/tools/build/feature/test-libelf.c diff --git a/tools/perf/config/feature-checks/test-libnuma.c b/tools/build/feature/test-libnuma.c index 4763d9cd587d..4763d9cd587d 100644 --- a/tools/perf/config/feature-checks/test-libnuma.c +++ b/tools/build/feature/test-libnuma.c diff --git a/tools/perf/config/feature-checks/test-libperl.c b/tools/build/feature/test-libperl.c index 8871f6a0fdb4..8871f6a0fdb4 100644 --- a/tools/perf/config/feature-checks/test-libperl.c +++ b/tools/build/feature/test-libperl.c diff --git a/tools/perf/config/feature-checks/test-libpython-version.c b/tools/build/feature/test-libpython-version.c index facea122d812..facea122d812 100644 --- a/tools/perf/config/feature-checks/test-libpython-version.c +++ b/tools/build/feature/test-libpython-version.c diff --git a/tools/perf/config/feature-checks/test-libpython.c b/tools/build/feature/test-libpython.c index b24b28ad6324..b24b28ad6324 100644 --- a/tools/perf/config/feature-checks/test-libpython.c +++ b/tools/build/feature/test-libpython.c diff --git a/tools/perf/config/feature-checks/test-libslang.c b/tools/build/feature/test-libslang.c index 22ff22ed94d1..22ff22ed94d1 100644 --- a/tools/perf/config/feature-checks/test-libslang.c +++ b/tools/build/feature/test-libslang.c diff --git a/tools/perf/config/feature-checks/test-libunwind-debug-frame.c b/tools/build/feature/test-libunwind-debug-frame.c index 0ef8087a104a..0ef8087a104a 100644 --- a/tools/perf/config/feature-checks/test-libunwind-debug-frame.c +++ b/tools/build/feature/test-libunwind-debug-frame.c diff --git a/tools/perf/config/feature-checks/test-libunwind.c b/tools/build/feature/test-libunwind.c index 43b9369bcab7..43b9369bcab7 100644 --- a/tools/perf/config/feature-checks/test-libunwind.c +++ b/tools/build/feature/test-libunwind.c diff --git a/tools/build/feature/test-lzma.c b/tools/build/feature/test-lzma.c new file mode 100644 index 000000000000..95adc8ced3dd --- /dev/null +++ b/tools/build/feature/test-lzma.c @@ -0,0 +1,10 @@ +#include <lzma.h> + +int main(void) +{ + lzma_stream strm = LZMA_STREAM_INIT; + int ret; + + ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); + return ret ? -1 : 0; +} diff --git a/tools/build/feature/test-pthread-attr-setaffinity-np.c b/tools/build/feature/test-pthread-attr-setaffinity-np.c new file mode 100644 index 000000000000..fdada5e8d454 --- /dev/null +++ b/tools/build/feature/test-pthread-attr-setaffinity-np.c @@ -0,0 +1,17 @@ +#include <stdint.h> +#include <pthread.h> +#include <sched.h> + +int main(void) +{ + int ret = 0; + pthread_attr_t thread_attr; + cpu_set_t cs; + + pthread_attr_init(&thread_attr); + CPU_ZERO(&cs); + + ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cs), &cs); + + return ret; +} diff --git a/tools/perf/config/feature-checks/test-stackprotector-all.c b/tools/build/feature/test-stackprotector-all.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-stackprotector-all.c +++ b/tools/build/feature/test-stackprotector-all.c diff --git a/tools/perf/config/feature-checks/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c index c34d4ca4af56..c34d4ca4af56 100644 --- a/tools/perf/config/feature-checks/test-sync-compare-and-swap.c +++ b/tools/build/feature/test-sync-compare-and-swap.c diff --git a/tools/perf/config/feature-checks/test-timerfd.c b/tools/build/feature/test-timerfd.c index 8c5c083b4d3c..8c5c083b4d3c 100644 --- a/tools/perf/config/feature-checks/test-timerfd.c +++ b/tools/build/feature/test-timerfd.c diff --git a/tools/perf/config/feature-checks/test-zlib.c b/tools/build/feature/test-zlib.c index e111fff6240e..e111fff6240e 100644 --- a/tools/perf/config/feature-checks/test-zlib.c +++ b/tools/build/feature/test-zlib.c diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build new file mode 100644 index 000000000000..0e6c3e6767e6 --- /dev/null +++ b/tools/build/tests/ex/Build @@ -0,0 +1,8 @@ +ex-y += ex.o +ex-y += a.o +ex-y += b.o +ex-y += empty/ + +libex-y += c.o +libex-y += d.o +libex-y += arch/ diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile new file mode 100644 index 000000000000..52d2476073a3 --- /dev/null +++ b/tools/build/tests/ex/Makefile @@ -0,0 +1,23 @@ +export srctree := ../../../.. +export CC := gcc +export LD := ld +export AR := ar + +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +ex: ex-in.o libex-in.o + gcc -o $@ $^ + +ex.%: FORCE + make -f $(srctree)/tools/build/Makefile.build dir=. $@ + +ex-in.o: FORCE + make $(build)=ex + +libex-in.o: FORCE + make $(build)=libex + +clean: + find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + rm -f ex ex.i ex.s + +.PHONY: FORCE diff --git a/tools/build/tests/ex/a.c b/tools/build/tests/ex/a.c new file mode 100644 index 000000000000..851762798c83 --- /dev/null +++ b/tools/build/tests/ex/a.c @@ -0,0 +1,5 @@ + +int a(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/Build b/tools/build/tests/ex/arch/Build new file mode 100644 index 000000000000..55506189efae --- /dev/null +++ b/tools/build/tests/ex/arch/Build @@ -0,0 +1,2 @@ +libex-y += e.o +libex-y += f.o diff --git a/tools/build/tests/ex/arch/e.c b/tools/build/tests/ex/arch/e.c new file mode 100644 index 000000000000..beaa4a1d7ba8 --- /dev/null +++ b/tools/build/tests/ex/arch/e.c @@ -0,0 +1,5 @@ + +int e(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/f.c b/tools/build/tests/ex/arch/f.c new file mode 100644 index 000000000000..7c3e9e9da5b7 --- /dev/null +++ b/tools/build/tests/ex/arch/f.c @@ -0,0 +1,5 @@ + +int f(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/b.c b/tools/build/tests/ex/b.c new file mode 100644 index 000000000000..c24ff9ca9a97 --- /dev/null +++ b/tools/build/tests/ex/b.c @@ -0,0 +1,5 @@ + +int b(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/c.c b/tools/build/tests/ex/c.c new file mode 100644 index 000000000000..e216d0217499 --- /dev/null +++ b/tools/build/tests/ex/c.c @@ -0,0 +1,5 @@ + +int c(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/d.c b/tools/build/tests/ex/d.c new file mode 100644 index 000000000000..80dc0f06151b --- /dev/null +++ b/tools/build/tests/ex/d.c @@ -0,0 +1,5 @@ + +int d(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/empty/Build b/tools/build/tests/ex/empty/Build new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/build/tests/ex/empty/Build diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c new file mode 100644 index 000000000000..dc42eb2e1a67 --- /dev/null +++ b/tools/build/tests/ex/ex.c @@ -0,0 +1,19 @@ + +int a(void); +int b(void); +int c(void); +int d(void); +int e(void); +int f(void); + +int main(void) +{ + a(); + b(); + c(); + d(); + e(); + f(); + + return 0; +} diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh new file mode 100755 index 000000000000..5494f8ea7567 --- /dev/null +++ b/tools/build/tests/run.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +function test_ex { + make -C ex V=1 clean > ex.out 2>&1 + make -C ex V=1 >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} + +function test_ex_suffix { + make -C ex V=1 clean > ex.out 2>&1 + + # use -rR to disable make's builtin rules + make -rR -C ex V=1 ex.o >> ex.out 2>&1 + make -rR -C ex V=1 ex.i >> ex.out 2>&1 + make -rR -C ex V=1 ex.s >> ex.out 2>&1 + + if [ -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + if [ ! -f ./ex/ex.o -o ! -f ./ex/ex.i -o ! -f ./ex/ex.s ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} +echo -n Testing.. + +test_ex +test_ex_suffix + +echo OK diff --git a/tools/hv/Makefile b/tools/hv/Makefile index bd22f786a60c..a8ab79556926 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -3,11 +3,11 @@ CC = $(CROSS_COMPILE)gcc PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra -CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) +CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) $(shell getconf LFS_CFLAGS) -all: hv_kvp_daemon hv_vss_daemon +all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon %: %.c $(CC) $(CFLAGS) -o $@ $^ clean: - $(RM) hv_kvp_daemon hv_vss_daemon + $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index f437d739f37d..9445d8f264a4 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -43,15 +43,9 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg) int error = HV_E_FAIL; char *q, *p; - /* - * If possile append a path seperator to the path. - */ - if (strlen((char *)smsg->path_name) < (W_MAX_PATH - 2)) - strcat((char *)smsg->path_name, "/"); - p = (char *)smsg->path_name; snprintf(target_fname, sizeof(target_fname), "%s/%s", - (char *)smsg->path_name, smsg->file_name); + (char *)smsg->path_name, (char *)smsg->file_name); syslog(LOG_INFO, "Target file name: %s", target_fname); /* @@ -137,7 +131,7 @@ void print_usage(char *argv[]) int main(int argc, char *argv[]) { - int fd, fcopy_fd, len; + int fcopy_fd, len; int error; int daemonize = 1, long_index = 0, opt; int version = FCOPY_CURRENT_VERSION; diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 6a6432a20a1d..408bb076a234 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -147,7 +147,6 @@ static void kvp_release_lock(int pool) static void kvp_update_file(int pool) { FILE *filep; - size_t bytes_written; /* * We are going to write our in-memory registry out to @@ -163,8 +162,7 @@ static void kvp_update_file(int pool) exit(EXIT_FAILURE); } - bytes_written = fwrite(kvp_file_info[pool].records, - sizeof(struct kvp_record), + fwrite(kvp_file_info[pool].records, sizeof(struct kvp_record), kvp_file_info[pool].num_records, filep); if (ferror(filep) || fclose(filep)) { @@ -310,7 +308,7 @@ static int kvp_file_init(void) return 0; } -static int kvp_key_delete(int pool, const char *key, int key_size) +static int kvp_key_delete(int pool, const __u8 *key, int key_size) { int i; int j, k; @@ -353,8 +351,8 @@ static int kvp_key_delete(int pool, const char *key, int key_size) return 1; } -static int kvp_key_add_or_modify(int pool, const char *key, int key_size, const char *value, - int value_size) +static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size, + const __u8 *value, int value_size) { int i; int num_records; @@ -407,7 +405,7 @@ static int kvp_key_add_or_modify(int pool, const char *key, int key_size, const return 0; } -static int kvp_get_value(int pool, const char *key, int key_size, char *value, +static int kvp_get_value(int pool, const __u8 *key, int key_size, __u8 *value, int value_size) { int i; @@ -439,8 +437,8 @@ static int kvp_get_value(int pool, const char *key, int key_size, char *value, return 1; } -static int kvp_pool_enumerate(int pool, int index, char *key, int key_size, - char *value, int value_size) +static int kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size, + __u8 *value, int value_size) { struct kvp_record *record; @@ -661,7 +659,7 @@ static char *kvp_if_name_to_mac(char *if_name) char *p, *x; char buf[256]; char addr_file[256]; - int i; + unsigned int i; char *mac_addr = NULL; snprintf(addr_file, sizeof(addr_file), "%s%s%s", "/sys/class/net/", @@ -700,7 +698,7 @@ static char *kvp_mac_to_if_name(char *mac) char buf[256]; char *kvp_net_dir = "/sys/class/net/"; char dev_id[256]; - int i; + unsigned int i; dir = opendir(kvp_net_dir); if (dir == NULL) @@ -750,7 +748,7 @@ static char *kvp_mac_to_if_name(char *mac) static void kvp_process_ipconfig_file(char *cmd, - char *config_buf, int len, + char *config_buf, unsigned int len, int element_size, int offset) { char buf[256]; @@ -768,7 +766,7 @@ static void kvp_process_ipconfig_file(char *cmd, if (offset == 0) memset(config_buf, 0, len); while ((p = fgets(buf, sizeof(buf), file)) != NULL) { - if ((len - strlen(config_buf)) < (element_size + 1)) + if (len < strlen(config_buf) + element_size + 1) break; x = strchr(p, '\n'); @@ -916,7 +914,7 @@ static int kvp_process_ip_address(void *addrp, static int kvp_get_ip_info(int family, char *if_name, int op, - void *out_buffer, int length) + void *out_buffer, unsigned int length) { struct ifaddrs *ifap; struct ifaddrs *curp; @@ -1019,8 +1017,7 @@ kvp_get_ip_info(int family, char *if_name, int op, weight += hweight32(&w[i]); sprintf(cidr_mask, "/%d", weight); - if ((length - sn_offset) < - (strlen(cidr_mask) + 1)) + if (length < sn_offset + strlen(cidr_mask) + 1) goto gather_ipaddr; if (sn_offset == 0) @@ -1308,16 +1305,17 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error) goto setval_error; + /* + * The dhcp_enabled flag is only for IPv4. In the case the host only + * injects an IPv6 address, the flag is true, but we still need to + * proceed to parse and pass the IPv6 information to the + * disto-specific script hv_set_ifconfig. + */ if (new_val->dhcp_enabled) { error = kvp_write_file(file, "BOOTPROTO", "", "dhcp"); if (error) goto setval_error; - /* - * We are done!. - */ - goto setval_done; - } else { error = kvp_write_file(file, "BOOTPROTO", "", "none"); if (error) @@ -1345,7 +1343,6 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val) if (error) goto setval_error; -setval_done: fclose(file); /* diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 5e63f70bd956..506dd0148828 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -81,6 +81,7 @@ static int vss_operate(int operation) char match[] = "/dev/"; FILE *mounts; struct mntent *ent; + char errdir[1024] = {0}; unsigned int cmd; int error = 0, root_seen = 0, save_errno = 0; @@ -115,6 +116,8 @@ static int vss_operate(int operation) goto err; } + endmntent(mounts); + if (root_seen) { error |= vss_do_freeze("/", cmd); if (error && operation == VSS_OP_FREEZE) @@ -124,16 +127,19 @@ static int vss_operate(int operation) goto out; err: save_errno = errno; + if (ent) { + strncpy(errdir, ent->mnt_dir, sizeof(errdir)-1); + endmntent(mounts); + } vss_operate(VSS_OP_THAW); /* Call syslog after we thaw all filesystems */ if (ent) syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s", - ent->mnt_dir, save_errno, strerror(save_errno)); + errdir, save_errno, strerror(save_errno)); else syslog(LOG_ERR, "FREEZE of / failed; error:%d %s", save_errno, strerror(save_errno)); out: - endmntent(mounts); return error; } diff --git a/tools/iio/Makefile b/tools/iio/Makefile new file mode 100644 index 000000000000..bf7ae6d6612a --- /dev/null +++ b/tools/iio/Makefile @@ -0,0 +1,16 @@ +CC = gcc +CFLAGS = -Wall -g -D_GNU_SOURCE + +all: iio_event_monitor lsiio generic_buffer + +iio_event_monitor: iio_event_monitor.o iio_utils.o + +lsiio: lsiio.o iio_utils.o + +generic_buffer: generic_buffer.o iio_utils.o + +%.o: %.c iio_utils.h + +.PHONY: clean +clean: + rm -f *.o iio_event_monitor lsiio generic_buffer diff --git a/tools/iio/generic_buffer.c b/tools/iio/generic_buffer.c new file mode 100644 index 000000000000..f805493be3eb --- /dev/null +++ b/tools/iio/generic_buffer.c @@ -0,0 +1,359 @@ +/* Industrialio buffer test code. + * + * Copyright (c) 2008 Jonathan Cameron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is primarily intended as an example application. + * Reads the current buffer setup from sysfs and starts a short capture + * from the specified device, pretty printing the result after appropriate + * conversion. + * + * Command line parameters + * generic_buffer -n <device_name> -t <trigger_name> + * If trigger name is not specified the program assumes you want a dataready + * trigger associated with the device and goes looking for it. + * + */ + +#include <unistd.h> +#include <stdlib.h> +#include <dirent.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/dir.h> +#include <linux/types.h> +#include <string.h> +#include <poll.h> +#include <endian.h> +#include <getopt.h> +#include <inttypes.h> +#include "iio_utils.h" + +/** + * size_from_channelarray() - calculate the storage size of a scan + * @channels: the channel info array + * @num_channels: number of channels + * + * Has the side effect of filling the channels[i].location values used + * in processing the buffer output. + **/ +int size_from_channelarray(struct iio_channel_info *channels, int num_channels) +{ + int bytes = 0; + int i = 0; + + while (i < num_channels) { + if (bytes % channels[i].bytes == 0) + channels[i].location = bytes; + else + channels[i].location = bytes - bytes%channels[i].bytes + + channels[i].bytes; + bytes = channels[i].location + channels[i].bytes; + i++; + } + return bytes; +} + +void print2byte(int input, struct iio_channel_info *info) +{ + /* First swap if incorrect endian */ + if (info->be) + input = be16toh((uint16_t)input); + else + input = le16toh((uint16_t)input); + + /* + * Shift before conversion to avoid sign extension + * of left aligned data + */ + input >>= info->shift; + if (info->is_signed) { + int16_t val = input; + + val &= (1 << info->bits_used) - 1; + val = (int16_t)(val << (16 - info->bits_used)) >> + (16 - info->bits_used); + printf("%05f ", ((float)val + info->offset)*info->scale); + } else { + uint16_t val = input; + + val &= (1 << info->bits_used) - 1; + printf("%05f ", ((float)val + info->offset)*info->scale); + } +} +/** + * process_scan() - print out the values in SI units + * @data: pointer to the start of the scan + * @channels: information about the channels. Note + * size_from_channelarray must have been called first to fill the + * location offsets. + * @num_channels: number of channels + **/ +void process_scan(char *data, + struct iio_channel_info *channels, + int num_channels) +{ + int k; + + for (k = 0; k < num_channels; k++) + switch (channels[k].bytes) { + /* only a few cases implemented so far */ + case 2: + print2byte(*(uint16_t *)(data + channels[k].location), + &channels[k]); + break; + case 4: + if (!channels[k].is_signed) { + uint32_t val = *(uint32_t *) + (data + channels[k].location); + printf("%05f ", ((float)val + + channels[k].offset)* + channels[k].scale); + + } + break; + case 8: + if (channels[k].is_signed) { + int64_t val = *(int64_t *) + (data + + channels[k].location); + if ((val >> channels[k].bits_used) & 1) + val = (val & channels[k].mask) | + ~channels[k].mask; + /* special case for timestamp */ + if (channels[k].scale == 1.0f && + channels[k].offset == 0.0f) + printf("%" PRId64 " ", val); + else + printf("%05f ", ((float)val + + channels[k].offset)* + channels[k].scale); + } + break; + default: + break; + } + printf("\n"); +} + +int main(int argc, char **argv) +{ + unsigned long num_loops = 2; + unsigned long timedelay = 1000000; + unsigned long buf_len = 128; + + int ret, c, i, j, toread; + int fp; + + int num_channels; + char *trigger_name = NULL, *device_name = NULL; + char *dev_dir_name, *buf_dir_name; + + int datardytrigger = 1; + char *data; + ssize_t read_size; + int dev_num, trig_num; + char *buffer_access; + int scan_size; + int noevents = 0; + int notrigger = 0; + char *dummy; + + struct iio_channel_info *channels; + + while ((c = getopt(argc, argv, "l:w:c:et:n:g")) != -1) { + switch (c) { + case 'n': + device_name = optarg; + break; + case 't': + trigger_name = optarg; + datardytrigger = 0; + break; + case 'e': + noevents = 1; + break; + case 'c': + num_loops = strtoul(optarg, &dummy, 10); + break; + case 'w': + timedelay = strtoul(optarg, &dummy, 10); + break; + case 'l': + buf_len = strtoul(optarg, &dummy, 10); + break; + case 'g': + notrigger = 1; + break; + case '?': + return -1; + } + } + + if (device_name == NULL) + return -1; + + /* Find the device requested */ + dev_num = find_type_by_name(device_name, "iio:device"); + if (dev_num < 0) { + printf("Failed to find the %s\n", device_name); + ret = -ENODEV; + goto error_ret; + } + printf("iio device number being used is %d\n", dev_num); + + asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num); + + if (!notrigger) { + if (trigger_name == NULL) { + /* + * Build the trigger name. If it is device associated + * its name is <device_name>_dev[n] where n matches + * the device number found above. + */ + ret = asprintf(&trigger_name, + "%s-dev%d", device_name, dev_num); + if (ret < 0) { + ret = -ENOMEM; + goto error_ret; + } + } + + /* Verify the trigger exists */ + trig_num = find_type_by_name(trigger_name, "trigger"); + if (trig_num < 0) { + printf("Failed to find the trigger %s\n", trigger_name); + ret = -ENODEV; + goto error_free_triggername; + } + printf("iio trigger number being used is %d\n", trig_num); + } else + printf("trigger-less mode selected\n"); + + /* + * Parse the files in scan_elements to identify what channels are + * present + */ + ret = build_channel_array(dev_dir_name, &channels, &num_channels); + if (ret) { + printf("Problem reading scan element information\n"); + printf("diag %s\n", dev_dir_name); + goto error_free_triggername; + } + + /* + * Construct the directory name for the associated buffer. + * As we know that the lis3l02dq has only one buffer this may + * be built rather than found. + */ + ret = asprintf(&buf_dir_name, + "%siio:device%d/buffer", iio_dir, dev_num); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_triggername; + } + + if (!notrigger) { + printf("%s %s\n", dev_dir_name, trigger_name); + /* Set the device trigger to be the data ready trigger found + * above */ + ret = write_sysfs_string_and_verify("trigger/current_trigger", + dev_dir_name, + trigger_name); + if (ret < 0) { + printf("Failed to write current_trigger file\n"); + goto error_free_buf_dir_name; + } + } + + /* Setup ring buffer parameters */ + ret = write_sysfs_int("length", buf_dir_name, buf_len); + if (ret < 0) + goto error_free_buf_dir_name; + + /* Enable the buffer */ + ret = write_sysfs_int("enable", buf_dir_name, 1); + if (ret < 0) + goto error_free_buf_dir_name; + scan_size = size_from_channelarray(channels, num_channels); + data = malloc(scan_size*buf_len); + if (!data) { + ret = -ENOMEM; + goto error_free_buf_dir_name; + } + + ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_data; + } + + /* Attempt to open non blocking the access dev */ + fp = open(buffer_access, O_RDONLY | O_NONBLOCK); + if (fp == -1) { /* If it isn't there make the node */ + printf("Failed to open %s\n", buffer_access); + ret = -errno; + goto error_free_buffer_access; + } + + /* Wait for events 10 times */ + for (j = 0; j < num_loops; j++) { + if (!noevents) { + struct pollfd pfd = { + .fd = fp, + .events = POLLIN, + }; + + poll(&pfd, 1, -1); + toread = buf_len; + + } else { + usleep(timedelay); + toread = 64; + } + + read_size = read(fp, + data, + toread*scan_size); + if (read_size < 0) { + if (errno == -EAGAIN) { + printf("nothing available\n"); + continue; + } else + break; + } + for (i = 0; i < read_size/scan_size; i++) + process_scan(data + scan_size*i, + channels, + num_channels); + } + + /* Stop the buffer */ + ret = write_sysfs_int("enable", buf_dir_name, 0); + if (ret < 0) + goto error_close_buffer_access; + + if (!notrigger) + /* Disconnect the trigger - just write a dummy name. */ + write_sysfs_string("trigger/current_trigger", + dev_dir_name, "NULL"); + +error_close_buffer_access: + close(fp); +error_free_data: + free(data); +error_free_buffer_access: + free(buffer_access); +error_free_buf_dir_name: + free(buf_dir_name); +error_free_triggername: + if (datardytrigger) + free(trigger_name); +error_ret: + return ret; +} diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c new file mode 100644 index 000000000000..427c271ac0d6 --- /dev/null +++ b/tools/iio/iio_event_monitor.c @@ -0,0 +1,308 @@ +/* Industrialio event test code. + * + * Copyright (c) 2011-2012 Lars-Peter Clausen <lars@metafoo.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is primarily intended as an example application. + * Reads the current buffer setup from sysfs and starts a short capture + * from the specified device, pretty printing the result after appropriate + * conversion. + * + * Usage: + * iio_event_monitor <device_name> + * + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <poll.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include "iio_utils.h" +#include <linux/iio/events.h> +#include <linux/iio/types.h> + +static const char * const iio_chan_type_name_spec[] = { + [IIO_VOLTAGE] = "voltage", + [IIO_CURRENT] = "current", + [IIO_POWER] = "power", + [IIO_ACCEL] = "accel", + [IIO_ANGL_VEL] = "anglvel", + [IIO_MAGN] = "magn", + [IIO_LIGHT] = "illuminance", + [IIO_INTENSITY] = "intensity", + [IIO_PROXIMITY] = "proximity", + [IIO_TEMP] = "temp", + [IIO_INCLI] = "incli", + [IIO_ROT] = "rot", + [IIO_ANGL] = "angl", + [IIO_TIMESTAMP] = "timestamp", + [IIO_CAPACITANCE] = "capacitance", + [IIO_ALTVOLTAGE] = "altvoltage", + [IIO_CCT] = "cct", + [IIO_PRESSURE] = "pressure", + [IIO_HUMIDITYRELATIVE] = "humidityrelative", + [IIO_ACTIVITY] = "activity", + [IIO_STEPS] = "steps", +}; + +static const char * const iio_ev_type_text[] = { + [IIO_EV_TYPE_THRESH] = "thresh", + [IIO_EV_TYPE_MAG] = "mag", + [IIO_EV_TYPE_ROC] = "roc", + [IIO_EV_TYPE_THRESH_ADAPTIVE] = "thresh_adaptive", + [IIO_EV_TYPE_MAG_ADAPTIVE] = "mag_adaptive", + [IIO_EV_TYPE_CHANGE] = "change", +}; + +static const char * const iio_ev_dir_text[] = { + [IIO_EV_DIR_EITHER] = "either", + [IIO_EV_DIR_RISING] = "rising", + [IIO_EV_DIR_FALLING] = "falling" +}; + +static const char * const iio_modifier_names[] = { + [IIO_MOD_X] = "x", + [IIO_MOD_Y] = "y", + [IIO_MOD_Z] = "z", + [IIO_MOD_X_AND_Y] = "x&y", + [IIO_MOD_X_AND_Z] = "x&z", + [IIO_MOD_Y_AND_Z] = "y&z", + [IIO_MOD_X_AND_Y_AND_Z] = "x&y&z", + [IIO_MOD_X_OR_Y] = "x|y", + [IIO_MOD_X_OR_Z] = "x|z", + [IIO_MOD_Y_OR_Z] = "y|z", + [IIO_MOD_X_OR_Y_OR_Z] = "x|y|z", + [IIO_MOD_LIGHT_BOTH] = "both", + [IIO_MOD_LIGHT_IR] = "ir", + [IIO_MOD_ROOT_SUM_SQUARED_X_Y] = "sqrt(x^2+y^2)", + [IIO_MOD_SUM_SQUARED_X_Y_Z] = "x^2+y^2+z^2", + [IIO_MOD_LIGHT_CLEAR] = "clear", + [IIO_MOD_LIGHT_RED] = "red", + [IIO_MOD_LIGHT_GREEN] = "green", + [IIO_MOD_LIGHT_BLUE] = "blue", + [IIO_MOD_QUATERNION] = "quaternion", + [IIO_MOD_TEMP_AMBIENT] = "ambient", + [IIO_MOD_TEMP_OBJECT] = "object", + [IIO_MOD_NORTH_MAGN] = "from_north_magnetic", + [IIO_MOD_NORTH_TRUE] = "from_north_true", + [IIO_MOD_NORTH_MAGN_TILT_COMP] = "from_north_magnetic_tilt_comp", + [IIO_MOD_NORTH_TRUE_TILT_COMP] = "from_north_true_tilt_comp", + [IIO_MOD_RUNNING] = "running", + [IIO_MOD_JOGGING] = "jogging", + [IIO_MOD_WALKING] = "walking", + [IIO_MOD_STILL] = "still", +}; + +static bool event_is_known(struct iio_event_data *event) +{ + enum iio_chan_type type = IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event->id); + enum iio_modifier mod = IIO_EVENT_CODE_EXTRACT_MODIFIER(event->id); + enum iio_event_type ev_type = IIO_EVENT_CODE_EXTRACT_TYPE(event->id); + enum iio_event_direction dir = IIO_EVENT_CODE_EXTRACT_DIR(event->id); + + switch (type) { + case IIO_VOLTAGE: + case IIO_CURRENT: + case IIO_POWER: + case IIO_ACCEL: + case IIO_ANGL_VEL: + case IIO_MAGN: + case IIO_LIGHT: + case IIO_INTENSITY: + case IIO_PROXIMITY: + case IIO_TEMP: + case IIO_INCLI: + case IIO_ROT: + case IIO_ANGL: + case IIO_TIMESTAMP: + case IIO_CAPACITANCE: + case IIO_ALTVOLTAGE: + case IIO_CCT: + case IIO_PRESSURE: + case IIO_HUMIDITYRELATIVE: + case IIO_ACTIVITY: + case IIO_STEPS: + break; + default: + return false; + } + + switch (mod) { + case IIO_NO_MOD: + case IIO_MOD_X: + case IIO_MOD_Y: + case IIO_MOD_Z: + case IIO_MOD_X_AND_Y: + case IIO_MOD_X_AND_Z: + case IIO_MOD_Y_AND_Z: + case IIO_MOD_X_AND_Y_AND_Z: + case IIO_MOD_X_OR_Y: + case IIO_MOD_X_OR_Z: + case IIO_MOD_Y_OR_Z: + case IIO_MOD_X_OR_Y_OR_Z: + case IIO_MOD_LIGHT_BOTH: + case IIO_MOD_LIGHT_IR: + case IIO_MOD_ROOT_SUM_SQUARED_X_Y: + case IIO_MOD_SUM_SQUARED_X_Y_Z: + case IIO_MOD_LIGHT_CLEAR: + case IIO_MOD_LIGHT_RED: + case IIO_MOD_LIGHT_GREEN: + case IIO_MOD_LIGHT_BLUE: + case IIO_MOD_QUATERNION: + case IIO_MOD_TEMP_AMBIENT: + case IIO_MOD_TEMP_OBJECT: + case IIO_MOD_NORTH_MAGN: + case IIO_MOD_NORTH_TRUE: + case IIO_MOD_NORTH_MAGN_TILT_COMP: + case IIO_MOD_NORTH_TRUE_TILT_COMP: + case IIO_MOD_RUNNING: + case IIO_MOD_JOGGING: + case IIO_MOD_WALKING: + case IIO_MOD_STILL: + break; + default: + return false; + } + + switch (ev_type) { + case IIO_EV_TYPE_THRESH: + case IIO_EV_TYPE_MAG: + case IIO_EV_TYPE_ROC: + case IIO_EV_TYPE_THRESH_ADAPTIVE: + case IIO_EV_TYPE_MAG_ADAPTIVE: + case IIO_EV_TYPE_CHANGE: + break; + default: + return false; + } + + switch (dir) { + case IIO_EV_DIR_EITHER: + case IIO_EV_DIR_RISING: + case IIO_EV_DIR_FALLING: + case IIO_EV_DIR_NONE: + break; + default: + return false; + } + + return true; +} + +static void print_event(struct iio_event_data *event) +{ + enum iio_chan_type type = IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event->id); + enum iio_modifier mod = IIO_EVENT_CODE_EXTRACT_MODIFIER(event->id); + enum iio_event_type ev_type = IIO_EVENT_CODE_EXTRACT_TYPE(event->id); + enum iio_event_direction dir = IIO_EVENT_CODE_EXTRACT_DIR(event->id); + int chan = IIO_EVENT_CODE_EXTRACT_CHAN(event->id); + int chan2 = IIO_EVENT_CODE_EXTRACT_CHAN2(event->id); + bool diff = IIO_EVENT_CODE_EXTRACT_DIFF(event->id); + + if (!event_is_known(event)) { + printf("Unknown event: time: %lld, id: %llx\n", + event->timestamp, event->id); + return; + } + + printf("Event: time: %lld, ", event->timestamp); + + if (mod != IIO_NO_MOD) { + printf("type: %s(%s), ", + iio_chan_type_name_spec[type], + iio_modifier_names[mod]); + } else { + printf("type: %s, ", + iio_chan_type_name_spec[type]); + } + + if (diff && chan >= 0 && chan2 >= 0) + printf("channel: %d-%d, ", chan, chan2); + else if (chan >= 0) + printf("channel: %d, ", chan); + + printf("evtype: %s", iio_ev_type_text[ev_type]); + + if (dir != IIO_EV_DIR_NONE) + printf(", direction: %s", iio_ev_dir_text[dir]); + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct iio_event_data event; + const char *device_name; + char *chrdev_name; + int ret; + int dev_num; + int fd, event_fd; + + if (argc <= 1) { + printf("Usage: %s <device_name>\n", argv[0]); + return -1; + } + + device_name = argv[1]; + + dev_num = find_type_by_name(device_name, "iio:device"); + if (dev_num >= 0) { + printf("Found IIO device with name %s with device number %d\n", + device_name, dev_num); + ret = asprintf(&chrdev_name, "/dev/iio:device%d", dev_num); + if (ret < 0) { + ret = -ENOMEM; + goto error_ret; + } + } else { + /* If we can't find a IIO device by name assume device_name is a + IIO chrdev */ + chrdev_name = strdup(device_name); + } + + fd = open(chrdev_name, 0); + if (fd == -1) { + fprintf(stdout, "Failed to open %s\n", chrdev_name); + ret = -errno; + goto error_free_chrdev_name; + } + + ret = ioctl(fd, IIO_GET_EVENT_FD_IOCTL, &event_fd); + + close(fd); + + if (ret == -1 || event_fd == -1) { + fprintf(stdout, "Failed to retrieve event fd\n"); + ret = -errno; + goto error_free_chrdev_name; + } + + while (true) { + ret = read(event_fd, &event, sizeof(event)); + if (ret == -1) { + if (errno == EAGAIN) { + printf("nothing available\n"); + continue; + } else { + perror("Failed to read event from device"); + ret = -errno; + break; + } + } + + print_event(&event); + } + + close(event_fd); +error_free_chrdev_name: + free(chrdev_name); +error_ret: + return ret; +} diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c new file mode 100644 index 000000000000..6f6452167b67 --- /dev/null +++ b/tools/iio/iio_utils.c @@ -0,0 +1,669 @@ +/* IIO - useful set of util functionality + * + * Copyright (c) 2008 Jonathan Cameron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#ifndef _IIO_UTILS_H +#define _IIO_UTILS_H + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <dirent.h> +#include <errno.h> +#include <ctype.h> +#include "iio_utils.h" + +const char *iio_dir = "/sys/bus/iio/devices/"; + +static char * const iio_direction[] = { + "in", + "out", +}; + +/** + * iioutils_break_up_name() - extract generic name from full channel name + * @full_name: the full channel name + * @generic_name: the output generic channel name + **/ +int iioutils_break_up_name(const char *full_name, + char **generic_name) +{ + char *current; + char *w, *r; + char *working, *prefix = ""; + int i; + + for (i = 0; i < sizeof(iio_direction) / sizeof(iio_direction[0]); i++) + if (!strncmp(full_name, iio_direction[i], + strlen(iio_direction[i]))) { + prefix = iio_direction[i]; + break; + } + + current = strdup(full_name + strlen(prefix) + 1); + working = strtok(current, "_\0"); + + w = working; + r = working; + + while (*r != '\0') { + if (!isdigit(*r)) { + *w = *r; + w++; + } + r++; + } + *w = '\0'; + asprintf(generic_name, "%s_%s", prefix, working); + free(current); + + return 0; +} + +/** + * iioutils_get_type() - find and process _type attribute data + * @is_signed: output whether channel is signed + * @bytes: output how many bytes the channel storage occupies + * @mask: output a bit mask for the raw data + * @be: big endian + * @device_dir: the iio device directory + * @name: the channel name + * @generic_name: the channel type name + **/ +int iioutils_get_type(unsigned *is_signed, + unsigned *bytes, + unsigned *bits_used, + unsigned *shift, + uint64_t *mask, + unsigned *be, + const char *device_dir, + const char *name, + const char *generic_name) +{ + FILE *sysfsfp; + int ret; + DIR *dp; + char *scan_el_dir, *builtname, *builtname_generic, *filename = 0; + char signchar, endianchar; + unsigned padint; + const struct dirent *ent; + + ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir); + if (ret < 0) { + ret = -ENOMEM; + goto error_ret; + } + ret = asprintf(&builtname, FORMAT_TYPE_FILE, name); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_scan_el_dir; + } + ret = asprintf(&builtname_generic, FORMAT_TYPE_FILE, generic_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_builtname; + } + + dp = opendir(scan_el_dir); + if (dp == NULL) { + ret = -errno; + goto error_free_builtname_generic; + } + while (ent = readdir(dp), ent != NULL) + /* + * Do we allow devices to override a generic name with + * a specific one? + */ + if ((strcmp(builtname, ent->d_name) == 0) || + (strcmp(builtname_generic, ent->d_name) == 0)) { + ret = asprintf(&filename, + "%s/%s", scan_el_dir, ent->d_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_closedir; + } + sysfsfp = fopen(filename, "r"); + if (sysfsfp == NULL) { + printf("failed to open %s\n", filename); + ret = -errno; + goto error_free_filename; + } + + ret = fscanf(sysfsfp, + "%ce:%c%u/%u>>%u", + &endianchar, + &signchar, + bits_used, + &padint, shift); + if (ret < 0) { + printf("failed to pass scan type description\n"); + ret = -errno; + goto error_close_sysfsfp; + } + *be = (endianchar == 'b'); + *bytes = padint / 8; + if (*bits_used == 64) + *mask = ~0; + else + *mask = (1 << *bits_used) - 1; + if (signchar == 's') + *is_signed = 1; + else + *is_signed = 0; + fclose(sysfsfp); + free(filename); + + filename = 0; + sysfsfp = 0; + } +error_close_sysfsfp: + if (sysfsfp) + fclose(sysfsfp); +error_free_filename: + if (filename) + free(filename); +error_closedir: + closedir(dp); +error_free_builtname_generic: + free(builtname_generic); +error_free_builtname: + free(builtname); +error_free_scan_el_dir: + free(scan_el_dir); +error_ret: + return ret; +} + +int iioutils_get_param_float(float *output, + const char *param_name, + const char *device_dir, + const char *name, + const char *generic_name) +{ + FILE *sysfsfp; + int ret; + DIR *dp; + char *builtname, *builtname_generic; + char *filename = NULL; + const struct dirent *ent; + + ret = asprintf(&builtname, "%s_%s", name, param_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_ret; + } + ret = asprintf(&builtname_generic, + "%s_%s", generic_name, param_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_builtname; + } + dp = opendir(device_dir); + if (dp == NULL) { + ret = -errno; + goto error_free_builtname_generic; + } + while (ent = readdir(dp), ent != NULL) + if ((strcmp(builtname, ent->d_name) == 0) || + (strcmp(builtname_generic, ent->d_name) == 0)) { + ret = asprintf(&filename, + "%s/%s", device_dir, ent->d_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_closedir; + } + sysfsfp = fopen(filename, "r"); + if (!sysfsfp) { + ret = -errno; + goto error_free_filename; + } + fscanf(sysfsfp, "%f", output); + break; + } +error_free_filename: + if (filename) + free(filename); +error_closedir: + closedir(dp); +error_free_builtname_generic: + free(builtname_generic); +error_free_builtname: + free(builtname); +error_ret: + return ret; +} + +/** + * bsort_channel_array_by_index() - reorder so that the array is in index order + * + **/ + +void bsort_channel_array_by_index(struct iio_channel_info **ci_array, + int cnt) +{ + + struct iio_channel_info temp; + int x, y; + + for (x = 0; x < cnt; x++) + for (y = 0; y < (cnt - 1); y++) + if ((*ci_array)[y].index > (*ci_array)[y+1].index) { + temp = (*ci_array)[y + 1]; + (*ci_array)[y + 1] = (*ci_array)[y]; + (*ci_array)[y] = temp; + } +} + +/** + * build_channel_array() - function to figure out what channels are present + * @device_dir: the IIO device directory in sysfs + * @ + **/ +int build_channel_array(const char *device_dir, + struct iio_channel_info **ci_array, + int *counter) +{ + DIR *dp; + FILE *sysfsfp; + int count, i; + struct iio_channel_info *current; + int ret; + const struct dirent *ent; + char *scan_el_dir; + char *filename; + + *counter = 0; + ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir); + if (ret < 0) { + ret = -ENOMEM; + goto error_ret; + } + dp = opendir(scan_el_dir); + if (dp == NULL) { + ret = -errno; + goto error_free_name; + } + while (ent = readdir(dp), ent != NULL) + if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), + "_en") == 0) { + ret = asprintf(&filename, + "%s/%s", scan_el_dir, ent->d_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_close_dir; + } + sysfsfp = fopen(filename, "r"); + if (sysfsfp == NULL) { + ret = -errno; + free(filename); + goto error_close_dir; + } + fscanf(sysfsfp, "%i", &ret); + if (ret == 1) + (*counter)++; + fclose(sysfsfp); + free(filename); + } + *ci_array = malloc(sizeof(**ci_array) * (*counter)); + if (*ci_array == NULL) { + ret = -ENOMEM; + goto error_close_dir; + } + seekdir(dp, 0); + count = 0; + while (ent = readdir(dp), ent != NULL) { + if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), + "_en") == 0) { + int current_enabled = 0; + + current = &(*ci_array)[count++]; + ret = asprintf(&filename, + "%s/%s", scan_el_dir, ent->d_name); + if (ret < 0) { + ret = -ENOMEM; + /* decrement count to avoid freeing name */ + count--; + goto error_cleanup_array; + } + sysfsfp = fopen(filename, "r"); + if (sysfsfp == NULL) { + free(filename); + ret = -errno; + goto error_cleanup_array; + } + fscanf(sysfsfp, "%i", ¤t_enabled); + fclose(sysfsfp); + + if (!current_enabled) { + free(filename); + count--; + continue; + } + + current->scale = 1.0; + current->offset = 0; + current->name = strndup(ent->d_name, + strlen(ent->d_name) - + strlen("_en")); + if (current->name == NULL) { + free(filename); + ret = -ENOMEM; + goto error_cleanup_array; + } + /* Get the generic and specific name elements */ + ret = iioutils_break_up_name(current->name, + ¤t->generic_name); + if (ret) { + free(filename); + goto error_cleanup_array; + } + ret = asprintf(&filename, + "%s/%s_index", + scan_el_dir, + current->name); + if (ret < 0) { + free(filename); + ret = -ENOMEM; + goto error_cleanup_array; + } + sysfsfp = fopen(filename, "r"); + fscanf(sysfsfp, "%u", ¤t->index); + fclose(sysfsfp); + free(filename); + /* Find the scale */ + ret = iioutils_get_param_float(¤t->scale, + "scale", + device_dir, + current->name, + current->generic_name); + if (ret < 0) + goto error_cleanup_array; + ret = iioutils_get_param_float(¤t->offset, + "offset", + device_dir, + current->name, + current->generic_name); + if (ret < 0) + goto error_cleanup_array; + ret = iioutils_get_type(¤t->is_signed, + ¤t->bytes, + ¤t->bits_used, + ¤t->shift, + ¤t->mask, + ¤t->be, + device_dir, + current->name, + current->generic_name); + } + } + + closedir(dp); + /* reorder so that the array is in index order */ + bsort_channel_array_by_index(ci_array, *counter); + + return 0; + +error_cleanup_array: + for (i = count - 1; i >= 0; i--) + free((*ci_array)[i].name); + free(*ci_array); +error_close_dir: + closedir(dp); +error_free_name: + free(scan_el_dir); +error_ret: + return ret; +} + +/** + * find_type_by_name() - function to match top level types by name + * @name: top level type instance name + * @type: the type of top level instance being sort + * + * Typical types this is used for are device and trigger. + **/ +int find_type_by_name(const char *name, const char *type) +{ + const struct dirent *ent; + int number, numstrlen; + + FILE *nameFile; + DIR *dp; + char thisname[IIO_MAX_NAME_LENGTH]; + char *filename; + + dp = opendir(iio_dir); + if (dp == NULL) { + printf("No industrialio devices available\n"); + return -ENODEV; + } + + while (ent = readdir(dp), ent != NULL) { + if (strcmp(ent->d_name, ".") != 0 && + strcmp(ent->d_name, "..") != 0 && + strlen(ent->d_name) > strlen(type) && + strncmp(ent->d_name, type, strlen(type)) == 0) { + numstrlen = sscanf(ent->d_name + strlen(type), + "%d", + &number); + /* verify the next character is not a colon */ + if (strncmp(ent->d_name + strlen(type) + numstrlen, + ":", + 1) != 0) { + filename = malloc(strlen(iio_dir) + + strlen(type) + + numstrlen + + 6); + if (filename == NULL) { + closedir(dp); + return -ENOMEM; + } + sprintf(filename, "%s%s%d/name", + iio_dir, + type, + number); + nameFile = fopen(filename, "r"); + if (!nameFile) { + free(filename); + continue; + } + free(filename); + fscanf(nameFile, "%s", thisname); + fclose(nameFile); + if (strcmp(name, thisname) == 0) { + closedir(dp); + return number; + } + } + } + } + closedir(dp); + return -ENODEV; +} + +int _write_sysfs_int(char *filename, char *basedir, int val, int verify) +{ + int ret = 0; + FILE *sysfsfp; + int test; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) + return -ENOMEM; + sprintf(temp, "%s/%s", basedir, filename); + sysfsfp = fopen(temp, "w"); + if (sysfsfp == NULL) { + printf("failed to open %s\n", temp); + ret = -errno; + goto error_free; + } + fprintf(sysfsfp, "%d", val); + fclose(sysfsfp); + if (verify) { + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + printf("failed to open %s\n", temp); + ret = -errno; + goto error_free; + } + fscanf(sysfsfp, "%d", &test); + fclose(sysfsfp); + if (test != val) { + printf("Possible failure in int write %d to %s%s\n", + val, + basedir, + filename); + ret = -1; + } + } +error_free: + free(temp); + return ret; +} + +int write_sysfs_int(char *filename, char *basedir, int val) +{ + return _write_sysfs_int(filename, basedir, val, 0); +} + +int write_sysfs_int_and_verify(char *filename, char *basedir, int val) +{ + return _write_sysfs_int(filename, basedir, val, 1); +} + +int _write_sysfs_string(char *filename, char *basedir, char *val, int verify) +{ + int ret = 0; + FILE *sysfsfp; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) { + printf("Memory allocation failed\n"); + return -ENOMEM; + } + sprintf(temp, "%s/%s", basedir, filename); + sysfsfp = fopen(temp, "w"); + if (sysfsfp == NULL) { + printf("Could not open %s\n", temp); + ret = -errno; + goto error_free; + } + fprintf(sysfsfp, "%s", val); + fclose(sysfsfp); + if (verify) { + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + printf("could not open file to verify\n"); + ret = -errno; + goto error_free; + } + fscanf(sysfsfp, "%s", temp); + fclose(sysfsfp); + if (strcmp(temp, val) != 0) { + printf("Possible failure in string write of %s " + "Should be %s " + "written to %s\%s\n", + temp, + val, + basedir, + filename); + ret = -1; + } + } +error_free: + free(temp); + + return ret; +} + +/** + * write_sysfs_string_and_verify() - string write, readback and verify + * @filename: name of file to write to + * @basedir: the sysfs directory in which the file is to be found + * @val: the string to write + **/ +int write_sysfs_string_and_verify(char *filename, char *basedir, char *val) +{ + return _write_sysfs_string(filename, basedir, val, 1); +} + +int write_sysfs_string(char *filename, char *basedir, char *val) +{ + return _write_sysfs_string(filename, basedir, val, 0); +} + +int read_sysfs_posint(char *filename, char *basedir) +{ + int ret; + FILE *sysfsfp; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) { + printf("Memory allocation failed"); + return -ENOMEM; + } + sprintf(temp, "%s/%s", basedir, filename); + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + ret = -errno; + goto error_free; + } + fscanf(sysfsfp, "%d\n", &ret); + fclose(sysfsfp); +error_free: + free(temp); + return ret; +} + +int read_sysfs_float(char *filename, char *basedir, float *val) +{ + int ret = 0; + FILE *sysfsfp; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) { + printf("Memory allocation failed"); + return -ENOMEM; + } + sprintf(temp, "%s/%s", basedir, filename); + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + ret = -errno; + goto error_free; + } + fscanf(sysfsfp, "%f\n", val); + fclose(sysfsfp); +error_free: + free(temp); + return ret; +} + +int read_sysfs_string(const char *filename, const char *basedir, char *str) +{ + int ret = 0; + FILE *sysfsfp; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) { + printf("Memory allocation failed"); + return -ENOMEM; + } + sprintf(temp, "%s/%s", basedir, filename); + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + ret = -errno; + goto error_free; + } + fscanf(sysfsfp, "%s\n", str); + fclose(sysfsfp); +error_free: + free(temp); + return ret; +} + +#endif /* _IIO_UTILS_H */ diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h new file mode 100644 index 000000000000..1bc837b2d769 --- /dev/null +++ b/tools/iio/iio_utils.h @@ -0,0 +1,71 @@ +#ifndef _IIO_UTILS_H_ +#define _IIO_UTILS_H_ + +/* IIO - useful set of util functionality + * + * Copyright (c) 2008 Jonathan Cameron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <stdint.h> + +/* Made up value to limit allocation sizes */ +#define IIO_MAX_NAME_LENGTH 30 + +#define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements" +#define FORMAT_TYPE_FILE "%s_type" + +extern const char *iio_dir; + +/** + * struct iio_channel_info - information about a given channel + * @name: channel name + * @generic_name: general name for channel type + * @scale: scale factor to be applied for conversion to si units + * @offset: offset to be applied for conversion to si units + * @index: the channel index in the buffer output + * @bytes: number of bytes occupied in buffer output + * @mask: a bit mask for the raw output + * @is_signed: is the raw value stored signed + * @enabled: is this channel enabled + **/ +struct iio_channel_info { + char *name; + char *generic_name; + float scale; + float offset; + unsigned index; + unsigned bytes; + unsigned bits_used; + unsigned shift; + uint64_t mask; + unsigned be; + unsigned is_signed; + unsigned location; +}; + +int iioutils_break_up_name(const char *full_name, char **generic_name); +int iioutils_get_type(unsigned *is_signed, unsigned *bytes, + unsigned *bits_used, unsigned *shift, + uint64_t *mask, unsigned *be, + const char *device_dir, const char *name, + const char *generic_name); +int iioutils_get_param_float(float *output, const char *param_name, + const char *device_dir, const char *name, + const char *generic_name); +void bsort_channel_array_by_index(struct iio_channel_info **ci_array, int cnt); +int build_channel_array(const char *device_dir, + struct iio_channel_info **ci_array, int *counter); +int find_type_by_name(const char *name, const char *type); +int write_sysfs_int(char *filename, char *basedir, int val); +int write_sysfs_int_and_verify(char *filename, char *basedir, int val); +int write_sysfs_string_and_verify(char *filename, char *basedir, char *val); +int write_sysfs_string(char *filename, char *basedir, char *val); +int read_sysfs_posint(char *filename, char *basedir); +int read_sysfs_float(char *filename, char *basedir, float *val); +int read_sysfs_string(const char *filename, const char *basedir, char *str); + +#endif /* _IIO_UTILS_H_ */ diff --git a/tools/iio/lsiio.c b/tools/iio/lsiio.c new file mode 100644 index 000000000000..c585440f864e --- /dev/null +++ b/tools/iio/lsiio.c @@ -0,0 +1,158 @@ +/* + * Industrial I/O utilities - lsiio.c + * + * Copyright (c) 2010 Manuel Stahl <manuel.stahl@iis.fraunhofer.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <string.h> +#include <dirent.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/dir.h> +#include "iio_utils.h" + + +static enum verbosity { + VERBLEVEL_DEFAULT, /* 0 gives lspci behaviour */ + VERBLEVEL_SENSORS, /* 1 lists sensors */ +} verblevel = VERBLEVEL_DEFAULT; + +const char *type_device = "iio:device"; +const char *type_trigger = "trigger"; + + +static inline int check_prefix(const char *str, const char *prefix) +{ + return strlen(str) > strlen(prefix) && + strncmp(str, prefix, strlen(prefix)) == 0; +} + +static inline int check_postfix(const char *str, const char *postfix) +{ + return strlen(str) > strlen(postfix) && + strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; +} + +static int dump_channels(const char *dev_dir_name) +{ + DIR *dp; + const struct dirent *ent; + + dp = opendir(dev_dir_name); + if (dp == NULL) + return -errno; + while (ent = readdir(dp), ent != NULL) + if (check_prefix(ent->d_name, "in_") && + check_postfix(ent->d_name, "_raw")) { + printf(" %-10s\n", ent->d_name); + } + + return 0; +} + +static int dump_one_device(const char *dev_dir_name) +{ + char name[IIO_MAX_NAME_LENGTH]; + int dev_idx; + int retval; + + retval = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_device), + "%i", &dev_idx); + if (retval != 1) + return -EINVAL; + read_sysfs_string("name", dev_dir_name, name); + printf("Device %03d: %s\n", dev_idx, name); + + if (verblevel >= VERBLEVEL_SENSORS) + return dump_channels(dev_dir_name); + return 0; +} + +static int dump_one_trigger(const char *dev_dir_name) +{ + char name[IIO_MAX_NAME_LENGTH]; + int dev_idx; + int retval; + + retval = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_trigger), + "%i", &dev_idx); + if (retval != 1) + return -EINVAL; + read_sysfs_string("name", dev_dir_name, name); + printf("Trigger %03d: %s\n", dev_idx, name); + return 0; +} + +static void dump_devices(void) +{ + const struct dirent *ent; + DIR *dp; + + dp = opendir(iio_dir); + if (dp == NULL) { + printf("No industrial I/O devices available\n"); + return; + } + + while (ent = readdir(dp), ent != NULL) { + if (check_prefix(ent->d_name, type_device)) { + char *dev_dir_name; + + asprintf(&dev_dir_name, "%s%s", iio_dir, ent->d_name); + dump_one_device(dev_dir_name); + free(dev_dir_name); + if (verblevel >= VERBLEVEL_SENSORS) + printf("\n"); + } + } + rewinddir(dp); + while (ent = readdir(dp), ent != NULL) { + if (check_prefix(ent->d_name, type_trigger)) { + char *dev_dir_name; + + asprintf(&dev_dir_name, "%s%s", iio_dir, ent->d_name); + dump_one_trigger(dev_dir_name); + free(dev_dir_name); + } + } + closedir(dp); +} + +int main(int argc, char **argv) +{ + int c, err = 0; + + while ((c = getopt(argc, argv, "d:D:v")) != EOF) { + switch (c) { + case 'v': + verblevel++; + break; + + case '?': + default: + err++; + break; + } + } + if (err || argc > optind) { + fprintf(stderr, "Usage: lsiio [options]...\n" + "List industrial I/O devices\n" + " -v, --verbose\n" + " Increase verbosity (may be given multiple times)\n" + ); + exit(1); + } + + dump_devices(); + + return 0; +} diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h index 6eedba1f7732..653d1bad77de 100644 --- a/tools/include/asm-generic/bitops.h +++ b/tools/include/asm-generic/bitops.h @@ -22,6 +22,8 @@ #error only <linux/bitops.h> can be included directly #endif +#include <asm-generic/bitops/hweight.h> + #include <asm-generic/bitops/atomic.h> #endif /* __TOOLS_ASM_GENERIC_BITOPS_H */ diff --git a/tools/include/asm-generic/bitops/arch_hweight.h b/tools/include/asm-generic/bitops/arch_hweight.h new file mode 100644 index 000000000000..318bb2b202b0 --- /dev/null +++ b/tools/include/asm-generic/bitops/arch_hweight.h @@ -0,0 +1 @@ +#include "../../../../include/asm-generic/bitops/arch_hweight.h" diff --git a/tools/include/asm-generic/bitops/const_hweight.h b/tools/include/asm-generic/bitops/const_hweight.h new file mode 100644 index 000000000000..0afd644aff83 --- /dev/null +++ b/tools/include/asm-generic/bitops/const_hweight.h @@ -0,0 +1 @@ +#include "../../../../include/asm-generic/bitops/const_hweight.h" diff --git a/tools/include/asm-generic/bitops/hweight.h b/tools/include/asm-generic/bitops/hweight.h new file mode 100644 index 000000000000..290120c01a8e --- /dev/null +++ b/tools/include/asm-generic/bitops/hweight.h @@ -0,0 +1,7 @@ +#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_HWEIGHT_H_ +#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_HWEIGHT_H_ + +#include <asm-generic/bitops/arch_hweight.h> +#include <asm-generic/bitops/const_hweight.h> + +#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_HWEIGHT_H_ */ diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index 26005a15e7e2..5ad9ee1dd7f6 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -1,9 +1,9 @@ #ifndef _TOOLS_LINUX_BITOPS_H_ #define _TOOLS_LINUX_BITOPS_H_ +#include <asm/types.h> #include <linux/kernel.h> #include <linux/compiler.h> -#include <asm/hweight.h> #ifndef __WORDSIZE #define __WORDSIZE (__SIZEOF_LONG__ * 8) @@ -19,6 +19,11 @@ #define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) #define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) +extern unsigned int __sw_hweight8(unsigned int w); +extern unsigned int __sw_hweight16(unsigned int w); +extern unsigned int __sw_hweight32(unsigned int w); +extern unsigned long __sw_hweight64(__u64 w); + /* * Include this here because some architectures need generic_ffs/fls in * scope diff --git a/tools/lguest/Makefile b/tools/lguest/Makefile index 97bca4871ea3..a107b5e4da13 100644 --- a/tools/lguest/Makefile +++ b/tools/lguest/Makefile @@ -1,7 +1,13 @@ # This creates the demonstration utility "lguest" which runs a Linux guest. -CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE +CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE -Iinclude all: lguest +include/linux/virtio_types.h: ../../include/uapi/linux/virtio_types.h + mkdir -p include/linux 2>&1 || true + ln -sf ../../../../include/uapi/linux/virtio_types.h $@ + +lguest: include/linux/virtio_types.h + clean: rm -f lguest diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 32cf2ce15d69..e44052483ed9 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -41,6 +41,8 @@ #include <signal.h> #include <pwd.h> #include <grp.h> +#include <sys/user.h> +#include <linux/pci_regs.h> #ifndef VIRTIO_F_ANY_LAYOUT #define VIRTIO_F_ANY_LAYOUT 27 @@ -61,12 +63,19 @@ typedef uint16_t u16; typedef uint8_t u8; /*:*/ -#include <linux/virtio_config.h> -#include <linux/virtio_net.h> -#include <linux/virtio_blk.h> -#include <linux/virtio_console.h> -#include <linux/virtio_rng.h> +#define VIRTIO_CONFIG_NO_LEGACY +#define VIRTIO_PCI_NO_LEGACY +#define VIRTIO_BLK_NO_LEGACY +#define VIRTIO_NET_NO_LEGACY + +/* Use in-kernel ones, which defines VIRTIO_F_VERSION_1 */ +#include "../../include/uapi/linux/virtio_config.h" +#include "../../include/uapi/linux/virtio_net.h" +#include "../../include/uapi/linux/virtio_blk.h" +#include "../../include/uapi/linux/virtio_console.h" +#include "../../include/uapi/linux/virtio_rng.h" #include <linux/virtio_ring.h> +#include "../../include/uapi/linux/virtio_pci.h" #include <asm/bootparam.h> #include "../../include/linux/lguest_launcher.h" @@ -91,13 +100,16 @@ static bool verbose; /* The pointer to the start of guest memory. */ static void *guest_base; /* The maximum guest physical address allowed, and maximum possible. */ -static unsigned long guest_limit, guest_max; +static unsigned long guest_limit, guest_max, guest_mmio; /* The /dev/lguest file descriptor. */ static int lguest_fd; /* a per-cpu variable indicating whose vcpu is currently running */ static unsigned int __thread cpu_id; +/* 5 bit device number in the PCI_CONFIG_ADDR => 32 only */ +#define MAX_PCI_DEVICES 32 + /* This is our list of devices. */ struct device_list { /* Counter to assign interrupt numbers. */ @@ -106,30 +118,50 @@ struct device_list { /* Counter to print out convenient device numbers. */ unsigned int device_num; - /* The descriptor page for the devices. */ - u8 *descpage; - - /* A single linked list of devices. */ - struct device *dev; - /* And a pointer to the last device for easy append. */ - struct device *lastdev; + /* PCI devices. */ + struct device *pci[MAX_PCI_DEVICES]; }; /* The list of Guest devices, based on command line arguments. */ static struct device_list devices; -/* The device structure describes a single device. */ -struct device { - /* The linked-list pointer. */ - struct device *next; +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + u32 pci_cfg_data; /* Data for BAR access. */ +}; - /* The device's descriptor, as mapped into the Guest. */ - struct lguest_device_desc *desc; +struct virtio_pci_mmio { + struct virtio_pci_common_cfg cfg; + u16 notify; + u8 isr; + u8 padding; + /* Device-specific configuration follows this. */ +}; - /* We can't trust desc values once Guest has booted: we use these. */ - unsigned int feature_len; - unsigned int num_vq; +/* This is the layout (little-endian) of the PCI config space. */ +struct pci_config { + u16 vendor_id, device_id; + u16 command, status; + u8 revid, prog_if, subclass, class; + u8 cacheline_size, lat_timer, header_type, bist; + u32 bar[6]; + u32 cardbus_cis_ptr; + u16 subsystem_vendor_id, subsystem_device_id; + u32 expansion_rom_addr; + u8 capabilities, reserved1[3]; + u32 reserved2; + u8 irq_line, irq_pin, min_grant, max_latency; + + /* Now, this is the linked capability list. */ + struct virtio_pci_cap common; + struct virtio_pci_notify_cap notify; + struct virtio_pci_cap isr; + struct virtio_pci_cap device; + struct virtio_pci_cfg_cap cfg_access; +}; +/* The device structure describes a single device. */ +struct device { /* The name of this device, for --verbose. */ const char *name; @@ -139,6 +171,25 @@ struct device { /* Is it operational */ bool running; + /* Has it written FEATURES_OK but not re-checked it? */ + bool wrote_features_ok; + + /* PCI configuration */ + union { + struct pci_config config; + u32 config_words[sizeof(struct pci_config) / sizeof(u32)]; + }; + + /* Features we offer, and those accepted. */ + u64 features, features_accepted; + + /* Device-specific config hangs off the end of this. */ + struct virtio_pci_mmio *mmio; + + /* PCI MMIO resources (all in BAR0) */ + size_t mmio_size; + u32 mmio_addr; + /* Device-specific data. */ void *priv; }; @@ -150,12 +201,15 @@ struct virtqueue { /* Which device owns me. */ struct device *dev; - /* The configuration for this queue. */ - struct lguest_vqconfig config; + /* Name for printing errors. */ + const char *name; /* The actual ring of buffers. */ struct vring vring; + /* The information about this virtqueue (we only use queue_size on) */ + struct virtio_pci_common_cfg pci_config; + /* Last available index we saw. */ u16 last_avail_idx; @@ -199,6 +253,16 @@ static struct termios orig_term; #define le32_to_cpu(v32) (v32) #define le64_to_cpu(v64) (v64) +/* + * A real device would ignore weird/non-compliant driver behaviour. We + * stop and flag it, to help debugging Linux problems. + */ +#define bad_driver(d, fmt, ...) \ + errx(1, "%s: bad driver: " fmt, (d)->name, ## __VA_ARGS__) +#define bad_driver_vq(vq, fmt, ...) \ + errx(1, "%s vq %s: bad driver: " fmt, (vq)->dev->name, \ + vq->name, ## __VA_ARGS__) + /* Is this iovec empty? */ static bool iov_empty(const struct iovec iov[], unsigned int num_iov) { @@ -211,7 +275,8 @@ static bool iov_empty(const struct iovec iov[], unsigned int num_iov) } /* Take len bytes from the front of this iovec. */ -static void iov_consume(struct iovec iov[], unsigned num_iov, +static void iov_consume(struct device *d, + struct iovec iov[], unsigned num_iov, void *dest, unsigned len) { unsigned int i; @@ -229,14 +294,7 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, len -= used; } if (len != 0) - errx(1, "iovec too short!"); -} - -/* The device virtqueue descriptors are followed by feature bitmasks. */ -static u8 *get_feature_bits(struct device *dev) -{ - return (u8 *)(dev->desc + 1) - + dev->num_vq * sizeof(struct lguest_vqconfig); + bad_driver(d, "iovec too short!"); } /*L:100 @@ -309,14 +367,20 @@ static void *map_zeroed_pages(unsigned int num) return addr + getpagesize(); } -/* Get some more pages for a device. */ -static void *get_pages(unsigned int num) +/* Get some bytes which won't be mapped into the guest. */ +static unsigned long get_mmio_region(size_t size) { - void *addr = from_guest_phys(guest_limit); + unsigned long addr = guest_mmio; + size_t i; + + if (!size) + return addr; + + /* Size has to be a power of 2 (and multiple of 16) */ + for (i = 1; i < size; i <<= 1); + + guest_mmio += i; - guest_limit += num * getpagesize(); - if (guest_limit > guest_max) - errx(1, "Not enough memory for devices"); return addr; } @@ -547,9 +611,11 @@ static void tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, (unsigned long)guest_base, - guest_limit / getpagesize(), start }; - verbose("Guest: %p - %p (%#lx)\n", - guest_base, guest_base + guest_limit, guest_limit); + guest_limit / getpagesize(), start, + (guest_mmio+getpagesize()-1) / getpagesize() }; + verbose("Guest: %p - %p (%#lx, MMIO %#lx)\n", + guest_base, guest_base + guest_limit, + guest_limit, guest_mmio); lguest_fd = open_or_die("/dev/lguest", O_RDWR); if (write(lguest_fd, args, sizeof(args)) < 0) err(1, "Writing to /dev/lguest"); @@ -564,7 +630,8 @@ static void tell_kernel(unsigned long start) * we have a convenient routine which checks it and exits with an error message * if something funny is going on: */ -static void *_check_pointer(unsigned long addr, unsigned int size, +static void *_check_pointer(struct device *d, + unsigned long addr, unsigned int size, unsigned int line) { /* @@ -572,7 +639,8 @@ static void *_check_pointer(unsigned long addr, unsigned int size, * or addr + size wraps around. */ if ((addr + size) > guest_limit || (addr + size) < addr) - errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); + bad_driver(d, "%s:%i: Invalid address %#lx", + __FILE__, line, addr); /* * We return a pointer for the caller's convenience, now we know it's * safe to use. @@ -580,14 +648,14 @@ static void *_check_pointer(unsigned long addr, unsigned int size, return from_guest_phys(addr); } /* A macro which transparently hands the line number to the real function. */ -#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) +#define check_pointer(d,addr,size) _check_pointer(d, addr, size, __LINE__) /* * Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, or vq->vring.num if we're * at the end. */ -static unsigned next_desc(struct vring_desc *desc, +static unsigned next_desc(struct device *d, struct vring_desc *desc, unsigned int i, unsigned int max) { unsigned int next; @@ -602,7 +670,7 @@ static unsigned next_desc(struct vring_desc *desc, wmb(); if (next >= max) - errx(1, "Desc next is %u", next); + bad_driver(d, "Desc next is %u", next); return next; } @@ -613,21 +681,48 @@ static unsigned next_desc(struct vring_desc *desc, */ static void trigger_irq(struct virtqueue *vq) { - unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; + unsigned long buf[] = { LHREQ_IRQ, vq->dev->config.irq_line }; /* Don't inform them if nothing used. */ if (!vq->pending_used) return; vq->pending_used = 0; - /* If they don't want an interrupt, don't send one... */ + /* + * 2.4.7.1: + * + * If the VIRTIO_F_EVENT_IDX feature bit is not negotiated: + * The driver MUST set flags to 0 or 1. + */ + if (vq->vring.avail->flags > 1) + bad_driver_vq(vq, "avail->flags = %u\n", vq->vring.avail->flags); + + /* + * 2.4.7.2: + * + * If the VIRTIO_F_EVENT_IDX feature bit is not negotiated: + * + * - The device MUST ignore the used_event value. + * - After the device writes a descriptor index into the used ring: + * - If flags is 1, the device SHOULD NOT send an interrupt. + * - If flags is 0, the device MUST send an interrupt. + */ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { return; } + /* + * 4.1.4.5.1: + * + * If MSI-X capability is disabled, the device MUST set the Queue + * Interrupt bit in ISR status before sending a virtqueue notification + * to the driver. + */ + vq->dev->mmio->isr = 0x1; + /* Send the Guest an interrupt tell them we used something up. */ if (write(lguest_fd, buf, sizeof(buf)) != 0) - err(1, "Triggering irq %i", vq->config.irq); + err(1, "Triggering irq %i", vq->dev->config.irq_line); } /* @@ -646,6 +741,14 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, struct vring_desc *desc; u16 last_avail = lg_last_avail(vq); + /* + * 2.4.7.1: + * + * The driver MUST handle spurious interrupts from the device. + * + * That's why this is a while loop. + */ + /* There's nothing available? */ while (last_avail == vq->vring.avail->idx) { u64 event; @@ -679,8 +782,8 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, /* Check it isn't doing very strange things with descriptor numbers. */ if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num) - errx(1, "Guest moved used index from %u to %u", - last_avail, vq->vring.avail->idx); + bad_driver_vq(vq, "Guest moved used index from %u to %u", + last_avail, vq->vring.avail->idx); /* * Make sure we read the descriptor number *after* we read the ring @@ -697,7 +800,7 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, /* If their number is silly, that's a fatal mistake. */ if (head >= vq->vring.num) - errx(1, "Guest says index %u is available", head); + bad_driver_vq(vq, "Guest says index %u is available", head); /* When we start there are none of either input nor output. */ *out_num = *in_num = 0; @@ -712,24 +815,73 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, * that: no rmb() required. */ - /* - * If this is an indirect entry, then this buffer contains a descriptor - * table which we handle as if it's any normal descriptor chain. - */ - if (desc[i].flags & VRING_DESC_F_INDIRECT) { - if (desc[i].len % sizeof(struct vring_desc)) - errx(1, "Invalid size for indirect buffer table"); + do { + /* + * If this is an indirect entry, then this buffer contains a + * descriptor table which we handle as if it's any normal + * descriptor chain. + */ + if (desc[i].flags & VRING_DESC_F_INDIRECT) { + /* 2.4.5.3.1: + * + * The driver MUST NOT set the VIRTQ_DESC_F_INDIRECT + * flag unless the VIRTIO_F_INDIRECT_DESC feature was + * negotiated. + */ + if (!(vq->dev->features_accepted & + (1<<VIRTIO_RING_F_INDIRECT_DESC))) + bad_driver_vq(vq, "vq indirect not negotiated"); - max = desc[i].len / sizeof(struct vring_desc); - desc = check_pointer(desc[i].addr, desc[i].len); - i = 0; - } + /* + * 2.4.5.3.1: + * + * The driver MUST NOT set the VIRTQ_DESC_F_INDIRECT + * flag within an indirect descriptor (ie. only one + * table per descriptor). + */ + if (desc != vq->vring.desc) + bad_driver_vq(vq, "Indirect within indirect"); + + /* + * Proposed update VIRTIO-134 spells this out: + * + * A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT + * and VIRTQ_DESC_F_NEXT in flags. + */ + if (desc[i].flags & VRING_DESC_F_NEXT) + bad_driver_vq(vq, "indirect and next together"); + + if (desc[i].len % sizeof(struct vring_desc)) + bad_driver_vq(vq, + "Invalid size for indirect table"); + /* + * 2.4.5.3.2: + * + * The device MUST ignore the write-only flag + * (flags&VIRTQ_DESC_F_WRITE) in the descriptor that + * refers to an indirect table. + * + * We ignore it here: :) + */ + + max = desc[i].len / sizeof(struct vring_desc); + desc = check_pointer(vq->dev, desc[i].addr, desc[i].len); + i = 0; + + /* 2.4.5.3.1: + * + * A driver MUST NOT create a descriptor chain longer + * than the Queue Size of the device. + */ + if (max > vq->pci_config.queue_size) + bad_driver_vq(vq, + "indirect has too many entries"); + } - do { /* Grab the first descriptor, and check it's OK. */ iov[*out_num + *in_num].iov_len = desc[i].len; iov[*out_num + *in_num].iov_base - = check_pointer(desc[i].addr, desc[i].len); + = check_pointer(vq->dev, desc[i].addr, desc[i].len); /* If this is an input descriptor, increment that count. */ if (desc[i].flags & VRING_DESC_F_WRITE) (*in_num)++; @@ -739,14 +891,15 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, * to come before any input descriptors. */ if (*in_num) - errx(1, "Descriptor has out after in"); + bad_driver_vq(vq, + "Descriptor has out after in"); (*out_num)++; } /* If we've got too many, that implies a descriptor loop. */ if (*out_num + *in_num > max) - errx(1, "Looped descriptor"); - } while ((i = next_desc(desc, i, max)) != max); + bad_driver_vq(vq, "Looped descriptor"); + } while ((i = next_desc(vq->dev, desc, i, max)) != max); return head; } @@ -803,7 +956,7 @@ static void console_input(struct virtqueue *vq) /* Make sure there's a descriptor available. */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); if (out_num) - errx(1, "Output buffers in console in queue?"); + bad_driver_vq(vq, "Output buffers in console in queue?"); /* Read into it. This is where we usually wait. */ len = readv(STDIN_FILENO, iov, in_num); @@ -856,7 +1009,7 @@ static void console_output(struct virtqueue *vq) /* We usually wait in here, for the Guest to give us something. */ head = wait_for_vq_desc(vq, iov, &out, &in); if (in) - errx(1, "Input buffers in console output queue?"); + bad_driver_vq(vq, "Input buffers in console output queue?"); /* writev can return a partial write, so we loop here. */ while (!iov_empty(iov, out)) { @@ -865,7 +1018,7 @@ static void console_output(struct virtqueue *vq) warn("Write to stdout gave %i (%d)", len, errno); break; } - iov_consume(iov, out, NULL, len); + iov_consume(vq->dev, iov, out, NULL, len); } /* @@ -894,7 +1047,7 @@ static void net_output(struct virtqueue *vq) /* We usually wait in here for the Guest to give us a packet. */ head = wait_for_vq_desc(vq, iov, &out, &in); if (in) - errx(1, "Input buffers in net output queue?"); + bad_driver_vq(vq, "Input buffers in net output queue?"); /* * Send the whole thing through to /dev/net/tun. It expects the exact * same format: what a coincidence! @@ -942,7 +1095,7 @@ static void net_input(struct virtqueue *vq) */ head = wait_for_vq_desc(vq, iov, &out, &in); if (out) - errx(1, "Output buffers in net input queue?"); + bad_driver_vq(vq, "Output buffers in net input queue?"); /* * If it looks like we'll block reading from the tun device, send them @@ -986,6 +1139,12 @@ static void kill_launcher(int signal) kill(0, SIGTERM); } +static void reset_vq_pci_config(struct virtqueue *vq) +{ + vq->pci_config.queue_size = VIRTQUEUE_NUM; + vq->pci_config.queue_enable = 0; +} + static void reset_device(struct device *dev) { struct virtqueue *vq; @@ -993,53 +1152,705 @@ static void reset_device(struct device *dev) verbose("Resetting device %s\n", dev->name); /* Clear any features they've acked. */ - memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len); + dev->features_accepted = 0; /* We're going to be explicitly killing threads, so ignore them. */ signal(SIGCHLD, SIG_IGN); - /* Zero out the virtqueues, get rid of their threads */ + /* + * 4.1.4.3.1: + * + * The device MUST present a 0 in queue_enable on reset. + * + * This means we set it here, and reset the saved ones in every vq. + */ + dev->mmio->cfg.queue_enable = 0; + + /* Get rid of the virtqueue threads */ for (vq = dev->vq; vq; vq = vq->next) { + vq->last_avail_idx = 0; + reset_vq_pci_config(vq); if (vq->thread != (pid_t)-1) { kill(vq->thread, SIGTERM); waitpid(vq->thread, NULL, 0); vq->thread = (pid_t)-1; } - memset(vq->vring.desc, 0, - vring_size(vq->config.num, LGUEST_VRING_ALIGN)); - lg_last_avail(vq) = 0; } dev->running = false; + dev->wrote_features_ok = false; /* Now we care if threads die. */ signal(SIGCHLD, (void *)kill_launcher); } +static void cleanup_devices(void) +{ + unsigned int i; + + for (i = 1; i < MAX_PCI_DEVICES; i++) { + struct device *d = devices.pci[i]; + if (!d) + continue; + reset_device(d); + } + + /* If we saved off the original terminal settings, restore them now. */ + if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) + tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); +} + +/*L:217 + * We do PCI. This is mainly done to let us test the kernel virtio PCI + * code. + */ + +/* Linux expects a PCI host bridge: ours is a dummy, and first on the bus. */ +static struct device pci_host_bridge; + +static void init_pci_host_bridge(void) +{ + pci_host_bridge.name = "PCI Host Bridge"; + pci_host_bridge.config.class = 0x06; /* bridge */ + pci_host_bridge.config.subclass = 0; /* host bridge */ + devices.pci[0] = &pci_host_bridge; +} + +/* The IO ports used to read the PCI config space. */ +#define PCI_CONFIG_ADDR 0xCF8 +#define PCI_CONFIG_DATA 0xCFC + +/* + * Not really portable, but does help readability: this is what the Guest + * writes to the PCI_CONFIG_ADDR IO port. + */ +union pci_config_addr { + struct { + unsigned mbz: 2; + unsigned offset: 6; + unsigned funcnum: 3; + unsigned devnum: 5; + unsigned busnum: 8; + unsigned reserved: 7; + unsigned enabled : 1; + } bits; + u32 val; +}; + +/* + * We cache what they wrote to the address port, so we know what they're + * talking about when they access the data port. + */ +static union pci_config_addr pci_config_addr; + +static struct device *find_pci_device(unsigned int index) +{ + return devices.pci[index]; +} + +/* PCI can do 1, 2 and 4 byte reads; we handle that here. */ +static void ioread(u16 off, u32 v, u32 mask, u32 *val) +{ + assert(off < 4); + assert(mask == 0xFF || mask == 0xFFFF || mask == 0xFFFFFFFF); + *val = (v >> (off * 8)) & mask; +} + +/* PCI can do 1, 2 and 4 byte writes; we handle that here. */ +static void iowrite(u16 off, u32 v, u32 mask, u32 *dst) +{ + assert(off < 4); + assert(mask == 0xFF || mask == 0xFFFF || mask == 0xFFFFFFFF); + *dst &= ~(mask << (off * 8)); + *dst |= (v & mask) << (off * 8); +} + +/* + * Where PCI_CONFIG_DATA accesses depends on the previous write to + * PCI_CONFIG_ADDR. + */ +static struct device *dev_and_reg(u32 *reg) +{ + if (!pci_config_addr.bits.enabled) + return NULL; + + if (pci_config_addr.bits.funcnum != 0) + return NULL; + + if (pci_config_addr.bits.busnum != 0) + return NULL; + + if (pci_config_addr.bits.offset * 4 >= sizeof(struct pci_config)) + return NULL; + + *reg = pci_config_addr.bits.offset; + return find_pci_device(pci_config_addr.bits.devnum); +} + +/* + * We can get invalid combinations of values while they're writing, so we + * only fault if they try to write with some invalid bar/offset/length. + */ +static bool valid_bar_access(struct device *d, + struct virtio_pci_cfg_cap *cfg_access) +{ + /* We only have 1 bar (BAR0) */ + if (cfg_access->cap.bar != 0) + return false; + + /* Check it's within BAR0. */ + if (cfg_access->cap.offset >= d->mmio_size + || cfg_access->cap.offset + cfg_access->cap.length > d->mmio_size) + return false; + + /* Check length is 1, 2 or 4. */ + if (cfg_access->cap.length != 1 + && cfg_access->cap.length != 2 + && cfg_access->cap.length != 4) + return false; + + /* + * 4.1.4.7.2: + * + * The driver MUST NOT write a cap.offset which is not a multiple of + * cap.length (ie. all accesses MUST be aligned). + */ + if (cfg_access->cap.offset % cfg_access->cap.length != 0) + return false; + + /* Return pointer into word in BAR0. */ + return true; +} + +/* Is this accessing the PCI config address port?. */ +static bool is_pci_addr_port(u16 port) +{ + return port >= PCI_CONFIG_ADDR && port < PCI_CONFIG_ADDR + 4; +} + +static bool pci_addr_iowrite(u16 port, u32 mask, u32 val) +{ + iowrite(port - PCI_CONFIG_ADDR, val, mask, + &pci_config_addr.val); + verbose("PCI%s: %#x/%x: bus %u dev %u func %u reg %u\n", + pci_config_addr.bits.enabled ? "" : " DISABLED", + val, mask, + pci_config_addr.bits.busnum, + pci_config_addr.bits.devnum, + pci_config_addr.bits.funcnum, + pci_config_addr.bits.offset); + return true; +} + +static void pci_addr_ioread(u16 port, u32 mask, u32 *val) +{ + ioread(port - PCI_CONFIG_ADDR, pci_config_addr.val, mask, val); +} + +/* Is this accessing the PCI config data port?. */ +static bool is_pci_data_port(u16 port) +{ + return port >= PCI_CONFIG_DATA && port < PCI_CONFIG_DATA + 4; +} + +static void emulate_mmio_write(struct device *d, u32 off, u32 val, u32 mask); + +static bool pci_data_iowrite(u16 port, u32 mask, u32 val) +{ + u32 reg, portoff; + struct device *d = dev_and_reg(®); + + /* Complain if they don't belong to a device. */ + if (!d) + return false; + + /* They can do 1 byte writes, etc. */ + portoff = port - PCI_CONFIG_DATA; + + /* + * PCI uses a weird way to determine the BAR size: the OS + * writes all 1's, and sees which ones stick. + */ + if (&d->config_words[reg] == &d->config.bar[0]) { + int i; + + iowrite(portoff, val, mask, &d->config.bar[0]); + for (i = 0; (1 << i) < d->mmio_size; i++) + d->config.bar[0] &= ~(1 << i); + return true; + } else if ((&d->config_words[reg] > &d->config.bar[0] + && &d->config_words[reg] <= &d->config.bar[6]) + || &d->config_words[reg] == &d->config.expansion_rom_addr) { + /* Allow writing to any other BAR, or expansion ROM */ + iowrite(portoff, val, mask, &d->config_words[reg]); + return true; + /* We let them overide latency timer and cacheline size */ + } else if (&d->config_words[reg] == (void *)&d->config.cacheline_size) { + /* Only let them change the first two fields. */ + if (mask == 0xFFFFFFFF) + mask = 0xFFFF; + iowrite(portoff, val, mask, &d->config_words[reg]); + return true; + } else if (&d->config_words[reg] == (void *)&d->config.command + && mask == 0xFFFF) { + /* Ignore command writes. */ + return true; + } else if (&d->config_words[reg] + == (void *)&d->config.cfg_access.cap.bar + || &d->config_words[reg] + == &d->config.cfg_access.cap.length + || &d->config_words[reg] + == &d->config.cfg_access.cap.offset) { + + /* + * The VIRTIO_PCI_CAP_PCI_CFG capability + * provides a backdoor to access the MMIO + * regions without mapping them. Weird, but + * useful. + */ + iowrite(portoff, val, mask, &d->config_words[reg]); + return true; + } else if (&d->config_words[reg] == &d->config.cfg_access.pci_cfg_data) { + u32 write_mask; + + /* + * 4.1.4.7.1: + * + * Upon detecting driver write access to pci_cfg_data, the + * device MUST execute a write access at offset cap.offset at + * BAR selected by cap.bar using the first cap.length bytes + * from pci_cfg_data. + */ + + /* Must be bar 0 */ + if (!valid_bar_access(d, &d->config.cfg_access)) + return false; + + iowrite(portoff, val, mask, &d->config.cfg_access.pci_cfg_data); + + /* + * Now emulate a write. The mask we use is set by + * len, *not* this write! + */ + write_mask = (1ULL<<(8*d->config.cfg_access.cap.length)) - 1; + verbose("Window writing %#x/%#x to bar %u, offset %u len %u\n", + d->config.cfg_access.pci_cfg_data, write_mask, + d->config.cfg_access.cap.bar, + d->config.cfg_access.cap.offset, + d->config.cfg_access.cap.length); + + emulate_mmio_write(d, d->config.cfg_access.cap.offset, + d->config.cfg_access.pci_cfg_data, + write_mask); + return true; + } + + /* + * 4.1.4.1: + * + * The driver MUST NOT write into any field of the capability + * structure, with the exception of those with cap_type + * VIRTIO_PCI_CAP_PCI_CFG... + */ + return false; +} + +static u32 emulate_mmio_read(struct device *d, u32 off, u32 mask); + +static void pci_data_ioread(u16 port, u32 mask, u32 *val) +{ + u32 reg; + struct device *d = dev_and_reg(®); + + if (!d) + return; + + /* Read through the PCI MMIO access window is special */ + if (&d->config_words[reg] == &d->config.cfg_access.pci_cfg_data) { + u32 read_mask; + + /* + * 4.1.4.7.1: + * + * Upon detecting driver read access to pci_cfg_data, the + * device MUST execute a read access of length cap.length at + * offset cap.offset at BAR selected by cap.bar and store the + * first cap.length bytes in pci_cfg_data. + */ + /* Must be bar 0 */ + if (!valid_bar_access(d, &d->config.cfg_access)) + bad_driver(d, + "Invalid cfg_access to bar%u, offset %u len %u", + d->config.cfg_access.cap.bar, + d->config.cfg_access.cap.offset, + d->config.cfg_access.cap.length); + + /* + * Read into the window. The mask we use is set by + * len, *not* this read! + */ + read_mask = (1ULL<<(8*d->config.cfg_access.cap.length))-1; + d->config.cfg_access.pci_cfg_data + = emulate_mmio_read(d, + d->config.cfg_access.cap.offset, + read_mask); + verbose("Window read %#x/%#x from bar %u, offset %u len %u\n", + d->config.cfg_access.pci_cfg_data, read_mask, + d->config.cfg_access.cap.bar, + d->config.cfg_access.cap.offset, + d->config.cfg_access.cap.length); + } + ioread(port - PCI_CONFIG_DATA, d->config_words[reg], mask, val); +} + /*L:216 - * This actually creates the thread which services the virtqueue for a device. + * This is where we emulate a handful of Guest instructions. It's ugly + * and we used to do it in the kernel but it grew over time. + */ + +/* + * We use the ptrace syscall's pt_regs struct to talk about registers + * to lguest: these macros convert the names to the offsets. + */ +#define getreg(name) getreg_off(offsetof(struct user_regs_struct, name)) +#define setreg(name, val) \ + setreg_off(offsetof(struct user_regs_struct, name), (val)) + +static u32 getreg_off(size_t offset) +{ + u32 r; + unsigned long args[] = { LHREQ_GETREG, offset }; + + if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0) + err(1, "Getting register %u", offset); + if (pread(lguest_fd, &r, sizeof(r), cpu_id) != sizeof(r)) + err(1, "Reading register %u", offset); + + return r; +} + +static void setreg_off(size_t offset, u32 val) +{ + unsigned long args[] = { LHREQ_SETREG, offset, val }; + + if (pwrite(lguest_fd, args, sizeof(args), cpu_id) < 0) + err(1, "Setting register %u", offset); +} + +/* Get register by instruction encoding */ +static u32 getreg_num(unsigned regnum, u32 mask) +{ + /* 8 bit ops use regnums 4-7 for high parts of word */ + if (mask == 0xFF && (regnum & 0x4)) + return getreg_num(regnum & 0x3, 0xFFFF) >> 8; + + switch (regnum) { + case 0: return getreg(eax) & mask; + case 1: return getreg(ecx) & mask; + case 2: return getreg(edx) & mask; + case 3: return getreg(ebx) & mask; + case 4: return getreg(esp) & mask; + case 5: return getreg(ebp) & mask; + case 6: return getreg(esi) & mask; + case 7: return getreg(edi) & mask; + } + abort(); +} + +/* Set register by instruction encoding */ +static void setreg_num(unsigned regnum, u32 val, u32 mask) +{ + /* Don't try to set bits out of range */ + assert(~(val & ~mask)); + + /* 8 bit ops use regnums 4-7 for high parts of word */ + if (mask == 0xFF && (regnum & 0x4)) { + /* Construct the 16 bits we want. */ + val = (val << 8) | getreg_num(regnum & 0x3, 0xFF); + setreg_num(regnum & 0x3, val, 0xFFFF); + return; + } + + switch (regnum) { + case 0: setreg(eax, val | (getreg(eax) & ~mask)); return; + case 1: setreg(ecx, val | (getreg(ecx) & ~mask)); return; + case 2: setreg(edx, val | (getreg(edx) & ~mask)); return; + case 3: setreg(ebx, val | (getreg(ebx) & ~mask)); return; + case 4: setreg(esp, val | (getreg(esp) & ~mask)); return; + case 5: setreg(ebp, val | (getreg(ebp) & ~mask)); return; + case 6: setreg(esi, val | (getreg(esi) & ~mask)); return; + case 7: setreg(edi, val | (getreg(edi) & ~mask)); return; + } + abort(); +} + +/* Get bytes of displacement appended to instruction, from r/m encoding */ +static u32 insn_displacement_len(u8 mod_reg_rm) +{ + /* Switch on the mod bits */ + switch (mod_reg_rm >> 6) { + case 0: + /* If mod == 0, and r/m == 101, 16-bit displacement follows */ + if ((mod_reg_rm & 0x7) == 0x5) + return 2; + /* Normally, mod == 0 means no literal displacement */ + return 0; + case 1: + /* One byte displacement */ + return 1; + case 2: + /* Four byte displacement */ + return 4; + case 3: + /* Register mode */ + return 0; + } + abort(); +} + +static void emulate_insn(const u8 insn[]) +{ + unsigned long args[] = { LHREQ_TRAP, 13 }; + unsigned int insnlen = 0, in = 0, small_operand = 0, byte_access; + unsigned int eax, port, mask; + /* + * Default is to return all-ones on IO port reads, which traditionally + * means "there's nothing there". + */ + u32 val = 0xFFFFFFFF; + + /* + * This must be the Guest kernel trying to do something, not userspace! + * The bottom two bits of the CS segment register are the privilege + * level. + */ + if ((getreg(xcs) & 3) != 0x1) + goto no_emulate; + + /* Decoding x86 instructions is icky. */ + + /* + * Around 2.6.33, the kernel started using an emulation for the + * cmpxchg8b instruction in early boot on many configurations. This + * code isn't paravirtualized, and it tries to disable interrupts. + * Ignore it, which will Mostly Work. + */ + if (insn[insnlen] == 0xfa) { + /* "cli", or Clear Interrupt Enable instruction. Skip it. */ + insnlen = 1; + goto skip_insn; + } + + /* + * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out. + */ + if (insn[insnlen] == 0x66) { + small_operand = 1; + /* The instruction is 1 byte so far, read the next byte. */ + insnlen = 1; + } + + /* If the lower bit isn't set, it's a single byte access */ + byte_access = !(insn[insnlen] & 1); + + /* + * Now we can ignore the lower bit and decode the 4 opcodes + * we need to emulate. + */ + switch (insn[insnlen] & 0xFE) { + case 0xE4: /* in <next byte>,%al */ + port = insn[insnlen+1]; + insnlen += 2; + in = 1; + break; + case 0xEC: /* in (%dx),%al */ + port = getreg(edx) & 0xFFFF; + insnlen += 1; + in = 1; + break; + case 0xE6: /* out %al,<next byte> */ + port = insn[insnlen+1]; + insnlen += 2; + break; + case 0xEE: /* out %al,(%dx) */ + port = getreg(edx) & 0xFFFF; + insnlen += 1; + break; + default: + /* OK, we don't know what this is, can't emulate. */ + goto no_emulate; + } + + /* Set a mask of the 1, 2 or 4 bytes, depending on size of IO */ + if (byte_access) + mask = 0xFF; + else if (small_operand) + mask = 0xFFFF; + else + mask = 0xFFFFFFFF; + + /* + * If it was an "IN" instruction, they expect the result to be read + * into %eax, so we change %eax. + */ + eax = getreg(eax); + + if (in) { + /* This is the PS/2 keyboard status; 1 means ready for output */ + if (port == 0x64) + val = 1; + else if (is_pci_addr_port(port)) + pci_addr_ioread(port, mask, &val); + else if (is_pci_data_port(port)) + pci_data_ioread(port, mask, &val); + + /* Clear the bits we're about to read */ + eax &= ~mask; + /* Copy bits in from val. */ + eax |= val & mask; + /* Now update the register. */ + setreg(eax, eax); + } else { + if (is_pci_addr_port(port)) { + if (!pci_addr_iowrite(port, mask, eax)) + goto bad_io; + } else if (is_pci_data_port(port)) { + if (!pci_data_iowrite(port, mask, eax)) + goto bad_io; + } + /* There are many other ports, eg. CMOS clock, serial + * and parallel ports, so we ignore them all. */ + } + + verbose("IO %s of %x to %u: %#08x\n", + in ? "IN" : "OUT", mask, port, eax); +skip_insn: + /* Finally, we've "done" the instruction, so move past it. */ + setreg(eip, getreg(eip) + insnlen); + return; + +bad_io: + warnx("Attempt to %s port %u (%#x mask)", + in ? "read from" : "write to", port, mask); + +no_emulate: + /* Inject trap into Guest. */ + if (write(lguest_fd, args, sizeof(args)) < 0) + err(1, "Reinjecting trap 13 for fault at %#x", getreg(eip)); +} + +static struct device *find_mmio_region(unsigned long paddr, u32 *off) +{ + unsigned int i; + + for (i = 1; i < MAX_PCI_DEVICES; i++) { + struct device *d = devices.pci[i]; + + if (!d) + continue; + if (paddr < d->mmio_addr) + continue; + if (paddr >= d->mmio_addr + d->mmio_size) + continue; + *off = paddr - d->mmio_addr; + return d; + } + return NULL; +} + +/* FIXME: Use vq array. */ +static struct virtqueue *vq_by_num(struct device *d, u32 num) +{ + struct virtqueue *vq = d->vq; + + while (num-- && vq) + vq = vq->next; + + return vq; +} + +static void save_vq_config(const struct virtio_pci_common_cfg *cfg, + struct virtqueue *vq) +{ + vq->pci_config = *cfg; +} + +static void restore_vq_config(struct virtio_pci_common_cfg *cfg, + struct virtqueue *vq) +{ + /* Only restore the per-vq part */ + size_t off = offsetof(struct virtio_pci_common_cfg, queue_size); + + memcpy((void *)cfg + off, (void *)&vq->pci_config + off, + sizeof(*cfg) - off); +} + +/* + * 4.1.4.3.2: + * + * The driver MUST configure the other virtqueue fields before + * enabling the virtqueue with queue_enable. + * + * When they enable the virtqueue, we check that their setup is valid. */ -static void create_thread(struct virtqueue *vq) +static void check_virtqueue(struct device *d, struct virtqueue *vq) +{ + /* Because lguest is 32 bit, all the descriptor high bits must be 0 */ + if (vq->pci_config.queue_desc_hi + || vq->pci_config.queue_avail_hi + || vq->pci_config.queue_used_hi) + bad_driver_vq(vq, "invalid 64-bit queue address"); + + /* + * 2.4.1: + * + * The driver MUST ensure that the physical address of the first byte + * of each virtqueue part is a multiple of the specified alignment + * value in the above table. + */ + if (vq->pci_config.queue_desc_lo % 16 + || vq->pci_config.queue_avail_lo % 2 + || vq->pci_config.queue_used_lo % 4) + bad_driver_vq(vq, "invalid alignment in queue addresses"); + + /* Initialize the virtqueue and check they're all in range. */ + vq->vring.num = vq->pci_config.queue_size; + vq->vring.desc = check_pointer(vq->dev, + vq->pci_config.queue_desc_lo, + sizeof(*vq->vring.desc) * vq->vring.num); + vq->vring.avail = check_pointer(vq->dev, + vq->pci_config.queue_avail_lo, + sizeof(*vq->vring.avail) + + (sizeof(vq->vring.avail->ring[0]) + * vq->vring.num)); + vq->vring.used = check_pointer(vq->dev, + vq->pci_config.queue_used_lo, + sizeof(*vq->vring.used) + + (sizeof(vq->vring.used->ring[0]) + * vq->vring.num)); + + /* + * 2.4.9.1: + * + * The driver MUST initialize flags in the used ring to 0 + * when allocating the used ring. + */ + if (vq->vring.used->flags != 0) + bad_driver_vq(vq, "invalid initial used.flags %#x", + vq->vring.used->flags); +} + +static void start_virtqueue(struct virtqueue *vq) { /* * Create stack for thread. Since the stack grows upwards, we point * the stack pointer to the end of this region. */ char *stack = malloc(32768); - unsigned long args[] = { LHREQ_EVENTFD, - vq->config.pfn*getpagesize(), 0 }; /* Create a zero-initialized eventfd. */ vq->eventfd = eventfd(0, 0); if (vq->eventfd < 0) err(1, "Creating eventfd"); - args[2] = vq->eventfd; - - /* - * Attach an eventfd to this virtqueue: it will go off when the Guest - * does an LHCALL_NOTIFY for this vq. - */ - if (write(lguest_fd, &args, sizeof(args)) != 0) - err(1, "Attaching eventfd"); /* * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so @@ -1048,167 +1859,531 @@ static void create_thread(struct virtqueue *vq) vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); if (vq->thread == (pid_t)-1) err(1, "Creating clone"); - - /* We close our local copy now the child has it. */ - close(vq->eventfd); } -static void start_device(struct device *dev) +static void start_virtqueues(struct device *d) { - unsigned int i; struct virtqueue *vq; - verbose("Device %s OK: offered", dev->name); - for (i = 0; i < dev->feature_len; i++) - verbose(" %02x", get_feature_bits(dev)[i]); - verbose(", accepted"); - for (i = 0; i < dev->feature_len; i++) - verbose(" %02x", get_feature_bits(dev) - [dev->feature_len+i]); - - for (vq = dev->vq; vq; vq = vq->next) { - if (vq->service) - create_thread(vq); + for (vq = d->vq; vq; vq = vq->next) { + if (vq->pci_config.queue_enable) + start_virtqueue(vq); } - dev->running = true; } -static void cleanup_devices(void) +static void emulate_mmio_write(struct device *d, u32 off, u32 val, u32 mask) { - struct device *dev; + struct virtqueue *vq; - for (dev = devices.dev; dev; dev = dev->next) - reset_device(dev); + switch (off) { + case offsetof(struct virtio_pci_mmio, cfg.device_feature_select): + /* + * 4.1.4.3.1: + * + * The device MUST present the feature bits it is offering in + * device_feature, starting at bit device_feature_select ∗ 32 + * for any device_feature_select written by the driver + */ + if (val == 0) + d->mmio->cfg.device_feature = d->features; + else if (val == 1) + d->mmio->cfg.device_feature = (d->features >> 32); + else + d->mmio->cfg.device_feature = 0; + goto feature_write_through32; + case offsetof(struct virtio_pci_mmio, cfg.guest_feature_select): + if (val > 1) + bad_driver(d, "Unexpected driver select %u", val); + goto feature_write_through32; + case offsetof(struct virtio_pci_mmio, cfg.guest_feature): + if (d->mmio->cfg.guest_feature_select == 0) { + d->features_accepted &= ~((u64)0xFFFFFFFF); + d->features_accepted |= val; + } else { + assert(d->mmio->cfg.guest_feature_select == 1); + d->features_accepted &= 0xFFFFFFFF; + d->features_accepted |= ((u64)val) << 32; + } + /* + * 2.2.1: + * + * The driver MUST NOT accept a feature which the device did + * not offer + */ + if (d->features_accepted & ~d->features) + bad_driver(d, "over-accepted features %#llx of %#llx", + d->features_accepted, d->features); + goto feature_write_through32; + case offsetof(struct virtio_pci_mmio, cfg.device_status): { + u8 prev; + + verbose("%s: device status -> %#x\n", d->name, val); + /* + * 4.1.4.3.1: + * + * The device MUST reset when 0 is written to device_status, + * and present a 0 in device_status once that is done. + */ + if (val == 0) { + reset_device(d); + goto write_through8; + } - /* If we saved off the original terminal settings, restore them now. */ - if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) - tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); -} + /* 2.1.1: The driver MUST NOT clear a device status bit. */ + if (d->mmio->cfg.device_status & ~val) + bad_driver(d, "unset of device status bit %#x -> %#x", + d->mmio->cfg.device_status, val); -/* When the Guest tells us they updated the status field, we handle it. */ -static void update_device_status(struct device *dev) -{ - /* A zero status is a reset, otherwise it's a set of flags. */ - if (dev->desc->status == 0) - reset_device(dev); - else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { - warnx("Device %s configuration FAILED", dev->name); - if (dev->running) - reset_device(dev); - } else { - if (dev->running) - err(1, "Device %s features finalized twice", dev->name); - start_device(dev); + /* + * 2.1.2: + * + * The device MUST NOT consume buffers or notify the driver + * before DRIVER_OK. + */ + if (val & VIRTIO_CONFIG_S_DRIVER_OK + && !(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER_OK)) + start_virtqueues(d); + + /* + * 3.1.1: + * + * The driver MUST follow this sequence to initialize a device: + * - Reset the device. + * - Set the ACKNOWLEDGE status bit: the guest OS has + * notice the device. + * - Set the DRIVER status bit: the guest OS knows how + * to drive the device. + * - Read device feature bits, and write the subset + * of feature bits understood by the OS and driver + * to the device. During this step the driver MAY + * read (but MUST NOT write) the device-specific + * configuration fields to check that it can + * support the device before accepting it. + * - Set the FEATURES_OK status bit. The driver + * MUST not accept new feature bits after this + * step. + * - Re-read device status to ensure the FEATURES_OK + * bit is still set: otherwise, the device does + * not support our subset of features and the + * device is unusable. + * - Perform device-specific setup, including + * discovery of virtqueues for the device, + * optional per-bus setup, reading and possibly + * writing the device’s virtio configuration + * space, and population of virtqueues. + * - Set the DRIVER_OK status bit. At this point the + * device is “live”. + */ + prev = 0; + switch (val & ~d->mmio->cfg.device_status) { + case VIRTIO_CONFIG_S_DRIVER_OK: + prev |= VIRTIO_CONFIG_S_FEATURES_OK; /* fall thru */ + case VIRTIO_CONFIG_S_FEATURES_OK: + prev |= VIRTIO_CONFIG_S_DRIVER; /* fall thru */ + case VIRTIO_CONFIG_S_DRIVER: + prev |= VIRTIO_CONFIG_S_ACKNOWLEDGE; /* fall thru */ + case VIRTIO_CONFIG_S_ACKNOWLEDGE: + break; + default: + bad_driver(d, "unknown device status bit %#x -> %#x", + d->mmio->cfg.device_status, val); + } + if (d->mmio->cfg.device_status != prev) + bad_driver(d, "unexpected status transition %#x -> %#x", + d->mmio->cfg.device_status, val); + + /* If they just wrote FEATURES_OK, we make sure they read */ + switch (val & ~d->mmio->cfg.device_status) { + case VIRTIO_CONFIG_S_FEATURES_OK: + d->wrote_features_ok = true; + break; + case VIRTIO_CONFIG_S_DRIVER_OK: + if (d->wrote_features_ok) + bad_driver(d, "did not re-read FEATURES_OK"); + break; + } + goto write_through8; } -} + case offsetof(struct virtio_pci_mmio, cfg.queue_select): + vq = vq_by_num(d, val); + /* + * 4.1.4.3.1: + * + * The device MUST present a 0 in queue_size if the virtqueue + * corresponding to the current queue_select is unavailable. + */ + if (!vq) { + d->mmio->cfg.queue_size = 0; + goto write_through16; + } + /* Save registers for old vq, if it was a valid vq */ + if (d->mmio->cfg.queue_size) + save_vq_config(&d->mmio->cfg, + vq_by_num(d, d->mmio->cfg.queue_select)); + /* Restore the registers for the queue they asked for */ + restore_vq_config(&d->mmio->cfg, vq); + goto write_through16; + case offsetof(struct virtio_pci_mmio, cfg.queue_size): + /* + * 4.1.4.3.2: + * + * The driver MUST NOT write a value which is not a power of 2 + * to queue_size. + */ + if (val & (val-1)) + bad_driver(d, "invalid queue size %u", val); + if (d->mmio->cfg.queue_enable) + bad_driver(d, "changing queue size on live device"); + goto write_through16; + case offsetof(struct virtio_pci_mmio, cfg.queue_msix_vector): + bad_driver(d, "attempt to set MSIX vector to %u", val); + case offsetof(struct virtio_pci_mmio, cfg.queue_enable): { + struct virtqueue *vq = vq_by_num(d, d->mmio->cfg.queue_select); -/*L:215 - * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In - * particular, it's used to notify us of device status changes during boot. - */ -static void handle_output(unsigned long addr) -{ - struct device *i; + /* + * 4.1.4.3.2: + * + * The driver MUST NOT write a 0 to queue_enable. + */ + if (val != 1) + bad_driver(d, "setting queue_enable to %u", val); - /* Check each device. */ - for (i = devices.dev; i; i = i->next) { - struct virtqueue *vq; + /* + * 3.1.1: + * + * 7. Perform device-specific setup, including discovery of + * virtqueues for the device, optional per-bus setup, + * reading and possibly writing the device’s virtio + * configuration space, and population of virtqueues. + * 8. Set the DRIVER_OK status bit. + * + * All our devices require all virtqueues to be enabled, so + * they should have done that before setting DRIVER_OK. + */ + if (d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER_OK) + bad_driver(d, "enabling vq after DRIVER_OK"); + d->mmio->cfg.queue_enable = val; + save_vq_config(&d->mmio->cfg, vq); + check_virtqueue(d, vq); + goto write_through16; + } + case offsetof(struct virtio_pci_mmio, cfg.queue_notify_off): + bad_driver(d, "attempt to write to queue_notify_off"); + case offsetof(struct virtio_pci_mmio, cfg.queue_desc_lo): + case offsetof(struct virtio_pci_mmio, cfg.queue_desc_hi): + case offsetof(struct virtio_pci_mmio, cfg.queue_avail_lo): + case offsetof(struct virtio_pci_mmio, cfg.queue_avail_hi): + case offsetof(struct virtio_pci_mmio, cfg.queue_used_lo): + case offsetof(struct virtio_pci_mmio, cfg.queue_used_hi): /* - * Notifications to device descriptors mean they updated the - * device status. + * 4.1.4.3.2: + * + * The driver MUST configure the other virtqueue fields before + * enabling the virtqueue with queue_enable. */ - if (from_guest_phys(addr) == i->desc) { - update_device_status(i); - return; - } + if (d->mmio->cfg.queue_enable) + bad_driver(d, "changing queue on live device"); + + /* + * 3.1.1: + * + * The driver MUST follow this sequence to initialize a device: + *... + * 5. Set the FEATURES_OK status bit. The driver MUST not + * accept new feature bits after this step. + */ + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_FEATURES_OK)) + bad_driver(d, "setting up vq before FEATURES_OK"); - /* Devices should not be used before features are finalized. */ - for (vq = i->vq; vq; vq = vq->next) { - if (addr != vq->config.pfn*getpagesize()) - continue; - errx(1, "Notification on %s before setup!", i->name); + /* + * 6. Re-read device status to ensure the FEATURES_OK bit is + * still set... + */ + if (d->wrote_features_ok) + bad_driver(d, "didn't re-read FEATURES_OK before setup"); + + goto write_through32; + case offsetof(struct virtio_pci_mmio, notify): + vq = vq_by_num(d, val); + if (!vq) + bad_driver(d, "Invalid vq notification on %u", val); + /* Notify the process handling this vq by adding 1 to eventfd */ + write(vq->eventfd, "\1\0\0\0\0\0\0\0", 8); + goto write_through16; + case offsetof(struct virtio_pci_mmio, isr): + bad_driver(d, "Unexpected write to isr"); + /* Weird corner case: write to emerg_wr of console */ + case sizeof(struct virtio_pci_mmio) + + offsetof(struct virtio_console_config, emerg_wr): + if (strcmp(d->name, "console") == 0) { + char c = val; + write(STDOUT_FILENO, &c, 1); + goto write_through32; } + /* Fall through... */ + default: + /* + * 4.1.4.3.2: + * + * The driver MUST NOT write to device_feature, num_queues, + * config_generation or queue_notify_off. + */ + bad_driver(d, "Unexpected write to offset %u", off); } +feature_write_through32: /* - * Early console write is done using notify on a nul-terminated string - * in Guest memory. It's also great for hacking debugging messages - * into a Guest. + * 3.1.1: + * + * The driver MUST follow this sequence to initialize a device: + *... + * - Set the DRIVER status bit: the guest OS knows how + * to drive the device. + * - Read device feature bits, and write the subset + * of feature bits understood by the OS and driver + * to the device. + *... + * - Set the FEATURES_OK status bit. The driver MUST not + * accept new feature bits after this step. */ - if (addr >= guest_limit) - errx(1, "Bad NOTIFY %#lx", addr); + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER)) + bad_driver(d, "feature write before VIRTIO_CONFIG_S_DRIVER"); + if (d->mmio->cfg.device_status & VIRTIO_CONFIG_S_FEATURES_OK) + bad_driver(d, "feature write after VIRTIO_CONFIG_S_FEATURES_OK"); - write(STDOUT_FILENO, from_guest_phys(addr), - strnlen(from_guest_phys(addr), guest_limit - addr)); + /* + * 4.1.3.1: + * + * The driver MUST access each field using the “natural” access + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses for + * 16-bit fields and 8-bit accesses for 8-bit fields. + */ +write_through32: + if (mask != 0xFFFFFFFF) { + bad_driver(d, "non-32-bit write to offset %u (%#x)", + off, getreg(eip)); + return; + } + memcpy((char *)d->mmio + off, &val, 4); + return; + +write_through16: + if (mask != 0xFFFF) + bad_driver(d, "non-16-bit write to offset %u (%#x)", + off, getreg(eip)); + memcpy((char *)d->mmio + off, &val, 2); + return; + +write_through8: + if (mask != 0xFF) + bad_driver(d, "non-8-bit write to offset %u (%#x)", + off, getreg(eip)); + memcpy((char *)d->mmio + off, &val, 1); + return; } -/*L:190 - * Device Setup - * - * All devices need a descriptor so the Guest knows it exists, and a "struct - * device" so the Launcher can keep track of it. We have common helper - * routines to allocate and manage them. - */ - -/* - * The layout of the device page is a "struct lguest_device_desc" followed by a - * number of virtqueue descriptors, then two sets of feature bits, then an - * array of configuration bytes. This routine returns the configuration - * pointer. - */ -static u8 *device_config(const struct device *dev) +static u32 emulate_mmio_read(struct device *d, u32 off, u32 mask) { - return (void *)(dev->desc + 1) - + dev->num_vq * sizeof(struct lguest_vqconfig) - + dev->feature_len * 2; + u8 isr; + u32 val = 0; + + switch (off) { + case offsetof(struct virtio_pci_mmio, cfg.device_feature_select): + case offsetof(struct virtio_pci_mmio, cfg.device_feature): + case offsetof(struct virtio_pci_mmio, cfg.guest_feature_select): + case offsetof(struct virtio_pci_mmio, cfg.guest_feature): + /* + * 3.1.1: + * + * The driver MUST follow this sequence to initialize a device: + *... + * - Set the DRIVER status bit: the guest OS knows how + * to drive the device. + * - Read device feature bits, and write the subset + * of feature bits understood by the OS and driver + * to the device. + */ + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER)) + bad_driver(d, + "feature read before VIRTIO_CONFIG_S_DRIVER"); + goto read_through32; + case offsetof(struct virtio_pci_mmio, cfg.msix_config): + bad_driver(d, "read of msix_config"); + case offsetof(struct virtio_pci_mmio, cfg.num_queues): + goto read_through16; + case offsetof(struct virtio_pci_mmio, cfg.device_status): + /* As they did read, any write of FEATURES_OK is now fine. */ + d->wrote_features_ok = false; + goto read_through8; + case offsetof(struct virtio_pci_mmio, cfg.config_generation): + /* + * 4.1.4.3.1: + * + * The device MUST present a changed config_generation after + * the driver has read a device-specific configuration value + * which has changed since any part of the device-specific + * configuration was last read. + * + * This is simple: none of our devices change config, so this + * is always 0. + */ + goto read_through8; + case offsetof(struct virtio_pci_mmio, notify): + /* + * 3.1.1: + * + * The driver MUST NOT notify the device before setting + * DRIVER_OK. + */ + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER_OK)) + bad_driver(d, "notify before VIRTIO_CONFIG_S_DRIVER_OK"); + goto read_through16; + case offsetof(struct virtio_pci_mmio, isr): + if (mask != 0xFF) + bad_driver(d, "non-8-bit read from offset %u (%#x)", + off, getreg(eip)); + isr = d->mmio->isr; + /* + * 4.1.4.5.1: + * + * The device MUST reset ISR status to 0 on driver read. + */ + d->mmio->isr = 0; + return isr; + case offsetof(struct virtio_pci_mmio, padding): + bad_driver(d, "read from padding (%#x)", getreg(eip)); + default: + /* Read from device config space, beware unaligned overflow */ + if (off > d->mmio_size - 4) + bad_driver(d, "read past end (%#x)", getreg(eip)); + + /* + * 3.1.1: + * The driver MUST follow this sequence to initialize a device: + *... + * 3. Set the DRIVER status bit: the guest OS knows how to + * drive the device. + * 4. Read device feature bits, and write the subset of + * feature bits understood by the OS and driver to the + * device. During this step the driver MAY read (but MUST NOT + * write) the device-specific configuration fields to check + * that it can support the device before accepting it. + */ + if (!(d->mmio->cfg.device_status & VIRTIO_CONFIG_S_DRIVER)) + bad_driver(d, + "config read before VIRTIO_CONFIG_S_DRIVER"); + + if (mask == 0xFFFFFFFF) + goto read_through32; + else if (mask == 0xFFFF) + goto read_through16; + else + goto read_through8; + } + + /* + * 4.1.3.1: + * + * The driver MUST access each field using the “natural” access + * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses for + * 16-bit fields and 8-bit accesses for 8-bit fields. + */ +read_through32: + if (mask != 0xFFFFFFFF) + bad_driver(d, "non-32-bit read to offset %u (%#x)", + off, getreg(eip)); + memcpy(&val, (char *)d->mmio + off, 4); + return val; + +read_through16: + if (mask != 0xFFFF) + bad_driver(d, "non-16-bit read to offset %u (%#x)", + off, getreg(eip)); + memcpy(&val, (char *)d->mmio + off, 2); + return val; + +read_through8: + if (mask != 0xFF) + bad_driver(d, "non-8-bit read to offset %u (%#x)", + off, getreg(eip)); + memcpy(&val, (char *)d->mmio + off, 1); + return val; } -/* - * This routine allocates a new "struct lguest_device_desc" from descriptor - * table page just above the Guest's normal memory. It returns a pointer to - * that descriptor. - */ -static struct lguest_device_desc *new_dev_desc(u16 type) +static void emulate_mmio(unsigned long paddr, const u8 *insn) { - struct lguest_device_desc d = { .type = type }; - void *p; + u32 val, off, mask = 0xFFFFFFFF, insnlen = 0; + struct device *d = find_mmio_region(paddr, &off); + unsigned long args[] = { LHREQ_TRAP, 14 }; - /* Figure out where the next device config is, based on the last one. */ - if (devices.lastdev) - p = device_config(devices.lastdev) - + devices.lastdev->desc->config_len; - else - p = devices.descpage; + if (!d) { + warnx("MMIO touching %#08lx (not a device)", paddr); + goto reinject; + } + + /* Prefix makes it a 16 bit op */ + if (insn[0] == 0x66) { + mask = 0xFFFF; + insnlen++; + } - /* We only have one page for all the descriptors. */ - if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) - errx(1, "Too many devices"); + /* iowrite */ + if (insn[insnlen] == 0x89) { + /* Next byte is r/m byte: bits 3-5 are register. */ + val = getreg_num((insn[insnlen+1] >> 3) & 0x7, mask); + emulate_mmio_write(d, off, val, mask); + insnlen += 2 + insn_displacement_len(insn[insnlen+1]); + } else if (insn[insnlen] == 0x8b) { /* ioread */ + /* Next byte is r/m byte: bits 3-5 are register. */ + val = emulate_mmio_read(d, off, mask); + setreg_num((insn[insnlen+1] >> 3) & 0x7, val, mask); + insnlen += 2 + insn_displacement_len(insn[insnlen+1]); + } else if (insn[0] == 0x88) { /* 8-bit iowrite */ + mask = 0xff; + /* Next byte is r/m byte: bits 3-5 are register. */ + val = getreg_num((insn[1] >> 3) & 0x7, mask); + emulate_mmio_write(d, off, val, mask); + insnlen = 2 + insn_displacement_len(insn[1]); + } else if (insn[0] == 0x8a) { /* 8-bit ioread */ + mask = 0xff; + val = emulate_mmio_read(d, off, mask); + setreg_num((insn[1] >> 3) & 0x7, val, mask); + insnlen = 2 + insn_displacement_len(insn[1]); + } else { + warnx("Unknown MMIO instruction touching %#08lx:" + " %02x %02x %02x %02x at %u", + paddr, insn[0], insn[1], insn[2], insn[3], getreg(eip)); + reinject: + /* Inject trap into Guest. */ + if (write(lguest_fd, args, sizeof(args)) < 0) + err(1, "Reinjecting trap 14 for fault at %#x", + getreg(eip)); + return; + } - /* p might not be aligned, so we memcpy in. */ - return memcpy(p, &d, sizeof(d)); + /* Finally, we've "done" the instruction, so move past it. */ + setreg(eip, getreg(eip) + insnlen); } -/* - * Each device descriptor is followed by the description of its virtqueues. We - * specify how many descriptors the virtqueue is to have. +/*L:190 + * Device Setup + * + * All devices need a descriptor so the Guest knows it exists, and a "struct + * device" so the Launcher can keep track of it. We have common helper + * routines to allocate and manage them. */ -static void add_virtqueue(struct device *dev, unsigned int num_descs, - void (*service)(struct virtqueue *)) +static void add_pci_virtqueue(struct device *dev, + void (*service)(struct virtqueue *), + const char *name) { - unsigned int pages; struct virtqueue **i, *vq = malloc(sizeof(*vq)); - void *p; - - /* First we need some memory for this virtqueue. */ - pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) - / getpagesize(); - p = get_pages(pages); /* Initialize the virtqueue */ vq->next = NULL; vq->last_avail_idx = 0; vq->dev = dev; + vq->name = name; /* * This is the routine the service thread will run, and its Process ID @@ -1218,25 +2393,11 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, vq->thread = (pid_t)-1; /* Initialize the configuration. */ - vq->config.num = num_descs; - vq->config.irq = devices.next_irq++; - vq->config.pfn = to_guest_phys(p) / getpagesize(); - - /* Initialize the vring. */ - vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); - - /* - * Append virtqueue to this device's descriptor. We use - * device_config() to get the end of the device's current virtqueues; - * we check that we haven't added any config or feature information - * yet, otherwise we'd be overwriting them. - */ - assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); - memcpy(device_config(dev), &vq->config, sizeof(vq->config)); - dev->num_vq++; - dev->desc->num_vq++; + reset_vq_pci_config(vq); + vq->pci_config.queue_notify_off = 0; - verbose("Virtqueue page %#lx\n", to_guest_phys(p)); + /* Add one to the number of queues */ + vq->dev->mmio->cfg.num_queues++; /* * Add to tail of list, so dev->vq is first vq, dev->vq->next is @@ -1246,73 +2407,239 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, *i = vq; } -/* - * The first half of the feature bitmask is for us to advertise features. The - * second half is for the Guest to accept features. - */ -static void add_feature(struct device *dev, unsigned bit) +/* The Guest accesses the feature bits via the PCI common config MMIO region */ +static void add_pci_feature(struct device *dev, unsigned bit) { - u8 *features = get_feature_bits(dev); + dev->features |= (1ULL << bit); +} - /* We can't extend the feature bits once we've added config bytes */ - if (dev->desc->feature_len <= bit / CHAR_BIT) { - assert(dev->desc->config_len == 0); - dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1; - } +/* For devices with no config. */ +static void no_device_config(struct device *dev) +{ + dev->mmio_addr = get_mmio_region(dev->mmio_size); - features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); + dev->config.bar[0] = dev->mmio_addr; + /* Bottom 4 bits must be zero */ + assert(~(dev->config.bar[0] & 0xF)); +} + +/* This puts the device config into BAR0 */ +static void set_device_config(struct device *dev, const void *conf, size_t len) +{ + /* Set up BAR 0 */ + dev->mmio_size += len; + dev->mmio = realloc(dev->mmio, dev->mmio_size); + memcpy(dev->mmio + 1, conf, len); + + /* + * 4.1.4.6: + * + * The device MUST present at least one VIRTIO_PCI_CAP_DEVICE_CFG + * capability for any device type which has a device-specific + * configuration. + */ + /* Hook up device cfg */ + dev->config.cfg_access.cap.cap_next + = offsetof(struct pci_config, device); + + /* + * 4.1.4.6.1: + * + * The offset for the device-specific configuration MUST be 4-byte + * aligned. + */ + assert(dev->config.cfg_access.cap.cap_next % 4 == 0); + + /* Fix up device cfg field length. */ + dev->config.device.length = len; + + /* The rest is the same as the no-config case */ + no_device_config(dev); +} + +static void init_cap(struct virtio_pci_cap *cap, size_t caplen, int type, + size_t bar_offset, size_t bar_bytes, u8 next) +{ + cap->cap_vndr = PCI_CAP_ID_VNDR; + cap->cap_next = next; + cap->cap_len = caplen; + cap->cfg_type = type; + cap->bar = 0; + memset(cap->padding, 0, sizeof(cap->padding)); + cap->offset = bar_offset; + cap->length = bar_bytes; } /* - * This routine sets the configuration fields for an existing device's - * descriptor. It only works for the last device, but that's OK because that's - * how we use it. + * This sets up the pci_config structure, as defined in the virtio 1.0 + * standard (and PCI standard). */ -static void set_config(struct device *dev, unsigned len, const void *conf) +static void init_pci_config(struct pci_config *pci, u16 type, + u8 class, u8 subclass) { - /* Check we haven't overflowed our single page. */ - if (device_config(dev) + len > devices.descpage + getpagesize()) - errx(1, "Too many devices"); + size_t bar_offset, bar_len; + + /* + * 4.1.4.4.1: + * + * The device MUST either present notify_off_multiplier as an even + * power of 2, or present notify_off_multiplier as 0. + * + * 2.1.2: + * + * The device MUST initialize device status to 0 upon reset. + */ + memset(pci, 0, sizeof(*pci)); + + /* 4.1.2.1: Devices MUST have the PCI Vendor ID 0x1AF4 */ + pci->vendor_id = 0x1AF4; + /* 4.1.2.1: ... PCI Device ID calculated by adding 0x1040 ... */ + pci->device_id = 0x1040 + type; + + /* + * PCI have specific codes for different types of devices. + * Linux doesn't care, but it's a good clue for people looking + * at the device. + */ + pci->class = class; + pci->subclass = subclass; + + /* + * 4.1.2.1: + * + * Non-transitional devices SHOULD have a PCI Revision ID of 1 or + * higher + */ + pci->revid = 1; + + /* + * 4.1.2.1: + * + * Non-transitional devices SHOULD have a PCI Subsystem Device ID of + * 0x40 or higher. + */ + pci->subsystem_device_id = 0x40; + + /* We use our dummy interrupt controller, and irq_line is the irq */ + pci->irq_line = devices.next_irq++; + pci->irq_pin = 0; + + /* Support for extended capabilities. */ + pci->status = (1 << 4); + + /* Link them in. */ + /* + * 4.1.4.3.1: + * + * The device MUST present at least one common configuration + * capability. + */ + pci->capabilities = offsetof(struct pci_config, common); + + /* 4.1.4.3.1 ... offset MUST be 4-byte aligned. */ + assert(pci->capabilities % 4 == 0); + + bar_offset = offsetof(struct virtio_pci_mmio, cfg); + bar_len = sizeof(((struct virtio_pci_mmio *)0)->cfg); + init_cap(&pci->common, sizeof(pci->common), VIRTIO_PCI_CAP_COMMON_CFG, + bar_offset, bar_len, + offsetof(struct pci_config, notify)); + + /* + * 4.1.4.4.1: + * + * The device MUST present at least one notification capability. + */ + bar_offset += bar_len; + bar_len = sizeof(((struct virtio_pci_mmio *)0)->notify); + + /* + * 4.1.4.4.1: + * + * The cap.offset MUST be 2-byte aligned. + */ + assert(pci->common.cap_next % 2 == 0); + + /* FIXME: Use a non-zero notify_off, for per-queue notification? */ + /* + * 4.1.4.4.1: + * + * The value cap.length presented by the device MUST be at least 2 and + * MUST be large enough to support queue notification offsets for all + * supported queues in all possible configurations. + */ + assert(bar_len >= 2); + + init_cap(&pci->notify.cap, sizeof(pci->notify), + VIRTIO_PCI_CAP_NOTIFY_CFG, + bar_offset, bar_len, + offsetof(struct pci_config, isr)); + + bar_offset += bar_len; + bar_len = sizeof(((struct virtio_pci_mmio *)0)->isr); + /* + * 4.1.4.5.1: + * + * The device MUST present at least one VIRTIO_PCI_CAP_ISR_CFG + * capability. + */ + init_cap(&pci->isr, sizeof(pci->isr), + VIRTIO_PCI_CAP_ISR_CFG, + bar_offset, bar_len, + offsetof(struct pci_config, cfg_access)); + + /* + * 4.1.4.7.1: + * + * The device MUST present at least one VIRTIO_PCI_CAP_PCI_CFG + * capability. + */ + /* This doesn't have any presence in the BAR */ + init_cap(&pci->cfg_access.cap, sizeof(pci->cfg_access), + VIRTIO_PCI_CAP_PCI_CFG, + 0, 0, 0); - /* Copy in the config information, and store the length. */ - memcpy(device_config(dev), conf, len); - dev->desc->config_len = len; + bar_offset += bar_len + sizeof(((struct virtio_pci_mmio *)0)->padding); + assert(bar_offset == sizeof(struct virtio_pci_mmio)); - /* Size must fit in config_len field (8 bits)! */ - assert(dev->desc->config_len == len); + /* + * This gets sewn in and length set in set_device_config(). + * Some devices don't have a device configuration interface, so + * we never expose this if we don't call set_device_config(). + */ + init_cap(&pci->device, sizeof(pci->device), VIRTIO_PCI_CAP_DEVICE_CFG, + bar_offset, 0, 0); } /* - * This routine does all the creation and setup of a new device, including - * calling new_dev_desc() to allocate the descriptor and device memory. We - * don't actually start the service threads until later. + * This routine does all the creation and setup of a new device, but we don't + * actually place the MMIO region until we know the size (if any) of the + * device-specific config. And we don't actually start the service threads + * until later. * * See what I mean about userspace being boring? */ -static struct device *new_device(const char *name, u16 type) +static struct device *new_pci_device(const char *name, u16 type, + u8 class, u8 subclass) { struct device *dev = malloc(sizeof(*dev)); /* Now we populate the fields one at a time. */ - dev->desc = new_dev_desc(type); dev->name = name; dev->vq = NULL; - dev->feature_len = 0; - dev->num_vq = 0; dev->running = false; - dev->next = NULL; + dev->wrote_features_ok = false; + dev->mmio_size = sizeof(struct virtio_pci_mmio); + dev->mmio = calloc(1, dev->mmio_size); + dev->features = (u64)1 << VIRTIO_F_VERSION_1; + dev->features_accepted = 0; - /* - * Append to device list. Prepending to a single-linked list is - * easier, but the user expects the devices to be arranged on the bus - * in command-line order. The first network device on the command line - * is eth0, the first block device /dev/vda, etc. - */ - if (devices.lastdev) - devices.lastdev->next = dev; - else - devices.dev = dev; - devices.lastdev = dev; + if (devices.device_num + 1 >= MAX_PCI_DEVICES) + errx(1, "Can only handle 31 PCI devices"); + + init_pci_config(&dev->config, type, class, subclass); + assert(!devices.pci[devices.device_num+1]); + devices.pci[++devices.device_num] = dev; return dev; } @@ -1324,6 +2651,7 @@ static struct device *new_device(const char *name, u16 type) static void setup_console(void) { struct device *dev; + struct virtio_console_config conf; /* If we can save the initial standard input settings... */ if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { @@ -1336,7 +2664,7 @@ static void setup_console(void) tcsetattr(STDIN_FILENO, TCSANOW, &term); } - dev = new_device("console", VIRTIO_ID_CONSOLE); + dev = new_pci_device("console", VIRTIO_ID_CONSOLE, 0x07, 0x00); /* We store the console state in dev->priv, and initialize it. */ dev->priv = malloc(sizeof(struct console_abort)); @@ -1348,10 +2676,14 @@ static void setup_console(void) * stdin. When they put something in the output queue, we write it to * stdout. */ - add_virtqueue(dev, VIRTQUEUE_NUM, console_input); - add_virtqueue(dev, VIRTQUEUE_NUM, console_output); + add_pci_virtqueue(dev, console_input, "input"); + add_pci_virtqueue(dev, console_output, "output"); + + /* We need a configuration area for the emerg_wr early writes. */ + add_pci_feature(dev, VIRTIO_CONSOLE_F_EMERG_WRITE); + set_device_config(dev, &conf, sizeof(conf)); - verbose("device %u: console\n", ++devices.device_num); + verbose("device %u: console\n", devices.device_num); } /*:*/ @@ -1449,6 +2781,7 @@ static void configure_device(int fd, const char *tapif, u32 ipaddr) static int get_tun_device(char tapif[IFNAMSIZ]) { struct ifreq ifr; + int vnet_hdr_sz; int netfd; /* Start with this zeroed. Messy but sure. */ @@ -1476,6 +2809,18 @@ static int get_tun_device(char tapif[IFNAMSIZ]) */ ioctl(netfd, TUNSETNOCSUM, 1); + /* + * In virtio before 1.0 (aka legacy virtio), we added a 16-bit + * field at the end of the network header iff + * VIRTIO_NET_F_MRG_RXBUF was negotiated. For virtio 1.0, + * that became the norm, but we need to tell the tun device + * about our expanded header (which is called + * virtio_net_hdr_mrg_rxbuf in the legacy system). + */ + vnet_hdr_sz = sizeof(struct virtio_net_hdr_v1); + if (ioctl(netfd, TUNSETVNETHDRSZ, &vnet_hdr_sz) != 0) + err(1, "Setting tun header size to %u", vnet_hdr_sz); + memcpy(tapif, ifr.ifr_name, IFNAMSIZ); return netfd; } @@ -1499,12 +2844,12 @@ static void setup_tun_net(char *arg) net_info->tunfd = get_tun_device(tapif); /* First we create a new network device. */ - dev = new_device("net", VIRTIO_ID_NET); + dev = new_pci_device("net", VIRTIO_ID_NET, 0x02, 0x00); dev->priv = net_info; /* Network devices need a recv and a send queue, just like console. */ - add_virtqueue(dev, VIRTQUEUE_NUM, net_input); - add_virtqueue(dev, VIRTQUEUE_NUM, net_output); + add_pci_virtqueue(dev, net_input, "rx"); + add_pci_virtqueue(dev, net_output, "tx"); /* * We need a socket to perform the magic network ioctls to bring up the @@ -1524,7 +2869,7 @@ static void setup_tun_net(char *arg) p = strchr(arg, ':'); if (p) { str2mac(p+1, conf.mac); - add_feature(dev, VIRTIO_NET_F_MAC); + add_pci_feature(dev, VIRTIO_NET_F_MAC); *p = '\0'; } @@ -1538,25 +2883,21 @@ static void setup_tun_net(char *arg) configure_device(ipfd, tapif, ip); /* Expect Guest to handle everything except UFO */ - add_feature(dev, VIRTIO_NET_F_CSUM); - add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); - add_feature(dev, VIRTIO_NET_F_GUEST_TSO4); - add_feature(dev, VIRTIO_NET_F_GUEST_TSO6); - add_feature(dev, VIRTIO_NET_F_GUEST_ECN); - add_feature(dev, VIRTIO_NET_F_HOST_TSO4); - add_feature(dev, VIRTIO_NET_F_HOST_TSO6); - add_feature(dev, VIRTIO_NET_F_HOST_ECN); + add_pci_feature(dev, VIRTIO_NET_F_CSUM); + add_pci_feature(dev, VIRTIO_NET_F_GUEST_CSUM); + add_pci_feature(dev, VIRTIO_NET_F_GUEST_TSO4); + add_pci_feature(dev, VIRTIO_NET_F_GUEST_TSO6); + add_pci_feature(dev, VIRTIO_NET_F_GUEST_ECN); + add_pci_feature(dev, VIRTIO_NET_F_HOST_TSO4); + add_pci_feature(dev, VIRTIO_NET_F_HOST_TSO6); + add_pci_feature(dev, VIRTIO_NET_F_HOST_ECN); /* We handle indirect ring entries */ - add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); - /* We're compliant with the damn spec. */ - add_feature(dev, VIRTIO_F_ANY_LAYOUT); - set_config(dev, sizeof(conf), &conf); + add_pci_feature(dev, VIRTIO_RING_F_INDIRECT_DESC); + set_device_config(dev, &conf, sizeof(conf)); /* We don't need the socket any more; setup is done. */ close(ipfd); - devices.device_num++; - if (bridging) verbose("device %u: tun %s attached to bridge: %s\n", devices.device_num, tapif, arg); @@ -1607,7 +2948,7 @@ static void blk_request(struct virtqueue *vq) head = wait_for_vq_desc(vq, iov, &out_num, &in_num); /* Copy the output header from the front of the iov (adjusts iov) */ - iov_consume(iov, out_num, &out, sizeof(out)); + iov_consume(vq->dev, iov, out_num, &out, sizeof(out)); /* Find and trim end of iov input array, for our status byte. */ in = NULL; @@ -1619,7 +2960,7 @@ static void blk_request(struct virtqueue *vq) } } if (!in) - errx(1, "Bad virtblk cmd with no room for status"); + bad_driver_vq(vq, "Bad virtblk cmd with no room for status"); /* * For historical reasons, block operations are expressed in 512 byte @@ -1627,15 +2968,7 @@ static void blk_request(struct virtqueue *vq) */ off = out.sector * 512; - /* - * In general the virtio block driver is allowed to try SCSI commands. - * It'd be nice if we supported eject, for example, but we don't. - */ - if (out.type & VIRTIO_BLK_T_SCSI_CMD) { - fprintf(stderr, "Scsi commands unsupported\n"); - *in = VIRTIO_BLK_S_UNSUPP; - wlen = sizeof(*in); - } else if (out.type & VIRTIO_BLK_T_OUT) { + if (out.type & VIRTIO_BLK_T_OUT) { /* * Write * @@ -1657,7 +2990,7 @@ static void blk_request(struct virtqueue *vq) /* Trim it back to the correct length */ ftruncate64(vblk->fd, vblk->len); /* Die, bad Guest, die. */ - errx(1, "Write past end %llu+%u", off, ret); + bad_driver_vq(vq, "Write past end %llu+%u", off, ret); } wlen = sizeof(*in); @@ -1699,11 +3032,11 @@ static void setup_block_file(const char *filename) struct vblk_info *vblk; struct virtio_blk_config conf; - /* Creat the device. */ - dev = new_device("block", VIRTIO_ID_BLOCK); + /* Create the device. */ + dev = new_pci_device("block", VIRTIO_ID_BLOCK, 0x01, 0x80); /* The device has one virtqueue, where the Guest places requests. */ - add_virtqueue(dev, VIRTQUEUE_NUM, blk_request); + add_pci_virtqueue(dev, blk_request, "request"); /* Allocate the room for our own bookkeeping */ vblk = dev->priv = malloc(sizeof(*vblk)); @@ -1712,9 +3045,6 @@ static void setup_block_file(const char *filename) vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE); vblk->len = lseek64(vblk->fd, 0, SEEK_END); - /* We support FLUSH. */ - add_feature(dev, VIRTIO_BLK_F_FLUSH); - /* Tell Guest how many sectors this device has. */ conf.capacity = cpu_to_le64(vblk->len / 512); @@ -1722,20 +3052,19 @@ static void setup_block_file(const char *filename) * Tell Guest not to put in too many descriptors at once: two are used * for the in and out elements. */ - add_feature(dev, VIRTIO_BLK_F_SEG_MAX); + add_pci_feature(dev, VIRTIO_BLK_F_SEG_MAX); conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); - /* Don't try to put whole struct: we have 8 bit limit. */ - set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf); + set_device_config(dev, &conf, sizeof(struct virtio_blk_config)); verbose("device %u: virtblock %llu sectors\n", - ++devices.device_num, le64_to_cpu(conf.capacity)); + devices.device_num, le64_to_cpu(conf.capacity)); } /*L:211 - * Our random number generator device reads from /dev/random into the Guest's + * Our random number generator device reads from /dev/urandom into the Guest's * input buffers. The usual case is that the Guest doesn't want random numbers - * and so has no buffers although /dev/random is still readable, whereas + * and so has no buffers although /dev/urandom is still readable, whereas * console is the reverse. * * The same logic applies, however. @@ -1754,7 +3083,7 @@ static void rng_input(struct virtqueue *vq) /* First we need a buffer from the Guests's virtqueue. */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); if (out_num) - errx(1, "Output buffers in rng?"); + bad_driver_vq(vq, "Output buffers in rng?"); /* * Just like the console write, we loop to cover the whole iovec. @@ -1763,8 +3092,8 @@ static void rng_input(struct virtqueue *vq) while (!iov_empty(iov, in_num)) { len = readv(rng_info->rfd, iov, in_num); if (len <= 0) - err(1, "Read from /dev/random gave %i", len); - iov_consume(iov, in_num, NULL, len); + err(1, "Read from /dev/urandom gave %i", len); + iov_consume(vq->dev, iov, in_num, NULL, len); totlen += len; } @@ -1780,17 +3109,20 @@ static void setup_rng(void) struct device *dev; struct rng_info *rng_info = malloc(sizeof(*rng_info)); - /* Our device's privat info simply contains the /dev/random fd. */ - rng_info->rfd = open_or_die("/dev/random", O_RDONLY); + /* Our device's private info simply contains the /dev/urandom fd. */ + rng_info->rfd = open_or_die("/dev/urandom", O_RDONLY); /* Create the new device. */ - dev = new_device("rng", VIRTIO_ID_RNG); + dev = new_pci_device("rng", VIRTIO_ID_RNG, 0xff, 0); dev->priv = rng_info; /* The device has one virtqueue, where the Guest places inbufs. */ - add_virtqueue(dev, VIRTQUEUE_NUM, rng_input); + add_pci_virtqueue(dev, rng_input, "input"); - verbose("device %u: rng\n", devices.device_num++); + /* We don't have any configuration space */ + no_device_config(dev); + + verbose("device %u: rng\n", devices.device_num); } /* That's the end of device setup. */ @@ -1820,17 +3152,23 @@ static void __attribute__((noreturn)) restart_guest(void) static void __attribute__((noreturn)) run_guest(void) { for (;;) { - unsigned long notify_addr; + struct lguest_pending notify; int readval; /* We read from the /dev/lguest device to run the Guest. */ - readval = pread(lguest_fd, ¬ify_addr, - sizeof(notify_addr), cpu_id); - - /* One unsigned long means the Guest did HCALL_NOTIFY */ - if (readval == sizeof(notify_addr)) { - verbose("Notify on address %#lx\n", notify_addr); - handle_output(notify_addr); + readval = pread(lguest_fd, ¬ify, sizeof(notify), cpu_id); + if (readval == sizeof(notify)) { + if (notify.trap == 13) { + verbose("Emulating instruction at %#x\n", + getreg(eip)); + emulate_insn(notify.insn); + } else if (notify.trap == 14) { + verbose("Emulating MMIO at %#x\n", + getreg(eip)); + emulate_mmio(notify.addr, notify.insn); + } else + errx(1, "Unknown trap %i addr %#08x\n", + notify.trap, notify.addr); /* ENOENT means the Guest died. Reading tells us why. */ } else if (errno == ENOENT) { char reason[1024] = { 0 }; @@ -1893,11 +3231,9 @@ int main(int argc, char *argv[]) main_args = argv; /* - * First we initialize the device list. We keep a pointer to the last - * device, and the next interrupt number to use for devices (1: - * remember that 0 is used by the timer). + * First we initialize the device list. We remember next interrupt + * number to use for devices (1: remember that 0 is used by the timer). */ - devices.lastdev = NULL; devices.next_irq = 1; /* We're CPU 0. In fact, that's the only CPU possible right now. */ @@ -1921,12 +3257,14 @@ int main(int argc, char *argv[]) guest_base = map_zeroed_pages(mem / getpagesize() + DEVICE_PAGES); guest_limit = mem; - guest_max = mem + DEVICE_PAGES*getpagesize(); - devices.descpage = get_pages(1); + guest_max = guest_mmio = mem + DEVICE_PAGES*getpagesize(); break; } } + /* We always have a console device, and it's always device 1. */ + setup_console(); + /* The options are fairly straight-forward */ while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) { switch (c) { @@ -1967,8 +3305,8 @@ int main(int argc, char *argv[]) verbose("Guest base is at %p\n", guest_base); - /* We always have a console device */ - setup_console(); + /* Initialize the (fake) PCI host bridge device. */ + init_pci_host_bridge(); /* Now we load the kernel */ start = load_kernel(open_or_die(argv[optind+1], O_RDONLY)); diff --git a/tools/lib/api/Build b/tools/lib/api/Build new file mode 100644 index 000000000000..3653965cf481 --- /dev/null +++ b/tools/lib/api/Build @@ -0,0 +1,2 @@ +libapi-y += fd/ +libapi-y += fs/ diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 36c08b1f4afb..8bd960658463 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -1,49 +1,43 @@ include ../../scripts/Makefile.include include ../../perf/config/utilities.mak # QUIET_CLEAN +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar -# guard against environment variables -LIB_H= -LIB_OBJS= - -LIB_H += fs/debugfs.h -LIB_H += fs/fs.h -# See comment below about piggybacking... -LIB_H += fd/array.h - -LIB_OBJS += $(OUTPUT)fs/debugfs.o -LIB_OBJS += $(OUTPUT)fs/fs.o -# XXX piggybacking here, need to introduce libapikfd, or rename this -# to plain libapik.a and make it have it all api goodies -LIB_OBJS += $(OUTPUT)fd/array.o +MAKEFLAGS += --no-print-directory -LIBFILE = libapikfs.a +LIBFILE = $(OUTPUT)libapi.a -CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC -EXTLIBS = -lelf -lpthread -lrt -lm -ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ALL_LDFLAGS = $(LDFLAGS) +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f -$(LIBFILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $(OUTPUT)$@ $(LIB_OBJS) +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +API_IN := $(OUTPUT)libapi-in.o -$(LIB_OBJS): $(LIB_H) +export srctree OUTPUT CC LD CFLAGS V -libapi_dirs: - $(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs +all: $(LIBFILE) -$(OUTPUT)%.o: %.c libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< -$(OUTPUT)%.s: %.c libapi_dirs - $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< -$(OUTPUT)%.o: %.S libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< +$(API_IN): FORCE + @$(MAKE) $(build)=libapi + +$(LIBFILE): $(API_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) clean: - $(call QUIET_CLEAN, libapi) $(RM) $(LIB_OBJS) $(LIBFILE) + $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ + find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM) + +FORCE: -.PHONY: clean +.PHONY: clean FORCE diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build new file mode 100644 index 000000000000..605d99f6d71a --- /dev/null +++ b/tools/lib/api/fd/Build @@ -0,0 +1 @@ +libapi-y += array.o diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build new file mode 100644 index 000000000000..6de5a4f0b501 --- /dev/null +++ b/tools/lib/api/fs/Build @@ -0,0 +1,4 @@ +libapi-y += fs.o +libapi-y += debugfs.o +libapi-y += findfs.o +libapi-y += tracefs.o diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index a74fba6d7743..8305b3e9d48e 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -1,76 +1,52 @@ +#define _GNU_SOURCE #include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> #include <stdbool.h> #include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> #include <sys/mount.h> #include <linux/kernel.h> #include "debugfs.h" -char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug"; +#ifndef DEBUGFS_DEFAULT_PATH +#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" +#endif + +char debugfs_mountpoint[PATH_MAX + 1] = DEBUGFS_DEFAULT_PATH; static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug", + DEBUGFS_DEFAULT_PATH, "/debug", 0, }; static bool debugfs_found; +bool debugfs_configured(void) +{ + return debugfs_find_mountpoint() != NULL; +} + /* find the path to the mounted debugfs */ const char *debugfs_find_mountpoint(void) { - const char * const *ptr; - char type[100]; - FILE *fp; + const char *ret; if (debugfs_found) return (const char *)debugfs_mountpoint; - ptr = debugfs_known_mountpoints; - while (*ptr) { - if (debugfs_valid_mountpoint(*ptr) == 0) { - debugfs_found = true; - strcpy(debugfs_mountpoint, *ptr); - return debugfs_mountpoint; - } - ptr++; - } - - /* give up and parse /proc/mounts */ - fp = fopen("/proc/mounts", "r"); - if (fp == NULL) - return NULL; - - while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", - debugfs_mountpoint, type) == 2) { - if (strcmp(type, "debugfs") == 0) - break; - } - fclose(fp); - - if (strcmp(type, "debugfs") != 0) - return NULL; - - debugfs_found = true; - - return debugfs_mountpoint; -} - -/* verify that a mountpoint is actually a debugfs instance */ - -int debugfs_valid_mountpoint(const char *debugfs) -{ - struct statfs st_fs; + ret = find_mountpoint("debugfs", (long) DEBUGFS_MAGIC, + debugfs_mountpoint, PATH_MAX + 1, + debugfs_known_mountpoints); + if (ret) + debugfs_found = true; - if (statfs(debugfs, &st_fs) < 0) - return -ENOENT; - else if (st_fs.f_type != (long) DEBUGFS_MAGIC) - return -ENOENT; - - return 0; + return ret; } /* mount the debugfs somewhere if it's not mounted */ @@ -86,7 +62,7 @@ char *debugfs_mount(const char *mountpoint) mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT); /* if no environment variable, use default */ if (mountpoint == NULL) - mountpoint = "/sys/kernel/debug"; + mountpoint = DEBUGFS_DEFAULT_PATH; } if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0) @@ -98,3 +74,45 @@ char *debugfs_mount(const char *mountpoint) out: return debugfs_mountpoint; } + +int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename) +{ + char sbuf[128]; + + switch (err) { + case ENOENT: + if (debugfs_found) { + snprintf(buf, size, + "Error:\tFile %s/%s not found.\n" + "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n", + debugfs_mountpoint, filename); + break; + } + snprintf(buf, size, "%s", + "Error:\tUnable to find debugfs\n" + "Hint:\tWas your kernel compiled with debugfs support?\n" + "Hint:\tIs the debugfs filesystem mounted?\n" + "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); + break; + case EACCES: + snprintf(buf, size, + "Error:\tNo permissions to read %s/%s\n" + "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", + debugfs_mountpoint, filename, debugfs_mountpoint); + break; + default: + snprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); + break; + } + + return 0; +} + +int debugfs__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) +{ + char path[PATH_MAX]; + + snprintf(path, PATH_MAX, "tracing/events/%s/%s", sys, name ?: "*"); + + return debugfs__strerror_open(err, buf, size, path); +} diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index f19d3df9609d..455023698d2b 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -1,16 +1,7 @@ #ifndef __API_DEBUGFS_H__ #define __API_DEBUGFS_H__ -#define _STR(x) #x -#define STR(x) _STR(x) - -/* - * On most systems <limits.h> would have given us this, but not on some systems - * (e.g. GNU/Hurd). - */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif +#include "findfs.h" #ifndef DEBUGFS_MAGIC #define DEBUGFS_MAGIC 0x64626720 @@ -20,10 +11,13 @@ #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" #endif +bool debugfs_configured(void); const char *debugfs_find_mountpoint(void); -int debugfs_valid_mountpoint(const char *debugfs); char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; +int debugfs__strerror_open(int err, char *buf, size_t size, const char *filename); +int debugfs__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name); + #endif /* __API_DEBUGFS_H__ */ diff --git a/tools/lib/api/fs/findfs.c b/tools/lib/api/fs/findfs.c new file mode 100644 index 000000000000..49946cb6d7af --- /dev/null +++ b/tools/lib/api/fs/findfs.c @@ -0,0 +1,63 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <sys/vfs.h> + +#include "findfs.h" + +/* verify that a mountpoint is actually the type we want */ + +int valid_mountpoint(const char *mount, long magic) +{ + struct statfs st_fs; + + if (statfs(mount, &st_fs) < 0) + return -ENOENT; + else if ((long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +/* find the path to a mounted file system */ +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints) +{ + const char * const *ptr; + char format[128]; + char type[100]; + FILE *fp; + + if (known_mountpoints) { + ptr = known_mountpoints; + while (*ptr) { + if (valid_mountpoint(*ptr, magic) == 0) { + strncpy(mountpoint, *ptr, len - 1); + mountpoint[len-1] = 0; + return mountpoint; + } + ptr++; + } + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + snprintf(format, 128, "%%*s %%%ds %%99s %%*s %%*d %%*d\n", len); + + while (fscanf(fp, format, mountpoint, type) == 2) { + if (strcmp(type, fstype) == 0) + break; + } + fclose(fp); + + if (strcmp(type, fstype) != 0) + return NULL; + + return mountpoint; +} diff --git a/tools/lib/api/fs/findfs.h b/tools/lib/api/fs/findfs.h new file mode 100644 index 000000000000..b6f5d05acc42 --- /dev/null +++ b/tools/lib/api/fs/findfs.h @@ -0,0 +1,23 @@ +#ifndef __API_FINDFS_H__ +#define __API_FINDFS_H__ + +#include <stdbool.h> + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * On most systems <limits.h> would have given us this, but not on some systems + * (e.g. GNU/Hurd). + */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints); + +int valid_mountpoint(const char *mount, long magic); + +#endif /* __API_FINDFS_H__ */ diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index 65d9be3f9887..128ef6332a6b 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -79,7 +79,7 @@ static int fs__valid_mount(const char *fs, long magic) if (statfs(fs, &st_fs) < 0) return -ENOENT; - else if (st_fs.f_type != magic) + else if ((long)st_fs.f_type != magic) return -ENOENT; return 0; diff --git a/tools/lib/api/fs/tracefs.c b/tools/lib/api/fs/tracefs.c new file mode 100644 index 000000000000..e4aa9688b71e --- /dev/null +++ b/tools/lib/api/fs/tracefs.c @@ -0,0 +1,78 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdbool.h> +#include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <linux/kernel.h> + +#include "tracefs.h" + +#ifndef TRACEFS_DEFAULT_PATH +#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing" +#endif + +char tracefs_mountpoint[PATH_MAX + 1] = TRACEFS_DEFAULT_PATH; + +static const char * const tracefs_known_mountpoints[] = { + TRACEFS_DEFAULT_PATH, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, +}; + +static bool tracefs_found; + +bool tracefs_configured(void) +{ + return tracefs_find_mountpoint() != NULL; +} + +/* find the path to the mounted tracefs */ +const char *tracefs_find_mountpoint(void) +{ + const char *ret; + + if (tracefs_found) + return (const char *)tracefs_mountpoint; + + ret = find_mountpoint("tracefs", (long) TRACEFS_MAGIC, + tracefs_mountpoint, PATH_MAX + 1, + tracefs_known_mountpoints); + + if (ret) + tracefs_found = true; + + return ret; +} + +/* mount the tracefs somewhere if it's not mounted */ +char *tracefs_mount(const char *mountpoint) +{ + /* see if it's already mounted */ + if (tracefs_find_mountpoint()) + goto out; + + /* if not mounted and no argument */ + if (mountpoint == NULL) { + /* see if environment variable set */ + mountpoint = getenv(PERF_TRACEFS_ENVIRONMENT); + /* if no environment variable, use default */ + if (mountpoint == NULL) + mountpoint = TRACEFS_DEFAULT_PATH; + } + + if (mount(NULL, mountpoint, "tracefs", 0, NULL) < 0) + return NULL; + + /* save the mountpoint */ + tracefs_found = true; + strncpy(tracefs_mountpoint, mountpoint, sizeof(tracefs_mountpoint)); +out: + return tracefs_mountpoint; +} diff --git a/tools/lib/api/fs/tracefs.h b/tools/lib/api/fs/tracefs.h new file mode 100644 index 000000000000..da780ac49acb --- /dev/null +++ b/tools/lib/api/fs/tracefs.h @@ -0,0 +1,21 @@ +#ifndef __API_TRACEFS_H__ +#define __API_TRACEFS_H__ + +#include "findfs.h" + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +#ifndef PERF_TRACEFS_ENVIRONMENT +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" +#endif + +bool tracefs_configured(void); +const char *tracefs_find_mountpoint(void); +int tracefs_valid_mountpoint(const char *debugfs); +char *tracefs_mount(const char *mountpoint); + +extern char tracefs_mountpoint[]; + +#endif /* __API_DEBUGFS_H__ */ diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore new file mode 100644 index 000000000000..cc0e7a9f99e3 --- /dev/null +++ b/tools/lib/lockdep/.gitignore @@ -0,0 +1 @@ +liblockdep.so.* diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build new file mode 100644 index 000000000000..6f667355b068 --- /dev/null +++ b/tools/lib/lockdep/Build @@ -0,0 +1 @@ +liblockdep-y += common.o lockdep.o preload.o rbtree.o diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 52f9279c6c13..18ffccf00426 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -14,9 +14,10 @@ define allow-override $(eval $(1) = $(2))) endef -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,LD,$(CROSS_COMPILE)ld) INSTALL = install @@ -35,6 +36,10 @@ bindir = $(prefix)/$(bindir_relative) export DESTDIR DESTDIR_SQ INSTALL +MAKEFLAGS += --no-print-directory + +include ../../scripts/Makefile.include + # copy a bit from Linux kbuild ifeq ("$(origin V)", "command line") @@ -44,56 +49,21 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(BUILD_OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \ - BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1 -endef - -saved-output := $(BUILD_OUTPUT) -BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd) -$(if $(BUILD_OUTPUT),, \ - $(error output directory "$(saved-output)" does not exist)) - -all: sub-make - -gui: force - $(call build_output, all_cmd) - -$(filter-out gui,$(MAKECMDGOALS)): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # BUILD_OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(realpath $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))) -objtree := $(realpath $(CURDIR)) -src := $(srctree) -obj := $(objtree) - -export prefix libdir bindir src obj - # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) bindir_SQ = $(subst ','\'',$(bindir)) -LIB_FILE = liblockdep.a liblockdep.so.$(LIBLOCKDEP_VERSION) +LIB_IN := $(OUTPUT)liblockdep-in.o + BIN_FILE = lockdep +LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION) CONFIG_INCLUDES = CONFIG_LIBS = @@ -104,37 +74,27 @@ N = export Q VERBOSE -INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES) +INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g +CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) ifeq ($(VERBOSE),1) Q = - print_compile = - print_app_build = - print_fpic_compile = print_shared_lib_compile = print_install = else Q = @ - print_compile = echo ' CC '$(OBJ); - print_app_build = echo ' BUILD '$(OBJ); - print_fpic_compile = echo ' CC FPIC '$(OBJ); - print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); - print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; + print_shared_lib_compile = echo ' LD '$(OBJ); + print_static_lib_build = echo ' LD '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif -do_fpic_compile = \ - ($(print_fpic_compile) \ - $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@) - -do_app_build = \ - ($(print_app_build) \ - $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS)) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj do_compile_shared_library = \ ($(print_shared_lib_compile) \ @@ -144,22 +104,6 @@ do_build_static_lib = \ ($(print_static_lib_build) \ $(RM) $@; $(AR) rcs $@ $^) - -define do_compile - $(print_compile) \ - $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; -endef - -$(obj)/%.o: $(src)/%.c - $(Q)$(call do_compile) - -%.o: $(src)/%.c - $(Q)$(call do_compile) - -PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o - -ALL_OBJS = $(PEVENT_LIB_OBJS) - CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) @@ -169,42 +113,15 @@ all: all_cmd all_cmd: $(CMD_TARGETS) -liblockdep.so.$(LIBLOCKDEP_VERSION): $(PEVENT_LIB_OBJS) +$(LIB_IN): force + $(Q)$(MAKE) $(build)=liblockdep + +liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) $(Q)$(do_compile_shared_library) -liblockdep.a: $(PEVENT_LIB_OBJS) +liblockdep.a: $(LIB_IN) $(Q)$(do_build_static_lib) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c - $(Q)$(do_fpic_compile) - -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -233,8 +150,6 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d $(RM) tags TAGS -endif # skip-makefile - PHONY += force force: diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c index 6f803609e498..0b0112c80f22 100644 --- a/tools/lib/lockdep/preload.c +++ b/tools/lib/lockdep/preload.c @@ -317,7 +317,7 @@ int pthread_mutex_destroy(pthread_mutex_t *mutex) * * TODO: Hook into free() and add that check there as well. */ - debug_check_no_locks_freed(mutex, mutex + sizeof(*mutex)); + debug_check_no_locks_freed(mutex, sizeof(*mutex)); __del_lock(__get_lock(mutex)); return ll_pthread_mutex_destroy(mutex); } @@ -341,7 +341,7 @@ int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) { try_init_preload(); - debug_check_no_locks_freed(rwlock, rwlock + sizeof(*rwlock)); + debug_check_no_locks_freed(rwlock, sizeof(*rwlock)); __del_lock(__get_lock(rwlock)); return ll_pthread_rwlock_destroy(rwlock); } diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h index a11e3c357be7..cd2cc59a5da7 100644 --- a/tools/lib/lockdep/uinclude/linux/kernel.h +++ b/tools/lib/lockdep/uinclude/linux/kernel.h @@ -28,6 +28,9 @@ #define __init #define noinline #define list_add_tail_rcu list_add_tail +#define list_for_each_entry_rcu list_for_each_entry +#define barrier() +#define synchronize_sched() #ifndef CALLER_ADDR0 #define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build new file mode 100644 index 000000000000..c681d0575d16 --- /dev/null +++ b/tools/lib/traceevent/Build @@ -0,0 +1,17 @@ +libtraceevent-y += event-parse.o +libtraceevent-y += event-plugin.o +libtraceevent-y += trace-seq.o +libtraceevent-y += parse-filter.o +libtraceevent-y += parse-utils.o +libtraceevent-y += kbuffer-parse.o + +plugin_jbd2-y += plugin_jbd2.o +plugin_hrtimer-y += plugin_hrtimer.o +plugin_kmem-y += plugin_kmem.o +plugin_kvm-y += plugin_kvm.o +plugin_mac80211-y += plugin_mac80211.o +plugin_sched_switch-y += plugin_sched_switch.o +plugin_function-y += plugin_function.o +plugin_xen-y += plugin_xen.o +plugin_scsi-y += plugin_scsi.o +plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 005c9cc06935..d410da335e3d 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -67,7 +67,7 @@ PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' endif -include $(if $(BUILD_SRC),$(BUILD_SRC)/)../../scripts/Makefile.include +include ../../scripts/Makefile.include # copy a bit from Linux kbuild @@ -78,40 +78,13 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \ - BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1 -endef - -all: sub-make - -$(MAKECMDGOALS): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) -objtree := $(CURDIR) -src := $(srctree) -obj := $(objtree) - export prefix bindir src obj # Shell quotes @@ -132,16 +105,19 @@ EXTRAVERSION = $(EP_EXTRAVERSION) OBJ = $@ N = -export Q VERBOSE - EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) -INCLUDES = -I. -I $(srctree)/../../include $(CONFIG_INCLUDES) +INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) -# Set compile option CFLAGS if not set elsewhere -CFLAGS ?= -g -Wall +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif # Append required CFLAGS +override CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) override CFLAGS += $(udis86-flags) -D_GNU_SOURCE @@ -151,74 +127,58 @@ else Q = @ endif -do_compile_shared_library = \ - ($(print_shared_lib_compile) \ - $(CC) --shared $^ -o $@) - -do_plugin_build = \ - ($(print_plugin_build) \ - $(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<) - -do_build_static_lib = \ - ($(print_static_lib_build) \ - $(RM) $@; $(AR) rcs $@ $^) - - -do_compile = $(QUIET_CC)$(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; +# Disable command line variables (CFLAGS) overide from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= -$(obj)/%.o: $(src)/%.c - $(call do_compile) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -%.o: $(src)/%.c - $(call do_compile) +PLUGINS = plugin_jbd2.so +PLUGINS += plugin_hrtimer.so +PLUGINS += plugin_kmem.so +PLUGINS += plugin_kvm.so +PLUGINS += plugin_mac80211.so +PLUGINS += plugin_sched_switch.so +PLUGINS += plugin_function.so +PLUGINS += plugin_xen.so +PLUGINS += plugin_scsi.so +PLUGINS += plugin_cfg80211.so -PEVENT_LIB_OBJS = event-parse.o -PEVENT_LIB_OBJS += event-plugin.o -PEVENT_LIB_OBJS += trace-seq.o -PEVENT_LIB_OBJS += parse-filter.o -PEVENT_LIB_OBJS += parse-utils.o -PEVENT_LIB_OBJS += kbuffer-parse.o +PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) +PLUGINS_IN := $(PLUGINS:.so=-in.o) -PLUGIN_OBJS = plugin_jbd2.o -PLUGIN_OBJS += plugin_hrtimer.o -PLUGIN_OBJS += plugin_kmem.o -PLUGIN_OBJS += plugin_kvm.o -PLUGIN_OBJS += plugin_mac80211.o -PLUGIN_OBJS += plugin_sched_switch.o -PLUGIN_OBJS += plugin_function.o -PLUGIN_OBJS += plugin_xen.o -PLUGIN_OBJS += plugin_scsi.o -PLUGIN_OBJS += plugin_cfg80211.o - -PLUGINS := $(PLUGIN_OBJS:.o=.so) - -ALL_OBJS = $(PEVENT_LIB_OBJS) $(PLUGIN_OBJS) +TE_IN := $(OUTPUT)libtraceevent-in.o +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) CMD_TARGETS = $(LIB_FILE) $(PLUGINS) TARGETS = $(CMD_TARGETS) - all: all_cmd all_cmd: $(CMD_TARGETS) -libtraceevent.so: $(PEVENT_LIB_OBJS) +$(TE_IN): force + $(Q)$(MAKE) $(build)=libtraceevent + +$(OUTPUT)libtraceevent.so: $(TE_IN) $(QUIET_LINK)$(CC) --shared $^ -o $@ -libtraceevent.a: $(PEVENT_LIB_OBJS) +$(OUTPUT)libtraceevent.a: $(TE_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ plugins: $(PLUGINS) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@ +__plugin_obj = $(notdir $@) + plugin_obj = $(__plugin_obj:-in.o=) -$(PLUGIN_OBJS): %.o : $(src)/%.c - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) -fPIC -o $@ $< +$(PLUGINS_IN): force + $(Q)$(MAKE) $(build)=$(plugin_obj) -$(PLUGINS): %.so: %.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $< +$(OUTPUT)%.so: $(OUTPUT)%-in.o + $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^ define make_version.h (echo '/* This file is automatically generated. Do not modify. */'; \ @@ -255,40 +215,6 @@ define update_dir fi); endef -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - -TRACEEVENT-CFLAGS: force - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or cross compiler"; \ - echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ - fi - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -327,14 +253,9 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \ $(RM) TRACEEVENT-CFLAGS tags TAGS -endif # skip-makefile - PHONY += force plugins force: -plugins: - @echo > /dev/null - # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index cf3a44bf1ec3..29f94f6f0d9e 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -32,6 +32,7 @@ #include <stdint.h> #include <limits.h> +#include <netinet/ip6.h> #include "event-parse.h" #include "event-utils.h" @@ -303,7 +304,10 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) if (!item) return -1; - item->comm = strdup(comm); + if (comm) + item->comm = strdup(comm); + else + item->comm = strdup("<...>"); if (!item->comm) { free(item); return -1; @@ -317,9 +321,14 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) return 0; } -void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock) +int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock) { - pevent->trace_clock = trace_clock; + pevent->trace_clock = strdup(trace_clock); + if (!pevent->trace_clock) { + errno = ENOMEM; + return -1; + } + return 0; } struct func_map { @@ -757,6 +766,11 @@ static void free_arg(struct print_arg *arg) free_arg(arg->hex.field); free_arg(arg->hex.size); break; + case PRINT_INT_ARRAY: + free_arg(arg->int_array.field); + free_arg(arg->int_array.count); + free_arg(arg->int_array.el_size); + break; case PRINT_TYPE: free(arg->typecast.type); free_arg(arg->typecast.item); @@ -1925,7 +1939,22 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) goto out_warn_free; type = process_arg_token(event, right, tok, type); - arg->op.right = right; + + if (right->type == PRINT_OP && + get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { + struct print_arg tmp; + + /* rotate ops according to the priority */ + arg->op.right = right->op.left; + + tmp = *arg; + *arg = *right; + *right = tmp; + + arg->op.left = right; + } else { + arg->op.right = right; + } } else if (strcmp(token, "[") == 0) { @@ -2013,6 +2042,38 @@ process_entry(struct event_format *event __maybe_unused, struct print_arg *arg, return EVENT_ERROR; } +static int alloc_and_process_delim(struct event_format *event, char *next_token, + struct print_arg **print_arg) +{ + struct print_arg *field; + enum event_type type; + char *token; + int ret = 0; + + field = alloc_arg(); + if (!field) { + do_warning_event(event, "%s: not enough memory!", __func__); + errno = ENOMEM; + return -1; + } + + type = process_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, next_token)) { + errno = EINVAL; + ret = -1; + free_arg(field); + goto out_free_token; + } + + *print_arg = field; + +out_free_token: + free_token(token); + + return ret; +} + static char *arg_eval (struct print_arg *arg); static unsigned long long @@ -2485,49 +2546,46 @@ out_free: static enum event_type process_hex(struct event_format *event, struct print_arg *arg, char **tok) { - struct print_arg *field; - enum event_type type; - char *token = NULL; - memset(arg, 0, sizeof(*arg)); arg->type = PRINT_HEX; - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - goto out_free; - } + if (alloc_and_process_delim(event, ",", &arg->hex.field)) + goto out; - type = process_arg(event, field, &token); + if (alloc_and_process_delim(event, ")", &arg->hex.size)) + goto free_field; - if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; - - arg->hex.field = field; + return read_token_item(tok); - free_token(token); +free_field: + free_arg(arg->hex.field); +out: + *tok = NULL; + return EVENT_ERROR; +} - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - *tok = NULL; - return EVENT_ERROR; - } +static enum event_type +process_int_array(struct event_format *event, struct print_arg *arg, char **tok) +{ + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_INT_ARRAY; - type = process_arg(event, field, &token); + if (alloc_and_process_delim(event, ",", &arg->int_array.field)) + goto out; - if (test_type_token(type, token, EVENT_DELIM, ")")) - goto out_free; + if (alloc_and_process_delim(event, ",", &arg->int_array.count)) + goto free_field; - arg->hex.size = field; + if (alloc_and_process_delim(event, ")", &arg->int_array.el_size)) + goto free_size; - free_token(token); - type = read_token_item(tok); - return type; + return read_token_item(tok); - out_free: - free_arg(field); - free_token(token); +free_size: + free_arg(arg->int_array.count); +free_field: + free_arg(arg->int_array.field); +out: *tok = NULL; return EVENT_ERROR; } @@ -2827,6 +2885,10 @@ process_function(struct event_format *event, struct print_arg *arg, free_token(token); return process_hex(event, arg, tok); } + if (strcmp(token, "__print_array") == 0) { + free_token(token); + return process_int_array(event, arg, tok); + } if (strcmp(token, "__get_str") == 0) { free_token(token); return process_str(event, arg, tok); @@ -3355,6 +3417,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg break; case PRINT_FLAGS: case PRINT_SYMBOL: + case PRINT_INT_ARRAY: case PRINT_HEX: break; case PRINT_TYPE: @@ -3567,7 +3630,7 @@ static const struct flag flags[] = { { "HRTIMER_RESTART", 1 }, }; -static unsigned long long eval_flag(const char *flag) +static long long eval_flag(const char *flag) { int i; @@ -3583,7 +3646,7 @@ static unsigned long long eval_flag(const char *flag) if (strcmp(flags[i].name, flag) == 0) return flags[i].value; - return 0; + return -1LL; } static void print_str_to_seq(struct trace_seq *s, const char *format, @@ -3657,7 +3720,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct print_flag_sym *flag; struct format_field *field; struct printk_map *printk; - unsigned long long val, fval; + long long val, fval; unsigned long addr; char *str; unsigned char *hex; @@ -3716,11 +3779,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, print = 0; for (flag = arg->flags.flags; flag; flag = flag->next) { fval = eval_flag(flag->value); - if (!val && !fval) { + if (!val && fval < 0) { print_str_to_seq(s, format, len_arg, flag->str); break; } - if (fval && (val & fval) == fval) { + if (fval > 0 && (val & fval) == fval) { if (print && arg->flags.delim) trace_seq_puts(s, arg->flags.delim); print_str_to_seq(s, format, len_arg, flag->str); @@ -3765,6 +3828,54 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, } break; + case PRINT_INT_ARRAY: { + void *num; + int el_size; + + if (arg->int_array.field->type == PRINT_DYNAMIC_ARRAY) { + unsigned long offset; + struct format_field *field = + arg->int_array.field->dynarray.field; + offset = pevent_read_number(pevent, + data + field->offset, + field->size); + num = data + (offset & 0xffff); + } else { + field = arg->int_array.field->field.field; + if (!field) { + str = arg->int_array.field->field.name; + field = pevent_find_any_field(event, str); + if (!field) + goto out_warning_field; + arg->int_array.field->field.field = field; + } + num = data + field->offset; + } + len = eval_num_arg(data, size, event, arg->int_array.count); + el_size = eval_num_arg(data, size, event, + arg->int_array.el_size); + for (i = 0; i < len; i++) { + if (i) + trace_seq_putc(s, ' '); + + if (el_size == 1) { + trace_seq_printf(s, "%u", *(uint8_t *)num); + } else if (el_size == 2) { + trace_seq_printf(s, "%u", *(uint16_t *)num); + } else if (el_size == 4) { + trace_seq_printf(s, "%u", *(uint32_t *)num); + } else if (el_size == 8) { + trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num); + } else { + trace_seq_printf(s, "BAD SIZE:%d 0x%x", + el_size, *(uint8_t *)num); + el_size = 1; + } + + num += el_size; + } + break; + } case PRINT_TYPE: break; case PRINT_STRING: { @@ -3975,7 +4086,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc if (asprintf(&arg->atom.atom, "%lld", ip) < 0) goto out_free; - /* skip the first "%pf: " */ + /* skip the first "%ps: " */ for (ptr = fmt + 5, bptr = data + field->offset; bptr < data + size && *ptr; ptr++) { int ls = 0; @@ -3996,6 +4107,10 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc goto process_again; case '.': goto process_again; + case 'z': + case 'Z': + ls = 1; + goto process_again; case 'p': ls = 1; /* fall through */ @@ -4149,6 +4264,324 @@ static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); } +static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf) +{ + const char *fmt; + + if (i == 'i') + fmt = "%03d.%03d.%03d.%03d"; + else + fmt = "%d.%d.%d.%d"; + + trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]); +} + +static inline bool ipv6_addr_v4mapped(const struct in6_addr *a) +{ + return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) | + (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL; +} + +static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr) +{ + return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); +} + +static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr) +{ + int i, j, range; + unsigned char zerolength[8]; + int longest = 1; + int colonpos = -1; + uint16_t word; + uint8_t hi, lo; + bool needcolon = false; + bool useIPv4; + struct in6_addr in6; + + memcpy(&in6, addr, sizeof(struct in6_addr)); + + useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6); + + memset(zerolength, 0, sizeof(zerolength)); + + if (useIPv4) + range = 6; + else + range = 8; + + /* find position of longest 0 run */ + for (i = 0; i < range; i++) { + for (j = i; j < range; j++) { + if (in6.s6_addr16[j] != 0) + break; + zerolength[i]++; + } + } + for (i = 0; i < range; i++) { + if (zerolength[i] > longest) { + longest = zerolength[i]; + colonpos = i; + } + } + if (longest == 1) /* don't compress a single 0 */ + colonpos = -1; + + /* emit address */ + for (i = 0; i < range; i++) { + if (i == colonpos) { + if (needcolon || i == 0) + trace_seq_printf(s, ":"); + trace_seq_printf(s, ":"); + needcolon = false; + i += longest - 1; + continue; + } + if (needcolon) { + trace_seq_printf(s, ":"); + needcolon = false; + } + /* hex u16 without leading 0s */ + word = ntohs(in6.s6_addr16[i]); + hi = word >> 8; + lo = word & 0xff; + if (hi) + trace_seq_printf(s, "%x%02x", hi, lo); + else + trace_seq_printf(s, "%x", lo); + + needcolon = true; + } + + if (useIPv4) { + if (needcolon) + trace_seq_printf(s, ":"); + print_ip4_addr(s, 'I', &in6.s6_addr[12]); + } + + return; +} + +static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf) +{ + int j; + + for (j = 0; j < 16; j += 2) { + trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]); + if (i == 'I' && j < 14) + trace_seq_printf(s, ":"); + } +} + +/* + * %pi4 print an IPv4 address with leading zeros + * %pI4 print an IPv4 address without leading zeros + * %pi6 print an IPv6 address without colons + * %pI6 print an IPv6 address with colons + * %pI6c print an IPv6 address in compressed form with colons + * %pISpc print an IP address based on sockaddr; p adds port. + */ +static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + unsigned char *buf; + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return 0; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return 0; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return 0; + } + } + + buf = data + arg->field.field->offset; + + if (arg->field.field->size != 4) { + trace_seq_printf(s, "INVALIDIPv4"); + return 0; + } + print_ip4_addr(s, i, buf); + + return 0; +} + +static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char have_c = 0; + unsigned char *buf; + int rc = 0; + + /* pI6c */ + if (i == 'I' && *ptr == 'c') { + have_c = 1; + ptr++; + rc++; + } + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return rc; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return rc; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return rc; + } + } + + buf = data + arg->field.field->offset; + + if (arg->field.field->size != 16) { + trace_seq_printf(s, "INVALIDIPv6"); + return rc; + } + + if (have_c) + print_ip6c_addr(s, buf); + else + print_ip6_addr(s, i, buf); + + return rc; +} + +static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char have_c = 0, have_p = 0; + unsigned char *buf; + struct sockaddr_storage *sa; + int rc = 0; + + /* pISpc */ + if (i == 'I') { + if (*ptr == 'p') { + have_p = 1; + ptr++; + rc++; + } + if (*ptr == 'c') { + have_c = 1; + ptr++; + rc++; + } + } + + if (arg->type == PRINT_FUNC) { + process_defined_func(s, data, size, event, arg); + return rc; + } + + if (arg->type != PRINT_FIELD) { + trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type); + return rc; + } + + if (!arg->field.field) { + arg->field.field = + pevent_find_any_field(event, arg->field.name); + if (!arg->field.field) { + do_warning("%s: field %s not found", + __func__, arg->field.name); + return rc; + } + } + + sa = (struct sockaddr_storage *) (data + arg->field.field->offset); + + if (sa->ss_family == AF_INET) { + struct sockaddr_in *sa4 = (struct sockaddr_in *) sa; + + if (arg->field.field->size < sizeof(struct sockaddr_in)) { + trace_seq_printf(s, "INVALIDIPv4"); + return rc; + } + + print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr); + if (have_p) + trace_seq_printf(s, ":%d", ntohs(sa4->sin_port)); + + + } else if (sa->ss_family == AF_INET6) { + struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa; + + if (arg->field.field->size < sizeof(struct sockaddr_in6)) { + trace_seq_printf(s, "INVALIDIPv6"); + return rc; + } + + if (have_p) + trace_seq_printf(s, "["); + + buf = (unsigned char *) &sa6->sin6_addr; + if (have_c) + print_ip6c_addr(s, buf); + else + print_ip6_addr(s, i, buf); + + if (have_p) + trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port)); + } + + return rc; +} + +static int print_ip_arg(struct trace_seq *s, const char *ptr, + void *data, int size, struct event_format *event, + struct print_arg *arg) +{ + char i = *ptr; /* 'i' or 'I' */ + char ver; + int rc = 0; + + ptr++; + rc++; + + ver = *ptr; + ptr++; + rc++; + + switch (ver) { + case '4': + rc += print_ipv4_arg(s, ptr, i, data, size, event, arg); + break; + case '6': + rc += print_ipv6_arg(s, ptr, i, data, size, event, arg); + break; + case 'S': + rc += print_ipsa_arg(s, ptr, i, data, size, event, arg); + break; + default: + return 0; + } + + return rc; +} + static int is_printable_array(char *p, unsigned int len) { unsigned int i; @@ -4337,6 +4770,15 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event ptr++; arg = arg->next; break; + } else if (*(ptr+1) == 'I' || *(ptr+1) == 'i') { + int n; + + n = print_ip_arg(s, ptr+1, data, size, event, arg); + if (n > 0) { + ptr += n; + arg = arg->next; + break; + } } /* fall through */ @@ -4611,6 +5053,96 @@ const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid) return comm; } +static struct cmdline * +pid_from_cmdlist(struct pevent *pevent, const char *comm, struct cmdline *next) +{ + struct cmdline_list *cmdlist = (struct cmdline_list *)next; + + if (cmdlist) + cmdlist = cmdlist->next; + else + cmdlist = pevent->cmdlist; + + while (cmdlist && strcmp(cmdlist->comm, comm) != 0) + cmdlist = cmdlist->next; + + return (struct cmdline *)cmdlist; +} + +/** + * pevent_data_pid_from_comm - return the pid from a given comm + * @pevent: a handle to the pevent + * @comm: the cmdline to find the pid from + * @next: the cmdline structure to find the next comm + * + * This returns the cmdline structure that holds a pid for a given + * comm, or NULL if none found. As there may be more than one pid for + * a given comm, the result of this call can be passed back into + * a recurring call in the @next paramater, and then it will find the + * next pid. + * Also, it does a linear seach, so it may be slow. + */ +struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm, + struct cmdline *next) +{ + struct cmdline *cmdline; + + /* + * If the cmdlines have not been converted yet, then use + * the list. + */ + if (!pevent->cmdlines) + return pid_from_cmdlist(pevent, comm, next); + + if (next) { + /* + * The next pointer could have been still from + * a previous call before cmdlines were created + */ + if (next < pevent->cmdlines || + next >= pevent->cmdlines + pevent->cmdline_count) + next = NULL; + else + cmdline = next++; + } + + if (!next) + cmdline = pevent->cmdlines; + + while (cmdline < pevent->cmdlines + pevent->cmdline_count) { + if (strcmp(cmdline->comm, comm) == 0) + return cmdline; + cmdline++; + } + return NULL; +} + +/** + * pevent_cmdline_pid - return the pid associated to a given cmdline + * @cmdline: The cmdline structure to get the pid from + * + * Returns the pid for a give cmdline. If @cmdline is NULL, then + * -1 is returned. + */ +int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline) +{ + struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; + + if (!cmdline) + return -1; + + /* + * If cmdlines have not been created yet, or cmdline is + * not part of the array, then treat it as a cmdlist instead. + */ + if (!pevent->cmdlines || + cmdline < pevent->cmdlines || + cmdline >= pevent->cmdlines + pevent->cmdline_count) + return cmdlist->pid; + + return cmdline->pid; +} + /** * pevent_data_comm_from_pid - parse the data into the print format * @s: the trace_seq to write to @@ -4928,6 +5460,15 @@ static void print_args(struct print_arg *args) print_args(args->hex.size); printf(")"); break; + case PRINT_INT_ARRAY: + printf("__print_array("); + print_args(args->int_array.field); + printf(", "); + print_args(args->int_array.count); + printf(", "); + print_args(args->int_array.el_size); + printf(")"); + break; case PRINT_STRING: case PRINT_BSTRING: printf("__get_str(%s)", args->string.string); @@ -5900,15 +6441,20 @@ void pevent_ref(struct pevent *pevent) pevent->ref_count++; } +void pevent_free_format_field(struct format_field *field) +{ + free(field->type); + free(field->name); + free(field); +} + static void free_format_fields(struct format_field *field) { struct format_field *next; while (field) { next = field->next; - free(field->type); - free(field->name); - free(field); + pevent_free_format_field(field); field = next; } } @@ -6013,6 +6559,7 @@ void pevent_free(struct pevent *pevent) free_handler(handle); } + free(pevent->trace_clock); free(pevent->events); free(pevent->sort_events); diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 7a3873ff9a4f..86a5839fb048 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -22,6 +22,7 @@ #include <stdbool.h> #include <stdarg.h> +#include <stdio.h> #include <regex.h> #include <string.h> @@ -91,6 +92,7 @@ extern int trace_seq_putc(struct trace_seq *s, unsigned char c); extern void trace_seq_terminate(struct trace_seq *s); +extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); extern int trace_seq_do_printf(struct trace_seq *s); @@ -114,7 +116,7 @@ struct pevent_plugin_option { char *name; char *plugin_alias; char *description; - char *value; + const char *value; void *priv; int set; }; @@ -152,6 +154,10 @@ struct pevent_plugin_option { * .plugin_alias is used to give a shorter name to access * the vairable. Useful if a plugin handles more than one event. * + * If .value is not set, then it is considered a boolean and only + * .set will be processed. If .value is defined, then it is considered + * a string option and .set will be ignored. + * * PEVENT_PLUGIN_ALIAS: (optional) * The name to use for finding options (uses filename if not defined) */ @@ -245,6 +251,12 @@ struct print_arg_hex { struct print_arg *size; }; +struct print_arg_int_array { + struct print_arg *field; + struct print_arg *count; + struct print_arg *el_size; +}; + struct print_arg_dynarray { struct format_field *field; struct print_arg *index; @@ -273,6 +285,7 @@ enum print_arg_type { PRINT_FLAGS, PRINT_SYMBOL, PRINT_HEX, + PRINT_INT_ARRAY, PRINT_TYPE, PRINT_STRING, PRINT_BSTRING, @@ -292,6 +305,7 @@ struct print_arg { struct print_arg_flags flags; struct print_arg_symbol symbol; struct print_arg_hex hex; + struct print_arg_int_array int_array; struct print_arg_func func; struct print_arg_string string; struct print_arg_bitmask bitmask; @@ -597,7 +611,7 @@ enum trace_flag_type { }; int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); -void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock); +int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock); int pevent_register_function(struct pevent *pevent, char *name, unsigned long long addr, char *mod); int pevent_register_print_string(struct pevent *pevent, const char *fmt, @@ -617,6 +631,7 @@ enum pevent_errno pevent_parse_format(struct pevent *pevent, const char *buf, unsigned long size, const char *sys); void pevent_free_format(struct event_format *event); +void pevent_free_format_field(struct format_field *field); void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, const char *name, struct pevent_record *record, @@ -675,6 +690,11 @@ int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type); int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); +struct cmdline; +struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm, + struct cmdline *next); +int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline); + void pevent_event_info(struct trace_seq *s, struct event_format *event, struct pevent_record *record); int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index 136162c03af1..a16756ae3526 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -18,6 +18,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#include <ctype.h> #include <stdio.h> #include <string.h> #include <dlfcn.h> @@ -49,6 +50,52 @@ struct plugin_list { void *handle; }; +static void lower_case(char *str) +{ + if (!str) + return; + for (; *str; str++) + *str = tolower(*str); +} + +static int update_option_value(struct pevent_plugin_option *op, const char *val) +{ + char *op_val; + + if (!val) { + /* toggle, only if option is boolean */ + if (op->value) + /* Warn? */ + return 0; + op->set ^= 1; + return 0; + } + + /* + * If the option has a value then it takes a string + * otherwise the option is a boolean. + */ + if (op->value) { + op->value = val; + return 0; + } + + /* Option is boolean, must be either "1", "0", "true" or "false" */ + + op_val = strdup(val); + if (!op_val) + return -1; + lower_case(op_val); + + if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0) + op->set = 1; + else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0) + op->set = 0; + free(op_val); + + return 0; +} + /** * traceevent_plugin_list_options - get list of plugin options * @@ -120,6 +167,7 @@ update_option(const char *file, struct pevent_plugin_option *option) { struct trace_plugin_options *op; char *plugin; + int ret = 0; if (option->plugin_alias) { plugin = strdup(option->plugin_alias); @@ -144,9 +192,10 @@ update_option(const char *file, struct pevent_plugin_option *option) if (strcmp(op->option, option->name) != 0) continue; - option->value = op->value; - option->set ^= 1; - goto out; + ret = update_option_value(option, op->value); + if (ret) + goto out; + break; } /* first look for unnamed options */ @@ -156,14 +205,13 @@ update_option(const char *file, struct pevent_plugin_option *option) if (strcmp(op->option, option->name) != 0) continue; - option->value = op->value; - option->set ^= 1; + ret = update_option_value(option, op->value); break; } out: free(plugin); - return 0; + return ret; } /** diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index dcc665228c71..3bcada3ae05a 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -372,7 +372,6 @@ translate_data(struct kbuffer *kbuf, void *data, void **rptr, switch (type_len) { case KBUFFER_TYPE_PADDING: *length = read_4(kbuf, data); - data += *length; break; case KBUFFER_TYPE_TIME_EXTEND: @@ -730,3 +729,14 @@ void kbuffer_set_old_format(struct kbuffer *kbuf) kbuf->next_event = __old_next_event; } + +/** + * kbuffer_start_of_data - return offset of where data starts on subbuffer + * @kbuf: The kbuffer + * + * Returns the location on the subbuffer where the data starts. + */ +int kbuffer_start_of_data(struct kbuffer *kbuf) +{ + return kbuf->start; +} diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h index c831f64b17a0..03dce757553f 100644 --- a/tools/lib/traceevent/kbuffer.h +++ b/tools/lib/traceevent/kbuffer.h @@ -63,5 +63,6 @@ int kbuffer_missed_events(struct kbuffer *kbuf); int kbuffer_subbuffer_size(struct kbuffer *kbuf); void kbuffer_set_old_format(struct kbuffer *kbuf); +int kbuffer_start_of_data(struct kbuffer *kbuf); #endif /* _K_BUFFER_H */ diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index b50234402fc2..0144b3d1bb77 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1058,6 +1058,7 @@ process_filter(struct event_format *event, struct filter_arg **parg, *parg = current_op; else *parg = current_exp; + free(token); return PEVENT_ERRNO__UNBALANCED_PAREN; } break; @@ -1168,6 +1169,7 @@ process_filter(struct event_format *event, struct filter_arg **parg, *parg = current_op; + free(token); return 0; fail_alloc: diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index ec3bd16a5488..292dc9f1d233 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -231,19 +231,24 @@ void trace_seq_terminate(struct trace_seq *s) s->buffer[s->len] = 0; } -int trace_seq_do_printf(struct trace_seq *s) +int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp) { TRACE_SEQ_CHECK(s); switch (s->state) { case TRACE_SEQ__GOOD: - return printf("%.*s", s->len, s->buffer); + return fprintf(fp, "%.*s", s->len, s->buffer); case TRACE_SEQ__BUFFER_POISONED: - puts("Usage of trace_seq after it was destroyed"); + fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed"); break; case TRACE_SEQ__MEM_ALLOC_FAILED: - puts("Can't allocate trace_seq buffer memory"); + fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory"); break; } return -1; } + +int trace_seq_do_printf(struct trace_seq *s) +{ + return trace_seq_do_fprintf(s, stdout); +} diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index 833a96611da6..7cc72a336645 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -90,8 +90,10 @@ extern void yyerror(const char *str); "#"?("hatype") { return K_HATYPE; } "#"?("rxhash") { return K_RXHASH; } "#"?("cpu") { return K_CPU; } -"#"?("vlan_tci") { return K_VLANT; } -"#"?("vlan_pr") { return K_VLANP; } +"#"?("vlan_tci") { return K_VLAN_TCI; } +"#"?("vlan_pr") { return K_VLAN_AVAIL; } +"#"?("vlan_avail") { return K_VLAN_AVAIL; } +"#"?("vlan_tpid") { return K_VLAN_TPID; } "#"?("rand") { return K_RAND; } ":" { return ':'; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index e6306c51c26f..e24eea1b0db5 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_LDXI %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND +%token K_RXHASH K_CPU K_IFIDX K_VLAN_TCI K_VLAN_AVAIL K_VLAN_TPID K_POFF K_RAND %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' @@ -155,10 +155,10 @@ ldb | OP_LDB K_CPU { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDB K_VLANT { + | OP_LDB K_VLAN_TCI { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDB K_VLANP { + | OP_LDB K_VLAN_AVAIL { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } | OP_LDB K_POFF { @@ -167,6 +167,9 @@ ldb | OP_LDB K_RAND { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LDB K_VLAN_TPID { + bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } ; ldh @@ -206,10 +209,10 @@ ldh | OP_LDH K_CPU { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDH K_VLANT { + | OP_LDH K_VLAN_TCI { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDH K_VLANP { + | OP_LDH K_VLAN_AVAIL { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } | OP_LDH K_POFF { @@ -218,6 +221,9 @@ ldh | OP_LDH K_RAND { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LDH K_VLAN_TPID { + bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } ; ldi @@ -262,10 +268,10 @@ ld | OP_LD K_CPU { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU); } - | OP_LD K_VLANT { + | OP_LD K_VLAN_TCI { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LD K_VLANP { + | OP_LD K_VLAN_AVAIL { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } | OP_LD K_POFF { @@ -274,6 +280,9 @@ ld | OP_LD K_RAND { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LD K_VLAN_TPID { + bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 40399c3d97d6..812f904193e8 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -1,6 +1,7 @@ PERF-CFLAGS PERF-GUI-VARS PERF-VERSION-FILE +FEATURE-DUMP perf perf-read-vdso32 perf-read-vdsox32 diff --git a/tools/perf/Build b/tools/perf/Build new file mode 100644 index 000000000000..b77370ef7005 --- /dev/null +++ b/tools/perf/Build @@ -0,0 +1,44 @@ +perf-y += builtin-bench.o +perf-y += builtin-annotate.o +perf-y += builtin-diff.o +perf-y += builtin-evlist.o +perf-y += builtin-help.o +perf-y += builtin-sched.o +perf-y += builtin-buildid-list.o +perf-y += builtin-buildid-cache.o +perf-y += builtin-list.o +perf-y += builtin-record.o +perf-y += builtin-report.o +perf-y += builtin-stat.o +perf-y += builtin-timechart.o +perf-y += builtin-top.o +perf-y += builtin-script.o +perf-y += builtin-kmem.o +perf-y += builtin-lock.o +perf-y += builtin-kvm.o +perf-y += builtin-inject.o +perf-y += builtin-mem.o +perf-y += builtin-data.o + +perf-$(CONFIG_AUDIT) += builtin-trace.o +perf-$(CONFIG_LIBELF) += builtin-probe.o + +perf-y += bench/ +perf-y += tests/ + +perf-y += perf.o + +paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" +paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))" +paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" + +CFLAGS_builtin-help.o += $(paths) +CFLAGS_builtin-timechart.o += $(paths) +CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE + +libperf-y += util/ +libperf-y += arch/ +libperf-y += ui/ +libperf-y += scripts/ + +gtk-y += ui/gtk/ diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt new file mode 100644 index 000000000000..f6fc6507ba55 --- /dev/null +++ b/tools/perf/Documentation/Build.txt @@ -0,0 +1,49 @@ + +1) perf build +============= +The perf build process consists of several separated building blocks, +which are linked together to form the perf binary: + - libperf library (static) + - perf builtin commands + - traceevent library (static) + - GTK ui library + +Several makefiles govern the perf build: + + - Makefile + top level Makefile working as a wrapper that calls the main + Makefile.perf with a -j option to do parallel builds. + + - Makefile.perf + main makefile that triggers build of all perf objects including + installation and documentation processing. + + - tools/build/Makefile.build + main makefile of the build framework + + - tools/build/Build.include + build framework generic definitions + + - Build makefiles + makefiles that defines build objects + +Please refer to tools/build/Documentation/Build.txt for more +information about build framework. + + +2) perf build +============= +The Makefile.perf triggers the build framework for build objects: + perf, libperf, gtk + +resulting in following objects: + $ ls *-in.o + gtk-in.o libperf-in.o perf-in.o + +Those objects are then used in final linking: + libperf-gtk.so <- gtk-in.o libperf-in.o + perf <- perf-in.o libperf-in.o + + +NOTE this description is omitting other libraries involved, only + focusing on build framework outcomes diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index fd77d81ea748..dd07b55f58d8 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -12,9 +12,9 @@ SYNOPSIS DESCRIPTION ----------- -This command manages the build-id cache. It can add and remove files to/from -the cache. In the future it should as well purge older entries, set upper -limits for the space used by the cache, etc. +This command manages the build-id cache. It can add, remove, update and purge +files to/from the cache. In the future it should as well set upper limits for +the space used by the cache, etc. OPTIONS ------- @@ -36,14 +36,24 @@ OPTIONS actually made. -r:: --remove=:: - Remove specified file from the cache. + Remove a cached binary which has same build-id of specified file + from the cache. +-p:: +--purge=:: + Purge all cached binaries including older caches which have specified + path from the cache. -M:: ---missing=:: +--missing=:: List missing build ids in the cache for the specified file. -u:: ---update:: - Update specified file of the cache. It can be used to update kallsyms - kernel dso to vmlinux in order to support annotation. +--update=:: + Update specified file of the cache. Note that this doesn't remove + older entires since those may be still needed for annotating old + (or remote) perf.data. Only if there is already a cache which has + exactly same build-id, that is replaced by new one. It can be used + to update kallsyms and kernel dso to vmlinux in order to support + annotation. + -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt new file mode 100644 index 000000000000..be8fa1a0a97e --- /dev/null +++ b/tools/perf/Documentation/perf-data.txt @@ -0,0 +1,40 @@ +perf-data(1) +============== + +NAME +---- +perf-data - Data file related processing + +SYNOPSIS +-------- +[verse] +'perf data' [<common options>] <command> [<options>]", + +DESCRIPTION +----------- +Data file related processing. + +COMMANDS +-------- +convert:: + Converts perf data file into another format (only CTF [1] format is support by now). + It's possible to set data-convert debug variable to get debug messages from conversion, + like: + perf --debug data-convert data convert ... + +OPTIONS for 'convert' +--------------------- +--to-ctf:: + Triggers the CTF conversion, specify the path of CTF data directory. + +-i:: + Specify input perf data file path. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +SEE ALSO +-------- +linkperf:perf[1] +[1] Common Trace Format - http://www.efficios.com/ctf diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index e463caa3eb49..d1deb573877f 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -20,12 +20,20 @@ If no parameters are passed it will assume perf.data.old and perf.data. The differential profile is displayed only for events matching both specified perf.data files. +If no parameters are passed the samples will be sorted by dso and symbol. +As the perf.data files could come from different binaries, the symbols addresses +could vary. So perf diff is based on the comparison of the files and +symbols name. + OPTIONS ------- -D:: --dump-raw-trace:: Dump raw trace in ASCII. +--kallsyms=<file>:: + kallsyms pathname + -m:: --modules:: Load module symbols. WARNING: use only with -k and LIVE kernel diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 7c8fbbf3f61c..23219c65c16f 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -3,7 +3,7 @@ perf-kmem(1) NAME ---- -perf-kmem - Tool to trace/measure kernel memory(slab) properties +perf-kmem - Tool to trace/measure kernel memory properties SYNOPSIS -------- @@ -25,6 +25,10 @@ OPTIONS --input=<file>:: Select the input file (default: perf.data unless stdin is a fifo) +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + --caller:: Show per-callsite statistics @@ -42,6 +46,12 @@ OPTIONS --raw-ip:: Print raw ip instead of symbol +--slab:: + Analyze SLAB allocator events. + +--page:: + Analyze page allocator events + SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index cbb4f743d921..bada8933fdd4 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -26,6 +26,7 @@ counted. The following modifiers exist: u - user-space counting k - kernel counting h - hypervisor counting + I - non idle counting G - guest counting (in KVM guests) H - host counting (not in KVM guests) p - precise level @@ -89,6 +90,19 @@ raw encoding of 0x1A8 can be used: You should refer to the processor specific documentation for getting these details. Some of them are referenced in the SEE ALSO section below. +PARAMETERIZED EVENTS +-------------------- + +Some pmu events listed by 'perf-list' will be displayed with '?' in them. For +example: + + hv_gpci/dtbp_ptitc,phys_processor_idx=?/ + +This means that when provided as an event, a value for '?' must +also be supplied. For example: + + perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... + OPTIONS ------- @@ -114,6 +128,12 @@ To limit the list use: One or more types can be used at the same time, listing the events for the types specified. +Support raw format: + +. '--raw-dump', shows the raw-dump of all the events. +. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of + a certain kind of events. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-top[1], diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 1d78a4064da4..43310d8661fe 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -12,11 +12,12 @@ SYNOPSIS DESCRIPTION ----------- -"perf mem -t <TYPE> record" runs a command and gathers memory operation data +"perf mem record" runs a command and gathers memory operation data from it, into perf.data. Perf record options are accepted and are passed through. -"perf mem -t <TYPE> report" displays the result. It invokes perf report with the -right set of options to display a memory access profile. +"perf mem report" displays the result. It invokes perf report with the +right set of options to display a memory access profile. By default, loads +and stores are sampled. Use the -t option to limit to loads or stores. Note that on Intel systems the memory latency reported is the use-latency, not the pure load (or store latency). Use latency includes any pipeline @@ -29,7 +30,7 @@ OPTIONS -t:: --type=:: - Select the memory operation type: load or store (default: load) + Select the memory operation type: load or store (default: load,store) -D:: --dump-raw-samples=:: diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index aaa869be3dc1..239609c09f83 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -47,6 +47,12 @@ OPTIONS -v:: --verbose:: Be more verbose (show parsed arguments, etc). + Can not use with -q. + +-q:: +--quiet:: + Be quiet (do not show any messages including errors). + Can not use with -v. -a:: --add=:: @@ -96,7 +102,7 @@ OPTIONS Dry run. With this option, --add and --del doesn't execute actual adding and removal operations. ---max-probes:: +--max-probes=NUM:: Set the maximum number of probe points for an event. Default is 128. -x:: @@ -104,8 +110,13 @@ OPTIONS Specify path to the executable or shared library file for user space tracing. Can also be used with --funcs option. +--demangle:: + Demangle application symbols. --no-demangle is also available + for disabling demangling. + --demangle-kernel:: - Demangle kernel symbols. + Demangle kernel symbols. --no-demangle-kernel is also available + for disabling kernel demangling. In absence of -m/-x options, perf probe checks if the first argument after the options is an absolute path name. If its an absolute path, perf probe @@ -137,6 +148,7 @@ Each probe argument follows below syntax. [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) +'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point. 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index af9a54ece024..4847a793de65 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -33,12 +33,32 @@ OPTIONS - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a hexadecimal event descriptor. - - a hardware breakpoint event in the form of '\mem:addr[:access]' + - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where + 'param1', 'param2', etc are defined as formats for the PMU in + /sys/bus/event_sources/devices/<pmu>/format/*. + + - a symbolically formed event like 'pmu/config=M,config1=N,config3=K/' + + where M, N, K are numbers (in decimal, hex, octal format). Acceptable + values for each of 'config', 'config1' and 'config2' are defined by + corresponding entries in /sys/bus/event_sources/devices/<pmu>/format/* + param1 and param2 are defined as formats for the PMU in: + /sys/bus/event_sources/devices/<pmu>/format/* + + - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' where addr is the address in memory you want to break in. Access is the memory access type (read, write, execute) it can - be passed as follows: '\mem:addr[:[r][w][x]]'. + be passed as follows: '\mem:addr[:[r][w][x]]'. len is the range, + number of bytes from specified addr, which the breakpoint will cover. If you want to profile read-write accesses in 0x1000, just set 'mem:0x1000:rw'. + If you want to profile write accesses in [0x1000~1008), just set + 'mem:0x1000/8:w'. + + - a group of events surrounded by a pair of brace ("{event1,event2,...}"). + Each event is separated by commas and the group should be quoted to + prevent the shell interpretation. You also need to use --group on + "perf report" to view group events together. --filter=<filter>:: Event filter. @@ -47,9 +67,6 @@ OPTIONS --all-cpus:: System-wide collection from all CPUs. --l:: - Scale counter values. - -p:: --pid=:: Record events on existing process ID (comma separated list). @@ -92,6 +109,10 @@ OPTIONS specification with appended unit character - B/K/M/G. The size is rounded up to have nearest pages power of two value. +--group:: + Put all events in a single event group. This precedes the --event + option and remains only for backward compatibility. See --event. + -g:: Enables call-graph (stack chain/backtrace) recording. @@ -100,13 +121,19 @@ OPTIONS implies -g. Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) as the method to collect + (DWARF's CFI - Call Frame Information) or "lbr" + (Hardware Last Branch Record facility) as the method to collect the information used to show the call graphs. In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus call graphs, using "dwarf", if available (perf tools linked to the libunwind library) should be used instead. + Using the "lbr" method doesn't require any compiler options. It + will produce call graphs from the hardware LBR registers. The + main limition is that it is only available on new Intel + platforms, such as Haswell. It can only get user call chain. It + doesn't work with branch stack sampling at the same time. -q:: --quiet:: @@ -220,6 +247,16 @@ Capture machine state (registers) at interrupt, i.e., on counter overflows for each sample. List of captured registers depends on the architecture. This option is off by default. +--running-time:: +Record running and enabled time for read events (:S) + +-k:: +--clockid:: +Sets the clock id to use for the various time fields in the perf_event_type +records. See clock_gettime(). In particular CLOCK_MONOTONIC and +CLOCK_MONOTONIC_RAW are supported, some events might also allow +CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index dd7cccdde498..4879cf638824 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -40,6 +40,11 @@ OPTIONS Only consider symbols in these comms. CSV that understands file://filename entries. This option will affect the percentage of the overhead column. See --percentage for more info. +--pid=:: + Only show events for given process ID (comma separated list). + +--tid=:: + Only show events for given thread ID (comma separated list). -d:: --dsos=:: Only consider symbols in these dsos. CSV that understands diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 21494806c0ab..79445750fcb3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -125,46 +125,46 @@ OPTIONS is equivalent to: perf script -f trace:<fields> -f sw:<fields> -f hw:<fields> - + i.e., the specified fields apply to all event types if the type string is not given. - + The arguments are processed in the order received. A later usage can reset a prior request. e.g.: - + -f trace: -f comm,tid,time,ip,sym - + The first -f suppresses trace events (field list is ""), but then the second invocation sets the fields to comm,tid,time,ip,sym. In this case a warning is given to the user: - + "Overriding previous field request for all events." - + Alternatively, consider the order: - + -f comm,tid,time,ip,sym -f trace: - + The first -f sets the fields for all events and the second -f suppresses trace events. The user is given a warning message about the override, and the result of the above is that only S/W and H/W events are displayed with the given fields. - + For the 'wildcard' option if a user selected field is invalid for an event type, a message is displayed to the user that the option is ignored for that type. For example: - + $ perf script -f comm,tid,trace 'trace' not valid for hardware events. Ignoring. 'trace' not valid for software events. Ignoring. - + Alternatively, if the type is given an invalid field is specified it is an error. For example: - + perf script -v -f sw:comm,tid,trace 'trace' not valid for software events. - + At this point usage is displayed, and perf-script exits. - + Finally, a user may not set fields to none for all event types. i.e., -f "" is not allowed. @@ -193,6 +193,12 @@ OPTIONS Only display events for these comms. CSV that understands file://filename entries. +--pid=:: + Only show events for given process ID (comma separated list). + +--tid=:: + Only show events for given thread ID (comma separated list). + -I:: --show-info:: Display extended information about the perf.data file. This adds diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 29ee857c09c6..04e150d83e7d 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -25,10 +25,22 @@ OPTIONS -e:: --event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + Select the PMU event. Selection can be: + + - a symbolic event name (use 'perf list' to list all events) + + - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. + + - a symbolically formed event like 'pmu/param1=0x3,param2/' where + param1 and param2 are defined as formats for the PMU in + /sys/bus/event_sources/devices/<pmu>/format/* + + - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' + where M, N, K are numbers (in decimal, hex, octal format). + Acceptable values for each of 'config', 'config1' and 'config2' + parameters are defined by corresponding entries in + /sys/bus/event_sources/devices/<pmu>/format/* -i:: --no-inherit:: diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 7e1b1f2bb83c..ba03fd5d1a54 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -55,6 +55,9 @@ OPTIONS --uid=:: Record events in threads owned by uid. Name or number. +--filter-pids=:: + Filter out events for these pids and for 'trace' itself (comma separated list). + -v:: --verbose=:: Verbosity level. @@ -115,6 +118,9 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --syscalls:: Trace system calls. This options is enabled by default. +--event:: + Trace other events, see 'perf list' for a complete list. + PAGEFAULTS ---------- diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 1e8e400b4493..2b131776363e 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -13,11 +13,16 @@ SYNOPSIS OPTIONS ------- --debug:: - Setup debug variable (just verbose for now) in value + Setup debug variable (see list below) in value range (0, 10). Use like: --debug verbose # sets verbose = 1 --debug verbose=2 # sets verbose = 2 + List of debug variables allowed to set: + verbose - general debug messages + ordered-events - ordered events object debug messages + data-convert - data convert command debug messages + --buildid-dir:: Setup buildid cache directory. It has higher priority than buildid.dir config file option. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 83e2887f91a3..11ccbb22ea2b 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,17 +1,21 @@ tools/perf tools/scripts +tools/build tools/lib/traceevent tools/lib/api tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/util/find_next_bit.c tools/include/asm/bug.h +tools/include/asm-generic/bitops/arch_hweight.h tools/include/asm-generic/bitops/atomic.h +tools/include/asm-generic/bitops/const_hweight.h tools/include/asm-generic/bitops/__ffs.h tools/include/asm-generic/bitops/__fls.h tools/include/asm-generic/bitops/find.h tools/include/asm-generic/bitops/fls64.h tools/include/asm-generic/bitops/fls.h +tools/include/asm-generic/bitops/hweight.h tools/include/asm-generic/bitops.h tools/include/linux/bitops.h tools/include/linux/compiler.h @@ -19,6 +23,8 @@ tools/include/linux/export.h tools/include/linux/hash.h tools/include/linux/log2.h tools/include/linux/types.h +include/asm-generic/bitops/arch_hweight.h +include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h @@ -29,6 +35,7 @@ include/linux/list.h include/linux/hash.h include/linux/stringify.h lib/find_next_bit.c +lib/hweight.c lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index cb2e5868c8e8..d31a7bbd7cee 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -24,8 +24,8 @@ unexport MAKEFLAGS # (To override it, run 'make JOBS=1' and similar.) # ifeq ($(JOBS),) - JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null) - ifeq ($(JOBS),) + JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null) + ifeq ($(JOBS),0) JOBS := 1 endif endif diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 67a03a825b3c..c43a20517591 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -68,7 +68,11 @@ include config/utilities.mak # for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode # # Define NO_ZLIB if you do not want to support compressed kernel modules - +# +# Define LIBBABELTRACE if you DO want libbabeltrace support +# for CTF data format. +# +# Define NO_LZMA if you do not want to support compressed (xz) kernel modules ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -82,13 +86,29 @@ endif ifneq ($(OUTPUT),) #$(info Determined 'OUTPUT' to be $(OUTPUT)) +# Adding $(OUTPUT) as a directory to look for source files, +# because use generated output files as sources dependency +# for flex/bison parsers. +VPATH += $(OUTPUT) +export VPATH +endif + +ifeq ($(V),1) + Q = +else + Q = @ endif +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD - @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) - @touch $(OUTPUT)PERF-VERSION-FILE + $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) + $(Q)touch $(OUTPUT)PERF-VERSION-FILE CC = $(CROSS_COMPILE)gcc +LD = $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar PKG_CONFIG = $(CROSS_COMPILE)pkg-config @@ -127,10 +147,6 @@ export prefix bindir sharedir sysconfdir DESTDIR SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ # Guard against environment variables -BUILTIN_OBJS = -LIB_H = -LIB_OBJS = -GTK_OBJS = PYRF_OBJS = SCRIPT_SH = @@ -155,8 +171,8 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT -LIBAPIKFS = $(LIB_PATH)libapikfs.a -export LIBAPIKFS +LIBAPI = $(LIB_PATH)libapi.a +export LIBAPI # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ @@ -167,7 +183,7 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS) +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \ @@ -206,293 +222,9 @@ endif export PERL_PATH -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l - -$(OUTPUT)util/parse-events-bison.c: util/parse-events.y - $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ - -$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l - -$(OUTPUT)util/pmu-bison.c: util/pmu.y - $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ - -$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c -$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c - LIB_FILE=$(OUTPUT)libperf.a -LIB_H += ../lib/symbol/kallsyms.h -LIB_H += ../../include/uapi/linux/perf_event.h -LIB_H += ../../include/linux/rbtree.h -LIB_H += ../../include/linux/list.h -LIB_H += ../../include/uapi/linux/const.h -LIB_H += ../include/linux/hash.h -LIB_H += ../../include/linux/stringify.h -LIB_H += util/include/linux/bitmap.h -LIB_H += ../include/linux/bitops.h -LIB_H += ../include/asm-generic/bitops/atomic.h -LIB_H += ../include/asm-generic/bitops/find.h -LIB_H += ../include/asm-generic/bitops/fls64.h -LIB_H += ../include/asm-generic/bitops/fls.h -LIB_H += ../include/asm-generic/bitops/__ffs.h -LIB_H += ../include/asm-generic/bitops/__fls.h -LIB_H += ../include/asm-generic/bitops.h -LIB_H += ../include/linux/compiler.h -LIB_H += ../include/linux/log2.h -LIB_H += util/include/linux/const.h -LIB_H += util/include/linux/ctype.h -LIB_H += util/include/linux/kernel.h -LIB_H += util/include/linux/list.h -LIB_H += ../include/linux/export.h -LIB_H += util/include/linux/poison.h -LIB_H += util/include/linux/rbtree.h -LIB_H += util/include/linux/rbtree_augmented.h -LIB_H += util/include/linux/string.h -LIB_H += ../include/linux/types.h -LIB_H += util/include/linux/linkage.h -LIB_H += util/include/asm/asm-offsets.h -LIB_H += ../include/asm/bug.h -LIB_H += util/include/asm/byteorder.h -LIB_H += util/include/asm/hweight.h -LIB_H += util/include/asm/swab.h -LIB_H += util/include/asm/system.h -LIB_H += util/include/asm/uaccess.h -LIB_H += util/include/dwarf-regs.h -LIB_H += util/include/asm/dwarf2.h -LIB_H += util/include/asm/cpufeature.h -LIB_H += util/include/asm/unistd_32.h -LIB_H += util/include/asm/unistd_64.h -LIB_H += perf.h -LIB_H += util/annotate.h -LIB_H += util/cache.h -LIB_H += util/callchain.h -LIB_H += util/build-id.h -LIB_H += util/db-export.h -LIB_H += util/debug.h -LIB_H += util/pmu.h -LIB_H += util/event.h -LIB_H += util/evsel.h -LIB_H += util/evlist.h -LIB_H += util/exec_cmd.h -LIB_H += util/find-vdso-map.c -LIB_H += util/levenshtein.h -LIB_H += util/machine.h -LIB_H += util/map.h -LIB_H += util/parse-options.h -LIB_H += util/parse-events.h -LIB_H += util/quote.h -LIB_H += util/util.h -LIB_H += util/xyarray.h -LIB_H += util/header.h -LIB_H += util/help.h -LIB_H += util/session.h -LIB_H += util/ordered-events.h -LIB_H += util/strbuf.h -LIB_H += util/strlist.h -LIB_H += util/strfilter.h -LIB_H += util/svghelper.h -LIB_H += util/tool.h -LIB_H += util/run-command.h -LIB_H += util/sigchain.h -LIB_H += util/dso.h -LIB_H += util/symbol.h -LIB_H += util/color.h -LIB_H += util/values.h -LIB_H += util/sort.h -LIB_H += util/hist.h -LIB_H += util/comm.h -LIB_H += util/thread.h -LIB_H += util/thread_map.h -LIB_H += util/trace-event.h -LIB_H += util/probe-finder.h -LIB_H += util/dwarf-aux.h -LIB_H += util/probe-event.h -LIB_H += util/pstack.h -LIB_H += util/cpumap.h -LIB_H += util/top.h -LIB_H += $(ARCH_INCLUDE) -LIB_H += util/cgroup.h -LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h -LIB_H += util/target.h -LIB_H += util/rblist.h -LIB_H += util/intlist.h -LIB_H += util/perf_regs.h -LIB_H += util/unwind.h -LIB_H += util/vdso.h -LIB_H += util/tsc.h -LIB_H += ui/helpline.h -LIB_H += ui/progress.h -LIB_H += ui/util.h -LIB_H += ui/ui.h -LIB_H += util/data.h -LIB_H += util/kvm-stat.h -LIB_H += util/thread-stack.h - -LIB_OBJS += $(OUTPUT)util/abspath.o -LIB_OBJS += $(OUTPUT)util/alias.o -LIB_OBJS += $(OUTPUT)util/annotate.o -LIB_OBJS += $(OUTPUT)util/build-id.o -LIB_OBJS += $(OUTPUT)util/config.o -LIB_OBJS += $(OUTPUT)util/ctype.o -LIB_OBJS += $(OUTPUT)util/db-export.o -LIB_OBJS += $(OUTPUT)util/pmu.o -LIB_OBJS += $(OUTPUT)util/environment.o -LIB_OBJS += $(OUTPUT)util/event.o -LIB_OBJS += $(OUTPUT)util/evlist.o -LIB_OBJS += $(OUTPUT)util/evsel.o -LIB_OBJS += $(OUTPUT)util/exec_cmd.o -LIB_OBJS += $(OUTPUT)util/find_next_bit.o -LIB_OBJS += $(OUTPUT)util/help.o -LIB_OBJS += $(OUTPUT)util/kallsyms.o -LIB_OBJS += $(OUTPUT)util/levenshtein.o -LIB_OBJS += $(OUTPUT)util/parse-options.o -LIB_OBJS += $(OUTPUT)util/parse-events.o -LIB_OBJS += $(OUTPUT)util/path.o -LIB_OBJS += $(OUTPUT)util/rbtree.o -LIB_OBJS += $(OUTPUT)util/bitmap.o -LIB_OBJS += $(OUTPUT)util/hweight.o -LIB_OBJS += $(OUTPUT)util/run-command.o -LIB_OBJS += $(OUTPUT)util/quote.o -LIB_OBJS += $(OUTPUT)util/strbuf.o -LIB_OBJS += $(OUTPUT)util/string.o -LIB_OBJS += $(OUTPUT)util/strlist.o -LIB_OBJS += $(OUTPUT)util/strfilter.o -LIB_OBJS += $(OUTPUT)util/top.o -LIB_OBJS += $(OUTPUT)util/usage.o -LIB_OBJS += $(OUTPUT)util/wrapper.o -LIB_OBJS += $(OUTPUT)util/sigchain.o -LIB_OBJS += $(OUTPUT)util/dso.o -LIB_OBJS += $(OUTPUT)util/symbol.o -LIB_OBJS += $(OUTPUT)util/symbol-elf.o -LIB_OBJS += $(OUTPUT)util/color.o -LIB_OBJS += $(OUTPUT)util/pager.o -LIB_OBJS += $(OUTPUT)util/header.o -LIB_OBJS += $(OUTPUT)util/callchain.o -LIB_OBJS += $(OUTPUT)util/values.o -LIB_OBJS += $(OUTPUT)util/debug.o -LIB_OBJS += $(OUTPUT)util/machine.o -LIB_OBJS += $(OUTPUT)util/map.o -LIB_OBJS += $(OUTPUT)util/pstack.o -LIB_OBJS += $(OUTPUT)util/session.o -LIB_OBJS += $(OUTPUT)util/ordered-events.o -LIB_OBJS += $(OUTPUT)util/comm.o -LIB_OBJS += $(OUTPUT)util/thread.o -LIB_OBJS += $(OUTPUT)util/thread_map.o -LIB_OBJS += $(OUTPUT)util/trace-event-parse.o -LIB_OBJS += $(OUTPUT)util/parse-events-flex.o -LIB_OBJS += $(OUTPUT)util/parse-events-bison.o -LIB_OBJS += $(OUTPUT)util/pmu-flex.o -LIB_OBJS += $(OUTPUT)util/pmu-bison.o -LIB_OBJS += $(OUTPUT)util/trace-event-read.o -LIB_OBJS += $(OUTPUT)util/trace-event-info.o -LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o -LIB_OBJS += $(OUTPUT)util/trace-event.o -LIB_OBJS += $(OUTPUT)util/svghelper.o -LIB_OBJS += $(OUTPUT)util/sort.o -LIB_OBJS += $(OUTPUT)util/hist.o -LIB_OBJS += $(OUTPUT)util/probe-event.o -LIB_OBJS += $(OUTPUT)util/util.o -LIB_OBJS += $(OUTPUT)util/xyarray.o -LIB_OBJS += $(OUTPUT)util/cpumap.o -LIB_OBJS += $(OUTPUT)util/cgroup.o -LIB_OBJS += $(OUTPUT)util/target.o -LIB_OBJS += $(OUTPUT)util/rblist.o -LIB_OBJS += $(OUTPUT)util/intlist.o -LIB_OBJS += $(OUTPUT)util/vdso.o -LIB_OBJS += $(OUTPUT)util/stat.o -LIB_OBJS += $(OUTPUT)util/record.o -LIB_OBJS += $(OUTPUT)util/srcline.o -LIB_OBJS += $(OUTPUT)util/data.o -LIB_OBJS += $(OUTPUT)util/tsc.o -LIB_OBJS += $(OUTPUT)util/cloexec.o -LIB_OBJS += $(OUTPUT)util/thread-stack.o - -LIB_OBJS += $(OUTPUT)ui/setup.o -LIB_OBJS += $(OUTPUT)ui/helpline.o -LIB_OBJS += $(OUTPUT)ui/progress.o -LIB_OBJS += $(OUTPUT)ui/util.o -LIB_OBJS += $(OUTPUT)ui/hist.o -LIB_OBJS += $(OUTPUT)ui/stdio/hist.o - -LIB_OBJS += $(OUTPUT)arch/common.o - -LIB_OBJS += $(OUTPUT)tests/parse-events.o -LIB_OBJS += $(OUTPUT)tests/dso-data.o -LIB_OBJS += $(OUTPUT)tests/attr.o -LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o -LIB_OBJS += $(OUTPUT)tests/open-syscall.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o -LIB_OBJS += $(OUTPUT)tests/mmap-basic.o -LIB_OBJS += $(OUTPUT)tests/perf-record.o -LIB_OBJS += $(OUTPUT)tests/rdpmc.o -LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o -LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o -LIB_OBJS += $(OUTPUT)tests/fdarray.o -LIB_OBJS += $(OUTPUT)tests/pmu.o -LIB_OBJS += $(OUTPUT)tests/hists_common.o -LIB_OBJS += $(OUTPUT)tests/hists_link.o -LIB_OBJS += $(OUTPUT)tests/hists_filter.o -LIB_OBJS += $(OUTPUT)tests/hists_output.o -LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o -LIB_OBJS += $(OUTPUT)tests/python-use.o -LIB_OBJS += $(OUTPUT)tests/bp_signal.o -LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o -LIB_OBJS += $(OUTPUT)tests/task-exit.o -LIB_OBJS += $(OUTPUT)tests/sw-clock.o -ifeq ($(ARCH),x86) -LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o -endif -LIB_OBJS += $(OUTPUT)tests/code-reading.o -LIB_OBJS += $(OUTPUT)tests/sample-parsing.o -LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o -ifndef NO_DWARF_UNWIND -ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) -LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o -endif -endif -LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o -LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o -LIB_OBJS += $(OUTPUT)tests/switch-tracking.o - -BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o -BUILTIN_OBJS += $(OUTPUT)builtin-bench.o -# Benchmark modules -BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o -BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o -ifeq ($(RAW_ARCH),x86_64) -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o -endif -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o - -BUILTIN_OBJS += $(OUTPUT)builtin-diff.o -BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o -BUILTIN_OBJS += $(OUTPUT)builtin-help.o -BUILTIN_OBJS += $(OUTPUT)builtin-sched.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o -BUILTIN_OBJS += $(OUTPUT)builtin-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-record.o -BUILTIN_OBJS += $(OUTPUT)builtin-report.o -BUILTIN_OBJS += $(OUTPUT)builtin-stat.o -BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o -BUILTIN_OBJS += $(OUTPUT)builtin-top.o -BUILTIN_OBJS += $(OUTPUT)builtin-script.o -BUILTIN_OBJS += $(OUTPUT)builtin-probe.o -BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o -BUILTIN_OBJS += $(OUTPUT)builtin-lock.o -BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o -BUILTIN_OBJS += $(OUTPUT)builtin-inject.o -BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o -BUILTIN_OBJS += $(OUTPUT)builtin-mem.o - -PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) +PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If @@ -504,67 +236,9 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifdef NO_LIBELF -# Remove ELF/DWARF dependent codes -LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) - -BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) - -# Use minimal symbol handling -LIB_OBJS += $(OUTPUT)util/symbol-minimal.o - -else # NO_LIBELF -ifndef NO_DWARF - LIB_OBJS += $(OUTPUT)util/probe-finder.o - LIB_OBJS += $(OUTPUT)util/dwarf-aux.o -endif # NO_DWARF -endif # NO_LIBELF - -ifndef NO_LIBDW_DWARF_UNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libdw.o - LIB_H += util/unwind-libdw.h -endif - -ifndef NO_LIBUNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o -endif -LIB_OBJS += $(OUTPUT)tests/keep-tracking.o - -ifndef NO_LIBAUDIT - BUILTIN_OBJS += $(OUTPUT)builtin-trace.o -endif - -ifndef NO_SLANG - LIB_OBJS += $(OUTPUT)ui/browser.o - LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o - LIB_OBJS += $(OUTPUT)ui/browsers/hists.o - LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o - LIB_OBJS += $(OUTPUT)ui/browsers/header.o - LIB_OBJS += $(OUTPUT)ui/tui/setup.o - LIB_OBJS += $(OUTPUT)ui/tui/util.o - LIB_OBJS += $(OUTPUT)ui/tui/helpline.o - LIB_OBJS += $(OUTPUT)ui/tui/progress.o - LIB_H += ui/tui/tui.h - LIB_H += ui/browser.h - LIB_H += ui/browsers/map.h - LIB_H += ui/keysyms.h - LIB_H += ui/libslang.h -endif - ifndef NO_GTK2 ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so - - GTK_OBJS += $(OUTPUT)ui/gtk/browser.o - GTK_OBJS += $(OUTPUT)ui/gtk/hists.o - GTK_OBJS += $(OUTPUT)ui/gtk/setup.o - GTK_OBJS += $(OUTPUT)ui/gtk/util.o - GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o - GTK_OBJS += $(OUTPUT)ui/gtk/progress.o - GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o + GTK_IN := $(OUTPUT)gtk-in.o install-gtk: $(OUTPUT)libperf-gtk.so $(call QUIET_INSTALL, 'GTK UI') \ @@ -572,31 +246,6 @@ install-gtk: $(OUTPUT)libperf-gtk.so $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif -ifndef NO_LIBPERL - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o - LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o -endif - -ifndef NO_LIBPYTHON - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o - LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o -endif - -ifeq ($(NO_PERF_REGS),0) - ifeq ($(ARCH),x86) - LIB_H += arch/x86/include/perf_regs.h - endif - LIB_OBJS += $(OUTPUT)util/perf_regs.o -endif - -ifndef NO_LIBNUMA - BUILTIN_OBJS += $(OUTPUT)bench/numa.o -endif - -ifndef NO_ZLIB - LIB_OBJS += $(OUTPUT)util/zlib.o -endif - ifdef ASCIIDOC8 export ASCIIDOC8 endif @@ -612,39 +261,29 @@ SHELL = $(SHELL_PATH) all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) please_set_SHELL_PATH_to_a_more_modern_shell: - @$$(:) + $(Q)$$(:) shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell strip: $(PROGRAMS) $(OUTPUT)perf $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf -$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - $(CFLAGS) -c $(filter %.c,$^) -o $@ +PERF_IN := $(OUTPUT)perf-in.o -$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ - $(BUILTIN_OBJS) $(LIBS) -o $@ +export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) - $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $< +$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE + $(Q)$(MAKE) $(build)=perf -$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@ -$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +$(GTK_IN): FORCE + $(Q)$(MAKE) $(build)=gtk -$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) + $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) $(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt @@ -655,8 +294,7 @@ $(SCRIPTS) : % : %.sh $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@' # These can record PERF_VERSION -$(OUTPUT)perf.o perf.spec \ - $(SCRIPTS) \ +perf.spec $(SCRIPTS) \ : $(OUTPUT)PERF-VERSION-FILE .SUFFIXES: @@ -679,87 +317,33 @@ endif # These two need to be here so that when O= is not used they take precedence # over the general rule for .o -$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $< - -$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< - -$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $< -$(OUTPUT)%.o: %.S - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.s: %.S - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< - -$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ - '-DPREFIX="$(prefix_SQ)"' \ - $< - -$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - -DPYTHONPATH='"$(OUTPUT)python"' \ - -DPYTHON='"$(PYTHON_WORD)"' \ - $< +# get relative building directory (to $(OUTPUT)) +# and '.' if it's $(OUTPUT) itself +__build-dir = $(subst $(OUTPUT),,$(dir $@)) +build-dir = $(if $(__build-dir),$(__build-dir),.) -$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $< +single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +$(OUTPUT)%.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $< +$(OUTPUT)%.i: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.s: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%-bison.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%-flex.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.o: %.S single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< - -$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< - -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< - -$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< - -$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $< - -$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< - -$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< +$(OUTPUT)%.i: %.S single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -774,58 +358,34 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c endif -$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) +$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So -# we depend the various files onto their directories. -DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS) -DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -# no need to add flex objects, because they depend on bison ones -DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c -DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c +LIBPERF_IN := $(OUTPUT)libperf-in.o -OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS))) +$(LIBPERF_IN): FORCE + $(Q)$(MAKE) $(build)=libperf -$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES) -# In the second step, we make a rule to actually create these directories -$(OUTPUT_DIRECTORIES): - $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null +$(LIB_FILE): $(LIBPERF_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -$(LIB_FILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) - -# libtraceevent.a -TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) - -LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT) -LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)" LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins +$(LIBTRACEEVENT): FORCE + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) - @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null install-traceevent-plugins: $(LIBTRACEEVENT) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch]) - -# if subdir is set, we've been called from above so target has been built -# already -$(LIBAPIKFS): $(LIBAPIKFS_SOURCES) -ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a -endif +$(LIBAPI): FORCE + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a -$(LIBAPIKFS)-clean: -ifeq ($(subdir),) - $(call QUIET_CLEAN, libapikfs) - @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -endif +$(LIBAPI)-clean: + $(call QUIET_CLEAN, libapi) + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null help: @echo 'Perf make targets:' @@ -881,17 +441,6 @@ cscope: $(QUIET_GEN)$(RM) cscope*; \ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) -### Detect prefix changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ - $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ) - -$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or prefix"; \ - echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ - fi - ### Testing rules # GNU make supports exporting all variables by "export" without parameters. @@ -974,12 +523,14 @@ $(INSTALL_DOC_TARGETS): # config-clean: $(call QUIET_CLEAN, config) - @$(MAKE) -C config/feature-checks clean >/dev/null + $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) +clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) + $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)$(RM) .config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) @@ -993,7 +544,9 @@ else GIT-HEAD-PHONY = endif +FORCE: + .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build new file mode 100644 index 000000000000..109eb75cf7de --- /dev/null +++ b/tools/perf/arch/Build @@ -0,0 +1,2 @@ +libperf-y += common.o +libperf-y += $(ARCH)/ diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/arm/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 09d62153d384..7fbca175099e 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,14 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/arm/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build new file mode 100644 index 000000000000..d22e3d07de3d --- /dev/null +++ b/tools/perf/arch/arm/util/Build @@ -0,0 +1,4 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/arm64/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 67e9b3d38e89..7fbca175099e 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,7 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o endif diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build new file mode 100644 index 000000000000..e58123a8912b --- /dev/null +++ b/tools/perf/arch/arm64/util/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/powerpc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 6f7782bea5dd..7fbca175099e 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,6 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build new file mode 100644 index 000000000000..0af6e9b3f728 --- /dev/null +++ b/tools/perf/arch/powerpc/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 3bb50eac5542..0c370f81e002 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -103,7 +103,7 @@ static Dwarf_Frame *get_eh_frame(Dwfl_Module *mod, Dwarf_Addr pc) return NULL; } - result = dwarf_cfi_addrframe(cfi, pc, &frame); + result = dwarf_cfi_addrframe(cfi, pc-bias, &frame); if (result) { pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1)); return NULL; @@ -128,7 +128,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) return NULL; } - result = dwarf_cfi_addrframe(cfi, pc, &frame); + result = dwarf_cfi_addrframe(cfi, pc-bias, &frame); if (result) { pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1)); return NULL; @@ -145,7 +145,7 @@ static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc) * yet used) * -1 in case of errors */ -static int check_return_addr(struct dso *dso, Dwarf_Addr pc) +static int check_return_addr(struct dso *dso, u64 map_start, Dwarf_Addr pc) { int rc = -1; Dwfl *dwfl; @@ -155,6 +155,7 @@ static int check_return_addr(struct dso *dso, Dwarf_Addr pc) Dwarf_Addr start = pc; Dwarf_Addr end = pc; bool signalp; + const char *exec_file = dso->long_name; dwfl = dso->dwfl; @@ -165,8 +166,10 @@ static int check_return_addr(struct dso *dso, Dwarf_Addr pc) return -1; } - if (dwfl_report_offline(dwfl, "", dso->long_name, -1) == NULL) { - pr_debug("dwfl_report_offline() failed %s\n", + mod = dwfl_report_elf(dwfl, exec_file, exec_file, -1, + map_start, false); + if (!mod) { + pr_debug("dwfl_report_elf() failed %s\n", dwarf_errmsg(-1)); /* * We normally cache the DWARF debug info and never @@ -256,10 +259,10 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) return skip_slot; } - rc = check_return_addr(dso, ip); + rc = check_return_addr(dso, al.map->start, ip); - pr_debug("DSO %s, nr %" PRIx64 ", ip 0x%" PRIx64 "rc %d\n", - dso->long_name, chain->nr, ip, rc); + pr_debug("[DSO %s, sym %s, ip 0x%" PRIx64 "] rc %d\n", + dso->long_name, al.sym->name, ip, rc); if (rc == 0) { /* diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/s390/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 798ac7379c5f..21322e0385b8 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,7 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build new file mode 100644 index 000000000000..8a61372bb47a --- /dev/null +++ b/tools/perf/arch/s390/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/sh/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile index 15130b50dfe3..7fbca175099e 100644 --- a/tools/perf/arch/sh/Makefile +++ b/tools/perf/arch/sh/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/sh/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/sparc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 15130b50dfe3..7fbca175099e 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/sparc/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/x86/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 9b21881db52f..21322e0385b8 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -1,19 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o -endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o -LIB_H += arch/$(ARCH)/util/tsc.h HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/x86/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build new file mode 100644 index 000000000000..cfbccc4e3187 --- /dev/null +++ b/tools/perf/arch/x86/util/Build @@ -0,0 +1,8 @@ +libperf-y += header.o +libperf-y += tsc.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build new file mode 100644 index 000000000000..5ce98023d518 --- /dev/null +++ b/tools/perf/bench/Build @@ -0,0 +1,11 @@ +perf-y += sched-messaging.o +perf-y += sched-pipe.o +perf-y += mem-memcpy.o +perf-y += futex-hash.o +perf-y += futex-wake.o +perf-y += futex-requeue.o + +perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o +perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o + +perf-$(CONFIG_NUMA) += numa.o diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index bedff6b5b3cf..ad0d9b5342fb 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv, if (!fshared) futex_flag = FUTEX_PRIVATE_FLAG; + if (nrequeue > nthreads) + nrequeue = nthreads; + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), " "%d at a time.\n\n", getpid(), nthreads, fshared ? "shared":"private", &futex1, &futex2, nrequeue); @@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv, /* Ok, all threads are patiently blocked, start requeueing */ gettimeofday(&start, NULL); - for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) { + while (nrequeued < nthreads) { /* * Do not wakeup any tasks blocked on futex1, allowing * us to really measure futex_wait functionality. */ - futex_cmp_requeue(&futex1, 0, &futex2, 0, - nrequeue, futex_flag); + nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0, + nrequeue, futex_flag); } + gettimeofday(&end, NULL); timersub(&end, &start, &runtime); - if (nrequeued > nthreads) - nrequeued = nthreads; - update_stats(&requeued_stats, nrequeued); update_stats(&requeuetime_stats, runtime.tv_usec); @@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv, } /* everybody should be blocked on futex2, wake'em up */ - nrequeued = futex_wake(&futex2, nthreads, futex_flag); + nrequeued = futex_wake(&futex2, nrequeued, futex_flag); if (nthreads != nrequeued) warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 71f2844cf97f..7ed22ff1e1ac 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -68,4 +68,17 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak val, opflags); } +#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP +#include <pthread.h> +static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr, + size_t cpusetsize, + cpu_set_t *cpuset) +{ + attr = attr; + cpusetsize = cpusetsize; + cpuset = cpuset; + return 0; +} +#endif + #endif /* _FUTEX_H */ diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d66ab799b35f..8c0c1a2770c8 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -1,12 +1,12 @@ -MEMCPY_FN(__memcpy, +MEMCPY_FN(memcpy_orig, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") -MEMCPY_FN(memcpy_c, +MEMCPY_FN(__memcpy, "x86-64-movsq", "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") -MEMCPY_FN(memcpy_c_e, +MEMCPY_FN(memcpy_erms, "x86-64-movsb", "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index fcd9cf00600a..e4c2c30143b9 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,8 +1,6 @@ #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl -#define Lmemcpy_c globl memcpy_c; memcpy_c -#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e #include "../../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 6c14afe8c1b1..d3dfb7936dcd 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -36,7 +36,7 @@ static const struct option options[] = { "Specify length of memory to copy. " "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", - "Specify routine to copy"), + "Specify routine to copy, \"all\" runs all available routines"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "cycle", &use_cycle, @@ -135,55 +135,16 @@ struct bench_mem_info { const char *const *usage; }; -static int bench_mem_common(int argc, const char **argv, - const char *prefix __maybe_unused, - struct bench_mem_info *info) +static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) { - int i; - size_t len; - double totallen; + const struct routine *r = &info->routines[r_idx]; double result_bps[2]; u64 result_cycle[2]; - argc = parse_options(argc, argv, options, - info->usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - totallen = (double)len * iterations; - result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* same to without specifying either of prefault and no-prefault */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine)) - break; - } - if (!info->routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; info->routines[i].name; i++) { - printf("\t%s ... %s\n", - info->routines[i].name, info->routines[i].desc); - } - return 1; - } + printf("Routine %s (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s Bytes ...\n\n", length_str); @@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv, if (!only_prefault && !no_prefault) { /* show both of results */ if (use_cycle) { - result_cycle[0] = - info->do_cycle(&info->routines[i], len, false); - result_cycle[1] = - info->do_cycle(&info->routines[i], len, true); + result_cycle[0] = info->do_cycle(r, len, false); + result_cycle[1] = info->do_cycle(r, len, true); } else { - result_bps[0] = - info->do_gettimeofday(&info->routines[i], - len, false); - result_bps[1] = - info->do_gettimeofday(&info->routines[i], - len, true); + result_bps[0] = info->do_gettimeofday(r, len, false); + result_bps[1] = info->do_gettimeofday(r, len, true); } } else { - if (use_cycle) { - result_cycle[pf] = - info->do_cycle(&info->routines[i], - len, only_prefault); - } else { - result_bps[pf] = - info->do_gettimeofday(&info->routines[i], - len, only_prefault); - } + if (use_cycle) + result_cycle[pf] = info->do_cycle(r, len, only_prefault); + else + result_bps[pf] = info->do_gettimeofday(r, len, only_prefault); } switch (bench_format) { @@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv, die("unknown format: %d\n", bench_format); break; } +} + +static int bench_mem_common(int argc, const char **argv, + const char *prefix __maybe_unused, + struct bench_mem_info *info) +{ + int i; + size_t len; + double totallen; + + argc = parse_options(argc, argv, options, + info->usage, 0); + + if (no_prefault && only_prefault) { + fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); + return 1; + } + + if (use_cycle) + init_cycle(); + + len = (size_t)perf_atoll((char *)length_str); + totallen = (double)len * iterations; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + if (!strncmp(routine, "all", 3)) { + for (i = 0; info->routines[i].name; i++) + __bench_mem_routine(info, i, len, totallen); + return 0; + } + + for (i = 0; info->routines[i].name; i++) { + if (!strcmp(info->routines[i].name, routine)) + break; + } + if (!info->routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; info->routines[i].name; i++) { + printf("\t%s ... %s\n", + info->routines[i].name, info->routines[i].desc); + } + return 1; + } + + __bench_mem_routine(info, i, len, totallen); return 0; } @@ -289,7 +293,7 @@ static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault) memcpy_t fn = r->fn.memcpy; int i; - memcpy_alloc_mem(&src, &dst, len); + memcpy_alloc_mem(&dst, &src, len); if (prefault) fn(dst, src, len); @@ -312,7 +316,7 @@ static double do_memcpy_gettimeofday(const struct routine *r, size_t len, void *src = NULL, *dst = NULL; int i; - memcpy_alloc_mem(&src, &dst, len); + memcpy_alloc_mem(&dst, &src, len); if (prefault) fn(dst, src, len); diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h index a71dff97c1f5..f02d028771d9 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -1,12 +1,12 @@ -MEMSET_FN(__memset, +MEMSET_FN(memset_orig, "x86-64-unrolled", "unrolled memset() in arch/x86/lib/memset_64.S") -MEMSET_FN(memset_c, +MEMSET_FN(__memset, "x86-64-stosq", "movsq-based memset() in arch/x86/lib/memset_64.S") -MEMSET_FN(memset_c_e, +MEMSET_FN(memset_erms, "x86-64-stosb", "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index 9e5af89ed13a..de278784c866 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,8 +1,6 @@ #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl -#define Lmemset_c globl memset_c; memset_c -#define Lmemset_c_e globl memset_c_e; memset_c_e #include "../../../arch/x86/lib/memset_64.S" /* diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index ebfa163b80b5..ba5efa4710b5 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -180,7 +180,7 @@ static const struct option options[] = { OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), - OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"), + OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), /* Special option string parsing callbacks: */ @@ -828,6 +828,9 @@ static int count_process_nodes(int process_nr) td = g->threads + task_nr; node = numa_node_of_cpu(td->curr_cpu); + if (node < 0) /* curr_cpu was likely still -1 */ + return 0; + node_present[node] = 1; } @@ -882,6 +885,11 @@ static void calc_convergence_compression(int *strong) for (p = 0; p < g->p.nr_proc; p++) { unsigned int nodes = count_process_nodes(p); + if (!nodes) { + *strong = 0; + return; + } + nodes_min = min(nodes, nodes_min); nodes_max = max(nodes, nodes_max); } @@ -1395,7 +1403,7 @@ static void print_res(const char *name, double val, if (!name) name = "main,"; - if (g->p.show_quiet) + if (!g->p.show_quiet) printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); else printf(" %14.3f %s\n", val, txt_long); diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 07a8d7646a15..005cc283790c 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -19,12 +19,12 @@ #include <stdlib.h> #include <signal.h> #include <sys/wait.h> -#include <linux/unistd.h> #include <string.h> #include <errno.h> #include <assert.h> #include <sys/time.h> #include <sys/types.h> +#include <sys/syscall.h> #include <pthread.h> diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index e7417fe97a97..71bf7451c0ca 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -208,7 +208,7 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - ret = perf_session__process_events(session, &ann->tool); + ret = perf_session__process_events(session); if (ret) goto out; @@ -232,7 +232,7 @@ static int __cmd_annotate(struct perf_annotate *ann) if (nr_samples > 0) { total_nr_samples += nr_samples; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (symbol_conf.event_group && !perf_evsel__is_group_leader(pos)) diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 77d5cae54c6a..d47a0cdc71c9 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -125,8 +125,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, return ret; } -static int build_id_cache__add_kcore(const char *filename, const char *debugdir, - bool force) +static int build_id_cache__add_kcore(const char *filename, bool force) { char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; char from_dir[PATH_MAX], to_dir[PATH_MAX]; @@ -143,7 +142,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", - debugdir, sbuildid); + buildid_dir, sbuildid); if (!force && !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { @@ -155,7 +154,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", - debugdir, sbuildid, dir); + buildid_dir, sbuildid, dir); if (mkdir_p(to_dir, 0755)) return -1; @@ -183,7 +182,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return 0; } -static int build_id_cache__add_file(const char *filename, const char *debugdir) +static int build_id_cache__add_file(const char *filename) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; u8 build_id[BUILD_ID_SIZE]; @@ -195,16 +194,14 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, debugdir, filename, + err = build_id_cache__add_s(sbuild_id, filename, false, false); - if (verbose) - pr_info("Adding %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + pr_debug("Adding %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } -static int build_id_cache__remove_file(const char *filename, - const char *debugdir) +static int build_id_cache__remove_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -217,10 +214,34 @@ static int build_id_cache__remove_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (verbose) - pr_info("Removing %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + err = build_id_cache__remove_s(sbuild_id); + pr_debug("Removing %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); + + return err; +} + +static int build_id_cache__purge_path(const char *pathname) +{ + struct strlist *list; + struct str_node *pos; + int err; + + err = build_id_cache__list_build_ids(pathname, &list); + if (err) + goto out; + + strlist__for_each(pos, list) { + err = build_id_cache__remove_s(pos->s); + pr_debug("Removing %s %s: %s\n", pos->s, pathname, + err ? "FAIL" : "Ok"); + if (err) + break; + } + strlist__delete(list); + +out: + pr_debug("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok"); return err; } @@ -236,10 +257,10 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) if (errno == ENOENT) return false; - pr_warning("Problems with %s file, consider removing it from the cache\n", + pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) { - pr_warning("Problems with %s file, consider removing it from the cache\n", + pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } @@ -252,13 +273,12 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f return 0; } -static int build_id_cache__update_file(const char *filename, - const char *debugdir) +static int build_id_cache__update_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - int err; + int err = 0; if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -266,14 +286,14 @@ static int build_id_cache__update_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (!err) { - err = build_id_cache__add_s(sbuild_id, debugdir, filename, - false, false); - } - if (verbose) - pr_info("Updating %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + if (build_id_cache__cached(sbuild_id)) + err = build_id_cache__remove_s(sbuild_id); + + if (!err) + err = build_id_cache__add_s(sbuild_id, filename, false, false); + + pr_debug("Updating %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } @@ -287,6 +307,7 @@ int cmd_buildid_cache(int argc, const char **argv, bool force = false; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, + *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, *kcore_filename = NULL; @@ -304,6 +325,8 @@ int cmd_buildid_cache(int argc, const char **argv, "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), + OPT_STRING('p', "purge", &purge_name_list_str, "path list", + "path(s) to remove (remove old caches too)"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -320,6 +343,11 @@ int cmd_buildid_cache(int argc, const char **argv, argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); + if (argc || (!add_name_list_str && !kcore_filename && + !remove_name_list_str && !purge_name_list_str && + !missing_filename && !update_name_list_str)) + usage_with_options(buildid_cache_usage, buildid_cache_options); + if (missing_filename) { file.path = missing_filename; file.force = force; @@ -338,7 +366,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, add_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__add_file(pos->s, buildid_dir)) { + if (build_id_cache__add_file(pos->s)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", pos->s); @@ -356,7 +384,25 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, remove_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__remove_file(pos->s, buildid_dir)) { + if (build_id_cache__remove_file(pos->s)) { + if (errno == ENOENT) { + pr_debug("%s wasn't in the cache\n", + pos->s); + continue; + } + pr_warning("Couldn't remove %s: %s\n", + pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + } + + strlist__delete(list); + } + } + + if (purge_name_list_str) { + list = strlist__new(true, purge_name_list_str); + if (list) { + strlist__for_each(pos, list) + if (build_id_cache__purge_path(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -377,7 +423,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, update_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__update_file(pos->s, buildid_dir)) { + if (build_id_cache__update_file(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -391,8 +437,7 @@ int cmd_buildid_cache(int argc, const char **argv, } } - if (kcore_filename && - build_id_cache__add_kcore(kcore_filename, buildid_dir, force)) + if (kcore_filename && build_id_cache__add_kcore(kcore_filename, force)) pr_warning("Couldn't add %s\n", kcore_filename); out: diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index ed3873b3e238..feb420f74c2d 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -74,7 +74,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID */ if (with_hits || perf_data_file__is_pipe(&file)) - perf_session__process_events(session, &build_id__mark_dso_hit_ops); + perf_session__process_events(session); perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); perf_session__delete(session); diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c new file mode 100644 index 000000000000..d6525bc54d13 --- /dev/null +++ b/tools/perf/builtin-data.c @@ -0,0 +1,123 @@ +#include <linux/compiler.h> +#include "builtin.h" +#include "perf.h" +#include "debug.h" +#include "parse-options.h" +#include "data-convert-bt.h" + +typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); + +struct data_cmd { + const char *name; + const char *summary; + data_cmd_fn_t fn; +}; + +static struct data_cmd data_cmds[]; + +#define for_each_cmd(cmd) \ + for (cmd = data_cmds; cmd && cmd->name; cmd++) + +static const struct option data_options[] = { + OPT_END() +}; + +static const char * const data_subcommands[] = { "convert", NULL }; + +static const char *data_usage[] = { + "perf data [<common options>] <command> [<options>]", + NULL +}; + +static void print_usage(void) +{ + struct data_cmd *cmd; + + printf("Usage:\n"); + printf("\t%s\n\n", data_usage[0]); + printf("\tAvailable commands:\n"); + + for_each_cmd(cmd) { + printf("\t %s\t- %s\n", cmd->name, cmd->summary); + } + + printf("\n"); +} + +static const char * const data_convert_usage[] = { + "perf data convert [<options>]", + NULL +}; + +static int cmd_data_convert(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + const char *to_ctf = NULL; + bool force = false; + const struct option options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_STRING('i', "input", &input_name, "file", "input file name"), +#ifdef HAVE_LIBBABELTRACE_SUPPORT + OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), +#endif + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_END() + }; + +#ifndef HAVE_LIBBABELTRACE_SUPPORT + pr_err("No conversion support compiled in.\n"); + return -1; +#endif + + argc = parse_options(argc, argv, options, + data_convert_usage, 0); + if (argc) { + usage_with_options(data_convert_usage, options); + return -1; + } + + if (to_ctf) { +#ifdef HAVE_LIBBABELTRACE_SUPPORT + return bt_convert__perf2ctf(input_name, to_ctf, force); +#else + pr_err("The libbabeltrace support is not compiled in.\n"); + return -1; +#endif + } + + return 0; +} + +static struct data_cmd data_cmds[] = { + { "convert", "converts data file between formats", cmd_data_convert }, + { .name = NULL, }, +}; + +int cmd_data(int argc, const char **argv, const char *prefix) +{ + struct data_cmd *cmd; + const char *cmdstr; + + /* No command specified. */ + if (argc < 2) + goto usage; + + argc = parse_options_subcommand(argc, argv, data_options, data_subcommands, data_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc < 1) + goto usage; + + cmdstr = argv[0]; + + for_each_cmd(cmd) { + if (strcmp(cmd->name, cmdstr)) + continue; + + return cmd->fn(argc, argv, prefix); + } + + pr_err("Unknown command: %s\n", cmdstr); +usage: + print_usage(); + return -1; +} diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 1ce425d101a9..df6307b4050a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -390,6 +390,15 @@ static void perf_evlist__collapse_resort(struct perf_evlist *evlist) } } +static struct data__file *fmt_to_data_file(struct perf_hpp_fmt *fmt) +{ + struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt); + void *ptr = dfmt - dfmt->idx; + struct data__file *d = container_of(ptr, struct data__file, fmt); + + return d; +} + static struct hist_entry* get_pair_data(struct hist_entry *he, struct data__file *d) { @@ -407,8 +416,7 @@ get_pair_data(struct hist_entry *he, struct data__file *d) static struct hist_entry* get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt) { - void *ptr = dfmt - dfmt->idx; - struct data__file *d = container_of(ptr, struct data__file, fmt); + struct data__file *d = fmt_to_data_file(&dfmt->fmt); return get_pair_data(he, d); } @@ -430,7 +438,7 @@ static void hists__baseline_only(struct hists *hists) next = rb_next(&he->rb_node_in); if (!hist_entry__next_pair(he)) { rb_erase(&he->rb_node_in, root); - hist_entry__free(he); + hist_entry__delete(he); } } } @@ -448,26 +456,30 @@ static void hists__precompute(struct hists *hists) next = rb_first(root); while (next != NULL) { struct hist_entry *he, *pair; + struct data__file *d; + int i; he = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&he->rb_node_in); - pair = get_pair_data(he, &data__files[sort_compute]); - if (!pair) - continue; + data__for_each_file_new(i, d) { + pair = get_pair_data(he, d); + if (!pair) + continue; - switch (compute) { - case COMPUTE_DELTA: - compute_delta(he, pair); - break; - case COMPUTE_RATIO: - compute_ratio(he, pair); - break; - case COMPUTE_WEIGHTED_DIFF: - compute_wdiff(he, pair); - break; - default: - BUG_ON(1); + switch (compute) { + case COMPUTE_DELTA: + compute_delta(he, pair); + break; + case COMPUTE_RATIO: + compute_ratio(he, pair); + break; + case COMPUTE_WEIGHTED_DIFF: + compute_wdiff(he, pair); + break; + default: + BUG_ON(1); + } } } } @@ -517,7 +529,7 @@ __hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, static int64_t hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, - int c) + int c, int sort_idx) { bool pairs_left = hist_entry__has_pairs(left); bool pairs_right = hist_entry__has_pairs(right); @@ -529,8 +541,8 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, if (!pairs_left || !pairs_right) return pairs_left ? -1 : 1; - p_left = get_pair_data(left, &data__files[sort_compute]); - p_right = get_pair_data(right, &data__files[sort_compute]); + p_left = get_pair_data(left, &data__files[sort_idx]); + p_right = get_pair_data(right, &data__files[sort_idx]); if (!p_left && !p_right) return 0; @@ -545,55 +557,103 @@ hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right, return __hist_entry__cmp_compute(p_left, p_right, c); } -static void insert_hist_entry_by_compute(struct rb_root *root, - struct hist_entry *he, - int c) +static int64_t +hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right, + int c, int sort_idx) { - struct rb_node **p = &root->rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - if (hist_entry__cmp_compute(he, iter, c) < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; + struct hist_entry *p_right, *p_left; + + p_left = get_pair_data(left, &data__files[sort_idx]); + p_right = get_pair_data(right, &data__files[sort_idx]); + + if (!p_left && !p_right) + return 0; + + if (!p_left || !p_right) + return p_left ? -1 : 1; + + if (c != COMPUTE_DELTA) { + /* + * The delta can be computed without the baseline, but + * others are not. Put those entries which have no + * values below. + */ + if (left->dummy && right->dummy) + return 0; + + if (left->dummy || right->dummy) + return left->dummy ? 1 : -1; } - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, root); + return __hist_entry__cmp_compute(p_left, p_right, c); } -static void hists__compute_resort(struct hists *hists) +static int64_t +hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) { - struct rb_root *root; - struct rb_node *next; + return 0; +} - if (sort__need_collapse) - root = &hists->entries_collapsed; - else - root = hists->entries_in; +static int64_t +hist_entry__cmp_baseline(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + if (left->stat.period == right->stat.period) + return 0; + return left->stat.period > right->stat.period ? 1 : -1; +} - hists->entries = RB_ROOT; - next = rb_first(root); +static int64_t +hist_entry__cmp_delta(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) +{ + struct data__file *d = fmt_to_data_file(fmt); - hists__reset_stats(hists); - hists__reset_col_len(hists); + return hist_entry__cmp_compute(right, left, COMPUTE_DELTA, d->idx); +} - while (next != NULL) { - struct hist_entry *he; +static int64_t +hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) +{ + struct data__file *d = fmt_to_data_file(fmt); - he = rb_entry(next, struct hist_entry, rb_node_in); - next = rb_next(&he->rb_node_in); + return hist_entry__cmp_compute(right, left, COMPUTE_RATIO, d->idx); +} + +static int64_t +hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt, + struct hist_entry *left, struct hist_entry *right) +{ + struct data__file *d = fmt_to_data_file(fmt); - insert_hist_entry_by_compute(&hists->entries, he, compute); - hists__inc_stats(hists, he); + return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF, d->idx); +} - if (!he->filtered) - hists__calc_col_len(hists, he); - } +static int64_t +hist_entry__cmp_delta_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA, + sort_compute); +} + +static int64_t +hist_entry__cmp_ratio_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_RATIO, + sort_compute); +} + +static int64_t +hist_entry__cmp_wdiff_idx(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return hist_entry__cmp_compute_idx(right, left, COMPUTE_WEIGHTED_DIFF, + sort_compute); } static void hists__process(struct hists *hists) @@ -601,12 +661,8 @@ static void hists__process(struct hists *hists) if (show_baseline_only) hists__baseline_only(hists); - if (sort_compute) { - hists__precompute(hists); - hists__compute_resort(hists); - } else { - hists__output_resort(hists); - } + hists__precompute(hists); + hists__output_resort(hists, NULL); hists__fprintf(hists, true, 0, 0, 0, stdout); } @@ -691,7 +747,7 @@ static int __cmd_diff(void) goto out_delete; } - ret = perf_session__process_events(d->session, &tool); + ret = perf_session__process_events(d->session); if (ret) { pr_err("Failed to process %s\n", d->file.path); goto out_delete; @@ -735,6 +791,8 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", @@ -746,7 +804,7 @@ static const struct option options[] = { OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..." " Please refer the man page for the complete list."), - OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", + OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", @@ -805,7 +863,7 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, char pfmt[20] = " "; if (!pair) - goto dummy_print; + goto no_print; switch (comparison_method) { case COMPUTE_DELTA: @@ -814,8 +872,6 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, else diff = compute_delta(he, pair); - if (fabs(diff) < 0.01) - goto dummy_print; scnprintf(pfmt, 20, "%%%+d.2f%%%%", dfmt->header_width - 1); return percent_color_snprintf(hpp->buf, hpp->size, pfmt, diff); @@ -847,6 +903,9 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt, } dummy_print: return scnprintf(hpp->buf, hpp->size, "%*s", + dfmt->header_width, "N/A"); +no_print: + return scnprintf(hpp->buf, hpp->size, "%*s", dfmt->header_width, pfmt); } @@ -896,14 +955,15 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, else diff = compute_delta(he, pair); - if (fabs(diff) >= 0.01) - scnprintf(buf, size, "%+4.2F%%", diff); + scnprintf(buf, size, "%+4.2F%%", diff); break; case PERF_HPP_DIFF__RATIO: /* No point for ratio number if we are dummy.. */ - if (he->dummy) + if (he->dummy) { + scnprintf(buf, size, "N/A"); break; + } if (pair->diff.computed) ratio = pair->diff.period_ratio; @@ -916,8 +976,10 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair, case PERF_HPP_DIFF__WEIGHTED_DIFF: /* No point for wdiff number if we are dummy.. */ - if (he->dummy) + if (he->dummy) { + scnprintf(buf, size, "N/A"); break; + } if (pair->diff.computed) wdiff = pair->diff.wdiff; @@ -1038,32 +1100,41 @@ static void data__hpp_register(struct data__file *d, int idx) fmt->header = hpp__header; fmt->width = hpp__width; fmt->entry = hpp__entry_global; + fmt->cmp = hist_entry__cmp_nop; + fmt->collapse = hist_entry__cmp_nop; /* TODO more colors */ switch (idx) { case PERF_HPP_DIFF__BASELINE: fmt->color = hpp__color_baseline; + fmt->sort = hist_entry__cmp_baseline; break; case PERF_HPP_DIFF__DELTA: fmt->color = hpp__color_delta; + fmt->sort = hist_entry__cmp_delta; break; case PERF_HPP_DIFF__RATIO: fmt->color = hpp__color_ratio; + fmt->sort = hist_entry__cmp_ratio; break; case PERF_HPP_DIFF__WEIGHTED_DIFF: fmt->color = hpp__color_wdiff; + fmt->sort = hist_entry__cmp_wdiff; break; default: + fmt->sort = hist_entry__cmp_nop; break; } init_header(d, dfmt); perf_hpp__column_register(fmt); + perf_hpp__register_sort_field(fmt); } -static void ui_init(void) +static int ui_init(void) { struct data__file *d; + struct perf_hpp_fmt *fmt; int i; data__for_each_file(i, d) { @@ -1093,6 +1164,46 @@ static void ui_init(void) data__hpp_register(d, i ? PERF_HPP_DIFF__PERIOD : PERF_HPP_DIFF__PERIOD_BASELINE); } + + if (!sort_compute) + return 0; + + /* + * Prepend an fmt to sort on columns at 'sort_compute' first. + * This fmt is added only to the sort list but not to the + * output fields list. + * + * Note that this column (data) can be compared twice - one + * for this 'sort_compute' fmt and another for the normal + * diff_hpp_fmt. But it shouldn't a problem as most entries + * will be sorted out by first try or baseline and comparing + * is not a costly operation. + */ + fmt = zalloc(sizeof(*fmt)); + if (fmt == NULL) { + pr_err("Memory allocation failed\n"); + return -1; + } + + fmt->cmp = hist_entry__cmp_nop; + fmt->collapse = hist_entry__cmp_nop; + + switch (compute) { + case COMPUTE_DELTA: + fmt->sort = hist_entry__cmp_delta_idx; + break; + case COMPUTE_RATIO: + fmt->sort = hist_entry__cmp_ratio_idx; + break; + case COMPUTE_WEIGHTED_DIFF: + fmt->sort = hist_entry__cmp_wdiff_idx; + break; + default: + BUG_ON(1); + } + + list_add(&fmt->sort_list, &perf_hpp__sort_list); + return 0; } static int data_init(int argc, const char **argv) @@ -1158,7 +1269,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) if (data_init(argc, argv) < 0) return -1; - ui_init(); + if (ui_init() < 0) + return -1; sort__mode = SORT_MODE__DIFF; diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 0f93f859b782..695ec5a50cf2 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -24,6 +24,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details struct perf_data_file file = { .path = file_name, .mode = PERF_DATA_MODE_READ, + .force = details->force, }; session = perf_session__new(&file, 0, NULL); @@ -47,6 +48,7 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) "Show all event attr details"), OPT_BOOLEAN('g', "group", &details.event_group, "Show event group information"), + OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"), OPT_END() }; const char * const evlist_usage[] = { diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 25d20628212e..36486eade1ef 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -437,7 +437,18 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) HELP_FORMAT_INFO), OPT_END(), }; - const char * const builtin_help_usage[] = { + const char * const builtin_help_subcommands[] = { + "buildid-cache", "buildid-list", "diff", "evlist", "help", "list", + "record", "report", "bench", "stat", "timechart", "top", "annotate", + "script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data", +#ifdef HAVE_LIBELF_SUPPORT + "probe", +#endif +#ifdef HAVE_LIBAUDIT_SUPPORT + "trace", +#endif + NULL }; + const char *builtin_help_usage[] = { "perf help [--all] [--man|--web|--info] [command]", NULL }; @@ -448,8 +459,8 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) perf_config(perf_help_config, &help_format); - argc = parse_options(argc, argv, builtin_help_options, - builtin_help_usage, 0); + argc = parse_options_subcommand(argc, argv, builtin_help_options, + builtin_help_subcommands, builtin_help_usage, 0); if (show_all) { printf("\n usage: %s\n\n", perf_usage_string); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 84df2deed988..40a33d7334cc 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -53,6 +53,13 @@ static int perf_event__repipe_synth(struct perf_tool *tool, return 0; } +static int perf_event__repipe_oe_synth(struct perf_tool *tool, + union perf_event *event, + struct ordered_events *oe __maybe_unused) +{ + return perf_event__repipe_synth(tool, event); +} + static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, struct perf_session *session @@ -343,6 +350,7 @@ static int __cmd_inject(struct perf_inject *inject) int ret = -EINVAL; struct perf_session *session = inject->session; struct perf_data_file *file_out = &inject->output; + int fd = perf_data_file__fd(file_out); signal(SIGINT, sig_handler); @@ -358,8 +366,6 @@ static int __cmd_inject(struct perf_inject *inject) } else if (inject->sched_stat) { struct perf_evsel *evsel; - inject->tool.ordered_events = true; - evlist__for_each(session->evlist, evsel) { const char *name = perf_evsel__name(evsel); @@ -376,16 +382,16 @@ static int __cmd_inject(struct perf_inject *inject) } if (!file_out->is_pipe) - lseek(file_out->fd, session->header.data_offset, SEEK_SET); + lseek(fd, session->header.data_offset, SEEK_SET); - ret = perf_session__process_events(session, &inject->tool); + ret = perf_session__process_events(session); if (!file_out->is_pipe) { if (inject->build_ids) perf_header__set_feat(&session->header, HEADER_BUILD_ID); session->header.data_size = inject->bytes_written; - perf_session__write_header(session, session->evlist, file_out->fd, true); + perf_session__write_header(session, session->evlist, fd, true); } return ret; @@ -407,7 +413,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .unthrottle = perf_event__repipe, .attr = perf_event__repipe_attr, .tracing_data = perf_event__repipe_op2_synth, - .finished_round = perf_event__repipe_op2_synth, + .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, }, @@ -437,6 +443,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), OPT_END() }; const char * const inject_usage[] = { @@ -457,6 +464,8 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } + inject.tool.ordered_events = inject.sched_stat; + file.path = inject.input_name; inject.session = perf_session__new(&file, true, &inject.tool); if (inject.session == NULL) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index f295141025bc..1634186d537c 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -20,6 +20,12 @@ #include <linux/rbtree.h> #include <linux/string.h> +#include <locale.h> + +static int kmem_slab; +static int kmem_page; + +static long kmem_page_size; struct alloc_stat; typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); @@ -225,6 +231,244 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel, return 0; } +static u64 total_page_alloc_bytes; +static u64 total_page_free_bytes; +static u64 total_page_nomatch_bytes; +static u64 total_page_fail_bytes; +static unsigned long nr_page_allocs; +static unsigned long nr_page_frees; +static unsigned long nr_page_fails; +static unsigned long nr_page_nomatch; + +static bool use_pfn; + +#define MAX_MIGRATE_TYPES 6 +#define MAX_PAGE_ORDER 11 + +static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES]; + +struct page_stat { + struct rb_node node; + u64 page; + int order; + unsigned gfp_flags; + unsigned migrate_type; + u64 alloc_bytes; + u64 free_bytes; + int nr_alloc; + int nr_free; +}; + +static struct rb_root page_tree; +static struct rb_root page_alloc_tree; +static struct rb_root page_alloc_sorted; + +static struct page_stat *search_page(unsigned long page, bool create) +{ + struct rb_node **node = &page_tree.rb_node; + struct rb_node *parent = NULL; + struct page_stat *data; + + while (*node) { + s64 cmp; + + parent = *node; + data = rb_entry(*node, struct page_stat, node); + + cmp = data->page - page; + if (cmp < 0) + node = &parent->rb_left; + else if (cmp > 0) + node = &parent->rb_right; + else + return data; + } + + if (!create) + return NULL; + + data = zalloc(sizeof(*data)); + if (data != NULL) { + data->page = page; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &page_tree); + } + + return data; +} + +static int page_stat_cmp(struct page_stat *a, struct page_stat *b) +{ + if (a->page > b->page) + return -1; + if (a->page < b->page) + return 1; + if (a->order > b->order) + return -1; + if (a->order < b->order) + return 1; + if (a->migrate_type > b->migrate_type) + return -1; + if (a->migrate_type < b->migrate_type) + return 1; + if (a->gfp_flags > b->gfp_flags) + return -1; + if (a->gfp_flags < b->gfp_flags) + return 1; + return 0; +} + +static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create) +{ + struct rb_node **node = &page_alloc_tree.rb_node; + struct rb_node *parent = NULL; + struct page_stat *data; + + while (*node) { + s64 cmp; + + parent = *node; + data = rb_entry(*node, struct page_stat, node); + + cmp = page_stat_cmp(data, pstat); + if (cmp < 0) + node = &parent->rb_left; + else if (cmp > 0) + node = &parent->rb_right; + else + return data; + } + + if (!create) + return NULL; + + data = zalloc(sizeof(*data)); + if (data != NULL) { + data->page = pstat->page; + data->order = pstat->order; + data->gfp_flags = pstat->gfp_flags; + data->migrate_type = pstat->migrate_type; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &page_alloc_tree); + } + + return data; +} + +static bool valid_page(u64 pfn_or_page) +{ + if (use_pfn && pfn_or_page == -1UL) + return false; + if (!use_pfn && pfn_or_page == 0) + return false; + return true; +} + +static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 page; + unsigned int order = perf_evsel__intval(evsel, sample, "order"); + unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags"); + unsigned int migrate_type = perf_evsel__intval(evsel, sample, + "migratetype"); + u64 bytes = kmem_page_size << order; + struct page_stat *pstat; + struct page_stat this = { + .order = order, + .gfp_flags = gfp_flags, + .migrate_type = migrate_type, + }; + + if (use_pfn) + page = perf_evsel__intval(evsel, sample, "pfn"); + else + page = perf_evsel__intval(evsel, sample, "page"); + + nr_page_allocs++; + total_page_alloc_bytes += bytes; + + if (!valid_page(page)) { + nr_page_fails++; + total_page_fail_bytes += bytes; + + return 0; + } + + /* + * This is to find the current page (with correct gfp flags and + * migrate type) at free event. + */ + pstat = search_page(page, true); + if (pstat == NULL) + return -ENOMEM; + + pstat->order = order; + pstat->gfp_flags = gfp_flags; + pstat->migrate_type = migrate_type; + + this.page = page; + pstat = search_page_alloc_stat(&this, true); + if (pstat == NULL) + return -ENOMEM; + + pstat->nr_alloc++; + pstat->alloc_bytes += bytes; + + order_stats[order][migrate_type]++; + + return 0; +} + +static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 page; + unsigned int order = perf_evsel__intval(evsel, sample, "order"); + u64 bytes = kmem_page_size << order; + struct page_stat *pstat; + struct page_stat this = { + .order = order, + }; + + if (use_pfn) + page = perf_evsel__intval(evsel, sample, "pfn"); + else + page = perf_evsel__intval(evsel, sample, "page"); + + nr_page_frees++; + total_page_free_bytes += bytes; + + pstat = search_page(page, false); + if (pstat == NULL) { + pr_debug2("missing free at page %"PRIx64" (order: %d)\n", + page, order); + + nr_page_nomatch++; + total_page_nomatch_bytes += bytes; + + return 0; + } + + this.page = page; + this.gfp_flags = pstat->gfp_flags; + this.migrate_type = pstat->migrate_type; + + rb_erase(&pstat->node, &page_tree); + free(pstat); + + pstat = search_page_alloc_stat(&this, false); + if (pstat == NULL) + return -ENOENT; + + pstat->nr_free++; + pstat->free_bytes += bytes; + + return 0; +} + typedef int (*tracepoint_handler)(struct perf_evsel *evsel, struct perf_sample *sample); @@ -269,16 +513,17 @@ static double fragmentation(unsigned long n_req, unsigned long n_alloc) return 100.0 - (100.0 * n_req / n_alloc); } -static void __print_result(struct rb_root *root, struct perf_session *session, - int n_lines, int is_caller) +static void __print_slab_result(struct rb_root *root, + struct perf_session *session, + int n_lines, int is_caller) { struct rb_node *next; struct machine *machine = &session->machines.host; - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); next = rb_first(root); @@ -304,7 +549,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session, snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr); printf(" %-34s |", buf); - printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n", + printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n", (unsigned long long)data->bytes_alloc, (unsigned long)data->bytes_alloc / data->hit, (unsigned long long)data->bytes_req, @@ -317,30 +562,137 @@ static void __print_result(struct rb_root *root, struct perf_session *session, } if (n_lines == -1) - printf(" ... | ... | ... | ... | ... | ... \n"); + printf(" ... | ... | ... | ... | ... | ... \n"); - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); } -static void print_summary(void) +static const char * const migrate_type_str[] = { + "UNMOVABL", + "RECLAIM", + "MOVABLE", + "RESERVED", + "CMA/ISLT", + "UNKNOWN", +}; + +static void __print_page_result(struct rb_root *root, + struct perf_session *session __maybe_unused, + int n_lines) { - printf("\nSUMMARY\n=======\n"); - printf("Total bytes requested: %lu\n", total_requested); - printf("Total bytes allocated: %lu\n", total_allocated); - printf("Total bytes wasted on internal fragmentation: %lu\n", + struct rb_node *next = rb_first(root); + const char *format; + + printf("\n%.80s\n", graph_dotted_line); + printf(" %-16s | Total alloc (KB) | Hits | Order | Mig.type | GFP flags\n", + use_pfn ? "PFN" : "Page"); + printf("%.80s\n", graph_dotted_line); + + if (use_pfn) + format = " %16llu | %'16llu | %'9d | %5d | %8s | %08lx\n"; + else + format = " %016llx | %'16llu | %'9d | %5d | %8s | %08lx\n"; + + while (next && n_lines--) { + struct page_stat *data; + + data = rb_entry(next, struct page_stat, node); + + printf(format, (unsigned long long)data->page, + (unsigned long long)data->alloc_bytes / 1024, + data->nr_alloc, data->order, + migrate_type_str[data->migrate_type], + (unsigned long)data->gfp_flags); + + next = rb_next(next); + } + + if (n_lines == -1) + printf(" ... | ... | ... | ... | ... | ... \n"); + + printf("%.80s\n", graph_dotted_line); +} + +static void print_slab_summary(void) +{ + printf("\nSUMMARY (SLAB allocator)"); + printf("\n========================\n"); + printf("Total bytes requested: %'lu\n", total_requested); + printf("Total bytes allocated: %'lu\n", total_allocated); + printf("Total bytes wasted on internal fragmentation: %'lu\n", total_allocated - total_requested); printf("Internal fragmentation: %f%%\n", fragmentation(total_requested, total_allocated)); - printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); + printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); } -static void print_result(struct perf_session *session) +static void print_page_summary(void) +{ + int o, m; + u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch; + u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes; + + printf("\nSUMMARY (page allocator)"); + printf("\n========================\n"); + printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests", + nr_page_allocs, total_page_alloc_bytes / 1024); + printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests", + nr_page_frees, total_page_free_bytes / 1024); + printf("\n"); + + printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests", + nr_alloc_freed, (total_alloc_freed_bytes) / 1024); + printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests", + nr_page_allocs - nr_alloc_freed, + (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024); + printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests", + nr_page_nomatch, total_page_nomatch_bytes / 1024); + printf("\n"); + + printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures", + nr_page_fails, total_page_fail_bytes / 1024); + printf("\n"); + + printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable", + "Reclaimable", "Movable", "Reserved", "CMA/Isolated"); + printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line, + graph_dotted_line, graph_dotted_line, graph_dotted_line, + graph_dotted_line, graph_dotted_line); + + for (o = 0; o < MAX_PAGE_ORDER; o++) { + printf("%5d", o); + for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) { + if (order_stats[o][m]) + printf(" %'12d", order_stats[o][m]); + else + printf(" %12c", '.'); + } + printf("\n"); + } +} + +static void print_slab_result(struct perf_session *session) { if (caller_flag) - __print_result(&root_caller_sorted, session, caller_lines, 1); + __print_slab_result(&root_caller_sorted, session, caller_lines, 1); if (alloc_flag) - __print_result(&root_alloc_sorted, session, alloc_lines, 0); - print_summary(); + __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0); + print_slab_summary(); +} + +static void print_page_result(struct perf_session *session) +{ + if (alloc_flag) + __print_page_result(&page_alloc_sorted, session, alloc_lines); + print_page_summary(); +} + +static void print_result(struct perf_session *session) +{ + if (kmem_slab) + print_slab_result(session); + if (kmem_page) + print_page_result(session); } struct sort_dimension { @@ -352,8 +704,8 @@ struct sort_dimension { static LIST_HEAD(caller_sort); static LIST_HEAD(alloc_sort); -static void sort_insert(struct rb_root *root, struct alloc_stat *data, - struct list_head *sort_list) +static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data, + struct list_head *sort_list) { struct rb_node **new = &(root->rb_node); struct rb_node *parent = NULL; @@ -382,8 +734,8 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data, rb_insert_color(&data->node, root); } -static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, - struct list_head *sort_list) +static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted, + struct list_head *sort_list) { struct rb_node *node; struct alloc_stat *data; @@ -395,26 +747,79 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, rb_erase(node, root); data = rb_entry(node, struct alloc_stat, node); - sort_insert(root_sorted, data, sort_list); + sort_slab_insert(root_sorted, data, sort_list); + } +} + +static void sort_page_insert(struct rb_root *root, struct page_stat *data) +{ + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + + while (*new) { + struct page_stat *this; + int cmp = 0; + + this = rb_entry(*new, struct page_stat, node); + parent = *new; + + /* TODO: support more sort key */ + cmp = data->alloc_bytes - this->alloc_bytes; + + if (cmp > 0) + new = &parent->rb_left; + else + new = &parent->rb_right; + } + + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted) +{ + struct rb_node *node; + struct page_stat *data; + + for (;;) { + node = rb_first(root); + if (!node) + break; + + rb_erase(node, root); + data = rb_entry(node, struct page_stat, node); + sort_page_insert(root_sorted, data); } } static void sort_result(void) { - __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); - __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); + if (kmem_slab) { + __sort_slab_result(&root_alloc_stat, &root_alloc_sorted, + &alloc_sort); + __sort_slab_result(&root_caller_stat, &root_caller_sorted, + &caller_sort); + } + if (kmem_page) { + __sort_page_result(&page_alloc_tree, &page_alloc_sorted); + } } static int __cmd_kmem(struct perf_session *session) { int err = -EINVAL; + struct perf_evsel *evsel; const struct perf_evsel_str_handler kmem_tracepoints[] = { + /* slab allocator */ { "kmem:kmalloc", perf_evsel__process_alloc_event, }, { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, { "kmem:kfree", perf_evsel__process_free_event, }, { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, + /* page allocator */ + { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, }, + { "kmem:mm_page_free", perf_evsel__process_page_free_event, }, }; if (!perf_session__has_traces(session, "kmem record")) @@ -425,10 +830,20 @@ static int __cmd_kmem(struct perf_session *session) goto out; } + evlist__for_each(session->evlist, evsel) { + if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && + perf_evsel__field(evsel, "pfn")) { + use_pfn = true; + break; + } + } + setup_pager(); - err = perf_session__process_events(session, &perf_kmem); - if (err != 0) + err = perf_session__process_events(session); + if (err != 0) { + pr_err("error during process events: %d\n", err); goto out; + } sort_result(); print_result(session); out: @@ -559,6 +974,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) { char *tok; char *str = strdup(arg); + char *pos = str; if (!str) { pr_err("%s: strdup failed\n", __func__); @@ -566,7 +982,7 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) } while (true) { - tok = strsep(&str, ","); + tok = strsep(&pos, ","); if (!tok) break; if (sort_dimension__add(tok, sort_list) < 0) { @@ -610,6 +1026,22 @@ static int parse_alloc_opt(const struct option *opt __maybe_unused, return 0; } +static int parse_slab_opt(const struct option *opt __maybe_unused, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + kmem_slab = (kmem_page + 1); + return 0; +} + +static int parse_page_opt(const struct option *opt __maybe_unused, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + kmem_page = (kmem_slab + 1); + return 0; +} + static int parse_line_opt(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused) { @@ -632,6 +1064,8 @@ static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { "record", "-a", "-R", "-c", "1", + }; + const char * const slab_events[] = { "-e", "kmem:kmalloc", "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", @@ -639,10 +1073,19 @@ static int __cmd_record(int argc, const char **argv) "-e", "kmem:kmem_cache_alloc_node", "-e", "kmem:kmem_cache_free", }; + const char * const page_events[] = { + "-e", "kmem:mm_page_alloc", + "-e", "kmem:mm_page_free", + }; unsigned int rec_argc, i, j; const char **rec_argv; rec_argc = ARRAY_SIZE(record_args) + argc - 1; + if (kmem_slab) + rec_argc += ARRAY_SIZE(slab_events); + if (kmem_page) + rec_argc += ARRAY_SIZE(page_events); + rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) @@ -651,6 +1094,15 @@ static int __cmd_record(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); + if (kmem_slab) { + for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) + rec_argv[i] = strdup(slab_events[j]); + } + if (kmem_page) { + for (j = 0; j < ARRAY_SIZE(page_events); j++, i++) + rec_argv[i] = strdup(page_events[j]); + } + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; @@ -660,8 +1112,13 @@ static int __cmd_record(int argc, const char **argv) int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) { const char * const default_sort_order = "frag,hit,bytes"; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; const struct option kmem_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL, "show per-callsite statistics", parse_caller_opt), OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL, @@ -671,6 +1128,11 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) parse_sort_opt), OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator", + parse_slab_opt), + OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator", + parse_page_opt), OPT_END() }; const char *const kmem_subcommands[] = { "record", "stat", NULL }; @@ -679,10 +1141,6 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; struct perf_session *session; - struct perf_data_file file = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - }; int ret = -1; argc = parse_options_subcommand(argc, argv, kmem_options, @@ -691,18 +1149,36 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) if (!argc) usage_with_options(kmem_usage, kmem_options); + if (kmem_slab == 0 && kmem_page == 0) + kmem_slab = 1; /* for backward compatibility */ + if (!strncmp(argv[0], "rec", 3)) { symbol__init(NULL); return __cmd_record(argc, argv); } + file.path = input_name; + session = perf_session__new(&file, false, &perf_kmem); if (session == NULL) return -1; + if (kmem_page) { + struct perf_evsel *evsel = perf_evlist__first(session->evlist); + + if (evsel == NULL || evsel->tp_format == NULL) { + pr_err("invalid event found.. aborting\n"); + return -1; + } + + kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); + } + symbol__init(&session->header.env); if (!strcmp(argv[0], "stat")) { + setlocale(LC_ALL, ""); + if (cpu__setup_cpunode_map()) goto out_delete; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0894a817f67e..1f9338f6109c 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -18,6 +18,7 @@ #include "util/stat.h" #include "util/top.h" #include "util/data.h" +#include "util/ordered-events.h" #include <sys/prctl.h> #ifdef HAVE_TIMERFD_SUPPORT @@ -730,9 +731,9 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return -1; } - err = perf_session_queue_event(kvm->session, event, &kvm->tool, &sample, 0); + err = perf_session__queue_event(kvm->session, event, &sample, 0); /* - * FIXME: Here we can't consume the event, as perf_session_queue_event will + * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. */ perf_evlist__mmap_consume(kvm->evlist, idx); @@ -783,8 +784,10 @@ static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm) /* flush queue after each round in which we processed events */ if (ntotal) { - kvm->session->ordered_events.next_flush = flush_time; - err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session); + struct ordered_events *oe = &kvm->session->ordered_events; + + oe->next_flush = flush_time; + err = ordered_events__flush(oe, OE_FLUSH__ROUND); if (err) { if (kvm->lost_events) pr_info("\nLost events: %" PRIu64 "\n\n", @@ -1044,6 +1047,7 @@ static int read_events(struct perf_kvm_stat *kvm) struct perf_data_file file = { .path = kvm->file_name, .mode = PERF_DATA_MODE_READ, + .force = kvm->force, }; kvm->tool = eops; @@ -1066,7 +1070,7 @@ static int read_events(struct perf_kvm_stat *kvm) if (ret < 0) return ret; - return perf_session__process_events(kvm->session, &kvm->tool); + return perf_session__process_events(kvm->session); } static int parse_target_str(struct perf_kvm_stat *kvm) @@ -1201,6 +1205,7 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) " time (sort by avg time)"), OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid", "analyze events only for given process id(s)"), + OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"), OPT_END() }; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 011195e38f21..af5bd0514108 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,7 +19,9 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { int i; - const struct option list_options[] = { + bool raw_dump = false; + struct option list_options[] = { + OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_END() }; const char * const list_usage[] = { @@ -27,40 +29,43 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN); + argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); setup_pager(); + if (!raw_dump) + printf("\nList of pre-defined events (to be used in -e):\n\n"); + if (argc == 0) { - print_events(NULL, false); + print_events(NULL, raw_dump); return 0; } for (i = 0; i < argc; ++i) { - if (i) - putchar('\n'); - if (strncmp(argv[i], "tracepoint", 10) == 0) - print_tracepoint_events(NULL, NULL, false); + if (strcmp(argv[i], "tracepoint") == 0) + print_tracepoint_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_events_type(PERF_TYPE_HARDWARE); + print_symbol_events(NULL, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) - print_events_type(PERF_TYPE_SOFTWARE); + print_symbol_events(NULL, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL, false); + print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, false); - else if (strcmp(argv[i], "--raw-dump") == 0) - print_events(NULL, true); + print_pmu_events(NULL, raw_dump); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], false); + print_events(argv[i], raw_dump); continue; } sep_idx = sep - argv[i]; @@ -69,7 +74,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) return -1; s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1, false); + print_tracepoint_events(s, s + sep_idx + 1, raw_dump); free(s); } } diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index e7ec71589da6..d49c2ab85fc2 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -846,6 +846,8 @@ static const struct perf_evsel_str_handler lock_tracepoints[] = { { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ }; +static bool force; + static int __cmd_report(bool display_info) { int err = -EINVAL; @@ -857,6 +859,7 @@ static int __cmd_report(bool display_info) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = force, }; session = perf_session__new(&file, false, &eops); @@ -878,7 +881,7 @@ static int __cmd_report(bool display_info) if (select_key()) goto out_delete; - err = perf_session__process_events(session, &eops); + err = perf_session__process_events(session); if (err) goto out_delete; @@ -945,6 +948,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) "dump thread list in perf.data"), OPT_BOOLEAN('m', "map", &info_map, "map of lock instances (address:name table)"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_END() }; const struct option lock_options[] = { @@ -956,6 +960,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), /* TODO: type */ OPT_END() }; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 24db6ffe2957..675216e08bfc 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,44 +7,48 @@ #include "util/session.h" #include "util/data.h" -#define MEM_OPERATION_LOAD "load" -#define MEM_OPERATION_STORE "store" - -static const char *mem_operation = MEM_OPERATION_LOAD; +#define MEM_OPERATION_LOAD 0x1 +#define MEM_OPERATION_STORE 0x2 struct perf_mem { struct perf_tool tool; char const *input_name; bool hide_unresolved; bool dump_raw; + bool force; + int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; -static int __cmd_record(int argc, const char **argv) +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { int rec_argc, i = 0, j; const char **rec_argv; - char event[64]; int ret; - rec_argc = argc + 4; + rec_argc = argc + 7; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (!rec_argv) return -1; - rec_argv[i++] = strdup("record"); - if (!strcmp(mem_operation, MEM_OPERATION_LOAD)) - rec_argv[i++] = strdup("-W"); - rec_argv[i++] = strdup("-d"); - rec_argv[i++] = strdup("-e"); + rec_argv[i++] = "record"; - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) - sprintf(event, "cpu/mem-stores/pp"); - else - sprintf(event, "cpu/mem-loads/pp"); + if (mem->operation & MEM_OPERATION_LOAD) + rec_argv[i++] = "-W"; + + rec_argv[i++] = "-d"; + + if (mem->operation & MEM_OPERATION_LOAD) { + rec_argv[i++] = "-e"; + rec_argv[i++] = "cpu/mem-loads/pp"; + } + + if (mem->operation & MEM_OPERATION_STORE) { + rec_argv[i++] = "-e"; + rec_argv[i++] = "cpu/mem-stores/pp"; + } - rec_argv[i++] = strdup(event); for (j = 1; j < argc; j++, i++) rec_argv[i] = argv[j]; @@ -117,6 +121,7 @@ static int report_raw_events(struct perf_mem *mem) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = mem->force, }; int err = -EINVAL; int ret; @@ -138,7 +143,7 @@ static int report_raw_events(struct perf_mem *mem) printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - err = perf_session__process_events(session, &mem->tool); + err = perf_session__process_events(session); if (err) return err; @@ -162,17 +167,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) if (!rep_argv) return -1; - rep_argv[i++] = strdup("report"); - rep_argv[i++] = strdup("--mem-mode"); - rep_argv[i++] = strdup("-n"); /* display number of samples */ + rep_argv[i++] = "report"; + rep_argv[i++] = "--mem-mode"; + rep_argv[i++] = "-n"; /* display number of samples */ /* * there is no weight (cost) associated with stores, so don't print * the column */ - if (strcmp(mem_operation, MEM_OPERATION_LOAD)) - rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked"); + if (!(mem->operation & MEM_OPERATION_LOAD)) + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked"; for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; @@ -182,6 +187,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) return ret; } +struct mem_mode { + const char *name; + int mode; +}; + +#define MEM_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define MEM_END { .name = NULL } + +static const struct mem_mode mem_modes[]={ + MEM_OPT("load", MEM_OPERATION_LOAD), + MEM_OPT("store", MEM_OPERATION_STORE), + MEM_END +}; + +static int +parse_mem_ops(const struct option *opt, const char *str, int unset) +{ + int *mode = (int *)opt->value; + const struct mem_mode *m; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* str may be NULL in case no arg is passed to -t */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + /* reset mode */ + *mode = 0; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (m = mem_modes; m->name; m++) { + if (!strcasecmp(s, m->name)) + break; + } + if (!m->name) { + fprintf(stderr, "unknown sampling op %s," + " check man page\n", s); + goto error; + } + + *mode |= m->mode; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + if (*mode == 0) + *mode = MEM_OPERATION_LOAD; +error: + free(os); + return ret; +} + int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) { struct stat st; @@ -197,10 +271,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_events = true, }, .input_name = "perf.data", + /* + * default to both load an store sampling + */ + .operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE, }; const struct option mem_options[] = { - OPT_STRING('t', "type", &mem_operation, - "type", "memory operations(load/store)"), + OPT_CALLBACK('t', "type", &mem.operation, + "type", "memory operations(load,store) Default load,store", + parse_mem_ops), OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw, "dump raw samples in ASCII"), OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved, @@ -209,10 +288,11 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) "input file name"), OPT_STRING('C', "cpu", &mem.cpu_list, "cpu", "list of cpus to profile"), - OPT_STRING('x', "field-separator", &symbol_conf.field_sep, + OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added" " between columns '.' is reserved."), + OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), OPT_END() }; const char *const mem_subcommands[] = { "record", "report", NULL }; @@ -225,7 +305,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation)) + if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation)) usage_with_options(mem_usage, mem_options); if (!mem.input_name || !strlen(mem.input_name)) { @@ -236,7 +316,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) } if (!strncmp(argv[0], "rec", 3)) - return __cmd_record(argc, argv); + return __cmd_record(argc, argv, &mem); else if (!strncmp(argv[0], "rep", 3)) return report_events(argc, argv, &mem); else diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 921bb6942503..f7b1af67e9f6 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -56,6 +56,7 @@ static struct { bool mod_events; bool uprobes; bool quiet; + bool target_used; int nevents; struct perf_probe_event events[MAX_PROBES]; struct strlist *dellist; @@ -78,6 +79,12 @@ static int parse_probe_event(const char *str) } pev->uprobes = params.uprobes; + if (params.target) { + pev->target = strdup(params.target); + if (!pev->target) + return -ENOMEM; + params.target_used = true; + } /* Parse a perf-probe command into event */ ret = parse_perf_probe_command(str, pev); @@ -102,6 +109,7 @@ static int set_target(const char *ptr) params.target = strdup(ptr); if (!params.target) return -ENOMEM; + params.target_used = false; found = 1; buf = ptr + (strlen(ptr) - 3); @@ -178,7 +186,7 @@ static int opt_set_target(const struct option *opt, const char *str, int ret = -ENOENT; char *tmp; - if (str && !params.target) { + if (str) { if (!strcmp(opt->long_name, "exec")) params.uprobes = true; #ifdef HAVE_DWARF_SUPPORT @@ -200,7 +208,9 @@ static int opt_set_target(const struct option *opt, const char *str, if (!tmp) return -ENOMEM; } + free(params.target); params.target = tmp; + params.target_used = false; ret = 0; } @@ -485,9 +495,14 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) } if (params.nevents) { + /* Ensure the last given target is used */ + if (params.target && !params.target_used) { + pr_warning(" Error: -x/-m must follow the probe definitions.\n"); + usage_with_options(probe_usage, options); + } + ret = add_perf_probe_events(params.events, params.nevents, params.max_probe_points, - params.target, params.force_add); if (ret < 0) { pr_err_with_code(" Error: Failed to add events.", ret); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 8648c6d3003d..c3efdfb630b5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -70,8 +70,8 @@ static int process_synthesized_event(struct perf_tool *tool, static int record__mmap_read(struct record *rec, int idx) { struct perf_mmap *md = &rec->evlist->mmap[idx]; - unsigned int head = perf_mmap__read_head(md); - unsigned int old = md->prev; + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; unsigned char *data = md->base + page_size; unsigned long size; void *buf; @@ -161,8 +161,9 @@ try_again: } } - if (perf_evlist__apply_filters(evlist)) { - error("failed to set filter with %d (%s)\n", errno, + if (perf_evlist__apply_filters(evlist, &pos)) { + error("failed to set filter \"%s\" on event %s with %d (%s)\n", + pos->filter, perf_evsel__name(pos), errno, strerror_r(errno, msg, sizeof(msg))); rc = -1; goto out; @@ -190,16 +191,30 @@ out: return rc; } +static int process_sample_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine) +{ + struct record *rec = container_of(tool, struct record, tool); + + rec->samples++; + + return build_id__mark_dso_hit(tool, event, sample, evsel, machine); +} + static int process_buildids(struct record *rec) { struct perf_data_file *file = &rec->file; struct perf_session *session = rec->session; - u64 start = session->header.data_offset; - u64 size = lseek(file->fd, 0, SEEK_CUR); + u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); if (size == 0) return 0; + file->size = size; + /* * During this process, it'll load kernel map and replace the * dso->long_name to a real pathname it found. In this case @@ -211,9 +226,7 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; - return __perf_session__process_events(session, start, - size - start, - size, &build_id__mark_dso_hit_ops); + return perf_session__process_events(session); } static void perf_event__synthesize_guest_os(struct machine *machine, void *data) @@ -322,6 +335,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data_file *file = &rec->file; struct perf_session *session; bool disabled = false, draining = false; + int fd; rec->progname = argv[0]; @@ -330,12 +344,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); - session = perf_session__new(file, false, NULL); + session = perf_session__new(file, false, tool); if (session == NULL) { pr_err("Perf session creation failed.\n"); return -1; } + fd = perf_data_file__fd(file); rec->session = session; record__init_features(rec); @@ -360,12 +375,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); if (file->is_pipe) { - err = perf_header__write_pipe(file->fd); + err = perf_header__write_pipe(fd); if (err < 0) goto out_child; } else { - err = perf_session__write_header(session, rec->evlist, - file->fd, false); + err = perf_session__write_header(session, rec->evlist, fd, false); if (err < 0) goto out_child; } @@ -397,7 +411,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * return this more properly and also * propagate errors that now are calling die() */ - err = perf_event__synthesize_tracing_data(tool, file->fd, rec->evlist, + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, process_synthesized_event); if (err <= 0) { pr_err("Couldn't record tracing data.\n"); @@ -504,19 +518,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } - if (!quiet) { + if (!quiet) fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); - /* - * Approximate RIP event size: 24 bytes. - */ - fprintf(stderr, - "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n", - (double)rec->bytes_written / 1024.0 / 1024.0, - file->path, - rec->bytes_written / 24); - } - out_child: if (forks) { int exit_status; @@ -535,13 +539,29 @@ out_child: } else status = err; + /* this will be recalculated during process_buildids() */ + rec->samples = 0; + if (!err && !file->is_pipe) { rec->session->header.data_size += rec->bytes_written; if (!rec->no_buildid) process_buildids(rec); - perf_session__write_header(rec->session, rec->evlist, - file->fd, true); + perf_session__write_header(rec->session, rec->evlist, fd, true); + } + + if (!err && !quiet) { + char samples[128]; + + if (rec->samples) + scnprintf(samples, sizeof(samples), + " (%" PRIu64 " samples)", rec->samples); + else + samples[0] = '\0'; + + fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s ]\n", + perf_data_file__size(file) / 1024.0 / 1024.0, + file->path, samples); } out_delete_session: @@ -639,7 +659,7 @@ error: static void callchain_debug(void) { - static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" }; + static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); @@ -691,6 +711,90 @@ static int perf_record_config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } +struct clockid_map { + const char *name; + int clockid; +}; + +#define CLOCKID_MAP(n, c) \ + { .name = n, .clockid = (c), } + +#define CLOCKID_END { .name = NULL, } + + +/* + * Add the missing ones, we need to build on many distros... + */ +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif +#ifndef CLOCK_BOOTTIME +#define CLOCK_BOOTTIME 7 +#endif +#ifndef CLOCK_TAI +#define CLOCK_TAI 11 +#endif + +static const struct clockid_map clockids[] = { + /* available for all events, NMI safe */ + CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), + CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), + + /* available for some events */ + CLOCKID_MAP("realtime", CLOCK_REALTIME), + CLOCKID_MAP("boottime", CLOCK_BOOTTIME), + CLOCKID_MAP("tai", CLOCK_TAI), + + /* available for the lazy */ + CLOCKID_MAP("mono", CLOCK_MONOTONIC), + CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), + CLOCKID_MAP("real", CLOCK_REALTIME), + CLOCKID_MAP("boot", CLOCK_BOOTTIME), + + CLOCKID_END, +}; + +static int parse_clockid(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + const struct clockid_map *cm; + const char *ostr = str; + + if (unset) { + opts->use_clockid = 0; + return 0; + } + + /* no arg passed */ + if (!str) + return 0; + + /* no setting it twice */ + if (opts->use_clockid) + return -1; + + opts->use_clockid = true; + + /* if its a number, we're done */ + if (sscanf(str, "%d", &opts->clockid) == 1) + return 0; + + /* allow a "CLOCK_" prefix to the name */ + if (!strncasecmp(str, "CLOCK_", 6)) + str += 6; + + for (cm = clockids; cm->name; cm++) { + if (!strcasecmp(str, cm->name)) { + opts->clockid = cm->clockid; + return 0; + } + } + + opts->use_clockid = false; + ui__warning("unknown clockid %s, check man page\n", ostr); + return -1; +} + static const char * const __record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -720,14 +824,21 @@ static struct record record = { .default_per_cpu = true, }, }, + .tool = { + .sample = process_sample_event, + .fork = perf_event__process_fork, + .comm = perf_event__process_comm, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + }, }; #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " #ifdef HAVE_DWARF_UNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; #else -const char record_callchain_help[] = CALLCHAIN_HELP "fp"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; #endif /* @@ -813,6 +924,11 @@ struct option __record_options[] = { "use per-thread mmaps"), OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, "Sample machine registers on interrupt"), + OPT_BOOLEAN(0, "running-time", &record.opts.running_time, + "Record running/enabled time of read (:S) events"), + OPT_CALLBACK('k', "clockid", &record.opts, + "clockid", "clockid to use for events, see clock_gettime()", + parse_clockid), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 39367609c707..b63aeda719be 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -86,17 +86,6 @@ static int report__config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } -static void report__inc_stats(struct report *rep, struct hist_entry *he) -{ - /* - * The @he is either of a newly created one or an existing one - * merging current sample. We only want to count a new one so - * checking ->nr_events being 1. - */ - if (he->stat.nr_events == 1) - rep->nr_entries++; -} - static int hist_iter__report_callback(struct hist_entry_iter *iter, struct addr_location *al, bool single, void *arg) @@ -108,8 +97,6 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, struct mem_info *mi; struct branch_info *bi; - report__inc_stats(rep, he); - if (!ui__has_annotation()) return 0; @@ -262,6 +249,8 @@ static int report__setup_sample_type(struct report *rep) if ((sample_type & PERF_SAMPLE_REGS_USER) && (sample_type & PERF_SAMPLE_STACK_USER)) callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; } @@ -315,7 +304,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (rep->mem_mode) { ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); - ret += fprintf(fp, "\n# Sort order : %s", sort_order); + ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); } else ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); return ret + fprintf(fp, "\n#\n"); @@ -340,7 +329,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, fprintf(stdout, "\n\n"); } - if (sort_order == default_sort_order && + if (sort_order == NULL && parent_pattern == default_parent_pattern) { fprintf(stdout, "#\n# (%s)\n#\n", help); @@ -358,7 +347,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, static void report__warn_kptr_restrict(const struct report *rep) { struct map *kernel_map = rep->session->machines.host.vmlinux_maps[MAP__FUNCTION]; - struct kmap *kernel_kmap = map__kmap(kernel_map); + struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL; if (kernel_map == NULL || (kernel_map->dso->hit && @@ -457,6 +446,19 @@ static void report__collapse_hists(struct report *rep) ui_progress__finish(); } +static void report__output_resort(struct report *rep) +{ + struct ui_progress prog; + struct perf_evsel *pos; + + ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); + + evlist__for_each(rep->session->evlist, pos) + hists__output_resort(evsel__hists(pos), &prog); + + ui_progress__finish(); +} + static int __cmd_report(struct report *rep) { int ret; @@ -480,12 +482,15 @@ static int __cmd_report(struct report *rep) if (ret) return ret; - ret = perf_session__process_events(session, &rep->tool); + ret = perf_session__process_events(session); if (ret) return ret; report__warn_kptr_restrict(rep); + evlist__for_each(session->evlist, pos) + rep->nr_entries += evsel__hists(pos)->nr_entries; + if (use_browser == 0) { if (verbose > 3) perf_session__fprintf(session, stdout); @@ -505,13 +510,20 @@ static int __cmd_report(struct report *rep) if (session_done()) return 0; + /* + * recalculate number of entries after collapsing since it + * might be changed during the collapse phase. + */ + rep->nr_entries = 0; + evlist__for_each(session->evlist, pos) + rep->nr_entries += evsel__hists(pos)->nr_entries; + if (rep->nr_entries == 0) { ui__error("The %s file has no samples!\n", file->path); return 0; } - evlist__for_each(session->evlist, pos) - hists__output_resort(evsel__hists(pos)); + report__output_resort(rep); return report__browse_hists(rep); } @@ -657,6 +669,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "only consider symbols in these dsos"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only consider symbols in these comms"), + OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]", + "only consider symbols in these pids"), + OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", + "only consider symbols in these tids"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), OPT_STRING(0, "symbol-filter", &report.symbol_filter_str, "filter", @@ -664,7 +680,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str, "width[,width...]", "don't try to adjust column width, use these fixed values"), - OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", + OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved, @@ -756,7 +772,7 @@ repeat: * 0/1 means the user chose a mode. */ if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) && - branch_call_mode == -1) { + !branch_call_mode) { sort__mode = SORT_MODE__BRANCH; symbol_conf.cumulate_callchain = false; } diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 891c3930080e..5275bab70313 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -23,12 +23,13 @@ #include <semaphore.h> #include <pthread.h> #include <math.h> +#include <api/fs/fs.h> #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 #define COMM_LEN 20 #define SYM_LEN 129 -#define MAX_PID 65536 +#define MAX_PID 1024000 struct sched_atom; @@ -124,7 +125,7 @@ struct perf_sched { struct perf_tool tool; const char *sort_order; unsigned long nr_tasks; - struct task_desc *pid_to_task[MAX_PID]; + struct task_desc **pid_to_task; struct task_desc **tasks; const struct trace_sched_handler *tp_handler; pthread_mutex_t start_work_mutex; @@ -169,6 +170,7 @@ struct perf_sched { u64 cpu_last_switched[MAX_CPUS]; struct rb_root atom_root, sorted_atom_root; struct list_head sort_list, cmp_pid; + bool force; }; static u64 get_nsecs(void) @@ -326,8 +328,19 @@ static struct task_desc *register_pid(struct perf_sched *sched, unsigned long pid, const char *comm) { struct task_desc *task; + static int pid_max; - BUG_ON(pid >= MAX_PID); + if (sched->pid_to_task == NULL) { + if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) + pid_max = MAX_PID; + BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); + } + if (pid >= (unsigned long)pid_max) { + BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * + sizeof(struct task_desc *))) == NULL); + while (pid >= (unsigned long)pid_max) + sched->pid_to_task[pid_max++] = NULL; + } task = sched->pid_to_task[pid]; @@ -346,7 +359,7 @@ static struct task_desc *register_pid(struct perf_sched *sched, sched->pid_to_task[pid] = task; sched->nr_tasks++; - sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_task *)); + sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); BUG_ON(!sched->tasks); sched->tasks[task->nr] = task; @@ -425,24 +438,45 @@ static u64 get_cpu_usage_nsec_parent(void) return sum; } -static int self_open_counters(void) +static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) { struct perf_event_attr attr; - char sbuf[STRERR_BUFSIZE]; + char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; int fd; + struct rlimit limit; + bool need_privilege = false; memset(&attr, 0, sizeof(attr)); attr.type = PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_TASK_CLOCK; +force_again: fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); - if (fd < 0) + if (fd < 0) { + if (errno == EMFILE) { + if (sched->force) { + BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); + limit.rlim_cur += sched->nr_tasks - cur_task; + if (limit.rlim_cur > limit.rlim_max) { + limit.rlim_max = limit.rlim_cur; + need_privilege = true; + } + if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { + if (need_privilege && errno == EPERM) + strcpy(info, "Need privilege\n"); + } else + goto force_again; + } else + strcpy(info, "Have a try with -f option\n"); + } pr_err("Error: sys_perf_event_open() syscall returned " - "with %d (%s)\n", fd, - strerror_r(errno, sbuf, sizeof(sbuf))); + "with %d (%s)\n%s", fd, + strerror_r(errno, sbuf, sizeof(sbuf)), info); + exit(EXIT_FAILURE); + } return fd; } @@ -460,6 +494,7 @@ static u64 get_cpu_usage_nsec_self(int fd) struct sched_thread_parms { struct task_desc *task; struct perf_sched *sched; + int fd; }; static void *thread_func(void *ctx) @@ -470,13 +505,12 @@ static void *thread_func(void *ctx) u64 cpu_usage_0, cpu_usage_1; unsigned long i, ret; char comm2[22]; - int fd; + int fd = parms->fd; zfree(&parms); sprintf(comm2, ":%s", this_task->comm); prctl(PR_SET_NAME, comm2); - fd = self_open_counters(); if (fd < 0) return NULL; again: @@ -528,6 +562,7 @@ static void create_tasks(struct perf_sched *sched) BUG_ON(parms == NULL); parms->task = task = sched->tasks[i]; parms->sched = sched; + parms->fd = self_open_counters(sched, i); sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); @@ -572,13 +607,13 @@ static void wait_for_tasks(struct perf_sched *sched) cpu_usage_1 = get_cpu_usage_nsec_parent(); if (!sched->runavg_cpu_usage) sched->runavg_cpu_usage = sched->cpu_usage; - sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10; + sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; if (!sched->runavg_parent_cpu_usage) sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; - sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 + - sched->parent_cpu_usage)/10; + sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + + sched->parent_cpu_usage)/sched->replay_repeat; ret = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(ret); @@ -610,7 +645,7 @@ static void run_one_test(struct perf_sched *sched) sched->sum_fluct += fluct; if (!sched->run_avg) sched->run_avg = delta; - sched->run_avg = (sched->run_avg * 9 + delta) / 10; + sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0); @@ -831,7 +866,7 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) return -1; } - atoms->thread = thread; + atoms->thread = thread__get(thread); INIT_LIST_HEAD(&atoms->work_list); __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); return 0; @@ -1439,8 +1474,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_ return err; } -static int perf_sched__read_events(struct perf_sched *sched, - struct perf_session **psession) +static int perf_sched__read_events(struct perf_sched *sched) { const struct perf_evsel_str_handler handlers[] = { { "sched:sched_switch", process_sched_switch_event, }, @@ -1453,7 +1487,9 @@ static int perf_sched__read_events(struct perf_sched *sched, struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = sched->force, }; + int rc = -1; session = perf_session__new(&file, false, &sched->tool); if (session == NULL) { @@ -1467,27 +1503,21 @@ static int perf_sched__read_events(struct perf_sched *sched, goto out_delete; if (perf_session__has_traces(session, "record -R")) { - int err = perf_session__process_events(session, &sched->tool); + int err = perf_session__process_events(session); if (err) { pr_err("Failed to process events, error %d", err); goto out_delete; } - sched->nr_events = session->stats.nr_events[0]; - sched->nr_lost_events = session->stats.total_lost; - sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->evlist->stats.nr_events[0]; + sched->nr_lost_events = session->evlist->stats.total_lost; + sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; } - if (psession) - *psession = session; - else - perf_session__delete(session); - - return 0; - + rc = 0; out_delete: perf_session__delete(session); - return -1; + return rc; } static void print_bad_events(struct perf_sched *sched) @@ -1515,12 +1545,10 @@ static void print_bad_events(struct perf_sched *sched) static int perf_sched__lat(struct perf_sched *sched) { struct rb_node *next; - struct perf_session *session; setup_pager(); - /* save session -- references to threads are held in work_list */ - if (perf_sched__read_events(sched, &session)) + if (perf_sched__read_events(sched)) return -1; perf_sched__sort_lat(sched); @@ -1537,6 +1565,7 @@ static int perf_sched__lat(struct perf_sched *sched) work_list = rb_entry(next, struct work_atoms, node); output_lat_thread(sched, work_list); next = rb_next(next); + thread__zput(work_list->thread); } printf(" -----------------------------------------------------------------------------------------------------------------\n"); @@ -1548,7 +1577,6 @@ static int perf_sched__lat(struct perf_sched *sched) print_bad_events(sched); printf("\n"); - perf_session__delete(session); return 0; } @@ -1557,7 +1585,7 @@ static int perf_sched__map(struct perf_sched *sched) sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); setup_pager(); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; print_bad_events(sched); return 0; @@ -1572,7 +1600,7 @@ static int perf_sched__replay(struct perf_sched *sched) test_calibrations(sched); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; printf("nr_run_events: %ld\n", sched->nr_run_events); @@ -1693,6 +1721,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), OPT_END() }; const struct option sched_options[] = { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ce304dfd962a..58f10b8e6ff2 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -446,9 +446,9 @@ static void print_sample_bts(union perf_event *event, } static void process_event(union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel, struct thread *thread, - struct addr_location *al) + struct perf_evsel *evsel, struct addr_location *al) { + struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; if (output[attr->type].fields == 0) @@ -549,14 +549,6 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct machine *machine) { struct addr_location al; - struct thread *thread = machine__findnew_thread(machine, sample->pid, - sample->tid); - - if (thread == NULL) { - pr_debug("problem processing %d event, skipping it.\n", - event->header.type); - return -1; - } if (debug_mode) { if (sample->time < last_timestamp) { @@ -581,7 +573,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) return 0; - scripting_ops->process_event(event, sample, evsel, thread, &al); + scripting_ops->process_event(event, sample, evsel, &al); return 0; } @@ -800,7 +792,7 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap2 = process_mmap2_event; } - ret = perf_session__process_events(script->session, &script->tool); + ret = perf_session__process_events(script->session); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -1523,6 +1515,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .ordering_requires_timestamps = true, }, }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), @@ -1550,7 +1545,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "When printing symbols do not display call chain"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), - OPT_CALLBACK('f', "fields", NULL, "str", + OPT_CALLBACK('F', "fields", NULL, "str", "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," @@ -1562,6 +1557,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only display events for these comms"), + OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]", + "only consider symbols in these pids"), + OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", + "only consider symbols in these tids"), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -1570,9 +1569,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the fork/comm/exit events"), OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, "Show the mmap events"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), OPT_END() }; - const char * const script_usage[] = { + const char * const script_subcommands[] = { "record", "report", NULL }; + const char *script_usage[] = { "perf script [<options>]", "perf script [<options>] record <script> [<record-options>] <command>", "perf script [<options>] report <script> [script-args]", @@ -1580,13 +1581,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "perf script [<options>] <top-script> [script-args]", NULL }; - struct perf_data_file file = { - .mode = PERF_DATA_MODE_READ, - }; setup_scripting(); - argc = parse_options(argc, argv, options, script_usage, + argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); file.path = input_name; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 891086376381..f7b8218785f6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -353,39 +353,40 @@ static struct perf_evsel *nth_evsel(int n) * more semantic information such as miss/hit ratios, * instruction rates, etc: */ -static void update_shadow_stats(struct perf_evsel *counter, u64 *count) +static void update_shadow_stats(struct perf_evsel *counter, u64 *count, + int cpu) { if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) - update_stats(&runtime_nsecs_stats[0], count[0]); + update_stats(&runtime_nsecs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[0], count[0]); + update_stats(&runtime_cycles_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) - update_stats(&runtime_cycles_in_tx_stats[0], count[0]); + update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) - update_stats(&runtime_transaction_stats[0], count[0]); + update_stats(&runtime_transaction_stats[cpu], count[0]); else if (transaction_run && perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) - update_stats(&runtime_elision_stats[0], count[0]); + update_stats(&runtime_elision_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); + update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); + update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[0], count[0]); + update_stats(&runtime_branches_stats[cpu], count[0]); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[0], count[0]); + update_stats(&runtime_cacherefs_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[0], count[0]); + update_stats(&runtime_l1_dcache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_l1_icache_stats[0], count[0]); + update_stats(&runtime_l1_icache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[0], count[0]); + update_stats(&runtime_ll_cache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[0], count[0]); + update_stats(&runtime_dtlb_cache_stats[cpu], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[0], count[0]); + update_stats(&runtime_itlb_cache_stats[cpu], count[0]); } static void zero_per_pkg(struct perf_evsel *counter) @@ -447,7 +448,8 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, perf_evsel__compute_deltas(evsel, cpu, count); perf_counts_values__scale(count, scale, NULL); evsel->counts->cpu[cpu] = *count; - update_shadow_stats(evsel, count->values); + if (aggr_mode == AGGR_NONE) + update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -495,7 +497,7 @@ static int read_counter_aggr(struct perf_evsel *counter) /* * Save the full runtime - to allow normalization during printout: */ - update_shadow_stats(counter, count); + update_shadow_stats(counter, count, 0); return 0; } @@ -510,6 +512,9 @@ static int read_counter(struct perf_evsel *counter) int ncpus = perf_evsel__nr_cpus(counter); int cpu, thread; + if (!counter->supported) + return -ENOENT; + if (counter->system_wide) nthreads = 1; @@ -679,8 +684,9 @@ static int __run_perf_stat(int argc, const char **argv) unit_width = l; } - if (perf_evlist__apply_filters(evsel_list)) { - error("failed to set filter with %d (%s)\n", errno, + if (perf_evlist__apply_filters(evsel_list, &counter)) { + error("failed to set filter \"%s\" on event %s with %d (%s)\n", + counter->filter, perf_evsel__name(counter), errno, strerror_r(errno, msg, sizeof(msg))); return -1; } @@ -766,6 +772,19 @@ static int run_perf_stat(int argc, const char **argv) return ret; } +static void print_running(u64 run, u64 ena) +{ + if (csv_output) { + fprintf(output, "%s%" PRIu64 "%s%.2f", + csv_sep, + run, + csv_sep, + ena ? 100.0 * run / ena : 100.0); + } else if (run != ena) { + fprintf(output, " (%.2f%%)", 100.0 * run / ena); + } +} + static void print_noise_pct(double total, double avg) { double pct = rel_stddev_stats(total, avg); @@ -1076,6 +1095,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (total) { ratio = avg / total; fprintf(output, " # %5.2f insns per cycle ", ratio); + } else { + fprintf(output, " "); } total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); @@ -1145,6 +1166,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (total) { ratio = avg / total; fprintf(output, " # %8.3f GHz ", ratio); + } else { + fprintf(output, " "); } } else if (transaction_run && perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { @@ -1249,6 +1272,7 @@ static void print_aggr(char *prefix) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(run, ena); fputc('\n', output); continue; } @@ -1259,13 +1283,10 @@ static void print_aggr(char *prefix) else abs_printout(id, nr, counter, uval); - if (!csv_output) { + if (!csv_output) print_noise(counter, 1.0); - if (run != ena) - fprintf(output, " (%.2f%%)", - 100.0 * run / ena); - } + print_running(run, ena); fputc('\n', output); } } @@ -1281,11 +1302,15 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; double uval; + double avg_enabled, avg_running; + + avg_enabled = avg_stats(&ps->res_stats[1]); + avg_running = avg_stats(&ps->res_stats[2]); if (prefix) fprintf(output, "%s", prefix); - if (scaled == -1) { + if (scaled == -1 || !counter->supported) { fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, @@ -1300,6 +1325,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) if (counter->cgrp) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(avg_running, avg_enabled); fputc('\n', output); return; } @@ -1313,19 +1339,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) print_noise(counter, avg); - if (csv_output) { - fputc('\n', output); - return; - } - - if (scaled) { - double avg_enabled, avg_running; - - avg_enabled = avg_stats(&ps->res_stats[1]); - avg_running = avg_stats(&ps->res_stats[2]); - - fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled); - } + print_running(avg_running, avg_enabled); fprintf(output, "\n"); } @@ -1367,6 +1381,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(run, ena); fputc('\n', output); continue; } @@ -1378,13 +1393,10 @@ static void print_counter(struct perf_evsel *counter, char *prefix) else abs_printout(cpu, 0, counter, uval); - if (!csv_output) { + if (!csv_output) print_noise(counter, 1.0); + print_running(run, ena); - if (run != ena) - fprintf(output, " (%.2f%%)", - 100.0 * run / ena); - } fputc('\n', output); } } @@ -1730,7 +1742,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "detailed run - start a lot of events"), OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"), - OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, + OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, "print large numbers with thousands\' separators", stat__set_big_num), OPT_STRING('C', "cpu", &target.cpu_list, "cpu", diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index f3bb1a4bf060..e50fe1187b0b 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -67,6 +67,7 @@ struct timechart { skip_eagain; u64 min_time, merge_dist; + bool force; }; struct per_pidcomm; @@ -1598,6 +1599,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = tchart->force, }; struct perf_session *session = perf_session__new(&file, false, @@ -1623,7 +1625,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) goto out_delete; } - ret = perf_session__process_events(session, &tchart->tool); + ret = perf_session__process_events(session); if (ret) goto out_delete; @@ -1956,9 +1958,11 @@ int cmd_timechart(int argc, const char **argv, OPT_CALLBACK(0, "io-merge-dist", &tchart.merge_dist, "time", "merge events that are merge-dist us apart", parse_time), + OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"), OPT_END() }; - const char * const timechart_usage[] = { + const char * const timechart_subcommands[] = { "record", NULL }; + const char *timechart_usage[] = { "perf timechart [<options>] {record}", NULL }; @@ -1976,8 +1980,8 @@ int cmd_timechart(int argc, const char **argv, "perf timechart record [<options>]", NULL }; - argc = parse_options(argc, argv, timechart_options, timechart_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + argc = parse_options_subcommand(argc, argv, timechart_options, timechart_subcommands, + timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (tchart.power_only && tchart.tasks_only) { pr_err("-P and -T options cannot be used at the same time.\n"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0aa7747ff139..6a4d5d41c671 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -66,7 +66,6 @@ #include <sys/utsname.h> #include <sys/mman.h> -#include <linux/unistd.h> #include <linux/types.h> static volatile int done; @@ -166,7 +165,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip) err ? "[unknown]" : uts.release, perf_version_string); if (use_browser <= 0) sleep(5); - + map->erange_warned = true; } @@ -285,7 +284,7 @@ static void perf_top__print_sym_table(struct perf_top *top) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); @@ -554,7 +553,7 @@ static void perf_top__sort_new_samples(void *arg) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); } static void *display_thread_tui(void *arg) @@ -717,7 +716,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine) { pr_err("%u unprocessable samples recorded.\r", - top->session->stats.nr_unprocessable_samples++); + top->session->evlist->stats.nr_unprocessable_samples++); return; } @@ -734,7 +733,7 @@ static void perf_event__process_sample(struct perf_tool *tool, "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict.\n\n" "Kernel%s samples will not be resolved.\n", - !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? + al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? " modules" : ""); if (use_browser <= 0) sleep(5); @@ -758,8 +757,10 @@ static void perf_event__process_sample(struct perf_tool *tool, al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { if (symbol_conf.vmlinux_name) { - ui__warning("The %s file can't be used.\n%s", - symbol_conf.vmlinux_name, msg); + char serr[256]; + dso__strerror_load(al.map->dso, serr, sizeof(serr)); + ui__warning("The %s file can't be used: %s\n%s", + symbol_conf.vmlinux_name, serr, msg); } else { ui__warning("A vmlinux file was not found.\n%s", msg); @@ -857,7 +858,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) hists__inc_nr_events(evsel__hists(evsel), event->header.type); machine__process_event(machine, event, &sample); } else - ++session->stats.nr_unknown_events; + ++session->evlist->stats.nr_unknown_events; next_event: perf_evlist__mmap_consume(top->evlist, idx); } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index badfabc6a01f..e122970361f2 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -52,7 +52,9 @@ struct tp_field { #define TP_UINT_FIELD(bits) \ static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - return *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ + return value; \ } TP_UINT_FIELD(8); @@ -63,7 +65,8 @@ TP_UINT_FIELD(64); #define TP_UINT_FIELD__SWAPPED(bits) \ static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ return bswap_##bits(value);\ } @@ -929,66 +932,66 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, { .name = "close", .errmsg = true, - .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, { .name = "connect", .errmsg = true, }, { .name = "dup", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup2", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "dup3", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), }, { .name = "eventfd2", .errmsg = true, .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, { .name = "faccessat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "fadvise64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fallocate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchdir", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmod", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchmodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fchown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fchownat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "fcntl", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_STRARRAY, /* cmd */ }, .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, }, { .name = "fdatasync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "flock", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [1] = SCA_FLOCK, /* cmd */ }, }, { .name = "fsetxattr", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstat", .errmsg = true, .alias = "newfstat", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fstatat", .errmsg = true, .alias = "newfstatat", - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "fstatfs", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "fsync", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "ftruncate", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "futex", .errmsg = true, .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, { .name = "futimesat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "getdents", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getdents64", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "ioctl", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ + .arg_scnprintf = { [0] = SCA_FD, /* fd */ #if defined(__i386__) || defined(__x86_64__) /* * FIXME: Make this available to all arches. @@ -1002,7 +1005,7 @@ static struct syscall_fmt { { .name = "kill", .errmsg = true, .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, { .name = "linkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "lseek", .errmsg = true, .arg_scnprintf = { [0] = SCA_FD, /* fd */ [2] = SCA_STRARRAY, /* whence */ }, @@ -1012,9 +1015,9 @@ static struct syscall_fmt { .arg_scnprintf = { [0] = SCA_HEX, /* start */ [2] = SCA_MADV_BHV, /* behavior */ }, }, { .name = "mkdirat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mknodat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, { .name = "mlock", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "mlockall", .errmsg = true, @@ -1036,9 +1039,9 @@ static struct syscall_fmt { { .name = "munmap", .errmsg = true, .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, { .name = "name_to_handle_at", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "newfstatat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "open", .errmsg = true, .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, { .name = "open_by_handle_at", .errmsg = true, @@ -1052,20 +1055,20 @@ static struct syscall_fmt { { .name = "poll", .errmsg = true, .timeout = true, }, { .name = "ppoll", .errmsg = true, .timeout = true, }, { .name = "pread", .errmsg = true, .alias = "pread64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "preadv", .errmsg = true, .alias = "pread", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), }, { .name = "pwrite", .errmsg = true, .alias = "pwrite64", - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "pwritev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "read", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "readlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "readv", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "recvfrom", .errmsg = true, .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "recvmmsg", .errmsg = true, @@ -1073,7 +1076,7 @@ static struct syscall_fmt { { .name = "recvmsg", .errmsg = true, .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, { .name = "renameat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "rt_sigaction", .errmsg = true, .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, @@ -1091,7 +1094,7 @@ static struct syscall_fmt { { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, { .name = "shutdown", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "socket", .errmsg = true, .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ [1] = SCA_SK_TYPE, /* type */ }, @@ -1102,7 +1105,7 @@ static struct syscall_fmt { .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, { .name = "stat", .errmsg = true, .alias = "newstat", }, { .name = "symlinkat", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, + .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, { .name = "tgkill", .errmsg = true, .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, { .name = "tkill", .errmsg = true, @@ -1113,9 +1116,9 @@ static struct syscall_fmt { { .name = "utimensat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, { .name = "write", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, { .name = "writev", .errmsg = true, - .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, + .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, }; static int syscall_fmt__cmp(const void *name, const void *fmtp) @@ -1132,6 +1135,8 @@ static struct syscall_fmt *syscall_fmt__find(const char *name) struct syscall { struct event_format *tp_format; + int nr_args; + struct format_field *args; const char *name; bool filtered; bool is_exit; @@ -1191,7 +1196,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) if (thread__priv(thread) == NULL) thread__set_priv(thread, thread_trace__new()); - + if (thread__priv(thread) == NULL) goto fail; @@ -1219,7 +1224,9 @@ struct trace { struct syscall *table; } syscalls; struct record_opts opts; + struct perf_evlist *evlist; struct machine *host; + struct thread *current; u64 base_time; FILE *output; unsigned long nr_events; @@ -1227,6 +1234,10 @@ struct trace { const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; + struct { + size_t nr; + pid_t *entries; + } filter_pids; double duration_filter; double runtime_ms; struct { @@ -1243,6 +1254,7 @@ struct trace { bool show_comm; bool show_tool_stats; bool trace_syscalls; + bool force; int trace_pgfaults; }; @@ -1433,14 +1445,14 @@ static int syscall__set_arg_fmts(struct syscall *sc) struct format_field *field; int idx = 0; - sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); + sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); if (sc->arg_scnprintf == NULL) return -1; if (sc->fmt) sc->arg_parm = sc->fmt->arg_parm; - for (field = sc->tp_format->format.fields->next; field; field = field->next) { + for (field = sc->args; field; field = field->next) { if (sc->fmt && sc->fmt->arg_scnprintf[idx]) sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; else if (field->flags & FIELD_IS_POINTER) @@ -1506,18 +1518,37 @@ static int trace__read_syscall_info(struct trace *trace, int id) if (sc->tp_format == NULL) return -1; + sc->args = sc->tp_format->format.fields; + sc->nr_args = sc->tp_format->format.nr_fields; + /* drop nr field - not relevant here; does not exist on older kernels */ + if (sc->args && strcmp(sc->args->name, "nr") == 0) { + sc->args = sc->args->next; + --sc->nr_args; + } + sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); return syscall__set_arg_fmts(sc); } +/* + * args is to be interpreted as a series of longs but we need to handle + * 8-byte unaligned accesses. args points to raw_data within the event + * and raw_data is guaranteed to be 8-byte unaligned because it is + * preceded by raw_size which is a u32. So we need to copy args to a temp + * variable to read it. Most notably this avoids extended load instructions + * on unaligned addresses + */ + static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, - unsigned long *args, struct trace *trace, + unsigned char *args, struct trace *trace, struct thread *thread) { size_t printed = 0; + unsigned char *p; + unsigned long val; - if (sc->tp_format != NULL) { + if (sc->args != NULL) { struct format_field *field; u8 bit = 1; struct syscall_arg arg = { @@ -1527,16 +1558,21 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, .thread = thread, }; - for (field = sc->tp_format->format.fields->next; field; + for (field = sc->args; field; field = field->next, ++arg.idx, bit <<= 1) { if (arg.mask & bit) continue; + + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * arg.idx; + memcpy(&val, p, sizeof(val)); + /* * Suppress this argument if its value is zero and * and we don't have a string associated in an * strarray for it. */ - if (args[arg.idx] == 0 && + if (val == 0 && !(sc->arg_scnprintf && sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && sc->arg_parm[arg.idx])) @@ -1545,23 +1581,26 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { - arg.val = args[arg.idx]; + arg.val = val; if (sc->arg_parm) arg.parm = sc->arg_parm[arg.idx]; printed += sc->arg_scnprintf[arg.idx](bf + printed, size - printed, &arg); } else { printed += scnprintf(bf + printed, size - printed, - "%ld", args[arg.idx]); + "%ld", val); } } } else { int i = 0; while (i < 6) { + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * i; + memcpy(&val, p, sizeof(val)); printed += scnprintf(bf + printed, size - printed, "%sarg%d: %ld", - printed ? ", " : "", i, args[i]); + printed ? ", " : "", i, val); ++i; } } @@ -1642,6 +1681,29 @@ static void thread__update_stats(struct thread_trace *ttrace, update_stats(stats, duration); } +static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) +{ + struct thread_trace *ttrace; + u64 duration; + size_t printed; + + if (trace->current == NULL) + return 0; + + ttrace = thread__priv(trace->current); + + if (!ttrace->entry_pending) + return 0; + + duration = sample->time - ttrace->entry_time; + + printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); + printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); + ttrace->entry_pending = false; + + return printed; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1673,6 +1735,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, return -1; } + if (!trace->summary_only) + trace__printf_interrupted_entry(trace, sample); + ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); @@ -1688,6 +1753,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } else ttrace->entry_pending = true; + if (trace->current != thread) { + thread__put(trace->current); + trace->current = thread__get(thread); + } + return 0; } @@ -1805,6 +1875,28 @@ out_dump: return 0; } +static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, + struct perf_sample *sample) +{ + trace__printf_interrupted_entry(trace, sample); + trace__fprintf_tstamp(trace, sample->time, trace->output); + + if (trace->trace_syscalls) + fprintf(trace->output, "( ): "); + + fprintf(trace->output, "%s:", evsel->name); + + if (evsel->tp_format) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, + trace->output); + } + + fprintf(trace->output, ")\n"); + return 0; +} + static void print_location(FILE *f, struct perf_sample *sample, struct addr_location *al, bool print_dso, bool print_sym) @@ -2037,10 +2129,39 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist, return 0; } -static int trace__run(struct trace *trace, int argc, const char **argv) +static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) { - struct perf_evlist *evlist = perf_evlist__new(); + const u32 type = event->header.type; struct perf_evsel *evsel; + + if (!trace->full_time && trace->base_time == 0) + trace->base_time = sample->time; + + if (type != PERF_RECORD_SAMPLE) { + trace__process_event(trace, trace->host, event, sample); + return; + } + + evsel = perf_evlist__id2evsel(trace->evlist, sample->id); + if (evsel == NULL) { + fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); + return; + } + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && + sample->raw_data == NULL) { + fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", + perf_evsel__name(evsel), sample->tid, + sample->cpu, sample->raw_size); + } else { + tracepoint_handler handler = evsel->handler; + handler(trace, evsel, event, sample); + } +} + +static int trace__run(struct trace *trace, int argc, const char **argv) +{ + struct perf_evlist *evlist = trace->evlist; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2048,31 +2169,27 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; - if (evlist == NULL) { - fprintf(trace->output, "Not enough memory to run!\n"); - goto out; - } - if (trace->trace_syscalls && perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) - goto out_error_tp; + goto out_error_raw_syscalls; if (trace->trace_syscalls) perf_evlist__add_vfs_getname(evlist); if ((trace->trace_pgfaults & TRACE_PFMAJ) && - perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) - goto out_error_tp; + perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { + goto out_error_mem; + } if ((trace->trace_pgfaults & TRACE_PFMIN) && perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN)) - goto out_error_tp; + goto out_error_mem; if (trace->sched && - perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", - trace__sched_stat_runtime)) - goto out_error_tp; + perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime", + trace__sched_stat_runtime)) + goto out_error_sched_stat_runtime; err = perf_evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { @@ -2104,16 +2221,35 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + /* + * Better not use !target__has_task() here because we need to cover the + * case where no threads were specified in the command line, but a + * workload was, and in that case we will fill in the thread_map when + * we fork the workload in perf_evlist__prepare_workload. + */ + if (trace->filter_pids.nr > 0) + err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); + else if (evlist->threads->map[0] == -1) + err = perf_evlist__set_filter_pid(evlist, getpid()); + + if (err < 0) { + printf("err=%d,%s\n", -err, strerror(-err)); + exit(1); + } + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) goto out_error_mmap; - perf_evlist__enable(evlist); + if (!target__none(&trace->opts.target)) + perf_evlist__enable(evlist); if (forks) perf_evlist__start_workload(evlist); - trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; + trace->multiple_threads = evlist->threads->map[0] == -1 || + evlist->threads->nr > 1 || + perf_evlist__first(evlist)->attr.inherit; again: before = trace->nr_events; @@ -2121,8 +2257,6 @@ again: union perf_event *event; while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { - const u32 type = event->header.type; - tracepoint_handler handler; struct perf_sample sample; ++trace->nr_events; @@ -2133,35 +2267,17 @@ again: goto next_event; } - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample.time; - - if (type != PERF_RECORD_SAMPLE) { - trace__process_event(trace, trace->host, event, &sample); - continue; - } - - evsel = perf_evlist__id2evsel(evlist, sample.id); - if (evsel == NULL) { - fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); - goto next_event; - } - - if (evsel->attr.type == PERF_TYPE_TRACEPOINT && - sample.raw_data == NULL) { - fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", - perf_evsel__name(evsel), sample.tid, - sample.cpu, sample.raw_size); - goto next_event; - } - - handler = evsel->handler; - handler(trace, evsel, event, &sample); + trace__handle_event(trace, event, &sample); next_event: perf_evlist__mmap_consume(evlist, i); if (interrupted) goto out_disable; + + if (done && !draining) { + perf_evlist__disable(evlist); + draining = true; + } } } @@ -2179,6 +2295,8 @@ next_event: } out_disable: + thread__zput(trace->current); + perf_evlist__disable(evlist); if (!err) { @@ -2196,14 +2314,18 @@ out_disable: out_delete_evlist: perf_evlist__delete(evlist); -out: + trace->evlist = NULL; trace->live = false; return err; { char errbuf[BUFSIZ]; -out_error_tp: - perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf)); +out_error_sched_stat_runtime: + debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime"); + goto out_error; + +out_error_raw_syscalls: + debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)"); goto out_error; out_error_mmap: @@ -2217,6 +2339,9 @@ out_error: fprintf(trace->output, "%s\n", errbuf); goto out_delete_evlist; } +out_error_mem: + fprintf(trace->output, "Not enough memory to run!\n"); + goto out_delete_evlist; } static int trace__replay(struct trace *trace) @@ -2227,6 +2352,7 @@ static int trace__replay(struct trace *trace) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = trace->force, }; struct perf_session *session; struct perf_evsel *evsel; @@ -2301,7 +2427,7 @@ static int trace__replay(struct trace *trace) setup_pager(); - err = perf_session__process_events(session, &trace->tool); + err = perf_session__process_events(session); if (err) pr_err("Failed to process events, error %d", err); @@ -2426,6 +2552,38 @@ static int trace__set_duration(const struct option *opt, const char *str, return 0; } +static int trace__set_filter_pids(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + int ret = -1; + size_t i; + struct trace *trace = opt->value; + /* + * FIXME: introduce a intarray class, plain parse csv and create a + * { int nr, int entries[] } struct... + */ + struct intlist *list = intlist__new(str); + + if (list == NULL) + return -1; + + i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; + trace->filter_pids.entries = calloc(i, sizeof(pid_t)); + + if (trace->filter_pids.entries == NULL) + goto out; + + trace->filter_pids.entries[0] = getpid(); + + for (i = 1; i < trace->filter_pids.nr; ++i) + trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; + + intlist__delete(list); + ret = 0; +out: + return ret; +} + static int trace__open_output(struct trace *trace, const char *filename) { struct stat st; @@ -2460,9 +2618,17 @@ static int parse_pagefaults(const struct option *opt, const char *str, return 0; } +static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + evsel->handler = handler; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { - const char * const trace_usage[] = { + const char *trace_usage[] = { "perf trace [<options>] [<command>]", "perf trace [<options>] -- <command> [<options>]", "perf trace record [<options>] [<command>]", @@ -2494,6 +2660,9 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const char *output_name = NULL; const char *ev_qualifier_str = NULL; const struct option trace_options[] = { + OPT_CALLBACK(0, "event", &trace.evlist, "event", + "event selector. use 'perf list' to list available events", + parse_events_option), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), @@ -2505,6 +2674,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) "trace events on existing process id"), OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", "trace events on existing thread id"), + OPT_CALLBACK(0, "filter-pids", &trace, "float", + "show only events with duration > N.M ms", trace__set_filter_pids), OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, "system-wide collection from all CPUs"), OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", @@ -2530,19 +2701,36 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", "Trace pagefaults", parse_pagefaults, "maj"), OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), + OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), OPT_END() }; + const char * const trace_subcommands[] = { "record", NULL }; int err; char bf[BUFSIZ]; - argc = parse_options(argc, argv, trace_options, trace_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); + + trace.evlist = perf_evlist__new(); + if (trace.evlist == NULL) + return -ENOMEM; + + if (trace.evlist == NULL) { + pr_err("Not enough memory to run!\n"); + goto out; + } + + argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, + trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (trace.trace_pgfaults) { trace.opts.sample_address = true; trace.opts.sample_time = true; } + if (trace.evlist->nr_entries > 0) + evlist__set_evsel_handler(trace.evlist, trace__event_handler); + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) return trace__record(&trace, argc-1, &argv[1]); @@ -2550,7 +2738,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) if (trace.summary_only) trace.summary = trace.summary_only; - if (!trace.trace_syscalls && !trace.trace_pgfaults) { + if (!trace.trace_syscalls && !trace.trace_pgfaults && + trace.evlist->nr_entries == 0 /* Was --events used? */) { pr_err("Please specify something to trace.\n"); return -1; } diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index b210d62907e4..3688ad29085f 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -37,6 +37,7 @@ extern int cmd_test(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); extern int cmd_mem(int argc, const char **argv, const char *prefix); +extern int cmd_data(int argc, const char **argv, const char *prefix); extern int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 0906fc401c52..00fcaf8a5b8d 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -7,6 +7,7 @@ perf-archive mainporcelain common perf-bench mainporcelain common perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common +perf-data mainporcelain common perf-diff mainporcelain common perf-evlist mainporcelain common perf-inject mainporcelain common diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 5d4b039fe1ed..59a98c643240 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,19 +11,26 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif -LIB_INCLUDE := $(srctree)/tools/lib/ +$(shell echo -n > .config-detected) +detected = $(shell echo "$(1)=y" >> .config-detected) +detected_var = $(shell echo "$(1)=$($(1))" >> .config-detected) + CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(src-perf)/config/Makefile.arch +$(call detected_var,ARCH) + NO_PERF_REGS := 1 # Additional ARCH settings for x86 ifeq ($(ARCH),x86) - ifeq (${IS_X86_64}, 1) + $(call detected,CONFIG_X86) + ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 + $(call detected,CONFIG_X86_64) else LIBUNWIND_LIBS = -lunwind -lunwind-x86 endif @@ -40,6 +47,10 @@ ifeq ($(ARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(NO_PERF_REGS),0) + $(call detected,CONFIG_PERF_REGS) +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures @@ -84,6 +95,17 @@ ifndef NO_LIBELF FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw endif +ifdef LIBBABELTRACE + # for linking with debug library, run like: + # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ + ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib + endif + FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) + FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +endif + # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile @@ -114,6 +136,8 @@ ifdef PARSER_DEBUG PARSER_DEBUG_BISON := -t PARSER_DEBUG_FLEX := -d CFLAGS += -DPARSER_DEBUG + $(call detected_var,PARSER_DEBUG_BISON) + $(call detected_var,PARSER_DEBUG_FLEX) endif ifndef NO_LIBPYTHON @@ -152,119 +176,7 @@ LDFLAGS += -Wl,-z,noexecstack EXTLIBS = -lpthread -lrt -lm -ldl -ifneq ($(OUTPUT),) - OUTPUT_FEATURES = $(OUTPUT)config/feature-checks/ - $(shell mkdir -p $(OUTPUT_FEATURES)) -endif - -feature_check = $(eval $(feature_check_code)) -define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) -endef - -feature_set = $(eval $(feature_set_code)) -define feature_set_code - feature-$(1) := 1 -endef - -# -# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: -# - -# -# Note that this is not a complete list of all feature tests, just -# those that are typically built on a fully configured system. -# -# [ Feature tests not mentioned here have to be built explicitly in -# the rule that uses them - an example for that is the 'bionic' -# feature check. ] -# -CORE_FEATURE_TESTS = \ - backtrace \ - dwarf \ - fortify-source \ - sync-compare-and-swap \ - glibc \ - gtk2 \ - gtk2-infobar \ - libaudit \ - libbfd \ - libelf \ - libelf-getphdrnum \ - libelf-mmap \ - libnuma \ - libperl \ - libpython \ - libpython-version \ - libslang \ - libunwind \ - stackprotector-all \ - timerfd \ - libdw-dwarf-unwind \ - zlib - -LIB_FEATURE_TESTS = \ - dwarf \ - glibc \ - gtk2 \ - libaudit \ - libbfd \ - libelf \ - libnuma \ - libperl \ - libpython \ - libslang \ - libunwind \ - libdw-dwarf-unwind \ - zlib - -VF_FEATURE_TESTS = \ - backtrace \ - fortify-source \ - sync-compare-and-swap \ - gtk2-infobar \ - libelf-getphdrnum \ - libelf-mmap \ - libpython-version \ - stackprotector-all \ - timerfd \ - libunwind-debug-frame \ - bionic \ - liberty \ - liberty-z \ - cplus-demangle \ - compile-32 \ - compile-x32 - -# Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features. -# If in the future we need per-feature checks/flags for features not -# mentioned in this list we need to refactor this ;-). -set_test_all_flags = $(eval $(set_test_all_flags_code)) -define set_test_all_flags_code - FEATURE_CHECK_CFLAGS-all += $(FEATURE_CHECK_CFLAGS-$(1)) - FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1)) -endef - -$(foreach feat,$(CORE_FEATURE_TESTS),$(call set_test_all_flags,$(feat))) - -# -# Special fast-path for the 'all features are available' case: -# -$(call feature_check,all,$(MSG)) - -# -# Just in case the build freshly failed, make sure we print the -# feature matrix: -# -ifeq ($(feature-all), 1) - # - # test-all.c passed - just set all the core feature flags to 1: - # - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat))) -else - $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(addsuffix .bin,$(CORE_FEATURE_TESTS)) >/dev/null 2>&1) - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) -endif +include $(srctree)/tools/build/Makefile.feature ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all @@ -293,7 +205,7 @@ endif CFLAGS += -I$(src-perf)/util CFLAGS += -I$(src-perf) -CFLAGS += -I$(LIB_INCLUDE) +CFLAGS += -I$(srctree)/tools/lib/ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE @@ -301,6 +213,10 @@ ifeq ($(feature-sync-compare-and-swap), 1) CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT endif +ifeq ($(feature-pthread-attr-setaffinity-np), 1) + CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP +endif + ifndef NO_BIONIC $(call feature_check,bionic) ifeq ($(feature-bionic), 1) @@ -355,6 +271,7 @@ endif # NO_LIBELF ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf + $(call detected,CONFIG_LIBELF) ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT @@ -375,6 +292,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) EXTLIBS += -ldw + $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF endif # NO_LIBELF @@ -402,9 +320,11 @@ ifdef NO_LIBUNWIND dwarf-post-unwind := 0 else dwarf-post-unwind-text := libdw + $(call detected,CONFIG_LIBDW_DWARF_UNWIND) endif else dwarf-post-unwind-text := libunwind + $(call detected,CONFIG_LIBUNWIND) # Enable libunwind support by default. ifndef NO_LIBDW_DWARF_UNWIND NO_LIBDW_DWARF_UNWIND := 1 @@ -413,6 +333,7 @@ endif ifeq ($(dwarf-post-unwind),1) CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT + $(call detected,CONFIG_DWARF_UNWIND) else NO_DWARF_UNWIND := 1 endif @@ -441,6 +362,7 @@ ifndef NO_LIBAUDIT else CFLAGS += -DHAVE_LIBAUDIT_SUPPORT EXTLIBS += -laudit + $(call detected,CONFIG_AUDIT) endif endif @@ -457,6 +379,7 @@ ifndef NO_SLANG CFLAGS += -I/usr/include/slang CFLAGS += -DHAVE_SLANG_SUPPORT EXTLIBS += -lslang + $(call detected,CONFIG_SLANG) endif endif @@ -491,10 +414,11 @@ else ifneq ($(feature-libperl), 1) CFLAGS += -DNO_LIBPERL NO_LIBPERL := 1 - msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed); + msg := $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev); else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) + $(call detected,CONFIG_LIBPERL) endif endif @@ -507,22 +431,21 @@ endif disable-python = $(eval $(disable-python_code)) define disable-python_code CFLAGS += -DNO_LIBPYTHON - $(if $(1),$(warning No $(1) was found)) - $(warning Python support will not be built) + $(warning $1) NO_LIBPYTHON := 1 endef ifdef NO_LIBPYTHON - $(call disable-python) + $(call disable-python,Python support disabled by user) else ifndef PYTHON - $(call disable-python,python interpreter) + $(call disable-python,No python interpreter was found: disables Python support - please install python-devel/python-dev) else PYTHON_WORD := $(call shell-wordify,$(PYTHON)) ifndef PYTHON_CONFIG - $(call disable-python,python-config tool) + $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev) else PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) @@ -534,7 +457,7 @@ else FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) ifneq ($(feature-libpython), 1) - $(call disable-python,Python.h (for Python 2.x)) + $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev) else ifneq ($(feature-libpython-version), 1) @@ -554,6 +477,7 @@ else LDFLAGS += $(PYTHON_EMBED_LDFLAGS) EXTLIBS += $(PYTHON_EMBED_LIBADD) LANG_BINDINGS += $(obj-perf)python/perf.so + $(call detected,CONFIG_LIBPYTHON) endif endif endif @@ -594,7 +518,7 @@ else EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) + msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling) CFLAGS += -DNO_DEMANGLE endif endif @@ -611,11 +535,23 @@ ifndef NO_ZLIB ifeq ($(feature-zlib), 1) CFLAGS += -DHAVE_ZLIB_SUPPORT EXTLIBS += -lz + $(call detected,CONFIG_ZLIB) else NO_ZLIB := 1 endif endif +ifndef NO_LZMA + ifeq ($(feature-lzma), 1) + CFLAGS += -DHAVE_LZMA_SUPPORT + EXTLIBS += -llzma + $(call detected,CONFIG_LZMA) + else + msg := $(warning No liblzma found, disables xz kernel module decompression, please install xz-devel/liblzma-dev); + NO_LZMA := 1 + endif +endif + ifndef NO_BACKTRACE ifeq ($(feature-backtrace), 1) CFLAGS += -DHAVE_BACKTRACE_SUPPORT @@ -629,6 +565,7 @@ ifndef NO_LIBNUMA else CFLAGS += -DHAVE_LIBNUMA_SUPPORT EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) endif endif @@ -645,7 +582,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq (${IS_X86_64}, 1) + ifneq ($(ARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -661,6 +598,18 @@ else NO_PERF_READ_VDSOX32 := 1 endif +ifdef LIBBABELTRACE + $(call feature_check,libbabeltrace) + ifeq ($(feature-libbabeltrace), 1) + CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) + LDFLAGS += $(LIBBABELTRACE_LDFLAGS) + EXTLIBS += -lbabeltrace-ctf + $(call detected,CONFIG_LIBBABELTRACE) + else + msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev); + endif +endif + # Among the variables below, these: # perfexecdir # template_dir @@ -693,7 +642,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = etc/perfconfig endif ifndef lib -ifeq ($(IS_X86_64),1) +ifeq ($(ARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib @@ -729,83 +678,33 @@ plugindir=$(libdir)/traceevent/plugins plugindir_SQ= $(subst ','\'',$(plugindir)) endif -# -# Print the result of the feature test: -# -feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG)) - -define feature_print_status_code - ifeq ($(feature-$(1)), 1) - MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) - else - MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) - endif -endef - -feature_print_var = $(eval $(feature_print_var_code)) $(info $(MSG)) -define feature_print_var_code +print_var = $(eval $(print_var_code)) $(info $(MSG)) +define print_var_code MSG = $(shell printf '...%30s: %s' $(1) $($(1))) endef -feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG)) -define feature_print_text_code - MSG = $(shell printf '...%30s: %s' $(1) $(2)) -endef - -PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat)))) -PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES) - -ifeq ($(dwarf-post-unwind),1) - PERF_FEATURES += dwarf-post-unwind($(dwarf-post-unwind-text)) -endif - -# The $(display_lib) controls the default detection message -# output. It's set if: -# - detected features differes from stored features from -# last build (in PERF-FEATURES file) -# - one of the $(LIB_FEATURE_TESTS) is not detected -# - VF is enabled - -ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)") - $(shell echo "$(PERF_FEATURES)" > $(OUTPUT)PERF-FEATURES) - display_lib := 1 -endif - -feature_check = $(eval $(feature_check_code)) -define feature_check_code - ifneq ($(feature-$(1)), 1) - display_lib := 1 - endif -endef - -$(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_check,$(feat))) - ifeq ($(VF),1) - display_lib := 1 - display_vf := 1 -endif - -ifeq ($(display_lib),1) + $(call print_var,prefix) + $(call print_var,bindir) + $(call print_var,libdir) + $(call print_var,sysconfdir) + $(call print_var,LIBUNWIND_DIR) + $(call print_var,LIBDW_DIR) $(info ) - $(info Auto-detecting system features:) - $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print_status,$(feat),)) - - ifeq ($(dwarf-post-unwind),1) - $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) - endif -endif - -ifeq ($(display_vf),1) - $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print_status,$(feat),)) - $(info ) - $(call feature_print_var,prefix) - $(call feature_print_var,bindir) - $(call feature_print_var,libdir) - $(call feature_print_var,sysconfdir) - $(call feature_print_var,LIBUNWIND_DIR) - $(call feature_print_var,LIBDW_DIR) endif -ifeq ($(display_lib),1) - $(info ) -endif +$(call detected_var,bindir_SQ) +$(call detected_var,PYTHON_WORD) +ifneq ($(OUTPUT),) +$(call detected_var,OUTPUT) +endif +$(call detected_var,htmldir_SQ) +$(call detected_var,infodir_SQ) +$(call detected_var,mandir_SQ) +$(call detected_var,ETC_PERFCONFIG_SQ) +$(call detected_var,prefix_SQ) +$(call detected_var,perfexecdir_SQ) +$(call detected_var,LIBDIR) +$(call detected_var,GTK_CFLAGS) +$(call detected_var,PERL_EMBED_CCOPTS) +$(call detected_var,PYTHON_EMBED_CCOPTS) diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index 851cd0172a76..e11fbd6fae78 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch @@ -1,31 +1,18 @@ +ifndef ARCH +ARCH := $(shell uname -m 2>/dev/null || echo not) +endif -uname_M := $(shell uname -m 2>/dev/null || echo not) - -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ -e s/sa110/arm/ \ +ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ + -e s/sun4u/sparc/ -e s/sparc64/sparc/ \ + -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ -e s/tile.*/tile/ ) -# Additional ARCH settings for x86 -ifeq ($(ARCH),i386) - override ARCH := x86 -endif - -ifeq ($(ARCH),x86_64) - override ARCH := x86 - IS_X86_64 := 0 - ifeq (, $(findstring m32,$(CFLAGS))) - IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1) - RAW_ARCH := x86_64 - endif -endif - -ifeq (${IS_X86_64}, 1) +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) IS_64_BIT := 1 -else ifeq ($(ARCH),x86) - IS_64_BIT := 0 else - IS_64_BIT := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) + IS_64_BIT := 0 endif diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 7076a62d0ff7..c16ce833079c 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -175,6 +175,5 @@ _ge-abspath = $(if $(is-executable),$(1)) define get-executable-or-default $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) endef -_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) -_gea_warn = $(warning The path '$(1)' is not executable.) +_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2))) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 33569847fdcc..3ba80b2359cc 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -47,8 +47,16 @@ __my_reassemble_comp_words_by_ref() done } -type _get_comp_words_by_ref &>/dev/null || -_get_comp_words_by_ref() +# Define preload_get_comp_words_by_ref="false", if the function +# __perf_get_comp_words_by_ref() is required instead. +preload_get_comp_words_by_ref="true" + +if [ $preload_get_comp_words_by_ref = "true" ]; then + type _get_comp_words_by_ref &>/dev/null || + preload_get_comp_words_by_ref="false" +fi +[ $preload_get_comp_words_by_ref = "true" ] || +__perf_get_comp_words_by_ref() { local exclude cur_ words_ cword_ if [ "$1" = "-n" ]; then @@ -76,8 +84,16 @@ _get_comp_words_by_ref() done } -type __ltrim_colon_completions &>/dev/null || -__ltrim_colon_completions() +# Define preload__ltrim_colon_completions="false", if the function +# __perf__ltrim_colon_completions() is required instead. +preload__ltrim_colon_completions="true" + +if [ $preload__ltrim_colon_completions = "true" ]; then + type __ltrim_colon_completions &>/dev/null || + preload__ltrim_colon_completions="false" +fi +[ $preload__ltrim_colon_completions = "true" ] || +__perf__ltrim_colon_completions() { if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then # Remove colon-word prefix from COMPREPLY items @@ -97,7 +113,32 @@ __perfcomp () __perfcomp_colon () { __perfcomp "$1" "$2" - __ltrim_colon_completions $cur + if [ $preload__ltrim_colon_completions = "true" ]; then + __ltrim_colon_completions $cur + else + __perf__ltrim_colon_completions $cur + fi +} + +__perf_prev_skip_opts () +{ + local i cmd_ cmds_ + + let i=cword-1 + cmds_=$($cmd $1 --list-cmds) + prev_skip_opts=() + while [ $i -ge 0 ]; do + if [[ ${words[i]} == $1 ]]; then + return + fi + for cmd_ in $cmds_; do + if [[ ${words[i]} == $cmd_ ]]; then + prev_skip_opts=${words[i]} + return + fi + done + ((i--)) + done } __perf_main () @@ -107,29 +148,36 @@ __perf_main () cmd=${words[0]} COMPREPLY=() + # Skip options backward and find the last perf command + __perf_prev_skip_opts # List perf subcommands or long options - if [ $cword -eq 1 ]; then + if [ -z $prev_skip_opts ]; then if [[ $cur == --* ]]; then - __perfcomp '--help --version \ - --exec-path --html-path --paginate --no-pager \ - --perf-dir --work-tree --debugfs-dir' -- "$cur" + cmds=$($cmd --list-opts) else cmds=$($cmd --list-cmds) - __perfcomp "$cmds" "$cur" fi + __perfcomp "$cmds" "$cur" # List possible events for -e option - elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then + elif [[ $prev == @("-e"|"--event") && + $prev_skip_opts == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) __perfcomp_colon "$evts" "$cur" - # List subcommands for perf commands - elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then - subcmds=$($cmd $prev --list-cmds) - __perfcomp_colon "$subcmds" "$cur" - # List long option names - elif [[ $cur == --* ]]; then - subcmd=${words[1]} - opts=$($cmd $subcmd --list-opts) - __perfcomp "$opts" "$cur" + else + # List subcommands for perf commands + if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| + |data|help|script|test|timechart|trace) ]]; then + subcmds=$($cmd $prev_skip_opts --list-cmds) + __perfcomp_colon "$subcmds" "$cur" + fi + # List long option names + if [[ $cur == --* ]]; then + subcmd=$prev_skip_opts + __perf_prev_skip_opts $subcmd + subcmd=$subcmd" "$prev_skip_opts + opts=$($cmd $subcmd --list-opts) + __perfcomp "$opts" "$cur" + fi fi } @@ -198,7 +246,11 @@ type perf &>/dev/null && _perf() { local cur words cword prev - _get_comp_words_by_ref -n =: cur words cword prev + if [ $preload_get_comp_words_by_ref = "true" ]; then + _get_comp_words_by_ref -n =: cur words cword prev + else + __perf_get_comp_words_by_ref -n =: cur words cword prev + fi __perf_main } && diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index a3b13d7dc1d4..6ef68165c9db 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -6,7 +6,6 @@ #include <sys/syscall.h> #include <linux/types.h> #include <linux/perf_event.h> -#include <asm/unistd.h> #if defined(__i386__) #define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3700a7faca6c..b857fcbd00cf 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -13,6 +13,7 @@ #include "util/quote.h" #include "util/run-command.h" #include "util/parse-events.h" +#include "util/parse-options.h" #include "util/debug.h" #include <api/fs/debugfs.h> #include <pthread.h> @@ -62,6 +63,7 @@ static struct cmd_struct commands[] = { #endif { "inject", cmd_inject, 0 }, { "mem", cmd_mem, 0 }, + { "data", cmd_data, 0 }, }; struct pager_config { @@ -124,6 +126,23 @@ static void commit_pager_choice(void) } } +struct option options[] = { + OPT_ARGUMENT("help", "help"), + OPT_ARGUMENT("version", "version"), + OPT_ARGUMENT("exec-path", "exec-path"), + OPT_ARGUMENT("html-path", "html-path"), + OPT_ARGUMENT("paginate", "paginate"), + OPT_ARGUMENT("no-pager", "no-pager"), + OPT_ARGUMENT("perf-dir", "perf-dir"), + OPT_ARGUMENT("work-tree", "work-tree"), + OPT_ARGUMENT("debugfs-dir", "debugfs-dir"), + OPT_ARGUMENT("buildid-dir", "buildid-dir"), + OPT_ARGUMENT("list-cmds", "list-cmds"), + OPT_ARGUMENT("list-opts", "list-opts"), + OPT_ARGUMENT("debug", "debug"), + OPT_END() +}; + static int handle_options(const char ***argv, int *argc, int *envchanged) { int handled = 0; @@ -222,6 +241,16 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) struct cmd_struct *p = commands+i; printf("%s ", p->cmd); } + putchar('\n'); + exit(0); + } else if (!strcmp(cmd, "--list-opts")) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(options)-1; i++) { + struct option *p = options+i; + printf("--%s ", p->long_name); + } + putchar('\n'); exit(0); } else if (!strcmp(cmd, "--debug")) { if (*argc < 2) { diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 1dabb8553499..e14bb637255c 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -29,7 +29,7 @@ static inline unsigned long long rdclock(void) return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } -#define MAX_NR_CPUS 256 +#define MAX_NR_CPUS 1024 extern const char *input_name; extern bool perf_host, perf_guest; @@ -53,6 +53,7 @@ struct record_opts { bool sample_time; bool period; bool sample_intr_regs; + bool running_time; unsigned int freq; unsigned int mmap_pages; unsigned int user_freq; @@ -61,6 +62,8 @@ struct record_opts { u64 user_interval; bool sample_transaction; unsigned initial_delay; + bool use_clockid; + clockid_t clockid; }; struct option; diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build new file mode 100644 index 000000000000..41efd7e368b3 --- /dev/null +++ b/tools/perf/scripts/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ +libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build new file mode 100644 index 000000000000..928e110179cb --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c index 790ceba6ad3f..28431d1bbcf5 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c @@ -5,7 +5,10 @@ * ANY CHANGES MADE HERE WILL BE LOST! * */ - +#include <stdbool.h> +#ifndef HAS_BOOL +# define HAS_BOOL 1 +#endif #line 1 "Context.xs" /* * Context.xs. XS interfaces for perf script. diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build new file mode 100644 index 000000000000..aefc15c9444a --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build new file mode 100644 index 000000000000..6a8801b32017 --- /dev/null +++ b/tools/perf/tests/Build @@ -0,0 +1,43 @@ +perf-y += builtin-test.o +perf-y += parse-events.o +perf-y += dso-data.o +perf-y += attr.o +perf-y += vmlinux-kallsyms.o +perf-y += open-syscall.o +perf-y += open-syscall-all-cpus.o +perf-y += open-syscall-tp-fields.o +perf-y += mmap-basic.o +perf-y += perf-record.o +perf-y += rdpmc.o +perf-y += evsel-roundtrip-name.o +perf-y += evsel-tp-sched.o +perf-y += fdarray.o +perf-y += pmu.o +perf-y += hists_common.o +perf-y += hists_link.o +perf-y += hists_filter.o +perf-y += hists_output.o +perf-y += hists_cumulate.o +perf-y += python-use.o +perf-y += bp_signal.o +perf-y += bp_signal_overflow.o +perf-y += task-exit.o +perf-y += sw-clock.o +perf-y += mmap-thread-lookup.o +perf-y += thread-mg-share.o +perf-y += switch-tracking.o +perf-y += keep-tracking.o +perf-y += code-reading.o +perf-y += sample-parsing.o +perf-y += parse-no-sample-id-all.o +perf-y += kmod-path.o + +perf-$(CONFIG_X86) += perf-time-to-tsc.o + +ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +endif + +CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index c9b4b6269b51..1091bd47adfd 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py @@ -104,7 +104,6 @@ class Event(dict): continue if not self.compare_data(self[t], other[t]): log.warning("expected %s=%s, got %s" % (t, self[t], other[t])) - # Test file description needs to have following sections: # [config] diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index d3095dafed36..7e6d74946e04 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=104 +size=112 config=0 sample_period=4000 sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index 872ed7e24c7c..f4cf148f14cb 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=104 +size=112 config=0 sample_period=0 sample_type=0 diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 4b7d9ab0f049..4f4098167112 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -167,6 +167,10 @@ static struct test { .func = test__fdarray__add, }, { + .desc = "Test kmod_path__parse function", + .func = test__kmod_path__parse, + }, + { .func = NULL, }, }; @@ -291,7 +295,7 @@ static int perf_test__list(int argc, const char **argv) int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) { - const char * const test_usage[] = { + const char *test_usage[] = { "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]", NULL, }; @@ -302,13 +306,14 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show symbol address, etc)"), OPT_END() }; + const char * const test_subcommands[] = { "list", NULL }; struct intlist *skiplist = NULL; int ret = hists__init(); if (ret < 0) return ret; - argc = parse_options(argc, argv, test_options, test_usage, 0); + argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc, argv); diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index caaf37f079b1..513e5febbe5a 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -112,6 +112,9 @@ int test__dso_data(void) dso = dso__new((const char *)file); + TEST_ASSERT_VAL("Failed to access to dso", + dso__data_fd(dso, &machine) >= 0); + /* Basic 10 bytes tests. */ for (i = 0; i < ARRAY_SIZE(offsets); i++) { struct test_data_offset *data = &offsets[i]; @@ -243,8 +246,8 @@ int test__dso_data_cache(void) limit = nr * 4; TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit)); - /* and this is now our dso open FDs limit + 1 extra */ - dso_cnt = limit / 2 + 1; + /* and this is now our dso open FDs limit */ + dso_cnt = limit / 2; TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(dso_cnt, TEST_FILE_SIZE)); @@ -252,13 +255,13 @@ int test__dso_data_cache(void) struct dso *dso = dsos[i]; /* - * Open dsos via dso__data_fd or dso__data_read_offset. - * Both opens the data file and keep it open. + * Open dsos via dso__data_fd(), it opens the data + * file and keep it open (unless open file limit). */ + fd = dso__data_fd(dso, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + if (i % 2) { - fd = dso__data_fd(dso, &machine); - TEST_ASSERT_VAL("failed to get fd", fd > 0); - } else { #define BUFSIZE 10 u8 buf[BUFSIZE]; ssize_t n; @@ -268,7 +271,10 @@ int test__dso_data_cache(void) } } - /* open +1 dso over the allowed limit */ + /* verify the first one is already open */ + TEST_ASSERT_VAL("dsos[0] is not open", dsos[0]->data.fd != -1); + + /* open +1 dso to reach the allowed limit */ fd = dso__data_fd(dsos[i], &machine); TEST_ASSERT_VAL("failed to get fd", fd > 0); diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index ab28cca2cb97..0bf06bec68c7 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -11,6 +11,9 @@ #include "thread.h" #include "callchain.h" +/* For bsearch. We try to unwind functions in shared object. */ +#include <stdlib.h> + static int mmap_handler(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -28,7 +31,7 @@ static int init_live_machine(struct machine *machine) mmap_handler, machine, true); } -#define MAX_STACK 6 +#define MAX_STACK 8 static int unwind_entry(struct unwind_entry *entry, void *arg) { @@ -37,6 +40,8 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) static const char *funcs[MAX_STACK] = { "test__arch_unwind_sample", "unwind_thread", + "compare", + "bsearch", "krava_3", "krava_2", "krava_1", @@ -88,10 +93,37 @@ static int unwind_thread(struct thread *thread) return err; } +static int global_unwind_retval = -INT_MAX; + +__attribute__ ((noinline)) +static int compare(void *p1, void *p2) +{ + /* Any possible value should be 'thread' */ + struct thread *thread = *(struct thread **)p1; + + if (global_unwind_retval == -INT_MAX) + global_unwind_retval = unwind_thread(thread); + + return p1 - p2; +} + __attribute__ ((noinline)) static int krava_3(struct thread *thread) { - return unwind_thread(thread); + struct thread *array[2] = {thread, thread}; + void *fp = &bsearch; + /* + * make _bsearch a volatile function pointer to + * prevent potential optimization, which may expand + * bsearch and call compare directly from this function, + * instead of libc shared object. + */ + void *(*volatile _bsearch)(void *, void *, size_t, + size_t, int (*)(void *, void *)); + + _bsearch = fp; + _bsearch(array, &thread, 2, sizeof(struct thread **), compare); + return global_unwind_retval; } __attribute__ ((noinline)) diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 614d5c4978ab..18619966454c 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -140,7 +140,7 @@ static void del_hist_entries(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); rb_erase(node, root_out); rb_erase(&he->rb_node_in, root_in); - hist_entry__free(he); + hist_entry__delete(he); } } @@ -187,7 +187,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. */ hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", @@ -454,12 +454,12 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) * 30.00% 10.00% perf perf [.] cmd_record * 20.00% 0.00% bash libc [.] malloc * 10.00% 10.00% bash [kernel] [k] page_fault - * 10.00% 10.00% perf [kernel] [k] schedule - * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open + * 10.00% 10.00% bash bash [.] xmalloc * 10.00% 10.00% perf [kernel] [k] page_fault - * 10.00% 10.00% perf libc [.] free * 10.00% 10.00% perf libc [.] malloc - * 10.00% 10.00% bash bash [.] xmalloc + * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 10.00% perf libc [.] free + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open */ struct result expected[] = { { 7000, 2000, "perf", "perf", "main" }, @@ -468,12 +468,12 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) { 3000, 1000, "perf", "perf", "cmd_record" }, { 2000, 0, "bash", "libc", "malloc" }, { 1000, 1000, "bash", "[kernel]", "page_fault" }, - { 1000, 1000, "perf", "[kernel]", "schedule" }, - { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, { 1000, 1000, "perf", "libc", "free" }, { 1000, 1000, "perf", "libc", "malloc" }, - { 1000, 1000, "bash", "bash", "xmalloc" }, + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, }; symbol_conf.use_callchain = false; @@ -537,10 +537,13 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * malloc * main * - * 10.00% 10.00% perf [kernel] [k] schedule + * 10.00% 10.00% bash bash [.] xmalloc * | - * --- schedule - * run_command + * --- xmalloc + * malloc + * xmalloc <--- NOTE: there's a cycle + * malloc + * xmalloc * main * * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open @@ -556,6 +559,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * run_command * main * + * 10.00% 10.00% perf [kernel] [k] schedule + * | + * --- schedule + * run_command + * main + * * 10.00% 10.00% perf libc [.] free * | * --- free @@ -570,15 +579,6 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) * run_command * main * - * 10.00% 10.00% bash bash [.] xmalloc - * | - * --- xmalloc - * malloc - * xmalloc <--- NOTE: there's a cycle - * malloc - * xmalloc - * main - * */ struct result expected[] = { { 7000, 2000, "perf", "perf", "main" }, @@ -587,12 +587,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { 3000, 1000, "perf", "perf", "cmd_record" }, { 2000, 0, "bash", "libc", "malloc" }, { 1000, 1000, "bash", "[kernel]", "page_fault" }, - { 1000, 1000, "perf", "[kernel]", "schedule" }, + { 1000, 1000, "bash", "bash", "xmalloc" }, { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, { 1000, 1000, "perf", "[kernel]", "page_fault" }, + { 1000, 1000, "perf", "[kernel]", "schedule" }, { 1000, 1000, "perf", "libc", "free" }, { 1000, 1000, "perf", "libc", "malloc" }, - { 1000, 1000, "bash", "bash", "xmalloc" }, }; struct callchain_result expected_callchain[] = { { @@ -622,9 +622,12 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "bash", "main" }, }, }, { - 3, { { "[kernel]", "schedule" }, - { "perf", "run_command" }, - { "perf", "main" }, }, + 6, { { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "libc", "malloc" }, + { "bash", "xmalloc" }, + { "bash", "main" }, }, }, { 3, { { "[kernel]", "sys_perf_event_open" }, @@ -638,6 +641,11 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "perf", "main" }, }, }, { + 3, { { "[kernel]", "schedule" }, + { "perf", "run_command" }, + { "perf", "main" }, }, + }, + { 4, { { "libc", "free" }, { "perf", "cmd_record" }, { "perf", "run_command" }, @@ -649,14 +657,6 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) { "perf", "run_command" }, { "perf", "main" }, }, }, - { - 6, { { "bash", "xmalloc" }, - { "libc", "malloc" }, - { "bash", "xmalloc" }, - { "libc", "malloc" }, - { "bash", "xmalloc" }, - { "bash", "main" }, }, - }, }; symbol_conf.use_callchain = true; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 74f257a81265..59e53db7914c 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -138,7 +138,7 @@ int test__hists_filter(void) struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index a748f2be1222..b52c9faea224 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -106,7 +106,7 @@ static void del_hist_entries(struct hists *hists) he = rb_entry(node, struct hist_entry, rb_node); rb_erase(node, root_out); rb_erase(&he->rb_node_in, root_in); - hist_entry__free(he); + hist_entry__delete(he); } } @@ -152,7 +152,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -252,7 +252,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -306,7 +306,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -384,7 +384,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -487,7 +487,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists); + hists__output_resort(hists, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c new file mode 100644 index 000000000000..e8d7cbb9320c --- /dev/null +++ b/tools/perf/tests/kmod-path.c @@ -0,0 +1,73 @@ +#include <stdbool.h> +#include "tests.h" +#include "dso.h" +#include "debug.h" + +static int test(const char *path, bool alloc_name, bool alloc_ext, + bool kmod, bool comp, const char *name, const char *ext) +{ + struct kmod_path m; + + memset(&m, 0x0, sizeof(m)); + + TEST_ASSERT_VAL("kmod_path__parse", + !__kmod_path__parse(&m, path, alloc_name, alloc_ext)); + + pr_debug("%s - alloc name %d, alloc ext %d, kmod %d, comp %d, name '%s', ext '%s'\n", + path, alloc_name, alloc_ext, m.kmod, m.comp, m.name, m.ext); + + TEST_ASSERT_VAL("wrong kmod", m.kmod == kmod); + TEST_ASSERT_VAL("wrong comp", m.comp == comp); + + if (ext) + TEST_ASSERT_VAL("wrong ext", m.ext && !strcmp(ext, m.ext)); + else + TEST_ASSERT_VAL("wrong ext", !m.ext); + + if (name) + TEST_ASSERT_VAL("wrong name", m.name && !strcmp(name, m.name)); + else + TEST_ASSERT_VAL("wrong name", !m.name); + + free(m.name); + free(m.ext); + return 0; +} + +#define T(path, an, ae, k, c, n, e) \ + TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e)) + +int test__kmod_path__parse(void) +{ + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL); + T("/xxxx/xxxx/x-x.ko", false , true , true, false, NULL , NULL); + T("/xxxx/xxxx/x-x.ko", true , false , true, false, "[x_x]", NULL); + T("/xxxx/xxxx/x-x.ko", false , false , true, false, NULL , NULL); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz"); + T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz"); + T("/xxxx/xxxx/x.ko.gz", true , false , true, true, "[x]", NULL); + T("/xxxx/xxxx/x.ko.gz", false , false , true, true, NULL , NULL); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x.gz", true , true , false, true, "x.gz" ,"gz"); + T("/xxxx/xxxx/x.gz", false , true , false, true, NULL ,"gz"); + T("/xxxx/xxxx/x.gz", true , false , false, true, "x.gz" , NULL); + T("/xxxx/xxxx/x.gz", false , false , false, true, NULL , NULL); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("x.gz", true , true , false, true, "x.gz", "gz"); + T("x.gz", false , true , false, true, NULL , "gz"); + T("x.gz", true , false , false, true, "x.gz", NULL); + T("x.gz", false , false , false, true, NULL , NULL); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("x.ko.gz", true , true , true, true, "[x]", "gz"); + T("x.ko.gz", false , true , true, true, NULL , "gz"); + T("x.ko.gz", true , false , true, true, "[x]", NULL); + T("x.ko.gz", false , false , true, true, NULL , NULL); + + return 0; +} diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 69a71ff84e01..bff85324f799 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -5,7 +5,7 @@ include config/Makefile.arch # FIXME looks like x86 is the only arch running tests ;-) # we need some IS_(32/64) flag to make this generic -ifeq ($(IS_X86_64),1) +ifeq ($(ARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib @@ -222,7 +222,6 @@ tarpkg: @cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \ echo "- $@: $$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 - all: $(run) $(run_O) tarpkg @echo OK diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c index 8fa82d1700c7..3ec885c48f8f 100644 --- a/tools/perf/tests/open-syscall-all-cpus.c +++ b/tools/perf/tests/open-syscall-all-cpus.c @@ -29,7 +29,12 @@ int test__open_syscall_event_on_all_cpus(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c index a33b2daae40f..07aa319bf334 100644 --- a/tools/perf/tests/open-syscall.c +++ b/tools/perf/tests/open-syscall.c @@ -18,7 +18,12 @@ int test__open_syscall_event(void) evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7f2f51f93619..3de744961739 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -3,6 +3,7 @@ #include "evsel.h" #include "evlist.h" #include <api/fs/fs.h> +#include <api/fs/tracefs.h> #include <api/fs/debugfs.h> #include "tests.h" #include "debug.h" @@ -294,6 +295,36 @@ static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) return test__checkevent_genhw(evlist); } +static int test__checkevent_exclude_idle_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__checkevent_exclude_idle_modifier_1(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_symbolic_name(evlist); +} + static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); @@ -1145,15 +1176,66 @@ static int test__pinned_group(struct perf_evlist *evlist) return 0; } +static int test__checkevent_breakpoint_len(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) == + evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_1 == + evsel->attr.bp_len); + + return 0; +} + +static int test__checkevent_breakpoint_len_w(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type); + TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config); + TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_W == + evsel->attr.bp_type); + TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_2 == + evsel->attr.bp_len); + + return 0; +} + +static int +test__checkevent_breakpoint_len_rw_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_breakpoint_rw(evlist); +} + static int count_tracepoints(void) { char events_path[PATH_MAX]; struct dirent *events_ent; + const char *mountpoint; DIR *events_dir; int cnt = 0; - scnprintf(events_path, PATH_MAX, "%s/tracing/events", - debugfs_find_mountpoint()); + mountpoint = tracefs_find_mountpoint(); + if (mountpoint) { + scnprintf(events_path, PATH_MAX, "%s/events", + mountpoint); + } else { + mountpoint = debugfs_find_mountpoint(); + scnprintf(events_path, PATH_MAX, "%s/tracing/events", + mountpoint); + } events_dir = opendir(events_path); @@ -1420,6 +1502,21 @@ static struct evlist_test test__events[] = { .check = test__pinned_group, .id = 41, }, + { + .name = "mem:0/1", + .check = test__checkevent_breakpoint_len, + .id = 42, + }, + { + .name = "mem:0/2:w", + .check = test__checkevent_breakpoint_len_w, + .id = 43, + }, + { + .name = "mem:0/4:rw:u", + .check = test__checkevent_breakpoint_len_rw_modifier, + .id = 44 + }, #if defined(__s390x__) { .name = "kvm-s390:kvm_s390_create_vm", @@ -1427,6 +1524,16 @@ static struct evlist_test test__events[] = { .id = 100, }, #endif + { + .name = "instructions:I", + .check = test__checkevent_exclude_idle_modifier, + .id = 45, + }, + { + .name = "instructions:kIG", + .check = test__checkevent_exclude_idle_modifier_1, + .id = 46, + }, }; static struct evlist_test test__events_pmu[] = { @@ -1471,7 +1578,7 @@ static int test_event(struct evlist_test *e) } else { ret = e->check(evlist); } - + perf_evlist__delete(evlist); return ret; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 4908c648a597..30c02181e78b 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -110,7 +110,7 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_STACK_USER) { COMP(user_stack.size); - if (memcmp(s1->user_stack.data, s1->user_stack.data, + if (memcmp(s1->user_stack.data, s2->user_stack.data, s1->user_stack.size)) { pr_debug("Samples differ at 'user_stack'\n"); return false; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 00e776a87a9c..52758a33f64c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -51,6 +51,7 @@ int test__hists_cumulate(void); int test__switch_tracking(void); int test__fdarray__filter(void); int test__fdarray__add(void); +int test__kmod_path__parse(void); #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build new file mode 100644 index 000000000000..0a73538c0441 --- /dev/null +++ b/tools/perf/ui/Build @@ -0,0 +1,14 @@ +libperf-y += setup.o +libperf-y += helpline.o +libperf-y += progress.o +libperf-y += util.o +libperf-y += hist.o +libperf-y += stdio/hist.o + +CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" + +libperf-$(CONFIG_SLANG) += browser.o +libperf-$(CONFIG_SLANG) += browsers/ +libperf-$(CONFIG_SLANG) += tui/ + +CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build new file mode 100644 index 000000000000..de223f5bed58 --- /dev/null +++ b/tools/perf/ui/browsers/Build @@ -0,0 +1,10 @@ +libperf-y += annotate.o +libperf-y += hists.o +libperf-y += map.o +libperf-y += scripts.o +libperf-y += header.o + +CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST +CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST +CFLAGS_map.o += -DENABLE_SLFUTURE_CONST +CFLAGS_scripts.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 1e0a2fd80115..e5250eb2dd57 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -517,7 +517,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) } annotate_browser__set_top(browser, dl, idx); - + return true; } @@ -829,10 +829,16 @@ out: return key; } +int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, + struct hist_browser_timer *hbt) +{ + return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); +} + int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - return symbol__tui_annotate(he->ms.sym, he->ms.map, evsel, hbt); + return map_symbol__tui_annotate(&he->ms, evsel, hbt); } static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, @@ -867,7 +873,6 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser ++browser->nr_jumps; } - } static inline int width_jumps(int n) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e6bb04b5b09b..995b7a8596b1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -48,6 +48,24 @@ static bool hist_browser__has_filter(struct hist_browser *hb) return hists__has_filter(hb->hists) || hb->min_pcnt; } +static int hist_browser__get_folding(struct hist_browser *browser) +{ + struct rb_node *nd; + struct hists *hists = browser->hists; + int unfolded_rows = 0; + + for (nd = rb_first(&hists->entries); + (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; + nd = rb_next(nd)) { + struct hist_entry *he = + rb_entry(nd, struct hist_entry, rb_node); + + if (he->ms.unfolded) + unfolded_rows += he->nr_rows; + } + return unfolded_rows; +} + static u32 hist_browser__nr_entries(struct hist_browser *hb) { u32 nr_entries; @@ -57,6 +75,7 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb) else nr_entries = hb->hists->nr_entries; + hb->nr_callchain_rows = hist_browser__get_folding(hb); return nr_entries + hb->nr_callchain_rows; } @@ -492,6 +511,7 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, { int color, width; char folded_sign = callchain_list__folded(chain); + bool show_annotated = browser->show_dso && chain->ms.sym && symbol__annotation(chain->ms.sym)->src; color = HE_COLORSET_NORMAL; width = browser->b.width - (offset + 2); @@ -504,7 +524,8 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, ui_browser__set_color(&browser->b, color); hist_browser__gotorc(browser, row, 0); slsmg_write_nstring(" ", offset); - slsmg_printf("%c ", folded_sign); + slsmg_printf("%c", folded_sign); + ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' '); slsmg_write_nstring(str, width); } @@ -550,7 +571,7 @@ static int hist_browser__show_callchain(struct hist_browser *browser, bool need_percent; node = rb_first(root); - need_percent = !!rb_next(node); + need_percent = node && rb_next(node); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); @@ -1467,7 +1488,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, perf_hpp__set_user_width(symbol_conf.col_width_list_str); while (1) { - const struct thread *thread = NULL; + struct thread *thread = NULL; const struct dso *dso = NULL; int choice = 0, annotate = -2, zoom_dso = -2, zoom_thread = -2, @@ -1593,28 +1614,30 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (!sort__has_sym) goto add_exit_option; + if (browser->selection == NULL) + goto skip_annotation; + if (sort__mode == SORT_MODE__BRANCH) { bi = browser->he_selection->branch_info; - if (browser->selection != NULL && - bi && - bi->from.sym != NULL && + + if (bi == NULL) + goto skip_annotation; + + if (bi->from.sym != NULL && !bi->from.map->dso->annotate_warned && - asprintf(&options[nr_options], "Annotate %s", - bi->from.sym->name) > 0) + asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) { annotate_f = nr_options++; + } - if (browser->selection != NULL && - bi && - bi->to.sym != NULL && + if (bi->to.sym != NULL && !bi->to.map->dso->annotate_warned && (bi->to.sym != bi->from.sym || bi->to.map->dso != bi->from.map->dso) && - asprintf(&options[nr_options], "Annotate %s", - bi->to.sym->name) > 0) + asprintf(&options[nr_options], "Annotate %s", bi->to.sym->name) > 0) { annotate_t = nr_options++; + } } else { - if (browser->selection != NULL && - browser->selection->sym != NULL && + if (browser->selection->sym != NULL && !browser->selection->map->dso->annotate_warned) { struct annotation *notes; @@ -1622,11 +1645,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (notes->src && asprintf(&options[nr_options], "Annotate %s", - browser->selection->sym->name) > 0) + browser->selection->sym->name) > 0) { annotate = nr_options++; + } } } - +skip_annotation: if (thread != NULL && asprintf(&options[nr_options], "Zoom %s %s(%d) thread", (browser->hists->thread_filter ? "out of" : "into"), @@ -1682,6 +1706,7 @@ retry_popup_menu: if (choice == annotate || choice == annotate_t || choice == annotate_f) { struct hist_entry *he; struct annotation *notes; + struct map_symbol ms; int err; do_annotate: if (!objdump_path && perf_session_env__lookup_objdump(env)) @@ -1691,30 +1716,21 @@ do_annotate: if (he == NULL) continue; - /* - * we stash the branch_info symbol + map into the - * the ms so we don't have to rewrite all the annotation - * code to use branch_info. - * in branch mode, the ms struct is not used - */ if (choice == annotate_f) { - he->ms.sym = he->branch_info->from.sym; - he->ms.map = he->branch_info->from.map; - } else if (choice == annotate_t) { - he->ms.sym = he->branch_info->to.sym; - he->ms.map = he->branch_info->to.map; + ms.map = he->branch_info->from.map; + ms.sym = he->branch_info->from.sym; + } else if (choice == annotate_t) { + ms.map = he->branch_info->to.map; + ms.sym = he->branch_info->to.sym; + } else { + ms = *browser->selection; } - notes = symbol__annotation(he->ms.sym); + notes = symbol__annotation(ms.sym); if (!notes->src) continue; - /* - * Don't let this be freed, say, by hists__decay_entry. - */ - he->used = true; - err = hist_entry__tui_annotate(he, evsel, hbt); - he->used = false; + err = map_symbol__tui_annotate(&ms, evsel, hbt); /* * offer option to annotate the other branch source or target * (if they exists) when returning from annotate @@ -1754,13 +1770,13 @@ zoom_thread: pstack__remove(fstack, &browser->hists->thread_filter); zoom_out_thread: ui_helpline__pop(); - browser->hists->thread_filter = NULL; + thread__zput(browser->hists->thread_filter); perf_hpp__set_elide(HISTC_THREAD, false); } else { ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", thread->comm_set ? thread__comm_str(thread) : "", thread->tid); - browser->hists->thread_filter = thread; + browser->hists->thread_filter = thread__get(thread); perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(fstack, &browser->hists->thread_filter); } diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build new file mode 100644 index 000000000000..ec22e899a224 --- /dev/null +++ b/tools/perf/ui/gtk/Build @@ -0,0 +1,9 @@ +CFLAGS_gtk += -fPIC $(GTK_CFLAGS) + +gtk-y += browser.o +gtk-y += hists.o +gtk-y += setup.o +gtk-y += util.o +gtk-y += helpline.o +gtk-y += progress.o +gtk-y += annotate.o diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index dc0d095f318c..25d608394d74 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -204,6 +204,9 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, if (ret) return ret; + if (a->thread != b->thread || !symbol_conf.use_callchain) + return 0; + ret = b->callchain->max_depth - a->callchain->max_depth; } return ret; @@ -282,7 +285,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort(a, b, he_get_##_field); \ } @@ -309,7 +313,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_ACC_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort_acc(a, b, he_get_acc_##_field); \ } @@ -328,7 +333,8 @@ static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \ } #define __HPP_SORT_RAW_FN(_type, _field) \ -static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ +static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *a, struct hist_entry *b) \ { \ return __hpp__sort(a, b, he_get_raw_##_field); \ } @@ -358,7 +364,8 @@ HPP_PERCENT_ACC_FNS(overhead_acc, period) HPP_RAW_FNS(samples, nr_events) HPP_RAW_FNS(period, period) -static int64_t hpp__nop_cmp(struct hist_entry *a __maybe_unused, +static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *a __maybe_unused, struct hist_entry *b __maybe_unused) { return 0; diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h index f34f89eb607c..717d39d3052b 100644 --- a/tools/perf/ui/progress.h +++ b/tools/perf/ui/progress.h @@ -4,12 +4,12 @@ #include <linux/types.h> void ui_progress__finish(void); - + struct ui_progress { const char *title; u64 curr, next, step, total; }; - + void ui_progress__init(struct ui_progress *p, u64 total, const char *title); void ui_progress__update(struct ui_progress *p, u64 adv); diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build new file mode 100644 index 000000000000..9e4c6ca41a9f --- /dev/null +++ b/tools/perf/ui/tui/Build @@ -0,0 +1,4 @@ +libperf-y += setup.o +libperf-y += util.o +libperf-y += helpline.o +libperf-y += progress.o diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c index 1c8b9afd5d6e..88f5143a5981 100644 --- a/tools/perf/ui/tui/helpline.c +++ b/tools/perf/ui/tui/helpline.c @@ -9,6 +9,7 @@ #include "../libslang.h" char ui_helpline__last_msg[1024]; +bool tui_helpline__set; static void tui_helpline__pop(void) { @@ -35,6 +36,8 @@ static int tui_helpline__show(const char *format, va_list ap) sizeof(ui_helpline__last_msg) - backlog, format, ap); backlog += ret; + tui_helpline__set = true; + if (ui_helpline__last_msg[backlog - 1] == '\n') { ui_helpline__puts(ui_helpline__last_msg); SLsmg_refresh(); diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index 2f612562978c..b77e1d771363 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -1,5 +1,8 @@ #include <signal.h> #include <stdbool.h> +#ifdef HAVE_BACKTRACE_SUPPORT +#include <execinfo.h> +#endif #include "../../util/cache.h" #include "../../util/debug.h" @@ -14,6 +17,7 @@ static volatile int ui__need_resize; extern struct perf_error_ops perf_tui_eops; +extern bool tui_helpline__set; extern void hist_browser__init_hpp(void); @@ -88,6 +92,25 @@ int ui__getch(int delay_secs) return SLkp_getkey(); } +#ifdef HAVE_BACKTRACE_SUPPORT +static void ui__signal_backtrace(int sig) +{ + void *stackdump[32]; + size_t size; + + ui__exit(false); + psignal(sig, "perf"); + + printf("-------- backtrace --------\n"); + size = backtrace(stackdump, ARRAY_SIZE(stackdump)); + backtrace_symbols_fd(stackdump, size, STDOUT_FILENO); + + exit(0); +} +#else +# define ui__signal_backtrace ui__signal +#endif + static void ui__signal(int sig) { ui__exit(false); @@ -122,8 +145,8 @@ int ui__init(void) ui_browser__init(); tui_progress__init(); - signal(SIGSEGV, ui__signal); - signal(SIGFPE, ui__signal); + signal(SIGSEGV, ui__signal_backtrace); + signal(SIGFPE, ui__signal_backtrace); signal(SIGINT, ui__signal); signal(SIGQUIT, ui__signal); signal(SIGTERM, ui__signal); @@ -137,7 +160,7 @@ out: void ui__exit(bool wait_for_ok) { - if (wait_for_ok) + if (wait_for_ok && tui_helpline__set) ui__question_window("Fatal Error", ui_helpline__last_msg, "Press any key...", 0); diff --git a/tools/perf/util/Build b/tools/perf/util/Build new file mode 100644 index 000000000000..797490a40075 --- /dev/null +++ b/tools/perf/util/Build @@ -0,0 +1,145 @@ +libperf-y += abspath.o +libperf-y += alias.o +libperf-y += annotate.o +libperf-y += build-id.o +libperf-y += config.o +libperf-y += ctype.o +libperf-y += db-export.o +libperf-y += environment.o +libperf-y += event.o +libperf-y += evlist.o +libperf-y += evsel.o +libperf-y += exec_cmd.o +libperf-y += find_next_bit.o +libperf-y += help.o +libperf-y += kallsyms.o +libperf-y += levenshtein.o +libperf-y += parse-options.o +libperf-y += parse-events.o +libperf-y += path.o +libperf-y += rbtree.o +libperf-y += bitmap.o +libperf-y += hweight.o +libperf-y += run-command.o +libperf-y += quote.o +libperf-y += strbuf.o +libperf-y += string.o +libperf-y += strlist.o +libperf-y += strfilter.o +libperf-y += top.o +libperf-y += usage.o +libperf-y += wrapper.o +libperf-y += sigchain.o +libperf-y += dso.o +libperf-y += symbol.o +libperf-y += color.o +libperf-y += pager.o +libperf-y += header.o +libperf-y += callchain.o +libperf-y += values.o +libperf-y += debug.o +libperf-y += machine.o +libperf-y += map.o +libperf-y += pstack.o +libperf-y += session.o +libperf-y += ordered-events.o +libperf-y += comm.o +libperf-y += thread.o +libperf-y += thread_map.o +libperf-y += trace-event-parse.o +libperf-y += parse-events-flex.o +libperf-y += parse-events-bison.o +libperf-y += pmu.o +libperf-y += pmu-flex.o +libperf-y += pmu-bison.o +libperf-y += trace-event-read.o +libperf-y += trace-event-info.o +libperf-y += trace-event-scripting.o +libperf-y += trace-event.o +libperf-y += svghelper.o +libperf-y += sort.o +libperf-y += hist.o +libperf-y += util.o +libperf-y += xyarray.o +libperf-y += cpumap.o +libperf-y += cgroup.o +libperf-y += target.o +libperf-y += rblist.o +libperf-y += intlist.o +libperf-y += vdso.o +libperf-y += stat.o +libperf-y += record.o +libperf-y += srcline.o +libperf-y += data.o +libperf-$(CONFIG_X86) += tsc.o +libperf-y += cloexec.o +libperf-y += thread-stack.o + +libperf-$(CONFIG_LIBELF) += symbol-elf.o +libperf-$(CONFIG_LIBELF) += probe-event.o + +ifndef CONFIG_LIBELF +libperf-y += symbol-minimal.o +endif + +libperf-$(CONFIG_DWARF) += probe-finder.o +libperf-$(CONFIG_DWARF) += dwarf-aux.o + +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o + +libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o + +libperf-y += scripting-engines/ + +libperf-$(CONFIG_PERF_REGS) += perf_regs.o +libperf-$(CONFIG_ZLIB) += zlib.o +libperf-$(CONFIG_LZMA) += lzma.o + +CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" + +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c + $(call rule_mkdir) + @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l + +$(OUTPUT)util/parse-events-bison.c: util/parse-events.y + $(call rule_mkdir) + @$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ + +$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c + $(call rule_mkdir) + @$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l + +$(OUTPUT)util/pmu-bison.c: util/pmu.y + $(call rule_mkdir) + @$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ + +CFLAGS_parse-events-flex.o += -w +CFLAGS_pmu-flex.o += -w +CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w +CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w + +$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c + +CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_parse-events.o += -Wno-redundant-decls + +$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 79999ceaf2be..7f5bdfc9bc87 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -30,6 +30,8 @@ static int disasm_line__parse(char *line, char **namep, char **rawp); static void ins__delete(struct ins_operands *ops) { + if (ops == NULL) + return; zfree(&ops->source.raw); zfree(&ops->source.name); zfree(&ops->target.raw); @@ -177,14 +179,17 @@ static int lock__parse(struct ins_operands *ops) goto out_free_ops; ops->locked.ins = ins__find(name); + free(name); + if (ops->locked.ins == NULL) goto out_free_ops; if (!ops->locked.ins->ops) return 0; - if (ops->locked.ins->ops->parse) - ops->locked.ins->ops->parse(ops->locked.ops); + if (ops->locked.ins->ops->parse && + ops->locked.ins->ops->parse(ops->locked.ops) < 0) + goto out_free_ops; return 0; @@ -208,6 +213,13 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size, static void lock__delete(struct ins_operands *ops) { + struct ins *ins = ops->locked.ins; + + if (ins && ins->ops->free) + ins->ops->free(ops->locked.ops); + else + ins__delete(ops->locked.ops); + zfree(&ops->locked.ops); zfree(&ops->target.raw); zfree(&ops->target.name); @@ -229,7 +241,7 @@ static int mov__parse(struct ins_operands *ops) *s = '\0'; ops->source.raw = strdup(ops->raw); *s = ','; - + if (ops->source.raw == NULL) return -1; @@ -531,8 +543,8 @@ static void disasm_line__init_ins(struct disasm_line *dl) if (!dl->ins->ops) return; - if (dl->ins->ops->parse) - dl->ins->ops->parse(&dl->ops); + if (dl->ins->ops->parse && dl->ins->ops->parse(&dl->ops) < 0) + dl->ins = NULL; } static int disasm_line__parse(char *line, char **namep, char **rawp) @@ -998,6 +1010,32 @@ fallback: } filename = symfs_filename; } + } else if (dso__needs_decompress(dso)) { + char tmp[PATH_MAX]; + struct kmod_path m; + int fd; + bool ret; + + if (kmod_path__parse_ext(&m, symfs_filename)) + goto out_free_filename; + + snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX"); + + fd = mkstemp(tmp); + if (fd < 0) { + free(m.ext); + goto out_free_filename; + } + + ret = decompress_to_file(m.ext, symfs_filename, fd); + + free(m.ext); + close(fd); + + if (!ret) + goto out_free_filename; + + strcpy(symfs_filename, tmp); } snprintf(command, sizeof(command), @@ -1017,7 +1055,7 @@ fallback: file = popen(command, "r"); if (!file) - goto out_free_filename; + goto out_remove_tmp; while (!feof(file)) if (symbol__parse_objdump_line(sym, map, file, privsize, @@ -1032,6 +1070,10 @@ fallback: delete_last_nop(sym); pclose(file); + +out_remove_tmp: + if (dso__needs_decompress(dso)) + unlink(symfs_filename); out_free_filename: if (delete_extract) kcore_extract__delete(&kce); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0784a9420528..cadbdc90a5cb 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -116,11 +116,6 @@ struct annotation { struct annotated_source *src; }; -struct sannotation { - struct annotation annotation; - struct symbol symbol; -}; - static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) { return (((void *)¬es->src->histograms) + @@ -129,8 +124,7 @@ static inline struct sym_hist *annotation__histogram(struct annotation *notes, i static inline struct annotation *symbol__annotation(struct symbol *sym) { - struct sannotation *a = container_of(sym, struct sannotation, symbol); - return &a->annotation; + return (void *)sym - symbol_conf.priv_size; } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 0c72680a977f..61867dff5d5a 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -59,11 +59,8 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid, event->fork.ppid, event->fork.ptid); - if (thread) { - rb_erase(&thread->rb_node, &machine->threads); - machine->last_match = NULL; - thread__delete(thread); - } + if (thread) + machine__remove_thread(machine, thread); return 0; } @@ -93,6 +90,35 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } +/* asnprintf consolidates asprintf and snprintf */ +static int asnprintf(char **strp, size_t size, const char *fmt, ...) +{ + va_list ap; + int ret; + + if (!strp) + return -EINVAL; + + va_start(ap, fmt); + if (*strp) + ret = vsnprintf(*strp, size, fmt, ap); + else + ret = vasprintf(strp, fmt, ap); + va_end(ap); + + return ret; +} + +static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +{ + char *tmp = bf; + int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, + sbuild_id, sbuild_id + 2); + if (ret < 0 || (tmp && size < (unsigned int)ret)) + return NULL; + return bf; +} + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; @@ -101,14 +127,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return NULL; build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - if (bf == NULL) { - if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2) < 0) - return NULL; - } else - snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2); - return bf; + return build_id__filename(build_id_hex, bf, size); } #define dsos__for_each_with_build_id(pos, head) \ @@ -259,52 +278,113 @@ void disable_buildid_cache(void) no_buildid_cache = true; } -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms, bool is_vdso) +static char *build_id_cache__dirname_from_path(const char *name, + bool is_kallsyms, bool is_vdso) { - const size_t size = PATH_MAX; - char *realname, *filename = zalloc(size), - *linkname = zalloc(size), *targetname; - int len, err = -1; + char *realname = (char *)name, *filename; bool slash = is_kallsyms || is_vdso; - if (is_kallsyms) { - if (symbol_conf.kptr_restrict) { - pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); - err = 0; - goto out_free; - } - realname = (char *) name; - } else + if (!slash) { realname = realpath(name, NULL); + if (!realname) + return NULL; + } - if (realname == NULL || filename == NULL || linkname == NULL) + if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "", + is_vdso ? DSO__NAME_VDSO : realname) < 0) + filename = NULL; + + if (!slash) + free(realname); + + return filename; +} + +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result) +{ + struct strlist *list; + char *dir_name; + DIR *dir; + struct dirent *d; + int ret = 0; + + list = strlist__new(true, NULL); + dir_name = build_id_cache__dirname_from_path(pathname, false, false); + if (!list || !dir_name) { + ret = -ENOMEM; + goto out; + } + + /* List up all dirents */ + dir = opendir(dir_name); + if (!dir) { + ret = -errno; + goto out; + } + + while ((d = readdir(dir)) != NULL) { + if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; + strlist__add(list, d->d_name); + } + closedir(dir); + +out: + free(dir_name); + if (ret) + strlist__delete(list); + else + *result = list; + + return ret; +} + +int build_id_cache__add_s(const char *sbuild_id, const char *name, + bool is_kallsyms, bool is_vdso) +{ + const size_t size = PATH_MAX; + char *realname = NULL, *filename = NULL, *dir_name = NULL, + *linkname = zalloc(size), *targetname, *tmp; + int err = -1; + + if (!is_kallsyms) { + realname = realpath(name, NULL); + if (!realname) + goto out_free; + } + + dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso); + if (!dir_name) goto out_free; - len = scnprintf(filename, size, "%s%s%s", - debugdir, slash ? "/" : "", - is_vdso ? DSO__NAME_VDSO : realname); - if (mkdir_p(filename, 0755)) + if (mkdir_p(dir_name, 0755)) goto out_free; - snprintf(filename + len, size - len, "/%s", sbuild_id); + if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) { + filename = NULL; + goto out_free; + } if (access(filename, F_OK)) { if (is_kallsyms) { if (copyfile("/proc/kallsyms", filename)) goto out_free; - } else if (link(realname, filename) && copyfile(name, filename)) + } else if (link(realname, filename) && errno != EEXIST && + copyfile(name, filename)) goto out_free; } - len = scnprintf(linkname, size, "%s/.build-id/%.2s", - debugdir, sbuild_id); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; + tmp = strrchr(linkname, '/'); + *tmp = '\0'; if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) goto out_free; - snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); - targetname = filename + strlen(debugdir) - 5; + *tmp = '/'; + targetname = filename + strlen(buildid_dir) - 5; memcpy(targetname, "../..", 5); if (symlink(targetname, linkname) == 0) @@ -313,34 +393,46 @@ out_free: if (!is_kallsyms) free(realname); free(filename); + free(dir_name); free(linkname); return err; } static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, - const char *name, const char *debugdir, - bool is_kallsyms, bool is_vdso) + const char *name, bool is_kallsyms, + bool is_vdso) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; build_id__sprintf(build_id, build_id_size, sbuild_id); - return build_id_cache__add_s(sbuild_id, debugdir, name, - is_kallsyms, is_vdso); + return build_id_cache__add_s(sbuild_id, name, is_kallsyms, is_vdso); +} + +bool build_id_cache__cached(const char *sbuild_id) +{ + bool ret = false; + char *filename = build_id__filename(sbuild_id, NULL, 0); + + if (filename && !access(filename, F_OK)) + ret = true; + free(filename); + + return ret; } -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) +int build_id_cache__remove_s(const char *sbuild_id) { const size_t size = PATH_MAX; char *filename = zalloc(size), - *linkname = zalloc(size); + *linkname = zalloc(size), *tmp; int err = -1; if (filename == NULL || linkname == NULL) goto out_free; - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, sbuild_id + 2); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; if (access(linkname, F_OK)) goto out_free; @@ -354,8 +446,8 @@ int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) /* * Since the link is relative, we must make it absolute: */ - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, filename); + tmp = strrchr(linkname, '/') + 1; + snprintf(tmp, size - (tmp - linkname), "%s", filename); if (unlink(linkname)) goto out_free; @@ -367,8 +459,7 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *dso, struct machine *machine, - const char *debugdir) +static int dso__cache_build_id(struct dso *dso, struct machine *machine) { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; bool is_vdso = dso__is_vdso(dso); @@ -381,28 +472,26 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, name = nm; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, - debugdir, is_kallsyms, is_vdso); + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, - struct machine *machine, const char *debugdir) + struct machine *machine) { struct dso *pos; int err = 0; dsos__for_each_with_build_id(pos, head) - if (dso__cache_build_id(pos, machine, debugdir)) + if (dso__cache_build_id(pos, machine)) err = -1; return err; } -static int machine__cache_build_ids(struct machine *machine, const char *debugdir) +static int machine__cache_build_ids(struct machine *machine) { - int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine, - debugdir); - ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine, - debugdir); + int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine); + ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine); return ret; } @@ -417,11 +506,11 @@ int perf_session__cache_build_ids(struct perf_session *session) if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host, buildid_dir); + ret = machine__cache_build_ids(&session->machines.host); for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos, buildid_dir); + ret |= machine__cache_build_ids(pos); } return ret ? -1 : 0; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 8236319514d5..85011222cc14 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -4,6 +4,7 @@ #define BUILD_ID_SIZE 20 #include "tool.h" +#include "strlist.h" #include <linux/types.h> extern struct perf_tool build_id__mark_dso_hit_ops; @@ -22,9 +23,12 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result); +bool build_id_cache__cached(const char *sbuild_id); +int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); +int build_id_cache__remove_s(const char *sbuild_id); void disable_buildid_cache(void); #endif diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 5cf9e1b5989d..fbcca21d66ab 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -17,6 +17,7 @@ #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); @@ -71,7 +72,9 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 extern char *perf_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +#ifndef __UCLIBC__ /* Matches the libc/libbsd function attribute so we declare this unconditionally: */ extern size_t strlcpy(char *dest, const char *src, size_t size); +#endif #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 64b377e591e4..9f643ee77001 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg) callchain_param.dump_size = size; } #endif /* HAVE_DWARF_UNWIND_SUPPORT */ + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + callchain_param.record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; } else { pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); @@ -841,3 +849,33 @@ char *callchain_list__sym_name(struct callchain_list *cl, return bf; } + +static void free_callchain_node(struct callchain_node *node) +{ + struct callchain_list *list, *tmp; + struct callchain_node *child; + struct rb_node *n; + + list_for_each_entry_safe(list, tmp, &node->val, list) { + list_del(&list->list); + free(list); + } + + n = rb_first(&node->rb_root_in); + while (n) { + child = container_of(n, struct callchain_node, rb_node_in); + n = rb_next(n); + rb_erase(&child->rb_node_in, &node->rb_root_in); + + free_callchain_node(child); + free(child); + } +} + +void free_callchain(struct callchain_root *root) +{ + if (!symbol_conf.use_callchain) + return; + + free_callchain_node(&root->node); +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index dbc08cf5f970..6033a0a212ca 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,6 +11,7 @@ enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, CALLCHAIN_DWARF, + CALLCHAIN_LBR, CALLCHAIN_MAX }; @@ -198,4 +199,6 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); +void free_callchain(struct callchain_root *root); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 47b78b3f0325..85b523885f9d 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -7,6 +7,12 @@ static unsigned long flag = PERF_FLAG_FD_CLOEXEC; +int __weak sched_getcpu(void) +{ + errno = ENOSYS; + return -1; +} + static int perf_flag_probe(void) { /* use 'safest' configuration as used in perf_evsel__fallback() */ @@ -25,6 +31,10 @@ static int perf_flag_probe(void) if (cpu < 0) cpu = 0; + /* + * Using -1 for the pid is a workaround to avoid gratuitous jump label + * changes. + */ while (1) { /* check cloexec flag */ fd = sys_perf_event_open(&attr, pid, cpu, -1, @@ -47,16 +57,24 @@ static int perf_flag_probe(void) err, strerror_r(err, sbuf, sizeof(sbuf))); /* not supported, confirm error related to PERF_FLAG_FD_CLOEXEC */ - fd = sys_perf_event_open(&attr, pid, cpu, -1, 0); + while (1) { + fd = sys_perf_event_open(&attr, pid, cpu, -1, 0); + if (fd < 0 && pid == -1 && errno == EACCES) { + pid = 0; + continue; + } + break; + } err = errno; + if (fd >= 0) + close(fd); + if (WARN_ONCE(fd < 0 && err != EBUSY, "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n", err, strerror_r(err, sbuf, sizeof(sbuf)))) return -1; - close(fd); - return 0; } diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 94a5a7d829d5..68888c29b04a 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -3,4 +3,10 @@ unsigned long perf_event_open_cloexec_flag(void); +#ifdef __GLIBC_PREREQ +#if !__GLIBC_PREREQ(2, 6) +extern int sched_getcpu(void) __THROW; +#endif +#endif + #endif /* __PERF_CLOEXEC_H */ diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index f4654183d391..55355b3d4f85 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -5,132 +5,6 @@ int perf_use_color_default = -1; -static int parse_color(const char *name, int len) -{ - static const char * const color_names[] = { - "normal", "black", "red", "green", "yellow", - "blue", "magenta", "cyan", "white" - }; - char *end; - int i; - - for (i = 0; i < (int)ARRAY_SIZE(color_names); i++) { - const char *str = color_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return i - 1; - } - i = strtol(name, &end, 10); - if (end - name == len && i >= -1 && i <= 255) - return i; - return -2; -} - -static int parse_attr(const char *name, int len) -{ - static const int attr_values[] = { 1, 2, 4, 5, 7 }; - static const char * const attr_names[] = { - "bold", "dim", "ul", "blink", "reverse" - }; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(attr_names); i++) { - const char *str = attr_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return attr_values[i]; - } - return -1; -} - -void color_parse(const char *value, const char *var, char *dst) -{ - color_parse_mem(value, strlen(value), var, dst); -} - -void color_parse_mem(const char *value, int value_len, const char *var, - char *dst) -{ - const char *ptr = value; - int len = value_len; - int attr = -1; - int fg = -2; - int bg = -2; - - if (!strncasecmp(value, "reset", len)) { - strcpy(dst, PERF_COLOR_RESET); - return; - } - - /* [fg [bg]] [attr] */ - while (len > 0) { - const char *word = ptr; - int val, wordlen = 0; - - while (len > 0 && !isspace(word[wordlen])) { - wordlen++; - len--; - } - - ptr = word + wordlen; - while (len > 0 && isspace(*ptr)) { - ptr++; - len--; - } - - val = parse_color(word, wordlen); - if (val >= -1) { - if (fg == -2) { - fg = val; - continue; - } - if (bg == -2) { - bg = val; - continue; - } - goto bad; - } - val = parse_attr(word, wordlen); - if (val < 0 || attr != -1) - goto bad; - attr = val; - } - - if (attr >= 0 || fg >= 0 || bg >= 0) { - int sep = 0; - - *dst++ = '\033'; - *dst++ = '['; - if (attr >= 0) { - *dst++ = '0' + attr; - sep++; - } - if (fg >= 0) { - if (sep++) - *dst++ = ';'; - if (fg < 8) { - *dst++ = '3'; - *dst++ = '0' + fg; - } else { - dst += sprintf(dst, "38;5;%d", fg); - } - } - if (bg >= 0) { - if (sep++) - *dst++ = ';'; - if (bg < 8) { - *dst++ = '4'; - *dst++ = '0' + bg; - } else { - dst += sprintf(dst, "48;5;%d", bg); - } - } - *dst++ = 'm'; - } - *dst = 0; - return; -bad: - die("bad color value '%.*s' for variable '%s'", value_len, value, var); -} - int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) { if (value) { diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 0a594b8a0c26..38146f922c54 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -30,8 +30,6 @@ extern int perf_use_color_default; int perf_color_default_config(const char *var, const char *value, void *cb); int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); -void color_parse(const char *value, const char *var, char *dst); -void color_parse_mem(const char *value, int len, const char *var, char *dst); int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c new file mode 100644 index 000000000000..dd17c9a32fbc --- /dev/null +++ b/tools/perf/util/data-convert-bt.c @@ -0,0 +1,857 @@ +/* + * CTF writing support via babeltrace. + * + * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com> + * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de> + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include <linux/compiler.h> +#include <babeltrace/ctf-writer/writer.h> +#include <babeltrace/ctf-writer/clock.h> +#include <babeltrace/ctf-writer/stream.h> +#include <babeltrace/ctf-writer/event.h> +#include <babeltrace/ctf-writer/event-types.h> +#include <babeltrace/ctf-writer/event-fields.h> +#include <babeltrace/ctf/events.h> +#include <traceevent/event-parse.h> +#include "asm/bug.h" +#include "data-convert-bt.h" +#include "session.h" +#include "util.h" +#include "debug.h" +#include "tool.h" +#include "evlist.h" +#include "evsel.h" +#include "machine.h" + +#define pr_N(n, fmt, ...) \ + eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) + +#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__) +#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__) + +#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__) + +struct evsel_priv { + struct bt_ctf_event_class *event_class; +}; + +struct ctf_writer { + /* writer primitives */ + struct bt_ctf_writer *writer; + struct bt_ctf_stream *stream; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_clock *clock; + + /* data types */ + union { + struct { + struct bt_ctf_field_type *s64; + struct bt_ctf_field_type *u64; + struct bt_ctf_field_type *s32; + struct bt_ctf_field_type *u32; + struct bt_ctf_field_type *string; + struct bt_ctf_field_type *u64_hex; + }; + struct bt_ctf_field_type *array[6]; + } data; +}; + +struct convert { + struct perf_tool tool; + struct ctf_writer writer; + + u64 events_size; + u64 events_count; +}; + +static int value_set(struct bt_ctf_field_type *type, + struct bt_ctf_event *event, + const char *name, u64 val) +{ + struct bt_ctf_field *field; + bool sign = bt_ctf_field_type_integer_get_signed(type); + int ret; + + field = bt_ctf_field_create(type); + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (sign) { + ret = bt_ctf_field_signed_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } else { + ret = bt_ctf_field_unsigned_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } + + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err; + } + + pr2(" SET [%s = %" PRIu64 "]\n", name, val); + +err: + bt_ctf_field_put(field); + return ret; +} + +#define __FUNC_VALUE_SET(_name, _val_type) \ +static __maybe_unused int value_set_##_name(struct ctf_writer *cw, \ + struct bt_ctf_event *event, \ + const char *name, \ + _val_type val) \ +{ \ + struct bt_ctf_field_type *type = cw->data._name; \ + return value_set(type, event, name, (u64) val); \ +} + +#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name) + +FUNC_VALUE_SET(s32) +FUNC_VALUE_SET(u32) +FUNC_VALUE_SET(s64) +FUNC_VALUE_SET(u64) +__FUNC_VALUE_SET(u64_hex, u64) + +static struct bt_ctf_field_type* +get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field) +{ + unsigned long flags = field->flags; + + if (flags & FIELD_IS_STRING) + return cw->data.string; + + if (!(flags & FIELD_IS_SIGNED)) { + /* unsigned long are mostly pointers */ + if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER) + return cw->data.u64_hex; + } + + if (flags & FIELD_IS_SIGNED) { + if (field->size == 8) + return cw->data.s64; + else + return cw->data.s32; + } + + if (field->size == 8) + return cw->data.u64; + else + return cw->data.u32; +} + +static int add_tracepoint_field_value(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample, + struct format_field *fmtf) +{ + struct bt_ctf_field_type *type; + struct bt_ctf_field *array_field; + struct bt_ctf_field *field; + const char *name = fmtf->name; + void *data = sample->raw_data; + unsigned long long value_int; + unsigned long flags = fmtf->flags; + unsigned int n_items; + unsigned int i; + unsigned int offset; + unsigned int len; + int ret; + + offset = fmtf->offset; + len = fmtf->size; + if (flags & FIELD_IS_STRING) + flags &= ~FIELD_IS_ARRAY; + + if (flags & FIELD_IS_DYNAMIC) { + unsigned long long tmp_val; + + tmp_val = pevent_read_number(fmtf->event->pevent, + data + offset, len); + offset = tmp_val; + len = offset >> 16; + offset &= 0xffff; + } + + if (flags & FIELD_IS_ARRAY) { + + type = bt_ctf_event_class_get_field_by_name( + event_class, name); + array_field = bt_ctf_field_create(type); + bt_ctf_field_type_put(type); + if (!array_field) { + pr_err("Failed to create array type %s\n", name); + return -1; + } + + len = fmtf->size / fmtf->arraylen; + n_items = fmtf->arraylen; + } else { + n_items = 1; + array_field = NULL; + } + + type = get_tracepoint_field_type(cw, fmtf); + + for (i = 0; i < n_items; i++) { + if (!(flags & FIELD_IS_STRING)) + value_int = pevent_read_number( + fmtf->event->pevent, + data + offset + i * len, len); + + if (flags & FIELD_IS_ARRAY) + field = bt_ctf_field_array_get_field(array_field, i); + else + field = bt_ctf_field_create(type); + + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (flags & FIELD_IS_STRING) + ret = bt_ctf_field_string_set_value(field, + data + offset + i * len); + else if (!(flags & FIELD_IS_SIGNED)) + ret = bt_ctf_field_unsigned_integer_set_value( + field, value_int); + else + ret = bt_ctf_field_signed_integer_set_value( + field, value_int); + if (ret) { + pr_err("failed to set file value %s\n", name); + goto err_put_field; + } + if (!(flags & FIELD_IS_ARRAY)) { + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err_put_field; + } + } + bt_ctf_field_put(field); + } + if (flags & FIELD_IS_ARRAY) { + ret = bt_ctf_event_set_payload(event, name, array_field); + if (ret) { + pr_err("Failed add payload array %s\n", name); + return -1; + } + bt_ctf_field_put(array_field); + } + return 0; + +err_put_field: + bt_ctf_field_put(field); + return -1; +} + +static int add_tracepoint_fields_values(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct format_field *fields, + struct perf_sample *sample) +{ + struct format_field *field; + int ret; + + for (field = fields; field; field = field->next) { + ret = add_tracepoint_field_value(cw, event_class, event, sample, + field); + if (ret) + return -1; + } + return 0; +} + +static int add_tracepoint_values(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + struct format_field *common_fields = evsel->tp_format->format.common_fields; + struct format_field *fields = evsel->tp_format->format.fields; + int ret; + + ret = add_tracepoint_fields_values(cw, event_class, event, + common_fields, sample); + if (!ret) + ret = add_tracepoint_fields_values(cw, event_class, event, + fields, sample); + + return ret; +} + +static int add_generic_values(struct ctf_writer *cw, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 type = evsel->attr.sample_type; + int ret; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + + if (type & PERF_SAMPLE_IP) { + ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TID) { + ret = value_set_s32(cw, event, "perf_tid", sample->tid); + if (ret) + return -1; + + ret = value_set_s32(cw, event, "perf_pid", sample->pid); + if (ret) + return -1; + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) { + ret = value_set_u64(cw, event, "perf_id", sample->id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_CPU) { + ret = value_set_u32(cw, event, "perf_cpu", sample->cpu); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_PERIOD) { + ret = value_set_u64(cw, event, "perf_period", sample->period); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_WEIGHT) { + ret = value_set_u64(cw, event, "perf_weight", sample->weight); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_DATA_SRC) { + ret = value_set_u64(cw, event, "perf_data_src", + sample->data_src); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TRANSACTION) { + ret = value_set_u64(cw, event, "perf_transaction", + sample->transaction); + if (ret) + return -1; + } + + return 0; +} + +static int process_sample_event(struct perf_tool *tool, + union perf_event *_event __maybe_unused, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __maybe_unused) +{ + struct convert *c = container_of(tool, struct convert, tool); + struct evsel_priv *priv = evsel->priv; + struct ctf_writer *cw = &c->writer; + struct bt_ctf_event_class *event_class; + struct bt_ctf_event *event; + int ret; + + if (WARN_ONCE(!priv, "Failed to setup all events.\n")) + return 0; + + event_class = priv->event_class; + + /* update stats */ + c->events_count++; + c->events_size += _event->header.size; + + pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count); + + event = bt_ctf_event_create(event_class); + if (!event) { + pr_err("Failed to create an CTF event\n"); + return -1; + } + + bt_ctf_clock_set_time(cw->clock, sample->time); + + ret = add_generic_values(cw, event, evsel, sample); + if (ret) + return -1; + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + ret = add_tracepoint_values(cw, event_class, event, + evsel, sample); + if (ret) + return -1; + } + + bt_ctf_stream_append_event(cw->stream, event); + bt_ctf_event_put(event); + return 0; +} + +static int add_tracepoint_fields_types(struct ctf_writer *cw, + struct format_field *fields, + struct bt_ctf_event_class *event_class) +{ + struct format_field *field; + int ret; + + for (field = fields; field; field = field->next) { + struct bt_ctf_field_type *type; + unsigned long flags = field->flags; + + pr2(" field '%s'\n", field->name); + + type = get_tracepoint_field_type(cw, field); + if (!type) + return -1; + + /* + * A string is an array of chars. For this we use the string + * type and don't care that it is an array. What we don't + * support is an array of strings. + */ + if (flags & FIELD_IS_STRING) + flags &= ~FIELD_IS_ARRAY; + + if (flags & FIELD_IS_ARRAY) + type = bt_ctf_field_type_array_create(type, field->arraylen); + + ret = bt_ctf_event_class_add_field(event_class, type, + field->name); + + if (flags & FIELD_IS_ARRAY) + bt_ctf_field_type_put(type); + + if (ret) { + pr_err("Failed to add field '%s\n", field->name); + return -1; + } + } + + return 0; +} + +static int add_tracepoint_types(struct ctf_writer *cw, + struct perf_evsel *evsel, + struct bt_ctf_event_class *class) +{ + struct format_field *common_fields = evsel->tp_format->format.common_fields; + struct format_field *fields = evsel->tp_format->format.fields; + int ret; + + ret = add_tracepoint_fields_types(cw, common_fields, class); + if (!ret) + ret = add_tracepoint_fields_types(cw, fields, class); + + return ret; +} + +static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, + struct bt_ctf_event_class *event_class) +{ + u64 type = evsel->attr.sample_type; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + +#define ADD_FIELD(cl, t, n) \ + do { \ + pr2(" field '%s'\n", n); \ + if (bt_ctf_event_class_add_field(cl, t, n)) { \ + pr_err("Failed to add field '%s;\n", n); \ + return -1; \ + } \ + } while (0) + + if (type & PERF_SAMPLE_IP) + ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip"); + + if (type & PERF_SAMPLE_TID) { + ADD_FIELD(event_class, cw->data.s32, "perf_tid"); + ADD_FIELD(event_class, cw->data.s32, "perf_pid"); + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) + ADD_FIELD(event_class, cw->data.u64, "perf_id"); + + if (type & PERF_SAMPLE_STREAM_ID) + ADD_FIELD(event_class, cw->data.u64, "perf_stream_id"); + + if (type & PERF_SAMPLE_CPU) + ADD_FIELD(event_class, cw->data.u32, "perf_cpu"); + + if (type & PERF_SAMPLE_PERIOD) + ADD_FIELD(event_class, cw->data.u64, "perf_period"); + + if (type & PERF_SAMPLE_WEIGHT) + ADD_FIELD(event_class, cw->data.u64, "perf_weight"); + + if (type & PERF_SAMPLE_DATA_SRC) + ADD_FIELD(event_class, cw->data.u64, "perf_data_src"); + + if (type & PERF_SAMPLE_TRANSACTION) + ADD_FIELD(event_class, cw->data.u64, "perf_transaction"); + +#undef ADD_FIELD + return 0; +} + +static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) +{ + struct bt_ctf_event_class *event_class; + struct evsel_priv *priv; + const char *name = perf_evsel__name(evsel); + int ret; + + pr("Adding event '%s' (type %d)\n", name, evsel->attr.type); + + event_class = bt_ctf_event_class_create(name); + if (!event_class) + return -1; + + ret = add_generic_types(cw, evsel, event_class); + if (ret) + goto err; + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + ret = add_tracepoint_types(cw, evsel, event_class); + if (ret) + goto err; + } + + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); + if (ret) { + pr("Failed to add event class into stream.\n"); + goto err; + } + + priv = malloc(sizeof(*priv)); + if (!priv) + goto err; + + priv->event_class = event_class; + evsel->priv = priv; + return 0; + +err: + bt_ctf_event_class_put(event_class); + pr_err("Failed to add event '%s'.\n", name); + return -1; +} + +static int setup_events(struct ctf_writer *cw, struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + int ret; + + evlist__for_each(evlist, evsel) { + ret = add_event(cw, evsel); + if (ret) + return ret; + } + return 0; +} + +static int ctf_writer__setup_env(struct ctf_writer *cw, + struct perf_session *session) +{ + struct perf_header *header = &session->header; + struct bt_ctf_writer *writer = cw->writer; + +#define ADD(__n, __v) \ +do { \ + if (bt_ctf_writer_add_environment_field(writer, __n, __v)) \ + return -1; \ +} while (0) + + ADD("host", header->env.hostname); + ADD("sysname", "Linux"); + ADD("release", header->env.os_release); + ADD("version", header->env.version); + ADD("machine", header->env.arch); + ADD("domain", "kernel"); + ADD("tracer_name", "perf"); + +#undef ADD + return 0; +} + +static int ctf_writer__setup_clock(struct ctf_writer *cw) +{ + struct bt_ctf_clock *clock = cw->clock; + + bt_ctf_clock_set_description(clock, "perf clock"); + +#define SET(__n, __v) \ +do { \ + if (bt_ctf_clock_set_##__n(clock, __v)) \ + return -1; \ +} while (0) + + SET(frequency, 1000000000); + SET(offset_s, 0); + SET(offset, 0); + SET(precision, 10); + SET(is_absolute, 0); + +#undef SET + return 0; +} + +static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) +{ + struct bt_ctf_field_type *type; + + type = bt_ctf_field_type_integer_create(size); + if (!type) + return NULL; + + if (sign && + bt_ctf_field_type_integer_set_signed(type, 1)) + goto err; + + if (hex && + bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) + goto err; + + pr2("Created type: INTEGER %d-bit %ssigned %s\n", + size, sign ? "un" : "", hex ? "hex" : ""); + return type; + +err: + bt_ctf_field_type_put(type); + return NULL; +} + +static void ctf_writer__cleanup_data(struct ctf_writer *cw) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cw->data.array); i++) + bt_ctf_field_type_put(cw->data.array[i]); +} + +static int ctf_writer__init_data(struct ctf_writer *cw) +{ +#define CREATE_INT_TYPE(type, size, sign, hex) \ +do { \ + (type) = create_int_type(size, sign, hex); \ + if (!(type)) \ + goto err; \ +} while (0) + + CREATE_INT_TYPE(cw->data.s64, 64, true, false); + CREATE_INT_TYPE(cw->data.u64, 64, false, false); + CREATE_INT_TYPE(cw->data.s32, 32, true, false); + CREATE_INT_TYPE(cw->data.u32, 32, false, false); + CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true); + + cw->data.string = bt_ctf_field_type_string_create(); + if (cw->data.string) + return 0; + +err: + ctf_writer__cleanup_data(cw); + pr_err("Failed to create data types.\n"); + return -1; +} + +static void ctf_writer__cleanup(struct ctf_writer *cw) +{ + ctf_writer__cleanup_data(cw); + + bt_ctf_clock_put(cw->clock); + bt_ctf_stream_put(cw->stream); + bt_ctf_stream_class_put(cw->stream_class); + bt_ctf_writer_put(cw->writer); + + /* and NULL all the pointers */ + memset(cw, 0, sizeof(*cw)); +} + +static int ctf_writer__init(struct ctf_writer *cw, const char *path) +{ + struct bt_ctf_writer *writer; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_stream *stream; + struct bt_ctf_clock *clock; + + /* CTF writer */ + writer = bt_ctf_writer_create(path); + if (!writer) + goto err; + + cw->writer = writer; + + /* CTF clock */ + clock = bt_ctf_clock_create("perf_clock"); + if (!clock) { + pr("Failed to create CTF clock.\n"); + goto err_cleanup; + } + + cw->clock = clock; + + if (ctf_writer__setup_clock(cw)) { + pr("Failed to setup CTF clock.\n"); + goto err_cleanup; + } + + /* CTF stream class */ + stream_class = bt_ctf_stream_class_create("perf_stream"); + if (!stream_class) { + pr("Failed to create CTF stream class.\n"); + goto err_cleanup; + } + + cw->stream_class = stream_class; + + /* CTF clock stream setup */ + if (bt_ctf_stream_class_set_clock(stream_class, clock)) { + pr("Failed to assign CTF clock to stream class.\n"); + goto err_cleanup; + } + + if (ctf_writer__init_data(cw)) + goto err_cleanup; + + /* CTF stream instance */ + stream = bt_ctf_writer_create_stream(writer, stream_class); + if (!stream) { + pr("Failed to create CTF stream.\n"); + goto err_cleanup; + } + + cw->stream = stream; + + /* CTF clock writer setup */ + if (bt_ctf_writer_add_clock(writer, clock)) { + pr("Failed to assign CTF clock to writer.\n"); + goto err_cleanup; + } + + return 0; + +err_cleanup: + ctf_writer__cleanup(cw); +err: + pr_err("Failed to setup CTF writer.\n"); + return -1; +} + +int bt_convert__perf2ctf(const char *input, const char *path, bool force) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = input, + .mode = PERF_DATA_MODE_READ, + .force = force, + }; + struct convert c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, + }; + struct ctf_writer *cw = &c.writer; + int err = -1; + + /* CTF writer */ + if (ctf_writer__init(cw, path)) + return -1; + + /* perf.data session */ + session = perf_session__new(&file, 0, &c.tool); + if (!session) + goto free_writer; + + /* CTF writer env/clock setup */ + if (ctf_writer__setup_env(cw, session)) + goto free_session; + + /* CTF events setup */ + if (setup_events(cw, session)) + goto free_session; + + err = perf_session__process_events(session); + if (!err) + err = bt_ctf_stream_flush(cw->stream); + + fprintf(stderr, + "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", + file.path, path); + + fprintf(stderr, + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", + (double) c.events_size / 1024.0 / 1024.0, + c.events_count); + + /* its all good */ +free_session: + perf_session__delete(session); + +free_writer: + ctf_writer__cleanup(cw); + return err; +} diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h new file mode 100644 index 000000000000..4c204342a9d8 --- /dev/null +++ b/tools/perf/util/data-convert-bt.h @@ -0,0 +1,8 @@ +#ifndef __DATA_CONVERT_BT_H +#define __DATA_CONVERT_BT_H +#ifdef HAVE_LIBBABELTRACE_SUPPORT + +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, bool force); + +#endif /* HAVE_LIBBABELTRACE_SUPPORT */ +#endif /* __DATA_CONVERT_BT_H */ diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index c81dae399763..bb39a3ffc70b 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -282,13 +282,13 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) + struct addr_location *al) { + struct thread* thread = al->thread; struct export_sample es = { .event = event, .sample = sample, .evsel = evsel, - .thread = thread, .al = al, }; struct thread *main_thread; diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index adbd22d66798..25e22fd76aca 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -34,7 +34,6 @@ struct export_sample { union perf_event *event; struct perf_sample *sample; struct perf_evsel *evsel; - struct thread *thread; struct addr_location *al; u64 db_id; u64 comm_db_id; @@ -97,7 +96,7 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, const char *name); int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al); + struct addr_location *al); int db_export__branch_types(struct db_export *dbe); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index ad60b2f20258..2da5581ec74d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -20,6 +20,7 @@ int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; +int debug_data_convert; static int _eprintf(int level, int var, const char *fmt, va_list args) { @@ -147,6 +148,7 @@ static struct debug_variable { { .name = "verbose", .ptr = &verbose }, { .name = "ordered-events", .ptr = &debug_ordered_events}, { .name = "stderr", .ptr = &redirect_to_stderr}, + { .name = "data-convert", .ptr = &debug_data_convert }, { .name = NULL, } }; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index be264d6f3b30..caac2fdc6105 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -12,6 +12,7 @@ extern int verbose; extern bool quiet, dump_trace; extern int debug_ordered_events; +extern int debug_data_convert; #ifndef pr_fmt #define pr_fmt(fmt) fmt diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 45be944d450a..fc0ddd5792a9 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -45,13 +45,13 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__DEBUGLINK: { char *debuglink; - strncpy(filename, dso->long_name, size); - debuglink = filename + dso->long_name_len; + len = __symbol__join_symfs(filename, size, dso->long_name); + debuglink = filename + len; while (debuglink != filename && *debuglink != '/') debuglink--; if (*debuglink == '/') debuglink++; - ret = filename__read_debuglink(dso->long_name, debuglink, + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } break; @@ -148,6 +148,9 @@ static const struct { #ifdef HAVE_ZLIB_SUPPORT { "gz", gzip_decompress_to_file }, #endif +#ifdef HAVE_LZMA_SUPPORT + { "xz", lzma_decompress_to_file }, +#endif { NULL, NULL }, }; @@ -162,32 +165,14 @@ bool is_supported_compression(const char *ext) return false; } -bool is_kmodule_extension(const char *ext) +bool is_kernel_module(const char *pathname) { - if (strncmp(ext, "ko", 2)) - return false; - - if (ext[2] == '\0' || (ext[2] == '.' && is_supported_compression(ext+3))) - return true; - - return false; -} - -bool is_kernel_module(const char *pathname, bool *compressed) -{ - const char *ext = strrchr(pathname, '.'); - - if (ext == NULL) - return false; + struct kmod_path m; - if (is_supported_compression(ext + 1)) { - if (compressed) - *compressed = true; - ext -= 3; - } else if (compressed) - *compressed = false; + if (kmod_path__parse(&m, pathname)) + return NULL; - return is_kmodule_extension(ext + 1); + return m.kmod; } bool decompress_to_file(const char *ext, const char *filename, int output_fd) @@ -209,6 +194,72 @@ bool dso__needs_decompress(struct dso *dso) } /* + * Parses kernel module specified in @path and updates + * @m argument like: + * + * @comp - true if @path contains supported compression suffix, + * false otherwise + * @kmod - true if @path contains '.ko' suffix in right position, + * false otherwise + * @name - if (@alloc_name && @kmod) is true, it contains strdup-ed base name + * of the kernel module without suffixes, otherwise strudup-ed + * base name of @path + * @ext - if (@alloc_ext && @comp) is true, it contains strdup-ed string + * the compression suffix + * + * Returns 0 if there's no strdup error, -ENOMEM otherwise. + */ +int __kmod_path__parse(struct kmod_path *m, const char *path, + bool alloc_name, bool alloc_ext) +{ + const char *name = strrchr(path, '/'); + const char *ext = strrchr(path, '.'); + + memset(m, 0x0, sizeof(*m)); + name = name ? name + 1 : path; + + /* No extension, just return name. */ + if (ext == NULL) { + if (alloc_name) { + m->name = strdup(name); + return m->name ? 0 : -ENOMEM; + } + return 0; + } + + if (is_supported_compression(ext + 1)) { + m->comp = true; + ext -= 3; + } + + /* Check .ko extension only if there's enough name left. */ + if (ext > name) + m->kmod = !strncmp(ext, ".ko", 3); + + if (alloc_name) { + if (m->kmod) { + if (asprintf(&m->name, "[%.*s]", (int) (ext - name), name) == -1) + return -ENOMEM; + } else { + if (asprintf(&m->name, "%s", name) == -1) + return -ENOMEM; + } + + strxfrchar(m->name, '-', '_'); + } + + if (alloc_ext && m->comp) { + m->ext = strdup(ext + 4); + if (!m->ext) { + free((void *) m->name); + return -ENOMEM; + } + } + + return 0; +} + +/* * Global list of open DSOs and the counter. */ static LIST_HEAD(dso__data_open); @@ -240,7 +291,7 @@ static int do_open(char *name) if (fd >= 0) return fd; - pr_debug("dso open failed, mmap: %s\n", + pr_debug("dso open failed: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); if (!dso__data_open_cnt || errno != EMFILE) break; @@ -532,12 +583,8 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size) break; cache_offset = offset & DSO__DATA_CACHE_MASK; - ret = -EINVAL; - - if (-1 == lseek(dso->data.fd, cache_offset, SEEK_SET)) - break; - ret = read(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE); + ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset); if (ret <= 0) break; @@ -1006,21 +1053,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name, return dso__find_by_longname(&dsos->root, name); } -struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +struct dso *dsos__addnew(struct dsos *dsos, const char *name) { - struct dso *dso = dsos__find(dsos, name, false); + struct dso *dso = dso__new(name); - if (!dso) { - dso = dso__new(name); - if (dso != NULL) { - dsos__add(dsos, dso); - dso__set_basename(dso); - } + if (dso != NULL) { + dsos__add(dsos, dso); + dso__set_basename(dso); } - return dso; } +struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +{ + struct dso *dso = dsos__find(dsos, name, false); + + return dso ? dso : dsos__addnew(dsos, name); +} + size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm) { @@ -1087,3 +1137,36 @@ enum dso_type dso__type(struct dso *dso, struct machine *machine) return dso__type_fd(fd); } + +int dso__strerror_load(struct dso *dso, char *buf, size_t buflen) +{ + int idx, errnum = dso->load_errno; + /* + * This must have a same ordering as the enum dso_load_errno. + */ + static const char *dso_load__error_str[] = { + "Internal tools/perf/ library error", + "Invalid ELF file", + "Can not read build id", + "Mismatching build id", + "Decompression failure", + }; + + BUG_ON(buflen == 0); + + if (errnum >= 0) { + const char *err = strerror_r(errnum, buf, buflen); + + if (err != buf) + scnprintf(buf, buflen, "%s", err); + + return 0; + } + + if (errnum < __DSO_LOAD_ERRNO__START || errnum >= __DSO_LOAD_ERRNO__END) + return -1; + + idx = errnum - __DSO_LOAD_ERRNO__START; + scnprintf(buf, buflen, "%s", dso_load__error_str[idx]); + return 0; +} diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 3782c82c6e44..e0901b4ed8de 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -60,6 +60,31 @@ enum dso_type { DSO__TYPE_X32BIT, }; +enum dso_load_errno { + DSO_LOAD_ERRNO__SUCCESS = 0, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. + * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __DSO_LOAD_ERRNO__START = -10000, + + DSO_LOAD_ERRNO__INTERNAL_ERROR = __DSO_LOAD_ERRNO__START, + + /* for symsrc__init() */ + DSO_LOAD_ERRNO__INVALID_ELF, + DSO_LOAD_ERRNO__CANNOT_READ_BUILDID, + DSO_LOAD_ERRNO__MISMATCHING_BUILDID, + + /* for decompress_kmodule */ + DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE, + + __DSO_LOAD_ERRNO__END, +}; + #define DSO__SWAP(dso, type, val) \ ({ \ type ____r = val; \ @@ -113,6 +138,7 @@ struct dso { enum dso_swap_type needs_swap; enum dso_binary_type symtab_type; enum dso_binary_type binary_type; + enum dso_load_errno load_errno; u8 adjust_symbols:1; u8 has_build_id:1; u8 has_srcline:1; @@ -139,6 +165,8 @@ struct dso { u32 status_seen; size_t file_size; struct list_head open_entry; + u64 debug_frame_offset; + u64 eh_frame_hdr_offset; } data; union { /* Tool specific area */ @@ -188,11 +216,24 @@ char dso__symtab_origin(const struct dso *dso); int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size); bool is_supported_compression(const char *ext); -bool is_kmodule_extension(const char *ext); -bool is_kernel_module(const char *pathname, bool *compressed); +bool is_kernel_module(const char *pathname); bool decompress_to_file(const char *ext, const char *filename, int output_fd); bool dso__needs_decompress(struct dso *dso); +struct kmod_path { + char *name; + char *ext; + bool comp; + bool kmod; +}; + +int __kmod_path__parse(struct kmod_path *m, const char *path, + bool alloc_name, bool alloc_ext); + +#define kmod_path__parse(__m, __p) __kmod_path__parse(__m, __p, false, false) +#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false) +#define kmod_path__parse_ext(__m, __p) __kmod_path__parse(__m, __p, false, true) + /* * The dso__data_* external interface provides following functions: * dso__data_fd @@ -248,6 +289,7 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name, const char *short_name, int dso_type); void dsos__add(struct dsos *dsos, struct dso *dso); +struct dso *dsos__addnew(struct dsos *dsos, const char *name); struct dso *dsos__find(const struct dsos *dsos, const char *name, bool cmp_short); struct dso *__dsos__findnew(struct dsos *dsos, const char *name); @@ -278,4 +320,6 @@ void dso__free_a2l(struct dso *dso); enum dso_type dso__type(struct dso *dso, struct machine *machine); +int dso__strerror_load(struct dso *dso, char *buf, size_t buflen); + #endif /* __PERF_DSO */ diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index cc66c4049e09..c34e024020c7 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -278,6 +278,21 @@ bool die_is_func_def(Dwarf_Die *dw_die) } /** + * die_is_func_instance - Ensure that this DIE is an instance of a subprogram + * @dw_die: a DIE + * + * Ensure that this DIE is an instance (which has an entry address). + * This returns true if @dw_die is a function instance. If not, you need to + * call die_walk_instances() to find actual instances. + **/ +bool die_is_func_instance(Dwarf_Die *dw_die) +{ + Dwarf_Addr tmp; + + /* Actually gcc optimizes non-inline as like as inlined */ + return !dwarf_func_inline(dw_die) && dwarf_entrypc(dw_die, &tmp) == 0; +} +/** * die_get_data_member_location - Get the data-member offset * @mb_die: a DIE of a member of a data structure * @offs: The offset of the member in the data structure @@ -786,10 +801,16 @@ static int __die_find_member_cb(Dwarf_Die *die_mem, void *data) { const char *name = data; - if ((dwarf_tag(die_mem) == DW_TAG_member) && - die_compare_name(die_mem, name)) - return DIE_FIND_CB_END; - + if (dwarf_tag(die_mem) == DW_TAG_member) { + if (die_compare_name(die_mem, name)) + return DIE_FIND_CB_END; + else if (!dwarf_diename(die_mem)) { /* Unnamed structure */ + Dwarf_Die type_die, tmp_die; + if (die_get_type(die_mem, &type_die) && + die_find_member(&type_die, name, &tmp_die)) + return DIE_FIND_CB_END; + } + } return DIE_FIND_CB_SIBLING; } diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index b4fe90c6cb2d..af7dbcd5f929 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -41,6 +41,9 @@ extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, /* Ensure that this DIE is a subprogram and definition (not declaration) */ extern bool die_is_func_def(Dwarf_Die *dw_die); +/* Ensure that this DIE is an instance of a subprogram */ +extern bool die_is_func_instance(Dwarf_Die *dw_die); + /* Compare diename and tname */ extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6c6d044e959a..ff866c4d2e2f 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -49,72 +49,103 @@ static struct perf_sample synth_sample = { .period = 1, }; -static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len) +/* + * Assumes that the first 4095 bytes of /proc/pid/stat contains + * the comm, tgid and ppid. + */ +static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, + pid_t *tgid, pid_t *ppid) { char filename[PATH_MAX]; - char bf[BUFSIZ]; - FILE *fp; - size_t size = 0; - pid_t tgid = -1; + char bf[4096]; + int fd; + size_t size = 0, n; + char *nl, *name, *tgids, *ppids; + + *tgid = -1; + *ppid = -1; snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - fp = fopen(filename, "r"); - if (fp == NULL) { + fd = open(filename, O_RDONLY); + if (fd < 0) { pr_debug("couldn't open %s\n", filename); - return 0; + return -1; } - while (!comm[0] || (tgid < 0)) { - if (fgets(bf, sizeof(bf), fp) == NULL) { - pr_warning("couldn't get COMM and pgid, malformed %s\n", - filename); - break; - } + n = read(fd, bf, sizeof(bf) - 1); + close(fd); + if (n <= 0) { + pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n", + pid); + return -1; + } + bf[n] = '\0'; - if (memcmp(bf, "Name:", 5) == 0) { - char *name = bf + 5; - while (*name && isspace(*name)) - ++name; - size = strlen(name) - 1; - if (size >= len) - size = len - 1; - memcpy(comm, name, size); - comm[size] = '\0'; - - } else if (memcmp(bf, "Tgid:", 5) == 0) { - char *tgids = bf + 5; - while (*tgids && isspace(*tgids)) - ++tgids; - tgid = atoi(tgids); - } + name = strstr(bf, "Name:"); + tgids = strstr(bf, "Tgid:"); + ppids = strstr(bf, "PPid:"); + + if (name) { + name += 5; /* strlen("Name:") */ + + while (*name && isspace(*name)) + ++name; + + nl = strchr(name, '\n'); + if (nl) + *nl = '\0'; + + size = strlen(name); + if (size >= len) + size = len - 1; + memcpy(comm, name, size); + comm[size] = '\0'; + } else { + pr_debug("Name: string not found for pid %d\n", pid); } - fclose(fp); + if (tgids) { + tgids += 5; /* strlen("Tgid:") */ + *tgid = atoi(tgids); + } else { + pr_debug("Tgid: string not found for pid %d\n", pid); + } - return tgid; + if (ppids) { + ppids += 5; /* strlen("PPid:") */ + *ppid = atoi(ppids); + } else { + pr_debug("PPid: string not found for pid %d\n", pid); + } + + return 0; } -static pid_t perf_event__synthesize_comm(struct perf_tool *tool, - union perf_event *event, pid_t pid, - perf_event__handler_t process, - struct machine *machine) +static int perf_event__prepare_comm(union perf_event *event, pid_t pid, + struct machine *machine, + pid_t *tgid, pid_t *ppid) { size_t size; - pid_t tgid; + + *ppid = -1; memset(&event->comm, 0, sizeof(event->comm)); - if (machine__is_host(machine)) - tgid = perf_event__get_comm_tgid(pid, event->comm.comm, - sizeof(event->comm.comm)); - else - tgid = machine->pid; + if (machine__is_host(machine)) { + if (perf_event__get_comm_ids(pid, event->comm.comm, + sizeof(event->comm.comm), + tgid, ppid) != 0) { + return -1; + } + } else { + *tgid = machine->pid; + } - if (tgid < 0) - goto out; + if (*tgid < 0) + return -1; - event->comm.pid = tgid; + event->comm.pid = *tgid; event->comm.header.type = PERF_RECORD_COMM; size = strlen(event->comm.comm) + 1; @@ -125,23 +156,45 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, machine->id_hdr_size); event->comm.tid = pid; + return 0; +} + +static pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + perf_event__handler_t process, + struct machine *machine) +{ + pid_t tgid, ppid; + + if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) + return -1; + if (process(tool, event, &synth_sample, machine) != 0) return -1; -out: return tgid; } static int perf_event__synthesize_fork(struct perf_tool *tool, - union perf_event *event, pid_t pid, - pid_t tgid, perf_event__handler_t process, + union perf_event *event, + pid_t pid, pid_t tgid, pid_t ppid, + perf_event__handler_t process, struct machine *machine) { memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); - /* this is really a clone event but we use fork to synthesize it */ - event->fork.ppid = tgid; - event->fork.ptid = tgid; + /* + * for main thread set parent to ppid from status file. For other + * threads set parent pid to main thread. ie., assume main thread + * spawns all threads in a process + */ + if (tgid == pid) { + event->fork.ppid = ppid; + event->fork.ptid = ppid; + } else { + event->fork.ppid = tgid; + event->fork.ptid = tgid; + } event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; @@ -333,7 +386,8 @@ static int __event__synthesize_thread(union perf_event *comm_event, char filename[PATH_MAX]; DIR *tasks; struct dirent dirent, *next; - pid_t tgid; + pid_t tgid, ppid; + int rc = 0; /* special case: only send one comm event using passed in pid */ if (!full) { @@ -361,34 +415,38 @@ static int __event__synthesize_thread(union perf_event *comm_event, while (!readdir_r(tasks, &dirent, &next) && next) { char *end; - int rc = 0; pid_t _pid; _pid = strtol(dirent.d_name, &end, 10); if (*end) continue; - tgid = perf_event__synthesize_comm(tool, comm_event, _pid, - process, machine); - if (tgid == -1) - return -1; + rc = -1; + if (perf_event__prepare_comm(comm_event, _pid, machine, + &tgid, &ppid) != 0) + break; + if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, + ppid, process, machine) < 0) + break; + /* + * Send the prepared comm event + */ + if (process(tool, comm_event, &synth_sample, machine) != 0) + break; + + rc = 0; if (_pid == pid) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, process, machine, mmap_data); - } else { - /* only fork the tid's map, to save time */ - rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid, - process, machine); + if (rc) + break; } - - if (rc) - return rc; } closedir(tasks); - return 0; + return rc; } int perf_event__synthesize_thread_map(struct perf_tool *tool, @@ -615,7 +673,7 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) else s = ""; - return fprintf(fp, "%s: %s:%d\n", s, event->comm.comm, event->comm.tid); + return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid); } int perf_event__process_comm(struct perf_tool *tool __maybe_unused, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c4ffe2bd0738..09b9e8d3fcf7 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -242,7 +242,6 @@ struct events_stats { u32 nr_invalid_chains; u32 nr_unknown_id; u32 nr_unprocessable_samples; - u32 nr_unordered_events; }; struct attr_event { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index cbab1fb77b1d..080be93eea96 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -7,7 +7,6 @@ * Released under the GPL v2. (and only v2, not any later version) */ #include "util.h" -#include <api/fs/debugfs.h> #include <api/fs/fs.h> #include <poll.h> #include "cpumap.h" @@ -635,8 +634,8 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; - unsigned int head = perf_mmap__read_head(md); - unsigned int old = md->prev; + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; unsigned char *data = md->base + page_size; union perf_event *event = NULL; @@ -696,7 +695,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) static bool perf_mmap__empty(struct perf_mmap *md) { - return perf_mmap__read_head(md) != md->prev; + return perf_mmap__read_head(md) == md->prev; } static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) @@ -717,7 +716,7 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) struct perf_mmap *md = &evlist->mmap[idx]; if (!evlist->overwrite) { - unsigned int old = md->prev; + u64 old = md->prev; perf_mmap__write_tail(md, old); } @@ -1051,7 +1050,7 @@ out_delete_threads: return -1; } -int perf_evlist__apply_filters(struct perf_evlist *evlist) +int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; int err = 0; @@ -1063,8 +1062,10 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist) continue; err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); - if (err) + if (err) { + *err_evsel = evsel; break; + } } return err; @@ -1086,6 +1087,38 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) return err; } +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter; + int ret = -1; + size_t i; + + for (i = 0; i < npids; ++i) { + if (i == 0) { + if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) + return -1; + } else { + char *tmp; + + if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) + goto out_free; + + free(filter); + filter = tmp; + } + } + + ret = perf_evlist__set_filter(evlist, filter); +out_free: + free(filter); + return ret; +} + +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) +{ + return perf_evlist__set_filter_pids(evlist, 1, &pid); +} + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { struct perf_evsel *pos; @@ -1329,7 +1362,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar * writing exactly one byte, in workload.cork_fd, usually via * perf_evlist__start_workload(). * - * For cancelling the workload without actuallin running it, + * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() * here. @@ -1436,33 +1469,6 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n"); } -int perf_evlist__strerror_tp(struct perf_evlist *evlist __maybe_unused, - int err, char *buf, size_t size) -{ - char sbuf[128]; - - switch (err) { - case ENOENT: - scnprintf(buf, size, "%s", - "Error:\tUnable to find debugfs\n" - "Hint:\tWas your kernel was compiled with debugfs support?\n" - "Hint:\tIs the debugfs filesystem mounted?\n" - "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'"); - break; - case EACCES: - scnprintf(buf, size, - "Error:\tNo permissions to read %s/tracing/events/raw_syscalls\n" - "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n", - debugfs_mountpoint, debugfs_mountpoint); - break; - default: - scnprintf(buf, size, "%s", strerror_r(err, sbuf, sizeof(sbuf))); - break; - } - - return 0; -} - int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, int err, char *buf, size_t size) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 0ba93f67ab94..b5cce95d644e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -27,8 +27,8 @@ struct perf_mmap { void *base; int mask; int refcnt; - unsigned int prev; - char event_copy[PERF_SAMPLE_MAX_SIZE]; + u64 prev; + char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); }; struct perf_evlist { @@ -51,6 +51,7 @@ struct perf_evlist { struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; + struct events_stats stats; }; struct perf_evsel_str_handler { @@ -77,6 +78,8 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); @@ -149,7 +152,7 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); -int perf_evlist__apply_filters(struct perf_evlist *evlist); +int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); void __perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); @@ -183,20 +186,18 @@ static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist) size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); -int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); -static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) +static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; - int head = ACCESS_ONCE(pc->data_head); + u64 head = ACCESS_ONCE(pc->data_head); rmb(); return head; } -static inline void perf_mmap__write_tail(struct perf_mmap *md, - unsigned long tail) +static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) { struct perf_event_mmap_page *pc = md->base; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1e90c8557ede..33e3fd8c2e68 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -32,8 +32,12 @@ static struct { bool exclude_guest; bool mmap2; bool cloexec; + bool clockid; + bool clockid_wrong; } perf_missing_features; +static clockid_t clockid; + static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused) { return 0; @@ -537,13 +541,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) } static void -perf_evsel__config_callgraph(struct perf_evsel *evsel) +perf_evsel__config_callgraph(struct perf_evsel *evsel, + struct record_opts *opts) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); + if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (!opts->branch_stack) { + if (attr->exclude_user) { + pr_warning("LBR callstack option is only available " + "to get user callchain information. " + "Falling back to framepointers.\n"); + } else { + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK; + } + } else + pr_warning("Cannot use LBR callstack with branch stack. " + "Falling back to framepointers.\n"); + } + if (callchain_param.record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); @@ -667,7 +688,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel); + perf_evsel__config_callgraph(evsel, opts); if (opts->sample_intr_regs) { attr->sample_regs_intr = PERF_REGS_MASK; @@ -709,6 +730,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (opts->sample_weight) perf_evsel__set_sample_bit(evsel, WEIGHT); + attr->task = track; attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; @@ -716,6 +738,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); + if (opts->running_time) { + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + } + /* * XXX see the function comment above * @@ -737,6 +765,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->disabled = 0; attr->enable_on_exec = 0; } + + clockid = opts->clockid; + if (opts->use_clockid) { + attr->use_clockid = 1; + attr->clockid = opts->clockid; + } } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -797,6 +831,9 @@ int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { + if (ncpus == 0 || nthreads == 0) + return 0; + if (evsel->system_wide) nthreads = 1; @@ -974,67 +1011,126 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) return fd; } -#define __PRINT_ATTR(fmt, cast, field) \ - fprintf(fp, " %-19s "fmt"\n", #field, cast attr->field) - -#define PRINT_ATTR_U32(field) __PRINT_ATTR("%u" , , field) -#define PRINT_ATTR_X32(field) __PRINT_ATTR("%#x", , field) -#define PRINT_ATTR_U64(field) __PRINT_ATTR("%" PRIu64, (uint64_t), field) -#define PRINT_ATTR_X64(field) __PRINT_ATTR("%#"PRIx64, (uint64_t), field) - -#define PRINT_ATTR2N(name1, field1, name2, field2) \ - fprintf(fp, " %-19s %u %-19s %u\n", \ - name1, attr->field1, name2, attr->field2) - -#define PRINT_ATTR2(field1, field2) \ - PRINT_ATTR2N(#field1, field1, #field2, field2) - -static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp) -{ - size_t ret = 0; - - ret += fprintf(fp, "%.60s\n", graph_dotted_line); - ret += fprintf(fp, "perf_event_attr:\n"); - - ret += PRINT_ATTR_U32(type); - ret += PRINT_ATTR_U32(size); - ret += PRINT_ATTR_X64(config); - ret += PRINT_ATTR_U64(sample_period); - ret += PRINT_ATTR_U64(sample_freq); - ret += PRINT_ATTR_X64(sample_type); - ret += PRINT_ATTR_X64(read_format); - - ret += PRINT_ATTR2(disabled, inherit); - ret += PRINT_ATTR2(pinned, exclusive); - ret += PRINT_ATTR2(exclude_user, exclude_kernel); - ret += PRINT_ATTR2(exclude_hv, exclude_idle); - ret += PRINT_ATTR2(mmap, comm); - ret += PRINT_ATTR2(mmap2, comm_exec); - ret += PRINT_ATTR2(freq, inherit_stat); - ret += PRINT_ATTR2(enable_on_exec, task); - ret += PRINT_ATTR2(watermark, precise_ip); - ret += PRINT_ATTR2(mmap_data, sample_id_all); - ret += PRINT_ATTR2(exclude_host, exclude_guest); - ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel, - "excl.callchain_user", exclude_callchain_user); - - ret += PRINT_ATTR_U32(wakeup_events); - ret += PRINT_ATTR_U32(wakeup_watermark); - ret += PRINT_ATTR_X32(bp_type); - ret += PRINT_ATTR_X64(bp_addr); - ret += PRINT_ATTR_X64(config1); - ret += PRINT_ATTR_U64(bp_len); - ret += PRINT_ATTR_X64(config2); - ret += PRINT_ATTR_X64(branch_sample_type); - ret += PRINT_ATTR_X64(sample_regs_user); - ret += PRINT_ATTR_U32(sample_stack_user); - ret += PRINT_ATTR_X64(sample_regs_intr); - - ret += fprintf(fp, "%.60s\n", graph_dotted_line); +struct bit_names { + int bit; + const char *name; +}; + +static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits) +{ + bool first_bit = true; + int i = 0; + + do { + if (value & bits[i].bit) { + buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name); + first_bit = false; + } + } while (bits[++i].name != NULL); +} + +static void __p_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_##n, #n } + struct bit_names bits[] = { + bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), + bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), + bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), + bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), + bit_name(IDENTIFIER), bit_name(REGS_INTR), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +static void __p_read_format(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_FORMAT_##n, #n } + struct bit_names bits[] = { + bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), + bit_name(ID), bit_name(GROUP), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +#define BUF_SIZE 1024 + +#define p_hex(val) snprintf(buf, BUF_SIZE, "%"PRIx64, (uint64_t)(val)) +#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) +#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) +#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) +#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) + +#define PRINT_ATTRn(_n, _f, _p) \ +do { \ + if (attr->_f) { \ + _p(attr->_f); \ + ret += attr__fprintf(fp, _n, buf, priv);\ + } \ +} while (0) + +#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p) + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv) +{ + char buf[BUF_SIZE]; + int ret = 0; + + PRINT_ATTRf(type, p_unsigned); + PRINT_ATTRf(size, p_unsigned); + PRINT_ATTRf(config, p_hex); + PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned); + PRINT_ATTRf(sample_type, p_sample_type); + PRINT_ATTRf(read_format, p_read_format); + + PRINT_ATTRf(disabled, p_unsigned); + PRINT_ATTRf(inherit, p_unsigned); + PRINT_ATTRf(pinned, p_unsigned); + PRINT_ATTRf(exclusive, p_unsigned); + PRINT_ATTRf(exclude_user, p_unsigned); + PRINT_ATTRf(exclude_kernel, p_unsigned); + PRINT_ATTRf(exclude_hv, p_unsigned); + PRINT_ATTRf(exclude_idle, p_unsigned); + PRINT_ATTRf(mmap, p_unsigned); + PRINT_ATTRf(comm, p_unsigned); + PRINT_ATTRf(freq, p_unsigned); + PRINT_ATTRf(inherit_stat, p_unsigned); + PRINT_ATTRf(enable_on_exec, p_unsigned); + PRINT_ATTRf(task, p_unsigned); + PRINT_ATTRf(watermark, p_unsigned); + PRINT_ATTRf(precise_ip, p_unsigned); + PRINT_ATTRf(mmap_data, p_unsigned); + PRINT_ATTRf(sample_id_all, p_unsigned); + PRINT_ATTRf(exclude_host, p_unsigned); + PRINT_ATTRf(exclude_guest, p_unsigned); + PRINT_ATTRf(exclude_callchain_kernel, p_unsigned); + PRINT_ATTRf(exclude_callchain_user, p_unsigned); + PRINT_ATTRf(mmap2, p_unsigned); + PRINT_ATTRf(comm_exec, p_unsigned); + PRINT_ATTRf(use_clockid, p_unsigned); + + PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); + PRINT_ATTRf(bp_type, p_unsigned); + PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); + PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); + PRINT_ATTRf(sample_regs_user, p_hex); + PRINT_ATTRf(sample_stack_user, p_unsigned); + PRINT_ATTRf(clockid, p_signed); + PRINT_ATTRf(sample_regs_intr, p_hex); return ret; } +static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, + void *priv __attribute__((unused))) +{ + return fprintf(fp, " %-32s %s\n", name, val); +} + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1058,6 +1154,12 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } fallback_missing_features: + if (perf_missing_features.clockid_wrong) + evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */ + if (perf_missing_features.clockid) { + evsel->attr.use_clockid = 0; + evsel->attr.clockid = 0; + } if (perf_missing_features.cloexec) flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC; if (perf_missing_features.mmap2) @@ -1068,8 +1170,12 @@ retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; - if (verbose >= 2) - perf_event_attr__fprintf(&evsel->attr, stderr); + if (verbose >= 2) { + fprintf(stderr, "%.60s\n", graph_dotted_line); + fprintf(stderr, "perf_event_attr:\n"); + perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL); + fprintf(stderr, "%.60s\n", graph_dotted_line); + } for (cpu = 0; cpu < cpus->nr; cpu++) { @@ -1095,6 +1201,17 @@ retry_open: goto try_fallback; } set_rlimit = NO_CHANGE; + + /* + * If we succeeded but had to kill clockid, fail and + * have perf_evsel__open_strerror() print us a nice + * error. + */ + if (perf_missing_features.clockid || + perf_missing_features.clockid_wrong) { + err = -EINVAL; + goto out_close; + } } } @@ -1128,7 +1245,17 @@ try_fallback: if (err != -EINVAL || cpu > 0 || thread > 0) goto out_close; - if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. + */ + if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) { + perf_missing_features.clockid_wrong = true; + goto fallback_missing_features; + } else if (!perf_missing_features.clockid && evsel->attr.use_clockid) { + perf_missing_features.clockid = true; + goto fallback_missing_features; + } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { perf_missing_features.cloexec = true; goto fallback_missing_features; } else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) { @@ -1888,7 +2015,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, value = *(u32 *)ptr; break; case 8: - value = *(u64 *)ptr; + memcpy(&value, ptr, sizeof(u64)); break; default: return 0; @@ -1929,62 +2056,9 @@ static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) return ret; } -static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value) +static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv) { - if (value == 0) - return 0; - - return comma_fprintf(fp, first, " %s: %" PRIu64, field, value); -} - -#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field) - -struct bit_names { - int bit; - const char *name; -}; - -static int bits__fprintf(FILE *fp, const char *field, u64 value, - struct bit_names *bits, bool *first) -{ - int i = 0, printed = comma_fprintf(fp, first, " %s: ", field); - bool first_bit = true; - - do { - if (value & bits[i].bit) { - printed += fprintf(fp, "%s%s", first_bit ? "" : "|", bits[i].name); - first_bit = false; - } - } while (bits[++i].name != NULL); - - return printed; -} - -static int sample_type__fprintf(FILE *fp, bool *first, u64 value) -{ -#define bit_name(n) { PERF_SAMPLE_##n, #n } - struct bit_names bits[] = { - bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), - bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), - bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), - bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), bit_name(REGS_INTR), - { .name = NULL, } - }; -#undef bit_name - return bits__fprintf(fp, "sample_type", value, bits, first); -} - -static int read_format__fprintf(FILE *fp, bool *first, u64 value) -{ -#define bit_name(n) { PERF_FORMAT_##n, #n } - struct bit_names bits[] = { - bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), - bit_name(ID), bit_name(GROUP), - { .name = NULL, } - }; -#undef bit_name - return bits__fprintf(fp, "read_format", value, bits, first); + return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); } int perf_evsel__fprintf(struct perf_evsel *evsel, @@ -2013,47 +2087,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, printed += fprintf(fp, "%s", perf_evsel__name(evsel)); - if (details->verbose || details->freq) { + if (details->verbose) { + printed += perf_event_attr__fprintf(fp, &evsel->attr, + __print_attr__fprintf, &first); + } else if (details->freq) { printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, (u64)evsel->attr.sample_freq); } - - if (details->verbose) { - if_print(type); - if_print(config); - if_print(config1); - if_print(config2); - if_print(size); - printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type); - if (evsel->attr.read_format) - printed += read_format__fprintf(fp, &first, evsel->attr.read_format); - if_print(disabled); - if_print(inherit); - if_print(pinned); - if_print(exclusive); - if_print(exclude_user); - if_print(exclude_kernel); - if_print(exclude_hv); - if_print(exclude_idle); - if_print(mmap); - if_print(mmap2); - if_print(comm); - if_print(comm_exec); - if_print(freq); - if_print(inherit_stat); - if_print(enable_on_exec); - if_print(task); - if_print(watermark); - if_print(precise_ip); - if_print(mmap_data); - if_print(sample_id_all); - if_print(exclude_host); - if_print(exclude_guest); - if_print(__reserved_1); - if_print(wakeup_events); - if_print(bp_type); - if_print(branch_sample_type); - } out: fputc('\n', fp); return ++printed; @@ -2131,6 +2171,12 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "The PMU counters are busy/taken by another profiler.\n" "We found oprofile daemon running, please stop it and try again."); break; + case EINVAL: + if (perf_missing_features.clockid) + return scnprintf(msg, size, "clockid feature not supported."); + if (perf_missing_features.clockid_wrong) + return scnprintf(msg, size, "wrong clockid (%d).", clockid); + break; default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 38622747d130..e486151b0308 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -335,6 +335,7 @@ struct perf_attr_details { bool freq; bool verbose; bool event_group; + bool force; }; int perf_evsel__fprintf(struct perf_evsel *evsel, @@ -355,4 +356,14 @@ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) +static inline bool has_branch_callstack(struct perf_evsel *evsel) +{ + return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; +} + +typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b20e40c74468..918fd8ae2d80 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1055,6 +1055,12 @@ error: goto out; } +static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, + void *priv __attribute__((unused))) +{ + return fprintf(fp, ", %s = %s", name, val); +} + static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) { struct perf_evsel *evsel, *events = read_event_desc(ph, fd); @@ -1069,26 +1075,6 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) for (evsel = events; evsel->attr.size; evsel++) { fprintf(fp, "# event : name = %s, ", evsel->name); - fprintf(fp, "type = %d, config = 0x%"PRIx64 - ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64, - evsel->attr.type, - (u64)evsel->attr.config, - (u64)evsel->attr.config1, - (u64)evsel->attr.config2); - - fprintf(fp, ", excl_usr = %d, excl_kern = %d", - evsel->attr.exclude_user, - evsel->attr.exclude_kernel); - - fprintf(fp, ", excl_host = %d, excl_guest = %d", - evsel->attr.exclude_host, - evsel->attr.exclude_guest); - - fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip); - - fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2); - fprintf(fp, ", attr_mmap = %d", evsel->attr.mmap); - fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data); if (evsel->ids) { fprintf(fp, ", id = {"); for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { @@ -1099,6 +1085,8 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) fprintf(fp, " }"); } + perf_event_attr__fprintf(fp, &evsel->attr, __desc_attr__fprintf, NULL); + fputc('\n', fp); } @@ -1266,7 +1254,7 @@ static int __event_process_build_id(struct build_id_event *bev, dso__set_build_id(dso, &bev->build_id); - if (!is_kernel_module(filename, NULL)) + if (!is_kernel_module(filename)) dso->kernel = dso_type; build_id__sprintf(dso->build_id, sizeof(dso->build_id), @@ -2237,6 +2225,7 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz, * - unique number to identify actual perf.data files * - encode endianness of file */ + ph->version = PERF_HEADER_VERSION_2; /* check magic number with one endianness */ if (magic == __perf_magic2) @@ -2247,7 +2236,6 @@ static int check_magic_endian(u64 magic, uint64_t hdr_sz, return -1; ph->needs_swap = true; - ph->version = PERF_HEADER_VERSION_2; return 0; } @@ -2516,8 +2504,11 @@ int perf_session__read_header(struct perf_session *session) if (read_attr(fd, header, &f_attr) < 0) goto out_errno; - if (header->needs_swap) + if (header->needs_swap) { + f_attr.ids.size = bswap_64(f_attr.ids.size); + f_attr.ids.offset = bswap_64(f_attr.ids.offset); perf_event__attr_swap(&f_attr.attr); + } tmp = lseek(fd, 0, SEEK_CUR); evsel = perf_evsel__new(&f_attr.attr); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6e88b9e395df..cc22b9158b93 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -6,6 +6,7 @@ #include "evlist.h" #include "evsel.h" #include "annotate.h" +#include "ui/progress.h" #include <math.h> static bool hists__filter_entry_by_dso(struct hists *hists, @@ -240,6 +241,20 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) return he->stat.period == 0; } +static void hists__delete_entry(struct hists *hists, struct hist_entry *he) +{ + rb_erase(&he->rb_node, &hists->entries); + + if (sort__need_collapse) + rb_erase(&he->rb_node_in, &hists->entries_collapsed); + + --hists->nr_entries; + if (!he->filtered) + --hists->nr_non_filtered_entries; + + hist_entry__delete(he); +} + void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) { struct rb_node *next = rb_first(&hists->entries); @@ -248,25 +263,10 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - /* - * We may be annotating this, for instance, so keep it here in - * case some it gets new samples, we'll eventually free it when - * the user stops browsing and it agains gets fully decayed. - */ if (((zap_user && n->level == '.') || (zap_kernel && n->level != '.') || - hists__decay_entry(hists, n)) && - !n->used) { - rb_erase(&n->rb_node, &hists->entries); - - if (sort__need_collapse) - rb_erase(&n->rb_node_in, &hists->entries_collapsed); - - --hists->nr_entries; - if (!n->filtered) - --hists->nr_non_filtered_entries; - - hist_entry__free(n); + hists__decay_entry(hists, n))) { + hists__delete_entry(hists, n); } } } @@ -280,16 +280,7 @@ void hists__delete_entries(struct hists *hists) n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - rb_erase(&n->rb_node, &hists->entries); - - if (sort__need_collapse) - rb_erase(&n->rb_node_in, &hists->entries_collapsed); - - --hists->nr_entries; - if (!n->filtered) - --hists->nr_non_filtered_entries; - - hist_entry__free(n); + hists__delete_entry(hists, n); } } @@ -303,7 +294,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, size_t callchain_size = 0; struct hist_entry *he; - if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) + if (symbol_conf.use_callchain) callchain_size = sizeof(struct callchain_root); he = zalloc(sizeof(*he) + callchain_size); @@ -358,6 +349,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, callchain_init(he->callchain); INIT_LIST_HEAD(&he->pairs.node); + thread__get(he->thread); } return he; @@ -432,6 +424,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists, if (!he) return NULL; + hists->nr_entries++; + rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, hists->entries_in); out: @@ -736,7 +730,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - callchain_append(he->callchain, &callchain_cursor, sample->period); + hist_entry__append_callchain(he, sample); /* * We need to re-initialize the cursor since callchain_append() @@ -809,7 +803,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - callchain_append(he->callchain, &cursor, sample->period); + if (symbol_conf.use_callchain) + callchain_append(he->callchain, &cursor, sample->period); return 0; } @@ -913,7 +908,7 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->cmp(left, right); + cmp = fmt->cmp(fmt, left, right); if (cmp) break; } @@ -931,7 +926,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->collapse(left, right); + cmp = fmt->collapse(fmt, left, right); if (cmp) break; } @@ -939,12 +934,14 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) return cmp; } -void hist_entry__free(struct hist_entry *he) +void hist_entry__delete(struct hist_entry *he) { + thread__zput(he->thread); zfree(&he->branch_info); zfree(&he->mem_info); zfree(&he->stat_acc); free_srcline(he->srcline); + free_callchain(he->callchain); free(he); } @@ -978,7 +975,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, iter->callchain, he->callchain); } - hist_entry__free(he); + hist_entry__delete(he); return false; } @@ -987,6 +984,7 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, else p = &(*p)->rb_right; } + hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); @@ -1024,7 +1022,10 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) if (!sort__need_collapse) return; + hists->nr_entries = 0; + root = hists__get_rotate_entries_in(hists); + next = rb_first(root); while (next) { @@ -1056,7 +1057,7 @@ static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) if (perf_hpp__should_skip(fmt)) continue; - cmp = fmt->sort(a, b); + cmp = fmt->sort(fmt, a, b); if (cmp) break; } @@ -1119,7 +1120,7 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_insert_color(&he->rb_node, entries); } -void hists__output_resort(struct hists *hists) +void hists__output_resort(struct hists *hists, struct ui_progress *prog) { struct rb_root *root; struct rb_node *next; @@ -1148,6 +1149,9 @@ void hists__output_resort(struct hists *hists) if (!n->filtered) hists__calc_col_len(hists, n); + + if (prog) + ui_progress__update(prog, 1); } } @@ -1161,6 +1165,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h /* force fold unfiltered entry for simplicity */ h->ms.unfolded = false; h->row_offset = 0; + h->nr_rows = 0; hists->stats.nr_non_filtered_samples += h->stat.nr_events; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index d0ef9a19a744..9f31b89a527a 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -60,7 +60,7 @@ struct hists { struct rb_root entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; - const struct thread *thread_filter; + struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; @@ -119,9 +119,9 @@ int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); -void hist_entry__free(struct hist_entry *); +void hist_entry__delete(struct hist_entry *he); -void hists__output_resort(struct hists *hists); +void hists__output_resort(struct hists *hists, struct ui_progress *prog); void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); @@ -195,9 +195,12 @@ struct perf_hpp_fmt { struct hist_entry *he); int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he); - int64_t (*cmp)(struct hist_entry *a, struct hist_entry *b); - int64_t (*collapse)(struct hist_entry *a, struct hist_entry *b); - int64_t (*sort)(struct hist_entry *a, struct hist_entry *b); + int64_t (*cmp)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); + int64_t (*collapse)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); + int64_t (*sort)(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b); struct list_head list; struct list_head sort_list; @@ -300,6 +303,9 @@ struct hist_browser_timer { #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" +int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, + struct hist_browser_timer *hbt); + int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt); @@ -318,6 +324,12 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, { return 0; } +static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused) +{ + return 0; +} static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct perf_evsel *evsel __maybe_unused, diff --git a/tools/perf/util/hweight.c b/tools/perf/util/hweight.c deleted file mode 100644 index 5c1d0d099f0d..000000000000 --- a/tools/perf/util/hweight.c +++ /dev/null @@ -1,31 +0,0 @@ -#include <linux/bitops.h> - -/** - * hweightN - returns the hamming weight of a N-bit word - * @x: the word to weigh - * - * The Hamming Weight of a number is the total number of bits set in it. - */ - -unsigned int hweight32(unsigned int w) -{ - unsigned int res = w - ((w >> 1) & 0x55555555); - res = (res & 0x33333333) + ((res >> 2) & 0x33333333); - res = (res + (res >> 4)) & 0x0F0F0F0F; - res = res + (res >> 8); - return (res + (res >> 16)) & 0x000000FF; -} - -unsigned long hweight64(__u64 w) -{ -#if BITS_PER_LONG == 32 - return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w); -#elif BITS_PER_LONG == 64 - __u64 res = w - ((w >> 1) & 0x5555555555555555ul); - res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); - res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; - res = res + (res >> 8); - res = res + (res >> 16); - return (res + (res >> 32)) & 0x00000000000000FFul; -#endif -} diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h index 6789d788d494..3a3a0f16456a 100644 --- a/tools/perf/util/include/asm/alternative-asm.h +++ b/tools/perf/util/include/asm/alternative-asm.h @@ -4,5 +4,6 @@ /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ #define altinstruction_entry # +#define ALTERNATIVE_2 # #endif diff --git a/tools/perf/util/include/asm/hweight.h b/tools/perf/util/include/asm/hweight.h deleted file mode 100644 index 36cf26d434a5..000000000000 --- a/tools/perf/util/include/asm/hweight.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef PERF_HWEIGHT_H -#define PERF_HWEIGHT_H - -#include <linux/types.h> -unsigned int hweight32(unsigned int w); -unsigned long hweight64(__u64 w); - -#endif /* PERF_HWEIGHT_H */ diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index cf1d7913783b..ae825d4ec110 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -99,6 +99,7 @@ struct perf_kvm_stat { int timerfd; unsigned int display_time; bool live; + bool force; }; struct kvm_reg_events_ops { diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c new file mode 100644 index 000000000000..95a1acb61245 --- /dev/null +++ b/tools/perf/util/lzma.c @@ -0,0 +1,95 @@ +#include <lzma.h> +#include <stdio.h> +#include <linux/compiler.h> +#include "util.h" +#include "debug.h" + +#define BUFSIZE 8192 + +static const char *lzma_strerror(lzma_ret ret) +{ + switch ((int) ret) { + case LZMA_MEM_ERROR: + return "Memory allocation failed"; + case LZMA_OPTIONS_ERROR: + return "Unsupported decompressor flags"; + case LZMA_FORMAT_ERROR: + return "The input is not in the .xz format"; + case LZMA_DATA_ERROR: + return "Compressed file is corrupt"; + case LZMA_BUF_ERROR: + return "Compressed file is truncated or otherwise corrupt"; + default: + return "Unknown error, possibly a bug"; + } +} + +int lzma_decompress_to_file(const char *input, int output_fd) +{ + lzma_action action = LZMA_RUN; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_ret ret; + + u8 buf_in[BUFSIZE]; + u8 buf_out[BUFSIZE]; + FILE *infile; + + infile = fopen(input, "rb"); + if (!infile) { + pr_err("lzma: fopen failed on %s: '%s'\n", + input, strerror(errno)); + return -1; + } + + ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); + if (ret != LZMA_OK) { + pr_err("lzma: lzma_stream_decoder failed %s (%d)\n", + lzma_strerror(ret), ret); + return -1; + } + + strm.next_in = NULL; + strm.avail_in = 0; + strm.next_out = buf_out; + strm.avail_out = sizeof(buf_out); + + while (1) { + if (strm.avail_in == 0 && !feof(infile)) { + strm.next_in = buf_in; + strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile); + + if (ferror(infile)) { + pr_err("lzma: read error: %s\n", strerror(errno)); + return -1; + } + + if (feof(infile)) + action = LZMA_FINISH; + } + + ret = lzma_code(&strm, action); + + if (strm.avail_out == 0 || ret == LZMA_STREAM_END) { + ssize_t write_size = sizeof(buf_out) - strm.avail_out; + + if (writen(output_fd, buf_out, write_size) != write_size) { + pr_err("lzma: write error: %s\n", strerror(errno)); + return -1; + } + + strm.next_out = buf_out; + strm.avail_out = sizeof(buf_out); + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) + return 0; + + pr_err("lzma: failed %s\n", lzma_strerror(ret)); + return -1; + } + } + + fclose(infile); + return 0; +} diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 94de3e48b490..527e032e24f6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -89,16 +89,6 @@ static void dsos__delete(struct dsos *dsos) } } -void machine__delete_dead_threads(struct machine *machine) -{ - struct thread *n, *t; - - list_for_each_entry_safe(t, n, &machine->dead_threads, node) { - list_del(&t->node); - thread__delete(t); - } -} - void machine__delete_threads(struct machine *machine) { struct rb_node *nd = rb_first(&machine->threads); @@ -106,9 +96,8 @@ void machine__delete_threads(struct machine *machine) while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); - rb_erase(&t->rb_node, &machine->threads); nd = rb_next(nd); - thread__delete(t); + machine__remove_thread(machine, t); } } @@ -361,9 +350,13 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * the full rbtree: */ th = machine->last_match; - if (th && th->tid == tid) { - machine__update_thread_pid(machine, th, pid); - return th; + if (th != NULL) { + if (th->tid == tid) { + machine__update_thread_pid(machine, th, pid); + return th; + } + + thread__zput(machine->last_match); } while (*p != NULL) { @@ -371,7 +364,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine, th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - machine->last_match = th; + machine->last_match = thread__get(th); machine__update_thread_pid(machine, th, pid); return th; } @@ -389,7 +382,6 @@ static struct thread *__machine__findnew_thread(struct machine *machine, if (th != NULL) { rb_link_node(&th->rb_node, parent, p); rb_insert_color(&th->rb_node, &machine->threads); - machine->last_match = th; /* * We have to initialize map_groups separately @@ -400,9 +392,15 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { + rb_erase(&th->rb_node, &machine->threads); thread__delete(th); return NULL; } + /* + * It is now in the rbtree, get a ref + */ + thread__get(th); + machine->last_match = thread__get(th); } return th; @@ -460,30 +458,61 @@ int machine__process_lost_event(struct machine *machine __maybe_unused, return 0; } +static struct dso* +machine__module_dso(struct machine *machine, struct kmod_path *m, + const char *filename) +{ + struct dso *dso; + + dso = dsos__find(&machine->kernel_dsos, m->name, true); + if (!dso) { + dso = dsos__addnew(&machine->kernel_dsos, m->name); + if (dso == NULL) + return NULL; + + if (machine__is_host(machine)) + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; + else + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + + /* _KMODULE_COMP should be next to _KMODULE */ + if (m->kmod && m->comp) + dso->symtab_type++; + + dso__set_short_name(dso, strdup(m->name), true); + dso__set_long_name(dso, strdup(filename), true); + } + + return dso; +} + struct map *machine__new_module(struct machine *machine, u64 start, const char *filename) { - struct map *map; - struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename); - bool compressed; + struct map *map = NULL; + struct dso *dso; + struct kmod_path m; - if (dso == NULL) + if (kmod_path__parse_name(&m, filename)) return NULL; - map = map__new2(start, dso, MAP__FUNCTION); - if (map == NULL) - return NULL; + map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION, + m.name); + if (map) + goto out; - if (machine__is_host(machine)) - dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; - else - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + dso = machine__module_dso(machine, &m, filename); + if (dso == NULL) + goto out; - /* _KMODULE_COMP should be next to _KMODULE */ - if (is_kernel_module(filename, &compressed) && compressed) - dso->symtab_type++; + map = map__new2(start, dso, MAP__FUNCTION); + if (map == NULL) + goto out; map_groups__insert(&machine->kmaps, map); + +out: + free(m.name); return map; } @@ -648,6 +677,9 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) machine->vmlinux_maps[type]->unmap_ip = identity__map_ip; kmap = map__kmap(machine->vmlinux_maps[type]); + if (!kmap) + return -1; + kmap->kmaps = &machine->kmaps; map_groups__insert(&machine->kmaps, machine->vmlinux_maps[type]); @@ -669,7 +701,7 @@ void machine__destroy_kernel_maps(struct machine *machine) kmap = map__kmap(machine->vmlinux_maps[type]); map_groups__remove(&machine->kmaps, machine->vmlinux_maps[type]); - if (kmap->ref_reloc_sym) { + if (kmap && kmap->ref_reloc_sym) { /* * ref_reloc_sym is shared among all maps, so free just * on one of them. @@ -825,6 +857,39 @@ static char *get_kernel_version(const char *root_dir) return strdup(name); } +static bool is_kmod_dso(struct dso *dso) +{ + return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; +} + +static int map_groups__set_module_path(struct map_groups *mg, const char *path, + struct kmod_path *m) +{ + struct map *map; + char *long_name; + + map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name); + if (map == NULL) + return 0; + + long_name = strdup(path); + if (long_name == NULL) + return -ENOMEM; + + dso__set_long_name(map->dso, long_name, true); + dso__kernel_module_get_build_id(map->dso, ""); + + /* + * Full name could reveal us kmod compression, so + * we need to update the symtab_type if needed. + */ + if (m->comp && is_kmod_dso(map->dso)) + map->dso->symtab_type++; + + return 0; +} + static int map_groups__set_modules_path_dir(struct map_groups *mg, const char *dir_name, int depth) { @@ -863,35 +928,19 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, if (ret < 0) goto out; } else { - char *dot = strrchr(dent->d_name, '.'), - dso_name[PATH_MAX]; - struct map *map; - char *long_name; + struct kmod_path m; - if (dot == NULL) - continue; - - /* On some system, modules are compressed like .ko.gz */ - if (is_supported_compression(dot + 1) && - is_kmodule_extension(dot - 2)) - dot -= 3; + ret = kmod_path__parse_name(&m, dent->d_name); + if (ret) + goto out; - snprintf(dso_name, sizeof(dso_name), "[%.*s]", - (int)(dot - dent->d_name), dent->d_name); + if (m.kmod) + ret = map_groups__set_module_path(mg, path, &m); - strxfrchar(dso_name, '-', '_'); - map = map_groups__find_by_name(mg, MAP__FUNCTION, - dso_name); - if (map == NULL) - continue; + free(m.name); - long_name = strdup(path); - if (long_name == NULL) { - ret = -1; + if (ret) goto out; - } - dso__set_long_name(map->dso, long_name, true); - dso__kernel_module_get_build_id(map->dso, ""); } } @@ -1044,40 +1093,11 @@ static int machine__process_kernel_mmap_event(struct machine *machine, strlen(kmmap_prefix) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { - - char short_module_name[1024]; - char *name, *dot; - - if (event->mmap.filename[0] == '/') { - name = strrchr(event->mmap.filename, '/'); - if (name == NULL) - goto out_problem; - - ++name; /* skip / */ - dot = strrchr(name, '.'); - if (dot == NULL) - goto out_problem; - /* On some system, modules are compressed like .ko.gz */ - if (is_supported_compression(dot + 1)) - dot -= 3; - if (!is_kmodule_extension(dot + 1)) - goto out_problem; - snprintf(short_module_name, sizeof(short_module_name), - "[%.*s]", (int)(dot - name), name); - strxfrchar(short_module_name, '-', '_'); - } else - strcpy(short_module_name, event->mmap.filename); - map = machine__new_module(machine, event->mmap.start, event->mmap.filename); if (map == NULL) goto out_problem; - name = strdup(short_module_name); - if (name == NULL) - goto out_problem; - - dso__set_short_name(map->dso, name, true); map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { const char *symbol_name = (event->mmap.filename + @@ -1090,7 +1110,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct dso *dso; list_for_each_entry(dso, &machine->kernel_dsos.head, node) { - if (is_kernel_module(dso->long_name, NULL)) + if (is_kernel_module(dso->long_name)) continue; kernel = dso; @@ -1234,15 +1254,19 @@ out_problem: return 0; } -static void machine__remove_thread(struct machine *machine, struct thread *th) +void machine__remove_thread(struct machine *machine, struct thread *th) { - machine->last_match = NULL; + if (machine->last_match == th) + thread__zput(machine->last_match); + rb_erase(&th->rb_node, &machine->threads); /* - * We may have references to this thread, for instance in some hist_entry - * instances, so just move them to a separate list. + * Move it first to the dead_threads list, then drop the reference, + * if this is the last reference, then the thread__delete destructor + * will be called and we will remove it from the dead_threads list. */ list_add_tail(&th->node, &machine->dead_threads); + thread__put(th); } int machine__process_fork_event(struct machine *machine, union perf_event *event, @@ -1385,29 +1409,27 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, static int add_callchain_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, - bool branch_history, + u8 *cpumode, u64 ip) { struct addr_location al; al.filtered = 0; al.sym = NULL; - if (branch_history) + if (!cpumode) { thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); - else { - u8 cpumode = PERF_RECORD_MISC_USER; - + } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; + *cpumode = PERF_RECORD_MISC_HYPERVISOR; break; case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; + *cpumode = PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; + *cpumode = PERF_RECORD_MISC_USER; break; default: pr_debug("invalid callchain context: " @@ -1421,8 +1443,8 @@ static int add_callchain_ip(struct thread *thread, } return 0; } - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, - ip, &al); + thread__find_addr_location(thread, *cpumode, MAP__FUNCTION, + ip, &al); } if (al.sym != NULL) { @@ -1500,18 +1522,102 @@ static int remove_loops(struct branch_entry *l, int nr) return nr; } -static int thread__resolve_callchain_sample(struct thread *thread, - struct ip_callchain *chain, - struct branch_stack *branch, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) +/* + * Recolve LBR callstack chain sample + * Return: + * 1 on success get LBR callchain information + * 0 no available LBR callchain information, should try fp + * negative error code on other errors. + */ +static int resolve_lbr_callchain_sample(struct thread *thread, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) { + struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr); + u8 cpumode = PERF_RECORD_MISC_USER; + int i, j, err; + u64 ip; + + for (i = 0; i < chain_nr; i++) { + if (chain->ips[i] == PERF_CONTEXT_USER) + break; + } + + /* LBR only affects the user callchain */ + if (i != chain_nr) { + struct branch_stack *lbr_stack = sample->branch_stack; + int lbr_nr = lbr_stack->nr; + /* + * LBR callstack can only get user call chain. + * The mix_chain_nr is kernel call chain + * number plus LBR user call chain number. + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER, + * lbr_nr + 1 is the user call chain number. + * For details, please refer to the comments + * in callchain__printf + */ + int mix_chain_nr = i + 1 + lbr_nr + 1; + + if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } + + for (j = 0; j < mix_chain_nr; j++) { + if (callchain_param.order == ORDER_CALLEE) { + if (j < i + 1) + ip = chain->ips[j]; + else if (j > i + 1) + ip = lbr_stack->entries[j - i - 2].from; + else + ip = lbr_stack->entries[0].to; + } else { + if (j < lbr_nr) + ip = lbr_stack->entries[lbr_nr - j - 1].from; + else if (j > lbr_nr) + ip = chain->ips[i + 1 - (j - lbr_nr)]; + else + ip = lbr_stack->entries[0].to; + } + + err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); + if (err) + return (err < 0) ? err : 0; + } + return 1; + } + + return 0; +} + +static int thread__resolve_callchain_sample(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + struct branch_stack *branch = sample->branch_stack; + struct ip_callchain *chain = sample->callchain; + int chain_nr = min(max_stack, (int)chain->nr); + u8 cpumode = PERF_RECORD_MISC_USER; int i, j, err; int skip_idx = -1; int first_call = 0; + callchain_cursor_reset(&callchain_cursor); + + if (has_branch_callstack(evsel)) { + err = resolve_lbr_callchain_sample(thread, sample, parent, + root_al, max_stack); + if (err) + return (err < 0) ? err : 0; + } + /* * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. @@ -1519,8 +1625,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (chain->nr < PERF_MAX_STACK_DEPTH) skip_idx = arch_skip_callchain_idx(thread, chain); - callchain_cursor_reset(&callchain_cursor); - /* * Add branches to call stack for easier browsing. This gives * more context for a sample than just the callers. @@ -1566,10 +1670,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { err = add_callchain_ip(thread, parent, root_al, - true, be[i].to); + NULL, be[i].to); if (!err) err = add_callchain_ip(thread, parent, root_al, - true, be[i].from); + NULL, be[i].from); if (err == -EINVAL) break; if (err) @@ -1598,7 +1702,7 @@ check_calls: #endif ip = chain->ips[j]; - err = add_callchain_ip(thread, parent, root_al, false, ip); + err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); if (err) return (err < 0) ? err : 0; @@ -1621,9 +1725,9 @@ int thread__resolve_callchain(struct thread *thread, struct addr_location *root_al, int max_stack) { - int ret = thread__resolve_callchain_sample(thread, sample->callchain, - sample->branch_stack, - parent, root_al, max_stack); + int ret = thread__resolve_callchain_sample(thread, evsel, + sample, parent, + root_al, max_stack); if (ret) return ret; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index e8b7779a0a3f..6d64cedb9d1e 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -118,9 +118,9 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); -void machine__delete_dead_threads(struct machine *machine); void machine__delete_threads(struct machine *machine); void machine__delete(struct machine *machine); +void machine__remove_thread(struct machine *machine, struct thread *th); struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct addr_location *al); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 62ca9f2607d5..a14f08f41686 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -778,3 +778,23 @@ struct map *maps__next(struct map *map) return rb_entry(next, struct map, rb_node); return NULL; } + +struct kmap *map__kmap(struct map *map) +{ + if (!map->dso || !map->dso->kernel) { + pr_err("Internal error: map__kmap with a non-kernel map\n"); + return NULL; + } + return (struct kmap *)(map + 1); +} + +struct map_groups *map__kmaps(struct map *map) +{ + struct kmap *kmap = map__kmap(map); + + if (!kmap || !kmap->kmaps) { + pr_err("Internal error: map__kmaps with a non-kernel map\n"); + return NULL; + } + return kmap->kmaps; +} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 6951a9d42339..ec19c59ca38e 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -76,10 +76,8 @@ static inline struct map_groups *map_groups__get(struct map_groups *mg) void map_groups__put(struct map_groups *mg); -static inline struct kmap *map__kmap(struct map *map) -{ - return (struct kmap *)(map + 1); -} +struct kmap *map__kmap(struct map *map); +struct map_groups *map__kmaps(struct map *map); static inline u64 map__map_ip(struct map *map, u64 ip) { @@ -116,6 +114,22 @@ struct thread; #define map__for_each_symbol(map, pos, n) \ dso__for_each_symbol(map->dso, pos, n, map->type) +/* map__for_each_symbol_with_name - iterate over the symbols in the given map + * that have the given name + * + * @map: the 'struct map *' in which symbols itereated + * @sym_name: the symbol name + * @pos: the 'struct symbol *' to use as a loop cursor + * @filter: to use when loading the DSO + */ +#define __map__for_each_symbol_by_name(map, sym_name, pos, filter) \ + for (pos = map__find_symbol_by_name(map, sym_name, filter); \ + pos && strcmp(pos->name, sym_name) == 0; \ + pos = symbol__next_by_name(pos)) + +#define map__for_each_symbol_by_name(map, sym_name, pos) \ + __map__for_each_symbol_by_name(map, sym_name, (pos), NULL) + typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); void map__init(struct map *map, enum map_type type, diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index fd4be94125fb..52be201b9b25 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -2,7 +2,6 @@ #include <linux/compiler.h> #include <linux/string.h> #include "ordered-events.h" -#include "evlist.h" #include "session.h" #include "asm/bug.h" #include "debug.h" @@ -131,8 +130,8 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, return new; } -struct ordered_event * -ordered_events__new(struct ordered_events *oe, u64 timestamp, +static struct ordered_event * +ordered_events__new_event(struct ordered_events *oe, u64 timestamp, union perf_event *event) { struct ordered_event *new; @@ -153,20 +152,47 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve free_dup_event(oe, event->event); } -static int __ordered_events__flush(struct perf_session *s, - struct perf_tool *tool) +int ordered_events__queue(struct ordered_events *oe, union perf_event *event, + struct perf_sample *sample, u64 file_offset) +{ + u64 timestamp = sample->time; + struct ordered_event *oevent; + + if (!timestamp || timestamp == ~0ULL) + return -ETIME; + + if (timestamp < oe->last_flush) { + pr_oe_time(timestamp, "out of order event\n"); + pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", + oe->last_flush_type); + + oe->nr_unordered_events++; + } + + oevent = ordered_events__new_event(oe, timestamp, event); + if (!oevent) { + ordered_events__flush(oe, OE_FLUSH__HALF); + oevent = ordered_events__new_event(oe, timestamp, event); + } + + if (!oevent) + return -ENOMEM; + + oevent->file_offset = file_offset; + return 0; +} + +static int __ordered_events__flush(struct ordered_events *oe) { - struct ordered_events *oe = &s->ordered_events; struct list_head *head = &oe->events; struct ordered_event *tmp, *iter; - struct perf_sample sample; u64 limit = oe->next_flush; u64 last_ts = oe->last ? oe->last->timestamp : 0ULL; bool show_progress = limit == ULLONG_MAX; struct ui_progress prog; int ret; - if (!tool->ordered_events || !limit) + if (!limit) return 0; if (show_progress) @@ -178,16 +204,9 @@ static int __ordered_events__flush(struct perf_session *s, if (iter->timestamp > limit) break; - - ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample); + ret = oe->deliver(oe, iter); if (ret) - pr_err("Can't parse sample, err = %d\n", ret); - else { - ret = perf_session__deliver_event(s, iter->event, &sample, tool, - iter->file_offset); - if (ret) - return ret; - } + return ret; ordered_events__delete(oe, iter); oe->last_flush = iter->timestamp; @@ -204,10 +223,8 @@ static int __ordered_events__flush(struct perf_session *s, return 0; } -int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, - enum oe_flush how) +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) { - struct ordered_events *oe = &s->ordered_events; static const char * const str[] = { "NONE", "FINAL", @@ -216,6 +233,9 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, }; int err; + if (oe->nr_events == 0) + return 0; + switch (how) { case OE_FLUSH__FINAL: oe->next_flush = ULLONG_MAX; @@ -248,7 +268,7 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, str[how], oe->nr_events); pr_oe_time(oe->max_timestamp, "max_timestamp\n"); - err = __ordered_events__flush(s, tool); + err = __ordered_events__flush(oe); if (!err) { if (how == OE_FLUSH__ROUND) @@ -264,13 +284,14 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, return err; } -void ordered_events__init(struct ordered_events *oe) +void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver) { INIT_LIST_HEAD(&oe->events); INIT_LIST_HEAD(&oe->cache); INIT_LIST_HEAD(&oe->to_free); oe->max_alloc_size = (u64) -1; oe->cur_alloc_size = 0; + oe->deliver = deliver; } void ordered_events__free(struct ordered_events *oe) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 7b8f9b011f38..f403991e3bfd 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -2,9 +2,8 @@ #define __ORDERED_EVENTS_H #include <linux/types.h> -#include "tool.h" -struct perf_session; +struct perf_sample; struct ordered_event { u64 timestamp; @@ -20,6 +19,11 @@ enum oe_flush { OE_FLUSH__HALF, }; +struct ordered_events; + +typedef int (*ordered_events__deliver_t)(struct ordered_events *oe, + struct ordered_event *event); + struct ordered_events { u64 last_flush; u64 next_flush; @@ -31,18 +35,19 @@ struct ordered_events { struct list_head to_free; struct ordered_event *buffer; struct ordered_event *last; + ordered_events__deliver_t deliver; int buffer_idx; unsigned int nr_events; enum oe_flush last_flush_type; + u32 nr_unordered_events; bool copy_on_queue; }; -struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp, - union perf_event *event); +int ordered_events__queue(struct ordered_events *oe, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); -int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, - enum oe_flush how); -void ordered_events__init(struct ordered_events *oe); +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); +void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); void ordered_events__free(struct ordered_events *oe); static inline diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 77b43fe43d55..be0655388b38 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -20,11 +20,6 @@ #define MAX_NAME_LEN 100 -struct event_symbol { - const char *symbol; - const char *alias; -}; - #ifdef PARSER_DEBUG extern int parse_events_debug; #endif @@ -39,7 +34,7 @@ static struct perf_pmu_event_symbol *perf_pmu_events_list; */ static int perf_pmu_events_list_num; -static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { +struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { .symbol = "cpu-cycles", .alias = "cycles", @@ -82,7 +77,7 @@ static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { }, }; -static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { +struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { [PERF_COUNT_SW_CPU_CLOCK] = { .symbol = "cpu-clock", .alias = "", @@ -175,9 +170,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return NULL; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return NULL; @@ -473,12 +465,6 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, int parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event) { - int ret; - - ret = debugfs_valid_mountpoint(tracing_events_path); - if (ret) - return ret; - if (strpbrk(sys, "*?")) return add_tracepoint_multi_sys(list, idx, sys, event); else @@ -526,7 +512,7 @@ do { \ } int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type) + void *ptr, char *type, u64 len) { struct perf_event_attr attr; @@ -536,14 +522,15 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, if (parse_breakpoint_type(type, &attr)) return -EINVAL; - /* - * We should find a nice way to override the access length - * Provide some defaults for now - */ - if (attr.bp_type == HW_BREAKPOINT_X) - attr.bp_len = sizeof(long); - else - attr.bp_len = HW_BREAKPOINT_LEN_4; + /* Provide some defaults if len is not specified */ + if (!len) { + if (attr.bp_type == HW_BREAKPOINT_X) + len = sizeof(long); + else + len = HW_BREAKPOINT_LEN_4; + } + + attr.bp_len = len; attr.type = PERF_TYPE_BREAKPOINT; attr.sample_period = 1; @@ -722,6 +709,7 @@ struct event_modifier { int eh; int eH; int eG; + int eI; int precise; int exclude_GH; int sample_read; @@ -736,6 +724,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int eh = evsel ? evsel->attr.exclude_hv : 0; int eH = evsel ? evsel->attr.exclude_host : 0; int eG = evsel ? evsel->attr.exclude_guest : 0; + int eI = evsel ? evsel->attr.exclude_idle : 0; int precise = evsel ? evsel->attr.precise_ip : 0; int sample_read = 0; int pinned = evsel ? evsel->attr.pinned : 0; @@ -766,6 +755,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, if (!exclude_GH) exclude_GH = eG = eH = 1; eH = 0; + } else if (*str == 'I') { + eI = 1; } else if (*str == 'p') { precise++; /* use of precise requires exclude_guest */ @@ -799,6 +790,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->eh = eh; mod->eH = eH; mod->eG = eG; + mod->eI = eI; mod->precise = precise; mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; @@ -816,7 +808,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppSD") - 1)) + if (strlen(str) > (sizeof("ukhGHpppSDI") - 1)) return -1; while (*p) { @@ -852,6 +844,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->attr.precise_ip = mod.precise; evsel->attr.exclude_host = mod.eH; evsel->attr.exclude_guest = mod.eG; + evsel->attr.exclude_idle = mod.eI; evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; @@ -1097,6 +1090,14 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; +static int cmp_string(const void *a, const void *b) +{ + const char * const *as = a; + const char * const *bs = b; + + return strcmp(*as, *bs); +} + /* * Print the events from <debugfs_mount_point>/tracing/events */ @@ -1108,20 +1109,23 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - char sbuf[STRERR_BUFSIZE]; - - if (debugfs_valid_mountpoint(tracing_events_path)) { - printf(" [ Tracepoints not available: %s ]\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - return; - } + char **evt_list = NULL; + unsigned int evt_i = 0, evt_num = 0; + bool evt_num_known = false; +restart: sys_dir = opendir(tracing_events_path); if (!sys_dir) return; + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_close_sys_dir; + } + for_each_subsystem(sys_dir, sys_dirent, sys_next) { - if (subsys_glob != NULL && + if (subsys_glob != NULL && !strglobmatch(sys_dirent.d_name, subsys_glob)) continue; @@ -1132,23 +1136,60 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, continue; for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { - if (event_glob != NULL && + if (event_glob != NULL && !strglobmatch(evt_dirent.d_name, event_glob)) continue; - if (name_only) { - printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name); + if (!evt_num_known) { + evt_num++; continue; } snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); - printf(" %-50s [%s]\n", evt_path, - event_type_descriptors[PERF_TYPE_TRACEPOINT]); + + evt_list[evt_i] = strdup(evt_path); + if (evt_list[evt_i] == NULL) + goto out_close_evt_dir; + evt_i++; } closedir(evt_dir); } closedir(sys_dir); + + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + } + if (evt_num) + printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_close_evt_dir: + closedir(evt_dir); +out_close_sys_dir: + closedir(sys_dir); + + printf("FATAL: not enough memory to print %s\n", + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + if (evt_list) + goto out_free; } /* @@ -1162,9 +1203,6 @@ int is_valid_tracepoint(const char *event_string) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return 0; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return 0; @@ -1232,38 +1270,19 @@ static bool is_event_supported(u8 type, unsigned config) return ret; } -static void __print_events_type(u8 type, struct event_symbol *syms, - unsigned max) -{ - char name[64]; - unsigned i; - - for (i = 0; i < max ; i++, syms++) { - if (!is_event_supported(type, i)) - continue; - - if (strlen(syms->alias)) - snprintf(name, sizeof(name), "%s OR %s", - syms->symbol, syms->alias); - else - snprintf(name, sizeof(name), "%s", syms->symbol); - - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - } -} - -void print_events_type(u8 type) -{ - if (type == PERF_TYPE_SOFTWARE) - __print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX); - else - __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); -} - int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, printed = 0; + unsigned int type, op, i, evt_i = 0, evt_num = 0; char name[64]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + } for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { @@ -1281,31 +1300,70 @@ int print_hwcache_events(const char *event_glob, bool name_only) type | (op << 8) | (i << 16))) continue; - if (name_only) - printf("%s ", name); - else - printf(" %-50s [%s]\n", name, - event_type_descriptors[PERF_TYPE_HW_CACHE]); - ++printed; + if (!evt_num_known) { + evt_num++; + continue; + } + + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } } } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_HW_CACHE]); + } + if (evt_num) printf("\n"); - return printed; + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return evt_num; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[PERF_TYPE_HW_CACHE]); + if (evt_list) + goto out_free; + return evt_num; } -static void print_symbol_events(const char *event_glob, unsigned type, +void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only) { - unsigned i, printed = 0; + unsigned int i, evt_i = 0, evt_num = 0; char name[MAX_NAME_LEN]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + syms -= max; + } for (i = 0; i < max; i++, syms++) { - if (event_glob != NULL && + if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; @@ -1313,23 +1371,49 @@ static void print_symbol_events(const char *event_glob, unsigned type, if (!is_event_supported(type, i)) continue; - if (name_only) { - printf("%s ", syms->symbol); + if (!evt_num_known) { + evt_num++; continue; } - if (strlen(syms->alias)) + if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else strncpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - - printed++; + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); + } + if (evt_num) printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); + if (evt_list) + goto out_free; } /* @@ -1337,11 +1421,6 @@ static void print_symbol_events(const char *event_glob, unsigned type, */ void print_events(const char *event_glob, bool name_only) { - if (!name_only) { - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); - } - print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1366,7 +1445,7 @@ void print_events(const char *event_glob, bool name_only) printf("\n"); printf(" %-50s [%s]\n", - "mem:<addr>[:access]", + "mem:<addr>[/len][:access]", event_type_descriptors[PERF_TYPE_BREAKPOINT]); printf("\n"); } diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index db2cf78ff0f3..52a2dda4f954 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ struct parse_events_term { int type_val; int type_term; struct list_head list; + bool used; }; struct parse_events_evlist { @@ -104,7 +105,7 @@ int parse_events_add_numeric(struct list_head *list, int *idx, int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2); int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type); + void *ptr, char *type, u64 len); int parse_events_add_pmu(struct list_head *list, int *idx, char *pmu , struct list_head *head_config); enum perf_pmu_event_symbol_type @@ -115,12 +116,21 @@ void parse_events_update_lists(struct list_head *list_event, void parse_events_error(void *data, void *scanner, char const *msg); void print_events(const char *event_glob, bool name_only); -void print_events_type(u8 type); + +struct event_symbol { + const char *symbol; + const char *alias; +}; +extern struct event_symbol event_symbols_hw[]; +extern struct event_symbol event_symbols_sw[]; +void print_symbol_events(const char *event_glob, unsigned type, + struct event_symbol *syms, unsigned max, + bool name_only); void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); -extern int valid_debugfs_mount(const char *debugfs); +int valid_event_mount(const char *eventfs); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 906630bbf8eb..8895cf3132ab 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -101,7 +101,7 @@ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?]* name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?]* /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpGHSD]+ +modifier_event [ukhpGHSDI]+ modifier_bp [rwx]{1,3} %% @@ -159,6 +159,7 @@ branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE <mem>{ {modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); } : { return ':'; } +"/" { return '/'; } {num_dec} { return value(yyscanner, 10); } {num_hex} { return value(yyscanner, 16); } /* diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 93c4c9fbc922..72def077dbbf 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -326,6 +326,28 @@ PE_NAME_CACHE_TYPE } event_legacy_mem: +PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc +{ + struct parse_events_evlist *data = _data; + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + (void *) $2, $6, $4)); + $$ = list; +} +| +PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc +{ + struct parse_events_evlist *data = _data; + struct list_head *list; + + ALLOC_LIST(list); + ABORT_ON(parse_events_add_breakpoint(list, &data->idx, + (void *) $2, NULL, $4)); + $$ = list; +} +| PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc { struct parse_events_evlist *data = _data; @@ -333,7 +355,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_breakpoint(list, &data->idx, - (void *) $2, $4)); + (void *) $2, $4, 0)); $$ = list; } | @@ -344,7 +366,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_breakpoint(list, &data->idx, - (void *) $2, NULL)); + (void *) $2, NULL, 0)); $$ = list; } diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index f62dee7bd924..01626be2a8eb 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -37,6 +37,7 @@ static int get_value(struct parse_opt_ctx_t *p, { const char *s, *arg = NULL; const int unset = flags & OPT_UNSET; + int err; if (unset && p->opt) return opterror(opt, "takes no value", flags); @@ -46,7 +47,7 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "is not usable", flags); if (opt->flags & PARSE_OPT_EXCLUSIVE) { - if (p->excl_opt) { + if (p->excl_opt && p->excl_opt != opt) { char msg[128]; if (((flags & OPT_SHORT) && p->excl_opt->short_name) || @@ -114,13 +115,29 @@ static int get_value(struct parse_opt_ctx_t *p, return 0; case OPTION_STRING: + err = 0; if (unset) *(const char **)opt->value = NULL; else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) *(const char **)opt->value = (const char *)opt->defval; else - return get_arg(p, opt, flags, (const char **)opt->value); - return 0; + err = get_arg(p, opt, flags, (const char **)opt->value); + + /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */ + if (opt->flags & PARSE_OPT_NOEMPTY) { + const char *val = *(const char **)opt->value; + + if (!val) + return err; + + /* Similar to unset if we are given an empty string. */ + if (val[0] == '\0') { + *(const char **)opt->value = NULL; + return 0; + } + } + + return err; case OPTION_CALLBACK: if (unset) @@ -505,13 +522,18 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o break; case PARSE_OPT_LIST_OPTS: while (options->type != OPTION_END) { - printf("--%s ", options->long_name); + if (options->long_name) + printf("--%s ", options->long_name); options++; } + putchar('\n'); exit(130); case PARSE_OPT_LIST_SUBCMDS: - for (int i = 0; subcommands[i]; i++) - printf("%s ", subcommands[i]); + if (subcommands) { + for (int i = 0; subcommands[i]; i++) + printf("%s ", subcommands[i]); + } + putchar('\n'); exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 97b153fb4999..59561fd86278 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -40,6 +40,7 @@ enum parse_opt_option_flags { PARSE_OPT_LASTARG_DEFAULT = 16, PARSE_OPT_DISABLED = 32, PARSE_OPT_EXCLUSIVE = 64, + PARSE_OPT_NOEMPTY = 128, }; struct option; @@ -122,6 +123,7 @@ struct option { #define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } #define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } #define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) } +#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} #define OPT_DATE(s, l, v, h) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 5c9c4947cfb4..48411674da0f 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -551,31 +551,68 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, } /* + * Term is a string term, and might be a param-term. Try to look up it's value + * in the remaining terms. + * - We have a term like "base-or-format-term=param-term", + * - We need to find the value supplied for "param-term" (with param-term named + * in a config string) later on in the term list. + */ +static int pmu_resolve_param_term(struct parse_events_term *term, + struct list_head *head_terms, + __u64 *value) +{ + struct parse_events_term *t; + + list_for_each_entry(t, head_terms, list) { + if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) { + if (!strcmp(t->config, term->config)) { + t->used = true; + *value = t->val.num; + return 0; + } + } + } + + if (verbose) + printf("Required parameter '%s' not specified\n", term->config); + + return -1; +} + +/* * Setup one of config[12] attr members based on the * user input data - term parameter. */ static int pmu_config_term(struct list_head *formats, struct perf_event_attr *attr, struct parse_events_term *term, + struct list_head *head_terms, bool zero) { struct perf_pmu_format *format; __u64 *vp; + __u64 val; + + /* + * If this is a parameter we've already used for parameterized-eval, + * skip it in normal eval. + */ + if (term->used) + return 0; /* - * Support only for hardcoded and numnerial terms. * Hardcoded terms should be already in, so nothing * to be done for them. */ if (parse_events__is_hardcoded_term(term)) return 0; - if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) - return -EINVAL; - format = pmu_find_format(formats, term->config); - if (!format) + if (!format) { + if (verbose) + printf("Invalid event/parameter '%s'\n", term->config); return -EINVAL; + } switch (format->value) { case PERF_PMU_FORMAT_VALUE_CONFIG: @@ -592,11 +629,25 @@ static int pmu_config_term(struct list_head *formats, } /* - * XXX If we ever decide to go with string values for - * non-hardcoded terms, here's the place to translate - * them into value. + * Either directly use a numeric term, or try to translate string terms + * using event parameters. */ - pmu_format_value(format->bits, term->val.num, vp, zero); + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + val = term->val.num; + else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { + if (strcmp(term->val.str, "?")) { + if (verbose) + pr_info("Invalid sysfs entry %s=%s\n", + term->config, term->val.str); + return -EINVAL; + } + + if (pmu_resolve_param_term(term, head_terms, &val)) + return -EINVAL; + } else + return -EINVAL; + + pmu_format_value(format->bits, val, vp, zero); return 0; } @@ -607,9 +658,10 @@ int perf_pmu__config_terms(struct list_head *formats, { struct parse_events_term *term; - list_for_each_entry(term, head_terms, list) - if (pmu_config_term(formats, attr, term, zero)) + list_for_each_entry(term, head_terms, list) { + if (pmu_config_term(formats, attr, term, head_terms, zero)) return -EINVAL; + } return 0; } @@ -767,10 +819,36 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to) set_bit(b, bits); } +static int sub_non_neg(int a, int b) +{ + if (b > a) + return 0; + return a - b; +} + static char *format_alias(char *buf, int len, struct perf_pmu *pmu, struct perf_pmu_alias *alias) { - snprintf(buf, len, "%s/%s/", pmu->name, alias->name); + struct parse_events_term *term; + int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); + + list_for_each_entry(term, &alias->terms, list) { + if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + used += snprintf(buf + used, sub_non_neg(len, used), + ",%s=%s", term->config, + term->val.str); + } + + if (sub_non_neg(len, used) > 0) { + buf[used] = '/'; + used++; + } + if (sub_non_neg(len, used) > 0) { + buf[used] = '\0'; + used++; + } else + buf[len - 1] = '\0'; + return buf; } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 28eb1417cb2a..d05b77cf35f7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -41,6 +41,7 @@ #include "symbol.h" #include "thread.h" #include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> #include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" @@ -79,6 +80,7 @@ static int init_symbol_maps(bool user_only) int ret; symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; ret = symbol__init(NULL); if (ret < 0) { pr_debug("Failed to init symbol map.\n"); @@ -133,6 +135,8 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) return NULL; kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); + if (!kmap) + return NULL; return kmap->ref_reloc_sym; } @@ -150,7 +154,7 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) sym = __find_kernel_function_by_name(name, &map); if (sym) return map->unmap_ip(map, sym->start) - - (reloc) ? 0 : map->reloc; + ((reloc) ? 0 : map->reloc); } return 0; } @@ -177,6 +181,25 @@ static struct map *kernel_get_module_map(const char *module) return NULL; } +static struct map *get_target_map(const char *target, bool user) +{ + /* Init maps of given executable or kernel */ + if (user) + return dso__new_map(target); + else + return kernel_get_module_map(target); +} + +static void put_target_map(struct map *map, bool user) +{ + if (map && user) { + /* Only the user map needs to be released */ + dso__delete(map->dso); + map__delete(map); + } +} + + static struct dso *kernel_get_module_dso(const char *module) { struct dso *dso; @@ -248,6 +271,13 @@ out: return ret; } +static void clear_perf_probe_point(struct perf_probe_point *pp) +{ + free(pp->file); + free(pp->function); + free(pp->lazy_line); +} + static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) { int i; @@ -257,6 +287,104 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) } #ifdef HAVE_DWARF_SUPPORT +/* + * Some binaries like glibc have special symbols which are on the symbol + * table, but not in the debuginfo. If we can find the address of the + * symbol from map, we can translate the address back to the probe point. + */ +static int find_alternative_probe_point(struct debuginfo *dinfo, + struct perf_probe_point *pp, + struct perf_probe_point *result, + const char *target, bool uprobes) +{ + struct map *map = NULL; + struct symbol *sym; + u64 address = 0; + int ret = -ENOENT; + + /* This can work only for function-name based one */ + if (!pp->function || pp->file) + return -ENOTSUP; + + map = get_target_map(target, uprobes); + if (!map) + return -EINVAL; + + /* Find the address of given function */ + map__for_each_symbol_by_name(map, pp->function, sym) { + if (uprobes) + address = sym->start; + else + address = map->unmap_ip(map, sym->start); + break; + } + if (!address) { + ret = -ENOENT; + goto out; + } + pr_debug("Symbol %s address found : %" PRIx64 "\n", + pp->function, address); + + ret = debuginfo__find_probe_point(dinfo, (unsigned long)address, + result); + if (ret <= 0) + ret = (!ret) ? -ENOENT : ret; + else { + result->offset += pp->offset; + result->line += pp->line; + result->retprobe = pp->retprobe; + ret = 0; + } + +out: + put_target_map(map, uprobes); + return ret; + +} + +static int get_alternative_probe_event(struct debuginfo *dinfo, + struct perf_probe_event *pev, + struct perf_probe_point *tmp, + const char *target) +{ + int ret; + + memcpy(tmp, &pev->point, sizeof(*tmp)); + memset(&pev->point, 0, sizeof(pev->point)); + ret = find_alternative_probe_point(dinfo, tmp, &pev->point, + target, pev->uprobes); + if (ret < 0) + memcpy(&pev->point, tmp, sizeof(*tmp)); + + return ret; +} + +static int get_alternative_line_range(struct debuginfo *dinfo, + struct line_range *lr, + const char *target, bool user) +{ + struct perf_probe_point pp = { .function = lr->function, + .file = lr->file, + .line = lr->start }; + struct perf_probe_point result; + int ret, len = 0; + + memset(&result, 0, sizeof(result)); + + if (lr->end != INT_MAX) + len = lr->end - lr->start; + ret = find_alternative_probe_point(dinfo, &pp, &result, + target, user); + if (!ret) { + lr->function = result.function; + lr->file = result.file; + lr->start = result.line; + if (lr->end != INT_MAX) + lr->end = lr->start + len; + clear_perf_probe_point(&pp); + } + return ret; +} /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module, bool silent) @@ -446,7 +574,7 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs, } for (i = 0; i < ntevs; i++) { - if (tevs[i].point.address) { + if (tevs[i].point.address && !tevs[i].point.retprobe) { tmp = strdup(reloc_sym->name); if (!tmp) return -ENOMEM; @@ -465,6 +593,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, int max_tevs, const char *target) { bool need_dwarf = perf_probe_event_need_dwarf(pev); + struct perf_probe_point tmp; struct debuginfo *dinfo; int ntevs, ret = 0; @@ -481,6 +610,20 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, /* Searching trace events corresponding to a probe event */ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs); + if (ntevs == 0) { /* Not found, retry with an alternative */ + ret = get_alternative_probe_event(dinfo, pev, &tmp, target); + if (!ret) { + ntevs = debuginfo__find_trace_events(dinfo, pev, + tevs, max_tevs); + /* + * Write back to the original probe_event for + * setting appropriate (user given) event name + */ + clear_perf_probe_point(&pev->point); + memcpy(&pev->point, &tmp, sizeof(tmp)); + } + } + debuginfo__delete(dinfo); if (ntevs > 0) { /* Succeeded to find trace events */ @@ -512,63 +655,6 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, return ntevs; } -/* - * Find a src file from a DWARF tag path. Prepend optional source path prefix - * and chop off leading directories that do not exist. Result is passed back as - * a newly allocated path on success. - * Return 0 if file was found and readable, -errno otherwise. - */ -static int get_real_path(const char *raw_path, const char *comp_dir, - char **new_path) -{ - const char *prefix = symbol_conf.source_prefix; - - if (!prefix) { - if (raw_path[0] != '/' && comp_dir) - /* If not an absolute path, try to use comp_dir */ - prefix = comp_dir; - else { - if (access(raw_path, R_OK) == 0) { - *new_path = strdup(raw_path); - return 0; - } else - return -errno; - } - } - - *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2)); - if (!*new_path) - return -ENOMEM; - - for (;;) { - sprintf(*new_path, "%s/%s", prefix, raw_path); - - if (access(*new_path, R_OK) == 0) - return 0; - - if (!symbol_conf.source_prefix) - /* In case of searching comp_dir, don't retry */ - return -errno; - - switch (errno) { - case ENAMETOOLONG: - case ENOENT: - case EROFS: - case EFAULT: - raw_path = strchr(++raw_path, '/'); - if (!raw_path) { - zfree(new_path); - return -ENOENT; - } - continue; - - default: - zfree(new_path); - return -errno; - } - } -} - #define LINEBUF_SIZE 256 #define NR_ADDITIONAL_LINES 2 @@ -620,7 +706,8 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) * Show line-range always requires debuginfo to find source file and * line number. */ -static int __show_line_range(struct line_range *lr, const char *module) +static int __show_line_range(struct line_range *lr, const char *module, + bool user) { int l = 1; struct int_node *ln; @@ -636,6 +723,11 @@ static int __show_line_range(struct line_range *lr, const char *module) return -ENOENT; ret = debuginfo__find_line_range(dinfo, lr); + if (!ret) { /* Not found, retry with an alternative */ + ret = get_alternative_line_range(dinfo, lr, module, user); + if (!ret) + ret = debuginfo__find_line_range(dinfo, lr); + } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); @@ -648,7 +740,11 @@ static int __show_line_range(struct line_range *lr, const char *module) /* Convert source file path */ tmp = lr->path; ret = get_real_path(tmp, lr->comp_dir, &lr->path); - free(tmp); /* Free old path */ + + /* Free old path when new path is assigned */ + if (tmp != lr->path) + free(tmp); + if (ret < 0) { pr_warning("Failed to find source file path.\n"); return ret; @@ -705,7 +801,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user) ret = init_symbol_maps(user); if (ret < 0) return ret; - ret = __show_line_range(lr, module); + ret = __show_line_range(lr, module, user); exit_symbol_maps(); return ret; @@ -714,12 +810,13 @@ int show_line_range(struct line_range *lr, const char *module, bool user) static int show_available_vars_at(struct debuginfo *dinfo, struct perf_probe_event *pev, int max_vls, struct strfilter *_filter, - bool externs) + bool externs, const char *target) { char *buf; int ret, i, nvars; struct str_node *node; struct variable_list *vls = NULL, *vl; + struct perf_probe_point tmp; const char *var; buf = synthesize_perf_probe_point(&pev->point); @@ -729,6 +826,15 @@ static int show_available_vars_at(struct debuginfo *dinfo, ret = debuginfo__find_available_vars_at(dinfo, pev, &vls, max_vls, externs); + if (!ret) { /* Not found, retry with an alternative */ + ret = get_alternative_probe_event(dinfo, pev, &tmp, target); + if (!ret) { + ret = debuginfo__find_available_vars_at(dinfo, pev, + &vls, max_vls, externs); + /* Release the old probe_point */ + clear_perf_probe_point(&tmp); + } + } if (ret <= 0) { if (ret == 0 || ret == -ENOENT) { pr_err("Failed to find the address of %s\n", buf); @@ -791,7 +897,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, for (i = 0; i < npevs && ret >= 0; i++) ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter, - externs); + externs, module); debuginfo__delete(dinfo); out: @@ -978,6 +1084,8 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * TODO:Group name support */ + if (!arg) + return -EINVAL; ptr = strpbrk(arg, ";=@+%"); if (ptr && *ptr == '=') { /* Event name */ @@ -1737,15 +1845,13 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, void clear_perf_probe_event(struct perf_probe_event *pev) { - struct perf_probe_point *pp = &pev->point; struct perf_probe_arg_field *field, *next; int i; free(pev->event); free(pev->group); - free(pp->file); - free(pp->function); - free(pp->lazy_line); + free(pev->target); + clear_perf_probe_point(&pev->point); for (i = 0; i < pev->nargs; i++) { free(pev->args[i].name); @@ -1803,7 +1909,7 @@ static void print_open_warning(int err, bool is_kprobe) " - please rebuild kernel with %s.\n", is_kprobe ? 'k' : 'u', config); } else if (err == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else pr_warning("Failed to open %cprobe_events: %s\n", is_kprobe ? 'k' : 'u', @@ -1814,7 +1920,7 @@ static void print_both_open_warning(int kerr, int uerr) { /* Both kprobes and uprobes are disabled, warn it. */ if (kerr == -ENOTSUP && uerr == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else if (kerr == -ENOENT && uerr == -ENOENT) pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " "or/and CONFIG_UPROBE_EVENTS.\n"); @@ -1831,13 +1937,20 @@ static int open_probe_events(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; const char *__debugfs; + const char *tracing_dir = ""; int ret; - __debugfs = debugfs_find_mountpoint(); - if (__debugfs == NULL) - return -ENOTSUP; + __debugfs = tracefs_find_mountpoint(); + if (__debugfs == NULL) { + tracing_dir = "tracing/"; + + __debugfs = debugfs_find_mountpoint(); + if (__debugfs == NULL) + return -ENOTSUP; + } - ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file); + ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", + __debugfs, tracing_dir, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) @@ -1853,12 +1966,12 @@ static int open_probe_events(const char *trace_file, bool readwrite) static int open_kprobe_events(bool readwrite) { - return open_probe_events("tracing/kprobe_events", readwrite); + return open_probe_events("kprobe_events", readwrite); } static int open_uprobe_events(bool readwrite) { - return open_probe_events("tracing/uprobe_events", readwrite); + return open_probe_events("uprobe_events", readwrite); } /* Get raw string list of current kprobe_events or uprobe_events */ @@ -1893,6 +2006,95 @@ static struct strlist *get_probe_trace_command_rawlist(int fd) return sl; } +struct kprobe_blacklist_node { + struct list_head list; + unsigned long start; + unsigned long end; + char *symbol; +}; + +static void kprobe_blacklist__delete(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + + while (!list_empty(blacklist)) { + node = list_first_entry(blacklist, + struct kprobe_blacklist_node, list); + list_del(&node->list); + free(node->symbol); + free(node); + } +} + +static int kprobe_blacklist__load(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + const char *__debugfs = debugfs_find_mountpoint(); + char buf[PATH_MAX], *p; + FILE *fp; + int ret; + + if (__debugfs == NULL) + return -ENOTSUP; + + ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs); + if (ret < 0) + return ret; + + fp = fopen(buf, "r"); + if (!fp) + return -errno; + + ret = 0; + while (fgets(buf, PATH_MAX, fp)) { + node = zalloc(sizeof(*node)); + if (!node) { + ret = -ENOMEM; + break; + } + INIT_LIST_HEAD(&node->list); + list_add_tail(&node->list, blacklist); + if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) { + ret = -EINVAL; + break; + } + p = strchr(buf, '\t'); + if (p) { + p++; + if (p[strlen(p) - 1] == '\n') + p[strlen(p) - 1] = '\0'; + } else + p = (char *)"unknown"; + node->symbol = strdup(p); + if (!node->symbol) { + ret = -ENOMEM; + break; + } + pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n", + node->start, node->end, node->symbol); + ret++; + } + if (ret < 0) + kprobe_blacklist__delete(blacklist); + fclose(fp); + + return ret; +} + +static struct kprobe_blacklist_node * +kprobe_blacklist__find_by_address(struct list_head *blacklist, + unsigned long address) +{ + struct kprobe_blacklist_node *node; + + list_for_each_entry(node, blacklist, list) { + if (node->start <= address && address <= node->end) + return node; + } + + return NULL; +} + /* Show an event */ static int show_perf_probe_event(struct perf_probe_event *pev, const char *module) @@ -2050,9 +2252,11 @@ static int write_probe_trace_event(int fd, struct probe_trace_event *tev) pr_debug("Writing event: %s\n", buf); if (!probe_event_dry_run) { ret = write(fd, buf, strlen(buf)); - if (ret <= 0) + if (ret <= 0) { + ret = -errno; pr_warning("Failed to write event: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); + } } free(buf); return ret; @@ -2096,6 +2300,27 @@ static int get_new_event_name(char *buf, size_t len, const char *base, return ret; } +/* Warn if the current kernel's uprobe implementation is old */ +static void warn_uprobe_event_compat(struct probe_trace_event *tev) +{ + int i; + char *buf = synthesize_probe_trace_command(tev); + + /* Old uprobe event doesn't support memory dereference */ + if (!tev->uprobes || tev->nargs == 0 || !buf) + goto out; + + for (i = 0; i < tev->nargs; i++) + if (strglobmatch(tev->args[i].value, "[$@+-]*")) { + pr_warning("Please upgrade your kernel to at least " + "3.14 to have access to feature %s\n", + tev->args[i].value); + break; + } +out: + free(buf); +} + static int __add_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event *tevs, int ntevs, bool allow_suffix) @@ -2105,6 +2330,8 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, char buf[64]; const char *event, *group; struct strlist *namelist; + LIST_HEAD(blacklist); + struct kprobe_blacklist_node *node; if (pev->uprobes) fd = open_uprobe_events(true); @@ -2122,11 +2349,25 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, pr_debug("Failed to get current event list.\n"); return -EIO; } + /* Get kprobe blacklist if exists */ + if (!pev->uprobes) { + ret = kprobe_blacklist__load(&blacklist); + if (ret < 0) + pr_debug("No kprobe blacklist support, ignored\n"); + } ret = 0; pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; + /* Ensure that the address is NOT blacklisted */ + node = kprobe_blacklist__find_by_address(&blacklist, + tev->point.address); + if (node) { + pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol); + continue; + } + if (pev->event) event = pev->event; else @@ -2176,31 +2417,33 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, */ allow_suffix = true; } + if (ret == -EINVAL && pev->uprobes) + warn_uprobe_event_compat(tev); - if (ret >= 0) { + /* Note that it is possible to skip all events because of blacklist */ + if (ret >= 0 && tev->event) { /* Show how to use the event. */ pr_info("\nYou can now use it in all perf tools, such as:\n\n"); pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, tev->event); } + kprobe_blacklist__delete(&blacklist); strlist__delete(namelist); close(fd); return ret; } -static char *looking_function_name; -static int num_matched_functions; - -static int probe_function_filter(struct map *map __maybe_unused, - struct symbol *sym) +static int find_probe_functions(struct map *map, char *name) { - if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) && - strcmp(looking_function_name, sym->name) == 0) { - num_matched_functions++; - return 0; + int found = 0; + struct symbol *sym; + + map__for_each_symbol_by_name(map, name, sym) { + found++; } - return 1; + + return found; } #define strdup_or_goto(str, label) \ @@ -2215,20 +2458,15 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, int max_tevs, const char *target) { struct map *map = NULL; - struct kmap *kmap = NULL; struct ref_reloc_sym *reloc_sym = NULL; struct symbol *sym; - struct rb_node *nd; struct probe_trace_event *tev; struct perf_probe_point *pp = &pev->point; struct probe_trace_point *tp; + int num_matched_functions; int ret, i; - /* Init maps of given executable or kernel */ - if (pev->uprobes) - map = dso__new_map(target); - else - map = kernel_get_module_map(target); + map = get_target_map(target, pev->uprobes); if (!map) { ret = -EINVAL; goto out; @@ -2238,10 +2476,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, * Load matched symbols: Since the different local symbols may have * same name but different addresses, this lists all the symbols. */ - num_matched_functions = 0; - looking_function_name = pp->function; - ret = map__load(map, probe_function_filter); - if (ret || num_matched_functions == 0) { + num_matched_functions = find_probe_functions(map, pp->function); + if (num_matched_functions == 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, target ? : "kernel"); ret = -ENOENT; @@ -2253,9 +2489,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, goto out; } - if (!pev->uprobes) { - kmap = map__kmap(map); - reloc_sym = kmap->ref_reloc_sym; + if (!pev->uprobes && !pp->retprobe) { + reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); ret = -EINVAL; @@ -2271,7 +2506,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } ret = 0; - map__for_each_symbol(map, sym, nd) { + + map__for_each_symbol_by_name(map, pp->function, sym) { tev = (*tevs) + ret; tp = &tev->point; if (ret == num_matched_functions) { @@ -2322,11 +2558,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } out: - if (map && pev->uprobes) { - /* Only when using uprobe(exec) map needs to be released */ - dso__delete(map->dso); - map__delete(map); - } + put_target_map(map, pev->uprobes); return ret; nomem_out: @@ -2367,7 +2599,7 @@ struct __event_package { }; int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, - int max_tevs, const char *target, bool force_add) + int max_tevs, bool force_add) { int i, j, ret; struct __event_package *pkgs; @@ -2391,7 +2623,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, ret = convert_to_probe_trace_events(pkgs[i].pev, &pkgs[i].tevs, max_tevs, - target); + pkgs[i].pev->target); if (ret < 0) goto end; pkgs[i].ntevs = ret; @@ -2566,8 +2798,7 @@ static struct strfilter *available_func_filter; static int filter_available_functions(struct map *map __maybe_unused, struct symbol *sym) { - if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) && - strfilter__compare(available_func_filter, sym->name)) + if (strfilter__compare(available_func_filter, sym->name)) return 0; return 1; } diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index e01e9943139f..d6b783447be9 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -73,7 +73,8 @@ struct perf_probe_event { char *group; /* Group name */ struct perf_probe_point point; /* Probe point */ int nargs; /* Number of arguments */ - bool uprobes; + bool uprobes; /* Uprobe event flag */ + char *target; /* Target binary */ struct perf_probe_arg *args; /* Arguments */ }; @@ -124,8 +125,7 @@ extern int line_range__init(struct line_range *lr); extern const char *kernel_get_module_path(const char *module); extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, - int max_probe_points, const char *module, - bool force_add); + int max_probe_points, bool force_add); extern int del_perf_probe_events(struct strlist *dellist); extern int show_perf_probe_events(void); extern int show_line_range(struct line_range *lr, const char *module, diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index c7918f83b300..2a76e14db732 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -456,11 +456,12 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, return -EINVAL; } if (field->name[0] == '[') { - pr_err("Semantic error: %s is not a pointor" + pr_err("Semantic error: %s is not a pointer" " nor array.\n", varname); return -EINVAL; } - if (field->ref) { + /* While prcessing unnamed field, we don't care about this */ + if (field->ref && dwarf_diename(vr_die)) { pr_err("Semantic error: %s must be referred by '.'\n", field->name); return -EINVAL; @@ -491,6 +492,11 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } ref->offset += (long)offs; + /* If this member is unnamed, we need to reuse this field */ + if (!dwarf_diename(die_mem)) + return convert_variable_fields(die_mem, varname, field, + &ref, die_mem); + next: /* Converting next field */ if (field->next) @@ -572,10 +578,12 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) /* Search child die for local variables and parameters. */ if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) { /* Search again in global variables */ - if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) + if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, + 0, &vr_die)) { pr_warning("Failed to find '%s' in this function.\n", pf->pvar->var); ret = -ENOENT; + } } if (ret >= 0) ret = convert_variable(&vr_die, pf); @@ -849,11 +857,22 @@ static int probe_point_lazy_walker(const char *fname, int lineno, static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { int ret = 0; + char *fpath; if (intlist__empty(pf->lcache)) { + const char *comp_dir; + + comp_dir = cu_get_comp_dir(&pf->cu_die); + ret = get_real_path(pf->fname, comp_dir, &fpath); + if (ret < 0) { + pr_warning("Failed to find source file path.\n"); + return ret; + } + /* Matching lazy line pattern */ - ret = find_lazy_match_lines(pf->lcache, pf->fname, + ret = find_lazy_match_lines(pf->lcache, fpath, pf->pev->point.lazy_line); + free(fpath); if (ret <= 0) return ret; } @@ -915,17 +934,13 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; param->retval = find_probe_point_by_line(pf); - } else if (!dwarf_func_inline(sp_die)) { + } else if (die_is_func_instance(sp_die)) { + /* Instances always have the entry address */ + dwarf_entrypc(sp_die, &pf->addr); /* Real function */ if (pp->lazy_line) param->retval = find_probe_point_lazy(sp_die, pf); else { - if (dwarf_entrypc(sp_die, &pf->addr) != 0) { - pr_warning("Failed to get entry address of " - "%s.\n", dwarf_diename(sp_die)); - param->retval = -ENOENT; - return DWARF_CB_ABORT; - } pf->addr += pp->offset; /* TODO: Check the address in this function */ param->retval = call_probe_finder(sp_die, pf); @@ -989,8 +1004,24 @@ static int debuginfo__find_probes(struct debuginfo *dbg, int ret = 0; #if _ELFUTILS_PREREQ(0, 142) + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + /* Get the call frame information from this dwarf */ - pf->cfi = dwarf_getcfi_elf(dwarf_getelf(dbg->dbg)); + elf = dwarf_getelf(dbg->dbg); + if (elf == NULL) + return -EINVAL; + + if (gelf_getehdr(elf, &ehdr) == NULL) + return -EINVAL; + + if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && + shdr.sh_type == SHT_PROGBITS) { + pf->cfi = dwarf_getcfi_elf(elf); + } else { + pf->cfi = dwarf_getcfi(dbg->dbg); + } #endif off = 0; @@ -1037,7 +1068,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, if (pp->function) ret = find_probe_point_by_func(pf); else if (pp->lazy_line) - ret = find_probe_point_lazy(NULL, pf); + ret = find_probe_point_lazy(&pf->cu_die, pf); else { pf->lno = pp->line; ret = find_probe_point_by_line(pf); @@ -1333,11 +1364,8 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; - /* Adjust address with bias */ - addr += dbg->bias; - /* Find cu die */ - if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr - dbg->bias, &cudie)) { + if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; @@ -1520,7 +1548,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); lr->start = lf->lno_s; lr->end = lf->lno_e; - if (dwarf_func_inline(sp_die)) + if (!die_is_func_instance(sp_die)) param->retval = die_walk_instances(sp_die, line_range_inline_cb, lf); else @@ -1607,3 +1635,61 @@ found: return (ret < 0) ? ret : lf.found; } +/* + * Find a src file from a DWARF tag path. Prepend optional source path prefix + * and chop off leading directories that do not exist. Result is passed back as + * a newly allocated path on success. + * Return 0 if file was found and readable, -errno otherwise. + */ +int get_real_path(const char *raw_path, const char *comp_dir, + char **new_path) +{ + const char *prefix = symbol_conf.source_prefix; + + if (!prefix) { + if (raw_path[0] != '/' && comp_dir) + /* If not an absolute path, try to use comp_dir */ + prefix = comp_dir; + else { + if (access(raw_path, R_OK) == 0) { + *new_path = strdup(raw_path); + return *new_path ? 0 : -ENOMEM; + } else + return -errno; + } + } + + *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2)); + if (!*new_path) + return -ENOMEM; + + for (;;) { + sprintf(*new_path, "%s/%s", prefix, raw_path); + + if (access(*new_path, R_OK) == 0) + return 0; + + if (!symbol_conf.source_prefix) { + /* In case of searching comp_dir, don't retry */ + zfree(new_path); + return -errno; + } + + switch (errno) { + case ENAMETOOLONG: + case ENOENT: + case EROFS: + case EFAULT: + raw_path = strchr(++raw_path, '/'); + if (!raw_path) { + zfree(new_path); + return -ENOENT; + } + continue; + + default: + zfree(new_path); + return -errno; + } + } +} diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 92590b2c7e1c..ebf8c8c81453 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -55,6 +55,10 @@ extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct variable_list **vls, int max_points, bool externs); +/* Find a src file from a DWARF tag path */ +int get_real_path(const char *raw_path, const char *comp_dir, + char **new_path); + struct probe_finder { struct perf_probe_event *pev; /* Target probe event */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 16a475a7d492..4d28624a1eca 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,13 +10,12 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c -util/hweight.c +../../lib/hweight.c util/thread_map.c util/util.c util/xyarray.c util/cgroup.c util/rblist.c util/strlist.c -../lib/api/fs/fs.c util/trace-event.c ../../lib/rbtree.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3dda85ca50c1..d906d0ad5d40 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -768,7 +768,7 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist, Py_DECREF(file); goto free_list; } - + Py_DECREF(file); } diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build new file mode 100644 index 000000000000..6516e220c247 --- /dev/null +++ b/tools/perf/util/scripting-engines/Build @@ -0,0 +1,6 @@ +libperf-$(CONFIG_LIBPERL) += trace-event-perl.o +libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o + +CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default + +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 22ebc46226e7..430b5d27828e 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -214,6 +214,11 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; + case PRINT_INT_ARRAY: + define_event_symbols(event, ev_name, args->int_array.field); + define_event_symbols(event, ev_name, args->int_array.count); + define_event_symbols(event, ev_name, args->int_array.el_size); + break; case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: case PRINT_STRING: @@ -355,10 +360,9 @@ static void perl_process_event_generic(union perf_event *event, static void perl_process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, - struct addr_location *al __maybe_unused) + struct addr_location *al) { - perl_process_tracepoint(sample, evsel, thread); + perl_process_tracepoint(sample, evsel, al->thread); perl_process_event_generic(event, sample, evsel); } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d808a328f4dc..5544b8cdd1ee 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -89,7 +89,7 @@ static void handler_call_die(const char *handler_name) /* * Insert val into into the dictionary and decrement the reference counter. - * This is necessary for dictionaries since PyDict_SetItemString() does not + * This is necessary for dictionaries since PyDict_SetItemString() does not * steal a reference, as opposed to PyTuple_SetItem(). */ static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val) @@ -231,6 +231,11 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; + case PRINT_INT_ARRAY: + define_event_symbols(event, ev_name, args->int_array.field); + define_event_symbols(event, ev_name, args->int_array.count); + define_event_symbols(event, ev_name, args->int_array.el_size); + break; case PRINT_STRING: break; case PRINT_TYPE: @@ -376,7 +381,6 @@ exit: static void python_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { struct event_format *event = evsel->tp_format; @@ -390,7 +394,7 @@ static void python_process_tracepoint(struct perf_sample *sample, int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; - const char *comm = thread__comm_str(thread); + const char *comm = thread__comm_str(al->thread); t = PyTuple_New(MAX_FIELDS); if (!t) @@ -675,7 +679,7 @@ static int python_export_sample(struct db_export *dbe, tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); tuple_set_u64(t, 2, es->al->machine->db_id); - tuple_set_u64(t, 3, es->thread->db_id); + tuple_set_u64(t, 3, es->al->thread->db_id); tuple_set_u64(t, 4, es->comm_db_id); tuple_set_u64(t, 5, es->dso_db_id); tuple_set_u64(t, 6, es->sym_db_id); @@ -761,7 +765,6 @@ static int python_process_call_return(struct call_return *cr, void *data) static void python_process_general_event(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { PyObject *handler, *t, *dict, *callchain, *dict_sample; @@ -811,7 +814,7 @@ static void python_process_general_event(struct perf_sample *sample, pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", - PyString_FromString(thread__comm_str(thread))); + PyString_FromString(thread__comm_str(al->thread))); if (al->map) { pydict_set_item_string_decref(dict, "dso", PyString_FromString(al->map->dso->name)); @@ -838,22 +841,20 @@ exit: static void python_process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { struct tables *tables = &tables_global; switch (evsel->attr.type) { case PERF_TYPE_TRACEPOINT: - python_process_tracepoint(sample, evsel, thread, al); + python_process_tracepoint(sample, evsel, al); break; /* Reserve for future process_hw/sw/raw APIs */ default: if (tables->db_export_mode) - db_export__sample(&tables->dbe, event, sample, evsel, - thread, al); + db_export__sample(&tables->dbe, event, sample, evsel, al); else - python_process_general_event(sample, evsel, thread, al); + python_process_general_event(sample, evsel, al); } } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5f0e05a76c05..0c74012575ac 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -16,6 +16,12 @@ #include "perf_regs.h" #include "asm/bug.h" +static int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, u64 file_offset); + static int perf_session__open(struct perf_session *session) { struct perf_data_file *file = session->file; @@ -86,6 +92,23 @@ static void perf_session__set_comm_exec(struct perf_session *session) machines__set_comm_exec(&session->machines, comm_exec); } +static int ordered_events__deliver_event(struct ordered_events *oe, + struct ordered_event *event) +{ + struct perf_sample sample; + struct perf_session *session = container_of(oe, struct perf_session, + ordered_events); + int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); + + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + return machines__deliver_event(&session->machines, session->evlist, event->event, + &sample, session->tool, event->file_offset); +} + struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool) { @@ -95,8 +118,9 @@ struct perf_session *perf_session__new(struct perf_data_file *file, goto out; session->repipe = repipe; - ordered_events__init(&session->ordered_events); + session->tool = tool; machines__init(&session->machines); + ordered_events__init(&session->ordered_events, ordered_events__deliver_event); if (file) { if (perf_data_file__open(file)) @@ -138,11 +162,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file, return NULL; } -static void perf_session__delete_dead_threads(struct perf_session *session) -{ - machine__delete_dead_threads(&session->machines.host); -} - static void perf_session__delete_threads(struct perf_session *session) { machine__delete_threads(&session->machines.host); @@ -167,7 +186,6 @@ static void perf_session_env__delete(struct perf_session_env *env) void perf_session__delete(struct perf_session *session) { perf_session__destroy_kernel_maps(session); - perf_session__delete_dead_threads(session); perf_session__delete_threads(session); perf_session_env__delete(&session->header.env); machines__exit(&session->machines); @@ -215,10 +233,17 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused, return 0; } +static int process_build_id_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) + struct ordered_events *oe __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -226,7 +251,7 @@ static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, static int process_finished_round(struct perf_tool *tool, union perf_event *event, - struct perf_session *session); + struct ordered_events *oe); static int process_id_index_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, @@ -264,7 +289,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->tracing_data == NULL) tool->tracing_data = process_event_synth_tracing_data_stub; if (tool->build_id == NULL) - tool->build_id = process_finished_round_stub; + tool->build_id = process_build_id_stub; if (tool->finished_round == NULL) { if (tool->ordered_events) tool->finished_round = process_finished_round; @@ -274,7 +299,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->id_index == NULL) tool->id_index = process_id_index_stub; } - + static void swap_sample_id_all(union perf_event *event, void *data) { void *end = (void *) event + event->header.size; @@ -514,54 +539,80 @@ static perf_event__swap_op perf_event__swap_ops[] = { * Flush every events below timestamp 7 * etc... */ -static int process_finished_round(struct perf_tool *tool, +static int process_finished_round(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct perf_session *session) + struct ordered_events *oe) { - return ordered_events__flush(session, tool, OE_FLUSH__ROUND); + return ordered_events__flush(oe, OE_FLUSH__ROUND); } -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset) +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset) { - struct ordered_events *oe = &s->ordered_events; - u64 timestamp = sample->time; - struct ordered_event *new; - - if (!timestamp || timestamp == ~0ULL) - return -ETIME; + return ordered_events__queue(&s->ordered_events, event, sample, file_offset); +} - if (timestamp < oe->last_flush) { - pr_oe_time(timestamp, "out of order event\n"); - pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", - oe->last_flush_type); +static void callchain__lbr_callstack_printf(struct perf_sample *sample) +{ + struct ip_callchain *callchain = sample->callchain; + struct branch_stack *lbr_stack = sample->branch_stack; + u64 kernel_callchain_nr = callchain->nr; + unsigned int i; - s->stats.nr_unordered_events++; + for (i = 0; i < kernel_callchain_nr; i++) { + if (callchain->ips[i] == PERF_CONTEXT_USER) + break; } - new = ordered_events__new(oe, timestamp, event); - if (!new) { - ordered_events__flush(s, tool, OE_FLUSH__HALF); - new = ordered_events__new(oe, timestamp, event); - } + if ((i != kernel_callchain_nr) && lbr_stack->nr) { + u64 total_nr; + /* + * LBR callstack can only get user call chain, + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER. + * + * The user call chain is stored in LBR registers. + * LBR are pair registers. The caller is stored + * in "from" register, while the callee is stored + * in "to" register. + * For example, there is a call stack + * "A"->"B"->"C"->"D". + * The LBR registers will recorde like + * "C"->"D", "B"->"C", "A"->"B". + * So only the first "to" register and all "from" + * registers are needed to construct the whole stack. + */ + total_nr = i + 1 + lbr_stack->nr + 1; + kernel_callchain_nr = i + 1; - if (!new) - return -ENOMEM; + printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr); - new->file_offset = file_offset; - return 0; + for (i = 0; i < kernel_callchain_nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + i, callchain->ips[i]); + + printf("..... %2d: %016" PRIx64 "\n", + (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + for (i = 0; i < lbr_stack->nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + } } -static void callchain__printf(struct perf_sample *sample) +static void callchain__printf(struct perf_evsel *evsel, + struct perf_sample *sample) { unsigned int i; + struct ip_callchain *callchain = sample->callchain; + + if (has_branch_callstack(evsel)) + callchain__lbr_callstack_printf(sample); - printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr); + printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); - for (i = 0; i < sample->callchain->nr; i++) + for (i = 0; i < callchain->nr; i++) printf("..... %2d: %016" PRIx64 "\n", - i, sample->callchain->ips[i]); + i, callchain->ips[i]); } static void branch_stack__printf(struct perf_sample *sample) @@ -636,14 +687,14 @@ static void stack_user__printf(struct stack_dump *dump) dump->size, dump->offset); } -static void perf_session__print_tstamp(struct perf_session *session, +static void perf_evlist__print_tstamp(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample) { - u64 sample_type = __perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = __perf_evlist__combined_sample_type(evlist); if (event->header.type != PERF_RECORD_SAMPLE && - !perf_evlist__sample_id_all(session->evlist)) { + !perf_evlist__sample_id_all(evlist)) { fputs("-1 -1 ", stdout); return; } @@ -685,7 +736,7 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format) sample->read.one.id, sample->read.one.value); } -static void dump_event(struct perf_session *session, union perf_event *event, +static void dump_event(struct perf_evlist *evlist, union perf_event *event, u64 file_offset, struct perf_sample *sample) { if (!dump_trace) @@ -697,7 +748,7 @@ static void dump_event(struct perf_session *session, union perf_event *event, trace_event(event); if (sample) - perf_session__print_tstamp(session, event, sample); + perf_evlist__print_tstamp(evlist, event, sample); printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, event->header.size, perf_event__name(event->header.type)); @@ -718,9 +769,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_type = evsel->attr.sample_type; if (sample_type & PERF_SAMPLE_CALLCHAIN) - callchain__printf(sample); + callchain__printf(evsel, sample); - if (sample_type & PERF_SAMPLE_BRANCH_STACK) + if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel)) branch_stack__printf(sample); if (sample_type & PERF_SAMPLE_REGS_USER) @@ -745,8 +796,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->attr.read_format); } -static struct machine * - perf_session__find_machine_for_cpumode(struct perf_session *session, +static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { @@ -764,26 +814,24 @@ static struct machine * else pid = sample->pid; - machine = perf_session__find_machine(session, pid); + machine = machines__find(machines, pid); if (!machine) - machine = perf_session__findnew_machine(session, - DEFAULT_GUEST_KERNEL_ID); + machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID); return machine; } - return &session->machines.host; + return &machines->host; } -static int deliver_sample_value(struct perf_session *session, +static int deliver_sample_value(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, struct machine *machine) { - struct perf_sample_id *sid; + struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); - sid = perf_evlist__id2sid(session->evlist, v->id); if (sid) { sample->id = v->id; sample->period = v->value - sid->period; @@ -791,14 +839,14 @@ static int deliver_sample_value(struct perf_session *session, } if (!sid || sid->evsel == NULL) { - ++session->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } return tool->sample(tool, event, sample, sid->evsel, machine); } -static int deliver_sample_group(struct perf_session *session, +static int deliver_sample_group(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -808,7 +856,7 @@ static int deliver_sample_group(struct perf_session *session, u64 i; for (i = 0; i < sample->read.group.nr; i++) { - ret = deliver_sample_value(session, tool, event, sample, + ret = deliver_sample_value(evlist, tool, event, sample, &sample->read.group.values[i], machine); if (ret) @@ -819,7 +867,7 @@ static int deliver_sample_group(struct perf_session *session, } static int -perf_session__deliver_sample(struct perf_session *session, + perf_evlist__deliver_sample(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -836,41 +884,40 @@ perf_session__deliver_sample(struct perf_session *session, /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) - return deliver_sample_group(session, tool, event, sample, + return deliver_sample_group(evlist, tool, event, sample, machine); else - return deliver_sample_value(session, tool, event, sample, + return deliver_sample_value(evlist, tool, event, sample, &sample->read.one, machine); } -int perf_session__deliver_event(struct perf_session *session, - union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset) +static int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, u64 file_offset) { struct perf_evsel *evsel; struct machine *machine; - dump_event(session, event, file_offset, sample); + dump_event(evlist, event, file_offset, sample); - evsel = perf_evlist__id2evsel(session->evlist, sample->id); + evsel = perf_evlist__id2evsel(evlist, sample->id); - machine = perf_session__find_machine_for_cpumode(session, event, - sample); + machine = machines__find_for_cpumode(machines, event, sample); switch (event->header.type) { case PERF_RECORD_SAMPLE: dump_sample(evsel, event, sample); if (evsel == NULL) { - ++session->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } if (machine == NULL) { - ++session->stats.nr_unprocessable_samples; + ++evlist->stats.nr_unprocessable_samples; return 0; } - return perf_session__deliver_sample(session, tool, event, - sample, evsel, machine); + return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_MMAP2: @@ -883,7 +930,7 @@ int perf_session__deliver_event(struct perf_session *session, return tool->exit(tool, event, sample, machine); case PERF_RECORD_LOST: if (tool->lost == perf_event__process_lost) - session->stats.total_lost += event->lost.lost; + evlist->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); case PERF_RECORD_READ: return tool->read(tool, event, sample, evsel, machine); @@ -892,20 +939,21 @@ int perf_session__deliver_event(struct perf_session *session, case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); default: - ++session->stats.nr_unknown_events; + ++evlist->stats.nr_unknown_events; return -1; } } static s64 perf_session__process_user_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, u64 file_offset) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); int err; - dump_event(session, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, NULL); /* These events are processed right away */ switch (event->header.type) { @@ -929,7 +977,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_HEADER_BUILD_ID: return tool->build_id(tool, event, session); case PERF_RECORD_FINISHED_ROUND: - return tool->finished_round(tool, event, session); + return tool->finished_round(tool, event, oe); case PERF_RECORD_ID_INDEX: return tool->id_index(tool, event, session); default: @@ -939,15 +987,17 @@ static s64 perf_session__process_user_event(struct perf_session *session, int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool) + struct perf_sample *sample) { - events_stats__inc(&session->stats, event->header.type); + struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->tool; + + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, 0); + return perf_session__process_user_event(session, event, 0); - return perf_session__deliver_event(session, event, sample, tool, 0); + return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0); } static void event_swap(union perf_event *event, bool sample_id_all) @@ -1015,40 +1065,39 @@ out_parse_sample: } static s64 perf_session__process_event(struct perf_session *session, - union perf_event *event, - struct perf_tool *tool, - u64 file_offset) + union perf_event *event, u64 file_offset) { + struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->tool; struct perf_sample sample; int ret; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(session->evlist)); + event_swap(event, perf_evlist__sample_id_all(evlist)); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; - events_stats__inc(&session->stats, event->header.type); + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, file_offset); + return perf_session__process_user_event(session, event, file_offset); /* * For all kernel events we get the sample data */ - ret = perf_evlist__parse_sample(session->evlist, event, &sample); + ret = perf_evlist__parse_sample(evlist, event, &sample); if (ret) return ret; if (tool->ordered_events) { - ret = perf_session_queue_event(session, event, tool, &sample, - file_offset); + ret = perf_session__queue_event(session, event, &sample, file_offset); if (ret != -ETIME) return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return machines__deliver_event(&session->machines, evlist, event, + &sample, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) @@ -1076,54 +1125,57 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se return thread; } -static void perf_session__warn_about_errors(const struct perf_session *session, - const struct perf_tool *tool) +static void perf_session__warn_about_errors(const struct perf_session *session) { - if (tool->lost == perf_event__process_lost && - session->stats.nr_events[PERF_RECORD_LOST] != 0) { + const struct events_stats *stats = &session->evlist->stats; + const struct ordered_events *oe = &session->ordered_events; + + if (session->tool->lost == perf_event__process_lost && + stats->nr_events[PERF_RECORD_LOST] != 0) { ui__warning("Processed %d events and lost %d chunks!\n\n" "Check IO/CPU overload!\n\n", - session->stats.nr_events[0], - session->stats.nr_events[PERF_RECORD_LOST]); + stats->nr_events[0], + stats->nr_events[PERF_RECORD_LOST]); } - if (session->stats.nr_unknown_events != 0) { + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " "file generated by a more recent tool?\n\n" "If that is not the case, consider " "reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_unknown_events); + stats->nr_unknown_events); } - if (session->stats.nr_unknown_id != 0) { + if (stats->nr_unknown_id != 0) { ui__warning("%u samples with id not present in the header\n", - session->stats.nr_unknown_id); + stats->nr_unknown_id); } - if (session->stats.nr_invalid_chains != 0) { - ui__warning("Found invalid callchains!\n\n" - "%u out of %u events were discarded for this reason.\n\n" - "Consider reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_invalid_chains, - session->stats.nr_events[PERF_RECORD_SAMPLE]); - } + if (stats->nr_invalid_chains != 0) { + ui__warning("Found invalid callchains!\n\n" + "%u out of %u events were discarded for this reason.\n\n" + "Consider reporting to linux-kernel@vger.kernel.org.\n\n", + stats->nr_invalid_chains, + stats->nr_events[PERF_RECORD_SAMPLE]); + } - if (session->stats.nr_unprocessable_samples != 0) { + if (stats->nr_unprocessable_samples != 0) { ui__warning("%u unprocessable samples recorded.\n" "Do you have a KVM guest running and not using 'perf kvm'?\n", - session->stats.nr_unprocessable_samples); + stats->nr_unprocessable_samples); } - if (session->stats.nr_unordered_events != 0) - ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events); + if (oe->nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); } volatile int session_done; -static int __perf_session__process_pipe_events(struct perf_session *session, - struct perf_tool *tool) +static int __perf_session__process_pipe_events(struct perf_session *session) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); union perf_event *event; uint32_t size, cur_size = 0; @@ -1187,7 +1239,7 @@ more: } } - if ((skip = perf_session__process_event(session, event, tool, head)) < 0) { + if ((skip = perf_session__process_event(session, event, head)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", head, event->header.size, event->header.type); err = -EINVAL; @@ -1203,10 +1255,10 @@ more: goto more; done: /* do the final flush for ordered samples */ - err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); out_err: free(buf); - perf_session__warn_about_errors(session, tool); + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); return err; } @@ -1251,10 +1303,12 @@ fetch_mmaped_event(struct perf_session *session, #define NUM_MMAPS 128 #endif -int __perf_session__process_events(struct perf_session *session, - u64 data_offset, u64 data_size, - u64 file_size, struct perf_tool *tool) +static int __perf_session__process_events(struct perf_session *session, + u64 data_offset, u64 data_size, + u64 file_size) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; @@ -1323,8 +1377,7 @@ more: size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, tool, file_pos)) - < 0) { + (skip = perf_session__process_event(session, event, file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", file_offset + head, event->header.size, event->header.type); @@ -1348,17 +1401,16 @@ more: out: /* do the final flush for ordered samples */ - err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session, tool); + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); session->one_mmap = false; return err; } -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool) +int perf_session__process_events(struct perf_session *session) { u64 size = perf_data_file__size(session->file); int err; @@ -1369,10 +1421,9 @@ int perf_session__process_events(struct perf_session *session, if (!perf_data_file__is_pipe(session->file)) err = __perf_session__process_events(session, session->header.data_offset, - session->header.data_size, - size, tool); + session->header.data_size, size); else - err = __perf_session__process_pipe_events(session, tool); + err = __perf_session__process_pipe_events(session); return err; } @@ -1415,6 +1466,9 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps, for (i = 0; i < MAP__NR_TYPES; ++i) { struct kmap *kmap = map__kmap(maps[i]); + + if (!kmap) + continue; kmap->ref_reloc_sym = ref; } @@ -1436,7 +1490,7 @@ size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) { size_t ret = fprintf(fp, "Aggregated stats:\n"); - ret += events_stats__fprintf(&session->stats, fp); + ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index dc26ebf60fe4..d5fa7b7916ef 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -20,13 +20,13 @@ struct perf_session { struct machines machines; struct perf_evlist *evlist; struct trace_event tevent; - struct events_stats stats; bool repipe; bool one_mmap; void *one_mmap_addr; u64 one_mmap_offset; struct ordered_events ordered_events; struct perf_data_file *file; + struct perf_tool *tool; }; #define PRINT_IP_OPT_IP (1<<0) @@ -49,23 +49,13 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, union perf_event **event_ptr, struct perf_sample *sample); -int __perf_session__process_events(struct perf_session *session, - u64 data_offset, u64 data_size, u64 size, - struct perf_tool *tool); -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool); +int perf_session__process_events(struct perf_session *session); -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset); +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); -int perf_session__deliver_event(struct perf_session *session, - union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset); - int perf_session__resolve_callchain(struct perf_session *session, struct perf_evsel *evsel, struct thread *thread, @@ -129,8 +119,7 @@ extern volatile int session_done; int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool); + struct perf_sample *sample); int perf_event__process_id_index(struct perf_tool *tool, union perf_event *event, diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index d0aee4b9dfd4..1833103768cb 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -25,7 +25,7 @@ cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') -libapikfs = getenv('LIBAPIKFS') +libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 9139dda9f9a3..4593f36ecc4c 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1304,6 +1304,37 @@ static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return hse->se->se_snprintf(he, hpp->buf, hpp->size, len); } +static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + return hse->se->se_cmp(a, b); +} + +static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *); + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp; + return collapse_fn(a, b); +} + +static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_sort_entry *hse; + int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *); + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + sort_fn = hse->se->se_sort ?: hse->se->se_cmp; + return sort_fn(a, b); +} + static struct hpp_sort_entry * __sort_dimension__alloc_hpp(struct sort_dimension *sd) { @@ -1322,9 +1353,9 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.entry = __sort__hpp_entry; hse->hpp.color = NULL; - hse->hpp.cmp = sd->entry->se_cmp; - hse->hpp.collapse = sd->entry->se_collapse ? : sd->entry->se_cmp; - hse->hpp.sort = sd->entry->se_sort ? : hse->hpp.collapse; + hse->hpp.cmp = __sort__hpp_cmp; + hse->hpp.collapse = __sort__hpp_collapse; + hse->hpp.sort = __sort__hpp_sort; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); @@ -1432,6 +1463,15 @@ int sort_dimension__add(const char *tok) sort__has_parent = 1; } else if (sd->entry == &sort_sym) { sort__has_sym = 1; + /* + * perf diff displays the performance difference amongst + * two or more perf.data files. Those files could come + * from different binaries. So we should not compare + * their ips, but the name of symbol. + */ + if (sort__mode == SORT_MODE__DIFF) + sd->entry->se_collapse = sort__sym_sort; + } else if (sd->entry == &sort_dso) { sort__has_dso = 1; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index c03e4ff8beff..846036a921dc 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -44,6 +44,7 @@ extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern enum sort_type sort__first_dimension; +extern const char default_mem_sort_order[]; struct he_stat { u64 period; @@ -102,7 +103,6 @@ struct hist_entry { bool init_have_children; char level; - bool used; u8 filtered; char *srcline; struct symbol *parent; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 06fcd1bf98b6..a7ab6063e038 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -11,6 +11,11 @@ #include <symbol/kallsyms.h> #include "debug.h" +#ifndef EM_AARCH64 +#define EM_AARCH64 183 /* ARM 64 bit */ +#endif + + #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT extern char *cplus_demangle(const char *, int); @@ -69,6 +74,10 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +#ifndef STT_GNU_IFUNC +#define STT_GNU_IFUNC 10 +#endif + static inline int elf_sym__is_function(const GElf_Sym *sym) { return (elf_sym__type(sym) == STT_FUNC || @@ -570,29 +579,37 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) static int decompress_kmodule(struct dso *dso, const char *name, enum dso_binary_type type) { - int fd; - const char *ext = strrchr(name, '.'); + int fd = -1; char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; + struct kmod_path m; - if ((type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && - type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP) || - type != dso->symtab_type) + if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && + type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP && + type != DSO_BINARY_TYPE__BUILD_ID_CACHE) return -1; - if (!ext || !is_supported_compression(ext + 1)) + if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + name = dso->long_name; + + if (kmod_path__parse_ext(&m, name) || !m.comp) return -1; fd = mkstemp(tmpbuf); - if (fd < 0) - return -1; + if (fd < 0) { + dso->load_errno = errno; + goto out; + } - if (!decompress_to_file(ext + 1, name, fd)) { + if (!decompress_to_file(m.ext, name, fd)) { + dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; close(fd); fd = -1; } unlink(tmpbuf); +out: + free(m.ext); return fd; } @@ -621,37 +638,49 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, Elf *elf; int fd; - if (dso__needs_decompress(dso)) + if (dso__needs_decompress(dso)) { fd = decompress_kmodule(dso, name, type); - else + if (fd < 0) + return -1; + } else { fd = open(name, O_RDONLY); - - if (fd < 0) - return -1; + if (fd < 0) { + dso->load_errno = errno; + return -1; + } + } elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF; goto out_close; } if (gelf_getehdr(elf, &ehdr) == NULL) { + dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF; pr_debug("%s: cannot get elf header.\n", __func__); goto out_elf_end; } - if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) { + dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR; goto out_elf_end; + } /* Always reject images with a mismatched build-id: */ if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; - if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) + if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) { + dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID; goto out_elf_end; + } - if (!dso__build_id_equal(dso, build_id)) + if (!dso__build_id_equal(dso, build_id)) { + dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID; goto out_elf_end; + } } ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64); @@ -687,8 +716,10 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, } ss->name = strdup(name); - if (!ss->name) + if (!ss->name) { + dso->load_errno = errno; goto out_elf_end; + } ss->elf = elf; ss->fd = fd; @@ -745,6 +776,7 @@ int dso__load_sym(struct dso *dso, struct map *map, symbol_filter_t filter, int kmodule) { struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; + struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = dso; Elf_Data *symstrs, *secstrs; @@ -760,6 +792,9 @@ int dso__load_sym(struct dso *dso, struct map *map, int nr = 0; bool remap_kernel = false, adjust_kernel_syms = false; + if (kmap && !kmaps) + return -1; + dso->symtab_type = syms_ss->type; dso->is_64_bit = syms_ss->is_64_bit; dso->rel = syms_ss->ehdr.e_type == ET_REL; @@ -856,10 +891,9 @@ int dso__load_sym(struct dso *dso, struct map *map, /* Reject ARM ELF "mapping symbols": these aren't unique and * don't identify functions, so will confuse the profile * output: */ - if (ehdr.e_machine == EM_ARM) { - if (!strcmp(elf_name, "$a") || - !strcmp(elf_name, "$d") || - !strcmp(elf_name, "$t")) + if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) { + if (elf_name[0] == '$' && strchr("adtx", elf_name[1]) + && (elf_name[2] == '\0' || elf_name[2] == '.')) continue; } @@ -928,8 +962,10 @@ int dso__load_sym(struct dso *dso, struct map *map, map->map_ip = map__map_ip; map->unmap_ip = map__unmap_ip; /* Ensure maps are correctly ordered */ - map_groups__remove(kmap->kmaps, map); - map_groups__insert(kmap->kmaps, map); + if (kmaps) { + map_groups__remove(kmaps, map); + map_groups__insert(kmaps, map); + } } /* @@ -953,7 +989,7 @@ int dso__load_sym(struct dso *dso, struct map *map, snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name); - curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); + curr_map = map_groups__find_by_name(kmaps, map->type, dso_name); if (curr_map == NULL) { u64 start = sym.st_value; @@ -983,7 +1019,7 @@ int dso__load_sym(struct dso *dso, struct map *map, curr_map->unmap_ip = identity__map_ip; } curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmap->kmaps, curr_map); + map_groups__insert(kmaps, curr_map); /* * The new DSO should go to the kernel DSOS */ @@ -1037,14 +1073,15 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { - symbols__fixup_duplicate(&dso->symbols[map->type]); + if (!symbol_conf.allow_aliases) + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); if (kmap) { /* * We need to fixup this here too because we create new * maps here, for things like vsyscall sections. */ - __map_groups__fixup_end(kmap->kmaps, map->type); + __map_groups__fixup_end(kmaps, map->type); } } err = nr; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index d7efb03b3f9a..fd8477cacf88 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -246,13 +246,12 @@ out: return ret; } -int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, - const char *name, +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type) { int fd = open(name, O_RDONLY); if (fd < 0) - return -1; + goto out_errno; ss->name = strdup(name); if (!ss->name) @@ -264,6 +263,8 @@ int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, return 0; out_close: close(fd); +out_errno: + dso->load_errno = errno; return -1; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index c24c5b83156c..201f6c4ca738 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -15,6 +15,7 @@ #include "machine.h" #include "symbol.h" #include "strlist.h" +#include "intlist.h" #include "header.h" #include <elf.h> @@ -396,6 +397,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, const char *name) { struct rb_node *n; + struct symbol_name_rb_node *s; if (symbols == NULL) return NULL; @@ -403,7 +405,6 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, n = symbols->rb_node; while (n) { - struct symbol_name_rb_node *s; int cmp; s = rb_entry(n, struct symbol_name_rb_node, rb_node); @@ -414,10 +415,24 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, else if (cmp > 0) n = n->rb_right; else - return &s->sym; + break; } - return NULL; + if (n == NULL) + return NULL; + + /* return first symbol that has same name (if any) */ + for (n = rb_prev(n); n; n = rb_prev(n)) { + struct symbol_name_rb_node *tmp; + + tmp = rb_entry(n, struct symbol_name_rb_node, rb_node); + if (strcmp(tmp->sym.name, s->sym.name)) + break; + + s = tmp; + } + + return &s->sym; } struct symbol *dso__find_symbol(struct dso *dso, @@ -436,6 +451,17 @@ struct symbol *dso__next_symbol(struct symbol *sym) return symbols__next(sym); } +struct symbol *symbol__next_by_name(struct symbol *sym) +{ + struct symbol_name_rb_node *s = container_of(sym, struct symbol_name_rb_node, sym); + struct rb_node *n = rb_next(&s->rb_node); + + return n ? &rb_entry(n, struct symbol_name_rb_node, rb_node)->sym : NULL; +} + + /* + * Teturns first symbol that matched with @name. + */ struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, const char *name) { @@ -604,13 +630,16 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename, static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, symbol_filter_t filter) { - struct map_groups *kmaps = map__kmap(map)->kmaps; + struct map_groups *kmaps = map__kmaps(map); struct map *curr_map; struct symbol *pos; int count = 0, moved = 0; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); + if (!kmaps) + return -1; + while (next) { char *module; @@ -656,15 +685,20 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, symbol_filter_t filter) { - struct map_groups *kmaps = map__kmap(map)->kmaps; - struct machine *machine = kmaps->machine; + struct map_groups *kmaps = map__kmaps(map); + struct machine *machine; struct map *curr_map = map; struct symbol *pos; - int count = 0, moved = 0; + int count = 0, moved = 0; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); int kernel_range = 0; + if (!kmaps) + return -1; + + machine = kmaps->machine; + while (next) { char *module; @@ -999,9 +1033,12 @@ static bool filename_from_kallsyms_filename(char *filename, static int validate_kcore_modules(const char *kallsyms_filename, struct map *map) { - struct map_groups *kmaps = map__kmap(map)->kmaps; + struct map_groups *kmaps = map__kmaps(map); char modules_filename[PATH_MAX]; + if (!kmaps) + return -EINVAL; + if (!filename_from_kallsyms_filename(modules_filename, "modules", kallsyms_filename)) return -EINVAL; @@ -1017,6 +1054,9 @@ static int validate_kcore_addresses(const char *kallsyms_filename, { struct kmap *kmap = map__kmap(map); + if (!kmap) + return -EINVAL; + if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) { u64 start; @@ -1055,8 +1095,8 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { - struct map_groups *kmaps = map__kmap(map)->kmaps; - struct machine *machine = kmaps->machine; + struct map_groups *kmaps = map__kmaps(map); + struct machine *machine; struct kcore_mapfn_data md; struct map *old_map, *new_map, *replacement_map = NULL; bool is_64_bit; @@ -1064,6 +1104,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map, char kcore_filename[PATH_MAX]; struct symbol *sym; + if (!kmaps) + return -EINVAL; + + machine = kmaps->machine; + /* This function requires that the map is the kernel map */ if (map != machine->vmlinux_maps[map->type]) return -EINVAL; @@ -1176,6 +1221,9 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) struct kmap *kmap = map__kmap(map); u64 addr; + if (!kmap) + return -1; + if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) return 0; @@ -1834,6 +1882,20 @@ int setup_list(struct strlist **list, const char *list_str, return 0; } +int setup_intlist(struct intlist **list, const char *list_str, + const char *list_name) +{ + if (list_str == NULL) + return 0; + + *list = intlist__new(list_str); + if (!*list) { + pr_err("problems parsing %s list\n", list_name); + return -1; + } + return 0; +} + static bool symbol__read_kptr_restrict(void) { bool value = false; @@ -1884,9 +1946,17 @@ int symbol__init(struct perf_session_env *env) symbol_conf.comm_list_str, "comm") < 0) goto out_free_dso_list; + if (setup_intlist(&symbol_conf.pid_list, + symbol_conf.pid_list_str, "pid") < 0) + goto out_free_comm_list; + + if (setup_intlist(&symbol_conf.tid_list, + symbol_conf.tid_list_str, "tid") < 0) + goto out_free_pid_list; + if (setup_list(&symbol_conf.sym_list, symbol_conf.sym_list_str, "symbol") < 0) - goto out_free_comm_list; + goto out_free_tid_list; /* * A path to symbols of "/" is identical to "" @@ -1905,6 +1975,10 @@ int symbol__init(struct perf_session_env *env) symbol_conf.initialized = true; return 0; +out_free_tid_list: + intlist__delete(symbol_conf.tid_list); +out_free_pid_list: + intlist__delete(symbol_conf.pid_list); out_free_comm_list: strlist__delete(symbol_conf.comm_list); out_free_dso_list: @@ -1919,6 +1993,8 @@ void symbol__exit(void) strlist__delete(symbol_conf.sym_list); strlist__delete(symbol_conf.dso_list); strlist__delete(symbol_conf.comm_list); + intlist__delete(symbol_conf.tid_list); + intlist__delete(symbol_conf.pid_list); vmlinux_path__exit(); symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; symbol_conf.initialized = false; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 9d602e9c6f59..09561500164a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -78,6 +78,7 @@ static inline size_t symbol__size(const struct symbol *sym) } struct strlist; +struct intlist; struct symbol_conf { unsigned short priv_size; @@ -87,6 +88,7 @@ struct symbol_conf { ignore_vmlinux_buildid, show_kernel_path, use_modules, + allow_aliases, sort_by_name, show_nr_samples, show_total_period, @@ -114,6 +116,8 @@ struct symbol_conf { const char *guestmount; const char *dso_list_str, *comm_list_str, + *pid_list_str, + *tid_list_str, *sym_list_str, *col_width_list_str; struct strlist *dso_list, @@ -123,6 +127,8 @@ struct symbol_conf { *dso_to_list, *sym_from_list, *sym_to_list; + struct intlist *pid_list, + *tid_list; const char *symfs; }; @@ -231,6 +237,7 @@ struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, const char *name); +struct symbol *symbol__next_by_name(struct symbol *sym); struct symbol *dso__first_symbol(struct dso *dso, enum map_type type); struct symbol *dso__next_symbol(struct symbol *sym); @@ -293,5 +300,7 @@ int compare_proc_modules(const char *from, const char *to); int setup_list(struct strlist **list, const char *list_str, const char *list_name); +int setup_intlist(struct intlist **list, const char *list_str, + const char *list_name); #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index e74c5963dc7a..a53603b27e52 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -123,11 +123,8 @@ int target__strerror(struct target *target, int errnum, if (errnum >= 0) { const char *err = strerror_r(errnum, buf, buflen); - if (err != buf) { - size_t len = strlen(err); - memcpy(buf, err, min(buflen - 1, len)); - *(buf + min(buflen - 1, len)) = '\0'; - } + if (err != buf) + scnprintf(buf, buflen, "%s", err); return 0; } diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9ebc8b1f9be5..1c8fbc9588c5 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -82,6 +82,20 @@ void thread__delete(struct thread *thread) free(thread); } +struct thread *thread__get(struct thread *thread) +{ + ++thread->refcnt; + return thread; +} + +void thread__put(struct thread *thread) +{ + if (thread && --thread->refcnt == 0) { + list_del_init(&thread->node); + thread__delete(thread); + } +} + struct comm *thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) @@ -192,7 +206,6 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) err = thread__set_comm(thread, comm, timestamp); if (err) return err; - thread->comm_set = true; } thread->ppid = parent->tid; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 160fd066a7d1..9b8a54dc34a8 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -7,6 +7,7 @@ #include <sys/types.h> #include "symbol.h" #include <strlist.h> +#include <intlist.h> struct thread_stack; @@ -20,6 +21,7 @@ struct thread { pid_t tid; pid_t ppid; int cpu; + int refcnt; char shortname[3]; bool comm_set; bool dead; /* if set thread has exited */ @@ -37,6 +39,18 @@ struct comm; struct thread *thread__new(pid_t pid, pid_t tid); int thread__init_map_groups(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); + +struct thread *thread__get(struct thread *thread); +void thread__put(struct thread *thread); + +static inline void __thread__zput(struct thread **thread) +{ + thread__put(*thread); + *thread = NULL; +} + +#define thread__zput(thread) __thread__zput(&thread) + static inline void thread__exited(struct thread *thread) { thread->dead = true; @@ -87,6 +101,16 @@ static inline bool thread__is_filtered(struct thread *thread) return true; } + if (symbol_conf.pid_list && + !intlist__has_entry(symbol_conf.pid_list, thread->pid_)) { + return true; + } + + if (symbol_conf.tid_list && + !intlist__has_entry(symbol_conf.tid_list, thread->tid)) { + return true; + } + return false; } diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index bb2708bbfaca..51d9e56c0f84 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -10,6 +10,7 @@ struct perf_evsel; struct perf_sample; struct perf_tool; struct machine; +struct ordered_events; typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -25,6 +26,9 @@ typedef int (*event_attr_op)(struct perf_tool *tool, typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, struct perf_session *session); +typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, + struct ordered_events *oe); + struct perf_tool { event_sample sample, read; @@ -38,8 +42,8 @@ struct perf_tool { unthrottle; event_attr_op attr; event_op2 tracing_data; - event_op2 finished_round, - build_id, + event_oe finished_round; + event_op2 build_id, id_index; bool ordered_events; bool ordering_requires_timestamps; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c36636fd825b..25d6c737be3e 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -112,8 +112,8 @@ unsigned long long read_size(struct event_format *event, void *ptr, int size) return pevent_read_number(event->pevent, ptr, size); } -void event_format__print(struct event_format *event, - int cpu, void *data, int size) +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp) { struct pevent_record record; struct trace_seq s; @@ -125,10 +125,16 @@ void event_format__print(struct event_format *event, trace_seq_init(&s); pevent_event_info(&s, event, &record); - trace_seq_do_printf(&s); + trace_seq_do_fprintf(&s, fp); trace_seq_destroy(&s); } +void event_format__print(struct event_format *event, + int cpu, void *data, int size) +{ + return event_format__fprintf(event, cpu, data, size, stdout); +} + void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size __maybe_unused) { diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 5c9bdd1591a9..9df61059a85d 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -43,7 +43,6 @@ static int stop_script_unsupported(void) static void process_event_unsupported(union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct thread *thread __maybe_unused, struct addr_location *al __maybe_unused) { } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 52aaa19e1eb1..d5168f0be4ec 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -23,6 +23,9 @@ trace_event__tp_format(const char *sys, const char *name); int bigendian(void); +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp); + void event_format__print(struct event_format *event, int cpu, void *data, int size); @@ -69,8 +72,7 @@ struct scripting_ops { void (*process_event) (union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, - struct addr_location *al); + struct addr_location *al); int (*generate_script) (struct pevent *pevent, const char *outfile); }; diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 371219a6daf1..7b09a443a280 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -185,6 +185,28 @@ static u64 elf_section_offset(int fd, const char *name) return offset; } +#ifndef NO_LIBUNWIND_DEBUG_FRAME +static int elf_is_exec(int fd, const char *name) +{ + Elf *elf; + GElf_Ehdr ehdr; + int retval = 0; + + elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); + if (elf == NULL) + return 0; + if (gelf_getehdr(elf, &ehdr) == NULL) + goto out; + + retval = (ehdr.e_type == ET_EXEC); + +out: + elf_end(elf); + pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval); + return retval; +} +#endif + struct table_entry { u32 start_ip_offset; u32 fde_offset; @@ -244,14 +266,17 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, u64 *fde_count) { int ret = -EINVAL, fd; - u64 offset; + u64 offset = dso->data.eh_frame_hdr_offset; - fd = dso__data_fd(dso, machine); - if (fd < 0) - return -EINVAL; + if (offset == 0) { + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; - /* Check the .eh_frame section for unwinding info */ - offset = elf_section_offset(fd, ".eh_frame_hdr"); + /* Check the .eh_frame section for unwinding info */ + offset = elf_section_offset(fd, ".eh_frame_hdr"); + dso->data.eh_frame_hdr_offset = offset; + } if (offset) ret = unwind_spec_ehframe(dso, machine, offset, @@ -265,14 +290,20 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, static int read_unwind_spec_debug_frame(struct dso *dso, struct machine *machine, u64 *offset) { - int fd = dso__data_fd(dso, machine); + int fd; + u64 ofs = dso->data.debug_frame_offset; - if (fd < 0) - return -EINVAL; + if (ofs == 0) { + fd = dso__data_fd(dso, machine); + if (fd < 0) + return -EINVAL; - /* Check the .debug_frame section for unwinding info */ - *offset = elf_section_offset(fd, ".debug_frame"); + /* Check the .debug_frame section for unwinding info */ + ofs = elf_section_offset(fd, ".debug_frame"); + dso->data.debug_frame_offset = ofs; + } + *offset = ofs; if (*offset) return 0; @@ -322,8 +353,12 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, #ifndef NO_LIBUNWIND_DEBUG_FRAME /* Check the .debug_frame section for unwinding info */ if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { + int fd = dso__data_fd(map->dso, ui->machine); + int is_exec = elf_is_exec(fd, map->dso->name); + unw_word_t base = is_exec ? 0 : map->start; + memset(&di, 0, sizeof(di)); - if (dwarf_find_debug_frame(0, &di, ip, 0, map->dso->name, + if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name, map->start, map->end)) return dwarf_search_unwind_table(as, ip, &di, pi, need_unwind_info, arg); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b86744f29eef..4ee6d0d4c993 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -269,6 +269,13 @@ void dump_stack(void) void dump_stack(void) {} #endif +void sighandler_dump_stack(int sig) +{ + psignal(sig, "perf"); + dump_stack(); + exit(sig); +} + void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -303,13 +310,26 @@ void set_term_quiet_input(struct termios *old) tcsetattr(0, TCSANOW, &tc); } -static void set_tracing_events_path(const char *mountpoint) +static void set_tracing_events_path(const char *tracing, const char *mountpoint) { - snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", - mountpoint, "tracing/events"); + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", + mountpoint, tracing, "events"); } -const char *perf_debugfs_mount(const char *mountpoint) +static const char *__perf_tracefs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = tracefs_mount(mountpoint); + if (!mnt) + return NULL; + + set_tracing_events_path("", mnt); + + return mnt; +} + +static const char *__perf_debugfs_mount(const char *mountpoint) { const char *mnt; @@ -317,7 +337,20 @@ const char *perf_debugfs_mount(const char *mountpoint) if (!mnt) return NULL; - set_tracing_events_path(mnt); + set_tracing_events_path("tracing/", mnt); + + return mnt; +} + +const char *perf_debugfs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = __perf_tracefs_mount(mountpoint); + if (mnt) + return mnt; + + mnt = __perf_debugfs_mount(mountpoint); return mnt; } @@ -325,12 +358,19 @@ const char *perf_debugfs_mount(const char *mountpoint) void perf_debugfs_set_path(const char *mntpt) { snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt); - set_tracing_events_path(mntpt); + set_tracing_events_path("tracing/", mntpt); +} + +static const char *find_tracefs(void) +{ + const char *path = __perf_tracefs_mount(NULL); + + return path; } static const char *find_debugfs(void) { - const char *path = perf_debugfs_mount(NULL); + const char *path = __perf_debugfs_mount(NULL); if (!path) fprintf(stderr, "Your kernel does not support the debugfs filesystem"); @@ -344,6 +384,7 @@ static const char *find_debugfs(void) */ const char *find_tracing_dir(void) { + const char *tracing_dir = ""; static char *tracing; static int tracing_found; const char *debugfs; @@ -351,11 +392,15 @@ const char *find_tracing_dir(void) if (tracing_found) return tracing; - debugfs = find_debugfs(); - if (!debugfs) - return NULL; + debugfs = find_tracefs(); + if (!debugfs) { + tracing_dir = "/tracing"; + debugfs = find_debugfs(); + if (!debugfs) + return NULL; + } - if (asprintf(&tracing, "%s/tracing", debugfs) < 0) + if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0) return NULL; tracing_found = 1; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 027a5153495c..1ff23e04ad27 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -75,6 +75,7 @@ #include <linux/types.h> #include <sys/ttydefaults.h> #include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> #include <termios.h> #include <linux/bitops.h> #include <termios.h> @@ -276,6 +277,7 @@ char *ltrim(char *s); char *rtrim(char *s); void dump_stack(void); +void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; @@ -327,4 +329,8 @@ bool find_process(const char *name); int gzip_decompress_to_file(const char *input, int output_fd); #endif +#ifdef HAVE_LZMA_SUPPORT +int lzma_decompress_to_file(const char *input, int output_fd); +#endif + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index f4b953354ff7..eec688041500 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 2f0f34a36db4..5da129e10aa2 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/oslibcfs.c b/tools/power/acpi/os_specific/service_layers/oslibcfs.c index c13ff9c51d74..b51e40a9a120 100644 --- a/tools/power/acpi/os_specific/service_layers/oslibcfs.c +++ b/tools/power/acpi/os_specific/service_layers/oslibcfs.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 0dc2485dedf5..db15c9d2049e 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1156,7 +1156,7 @@ osl_table_name_from_file(char *filename, char *signature, u32 *instance) /* Extract instance number */ if (isdigit((int)filename[ACPI_NAME_SIZE])) { - sscanf(&filename[ACPI_NAME_SIZE], "%d", instance); + sscanf(&filename[ACPI_NAME_SIZE], "%u", instance); } else if (strlen(filename) != ACPI_NAME_SIZE) { return (AE_BAD_SIGNATURE); } else { diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index 733f9e490fc4..e153fcb12b1a 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 99b47b6194a3..0b1fa290245a 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -146,6 +146,6 @@ void acpi_os_unmap_memory(void *where, acpi_size length) acpi_size page_size; page_size = acpi_os_get_page_size(); - offset = (acpi_physical_address) where % page_size; + offset = ACPI_TO_INTEGER(where) % page_size; munmap((u8 *)where - offset, (length + offset)); } diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 7ccb073f8316..6858c0893c91 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index a2d37d610639..84bdef0136cb 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 24d32968802d..c736adf5fb55 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index d470046a6d81..8f2fe168228e 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 853b4da22c3e..d0ba6535f5af 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2014, Intel Corp. + * Copyright (C) 2000 - 2015, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 7cdcf88659c7..9ea914378985 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -199,7 +199,7 @@ int main(int argc, const char *argv[]) } get_cpu_info(0, &cpupower_cpu_info); - run_as_root = !getuid(); + run_as_root = !geteuid(); if (run_as_root) { ret = uname(&uts); if (!ret && !strcmp(uts.machine, "x86_64") && diff --git a/tools/power/cpupower/utils/helpers/pci.c b/tools/power/cpupower/utils/helpers/pci.c index 9690798e6446..8b278983cfc5 100644 --- a/tools/power/cpupower/utils/helpers/pci.c +++ b/tools/power/cpupower/utils/helpers/pci.c @@ -25,14 +25,21 @@ struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus, int slot, int func, int vendor, int dev) { - struct pci_filter filter_nb_link = { domain, bus, slot, func, - vendor, dev }; + struct pci_filter filter_nb_link; struct pci_dev *device; *pacc = pci_alloc(); if (*pacc == NULL) return NULL; + pci_filter_init(*pacc, &filter_nb_link); + filter_nb_link.domain = domain; + filter_nb_link.bus = bus; + filter_nb_link.slot = slot; + filter_nb_link.func = func; + filter_nb_link.vendor = vendor; + filter_nb_link.device = dev; + pci_init(*pacc); pci_scan_bus(*pacc); diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c index 09afe5d87f2b..4e8fe2c7b054 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.c +++ b/tools/power/cpupower/utils/helpers/sysfs.c @@ -361,7 +361,7 @@ unsigned int sysfs_get_idlestate_count(unsigned int cpu) snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle"); if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) - return -ENODEV; + return 0; snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu); if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index d1b3a361e526..4039854560d0 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,8 +1,12 @@ CC = $(CROSS_COMPILE)gcc -BUILD_OUTPUT := $(PWD) +BUILD_OUTPUT := $(CURDIR) PREFIX := /usr DESTDIR := +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif + turbostat : turbostat.c CFLAGS += -Wall CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 56bfb523c5bb..05b8fc38dc8b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -9,52 +9,68 @@ turbostat \- Report processor frequency and idle statistics .br .B turbostat .RB [ Options ] -.RB [ "\-i interval_sec" ] +.RB [ "\--interval seconds" ] .SH DESCRIPTION \fBturbostat \fP reports processor topology, frequency, -idle power-state statistics, temperature and power on modern X86 processors. -Either \fBcommand\fP is forked and statistics are printed -upon its completion, or statistics are printed periodically. - -\fBturbostat \fP -must be run on root, and -minimally requires that the processor -supports an "invariant" TSC, plus the APERF and MPERF MSRs. -Additional information is reported depending on hardware counter support. - +idle power-state statistics, temperature and power on X86 processors. +There are two ways to invoke turbostat. +The first method is to supply a +\fBcommand\fP, which is forked and statistics are printed +upon its completion. +The second method is to omit the command, +and turbostat displays statistics every 5 seconds. +The 5-second interval can be changed using the --interval option. +.PP +Some information is not available on older processors. .SS Options -The \fB-p\fP option limits output to the 1st thread in 1st core of each package. +Options can be specified with a single or double '-', and only as much of the option +name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive. +\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. +.PP +\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. +.PP +\fB--Dump\fP displays the raw counter values. +.PP +\fB--debug\fP displays additional system configuration information. Invoking this parameter +more than once may also enable internal turbostat debug information. +.PP +\fB--interval seconds\fP overrides the default 5-second measurement interval. +.PP +\fB--help\fP displays usage for the most common parameters. .PP -The \fB-P\fP option limits output to the 1st thread in each Package. +\fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. .PP -The \fB-S\fP option limits output to a 1-line System Summary for each interval. +\fB--MSR MSR#\fP shows the specified 64-bit MSR value. .PP -The \fB-v\fP option increases verbosity. +\fB--msr MSR#\fP shows the specified 32-bit MSR value. .PP -The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter. +\fB--Package\fP limits output to the system summary plus the 1st thread in each Package. .PP -The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter. +\fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. .PP -The \fB-m MSR#\fP option includes the the specified 32-bit MSR value. +\fB--Summary\fP limits output to a 1-line System Summary for each interval. .PP -The \fB-M MSR#\fP option includes the the specified 64-bit MSR value. +\fB--TCC temperature\fP sets the Thermal Control Circuit temperature for systems which do not export that value. This is used for making sense of the Digital Thermal Sensor outputs, as they return degrees Celsius below the TCC activation temperature. .PP -The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds. -The default is 5 seconds. +\fB--version\fP displays the version. .PP -The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit, +The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, displays the statistics gathered since it was forked. .PP -.SH FIELD DESCRIPTIONS +.SH DEFAULT FIELD DESCRIPTIONS .nf -\fBPackage\fP processor package number. -\fBCore\fP processor core number. -\fBCPU\fP Linux CPU (logical processor) number. -Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. +\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. \fBAVG_MHz\fP number of cycles executed divided by time elapsed. -\fB%Buzy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. +\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. +.fi +.PP +.SH DEBUG FIELD DESCRIPTIONS +.nf +\fBPackage\fP processor package number. +\fBCore\fP processor core number. +Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. @@ -68,74 +84,91 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T .fi .PP .SH EXAMPLE -Without any parameters, turbostat prints out counters ever 5 seconds. +Without any parameters, turbostat displays statistics ever 5 seconds. (override interval with "-i sec" option, or specify a command for turbostat to fork). +.nf +[root@hsw]# ./turbostat + CPU Avg_MHz %Busy Bzy_MHz TSC_MHz + - 488 12.51 3898 3498 + 0 0 0.01 3885 3498 + 4 3897 99.99 3898 3498 + 1 0 0.00 3861 3498 + 5 0 0.00 3882 3498 + 2 1 0.02 3894 3498 + 6 2 0.06 3898 3498 + 3 0 0.00 3849 3498 + 7 0 0.00 3877 3498 + +.fi +.SH DEBUG EXAMPLE +The "--debug" option prints additional system information before measurements: The first row of statistics is a summary for the entire system. For residency % columns, the summary is a weighted average. For Temperature columns, the summary is the column maximum. For Watts columns, the summary is a system total. Subsequent rows show per-CPU statistics. - .nf -[root@ivy]# ./turbostat - Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt - - - 6 0.36 1596 3492 0 0.59 0.01 99.04 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00 - 0 0 9 0.58 1596 3492 0 0.28 0.01 99.13 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00 - 0 4 1 0.07 1596 3492 0 0.79 - 1 1 10 0.65 1596 3492 0 0.59 0.00 98.76 0.00 23 - 1 5 5 0.28 1596 3492 0 0.95 - 2 2 10 0.66 1596 3492 0 0.41 0.01 98.92 0.00 23 - 2 6 2 0.10 1597 3492 0 0.97 - 3 3 3 0.20 1596 3492 0 0.44 0.00 99.37 0.00 23 - 3 7 5 0.31 1596 3492 0 0.33 -.fi -.SH VERBOSE EXAMPLE -The "-v" option adds verbosity to the output: - -.nf -[root@ivy]# turbostat -v -turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org> -CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) +turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org> +CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3) CPUID(6): APERF, DTS, PTM, EPB -RAPL: 851 sec. Joule Counter Range -cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 -16 * 100 = 1600 MHz max efficiency +RAPL: 3121 sec. Joule Counter Range, at 84 Watts +cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 +8 * 100 = 800 MHz max efficiency 35 * 100 = 3500 MHz TSC frequency -cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret) +cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) +cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 37 * 100 = 3700 MHz max turbo 4 active cores 38 * 100 = 3800 MHz max turbo 3 active cores 39 * 100 = 3900 MHz max turbo 2 active cores 39 * 100 = 3900 MHz max turbo 1 active cores cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) -cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) -cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) -cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked) -cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) -cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled) +cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, ) +cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: ) +cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, ) +cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.) +cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.) +cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked) +cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled) +cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled) cpu0: MSR_PP0_POLICY: 0 cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) cpu0: MSR_PP1_POLICY: 0 cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) -cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) -cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) -cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) -cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) -cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1) -cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) - ... +cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C) +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C) +cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) +cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) + Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt + - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 + 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 + 0 4 3897 99.98 3898 3498 0 0.02 + 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32 + 1 5 0 0.00 3885 3498 0 0.21 + 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32 + 2 6 2 0.06 3896 3498 0 0.80 + 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28 + 3 7 0 0.00 3879 3498 0 0.04 +^C + .fi The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency -available at the minimum package voltage. The \fBTSC frequency\fP is the nominal -maximum frequency of the processor if turbo-mode were not available. This frequency +available at the minimum package voltage. The \fBTSC frequency\fP is the base +frequency of the processor -- this should match the brand string +in /proc/cpuinfo. This base frequency should be sustainable on all CPUs indefinitely, given nominal power and cooling. The remaining rows show what maximum turbo frequency is possible -depending on the number of idle cores. Note that this information is -not available on all processors. +depending on the number of idle cores. Note that not all information is +available on all processors. +.PP +The --debug option adds additional columns to the measurement ouput, including CPU idle power-state residency processor temperature sensor readinds. +See the field definitions above. .SH FORK EXAMPLE If turbostat is invoked with a command, it will fork that command and output the statistics gathered when the command exits. @@ -143,27 +176,23 @@ eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds until ^C while the other CPUs are mostly idle: .nf -root@ivy: turbostat cat /dev/zero > /dev/null +root@hsw: turbostat cat /dev/zero > /dev/null ^C - Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt - - - 496 12.75 3886 3492 0 13.16 0.04 74.04 0.00 36 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 - 0 0 22 0.57 3830 3492 0 0.83 0.02 98.59 0.00 27 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 - 0 4 9 0.24 3829 3492 0 1.15 - 1 1 4 0.09 3783 3492 0 99.91 0.00 0.00 0.00 36 - 1 5 3880 99.82 3888 3492 0 0.18 - 2 2 17 0.44 3813 3492 0 0.77 0.04 98.75 0.00 28 - 2 6 12 0.32 3823 3492 0 0.89 - 3 3 16 0.43 3844 3492 0 0.63 0.11 98.84 0.00 30 - 3 7 4 0.11 3827 3492 0 0.94 -30.372243 sec + CPU Avg_MHz %Busy Bzy_MHz TSC_MHz + - 482 12.51 3854 3498 + 0 0 0.01 1960 3498 + 4 0 0.00 2128 3498 + 1 0 0.00 3003 3498 + 5 3854 99.98 3855 3498 + 2 0 0.01 3504 3498 + 6 3 0.08 3884 3498 + 3 0 0.00 2553 3498 + 7 0 0.00 2126 3498 +10.783983 sec .fi -Above the cycle soaker drives cpu5 up its 3.8 GHz turbo limit -while the other processors are generally in various states of idle. - -Note that cpu1 and cpu5 are HT siblings within core1. -As cpu5 is very busy, it prevents its sibling, cpu1, -from entering a c-state deeper than c1. +Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit. +The first row shows the average MHz and %Busy across all the processors in the system. Note that the Avg_MHz column reflects the total number of cycles executed divided by the measurement interval. If the %Busy column is 100%, @@ -176,6 +205,11 @@ not including any non-busy idle time. .B "turbostat " must be run as root. +Alternatively, non-root users can be enabled to run turbostat this way: + +# setcap cap_sys_rawio=ep ./turbostat + +# chmod +r /dev/cpu/*/msr .B "turbostat " reads hardware counters, but doesn't write them. @@ -184,15 +218,33 @@ multiple invocations of itself. \fBturbostat \fP may work poorly on Linux-2.6.20 through 2.6.29, -as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF +as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF MSRs in those kernels. -If the TSC column does not make sense, then -the other numbers will also make no sense. -Turbostat is lightweight, and its data collection is not atomic. -These issues are usually caused by an extremely short measurement -interval (much less than 1 second), or system activity that prevents -turbostat from being able to run on all CPUS to quickly collect data. +AVG_MHz = APERF_delta/measurement_interval. This is the actual +number of elapsed cycles divided by the entire sample interval -- +including idle time. Note that this calculation is resilient +to systems lacking a non-stop TSC. + +TSC_MHz = TSC_delta/measurement_interval. +On a system with an invariant TSC, this value will be constant +and will closely match the base frequency value shown +in the brand string in /proc/cpuinfo. On a system where +the TSC stops in idle, TSC_MHz will drop +below the processor's base frequency. + +%Busy = MPERF_delta/TSC_delta + +Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval + +Note that these calculations depend on TSC_delta, so they +are not reliable during intervals when TSC_MHz is not running at the base frequency. + +Turbostat data collection is not atomic. +Extremely short measurement intervals (much less than 1 second), +or system activity that prevents turbostat from being able +to run on all CPUS to quickly collect data, will result in +inconsistent results. The APERF, MPERF MSRs are defined to count non-halted cycles. Although it is not guaranteed by the architecture, turbostat assumes diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5b1b807265a1..bac98ca3d4ca 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -33,25 +33,31 @@ #include <signal.h> #include <sys/time.h> #include <stdlib.h> +#include <getopt.h> #include <dirent.h> #include <string.h> #include <ctype.h> #include <sched.h> #include <cpuid.h> +#include <linux/capability.h> +#include <errno.h> char *proc_stat = "/proc/stat"; -unsigned int interval_sec = 5; /* set with -i interval_sec */ -unsigned int verbose; /* set with -v */ -unsigned int rapl_verbose; /* set with -R */ -unsigned int rapl_joules; /* set with -J */ -unsigned int thermal_verbose; /* set with -T */ -unsigned int summary_only; /* set with -S */ -unsigned int dump_only; /* set with -s */ +unsigned int interval_sec = 5; +unsigned int debug; +unsigned int rapl_joules; +unsigned int summary_only; +unsigned int dump_only; unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; +unsigned int do_pc2; +unsigned int do_pc3; +unsigned int do_pc6; +unsigned int do_pc7; unsigned int do_c8_c9_c10; +unsigned int do_skl_residency; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; @@ -59,9 +65,7 @@ unsigned int has_epb; unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; -unsigned int do_nehalem_platform_info; -unsigned int do_nehalem_turbo_ratio_limit; -unsigned int do_ivt_turbo_ratio_limit; +unsigned int do_nhm_platform_info; unsigned int extra_msr_offset32; unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; @@ -79,8 +83,14 @@ unsigned int do_dts; unsigned int do_ptm; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; -double rapl_power_units, rapl_energy_units, rapl_time_units; +double rapl_power_units, rapl_time_units; +double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; +unsigned int do_core_perf_limit_reasons; +unsigned int do_gfx_perf_limit_reasons; +unsigned int do_ring_perf_limit_reasons; +unsigned int crystal_hz; +unsigned long long tsc_hz; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -93,18 +103,18 @@ double rapl_joule_counter_range; #define RAPL_DRAM (1 << 3) /* 0x618 MSR_DRAM_POWER_LIMIT */ /* 0x619 MSR_DRAM_ENERGY_STATUS */ - /* 0x61c MSR_DRAM_POWER_INFO */ #define RAPL_DRAM_PERF_STATUS (1 << 4) /* 0x61b MSR_DRAM_PERF_STATUS */ +#define RAPL_DRAM_POWER_INFO (1 << 5) + /* 0x61c MSR_DRAM_POWER_INFO */ -#define RAPL_CORES (1 << 5) +#define RAPL_CORES (1 << 6) /* 0x638 MSR_PP0_POWER_LIMIT */ /* 0x639 MSR_PP0_ENERGY_STATUS */ -#define RAPL_CORE_POLICY (1 << 6) +#define RAPL_CORE_POLICY (1 << 7) /* 0x63a MSR_PP0_POLICY */ - -#define RAPL_GFX (1 << 7) +#define RAPL_GFX (1 << 8) /* 0x640 MSR_PP1_POWER_LIMIT */ /* 0x641 MSR_PP1_ENERGY_STATUS */ /* 0x642 MSR_PP1_POLICY */ @@ -151,6 +161,10 @@ struct pkg_data { unsigned long long pc8; unsigned long long pc9; unsigned long long pc10; + unsigned long long pkg_wtd_core_c0; + unsigned long long pkg_any_core_c0; + unsigned long long pkg_any_gfxe_c0; + unsigned long long pkg_both_core_gfxe_c0; unsigned int package_id; unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ @@ -251,15 +265,13 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) sprintf(pathname, "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) - return -1; + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); retval = pread(fd, msr, sizeof *msr, offset); close(fd); - if (retval != sizeof *msr) { - fprintf(stderr, "%s offset 0x%llx read failed\n", pathname, (unsigned long long)offset); - return -1; - } + if (retval != sizeof *msr) + err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); return 0; } @@ -281,13 +293,12 @@ void print_header(void) outp += sprintf(outp, " CPU"); if (has_aperf) outp += sprintf(outp, " Avg_MHz"); - if (do_nhm_cstates) + if (has_aperf) outp += sprintf(outp, " %%Busy"); if (has_aperf) outp += sprintf(outp, " Bzy_MHz"); outp += sprintf(outp, " TSC_MHz"); - if (do_smi) - outp += sprintf(outp, " SMI"); + if (extra_delta_offset32) outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); if (extra_delta_offset64) @@ -296,6 +307,13 @@ void print_header(void) outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); if (extra_msr_offset64) outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); + + if (!debug) + goto done; + + if (do_smi) + outp += sprintf(outp, " SMI"); + if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c1"); if (do_nhm_cstates && !do_slm_cstates) @@ -310,13 +328,20 @@ void print_header(void) if (do_ptm) outp += sprintf(outp, " PkgTmp"); - if (do_snb_cstates) + if (do_skl_residency) { + outp += sprintf(outp, " Totl%%C0"); + outp += sprintf(outp, " Any%%C0"); + outp += sprintf(outp, " GFX%%C0"); + outp += sprintf(outp, " CPUGFX%%"); + } + + if (do_pc2) outp += sprintf(outp, " Pkg%%pc2"); - if (do_nhm_cstates && !do_slm_cstates) + if (do_pc3) outp += sprintf(outp, " Pkg%%pc3"); - if (do_nhm_cstates && !do_slm_cstates) + if (do_pc6) outp += sprintf(outp, " Pkg%%pc6"); - if (do_snb_cstates) + if (do_pc7) outp += sprintf(outp, " Pkg%%pc7"); if (do_c8_c9_c10) { outp += sprintf(outp, " Pkg%%pc8"); @@ -337,7 +362,7 @@ void print_header(void) outp += sprintf(outp, " PKG_%%"); if (do_rapl & RAPL_DRAM_PERF_STATUS) outp += sprintf(outp, " RAM_%%"); - } else { + } else if (do_rapl && rapl_joules) { if (do_rapl & RAPL_PKG) outp += sprintf(outp, " Pkg_J"); if (do_rapl & RAPL_CORES) @@ -353,6 +378,7 @@ void print_header(void) outp += sprintf(outp, " time"); } + done: outp += sprintf(outp, "\n"); } @@ -390,10 +416,19 @@ int dump_counters(struct thread_data *t, struct core_data *c, if (p) { outp += sprintf(outp, "package: %d\n", p->package_id); + + outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); + outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); + outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); + outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); + outp += sprintf(outp, "pc2: %016llX\n", p->pc2); - outp += sprintf(outp, "pc3: %016llX\n", p->pc3); - outp += sprintf(outp, "pc6: %016llX\n", p->pc6); - outp += sprintf(outp, "pc7: %016llX\n", p->pc7); + if (do_pc3) + outp += sprintf(outp, "pc3: %016llX\n", p->pc3); + if (do_pc6) + outp += sprintf(outp, "pc6: %016llX\n", p->pc6); + if (do_pc7) + outp += sprintf(outp, "pc7: %016llX\n", p->pc7); outp += sprintf(outp, "pc8: %016llX\n", p->pc8); outp += sprintf(outp, "pc9: %016llX\n", p->pc9); outp += sprintf(outp, "pc10: %016llX\n", p->pc10); @@ -457,31 +492,27 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%8d", t->cpu_id); } - /* AvgMHz */ + /* Avg_MHz */ if (has_aperf) outp += sprintf(outp, "%8.0f", 1.0 / units * t->aperf / interval_float); - /* %c0 */ - if (do_nhm_cstates) { + /* %Busy */ + if (has_aperf) { if (!skip_c0) outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); else outp += sprintf(outp, "********"); } - /* BzyMHz */ + /* Bzy_MHz */ if (has_aperf) outp += sprintf(outp, "%8.0f", 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); - /* TSC */ + /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); - /* SMI */ - if (do_smi) - outp += sprintf(outp, "%8d", t->smi_count); - /* delta */ if (extra_delta_offset32) outp += sprintf(outp, " %11llu", t->extra_delta32); @@ -497,6 +528,13 @@ int format_counters(struct thread_data *t, struct core_data *c, if (extra_msr_offset64) outp += sprintf(outp, " 0x%016llx", t->extra_msr64); + if (!debug) + goto done; + + /* SMI */ + if (do_smi) + outp += sprintf(outp, "%8d", t->smi_count); + if (do_nhm_cstates) { if (!skip_c1) outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); @@ -522,16 +560,25 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; + /* PkgTmp */ if (do_ptm) outp += sprintf(outp, "%8d", p->pkg_temp_c); - if (do_snb_cstates) + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ + if (do_skl_residency) { + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); + } + + if (do_pc2) outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); - if (do_nhm_cstates && !do_slm_cstates) + if (do_pc3) outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc); - if (do_nhm_cstates && !do_slm_cstates) + if (do_pc6) outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc); - if (do_snb_cstates) + if (do_pc7) outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc); if (do_c8_c9_c10) { outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc); @@ -556,12 +603,12 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_rapl & RAPL_GFX) outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); if (do_rapl & RAPL_PKG_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - } else { + } else if (do_rapl && rapl_joules) { if (do_rapl & RAPL_PKG) outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units); @@ -573,13 +620,13 @@ int format_counters(struct thread_data *t, struct core_data *c, p->energy_gfx * rapl_energy_units); if (do_rapl & RAPL_DRAM) outp += sprintf(outp, fmt8, - p->energy_dram * rapl_energy_units); + p->energy_dram * rapl_dram_energy_units); if (do_rapl & RAPL_PKG_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); - outp += sprintf(outp, fmt8, interval_float); + outp += sprintf(outp, fmt8, interval_float); } done: outp += sprintf(outp, "\n"); @@ -627,10 +674,20 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ void delta_package(struct pkg_data *new, struct pkg_data *old) { + + if (do_skl_residency) { + old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; + old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; + old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; + old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; + } old->pc2 = new->pc2 - old->pc2; - old->pc3 = new->pc3 - old->pc3; - old->pc6 = new->pc6 - old->pc6; - old->pc7 = new->pc7 - old->pc7; + if (do_pc3) + old->pc3 = new->pc3 - old->pc3; + if (do_pc6) + old->pc6 = new->pc6 - old->pc6; + if (do_pc7) + old->pc7 = new->pc7 - old->pc7; old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; old->pc10 = new->pc10 - old->pc10; @@ -670,24 +727,26 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = new->c1 - old->c1; - if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { - old->aperf = new->aperf - old->aperf; - old->mperf = new->mperf - old->mperf; - } else { + if (has_aperf) { + if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { + old->aperf = new->aperf - old->aperf; + old->mperf = new->mperf - old->mperf; + } else { - if (!aperf_mperf_unstable) { - fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); - fprintf(stderr, "* Frequency results do not cover entire interval *\n"); - fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); + if (!aperf_mperf_unstable) { + fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); + fprintf(stderr, "* Frequency results do not cover entire interval *\n"); + fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); - aperf_mperf_unstable = 1; + aperf_mperf_unstable = 1; + } + /* + * mperf delta is likely a huge "positive" number + * can not use it for calculating c0 time + */ + skip_c0 = 1; + skip_c1 = 1; } - /* - * mperf delta is likely a huge "positive" number - * can not use it for calculating c0 time - */ - skip_c0 = 1; - skip_c1 = 1; } @@ -712,7 +771,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, } if (old->mperf == 0) { - if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); + if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); old->mperf = 1; /* divide by 0 protection */ } @@ -768,10 +827,18 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c7 = 0; c->core_temp_c = 0; + p->pkg_wtd_core_c0 = 0; + p->pkg_any_core_c0 = 0; + p->pkg_any_gfxe_c0 = 0; + p->pkg_both_core_gfxe_c0 = 0; + p->pc2 = 0; - p->pc3 = 0; - p->pc6 = 0; - p->pc7 = 0; + if (do_pc3) + p->pc3 = 0; + if (do_pc6) + p->pc6 = 0; + if (do_pc7) + p->pc7 = 0; p->pc8 = 0; p->pc9 = 0; p->pc10 = 0; @@ -809,10 +876,20 @@ int sum_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + if (do_skl_residency) { + average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; + average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; + average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; + average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; + } + average.packages.pc2 += p->pc2; - average.packages.pc3 += p->pc3; - average.packages.pc6 += p->pc6; - average.packages.pc7 += p->pc7; + if (do_pc3) + average.packages.pc3 += p->pc3; + if (do_pc6) + average.packages.pc6 += p->pc6; + if (do_pc7) + average.packages.pc7 += p->pc7; average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; average.packages.pc10 += p->pc10; @@ -853,10 +930,20 @@ void compute_average(struct thread_data *t, struct core_data *c, average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; + if (do_skl_residency) { + average.packages.pkg_wtd_core_c0 /= topo.num_packages; + average.packages.pkg_any_core_c0 /= topo.num_packages; + average.packages.pkg_any_gfxe_c0 /= topo.num_packages; + average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; + } + average.packages.pc2 /= topo.num_packages; - average.packages.pc3 /= topo.num_packages; - average.packages.pc6 /= topo.num_packages; - average.packages.pc7 /= topo.num_packages; + if (do_pc3) + average.packages.pc3 /= topo.num_packages; + if (do_pc6) + average.packages.pc6 /= topo.num_packages; + if (do_pc7) + average.packages.pc7 /= topo.num_packages; average.packages.pc8 /= topo.num_packages; average.packages.pc9 /= topo.num_packages; @@ -956,18 +1043,28 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - if (do_nhm_cstates && !do_slm_cstates) { + if (do_skl_residency) { + if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) + return -10; + if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) + return -11; + if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) + return -12; + if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) + return -13; + } + if (do_pc3) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; + if (do_pc6) if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) return -10; - } - if (do_snb_cstates) { + if (do_pc2) if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) return -11; + if (do_pc7) if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) return -12; - } if (do_c8_c9_c10) { if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8)) return -13; @@ -1014,36 +1111,95 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; } -void print_verbose_header(void) +/* + * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: + * If you change the values, note they are used both in comparisons + * (>= PCL__7) and to index pkg_cstate_limit_strings[]. + */ + +#define PCLUKN 0 /* Unknown */ +#define PCLRSV 1 /* Reserved */ +#define PCL__0 2 /* PC0 */ +#define PCL__1 3 /* PC1 */ +#define PCL__2 4 /* PC2 */ +#define PCL__3 5 /* PC3 */ +#define PCL__4 6 /* PC4 */ +#define PCL__6 7 /* PC6 */ +#define PCL_6N 8 /* PC6 No Retention */ +#define PCL_6R 9 /* PC6 Retention */ +#define PCL__7 10 /* PC7 */ +#define PCL_7S 11 /* PC7 Shrink */ +#define PCL__8 12 /* PC8 */ +#define PCL__9 13 /* PC9 */ +#define PCLUNL 14 /* Unlimited */ + +int pkg_cstate_limit = PCLUKN; +char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", + "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"}; + +int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; + +static void +dump_nhm_platform_info(void) { unsigned long long msr; unsigned int ratio; - if (!do_nehalem_platform_info) - return; - get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); ratio = (msr >> 40) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", + fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", + fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", ratio, bclk, ratio * bclk); get_msr(0, MSR_IA32_POWER_CTL, &msr); fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", msr, msr & 0x2 ? "EN" : "DIS"); - if (!do_ivt_turbo_ratio_limit) - goto print_nhm_turbo_ratio_limits; + return; +} + +static void +dump_hsw_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr); + + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", + ratio, bclk, ratio * bclk); + return; +} + +static void +dump_ivt_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; - get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); + get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr); - fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1084,60 +1240,18 @@ void print_verbose_header(void) if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); + return; +} -print_nhm_turbo_ratio_limits: - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); - -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - - fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); - - fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", - (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", - (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", - (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", - (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", - (msr & (1 << 15)) ? "" : "UN", - (unsigned int)msr & 7); - - - switch(msr & 0x7) { - case 0: - fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0"); - break; - case 1: - fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0"); - break; - case 2: - fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3"); - break; - case 3: - fprintf(stderr, do_slm_cstates ? "invalid" : "pc6"); - break; - case 4: - fprintf(stderr, do_slm_cstates ? "pc4" : "pc7"); - break; - case 5: - fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid"); - break; - case 6: - fprintf(stderr, do_slm_cstates ? "pc6" : "invalid"); - break; - case 7: - fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited"); - break; - default: - fprintf(stderr, "invalid"); - } - fprintf(stderr, ")\n"); - - if (!do_nehalem_turbo_ratio_limit) - return; +static void +dump_nhm_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; - get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1178,6 +1292,30 @@ print_nhm_turbo_ratio_limits: if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); + return; +} + +static void +dump_nhm_cst_cfg(void) +{ + unsigned long long msr; + + get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + + fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); + + fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", + (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", + (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", + (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", + (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", + (msr & (1 << 15)) ? "" : "UN", + (unsigned int)msr & 7, + pkg_cstate_limit_strings[pkg_cstate_limit]); + return; } void free_all_buffers(void) @@ -1458,18 +1596,67 @@ void check_dev_msr() struct stat sb; if (stat("/dev/cpu/0/msr", &sb)) - err(-5, "no /dev/cpu/0/msr\n" - "Try \"# modprobe msr\""); + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } -void check_super_user() +void check_permissions() { - if (getuid() != 0) - errx(-6, "must be root"); + struct __user_cap_header_struct cap_header_data; + cap_user_header_t cap_header = &cap_header_data; + struct __user_cap_data_struct cap_data_data; + cap_user_data_t cap_data = &cap_data_data; + extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); + int do_exit = 0; + + /* check for CAP_SYS_RAWIO */ + cap_header->pid = getpid(); + cap_header->version = _LINUX_CAPABILITY_VERSION; + if (capget(cap_header, cap_data) < 0) + err(-6, "capget(2) failed"); + + if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) { + do_exit++; + warnx("capget(CAP_SYS_RAWIO) failed," + " try \"# setcap cap_sys_rawio=ep %s\"", progname); + } + + /* test file permissions */ + if (euidaccess("/dev/cpu/0/msr", R_OK)) { + do_exit++; + warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); + } + + /* if all else fails, thell them to be root */ + if (do_exit) + if (getuid() != 0) + warnx("... or simply run as root"); + + if (do_exit) + exit(-6); } -int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) +/* + * NHM adds support for additional MSRs: + * + * MSR_SMI_COUNT 0x00000034 + * + * MSR_NHM_PLATFORM_INFO 0x000000ce + * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 + * + * MSR_PKG_C3_RESIDENCY 0x000003f8 + * MSR_PKG_C6_RESIDENCY 0x000003f9 + * MSR_CORE_C3_RESIDENCY 0x000003fc + * MSR_CORE_C6_RESIDENCY 0x000003fd + * + * Side effect: + * sets global pkg_cstate_limit to decode MSR_NHM_SNB_PKG_CST_CFG_CTL + */ +int probe_nhm_msrs(unsigned int family, unsigned int model) { + unsigned long long msr; + int *pkg_cstate_limits; + if (!genuine_intel) return 0; @@ -1482,24 +1669,56 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x1F: /* Core i7 and i5 Processor - Nehalem */ case 0x25: /* Westmere Client - Clarkdale, Arrandale */ case 0x2C: /* Westmere EP - Gulftown */ + case 0x2E: /* Nehalem-EX Xeon - Beckton */ + case 0x2F: /* Westmere-EX Xeon - Eagleton */ + pkg_cstate_limits = nhm_pkg_cstate_limits; + break; case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ case 0x3A: /* IVB */ case 0x3E: /* IVB Xeon */ + pkg_cstate_limits = snb_pkg_cstate_limits; + break; case 0x3C: /* HSW */ case 0x3F: /* HSX */ case 0x45: /* HSW */ case 0x46: /* HSW */ - case 0x37: /* BYT */ - case 0x4D: /* AVN */ case 0x3D: /* BDW */ + case 0x47: /* BDW */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ - return 1; + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + pkg_cstate_limits = hsw_pkg_cstate_limits; + break; + case 0x37: /* BYT */ + case 0x4D: /* AVN */ + pkg_cstate_limits = slv_pkg_cstate_limits; + break; + case 0x4C: /* AMT */ + pkg_cstate_limits = amt_pkg_cstate_limits; + break; + case 0x57: /* PHI */ + pkg_cstate_limits = phi_pkg_cstate_limits; + break; + default: + return 0; + } + get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + + pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; + + return 1; +} +int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + switch (model) { + /* Nehalem compatible, but do not include turbo-ratio limit support */ case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ - default: return 0; + default: + return 1; } } int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) @@ -1512,11 +1731,48 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) switch (model) { case 0x3E: /* IVB Xeon */ + case 0x3F: /* HSW Xeon */ return 1; default: return 0; } } +int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x3F: /* HSW Xeon */ + return 1; + default: + return 0; + } +} + +static void +dump_cstate_pstate_config_info(family, model) +{ + if (!do_nhm_platform_info) + return; + + dump_nhm_platform_info(); + + if (has_hsw_turbo_ratio_limit(family, model)) + dump_hsw_turbo_ratio_limits(); + + if (has_ivt_turbo_ratio_limit(family, model)) + dump_ivt_turbo_ratio_limits(); + + if (has_nhm_turbo_ratio_limit(family, model)) + dump_nhm_turbo_ratio_limits(); + + dump_nhm_cst_cfg(); +} + /* * print_epb() @@ -1564,6 +1820,103 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; } +/* + * print_perf_limit() + */ +int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + int cpu; + + cpu = t->cpu_id; + + /* per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(stderr, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_core_perf_limit_reasons) { + get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); + fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", + (msr & 1 << 15) ? "bit15, " : "", + (msr & 1 << 14) ? "bit14, " : "", + (msr & 1 << 13) ? "Transitions, " : "", + (msr & 1 << 12) ? "MultiCoreTurbo, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 9) ? "CorePwr, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 5) ? "Auto-HWP, " : "", + (msr & 1 << 4) ? "Graphics, " : "", + (msr & 1 << 2) ? "bit2, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 0) ? "PROCHOT, " : ""); + fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + (msr & 1 << 31) ? "bit31, " : "", + (msr & 1 << 30) ? "bit30, " : "", + (msr & 1 << 29) ? "Transitions, " : "", + (msr & 1 << 28) ? "MultiCoreTurbo, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 25) ? "CorePwr, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 21) ? "Auto-HWP, " : "", + (msr & 1 << 20) ? "Graphics, " : "", + (msr & 1 << 18) ? "bit18, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 16) ? "PROCHOT, " : ""); + + } + if (do_gfx_perf_limit_reasons) { + get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); + fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", + (msr & 1 << 0) ? "PROCHOT, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 4) ? "Graphics, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 9) ? "GFXPwr, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : ""); + fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", + (msr & 1 << 16) ? "PROCHOT, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 20) ? "Graphics, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 25) ? "GFXPwr, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : ""); + } + if (do_ring_perf_limit_reasons) { + get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); + fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(stderr, " (Active: %s%s%s%s%s%s)", + (msr & 1 << 0) ? "PROCHOT, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : ""); + fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", + (msr & 1 << 16) ? "PROCHOT, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : ""); + } + return 0; +} + #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ @@ -1584,6 +1937,25 @@ double get_tdp(model) } } +/* + * rapl_dram_energy_units_probe() + * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. + */ +static double +rapl_dram_energy_units_probe(int model, double rapl_energy_units) +{ + /* only called for genuine_intel, family 6 */ + + switch (model) { + case 0x3F: /* HSX */ + case 0x4F: /* BDX */ + case 0x56: /* BDX-DE */ + return (rapl_dram_energy_units = 15.3 / 1000000); + default: + return (rapl_energy_units); + } +} + /* * rapl_probe() @@ -1609,16 +1981,21 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x45: /* HSW */ case 0x46: /* HSW */ case 0x3D: /* BDW */ + case 0x47: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; break; + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + break; case 0x3F: /* HSX */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ - do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x2D: case 0x3E: - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x37: /* BYT */ case 0x4D: /* AVN */ @@ -1638,6 +2015,8 @@ void rapl_probe(unsigned int family, unsigned int model) else rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); + rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units); + time_unit = msr >> 16 & 0xF; if (time_unit == 0) time_unit = 0xA; @@ -1647,12 +2026,33 @@ void rapl_probe(unsigned int family, unsigned int model) tdp = get_tdp(model); rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; - if (verbose) + if (debug) fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); return; } +void perf_limit_reasons_probe(family, model) +{ + if (!genuine_intel) + return; + + if (family != 6) + return; + + switch (model) { + case 0x3C: /* HSW */ + case 0x45: /* HSW */ + case 0x46: /* HSW */ + do_gfx_perf_limit_reasons = 1; + case 0x3F: /* HSX */ + do_core_perf_limit_reasons = 1; + do_ring_perf_limit_reasons = 1; + default: + return; + } +} + int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; @@ -1751,7 +2151,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) return -1; - if (verbose) { + if (debug) { fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, rapl_power_units, rapl_energy_units, rapl_time_units); @@ -1787,19 +2187,18 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ((msr >> 48) & 1) ? "EN" : "DIS"); } - if (do_rapl & RAPL_DRAM) { + if (do_rapl & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); - - + } + if (do_rapl & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", @@ -1808,7 +2207,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) print_power_limit_msr(cpu, msr, "DRAM Limit"); } if (do_rapl & RAPL_CORE_POLICY) { - if (verbose) { + if (debug) { if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; @@ -1816,7 +2215,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) } } if (do_rapl & RAPL_CORES) { - if (verbose) { + if (debug) { if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; @@ -1826,7 +2225,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) } } if (do_rapl & RAPL_GFX) { - if (verbose) { + if (debug) { if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; @@ -1842,8 +2241,15 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; } +/* + * SNB adds support for additional MSRs: + * + * MSR_PKG_C7_RESIDENCY 0x000003fa + * MSR_CORE_C7_RESIDENCY 0x000003fe + * MSR_PKG_C2_RESIDENCY 0x0000060d + */ -int is_snb(unsigned int family, unsigned int model) +int has_snb_msrs(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; @@ -1858,14 +2264,24 @@ int is_snb(unsigned int family, unsigned int model) case 0x45: /* HSW */ case 0x46: /* HSW */ case 0x3D: /* BDW */ + case 0x47: /* BDW */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ return 1; } return 0; } -int has_c8_c9_c10(unsigned int family, unsigned int model) +/* + * HSW adds support for additional MSRs: + * + * MSR_PKG_C8_RESIDENCY 0x00000630 + * MSR_PKG_C9_RESIDENCY 0x00000631 + * MSR_PKG_C10_RESIDENCY 0x00000632 + */ +int has_hsw_msrs(unsigned int family, unsigned int model) { if (!genuine_intel) return 0; @@ -1873,11 +2289,35 @@ int has_c8_c9_c10(unsigned int family, unsigned int model) switch (model) { case 0x45: /* HSW */ case 0x3D: /* BDW */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ return 1; } return 0; } +/* + * SKL adds support for additional MSRS: + * + * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 + * MSR_PKG_ANY_CORE_C0_RES 0x00000659 + * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A + * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + */ +int has_skl_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + return 1; + } + return 0; +} + + int is_slm(unsigned int family, unsigned int model) { @@ -1917,7 +2357,7 @@ double slm_bclk(void) double discover_bclk(unsigned int family, unsigned int model) { - if (is_snb(family, model)) + if (has_snb_msrs(family, model)) return 100.00; else if (is_slm(family, model)) return slm_bclk(); @@ -1965,7 +2405,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk } /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nehalem_platform_info) + if (!do_nhm_platform_info) goto guess; if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) @@ -1973,7 +2413,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk target_c_local = (msr >> 16) & 0xFF; - if (verbose) + if (debug) fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); @@ -1991,7 +2431,7 @@ guess: return 0; } -void check_cpuid() +void process_cpuid() { unsigned int eax, ebx, ecx, edx, max_level; unsigned int fms, family, model, stepping; @@ -2003,7 +2443,7 @@ void check_cpuid() if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) genuine_intel = 1; - if (verbose) + if (debug) fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", (char *)&ebx, (char *)&edx, (char *)&ecx); @@ -2014,7 +2454,7 @@ void check_cpuid() if (family == 6 || family == 0xf) model += ((fms >> 16) & 0xf) << 4; - if (verbose) + if (debug) fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); @@ -2029,18 +2469,15 @@ void check_cpuid() ebx = ecx = edx = 0; __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); - if (max_level < 0x80000007) - errx(1, "CPUID: no invariant TSC (max_level 0x%x)", max_level); + if (max_level >= 0x80000007) { - /* - * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 - * this check is valid for both Intel and AMD - */ - __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); - has_invariant_tsc = edx & (1 << 8); - - if (!has_invariant_tsc) - errx(1, "No invariant TSC"); + /* + * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 + * this check is valid for both Intel and AMD + */ + __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); + has_invariant_tsc = edx & (1 << 8); + } /* * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 @@ -2053,36 +2490,87 @@ void check_cpuid() do_ptm = eax & (1 << 6); has_epb = ecx & (1 << 3); - if (verbose) - fprintf(stderr, "CPUID(6): %s%s%s%s\n", - has_aperf ? "APERF" : "No APERF!", - do_dts ? ", DTS" : "", - do_ptm ? ", PTM": "", - has_epb ? ", EPB": ""); - - if (!has_aperf) - errx(-1, "No APERF"); - - do_nehalem_platform_info = genuine_intel && has_invariant_tsc; - do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ - do_smi = do_nhm_cstates; - do_snb_cstates = is_snb(family, model); - do_c8_c9_c10 = has_c8_c9_c10(family, model); + if (debug) + fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", + has_aperf ? "" : "No ", + do_dts ? "" : "No ", + do_ptm ? "" : "No ", + has_epb ? "" : "No "); + + if (max_level > 0x15) { + unsigned int eax_crystal; + unsigned int ebx_tsc; + + /* + * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz + */ + eax_crystal = ebx_tsc = crystal_hz = edx = 0; + __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); + + if (ebx_tsc != 0) { + + if (debug && (ebx != 0)) + fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", + eax_crystal, ebx_tsc, crystal_hz); + + if (crystal_hz == 0) + switch(model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + crystal_hz = 24000000; /* 24 MHz */ + break; + default: + crystal_hz = 0; + } + + if (crystal_hz) { + tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; + if (debug) + fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", + tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); + } + } + } + + do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); + do_snb_cstates = has_snb_msrs(family, model); + do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); + do_pc3 = (pkg_cstate_limit >= PCL__3); + do_pc6 = (pkg_cstate_limit >= PCL__6); + do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); + do_c8_c9_c10 = has_hsw_msrs(family, model); + do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); bclk = discover_bclk(family, model); - do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); - do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); rapl_probe(family, model); + perf_limit_reasons_probe(family, model); + + if (debug) + dump_cstate_pstate_config_info(); return; } - -void usage() +void help() { - errx(1, "%s: [-v][-R][-T][-p|-P|-S][-c MSR#][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", - progname); + fprintf(stderr, + "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" + "\n" + "Turbostat forks the specified COMMAND and prints statistics\n" + "when COMMAND completes.\n" + "If no COMMAND is specified, turbostat wakes every 5-seconds\n" + "to print statistics, until interrupted.\n" + "--debug run in \"debug\" mode\n" + "--interval sec Override default 5-second measurement interval\n" + "--help print this help message\n" + "--counter msr print 32-bit counter at address \"msr\"\n" + "--Counter msr print 64-bit Counter at address \"msr\"\n" + "--msr msr print 32-bit value at address \"msr\"\n" + "--MSR msr print 64-bit Value at address \"msr\"\n" + "--version print version information\n" + "\n" + "For more help, run \"man turbostat\"\n"); } @@ -2121,7 +2609,7 @@ void topology_probe() if (!summary_only && topo.num_cpus > 1) show_cpu = 1; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); @@ -2156,7 +2644,7 @@ void topology_probe() int siblings; if (cpu_is_not_present(i)) { - if (verbose > 1) + if (debug > 1) fprintf(stderr, "cpu%d NOT PRESENT\n", i); continue; } @@ -2171,26 +2659,26 @@ void topology_probe() siblings = get_num_ht_siblings(i); if (siblings > max_siblings) max_siblings = siblings; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "cpu %d pkg %d core %d\n", i, cpus[i].physical_package_id, cpus[i].core_id); } topo.num_cores_per_pkg = max_core_id + 1; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); - if (!summary_only && topo.num_cores_per_pkg > 1) + if (debug && !summary_only && topo.num_cores_per_pkg > 1) show_core = 1; topo.num_packages = max_package_id + 1; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); - if (!summary_only && topo.num_packages > 1) + if (debug && !summary_only && topo.num_packages > 1) show_pkg = 1; topo.num_threads_per_core = max_siblings; - if (verbose > 1) + if (debug > 1) fprintf(stderr, "max_siblings %d\n", max_siblings); free(cpus); @@ -2299,25 +2787,24 @@ void setup_all_buffers(void) void turbostat_init() { - check_cpuid(); - check_dev_msr(); - check_super_user(); + check_permissions(); + process_cpuid(); setup_all_buffers(); - if (verbose) - print_verbose_header(); - - if (verbose) + if (debug) for_all_cpus(print_epb, ODD_COUNTERS); - if (verbose) + if (debug) + for_all_cpus(print_perf_limit, ODD_COUNTERS); + + if (debug) for_all_cpus(print_rapl, ODD_COUNTERS); for_all_cpus(set_temperature_target, ODD_COUNTERS); - if (verbose) + if (debug) for_all_cpus(print_thermal, ODD_COUNTERS); } @@ -2382,56 +2869,82 @@ int get_and_dump_counters(void) return status; } +void print_version() { + fprintf(stderr, "turbostat version 4.5 2 Apr, 2015" + " - Len Brown <lenb@kernel.org>\n"); +} + void cmdline(int argc, char **argv) { int opt; + int option_index = 0; + static struct option long_options[] = { + {"Counter", required_argument, 0, 'C'}, + {"counter", required_argument, 0, 'c'}, + {"Dump", no_argument, 0, 'D'}, + {"debug", no_argument, 0, 'd'}, + {"interval", required_argument, 0, 'i'}, + {"help", no_argument, 0, 'h'}, + {"Joules", no_argument, 0, 'J'}, + {"MSR", required_argument, 0, 'M'}, + {"msr", required_argument, 0, 'm'}, + {"Package", no_argument, 0, 'p'}, + {"processor", no_argument, 0, 'p'}, + {"Summary", no_argument, 0, 'S'}, + {"TCC", required_argument, 0, 'T'}, + {"version", no_argument, 0, 'v' }, + {0, 0, 0, 0 } + }; progname = argv[0]; - while ((opt = getopt(argc, argv, "+pPsSvi:c:C:m:M:RJT:")) != -1) { + while ((opt = getopt_long_only(argc, argv, "C:c:Ddhi:JM:m:PpST:v", + long_options, &option_index)) != -1) { switch (opt) { - case 'p': - show_core_only++; + case 'C': + sscanf(optarg, "%x", &extra_delta_offset64); break; - case 'P': - show_pkg_only++; + case 'c': + sscanf(optarg, "%x", &extra_delta_offset32); break; - case 's': + case 'D': dump_only++; break; - case 'S': - summary_only++; - break; - case 'v': - verbose++; + case 'd': + debug++; break; + case 'h': + default: + help(); + exit(1); case 'i': interval_sec = atoi(optarg); break; - case 'c': - sscanf(optarg, "%x", &extra_delta_offset32); + case 'J': + rapl_joules++; break; - case 'C': - sscanf(optarg, "%x", &extra_delta_offset64); + case 'M': + sscanf(optarg, "%x", &extra_msr_offset64); break; case 'm': sscanf(optarg, "%x", &extra_msr_offset32); break; - case 'M': - sscanf(optarg, "%x", &extra_msr_offset64); + case 'P': + show_pkg_only++; break; - case 'R': - rapl_verbose++; + case 'p': + show_core_only++; + break; + case 'S': + summary_only++; break; case 'T': tcc_activation_temp_override = atoi(optarg); break; - case 'J': - rapl_joules++; + case 'v': + print_version(); + exit(0); break; - - default: - usage(); } } } @@ -2440,9 +2953,8 @@ int main(int argc, char **argv) { cmdline(argc, argv); - if (verbose) - fprintf(stderr, "turbostat v3.7 Feb 6, 2014" - " - Len Brown <lenb@kernel.org>\n"); + if (debug) + print_version(); turbostat_init(); diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index b9cd036f0442..d08e214ec6e7 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -178,6 +178,7 @@ my $checkout; my $localversion; my $iteration = 0; my $successes = 0; +my $stty_orig; my $bisect_good; my $bisect_bad; @@ -197,6 +198,11 @@ my $patchcheck_start; my $patchcheck_cherry; my $patchcheck_end; +my $build_time; +my $install_time; +my $reboot_time; +my $test_time; + # set when a test is something other that just building or install # which would require more options. my $buildonly = 1; @@ -554,6 +560,66 @@ sub get_mandatory_config { } } +sub show_time { + my ($time) = @_; + + my $hours = 0; + my $minutes = 0; + + if ($time > 3600) { + $hours = int($time / 3600); + $time -= $hours * 3600; + } + if ($time > 60) { + $minutes = int($time / 60); + $time -= $minutes * 60; + } + + if ($hours > 0) { + doprint "$hours hour"; + doprint "s" if ($hours > 1); + doprint " "; + } + + if ($minutes > 0) { + doprint "$minutes minute"; + doprint "s" if ($minutes > 1); + doprint " "; + } + + doprint "$time second"; + doprint "s" if ($time != 1); +} + +sub print_times { + doprint "\n"; + if ($build_time) { + doprint "Build time: "; + show_time($build_time); + doprint "\n"; + } + if ($install_time) { + doprint "Install time: "; + show_time($install_time); + doprint "\n"; + } + if ($reboot_time) { + doprint "Reboot time: "; + show_time($reboot_time); + doprint "\n"; + } + if ($test_time) { + doprint "Test time: "; + show_time($test_time); + doprint "\n"; + } + # reset for iterations like bisect + $build_time = 0; + $install_time = 0; + $reboot_time = 0; + $test_time = 0; +} + sub get_mandatory_configs { get_mandatory_config("MACHINE"); get_mandatory_config("BUILD_DIR"); @@ -1341,23 +1407,83 @@ sub dodie { print " See $opt{LOG_FILE} for more info.\n"; } + if ($monitor_cnt) { + # restore terminal settings + system("stty $stty_orig"); + } + die @_, "\n"; } +sub create_pty { + my ($ptm, $pts) = @_; + my $tmp; + my $TIOCSPTLCK = 0x40045431; + my $TIOCGPTN = 0x80045430; + + sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or + dodie "Cant open /dev/ptmx"; + + # unlockpt() + $tmp = pack("i", 0); + ioctl($ptm, $TIOCSPTLCK, $tmp) or + dodie "ioctl TIOCSPTLCK for /dev/ptmx failed"; + + # ptsname() + ioctl($ptm, $TIOCGPTN, $tmp) or + dodie "ioctl TIOCGPTN for /dev/ptmx failed"; + $tmp = unpack("i", $tmp); + + sysopen($pts, "/dev/pts/$tmp", O_RDWR | O_NONBLOCK) or + dodie "Can't open /dev/pts/$tmp"; +} + +sub exec_console { + my ($ptm, $pts) = @_; + + close($ptm); + + close(\*STDIN); + close(\*STDOUT); + close(\*STDERR); + + open(\*STDIN, '<&', $pts); + open(\*STDOUT, '>&', $pts); + open(\*STDERR, '>&', $pts); + + close($pts); + + exec $console or + die "Can't open console $console"; +} + sub open_console { - my ($fp) = @_; + my ($ptm) = @_; + my $pts = \*PTSFD; + my $pid; - my $flags; + # save terminal settings + $stty_orig = `stty -g`; - my $pid = open($fp, "$console|") or - dodie "Can't open console $console"; + # place terminal in cbreak mode so that stdin can be read one character at + # a time without having to wait for a newline + system("stty -icanon -echo -icrnl"); - $flags = fcntl($fp, F_GETFL, 0) or - dodie "Can't get flags for the socket: $!"; - $flags = fcntl($fp, F_SETFL, $flags | O_NONBLOCK) or - dodie "Can't set flags for the socket: $!"; + create_pty($ptm, $pts); + + $pid = fork; + + if (!$pid) { + # child + exec_console($ptm, $pts) + } + + # parent + close($pts); return $pid; + + open(PTSFD, "Stop perl from warning about single use of PTSFD"); } sub close_console { @@ -1368,6 +1494,9 @@ sub close_console { print "closing!\n"; close($fp); + + # restore terminal settings + system("stty $stty_orig"); } sub start_monitor { @@ -1519,6 +1648,8 @@ sub fail { $name = " ($test_name)"; } + print_times; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; @@ -1534,10 +1665,14 @@ sub fail { sub run_command { my ($command, $redirect) = @_; + my $start_time; + my $end_time; my $dolog = 0; my $dord = 0; my $pid; + $start_time = time; + $command =~ s/\$SSH_USER/$ssh_user/g; $command =~ s/\$MACHINE/$machine/g; @@ -1570,6 +1705,15 @@ sub run_command { close(LOG) if ($dolog); close(RD) if ($dord); + $end_time = time; + my $delta = $end_time - $start_time; + + if ($delta == 1) { + doprint "[1 second] "; + } else { + doprint "[$delta seconds] "; + } + if ($failed) { doprint "FAILED!\n"; } else { @@ -1694,7 +1838,9 @@ sub wait_for_input { my ($fp, $time) = @_; my $rin; - my $ready; + my $rout; + my $nr; + my $buf; my $line; my $ch; @@ -1704,21 +1850,36 @@ sub wait_for_input $rin = ''; vec($rin, fileno($fp), 1) = 1; - ($ready, $time) = select($rin, undef, undef, $time); + vec($rin, fileno(\*STDIN), 1) = 1; - $line = ""; + while (1) { + $nr = select($rout=$rin, undef, undef, $time); - # try to read one char at a time - while (sysread $fp, $ch, 1) { - $line .= $ch; - last if ($ch eq "\n"); - } + if ($nr <= 0) { + return undef; + } - if (!length($line)) { - return undef; - } + # copy data from stdin to the console + if (vec($rout, fileno(\*STDIN), 1) == 1) { + sysread(\*STDIN, $buf, 1000); + syswrite($fp, $buf, 1000); + next; + } - return $line; + $line = ""; + + # try to read one char at a time + while (sysread $fp, $ch, 1) { + $line .= $ch; + last if ($ch eq "\n"); + } + + if (!length($line)) { + return undef; + } + + return $line; + } } sub reboot_to { @@ -1766,6 +1927,8 @@ sub monitor { my $skip_call_trace = 0; my $loops; + my $start_time = time; + wait_for_monitor 5; my $line; @@ -1890,6 +2053,9 @@ sub monitor { } } + my $end_time = time; + $reboot_time = $end_time - $start_time; + close(DMESG); if ($bug) { @@ -1938,6 +2104,8 @@ sub install { return if ($no_install); + my $start_time = time; + if (defined($pre_install)) { my $cp_pre_install = eval_kernel_version $pre_install; run_command "$cp_pre_install" or @@ -1969,6 +2137,8 @@ sub install { if (!$install_mods) { do_post_install; doprint "No modules needed\n"; + my $end_time = time; + $install_time = $end_time - $start_time; return; } @@ -1996,6 +2166,9 @@ sub install { run_ssh "rm -f /tmp/$modtar"; do_post_install; + + my $end_time = time; + $install_time = $end_time - $start_time; } sub get_version { @@ -2008,7 +2181,7 @@ sub get_version { $have_version = 1; } -sub start_monitor_and_boot { +sub start_monitor_and_install { # Make sure the stable kernel has finished booting # Install bisects, don't need console @@ -2208,6 +2381,8 @@ sub build { unlink $buildlog; + my $start_time = time; + # Failed builds should not reboot the target my $save_no_reboot = $no_reboot; $no_reboot = 1; @@ -2293,6 +2468,9 @@ sub build { $no_reboot = $save_no_reboot; + my $end_time = time; + $build_time = $end_time - $start_time; + return 1; } @@ -2323,6 +2501,8 @@ sub success { $name = " ($test_name)"; } + print_times; + doprint "\n\n*******************************************\n"; doprint "*******************************************\n"; doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n"; @@ -2383,6 +2563,8 @@ sub do_run_test { my $bug = 0; my $bug_ignored = 0; + my $start_time = time; + wait_for_monitor 1; doprint "run test $run_test\n"; @@ -2449,6 +2631,9 @@ sub do_run_test { waitpid $child_pid, 0; $child_exit = $?; + my $end_time = time; + $test_time = $end_time - $start_time; + if (!$bug && $in_bisect) { if (defined($bisect_ret_good)) { if ($child_exit == $bisect_ret_good) { @@ -2549,7 +2734,7 @@ sub run_bisect_test { dodie "Failed on build" if $failed; # Now boot the box - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if ($type ne "boot") { if ($failed && $bisect_skip) { @@ -2755,6 +2940,7 @@ sub bisect { do { $result = run_bisect $type; $test = run_git_bisect "git bisect $result"; + print_times; } while ($test); run_command "git bisect log" or @@ -3168,6 +3354,7 @@ sub config_bisect { do { $ret = run_config_bisect \%good_configs, \%bad_configs; + print_times; } while (!$ret); return $ret if ($ret < 0); @@ -3260,7 +3447,7 @@ sub patchcheck { my $sha1 = $item; $sha1 =~ s/^([[:xdigit:]]+).*/$1/; - doprint "\nProcessing commit $item\n\n"; + doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or die "Failed to checkout $sha1"; @@ -3291,16 +3478,18 @@ sub patchcheck { my $failed = 0; - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if (!$failed && $type ne "boot"){ do_run_test or $failed = 1; } end_monitor; - return 0 if ($failed); - + if ($failed) { + print_times; + return 0; + } patchcheck_reboot; - + print_times; } $in_patchcheck = 0; success $i; @@ -3753,7 +3942,7 @@ sub make_min_config { my $failed = 0; build "oldconfig" or $failed = 1; if (!$failed) { - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if ($type eq "test" && !$failed) { do_run_test or $failed = 1; @@ -4000,6 +4189,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $iteration = $i; + $build_time = 0; + $install_time = 0; + $reboot_time = 0; + $test_time = 0; + undef %force_config; my $makecmd = set_test_option("MAKE_CMD", $i); @@ -4157,15 +4351,20 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if ($test_type ne "build") { my $failed = 0; - start_monitor_and_boot or $failed = 1; + start_monitor_and_install or $failed = 1; if (!$failed && $test_type ne "boot" && defined($run_test)) { do_run_test or $failed = 1; } end_monitor; - next if ($failed); + if ($failed) { + print_times; + next; + } } + print_times; + success $i; } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 4e511221a0c1..95abddcd7839 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -17,11 +17,20 @@ TARGETS += sysctl TARGETS += timers TARGETS += user TARGETS += vm +TARGETS += x86 #Please keep the TARGETS list alphabetically sorted TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# Clear LDFLAGS and MAKEFLAGS if called from main +# Makefile to avoid test build failures when test +# Makefile doesn't have explicit build rules. +ifeq (1,$(MAKELEVEL)) +undefine LDFLAGS +override MAKEFLAGS = +endif + all: for TARGET in $(TARGETS); do \ make -C $$TARGET; \ @@ -47,7 +56,40 @@ clean_hotplug: make -C $$TARGET clean; \ done; +INSTALL_PATH ?= install +INSTALL_PATH := $(abspath $(INSTALL_PATH)) +ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh + +install: +ifdef INSTALL_PATH + @# Ask all targets to install their files + mkdir -p $(INSTALL_PATH) + for TARGET in $(TARGETS); do \ + mkdir -p $(INSTALL_PATH)/$$TARGET ; \ + make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + done; + + @# Ask all targets to emit their test scripts + echo "#!/bin/bash" > $(ALL_SCRIPT) + echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) + echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) + + for TARGET in $(TARGETS); do \ + echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \ + echo "echo ========================================" >> $(ALL_SCRIPT); \ + echo "cd $$TARGET" >> $(ALL_SCRIPT); \ + make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ + echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ + done; + + chmod u+x $(ALL_SCRIPT) +else + $(error Error: set INSTALL_PATH to use install) +endif + clean: for TARGET in $(TARGETS); do \ make -C $$TARGET clean; \ done; + +.PHONY: install diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index e18b42b254af..182235640209 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -16,8 +16,9 @@ else echo "Not an x86 target, can't build breakpoints selftests" endif -run_tests: - @./breakpoint_test || echo "breakpoints selftests: [FAIL]" +TEST_PROGS := breakpoint_test + +include ../lib.mk clean: rm -fr breakpoint_test diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile index e9c28d8dc84b..fe1f99101c5d 100644 --- a/tools/testing/selftests/cpu-hotplug/Makefile +++ b/tools/testing/selftests/cpu-hotplug/Makefile @@ -1,9 +1,10 @@ all: -run_tests: - @/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]" +TEST_PROGS := cpu-on-off-test.sh + +include ../lib.mk run_full_test: - @/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]" + @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/cpu-hotplug/on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index 98b1d6565f2c..98b1d6565f2c 100644..100755 --- a/tools/testing/selftests/cpu-hotplug/on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile index 29e8c6bc81b0..736c3ddfc787 100644 --- a/tools/testing/selftests/efivarfs/Makefile +++ b/tools/testing/selftests/efivarfs/Makefile @@ -1,12 +1,13 @@ -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall test_objs = open-unlink create-read all: $(test_objs) -run_tests: all - @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]" +TEST_PROGS := efivarfs.sh +TEST_FILES := $(test_objs) + +include ../lib.mk clean: rm -f $(test_objs) diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index 77edcdcc016b..77edcdcc016b 100644..100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 66dfc2ce1788..4edb7d0da29b 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -1,4 +1,3 @@ -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall BINARIES = execveat DEPS = execveat.symlink execveat.denatured script subdir @@ -18,8 +17,12 @@ execveat.denatured: execveat %: %.c $(CC) $(CFLAGS) -o $@ $^ -run_tests: all - ./execveat +TEST_PROGS := execveat +TEST_FILES := $(DEPS) + +include ../lib.mk + +override EMIT_TESTS := echo "mkdir -p subdir; (./execveat && echo \"selftests: execveat [PASS]\") || echo \"selftests: execveat [FAIL]\"" clean: rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx* diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 33a5c06d95ca..8d5d1d2ee7c1 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -30,7 +30,7 @@ static int execveat_(int fd, const char *path, char **argv, char **envp, #ifdef __NR_execveat return syscall(__NR_execveat, fd, path, argv, envp, flags); #else - errno = -ENOSYS; + errno = ENOSYS; return -1; #endif } @@ -62,7 +62,7 @@ static int _check_execveat_fail(int fd, const char *path, int flags, } static int check_execveat_invoked_rc(int fd, const char *path, int flags, - int expected_rc) + int expected_rc, int expected_rc2) { int status; int rc; @@ -98,9 +98,10 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags, child, status); return 1; } - if (WEXITSTATUS(status) != expected_rc) { - printf("[FAIL] (child %d exited with %d not %d)\n", - child, WEXITSTATUS(status), expected_rc); + if ((WEXITSTATUS(status) != expected_rc) && + (WEXITSTATUS(status) != expected_rc2)) { + printf("[FAIL] (child %d exited with %d not %d nor %d)\n", + child, WEXITSTATUS(status), expected_rc, expected_rc2); return 1; } printf("[OK]\n"); @@ -109,7 +110,7 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags, static int check_execveat(int fd, const char *path, int flags) { - return check_execveat_invoked_rc(fd, path, flags, 99); + return check_execveat_invoked_rc(fd, path, flags, 99, 99); } static char *concat(const char *left, const char *right) @@ -179,11 +180,11 @@ static int check_execveat_pathmax(int dot_dfd, const char *src, int is_script) */ fd = open(longpath, O_RDONLY); if (fd > 0) { - printf("Invoke copy of '%s' via filename of length %lu:\n", + printf("Invoke copy of '%s' via filename of length %zu:\n", src, strlen(longpath)); fail += check_execveat(fd, "", AT_EMPTY_PATH); } else { - printf("Failed to open length %lu filename, errno=%d (%s)\n", + printf("Failed to open length %zu filename, errno=%d (%s)\n", strlen(longpath), errno, strerror(errno)); fail++; } @@ -192,9 +193,15 @@ static int check_execveat_pathmax(int dot_dfd, const char *src, int is_script) * Execute as a long pathname relative to ".". If this is a script, * the interpreter will launch but fail to open the script because its * name ("/dev/fd/5/xxx....") is bigger than PATH_MAX. + * + * The failure code is usually 127 (POSIX: "If a command is not found, + * the exit status shall be 127."), but some systems give 126 (POSIX: + * "If the command name is found, but it is not an executable utility, + * the exit status shall be 126."), so allow either. */ if (is_script) - fail += check_execveat_invoked_rc(dot_dfd, longpath, 0, 127); + fail += check_execveat_invoked_rc(dot_dfd, longpath, 0, + 127, 126); else fail += check_execveat(dot_dfd, longpath, 0); @@ -227,6 +234,14 @@ static int run_tests(void) int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC); int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC); + /* Check if we have execveat at all, and bail early if not */ + errno = 0; + execveat_(-1, NULL, NULL, NULL, 0); + if (errno == ENOSYS) { + printf("[FAIL] ENOSYS calling execveat - no kernel support?\n"); + return 1; + } + /* Change file position to confirm it doesn't affect anything */ lseek(fd, 10, SEEK_SET); diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index e23cce0bbc3a..9bf82234855b 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -3,25 +3,9 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -fw_filesystem: - @if /bin/sh ./fw_filesystem.sh ; then \ - echo "fw_filesystem: ok"; \ - else \ - echo "fw_filesystem: [FAIL]"; \ - exit 1; \ - fi +TEST_PROGS := fw_filesystem.sh fw_userhelper.sh -fw_userhelper: - @if /bin/sh ./fw_userhelper.sh ; then \ - echo "fw_userhelper: ok"; \ - else \ - echo "fw_userhelper: [FAIL]"; \ - exit 1; \ - fi - -run_tests: all fw_filesystem fw_userhelper +include ../lib.mk # Nothing to clean up. clean: - -.PHONY: all clean run_tests fw_filesystem fw_userhelper diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index 3fc6c10c2479..3fc6c10c2479 100644..100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_userhelper.sh index 6efbade12139..6efbade12139 100644..100755 --- a/tools/testing/selftests/firmware/fw_userhelper.sh +++ b/tools/testing/selftests/firmware/fw_userhelper.sh diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index 76cc9f156267..346720639d1d 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -1,7 +1,8 @@ all: -run_tests: - @/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]" +TEST_PROGS := ftracetest + +include ../lib.mk clean: rm -rf logs/* diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc index fd9c49a13612..aa51f6c17359 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc @@ -2,4 +2,4 @@ # description: Basic event tracing check test -f available_events -a -f set_event -a -d events # check scheduler events are available -grep -q sched available_events && exit 0 || exit -1
\ No newline at end of file +grep -q sched available_events && exit 0 || exit $FAIL diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc index 668616d9bb03..87eb9d6dd4ca 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f set_event -o ! -d events/sched ]; then @@ -21,7 +25,8 @@ reset_tracer do_reset echo 'sched:sched_switch' > set_event -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -eq 0 ]; then @@ -31,7 +36,8 @@ fi do_reset echo 1 > events/sched/sched_switch/enable -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -eq 0 ]; then @@ -41,7 +47,8 @@ fi do_reset echo 0 > events/sched/sched_switch/enable -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -ne 0 ]; then diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index 655c415b6e7f..ced27ef0638f 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f set_event -o ! -d events/sched ]; then @@ -21,7 +25,8 @@ reset_tracer do_reset echo 'sched:*' > set_event -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then @@ -31,7 +36,8 @@ fi do_reset echo 1 > events/sched/enable -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then @@ -41,7 +47,8 @@ fi do_reset echo 0 > events/sched/enable -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -ne 0 ]; then diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc index 480845774007..0bb5df3c00d4 100644 --- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then @@ -21,6 +25,9 @@ reset_tracer do_reset echo '*:*' > set_event + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -eq 0 ]; then fail "none of events are recorded" @@ -29,6 +36,9 @@ fi do_reset echo 1 > events/enable + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -eq 0 ]; then fail "none of events are recorded" @@ -37,6 +47,9 @@ fi do_reset echo 0 > events/enable + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -ne 0 ]; then fail "any of events should not be recorded" diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc index c15e018e0220..15c2dba06ea2 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -16,7 +16,9 @@ fi do_reset() { reset_tracer - echo 0 > /proc/sys/kernel/stack_tracer_enabled + if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then + echo 0 > /proc/sys/kernel/stack_tracer_enabled + fi enable_tracing clear_trace echo > set_ftrace_filter @@ -25,7 +27,7 @@ do_reset() { fail() { # msg do_reset echo $1 - exit -1 + exit $FAIL } disable_tracing diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc index 6af5f6360b18..0ab2189613ef 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc @@ -17,7 +17,7 @@ do_reset() { fail() { # msg do_reset echo $1 - exit -1 + exit $FAIL } disable_tracing diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc index 2e719cb1fc4d..7808336d6f50 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc @@ -31,7 +31,7 @@ fail() { # mesg reset_tracer echo > set_ftrace_filter echo $1 - exit -1 + exit $FAIL } echo "Testing function tracer with profiler:" diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh new file mode 100755 index 000000000000..17d5bd0c0936 --- /dev/null +++ b/tools/testing/selftests/gen_kselftest_tar.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# +# gen_kselftest_tar +# Generate kselftest tarball +# Author: Shuah Khan <shuahkh@osg.samsung.com> +# Copyright (C) 2015 Samsung Electronics Co., Ltd. + +# This software may be freely redistributed under the terms of the GNU +# General Public License (GPLv2). + +# main +main() +{ + if [ "$#" -eq 0 ]; then + echo "$0: Generating default compression gzip" + copts="cvzf" + ext=".tar.gz" + else + case "$1" in + tar) + copts="cvf" + ext=".tar" + ;; + targz) + copts="cvzf" + ext=".tar.gz" + ;; + tarbz2) + copts="cvjf" + ext=".tar.bz2" + ;; + tarxz) + copts="cvJf" + ext=".tar.xz" + ;; + *) + echo "Unknown tarball format $1" + exit 1 + ;; + esac + fi + + install_dir=./kselftest + +# Run install using INSTALL_KSFT_PATH override to generate install +# directory +./kselftest_install.sh +tar $copts kselftest${ext} $install_dir +echo "Kselftest archive kselftest${ext} created!" + +# clean up install directory +rm -rf kselftest +} + +main "$@" diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile index 74bbefdeaf4c..25d2e702c68a 100644 --- a/tools/testing/selftests/ipc/Makefile +++ b/tools/testing/selftests/ipc/Makefile @@ -12,14 +12,11 @@ endif CFLAGS += -I../../../../usr/include/ all: -ifeq ($(ARCH),x86) - gcc $(CFLAGS) msgque.c -o msgque_test -else - echo "Not an x86 target, can't build msgque selftest" -endif + $(CC) $(CFLAGS) msgque.c -o msgque_test + +TEST_PROGS := msgque_test -run_tests: all - ./msgque_test +include ../lib.mk clean: rm -fr ./msgque_test diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile index ff0eefdc6ceb..2ae7450a9a89 100644 --- a/tools/testing/selftests/kcmp/Makefile +++ b/tools/testing/selftests/kcmp/Makefile @@ -1,10 +1,10 @@ -CC := $(CROSS_COMPILE)$(CC) CFLAGS += -I../../../../usr/include/ all: kcmp_test -run_tests: all - @./kcmp_test || echo "kcmp_test: [FAIL]" +TEST_PROGS := kcmp_test + +include ../lib.mk clean: $(RM) kcmp_test kcmp-test-file diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh new file mode 100755 index 000000000000..1555fbdb08da --- /dev/null +++ b/tools/testing/selftests/kselftest_install.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# Kselftest Install +# Install kselftest tests +# Author: Shuah Khan <shuahkh@osg.samsung.com> +# Copyright (C) 2015 Samsung Electronics Co., Ltd. + +# This software may be freely redistributed under the terms of the GNU +# General Public License (GPLv2). + +install_loc=`pwd` + +main() +{ + if [ $(basename $install_loc) != "selftests" ]; then + echo "$0: Please run it in selftests directory ..." + exit 1; + fi + if [ "$#" -eq 0 ]; then + echo "$0: Installing in default location - $install_loc ..." + elif [ ! -d "$1" ]; then + echo "$0: $1 doesn't exist!!" + exit 1; + else + install_loc=$1 + echo "$0: Installing in specified location - $install_loc ..." + fi + + install_dir=$install_loc/kselftest + +# Create install directory + mkdir -p $install_dir +# Build tests + INSTALL_PATH=$install_dir make install +} + +main "$@" diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk new file mode 100644 index 000000000000..2194155ae62a --- /dev/null +++ b/tools/testing/selftests/lib.mk @@ -0,0 +1,35 @@ +# This mimics the top-level Makefile. We do it explicitly here so that this +# Makefile can operate with or without the kbuild infrastructure. +CC := $(CROSS_COMPILE)gcc + +define RUN_TESTS + @for TEST in $(TEST_PROGS); do \ + (./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \ + done; +endef + +run_tests: all + $(RUN_TESTS) + +define INSTALL_RULE + mkdir -p $(INSTALL_PATH) + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) +endef + +install: all +ifdef INSTALL_PATH + $(INSTALL_RULE) +else + $(error Error: set INSTALL_PATH to use install) +endif + +define EMIT_TESTS + @for TEST in $(TEST_PROGS); do \ + echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \ + done; +endef + +emit_tests: + $(EMIT_TESTS) + +.PHONY: run_tests all clean install emit_tests diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index b80cd10d53ba..3e7eb7972511 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -1,17 +1,19 @@ +CC = $(CROSS_COMPILE)gcc CFLAGS += -D_FILE_OFFSET_BITS=64 CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ +CFLAGS += -I../../../../usr/include/ all: - gcc $(CFLAGS) memfd_test.c -o memfd_test + $(CC) $(CFLAGS) memfd_test.c -o memfd_test -run_tests: all - gcc $(CFLAGS) memfd_test.c -o memfd_test - @./memfd_test || echo "memfd_test: [FAIL]" +TEST_PROGS := memfd_test + +include ../lib.mk build_fuse: - gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt - gcc $(CFLAGS) fuse_test.c -o fuse_test + $(CC) $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt + $(CC) $(CFLAGS) fuse_test.c -o fuse_test run_fuse: build_fuse @./run_fuse_test.sh || echo "fuse_test: [FAIL]" diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index d46b8d489cd2..afb2624c7048 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -1,9 +1,12 @@ all: -run_tests: - @/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]" +include ../lib.mk + +TEST_PROGS := mem-on-off-test.sh +override RUN_TESTS := ./mem-on-off-test.sh -r 2 || echo "selftests: memory-hotplug [FAIL]" +override EMIT_TESTS := echo "$(RUN_TESTS)" run_full_test: - @/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" + @/bin/bash ./mem-on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/memory-hotplug/on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index 6cddde0b96f8..6cddde0b96f8 100644..100755 --- a/tools/testing/selftests/memory-hotplug/on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore new file mode 100644 index 000000000000..856ad4107eb3 --- /dev/null +++ b/tools/testing/selftests/mount/.gitignore @@ -0,0 +1 @@ +unprivileged-remount-test diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index 337d853c2b72..95580a97326e 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -1,17 +1,16 @@ # Makefile for mount selftests. - +CFLAGS = -Wall \ + -O2 all: unprivileged-remount-test unprivileged-remount-test: unprivileged-remount-test.c - gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test + $(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test -# Allow specific tests to be selected. -test_unprivileged_remount: unprivileged-remount-test - @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi +include ../lib.mk -run_tests: all test_unprivileged_remount +TEST_PROGS := unprivileged-remount-test +override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi +override EMIT_TESTS := echo "$(RUN_TESTS)" clean: rm -f unprivileged-remount-test - -.PHONY: all test_unprivileged_remount diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 8056e2e68fa4..0e3b41eb85cd 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -1,10 +1,22 @@ +CFLAGS = -O2 + all: - gcc -O2 mq_open_tests.c -o mq_open_tests -lrt - gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt + $(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt + $(CC) $(CFLAGS) -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt + +include ../lib.mk + +override define RUN_TESTS + @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" + @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" +endef + +TEST_PROGS := mq_open_tests mq_perf_tests -run_tests: - @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]" - @./mq_perf_tests || echo "mq_perf_tests: [FAIL]" +override define EMIT_TESTS + echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\"" + echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\"" +endef clean: rm -f mq_open_tests mq_perf_tests diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index 94dae65eea41..8519e9ee97e3 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -536,10 +536,9 @@ int main(int argc, char *argv[]) { struct mq_attr attr; char *option, *next_option; - int i, cpu; + int i, cpu, rc; struct sigaction sa; poptContext popt_context; - char rc; void *retval; main_thread = pthread_self(); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 62f22cc9941c..fac4782c51d8 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,6 +1,5 @@ # Makefile for net selftests -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ @@ -11,9 +10,10 @@ all: $(NET_PROGS) %: %.c $(CC) $(CFLAGS) -o $@ $^ -run_tests: all - @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]" - @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]" - ./test_bpf.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh +TEST_FILES := $(NET_PROGS) + +include ../lib.mk + clean: $(RM) $(NET_PROGS) diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 5246e782d6e8..5246e782d6e8 100644..100755 --- a/tools/testing/selftests/net/run_afpackettests +++ b/tools/testing/selftests/net/run_afpackettests diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index c09a682df56a..c09a682df56a 100644..100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index f6ff90a76bd7..5ad042345ab9 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,27 +8,41 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CC := $(CROSS_COMPILE)$(CC) CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) -export CC CFLAGS +export CFLAGS -TARGETS = pmu copyloops mm tm primitives +SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian endif -all: $(TARGETS) +all: $(SUB_DIRS) -$(TARGETS): +$(SUB_DIRS): $(MAKE) -k -C $@ all -run_tests: all - @for TARGET in $(TARGETS); do \ +include ../lib.mk + +override define RUN_TESTS + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET run_tests; \ done; +endef + +override define INSTALL_RULE + @for TARGET in $(SUB_DIRS); do \ + $(MAKE) -C $$TARGET install; \ + done; +endef + +override define EMIT_TESTS + @for TARGET in $(SUB_DIRS); do \ + $(MAKE) -s -C $$TARGET emit_tests; \ + done; +endef clean: - @for TARGET in $(TARGETS); do \ + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET clean; \ done; rm -f tags @@ -36,4 +50,4 @@ clean: tags: find . -name '*.c' -o -name '*.h' | xargs ctags -.PHONY: all run_tests clean tags $(TARGETS) +.PHONY: tags $(SUB_DIRS) diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore new file mode 100644 index 000000000000..25a192f62c4d --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -0,0 +1,4 @@ +copyuser_64 +copyuser_power7 +memcpy_64 +memcpy_power7 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 6f2d3be227f9..384843ea0d40 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -2,28 +2,24 @@ CFLAGS += -m64 CFLAGS += -I$(CURDIR) CFLAGS += -D SELFTEST +CFLAGS += -maltivec # Use our CFLAGS for the implicit .S rule ASFLAGS = $(CFLAGS) -PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 +TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 EXTRA_SOURCES := validate.c ../harness.c -all: $(PROGS) +all: $(TEST_PROGS) copyuser_64: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7 memcpy_64: CPPFLAGS += -D COPY_LOOP=test_memcpy memcpy_power7: CPPFLAGS += -D COPY_LOOP=test_memcpy_power7 -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index d1dc37425510..50ae7d2091ce 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -4,39 +4,6 @@ #define r1 1 -#define vr0 0 -#define vr1 1 -#define vr2 2 -#define vr3 3 -#define vr4 4 -#define vr5 5 -#define vr6 6 -#define vr7 7 -#define vr8 8 -#define vr9 9 -#define vr10 10 -#define vr11 11 -#define vr12 12 -#define vr13 13 -#define vr14 14 -#define vr15 15 -#define vr16 16 -#define vr17 17 -#define vr18 18 -#define vr19 19 -#define vr20 20 -#define vr21 21 -#define vr22 22 -#define vr23 23 -#define vr24 24 -#define vr25 25 -#define vr26 26 -#define vr27 27 -#define vr28 28 -#define vr29 29 -#define vr30 30 -#define vr31 31 - #define R14 r14 #define R15 r15 #define R16 r16 diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 8ebc58a09311..f7997affd143 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -11,6 +11,10 @@ #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> +#include <elf.h> +#include <fcntl.h> +#include <link.h> +#include <sys/stat.h> #include "subunit.h" #include "utils.h" @@ -112,3 +116,46 @@ int test_harness(int (test_function)(void), char *name) return rc; } + +static char auxv[4096]; + +void *get_auxv_entry(int type) +{ + ElfW(auxv_t) *p; + void *result; + ssize_t num; + int fd; + + fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + perror("open"); + return NULL; + } + + result = NULL; + + num = read(fd, auxv, sizeof(auxv)); + if (num < 0) { + perror("read"); + goto out; + } + + if (num > sizeof(auxv)) { + printf("Overflowed auxv buffer\n"); + goto out; + } + + p = (ElfW(auxv_t) *)auxv; + + while (p->a_type != AT_NULL) { + if (p->a_type == type) { + result = (void *)p->a_un.a_val; + break; + } + + p++; + } +out: + close(fd); + return result; +} diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore new file mode 100644 index 000000000000..b43ade0ec861 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -0,0 +1,3 @@ +hugetlb_vs_thp_test +subpage_prot +tempfile diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 357ccbd6bad9..41cc3ed66818 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -1,18 +1,16 @@ noarg: $(MAKE) -C ../ -PROGS := hugetlb_vs_thp_test +TEST_PROGS := hugetlb_vs_thp_test subpage_prot -all: $(PROGS) +all: $(TEST_PROGS) tempfile -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk -clean: - rm -f $(PROGS) +tempfile: + dd if=/dev/zero of=tempfile bs=64k count=1 -.PHONY: all run_tests clean +clean: + rm -f $(TEST_PROGS) tempfile diff --git a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c index 3d8e5b033e1d..49003674de4f 100644 --- a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c +++ b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c @@ -21,9 +21,13 @@ static int test_body(void) * Typically the mmap will fail because no huge pages are * allocated on the system. But if there are huge pages * allocated the mmap will succeed. That's fine too, we just - * munmap here before continuing. + * munmap here before continuing. munmap() length of + * MAP_HUGETLB memory must be hugepage aligned. */ - munmap(addr, SIZE); + if (munmap(addr, SIZE)) { + perror("munmap"); + return 1; + } } p = mmap(addr, SIZE, PROT_READ | PROT_WRITE, diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c new file mode 100644 index 000000000000..440180ff8089 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -0,0 +1,220 @@ +/* + * Copyright IBM Corp. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <ucontext.h> +#include <unistd.h> + +#include "utils.h" + +char *file_name; + +int in_test; +volatile int faulted; +volatile void *dar; +int errors; + +static void segv(int signum, siginfo_t *info, void *ctxt_v) +{ + ucontext_t *ctxt = (ucontext_t *)ctxt_v; + struct pt_regs *regs = ctxt->uc_mcontext.regs; + + if (!in_test) { + fprintf(stderr, "Segfault outside of test !\n"); + exit(1); + } + + faulted = 1; + dar = (void *)regs->dar; + regs->nip += 4; +} + +static inline void do_read(const volatile void *addr) +{ + int ret; + + asm volatile("lwz %0,0(%1); twi 0,%0,0; isync;\n" + : "=r" (ret) : "r" (addr) : "memory"); +} + +static inline void do_write(const volatile void *addr) +{ + int val = 0x1234567; + + asm volatile("stw %0,0(%1); sync; \n" + : : "r" (val), "r" (addr) : "memory"); +} + +static inline void check_faulted(void *addr, long page, long subpage, int write) +{ + int want_fault = (subpage == ((page + 3) % 16)); + + if (write) + want_fault |= (subpage == ((page + 1) % 16)); + + if (faulted != want_fault) { + printf("Failed at 0x%p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n", + addr, page, subpage, write, + want_fault ? "fault" : "pass", + faulted ? "fault" : "pass"); + ++errors; + } + + if (faulted) { + if (dar != addr) { + printf("Fault expected at 0x%p and happened at 0x%p !\n", + addr, dar); + } + faulted = 0; + asm volatile("sync" : : : "memory"); + } +} + +static int run_test(void *addr, unsigned long size) +{ + unsigned int *map; + long i, j, pages, err; + + pages = size / 0x10000; + map = malloc(pages * 4); + assert(map); + + /* + * for each page, mark subpage i % 16 read only and subpage + * (i + 3) % 16 inaccessible + */ + for (i = 0; i < pages; i++) { + map[i] = (0x40000000 >> (((i + 1) * 2) % 32)) | + (0xc0000000 >> (((i + 3) * 2) % 32)); + } + + err = syscall(__NR_subpage_prot, addr, size, map); + if (err) { + perror("subpage_perm"); + return 1; + } + free(map); + + in_test = 1; + errors = 0; + for (i = 0; i < pages; i++) { + for (j = 0; j < 16; j++, addr += 0x1000) { + do_read(addr); + check_faulted(addr, i, j, 0); + do_write(addr); + check_faulted(addr, i, j, 1); + } + } + + in_test = 0; + if (errors) { + printf("%d errors detected\n", errors); + return 1; + } + + return 0; +} + +int test_anon(void) +{ + unsigned long align; + struct sigaction act = { + .sa_sigaction = segv, + .sa_flags = SA_SIGINFO + }; + void *mallocblock; + unsigned long mallocsize; + + if (getpagesize() != 0x10000) { + fprintf(stderr, "Kernel page size must be 64K!\n"); + return 1; + } + + sigaction(SIGSEGV, &act, NULL); + + mallocsize = 4 * 16 * 1024 * 1024; + + FAIL_IF(posix_memalign(&mallocblock, 64 * 1024, mallocsize)); + + align = (unsigned long)mallocblock; + if (align & 0xffff) + align = (align | 0xffff) + 1; + + mallocblock = (void *)align; + + printf("allocated malloc block of 0x%lx bytes at 0x%p\n", + mallocsize, mallocblock); + + printf("testing malloc block...\n"); + + return run_test(mallocblock, mallocsize); +} + +int test_file(void) +{ + struct sigaction act = { + .sa_sigaction = segv, + .sa_flags = SA_SIGINFO + }; + void *fileblock; + off_t filesize; + int fd; + + fd = open(file_name, O_RDWR); + if (fd == -1) { + perror("failed to open file"); + return 1; + } + sigaction(SIGSEGV, &act, NULL); + + filesize = lseek(fd, 0, SEEK_END); + if (filesize & 0xffff) + filesize &= ~0xfffful; + + fileblock = mmap(NULL, filesize, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (fileblock == MAP_FAILED) { + perror("failed to map file"); + return 1; + } + printf("allocated %s for 0x%lx bytes at 0x%p\n", + file_name, filesize, fileblock); + + printf("testing file map...\n"); + + return run_test(fileblock, filesize); +} + +int main(int argc, char *argv[]) +{ + test_harness(test_anon, "subpage_prot_anon"); + + if (argc > 1) + file_name = argv[1]; + else + file_name = "tempfile"; + + test_harness(test_file, "subpage_prot_file"); + + return 0; +} diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore new file mode 100644 index 000000000000..e748f336eed3 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/.gitignore @@ -0,0 +1,3 @@ +count_instructions +l3_bank_test +per_event_excludes diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index c9f4263906a5..a9099d9f8f39 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -1,38 +1,42 @@ noarg: $(MAKE) -C ../ -PROGS := count_instructions l3_bank_test per_event_excludes +TEST_PROGS := count_instructions l3_bank_test per_event_excludes EXTRA_SOURCES := ../harness.c event.c lib.c -SUB_TARGETS = ebb +all: $(TEST_PROGS) ebb -all: $(PROGS) $(SUB_TARGETS) - -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) # loop.S can only be built 64-bit count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES) $(CC) $(CFLAGS) -m64 -o $@ $^ -run_tests: all sub_run_tests - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk -clean: sub_clean - rm -f $(PROGS) loop.o +DEFAULT_RUN_TESTS := $(RUN_TESTS) +override define RUN_TESTS + $(DEFAULT_RUN_TESTS) + $(MAKE) -C ebb run_tests +endef -$(SUB_TARGETS): - $(MAKE) -k -C $@ all +DEFAULT_EMIT_TESTS := $(EMIT_TESTS) +override define EMIT_TESTS + $(DEFAULT_EMIT_TESTS) + $(MAKE) -s -C ebb emit_tests +endef -sub_run_tests: all - @for TARGET in $(SUB_TARGETS); do \ - $(MAKE) -C $$TARGET run_tests; \ - done; +DEFAULT_INSTALL_RULE := $(INSTALL_RULE) +override define INSTALL_RULE + $(DEFAULT_INSTALL_RULE) + $(MAKE) -C ebb install +endef -sub_clean: - @for TARGET in $(SUB_TARGETS); do \ - $(MAKE) -C $$TARGET clean; \ - done; +clean: + rm -f $(TEST_PROGS) loop.o + $(MAKE) -C ebb clean + +ebb: + $(MAKE) -k -C $@ all -.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS) +.PHONY: all run_tests clean ebb diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore new file mode 100644 index 000000000000..42bddbed8b64 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore @@ -0,0 +1,22 @@ +reg_access_test +event_attributes_test +cycles_test +cycles_with_freeze_test +pmc56_overflow_test +ebb_vs_cpu_event_test +cpu_event_vs_ebb_test +cpu_event_pinned_vs_ebb_test +task_event_vs_ebb_test +task_event_pinned_vs_ebb_test +multi_ebb_procs_test +multi_counter_test +pmae_handling_test +close_clears_pmcc_test +instruction_count_test +fork_cleanup_test +ebb_on_child_test +ebb_on_willing_child_test +back_to_back_ebbs_test +lost_exception_test +no_handler_test +cycles_with_mmcr2_test diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 3dc4332698cb..5cdc9dbf2b27 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -4,7 +4,7 @@ noarg: # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 -PROGS := reg_access_test event_attributes_test cycles_test \ +TEST_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \ @@ -16,18 +16,15 @@ PROGS := reg_access_test event_attributes_test cycles_test \ lost_exception_test no_handler_test \ cycles_with_mmcr2_test -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S +$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S instruction_count_test: ../loop.S lost_exception_test: ../lib.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../../lib.mk clean: - rm -f $(PROGS) + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c index 9768dea37bf3..a07104c2afe6 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.c +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -5,15 +5,10 @@ #define _GNU_SOURCE /* For CPU_ZERO etc. */ -#include <elf.h> #include <errno.h> -#include <fcntl.h> -#include <link.h> #include <sched.h> #include <setjmp.h> #include <stdlib.h> -#include <sys/stat.h> -#include <sys/types.h> #include <sys/wait.h> #include "utils.h" @@ -256,45 +251,3 @@ out: return rc; } -static char auxv[4096]; - -void *get_auxv_entry(int type) -{ - ElfW(auxv_t) *p; - void *result; - ssize_t num; - int fd; - - fd = open("/proc/self/auxv", O_RDONLY); - if (fd == -1) { - perror("open"); - return NULL; - } - - result = NULL; - - num = read(fd, auxv, sizeof(auxv)); - if (num < 0) { - perror("read"); - goto out; - } - - if (num > sizeof(auxv)) { - printf("Overflowed auxv buffer\n"); - goto out; - } - - p = (ElfW(auxv_t) *)auxv; - - while (p->a_type != AT_NULL) { - if (p->a_type == type) { - result = (void *)p->a_un.a_val; - break; - } - - p++; - } -out: - close(fd); - return result; -} diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h index 0f0339c8a6f6..ca5d72ae3be6 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.h +++ b/tools/testing/selftests/powerpc/pmu/lib.h @@ -29,7 +29,6 @@ extern int notify_parent(union pipe write_pipe); extern int notify_parent_of_error(union pipe write_pipe); extern pid_t eat_cpu(int (test_function)(void)); extern bool require_paranoia_below(int level); -extern void *get_auxv_entry(int type); struct addr_range { uint64_t first, last; diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore new file mode 100644 index 000000000000..4cc4e31bed1d --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/.gitignore @@ -0,0 +1 @@ +load_unaligned_zeropad diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index ea737ca01732..b68c6221d3d1 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -1,17 +1,12 @@ CFLAGS += -I$(CURDIR) -PROGS := load_unaligned_zeropad +TEST_PROGS := load_unaligned_zeropad -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore new file mode 100644 index 000000000000..0b43da74ee46 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/.gitignore @@ -0,0 +1 @@ +memcmp diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile new file mode 100644 index 000000000000..2a728f4d2873 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -0,0 +1,15 @@ +# The loops are all 64-bit code +CFLAGS += -m64 +CFLAGS += -I$(CURDIR) + +TEST_PROGS := memcmp +EXTRA_SOURCES := memcmp_64.S ../harness.c + +all: $(TEST_PROGS) + +$(TEST_PROGS): $(EXTRA_SOURCES) + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h new file mode 100644 index 000000000000..11bece87e880 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h @@ -0,0 +1,7 @@ +#include <ppc-asm.h> + +#ifndef r1 +#define r1 sp +#endif + +#define _GLOBAL(A) FUNC_START(test_ ## A) diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c new file mode 100644 index 000000000000..17417dd70708 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -0,0 +1,103 @@ +#include <malloc.h> +#include <stdlib.h> +#include <string.h> +#include "../utils.h" + +#define SIZE 256 +#define ITERATIONS 10000 + +int test_memcmp(const void *s1, const void *s2, size_t n); + +/* test all offsets and lengths */ +static void test_one(char *s1, char *s2) +{ + unsigned long offset, size; + + for (offset = 0; offset < SIZE; offset++) { + for (size = 0; size < (SIZE-offset); size++) { + int x, y; + unsigned long i; + + y = memcmp(s1+offset, s2+offset, size); + x = test_memcmp(s1+offset, s2+offset, size); + + if (((x ^ y) < 0) && /* Trick to compare sign */ + ((x | y) != 0)) { /* check for zero */ + printf("memcmp returned %d, should have returned %d (offset %ld size %ld)\n", x, y, offset, size); + + for (i = offset; i < offset+size; i++) + printf("%02x ", s1[i]); + printf("\n"); + + for (i = offset; i < offset+size; i++) + printf("%02x ", s2[i]); + printf("\n"); + abort(); + } + } + } +} + +static int testcase(void) +{ + char *s1; + char *s2; + unsigned long i; + + s1 = memalign(128, SIZE); + if (!s1) { + perror("memalign"); + exit(1); + } + + s2 = memalign(128, SIZE); + if (!s2) { + perror("memalign"); + exit(1); + } + + srandom(1); + + for (i = 0; i < ITERATIONS; i++) { + unsigned long j; + unsigned long change; + + for (j = 0; j < SIZE; j++) + s1[j] = random(); + + memcpy(s2, s1, SIZE); + + /* change one byte */ + change = random() % SIZE; + s2[change] = random() & 0xff; + + test_one(s1, s2); + } + + srandom(1); + + for (i = 0; i < ITERATIONS; i++) { + unsigned long j; + unsigned long change; + + for (j = 0; j < SIZE; j++) + s1[j] = random(); + + memcpy(s2, s1, SIZE); + + /* change multiple bytes, 1/8 of total */ + for (j = 0; j < SIZE / 8; j++) { + change = random() % SIZE; + s2[change] = random() & 0xff; + } + + test_one(s1, s2); + } + + return 0; +} + +int main(void) +{ + return test_harness(testcase, "memcmp"); +} diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_64.S b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S new file mode 120000 index 000000000000..9bc87e438ae9 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/memcmp_64.S
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore new file mode 100644 index 000000000000..89e762eab676 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore @@ -0,0 +1,2 @@ +switch_endian_test +check-reversed.S diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile new file mode 100644 index 000000000000..081473db22b7 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -0,0 +1,24 @@ +CC := $(CROSS_COMPILE)gcc +PROGS := switch_endian_test + +ASFLAGS += -O2 -Wall -g -nostdlib -m64 + +all: $(PROGS) + +switch_endian_test: check-reversed.S + +check-reversed.o: check.o + $(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@ + +check-reversed.S: check-reversed.o + hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@ + +run_tests: all + @-for PROG in $(PROGS); do \ + ./$$PROG; \ + done; + +clean: + rm -f $(PROGS) *.o check-reversed.S + +.PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/switch_endian/check.S b/tools/testing/selftests/powerpc/switch_endian/check.S new file mode 100644 index 000000000000..e2484d2c24f4 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/check.S @@ -0,0 +1,100 @@ +#include "common.h" + +/* + * Checks that registers contain what we expect, ie. they were not clobbered by + * the syscall. + * + * r15: pattern to check registers against. + * + * At the end r3 == 0 if everything's OK. + */ + nop # guaranteed to be illegal in reverse-endian + mr r9,r15 + cmpd r9,r3 # check r3 + bne 1f + addi r9,r15,4 # check r4 + cmpd r9,r4 + bne 1f + lis r9,0x00FF # check CR + ori r9,r9,0xF000 + mfcr r10 + and r10,r10,r9 + cmpw r9,r10 + addi r9,r15,34 + bne 1f + addi r9,r15,32 # check LR + mflr r10 + cmpd r9,r10 + bne 1f + addi r9,r15,5 # check r5 + cmpd r9,r5 + bne 1f + addi r9,r15,6 # check r6 + cmpd r9,r6 + bne 1f + addi r9,r15,7 # check r7 + cmpd r9,r7 + bne 1f + addi r9,r15,8 # check r8 + cmpd r9,r8 + bne 1f + addi r9,r15,13 # check r13 + cmpd r9,r13 + bne 1f + addi r9,r15,14 # check r14 + cmpd r9,r14 + bne 1f + addi r9,r15,16 # check r16 + cmpd r9,r16 + bne 1f + addi r9,r15,17 # check r17 + cmpd r9,r17 + bne 1f + addi r9,r15,18 # check r18 + cmpd r9,r18 + bne 1f + addi r9,r15,19 # check r19 + cmpd r9,r19 + bne 1f + addi r9,r15,20 # check r20 + cmpd r9,r20 + bne 1f + addi r9,r15,21 # check r21 + cmpd r9,r21 + bne 1f + addi r9,r15,22 # check r22 + cmpd r9,r22 + bne 1f + addi r9,r15,23 # check r23 + cmpd r9,r23 + bne 1f + addi r9,r15,24 # check r24 + cmpd r9,r24 + bne 1f + addi r9,r15,25 # check r25 + cmpd r9,r25 + bne 1f + addi r9,r15,26 # check r26 + cmpd r9,r26 + bne 1f + addi r9,r15,27 # check r27 + cmpd r9,r27 + bne 1f + addi r9,r15,28 # check r28 + cmpd r9,r28 + bne 1f + addi r9,r15,29 # check r29 + cmpd r9,r29 + bne 1f + addi r9,r15,30 # check r30 + cmpd r9,r30 + bne 1f + addi r9,r15,31 # check r31 + cmpd r9,r31 + bne 1f + b 2f +1: mr r3, r9 + li r0, __NR_exit + sc +2: li r0, __NR_switch_endian + nop diff --git a/tools/testing/selftests/powerpc/switch_endian/common.h b/tools/testing/selftests/powerpc/switch_endian/common.h new file mode 100644 index 000000000000..69e399698c64 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/common.h @@ -0,0 +1,6 @@ +#include <ppc-asm.h> +#include <asm/unistd.h> + +#ifndef __NR_switch_endian +#define __NR_switch_endian 363 +#endif diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S new file mode 100644 index 000000000000..ef7c971abb67 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S @@ -0,0 +1,81 @@ +#include "common.h" + + .data + .balign 8 +message: + .ascii "success: switch_endian_test\n\0" + + .section ".toc" + .balign 8 +pattern: + .llong 0x5555AAAA5555AAAA + + .text +FUNC_START(_start) + /* Load the pattern */ + ld r15, pattern@TOC(%r2) + + /* Setup CR, only CR2-CR4 are maintained */ + lis r3, 0x00FF + ori r3, r3, 0xF000 + mtcr r3 + + /* Load the pattern slightly modified into the registers */ + mr r3, r15 + addi r4, r15, 4 + + addi r5, r15, 32 + mtlr r5 + + addi r5, r15, 5 + addi r6, r15, 6 + addi r7, r15, 7 + addi r8, r15, 8 + + /* r9 - r12 are clobbered */ + + addi r13, r15, 13 + addi r14, r15, 14 + + /* Skip r15 we're using it */ + + addi r16, r15, 16 + addi r17, r15, 17 + addi r18, r15, 18 + addi r19, r15, 19 + addi r20, r15, 20 + addi r21, r15, 21 + addi r22, r15, 22 + addi r23, r15, 23 + addi r24, r15, 24 + addi r25, r15, 25 + addi r26, r15, 26 + addi r27, r15, 27 + addi r28, r15, 28 + addi r29, r15, 29 + addi r30, r15, 30 + addi r31, r15, 31 + + /* + * Call the syscall to switch endian. + * It clobbers r9-r12, XER, CTR and CR0-1,5-7. + */ + li r0, __NR_switch_endian + sc + +#include "check-reversed.S" + + /* Flip back, r0 already has the switch syscall number */ + .long 0x02000044 /* sc */ + +#include "check.S" + + li r0, __NR_write + li r3, 1 /* stdout */ + ld r4, message@got(%r2) + li r5, 28 /* strlen(message3) */ + sc + li r0, __NR_exit + li r3, 0 + sc + b . diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore new file mode 100644 index 000000000000..2699635d2cd9 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -0,0 +1,2 @@ +tm-resched-dscr +tm-syscall diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 2cede239a074..6bff955e1d55 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,15 +1,13 @@ -PROGS := tm-resched-dscr +TEST_PROGS := tm-resched-dscr -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +tm-syscall: tm-syscall-asm.S +tm-syscall: CFLAGS += -mhtm -clean: - rm -f $(PROGS) *.o +include ../../lib.mk -.PHONY: all run_tests clean +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S new file mode 100644 index 000000000000..431f61ae2368 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S @@ -0,0 +1,27 @@ +#include <ppc-asm.h> +#include <asm/unistd.h> + + .text +FUNC_START(getppid_tm_active) + tbegin. + beq 1f + li r0, __NR_getppid + sc + tend. + blr +1: + li r3, -1 + blr + +FUNC_START(getppid_tm_suspended) + tbegin. + beq 1f + li r0, __NR_getppid + tsuspend. + sc + tresume. + tend. + blr +1: + li r3, -1 + blr diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c new file mode 100644 index 000000000000..3ed8d4b252fa --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -0,0 +1,121 @@ +/* + * Copyright 2015, Sam Bobroff, IBM Corp. + * Licensed under GPLv2. + * + * Test the kernel's system call code to ensure that a system call + * made from within an active HTM transaction is aborted with the + * correct failure code. + * Conversely, ensure that a system call made from within a + * suspended transaction can succeed. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <asm/tm.h> +#include <asm/cputable.h> +#include <linux/auxvec.h> +#include <sys/time.h> +#include <stdlib.h> + +#include "utils.h" + +extern int getppid_tm_active(void); +extern int getppid_tm_suspended(void); + +unsigned retries = 0; + +#define TEST_DURATION 10 /* seconds */ +#define TM_RETRIES 100 + +long failure_code(void) +{ + return __builtin_get_texasru() >> 24; +} + +bool failure_is_persistent(void) +{ + return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT; +} + +bool failure_is_syscall(void) +{ + return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL; +} + +pid_t getppid_tm(bool suspend) +{ + int i; + pid_t pid; + + for (i = 0; i < TM_RETRIES; i++) { + if (suspend) + pid = getppid_tm_suspended(); + else + pid = getppid_tm_active(); + + if (pid >= 0) + return pid; + + if (failure_is_persistent()) { + if (failure_is_syscall()) + return -1; + + printf("Unexpected persistent transaction failure.\n"); + printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + exit(-1); + } + + retries++; + } + + printf("Exceeded limit of %d temporary transaction failures.\n", TM_RETRIES); + printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + + exit(-1); +} + +int tm_syscall(void) +{ + unsigned count = 0; + struct timeval end, now; + + SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM)); + setbuf(stdout, NULL); + + printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION); + + gettimeofday(&end, NULL); + now.tv_sec = TEST_DURATION; + now.tv_usec = 0; + timeradd(&end, &now, &end); + + for (count = 0; timercmp(&now, &end, <); count++) { + /* + * Test a syscall within a suspended transaction and verify + * that it succeeds. + */ + FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */ + + /* + * Test a syscall within an active transaction and verify that + * it fails with the correct failure code. + */ + FAIL_IF(getppid_tm(false) != -1); /* Should fail... */ + FAIL_IF(!failure_is_persistent()); /* ...persistently... */ + FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */ + gettimeofday(&now, 0); + } + + printf("%d active and suspended transactions behaved correctly.\n", count); + printf("(There were %d transaction retries.)\n", retries); + + return 0; +} + +int main(void) +{ + return test_harness(tm_syscall, "tm_syscall"); +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index a93777ae0684..b7d41086bb0a 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -15,11 +15,12 @@ typedef signed long long s64; /* Just for familiarity */ typedef uint32_t u32; +typedef uint16_t u16; typedef uint8_t u8; int test_harness(int (test_function)(void), char *name); - +extern void *get_auxv_entry(int type); /* Yes, this is evil */ #define FAIL_IF(x) \ diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore new file mode 100644 index 000000000000..7c04395010cb --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/.gitignore @@ -0,0 +1 @@ +test-vphn diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile new file mode 100644 index 000000000000..e539f775fd8f --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -0,0 +1,15 @@ +PROG := test-vphn + +CFLAGS += -m64 + +all: $(PROG) + +$(PROG): ../harness.c + +run_tests: all + ./$(PROG) + +clean: + rm -f $(PROG) + +.PHONY: all run_tests clean diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c new file mode 100644 index 000000000000..5742f6876b25 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c @@ -0,0 +1,410 @@ +#include <stdio.h> +#include <byteswap.h> +#include "utils.h" +#include "subunit.h" + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define cpu_to_be32(x) bswap_32(x) +#define be32_to_cpu(x) bswap_32(x) +#define be16_to_cpup(x) bswap_16(*x) +#define cpu_to_be64(x) bswap_64(x) +#else +#define cpu_to_be32(x) (x) +#define be32_to_cpu(x) (x) +#define be16_to_cpup(x) (*x) +#define cpu_to_be64(x) (x) +#endif + +#include "vphn.c" + +static struct test { + char *descr; + long input[VPHN_REGISTER_COUNT]; + u32 expected[VPHN_ASSOC_BUFSIZE]; +} all_tests[] = { + { + "vphn: no data", + { + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000000 + } + }, + { + "vphn: 1 x 16-bit value", + { + 0x8001ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000001, + 0x00000001 + } + }, + { + "vphn: 2 x 16-bit values", + { + 0x80018002ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + "vphn: 3 x 16-bit values", + { + 0x800180028003ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003 + } + }, + { + "vphn: 4 x 16-bit values", + { + 0x8001800280038004, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000004, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004 + } + }, + { + /* Parsing the next 16-bit value out of the next 64-bit input + * value. + */ + "vphn: 5 x 16-bit values", + { + 0x8001800280038004, + 0x8005ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000005, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005 + } + }, + { + /* Parse at most 6 x 64-bit input values */ + "vphn: 24 x 16-bit values", + { + 0x8001800280038004, + 0x8005800680078008, + 0x8009800a800b800c, + 0x800d800e800f8010, + 0x8011801280138014, + 0x8015801680178018 + }, + { + 0x00000018, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c, + 0x0000000d, + 0x0000000e, + 0x0000000f, + 0x00000010, + 0x00000011, + 0x00000012, + 0x00000013, + 0x00000014, + 0x00000015, + 0x00000016, + 0x00000017, + 0x00000018 + } + }, + { + "vphn: 1 x 32-bit value", + { + 0x00000001ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000001, + 0x00000001 + } + }, + { + "vphn: 2 x 32-bit values", + { + 0x0000000100000002, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + /* Parsing the next 32-bit value out of the next 64-bit input + * value. + */ + "vphn: 3 x 32-bit values", + { + 0x0000000100000002, + 0x00000003ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003 + } + }, + { + /* Parse at most 6 x 64-bit input values */ + "vphn: 12 x 32-bit values", + { + 0x0000000100000002, + 0x0000000300000004, + 0x0000000500000006, + 0x0000000700000008, + 0x000000090000000a, + 0x0000000b0000000c + }, + { + 0x0000000c, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c + } + }, + { + "vphn: 16-bit value followed by 32-bit value", + { + 0x800100000002ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + "vphn: 32-bit value followed by 16-bit value", + { + 0x000000018002ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + /* Parse a 32-bit value split accross two consecutives 64-bit + * input values. + */ + "vphn: 16-bit value followed by 2 x 32-bit values", + { + 0x8001000000020000, + 0x0003ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005 + } + }, + { + /* The lower bits in 0x0001ffff don't get mixed up with the + * 0xffff terminator. + */ + "vphn: 32-bit value has all ones in 16 lower bits", + { + 0x0001ffff80028003, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x0001ffff, + 0x00000002, + 0x00000003 + } + }, + { + /* The following input doesn't follow the specification. + */ + "vphn: last 32-bit value is truncated", + { + 0x0000000100000002, + 0x0000000300000004, + 0x0000000500000006, + 0x0000000700000008, + 0x000000090000000a, + 0x0000000b800c2bad + }, + { + 0x0000000c, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c + } + }, + { + "vphn: garbage after terminator", + { + 0xffff2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad + }, + { + 0x00000000 + } + }, + { + NULL + } +}; + +static int test_one(struct test *test) +{ + __be32 output[VPHN_ASSOC_BUFSIZE] = { 0 }; + int i, len; + + vphn_unpack_associativity(test->input, output); + + len = be32_to_cpu(output[0]); + if (len != test->expected[0]) { + printf("expected %d elements, got %d\n", test->expected[0], + len); + return 1; + } + + for (i = 1; i < len; i++) { + u32 val = be32_to_cpu(output[i]); + if (val != test->expected[i]) { + printf("element #%d is 0x%x, should be 0x%x\n", i, val, + test->expected[i]); + return 1; + } + } + + return 0; +} + +static int test_vphn(void) +{ + static struct test *test; + + for (test = all_tests; test->descr; test++) { + int ret; + + ret = test_one(test); + test_finish(test->descr, ret); + if (ret) + return ret; + } + + return 0; +} + +int main(int argc, char **argv) +{ + return test_harness(test_vphn, "test-vphn"); +} diff --git a/tools/testing/selftests/powerpc/vphn/vphn.c b/tools/testing/selftests/powerpc/vphn/vphn.c new file mode 120000 index 000000000000..186b906e66d5 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/vphn.c @@ -0,0 +1 @@ +../../../../../arch/powerpc/mm/vphn.c
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/vphn/vphn.h b/tools/testing/selftests/powerpc/vphn/vphn.h new file mode 120000 index 000000000000..7131efe38c65 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/vphn.h @@ -0,0 +1 @@ +../../../../../arch/powerpc/mm/vphn.h
\ No newline at end of file diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index 47ae2d385ce8..453927fea90c 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -6,5 +6,6 @@ all: peeksiginfo clean: rm -f peeksiginfo -run_tests: all - @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]" +TEST_PROGS := peeksiginfo + +include ../lib.mk diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh index abe14b7f36e9..bb99cde3f5f9 100755 --- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh +++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh @@ -24,7 +24,7 @@ ncpus=`grep '^processor' /proc/cpuinfo | wc -l` idlecpus=`mpstat | tail -1 | \ - awk -v ncpus=$ncpus '{ print ncpus * ($7 + $12) / 100 }'` + awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'` awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null ' BEGIN { cpus2use = idlecpus; diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index d6cc07fc137f..559e01ac86be 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -30,6 +30,7 @@ else echo Unreadable results directory: $i exit 1 fi +. tools/testing/selftests/rcutorture/bin/functions.sh configfile=`echo $i | sed -e 's/^.*\///'` ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` @@ -48,4 +49,21 @@ else title="$title ($ngpsps per second)" fi echo $title + nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` + if test -z "$nclosecalls" + then + exit 0 + fi + if test "$nclosecalls" -eq 0 + then + exit 0 + fi + # Compute number of close calls per tenth of an hour + nclosecalls10=`awk -v nclosecalls=$nclosecalls -v dur=$dur 'BEGIN { print int(nclosecalls * 36000 / dur) }' < /dev/null` + if test $nclosecalls10 -gt 5 -a $nclosecalls -gt 1 + then + print_bug $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i + else + print_warning $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i + fi fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 8ca9f21f2efc..5236e073919d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -8,9 +8,9 @@ # # Usage: kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args # -# qemu-args defaults to "-nographic", along with arguments specifying the -# number of CPUs and other options generated from -# the underlying CPU architecture. +# qemu-args defaults to "-enable-kvm -soundhw pcspk -nographic", along with +# arguments specifying the number of CPUs and other +# options generated from the underlying CPU architecture. # boot_args defaults to value returned by the per_version_boot_params # shell function. # @@ -138,7 +138,7 @@ then fi # Generate -smp qemu argument. -qemu_args="-nographic $qemu_args" +qemu_args="-enable-kvm -soundhw pcspk -nographic $qemu_args" cpu_count=`configNR_CPUS.sh $config_template` cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` vcpus=`identify_qemu_vcpus` @@ -168,6 +168,7 @@ then touch $resdir/buildonly exit 0 fi +echo "NOTE: $QEMU either did not run or was interactive" > $builddir/console.log echo $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd ( $QEMU $qemu_args -m 512 -kernel $resdir/bzImage -append "$qemu_append $boot_args"; echo $? > $resdir/qemu-retval ) & qemu_pid=$! diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 368d64ac779e..dd2812ceb0ba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -310,7 +310,7 @@ function dump(first, pastlast) cfr[jn] = cf[j] "." cfrep[cf[j]]; } if (cpusr[jn] > ncpus && ncpus != 0) - ovf = "(!)"; + ovf = "-ovf"; else ovf = ""; print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh index 499d1e598e42..a6b57622c2e5 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-build.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh @@ -26,12 +26,15 @@ # # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> -T=$1 +F=$1 title=$2 +T=/tmp/parse-build.sh.$$ +trap 'rm -rf $T' 0 +mkdir $T . functions.sh -if grep -q CC < $T +if grep -q CC < $F then : else @@ -39,18 +42,21 @@ else exit 1 fi -if grep -q "error:" < $T +if grep -q "error:" < $F then print_bug $title build errors: - grep "error:" < $T + grep "error:" < $F exit 2 fi -exit 0 -if egrep -q "rcu[^/]*\.c.*warning:|rcu.*\.h.*warning:" < $T +grep warning: < $F > $T/warnings +grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings +grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings +cat $T/hwarnings $T/cwarnings > $T/rcuwarnings +if test -s $T/rcuwarnings then print_warning $title build errors: - egrep "rcu[^/]*\.c.*warning:|rcu.*\.h.*warning:" < $T + cat $T/rcuwarnings exit 2 fi exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index f962ba4cf68b..d8f35cf116be 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -36,7 +36,7 @@ if grep -Pq '\x00' < $file then print_warning Console output contains nul bytes, old qemu still running? fi -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T +egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|Stall ended before state dump start' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T if test -s $T then print_warning Assertion failure in $file $title diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon index d2d2a86139db..49701218dc62 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon @@ -1,2 +1,3 @@ CONFIG_RCU_TORTURE_TEST=y CONFIG_PRINTK_TIME=y +CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile index 04dc25e4fa92..bbd0b5398b61 100644 --- a/tools/testing/selftests/size/Makefile +++ b/tools/testing/selftests/size/Makefile @@ -1,12 +1,11 @@ -CC = $(CROSS_COMPILE)gcc - all: get_size get_size: get_size.c $(CC) -static -ffreestanding -nostartfiles -s $< -o $@ -run_tests: all - ./get_size +TEST_PROGS := get_size + +include ../lib.mk clean: $(RM) get_size diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile index 0a92adaf0865..b3c33e071f10 100644 --- a/tools/testing/selftests/sysctl/Makefile +++ b/tools/testing/selftests/sysctl/Makefile @@ -4,16 +4,10 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests". all: -# Allow specific tests to be selected. -test_num: - @/bin/sh ./run_numerictests +TEST_PROGS := run_numerictests run_stringtests +TEST_FILES := common_tests -test_string: - @/bin/sh ./run_stringtests - -run_tests: all test_num test_string +include ../lib.mk # Nothing to clean up. clean: - -.PHONY: all run_tests clean test_num test_string diff --git a/tools/testing/selftests/sysctl/run_numerictests b/tools/testing/selftests/sysctl/run_numerictests index 8510f93f2d14..8510f93f2d14 100644..100755 --- a/tools/testing/selftests/sysctl/run_numerictests +++ b/tools/testing/selftests/sysctl/run_numerictests diff --git a/tools/testing/selftests/sysctl/run_stringtests b/tools/testing/selftests/sysctl/run_stringtests index 90a9293d520c..90a9293d520c 100644..100755 --- a/tools/testing/selftests/sysctl/run_stringtests +++ b/tools/testing/selftests/sysctl/run_stringtests diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index eb2859f4ad21..89a3f44bf355 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,8 +1,36 @@ -all: - gcc posix_timers.c -o posix_timers -lrt +CC = $(CROSS_COMPILE)gcc +BUILD_FLAGS = -DKTEST +CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) +LDFLAGS += -lrt -lpthread -run_tests: all - ./posix_timers +# these are all "safe" tests that don't modify +# system time or require escalated privledges +TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ + inconsistency-check raw_skew threadtest rtctest + +TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex change_skew \ + skew_consistency clocksource-switch leap-a-day \ + leapcrash set-tai set-2038 + +bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED) + +all: ${bins} + +include ../lib.mk + +# these tests require escalated privledges +# and may modify the system time or trigger +# other behavior like suspend +run_destructive_tests: run_tests + ./alarmtimer-suspend + ./valid-adjtimex + ./change_skew + ./skew_consistency + ./clocksource-switch + ./leap-a-day -s -i 10 + ./leapcrash + ./set-tai + ./set-2038 clean: - rm -f ./posix_timers + rm -f ${bins} diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c new file mode 100644 index 000000000000..aaffbde1d5ee --- /dev/null +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -0,0 +1,185 @@ +/* alarmtimer suspend test + * John Stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * This test makes sure the alarmtimer & RTC wakeup code is + * functioning. + * + * To build: + * $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + + +#define NSEC_PER_SEC 1000000000ULL +#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4secs */ + +#define SUSPEND_SECS 15 +int alarmcount; +int alarm_clock_id; +struct timespec start_time; + + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +int final_ret = 0; + +void sigalarm(int signo) +{ + long long delta_ns; + struct timespec ts; + + clock_gettime(alarm_clock_id, &ts); + alarmcount++; + + delta_ns = timespec_sub(start_time, ts); + delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount; + + printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec, + ts.tv_nsec, delta_ns); + + if (delta_ns > UNREASONABLE_LAT) { + printf("[FAIL]\n"); + final_ret = -1; + } else + printf("[OK]\n"); + +} + +int main(void) +{ + timer_t tm1; + struct itimerspec its1, its2; + struct sigevent se; + struct sigaction act; + int signum = SIGRTMAX; + + /* Set up signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + /* Set up timer: */ + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = signum; + se.sigev_value.sival_int = 0; + + for (alarm_clock_id = CLOCK_REALTIME_ALARM; + alarm_clock_id <= CLOCK_BOOTTIME_ALARM; + alarm_clock_id++) { + + alarmcount = 0; + timer_create(alarm_clock_id, &se, &tm1); + + clock_gettime(alarm_clock_id, &start_time); + printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id), + start_time.tv_sec, start_time.tv_nsec); + printf("Setting alarm for every %i seconds\n", SUSPEND_SECS); + its1.it_value = start_time; + its1.it_value.tv_sec += SUSPEND_SECS; + its1.it_interval.tv_sec = SUSPEND_SECS; + its1.it_interval.tv_nsec = 0; + + timer_settime(tm1, TIMER_ABSTIME, &its1, &its2); + + while (alarmcount < 5) + sleep(1); /* First 5 alarms, do nothing */ + + printf("Starting suspend loops\n"); + while (alarmcount < 10) { + int ret; + + sleep(1); + ret = system("echo mem > /sys/power/state"); + if (ret) + break; + } + timer_delete(tm1); + } + if (final_ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c new file mode 100644 index 000000000000..cb1968977c04 --- /dev/null +++ b/tools/testing/selftests/timers/change_skew.c @@ -0,0 +1,107 @@ +/* ADJ_FREQ Skew change test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which cranks the ADJ_FREQ knob and + * then uses other tests to detect problems. Thus this test requires + * that the raw_skew, inconsistency-check and nanosleep tests be + * present in the same directory it is run from. + * + * To build: + * $ gcc change_skew.c -o change_skew -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + + +int change_skew_test(int ppm) +{ + struct timex tx; + int ret; + + tx.modes = ADJ_FREQUENCY; + tx.freq = ppm << 16; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("Error adjusting freq\n"); + return ret; + } + + ret = system("./raw_skew"); + ret |= system("./inconsistency-check"); + ret |= system("./nanosleep"); + + return ret; +} + + +int main(int argv, char **argc) +{ + struct timex tx; + int i, ret; + + int ppm[5] = {0, 250, 500, -250, -500}; + + /* Kill ntpd */ + ret = system("killall -9 ntpd"); + + /* Make sure there's no offset adjustment going on */ + tx.modes = ADJ_OFFSET; + tx.offset = 0; + ret = adjtimex(&tx); + + if (ret < 0) { + printf("Maybe you're not running as root?\n"); + return -1; + } + + for (i = 0; i < 5; i++) { + printf("Using %i ppm adjustment\n", ppm[i]); + ret = change_skew_test(ppm[i]); + if (ret) + break; + } + + /* Set things back */ + tx.modes = ADJ_FREQUENCY; + tx.offset = 0; + adjtimex(&tx); + + if (ret) { + printf("[FAIL]"); + return ksft_exit_fail(); + } + printf("[OK]"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c new file mode 100644 index 000000000000..627ec7425f78 --- /dev/null +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -0,0 +1,179 @@ +/* Clocksource change test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which quickly changes the clocksourc and + * then uses other tests to detect problems. Thus this test requires + * that the inconsistency-check and nanosleep tests be present in the + * same directory it is run from. + * + * To build: + * $ gcc clocksource-switch.c -o clocksource-switch -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <sys/wait.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +int get_clocksources(char list[][30]) +{ + int fd, i; + size_t size; + char buf[512]; + char *head, *tmp; + + fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY); + + size = read(fd, buf, 512); + + close(fd); + + for (i = 0; i < 30; i++) + list[i][0] = '\0'; + + head = buf; + i = 0; + while (head - buf < size) { + /* Find the next space */ + for (tmp = head; *tmp != ' '; tmp++) { + if (*tmp == '\n') + break; + if (*tmp == '\0') + break; + } + *tmp = '\0'; + strcpy(list[i], head); + head = tmp + 1; + i++; + } + + return i-1; +} + +int get_cur_clocksource(char *buf, size_t size) +{ + int fd; + + fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY); + + size = read(fd, buf, size); + + return 0; +} + +int change_clocksource(char *clocksource) +{ + int fd; + size_t size; + + fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY); + + if (fd < 0) + return -1; + + size = write(fd, clocksource, strlen(clocksource)); + + if (size < 0) + return -1; + + close(fd); + return 0; +} + + +int run_tests(int secs) +{ + int ret; + char buf[255]; + + sprintf(buf, "./inconsistency-check -t %i", secs); + ret = system(buf); + if (ret) + return ret; + ret = system("./nanosleep"); + return ret; +} + + +char clocksource_list[10][30]; + +int main(int argv, char **argc) +{ + char orig_clk[512]; + int count, i, status; + pid_t pid; + + get_cur_clocksource(orig_clk, 512); + + count = get_clocksources(clocksource_list); + + if (change_clocksource(clocksource_list[0])) { + printf("Error: You probably need to run this as root\n"); + return -1; + } + + /* Check everything is sane before we start switching asyncrhonously */ + for (i = 0; i < count; i++) { + printf("Validating clocksource %s\n", clocksource_list[i]); + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } + if (run_tests(5)) { + status = -1; + goto out; + } + } + + + printf("Running Asyncrhonous Switching Tests...\n"); + pid = fork(); + if (!pid) + return run_tests(60); + + while (pid != waitpid(pid, &status, WNOHANG)) + for (i = 0; i < count; i++) + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } +out: + change_clocksource(orig_clk); + + if (status) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c new file mode 100644 index 000000000000..caf1bc9257c4 --- /dev/null +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -0,0 +1,204 @@ +/* Time inconsistency check test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2003, 2004, 2005, 2012 + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc inconsistency-check.c -o inconsistency-check -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CALLS_PER_LOOP 64 +#define NSEC_PER_SEC 1000000000ULL + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + /* use unsigned to avoid false positives on 2038 rollover */ + if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec) + return 1; + if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + + + +int consistency_test(int clock_type, unsigned long seconds) +{ + struct timespec list[CALLS_PER_LOOP]; + int i, inconsistent; + long now, then; + time_t t; + char *start_str; + + clock_gettime(clock_type, &list[0]); + now = then = list[0].tv_sec; + + /* timestamp start of test */ + t = time(0); + start_str = ctime(&t); + + while (seconds == -1 || now - then < seconds) { + inconsistent = 0; + + /* Fill list */ + for (i = 0; i < CALLS_PER_LOOP; i++) + clock_gettime(clock_type, &list[i]); + + /* Check for inconsistencies */ + for (i = 0; i < CALLS_PER_LOOP - 1; i++) + if (!in_order(list[i], list[i+1])) + inconsistent = i; + + /* display inconsistency */ + if (inconsistent) { + unsigned long long delta; + + printf("\%s\n", start_str); + for (i = 0; i < CALLS_PER_LOOP; i++) { + if (i == inconsistent) + printf("--------------------\n"); + printf("%lu:%lu\n", list[i].tv_sec, + list[i].tv_nsec); + if (i == inconsistent + 1) + printf("--------------------\n"); + } + delta = list[inconsistent].tv_sec * NSEC_PER_SEC; + delta += list[inconsistent].tv_nsec; + delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC; + delta -= list[inconsistent+1].tv_nsec; + printf("Delta: %llu ns\n", delta); + fflush(0); + /* timestamp inconsistency*/ + t = time(0); + printf("%s\n", ctime(&t)); + printf("[FAILED]\n"); + return -1; + } + now = list[0].tv_sec; + } + printf("[OK]\n"); + return 0; +} + + +int main(int argc, char *argv[]) +{ + int clockid, opt; + int userclock = CLOCK_REALTIME; + int maxclocks = NR_CLOCKIDS; + int runtime = 10; + struct timespec ts; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "t:c:")) != -1) { + switch (opt) { + case 't': + runtime = atoi(optarg); + break; + case 'c': + userclock = atoi(optarg); + maxclocks = userclock + 1; + break; + default: + printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]); + printf(" -t: Number of seconds to run\n"); + printf(" -c: clockid to use (default, all clockids)\n"); + exit(-1); + } + } + + setbuf(stdout, NULL); + + for (clockid = userclock; clockid < maxclocks; clockid++) { + + if (clockid == CLOCK_HWSPECIFIC) + continue; + + if (!clock_gettime(clockid, &ts)) { + printf("Consistent %-30s ", clockstring(clockid)); + if (consistency_test(clockid, runtime)) + return ksft_exit_fail(); + } + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c new file mode 100644 index 000000000000..b8272e6c4b3b --- /dev/null +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -0,0 +1,319 @@ +/* Leap second stress test + * by: John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright 2013, 2015 Linaro Limited + * Licensed under the GPLv2 + * + * This test signals the kernel to insert a leap second + * every day at midnight GMT. This allows for stessing the + * kernel's leap-second behavior, as well as how well applications + * handle the leap-second discontinuity. + * + * Usage: leap-a-day [-s] [-i <num>] + * + * Options: + * -s: Each iteration, set the date to 10 seconds before midnight GMT. + * This speeds up the number of leapsecond transitions tested, + * but because it calls settimeofday frequently, advancing the + * time by 24 hours every ~16 seconds, it may cause application + * disruption. + * + * -i: Number of iterations to run (default: infinite) + * + * Other notes: Disabling NTP prior to running this is advised, as the two + * may conflict in their commands to the kernel. + * + * To build: + * $ gcc leap-a-day.c -o leap-a-day -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL +#define CLOCK_TAI 11 + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return 1; + if (a.tv_sec > b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +char *time_state_str(int state) +{ + switch (state) { + case TIME_OK: return "TIME_OK"; + case TIME_INS: return "TIME_INS"; + case TIME_DEL: return "TIME_DEL"; + case TIME_OOP: return "TIME_OOP"; + case TIME_WAIT: return "TIME_WAIT"; + case TIME_BAD: return "TIME_BAD"; + } + return "ERROR"; +} + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + /* + * We have to call adjtime twice here, as kernels + * prior to 6b1859dba01c7 (included in 3.5 and + * -stable), had an issue with the state machine + * and wouldn't clear the STA_INS/DEL flag directly. + */ + tx.modes = ADJ_STATUS; + tx.status = STA_PLL; + ret = adjtimex(&tx); + + /* Clear maxerror, as it can cause UNSYNC to be set */ + tx.modes = ADJ_MAXERROR; + tx.maxerror = 0; + ret = adjtimex(&tx); + + /* Clear the status */ + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + + return ret; +} + +/* Make sure we cleanup on ctrl-c */ +void handler(int unused) +{ + clear_time_state(); + exit(0); +} + +/* Test for known hrtimer failure */ +void test_hrtimer_failure(void) +{ + struct timespec now, target; + + clock_gettime(CLOCK_REALTIME, &now); + target = timespec_add(now, NSEC_PER_SEC/2); + clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL); + clock_gettime(CLOCK_REALTIME, &now); + + if (!in_order(target, now)) + printf("ERROR: hrtimer early expiration failure observed.\n"); +} + +int main(int argc, char **argv) +{ + int settime = 0; + int tai_time = 0; + int insert = 1; + int iterations = -1; + int opt; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "sti:")) != -1) { + switch (opt) { + case 's': + printf("Setting time to speed up testing\n"); + settime = 1; + break; + case 'i': + iterations = atoi(optarg); + break; + case 't': + tai_time = 1; + break; + default: + printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]); + printf(" -s: Set time to right before leap second each iteration\n"); + printf(" -i: Number of iterations\n"); + printf(" -t: Print TAI time\n"); + exit(-1); + } + } + + /* Make sure TAI support is present if -t was used */ + if (tai_time) { + struct timespec ts; + + if (clock_gettime(CLOCK_TAI, &ts)) { + printf("System doesn't support CLOCK_TAI\n"); + ksft_exit_fail(); + } + } + + signal(SIGINT, handler); + signal(SIGKILL, handler); + + if (iterations < 0) + printf("This runs continuously. Press ctrl-c to stop\n"); + else + printf("Running for %i iterations. Press ctrl-c to stop\n", iterations); + + printf("\n"); + while (1) { + int ret; + struct timespec ts; + struct timex tx; + time_t now, next_leap; + + /* Get the current time */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Calculate the next possible leap second 23:59:60 GMT */ + next_leap = ts.tv_sec; + next_leap += 86400 - (next_leap % 86400); + + if (settime) { + struct timeval tv; + + tv.tv_sec = next_leap - 10; + tv.tv_usec = 0; + settimeofday(&tv, NULL); + printf("Setting time to %s", ctime(&tv.tv_sec)); + } + + /* Reset NTP time state */ + clear_time_state(); + + /* Set the leap second insert flag */ + tx.modes = ADJ_STATUS; + if (insert) + tx.status = STA_INS; + else + tx.status = STA_DEL; + ret = adjtimex(&tx); + if (ret < 0) { + printf("Error: Problem setting STA_INS/STA_DEL!: %s\n", + time_state_str(ret)); + return ksft_exit_fail(); + } + + /* Validate STA_INS was set */ + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.status != STA_INS && tx.status != STA_DEL) { + printf("Error: STA_INS/STA_DEL not set!: %s\n", + time_state_str(ret)); + return ksft_exit_fail(); + } + + if (tai_time) { + printf("Using TAI time," + " no inconsistencies should be seen!\n"); + } + + printf("Scheduling leap second for %s", ctime(&next_leap)); + + /* Wake up 3 seconds before leap */ + ts.tv_sec = next_leap - 3; + ts.tv_nsec = 0; + + while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL)) + printf("Something woke us up, returning to sleep\n"); + + /* Validate STA_INS is still set */ + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.status != STA_INS && tx.status != STA_DEL) { + printf("Something cleared STA_INS/STA_DEL, setting it again.\n"); + tx.modes = ADJ_STATUS; + if (insert) + tx.status = STA_INS; + else + tx.status = STA_DEL; + ret = adjtimex(&tx); + } + + /* Check adjtimex output every half second */ + now = tx.time.tv_sec; + while (now < next_leap + 2) { + char buf[26]; + struct timespec tai; + + tx.modes = 0; + ret = adjtimex(&tx); + + if (tai_time) { + clock_gettime(CLOCK_TAI, &tai); + printf("%ld sec, %9ld ns\t%s\n", + tai.tv_sec, + tai.tv_nsec, + time_state_str(ret)); + } else { + ctime_r(&tx.time.tv_sec, buf); + buf[strlen(buf)-1] = 0; /*remove trailing\n */ + + printf("%s + %6ld us (%i)\t%s\n", + buf, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } + now = tx.time.tv_sec; + /* Sleep for another half second */ + ts.tv_sec = 0; + ts.tv_nsec = NSEC_PER_SEC / 2; + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); + } + /* Switch to using other mode */ + insert = !insert; + + /* Note if kernel has known hrtimer failure */ + test_hrtimer_failure(); + + printf("Leap complete\n\n"); + + if ((iterations != -1) && !(--iterations)) + break; + } + + clear_time_state(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c new file mode 100644 index 000000000000..a1071bdbdeb7 --- /dev/null +++ b/tools/testing/selftests/timers/leapcrash.c @@ -0,0 +1,120 @@ +/* Demo leapsecond deadlock + * by: John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright 2013, 2015 Linaro Limited + * Licensed under the GPL + * + * This test demonstrates leapsecond deadlock that is possibe + * on kernels from 2.6.26 to 3.3. + * + * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA + * RUN AT YOUR OWN RISK! + * To build: + * $ gcc leapcrash.c -o leapcrash -lrt + */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + /* + * We have to call adjtime twice here, as kernels + * prior to 6b1859dba01c7 (included in 3.5 and + * -stable), had an issue with the state machine + * and wouldn't clear the STA_INS/DEL flag directly. + */ + tx.modes = ADJ_STATUS; + tx.status = STA_PLL; + ret = adjtimex(&tx); + + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + + return ret; +} + +/* Make sure we cleanup on ctrl-c */ +void handler(int unused) +{ + clear_time_state(); + exit(0); +} + + +int main(void) +{ + struct timex tx; + struct timespec ts; + time_t next_leap; + int count = 0; + + setbuf(stdout, NULL); + + signal(SIGINT, handler); + signal(SIGKILL, handler); + printf("This runs for a few minutes. Press ctrl-c to stop\n"); + + clear_time_state(); + + + /* Get the current time */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Calculate the next possible leap second 23:59:60 GMT */ + next_leap = ts.tv_sec; + next_leap += 86400 - (next_leap % 86400); + + for (count = 0; count < 20; count++) { + struct timeval tv; + + + /* set the time to 2 seconds before the leap */ + tv.tv_sec = next_leap - 2; + tv.tv_usec = 0; + if (settimeofday(&tv, NULL)) { + printf("Error: You're likely not running with proper (ie: root) permissions\n"); + return ksft_exit_fail(); + } + tx.modes = 0; + adjtimex(&tx); + + /* hammer on adjtime w/ STA_INS */ + while (tx.time.tv_sec < next_leap + 1) { + /* Set the leap second insert flag */ + tx.modes = ADJ_STATUS; + tx.status = STA_INS; + adjtimex(&tx); + } + clear_time_state(); + printf("."); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c new file mode 100644 index 000000000000..a2a3924d0b41 --- /dev/null +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -0,0 +1,124 @@ +/* Measure mqueue timeout latency + * by: john stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * + * Inspired with permission from example test by: + * Romain Francoise <romain@orebokech.com> + * Licensed under the GPLv2 + * + * To build: + * $ gcc mqueue-lat.c -o mqueue-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <errno.h> +#include <mqueue.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */ +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +int mqueue_lat_test(void) +{ + + mqd_t q; + struct mq_attr attr; + struct timespec start, end, now, target; + int i, count, ret; + + q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL); + if (q < 0) { + perror("mq_open"); + return -1; + } + mq_getattr(q, &attr); + + + count = 100; + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < count; i++) { + char buf[attr.mq_msgsize]; + + clock_gettime(CLOCK_REALTIME, &now); + target = now; + target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */ + + ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target); + if (ret < 0 && errno != ETIMEDOUT) { + perror("mq_timedreceive"); + return -1; + } + } + clock_gettime(CLOCK_MONOTONIC, &end); + + mq_close(q); + + if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY) + return -1; + + return 0; +} + +int main(int argc, char **argv) +{ + int ret; + + printf("Mqueue latency : "); + + ret = mqueue_lat_test(); + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c new file mode 100644 index 000000000000..8a3c29de7d49 --- /dev/null +++ b/tools/testing/selftests/timers/nanosleep.c @@ -0,0 +1,174 @@ +/* Make sure timers don't return early + * by: john stultz (johnstul@us.ibm.com) + * John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright Linaro 2013 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc nanosleep.c -o nanosleep -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +#define UNSUPPORTED 0xf00f + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return 1; + if (a.tv_sec > b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +int nanosleep_test(int clockid, long long ns) +{ + struct timespec now, target, rel; + + /* First check abs time */ + if (clock_gettime(clockid, &now)) + return UNSUPPORTED; + target = timespec_add(now, ns); + + if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL)) + return UNSUPPORTED; + clock_gettime(clockid, &now); + + if (!in_order(target, now)) + return -1; + + /* Second check reltime */ + clock_gettime(clockid, &now); + rel.tv_sec = 0; + rel.tv_nsec = 0; + rel = timespec_add(rel, ns); + target = timespec_add(now, ns); + clock_nanosleep(clockid, 0, &rel, NULL); + clock_gettime(clockid, &now); + + if (!in_order(target, now)) + return -1; + return 0; +} + +int main(int argc, char **argv) +{ + long long length; + int clockid, ret; + + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + + /* Skip cputime clockids since nanosleep won't increment cputime */ + if (clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_THREAD_CPUTIME_ID || + clockid == CLOCK_HWSPECIFIC) + continue; + + printf("Nanosleep %-31s ", clockstring(clockid)); + + length = 10; + while (length <= (NSEC_PER_SEC * 10)) { + ret = nanosleep_test(clockid, length); + if (ret == UNSUPPORTED) { + printf("[UNSUPPORTED]\n"); + goto next; + } + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + length *= 100; + } + printf("[OK]\n"); +next: + ret = 0; + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c new file mode 100644 index 000000000000..2d7898fda0f1 --- /dev/null +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -0,0 +1,190 @@ +/* Measure nanosleep timer latency + * by: john stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * To build: + * $ gcc nsleep-lat.c -o nsleep-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +#define UNSUPPORTED 0xf00f + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +int nanosleep_lat_test(int clockid, long long ns) +{ + struct timespec start, end, target; + long long latency = 0; + int i, count; + + target.tv_sec = ns/NSEC_PER_SEC; + target.tv_nsec = ns%NSEC_PER_SEC; + + if (clock_gettime(clockid, &start)) + return UNSUPPORTED; + if (clock_nanosleep(clockid, 0, &target, NULL)) + return UNSUPPORTED; + + count = 10; + + /* First check relative latency */ + clock_gettime(clockid, &start); + for (i = 0; i < count; i++) + clock_nanosleep(clockid, 0, &target, NULL); + clock_gettime(clockid, &end); + + if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) { + printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns); + return -1; + } + + /* Next check absolute latency */ + for (i = 0; i < count; i++) { + clock_gettime(clockid, &start); + target = timespec_add(start, ns); + clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL); + clock_gettime(clockid, &end); + latency += timespec_sub(target, end); + } + + if (latency/count > UNRESONABLE_LATENCY) { + printf("Large abs latency: %lld ns :", latency/count); + return -1; + } + + return 0; +} + + + +int main(int argc, char **argv) +{ + long long length; + int clockid, ret; + + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + + /* Skip cputime clockids since nanosleep won't increment cputime */ + if (clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_THREAD_CPUTIME_ID || + clockid == CLOCK_HWSPECIFIC) + continue; + + printf("nsleep latency %-26s ", clockstring(clockid)); + + length = 10; + while (length <= (NSEC_PER_SEC * 10)) { + ret = nanosleep_lat_test(clockid, length); + if (ret) + break; + length *= 100; + + } + + if (ret == UNSUPPORTED) { + printf("[UNSUPPORTED]\n"); + continue; + } + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index f87d970a485c..5a246a02dff3 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -35,10 +35,11 @@ static void user_loop(void) static void kernel_loop(void) { void *addr = sbrk(0); + int err = 0; - while (!done) { - brk(addr + 4096); - brk(addr); + while (!done && !err) { + err = brk(addr + 4096); + err |= brk(addr); } } @@ -190,8 +191,6 @@ static int check_timer_create(int which) int main(int argc, char **argv) { - int err; - printf("Testing posix timers. False negative may happen on CPU execution \n"); printf("based timers if other threads run on the CPU...\n"); diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c new file mode 100644 index 000000000000..30906bfd9c1b --- /dev/null +++ b/tools/testing/selftests/timers/raw_skew.c @@ -0,0 +1,154 @@ +/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test + * by: john stultz (johnstul@us.ibm.com) + * John Stultz <john.stultz@linaro.org> + * (C) Copyright IBM 2012 + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc raw_skew.c -o raw_skew -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +#define CLOCK_MONOTONIC_RAW 4 +#define NSEC_PER_SEC 1000000000LL + +#define shift_right(x, s) ({ \ + __typeof__(x) __x = (x); \ + __typeof__(s) __s = (s); \ + __x < 0 ? -(-__x >> __s) : __x >> __s; \ +}) + +long long llabs(long long val) +{ + if (val < 0) + val = -val; + return val; +} + +unsigned long long ts_to_nsec(struct timespec ts) +{ + return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} + +struct timespec nsec_to_ts(long long ns) +{ + struct timespec ts; + + ts.tv_sec = ns/NSEC_PER_SEC; + ts.tv_nsec = ns%NSEC_PER_SEC; + return ts; +} + +long long diff_timespec(struct timespec start, struct timespec end) +{ + long long start_ns, end_ns; + + start_ns = ts_to_nsec(start); + end_ns = ts_to_nsec(end); + return end_ns - start_ns; +} + +void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw) +{ + struct timespec start, mid, end; + long long diff = 0, tmp; + int i; + + for (i = 0; i < 3; i++) { + long long newdiff; + + clock_gettime(CLOCK_MONOTONIC, &start); + clock_gettime(CLOCK_MONOTONIC_RAW, &mid); + clock_gettime(CLOCK_MONOTONIC, &end); + + newdiff = diff_timespec(start, end); + if (diff == 0 || newdiff < diff) { + diff = newdiff; + *raw = mid; + tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2; + *mon = nsec_to_ts(tmp); + } + } +} + +int main(int argv, char **argc) +{ + struct timespec mon, raw, start, end; + long long delta1, delta2, interval, eppm, ppm; + struct timex tx1, tx2; + + setbuf(stdout, NULL); + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) { + printf("ERR: NO CLOCK_MONOTONIC_RAW\n"); + return -1; + } + + tx1.modes = 0; + adjtimex(&tx1); + get_monotonic_and_raw(&mon, &raw); + start = mon; + delta1 = diff_timespec(mon, raw); + + if (tx1.offset) + printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n"); + + printf("Estimating clock drift: "); + sleep(120); + + get_monotonic_and_raw(&mon, &raw); + end = mon; + tx2.modes = 0; + adjtimex(&tx2); + delta2 = diff_timespec(mon, raw); + + interval = diff_timespec(start, end); + + /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */ + eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval; + eppm = -eppm; + printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000))); + + /* Avg the two actual freq samples adjtimex gave us */ + ppm = (tx1.freq + tx2.freq) * 1000 / 2; + ppm = (long long)tx1.freq * 1000; + ppm = shift_right(ppm, 16); + printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000))); + + if (llabs(eppm - ppm) > 1000) { + printf(" [FAILED]\n"); + return ksft_exit_fail(); + } + printf(" [OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c new file mode 100644 index 000000000000..d80ae852334d --- /dev/null +++ b/tools/testing/selftests/timers/rtctest.c @@ -0,0 +1,271 @@ +/* + * Real Time Clock Driver Test/Example Program + * + * Compile with: + * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest + * + * Copyright (C) 1996, Paul Gortmaker. + * + * Released under the GNU General Public License, version 2, + * included herein by reference. + * + */ + +#include <stdio.h> +#include <linux/rtc.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> + + +/* + * This expects the new RTC class driver framework, working with + * clocks that will often not be clones of what the PC-AT had. + * Use the command line to specify another RTC if you need one. + */ +static const char default_rtc[] = "/dev/rtc0"; + + +int main(int argc, char **argv) +{ + int i, fd, retval, irqcount = 0; + unsigned long tmp, data; + struct rtc_time rtc_tm; + const char *rtc = default_rtc; + struct timeval start, end, diff; + + switch (argc) { + case 2: + rtc = argv[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + fprintf(stderr, "usage: rtctest [rtcdev]\n"); + return 1; + } + + fd = open(rtc, O_RDONLY); + + if (fd == -1) { + perror(rtc); + exit(errno); + } + + fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n"); + + /* Turn on update interrupts (one per second) */ + retval = ioctl(fd, RTC_UIE_ON, 0); + if (retval == -1) { + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Update IRQs not supported.\n"); + goto test_READ; + } + perror("RTC_UIE_ON ioctl"); + exit(errno); + } + + fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:", + rtc); + fflush(stderr); + for (i=1; i<6; i++) { + /* This read will block */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:"); + fflush(stderr); + for (i=1; i<6; i++) { + struct timeval tv = {5, 0}; /* 5 second timeout on select */ + fd_set readfds; + + FD_ZERO(&readfds); + FD_SET(fd, &readfds); + /* The select will wait until an RTC interrupt happens. */ + retval = select(fd+1, &readfds, NULL, NULL, &tv); + if (retval == -1) { + perror("select"); + exit(errno); + } + /* This read won't block unlike the select-less case above. */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Turn off update interrupts */ + retval = ioctl(fd, RTC_UIE_OFF, 0); + if (retval == -1) { + perror("RTC_UIE_OFF ioctl"); + exit(errno); + } + +test_READ: + /* Read the RTC time/date */ + retval = ioctl(fd, RTC_RD_TIME, &rtc_tm); + if (retval == -1) { + perror("RTC_RD_TIME ioctl"); + exit(errno); + } + + fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", + rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Set the alarm to 5 sec in the future, and check for rollover */ + rtc_tm.tm_sec += 5; + if (rtc_tm.tm_sec >= 60) { + rtc_tm.tm_sec %= 60; + rtc_tm.tm_min++; + } + if (rtc_tm.tm_min == 60) { + rtc_tm.tm_min = 0; + rtc_tm.tm_hour++; + } + if (rtc_tm.tm_hour == 24) + rtc_tm.tm_hour = 0; + + retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); + if (retval == -1) { + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Alarm IRQs not supported.\n"); + goto test_PIE; + } + perror("RTC_ALM_SET ioctl"); + exit(errno); + } + + /* Read the current alarm settings */ + retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); + if (retval == -1) { + perror("RTC_ALM_READ ioctl"); + exit(errno); + } + + fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n", + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Enable alarm interrupts */ + retval = ioctl(fd, RTC_AIE_ON, 0); + if (retval == -1) { + perror("RTC_AIE_ON ioctl"); + exit(errno); + } + + fprintf(stderr, "Waiting 5 seconds for alarm..."); + fflush(stderr); + /* This blocks until the alarm ring causes an interrupt */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + irqcount++; + fprintf(stderr, " okay. Alarm rang.\n"); + + /* Disable alarm interrupts */ + retval = ioctl(fd, RTC_AIE_OFF, 0); + if (retval == -1) { + perror("RTC_AIE_OFF ioctl"); + exit(errno); + } + +test_PIE: + /* Read periodic IRQ rate */ + retval = ioctl(fd, RTC_IRQP_READ, &tmp); + if (retval == -1) { + /* not all RTCs support periodic IRQs */ + if (errno == ENOTTY) { + fprintf(stderr, "\nNo periodic IRQ support\n"); + goto done; + } + perror("RTC_IRQP_READ ioctl"); + exit(errno); + } + fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp); + + fprintf(stderr, "Counting 20 interrupts at:"); + fflush(stderr); + + /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ + for (tmp=2; tmp<=64; tmp*=2) { + + retval = ioctl(fd, RTC_IRQP_SET, tmp); + if (retval == -1) { + /* not all RTCs can change their periodic IRQ rate */ + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Periodic IRQ rate is fixed\n"); + goto done; + } + perror("RTC_IRQP_SET ioctl"); + exit(errno); + } + + fprintf(stderr, "\n%ldHz:\t", tmp); + fflush(stderr); + + /* Enable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_ON, 0); + if (retval == -1) { + perror("RTC_PIE_ON ioctl"); + exit(errno); + } + + for (i=1; i<21; i++) { + gettimeofday(&start, NULL); + /* This blocks */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + if (diff.tv_sec > 0 || + diff.tv_usec > ((1000000L / tmp) * 1.10)) { + fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n", + diff.tv_sec, diff.tv_usec, + (1000000L / tmp)); + fflush(stdout); + exit(-1); + } + + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Disable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_OFF, 0); + if (retval == -1) { + perror("RTC_PIE_OFF ioctl"); + exit(errno); + } + } + +done: + fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); + + close(fd); + + return 0; +} diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c new file mode 100644 index 000000000000..c8a7e14446b1 --- /dev/null +++ b/tools/testing/selftests/timers/set-2038.c @@ -0,0 +1,144 @@ +/* Time bounds setting test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which sets the time to edge cases then + * uses other tests to detect problems. Thus this test requires that + * the inconsistency-check and nanosleep tests be present in the same + * directory it is run from. + * + * To build: + * $ gcc set-2038.c -o set-2038 -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <sys/time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + +#define KTIME_MAX ((long long)~((unsigned long long)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) + +#define YEAR_1901 (-0x7fffffffL) +#define YEAR_1970 1 +#define YEAR_2038 0x7fffffffL /*overflows 32bit time_t */ +#define YEAR_2262 KTIME_SEC_MAX /*overflows 64bit ktime_t */ +#define YEAR_MAX ((long long)((1ULL<<63)-1)) /*overflows 64bit time_t */ + +int is32bits(void) +{ + return (sizeof(long) == 4); +} + +int settime(long long time) +{ + struct timeval now; + int ret; + + now.tv_sec = (time_t)time; + now.tv_usec = 0; + + ret = settimeofday(&now, NULL); + + printf("Setting time to 0x%lx: %d\n", (long)time, ret); + return ret; +} + +int do_tests(void) +{ + int ret; + + ret = system("date"); + ret = system("./inconsistency-check -c 0 -t 20"); + ret |= system("./nanosleep"); + ret |= system("./nsleep-lat"); + return ret; + +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + int opt, dangerous = 0; + time_t start; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "d")) != -1) { + switch (opt) { + case 'd': + dangerous = 1; + } + } + + start = time(0); + + /* First test that crazy values don't work */ + if (!settime(YEAR_1901)) { + ret = -1; + goto out; + } + if (!settime(YEAR_MAX)) { + ret = -1; + goto out; + } + if (!is32bits() && !settime(YEAR_2262)) { + ret = -1; + goto out; + } + + /* Now test behavior near edges */ + settime(YEAR_1970); + ret = do_tests(); + if (ret) + goto out; + + settime(YEAR_2038 - 600); + ret = do_tests(); + if (ret) + goto out; + + /* The rest of the tests can blowup on 32bit systems */ + if (is32bits() && !dangerous) + goto out; + /* Test rollover behavior 32bit edge */ + settime(YEAR_2038 - 10); + ret = do_tests(); + if (ret) + goto out; + + settime(YEAR_2262 - 600); + ret = do_tests(); + +out: + /* restore clock */ + settime(start); + if (ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c new file mode 100644 index 000000000000..dc88dbc8831f --- /dev/null +++ b/tools/testing/selftests/timers/set-tai.c @@ -0,0 +1,79 @@ +/* Set tai offset + * by: John Stultz <john.stultz@linaro.org> + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +int set_tai(int offset) +{ + struct timex tx; + + memset(&tx, 0, sizeof(tx)); + + tx.modes = ADJ_TAI; + tx.constant = offset; + + return adjtimex(&tx); +} + +int get_tai(void) +{ + struct timex tx; + + memset(&tx, 0, sizeof(tx)); + + adjtimex(&tx); + return tx.tai; +} + +int main(int argc, char **argv) +{ + int i, ret; + + ret = get_tai(); + printf("tai offset started at %i\n", ret); + + printf("Checking tai offsets can be properly set: "); + for (i = 1; i <= 60; i++) { + ret = set_tai(i); + ret = get_tai(); + if (ret != i) { + printf("[FAILED] expected: %i got %i\n", i, ret); + return ksft_exit_fail(); + } + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c new file mode 100644 index 000000000000..4fc98c5b0899 --- /dev/null +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -0,0 +1,216 @@ +/* set_timer latency test + * John Stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2014 + * Licensed under the GPLv2 + * + * This test makes sure the set_timer api is correct + * + * To build: + * $ gcc set-timer-lat.c -o set-timer-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + + +#define NSEC_PER_SEC 1000000000ULL +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + +#define TIMER_SECS 1 +int alarmcount; +int clock_id; +struct timespec start_time; +long long max_latency_ns; + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + + +void sigalarm(int signo) +{ + long long delta_ns; + struct timespec ts; + + clock_gettime(clock_id, &ts); + alarmcount++; + + delta_ns = timespec_sub(start_time, ts); + delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount; + + if (delta_ns < 0) + printf("%s timer fired early: FAIL\n", clockstring(clock_id)); + + if (delta_ns > max_latency_ns) + max_latency_ns = delta_ns; +} + +int do_timer(int clock_id, int flags) +{ + struct sigevent se; + timer_t tm1; + struct itimerspec its1, its2; + int err; + + /* Set up timer: */ + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = SIGRTMAX; + se.sigev_value.sival_int = 0; + + max_latency_ns = 0; + alarmcount = 0; + + err = timer_create(clock_id, &se, &tm1); + if (err) { + if ((clock_id == CLOCK_REALTIME_ALARM) || + (clock_id == CLOCK_BOOTTIME_ALARM)) { + printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n", + clockstring(clock_id), + flags ? "ABSTIME":"RELTIME"); + return 0; + } + printf("%s - timer_create() failed\n", clockstring(clock_id)); + return -1; + } + + clock_gettime(clock_id, &start_time); + if (flags) { + its1.it_value = start_time; + its1.it_value.tv_sec += TIMER_SECS; + } else { + its1.it_value.tv_sec = TIMER_SECS; + its1.it_value.tv_nsec = 0; + } + its1.it_interval.tv_sec = TIMER_SECS; + its1.it_interval.tv_nsec = 0; + + err = timer_settime(tm1, flags, &its1, &its2); + if (err) { + printf("%s - timer_settime() failed\n", clockstring(clock_id)); + return -1; + } + + while (alarmcount < 5) + sleep(1); + + printf("%-22s %s max latency: %10lld ns : ", + clockstring(clock_id), + flags ? "ABSTIME":"RELTIME", + max_latency_ns); + + timer_delete(tm1); + if (max_latency_ns < UNRESONABLE_LATENCY) { + printf("[OK]\n"); + return 0; + } + printf("[FAILED]\n"); + return -1; +} + +int main(void) +{ + struct sigaction act; + int signum = SIGRTMAX; + int ret = 0; + + /* Set up signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + printf("Setting timers for every %i seconds\n", TIMER_SECS); + for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) { + + if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) || + (clock_id == CLOCK_THREAD_CPUTIME_ID) || + (clock_id == CLOCK_MONOTONIC_RAW) || + (clock_id == CLOCK_REALTIME_COARSE) || + (clock_id == CLOCK_MONOTONIC_COARSE) || + (clock_id == CLOCK_HWSPECIFIC)) + continue; + + ret |= do_timer(clock_id, TIMER_ABSTIME); + ret |= do_timer(clock_id, 0); + } + if (ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c new file mode 100644 index 000000000000..5562f84ee07c --- /dev/null +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -0,0 +1,89 @@ +/* ADJ_FREQ Skew consistency test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which cranks the ADJ_FREQ knob back + * and forth and watches for consistency problems. Thus this test requires + * that the inconsistency-check tests be present in the same directory it + * is run from. + * + * To build: + * $ gcc skew_consistency.c -o skew_consistency -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/wait.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + +int main(int argv, char **argc) +{ + struct timex tx; + int ret, ppm; + pid_t pid; + + + printf("Running Asyncrhonous Frequency Changing Tests...\n"); + + pid = fork(); + if (!pid) + return system("./inconsistency-check -c 1 -t 600"); + + ppm = 500; + ret = 0; + + while (pid != waitpid(pid, &ret, WNOHANG)) { + ppm = -ppm; + tx.modes = ADJ_FREQUENCY; + tx.freq = ppm << 16; + adjtimex(&tx); + usleep(500000); + } + + /* Set things back */ + tx.modes = ADJ_FREQUENCY; + tx.offset = 0; + adjtimex(&tx); + + + if (ret) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c new file mode 100644 index 000000000000..e632e116f05e --- /dev/null +++ b/tools/testing/selftests/timers/threadtest.c @@ -0,0 +1,204 @@ +/* threadtest.c + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2004, 2005, 2006, 2012 + * Licensed under the GPLv2 + * + * To build: + * $ gcc threadtest.c -o threadtest -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +/* serializes shared list access */ +pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; +/* serializes console output */ +pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER; + + +#define MAX_THREADS 128 +#define LISTSIZE 128 + +int done = 0; + +struct timespec global_list[LISTSIZE]; +int listcount = 0; + + +void checklist(struct timespec *list, int size) +{ + int i, j; + struct timespec *a, *b; + + /* scan the list */ + for (i = 0; i < size-1; i++) { + a = &list[i]; + b = &list[i+1]; + + /* look for any time inconsistencies */ + if ((b->tv_sec <= a->tv_sec) && + (b->tv_nsec < a->tv_nsec)) { + + /* flag other threads */ + done = 1; + + /*serialize printing to avoid junky output*/ + pthread_mutex_lock(&print_lock); + + /* dump the list */ + printf("\n"); + for (j = 0; j < size; j++) { + if (j == i) + printf("---------------\n"); + printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec); + if (j == i+1) + printf("---------------\n"); + } + printf("[FAILED]\n"); + + pthread_mutex_unlock(&print_lock); + } + } +} + +/* The shared thread shares a global list + * that each thread fills while holding the lock. + * This stresses clock syncronization across cpus. + */ +void *shared_thread(void *arg) +{ + while (!done) { + /* protect the list */ + pthread_mutex_lock(&list_lock); + + /* see if we're ready to check the list */ + if (listcount >= LISTSIZE) { + checklist(global_list, LISTSIZE); + listcount = 0; + } + clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]); + + pthread_mutex_unlock(&list_lock); + } + return NULL; +} + + +/* Each independent thread fills in its own + * list. This stresses clock_gettime() lock contention. + */ +void *independent_thread(void *arg) +{ + struct timespec my_list[LISTSIZE]; + int count; + + while (!done) { + /* fill the list */ + for (count = 0; count < LISTSIZE; count++) + clock_gettime(CLOCK_MONOTONIC, &my_list[count]); + checklist(my_list, LISTSIZE); + } + return NULL; +} + +#define DEFAULT_THREAD_COUNT 8 +#define DEFAULT_RUNTIME 30 + +int main(int argc, char **argv) +{ + int thread_count, i; + time_t start, now, runtime; + char buf[255]; + pthread_t pth[MAX_THREADS]; + int opt; + void *tret; + int ret = 0; + void *(*thread)(void *) = shared_thread; + + thread_count = DEFAULT_THREAD_COUNT; + runtime = DEFAULT_RUNTIME; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "t:n:i")) != -1) { + switch (opt) { + case 't': + runtime = atoi(optarg); + break; + case 'n': + thread_count = atoi(optarg); + break; + case 'i': + thread = independent_thread; + printf("using independent threads\n"); + break; + default: + printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]); + printf(" -t: time to run\n"); + printf(" -n: number of threads\n"); + printf(" -i: use independent threads\n"); + return -1; + } + } + + if (thread_count > MAX_THREADS) + thread_count = MAX_THREADS; + + + setbuf(stdout, NULL); + + start = time(0); + strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start)); + printf("%s\n", buf); + printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime); + + /* spawn */ + for (i = 0; i < thread_count; i++) + pthread_create(&pth[i], 0, thread, 0); + + while (time(&now) < start + runtime) { + sleep(1); + if (done) { + ret = 1; + strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now)); + printf("%s\n", buf); + goto out; + } + } + printf("[OK]\n"); + done = 1; + +out: + /* wait */ + for (i = 0; i < thread_count; i++) + pthread_join(pth[i], &tret); + + /* die */ + if (ret) + ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c new file mode 100644 index 000000000000..e86d937cc22c --- /dev/null +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -0,0 +1,202 @@ +/* valid adjtimex test + * by: John Stultz <john.stultz@linaro.org> + * (C) Copyright Linaro 2015 + * Licensed under the GPLv2 + * + * This test validates adjtimex interface with valid + * and invalid test data. + * + * Usage: valid-adjtimex + * + * To build: + * $ gcc valid-adjtimex.c -o valid-adjtimex -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000L + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + return ret; +} + +#define NUM_FREQ_VALID 32 +#define NUM_FREQ_OUTOFRANGE 4 +#define NUM_FREQ_INVALID 2 + +long valid_freq[NUM_FREQ_VALID] = { + -499<<16, + -450<<16, + -400<<16, + -350<<16, + -300<<16, + -250<<16, + -200<<16, + -150<<16, + -100<<16, + -75<<16, + -50<<16, + -25<<16, + -10<<16, + -5<<16, + -1<<16, + -1000, + 1<<16, + 5<<16, + 10<<16, + 25<<16, + 50<<16, + 75<<16, + 100<<16, + 150<<16, + 200<<16, + 250<<16, + 300<<16, + 350<<16, + 400<<16, + 450<<16, + 499<<16, +}; + +long outofrange_freq[NUM_FREQ_OUTOFRANGE] = { + -1000<<16, + -550<<16, + 550<<16, + 1000<<16, +}; + +#define LONG_MAX (~0UL>>1) +#define LONG_MIN (-LONG_MAX - 1) + +long invalid_freq[NUM_FREQ_INVALID] = { + LONG_MAX, + LONG_MIN, +}; + +int validate_freq(void) +{ + struct timex tx; + int ret, pass = 0; + int i; + + clear_time_state(); + + memset(&tx, 0, sizeof(struct timex)); + /* Set the leap second insert flag */ + + printf("Testing ADJ_FREQ... "); + for (i = 0; i < NUM_FREQ_VALID; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = valid_freq[i]; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("[FAIL]\n"); + printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n", + valid_freq[i], valid_freq[i]>>16); + pass = -1; + goto out; + } + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.freq != valid_freq[i]) { + printf("Warning: freq value %ld not what we set it (%ld)!\n", + tx.freq, valid_freq[i]); + } + } + for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = outofrange_freq[i]; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("[FAIL]\n"); + printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n", + outofrange_freq[i], outofrange_freq[i]>>16); + pass = -1; + goto out; + } + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.freq == outofrange_freq[i]) { + printf("[FAIL]\n"); + printf("ERROR: out of range value %ld actually set!\n", + tx.freq); + pass = -1; + goto out; + } + } + + + if (sizeof(long) == 8) { /* this case only applies to 64bit systems */ + for (i = 0; i < NUM_FREQ_INVALID; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = invalid_freq[i]; + ret = adjtimex(&tx); + if (ret >= 0) { + printf("[FAIL]\n"); + printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n", + invalid_freq[i]); + pass = -1; + goto out; + } + } + } + + printf("[OK]\n"); +out: + /* reset freq to zero */ + tx.modes = ADJ_FREQUENCY; + tx.freq = 0; + ret = adjtimex(&tx); + + return pass; +} + + +int main(int argc, char **argv) +{ + if (validate_freq()) + return ksft_exit_fail(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile index 12c9d15bab07..d401b63c5b1a 100644 --- a/tools/testing/selftests/user/Makefile +++ b/tools/testing/selftests/user/Makefile @@ -3,5 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -run_tests: all - ./test_user_copy.sh +TEST_PROGS := test_user_copy.sh + +include ../lib.mk diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 4c4b1f631ecf..a5ce9534eb15 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,16 +1,17 @@ # Makefile for vm selftests -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest BINARIES += transhuge-stress all: $(BINARIES) %: %.c - $(CC) $(CFLAGS) -o $@ $^ + $(CC) $(CFLAGS) -o $@ $^ -lrt -run_tests: all - @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1) +TEST_PROGS := run_vmtests +TEST_FILES := $(BINARIES) + +include ../lib.mk clean: $(RM) $(BINARIES) diff --git a/tools/testing/selftests/vm/hugetlbfstest.c b/tools/testing/selftests/vm/hugetlbfstest.c index ea40ff8c2391..02e1072ec187 100644 --- a/tools/testing/selftests/vm/hugetlbfstest.c +++ b/tools/testing/selftests/vm/hugetlbfstest.c @@ -34,6 +34,7 @@ static void do_mmap(int fd, int extra_flags, int unmap) int *p; int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags; u64 before, after; + int ret; before = read_rss(); p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0); @@ -44,7 +45,8 @@ static void do_mmap(int fd, int extra_flags, int unmap) !"rss didn't grow as expected"); if (!unmap) return; - munmap(p, length); + ret = munmap(p, length); + assert(!ret || !"munmap returned an unexpected error"); after = read_rss(); assert(llabs(after - before) < 0x40000 || !"rss didn't shrink as expected"); diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index ac56639dd4a9..addcd6fc1ecc 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -73,7 +73,11 @@ int main(void) write_bytes(addr); ret = read_bytes(addr); - munmap(addr, LENGTH); + /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ + if (munmap(addr, LENGTH)) { + perror("munmap"); + exit(1); + } return ret; } diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index c87b6812300d..c87b6812300d 100644..100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore new file mode 100644 index 000000000000..15034fef9698 --- /dev/null +++ b/tools/testing/selftests/x86/.gitignore @@ -0,0 +1,2 @@ +*_32 +*_64 diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile new file mode 100644 index 000000000000..5bdb781163d1 --- /dev/null +++ b/tools/testing/selftests/x86/Makefile @@ -0,0 +1,57 @@ +all: + +include ../lib.mk + +.PHONY: all all_32 all_64 warn_32bit_failure clean + +TARGETS_C_BOTHBITS := sigreturn single_step_syscall + +BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32) +BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64) + +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall + +UNAME_M := $(shell uname -m) +CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) + +ifeq ($(CAN_BUILD_I386),1) +all: all_32 +TEST_PROGS += $(BINARIES_32) +endif + +ifeq ($(CAN_BUILD_X86_64),1) +all: all_64 +TEST_PROGS += $(BINARIES_64) +endif + +all_32: $(BINARIES_32) + +all_64: $(BINARIES_64) + +clean: + $(RM) $(BINARIES_32) $(BINARIES_64) + +$(TARGETS_C_BOTHBITS:%=%_32): %_32: %.c + $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + +$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c + $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + +# x86_64 users should be encouraged to install 32-bit libraries +ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01) +all: warn_32bit_failure + +warn_32bit_failure: + @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \ + echo "environment. This will reduce test coverage of 64-bit" 2>&1; \ + echo "kernels. If you are using a Debian-like distribution," 2>&1; \ + echo "try:"; 2>&1; \ + echo ""; \ + echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \ + echo ""; \ + echo "If you are using a Fedora-like distribution, try:"; \ + echo ""; \ + echo " yum install glibc-devel.*i686"; \ + exit 0; +endif diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh new file mode 100755 index 000000000000..172d3293fb7b --- /dev/null +++ b/tools/testing/selftests/x86/check_cc.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# check_cc.sh - Helper to test userspace compilation support +# Copyright (c) 2015 Andrew Lutomirski +# GPL v2 + +CC="$1" +TESTPROG="$2" +shift 2 + +if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then + echo 1 +else + echo 0 +fi + +exit 0 diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c new file mode 100644 index 000000000000..b5aa1bab7416 --- /dev/null +++ b/tools/testing/selftests/x86/sigreturn.c @@ -0,0 +1,684 @@ +/* + * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This is a series of tests that exercises the sigreturn(2) syscall and + * the IRET / SYSRET paths in the kernel. + * + * For now, this focuses on the effects of unusual CS and SS values, + * and it has a bunch of tests to make sure that ESP/RSP is restored + * properly. + * + * The basic idea behind these tests is to raise(SIGUSR1) to create a + * sigcontext frame, plug in the values to be tested, and then return, + * which implicitly invokes sigreturn(2) and programs the user context + * as desired. + * + * For tests for which we expect sigreturn and the subsequent return to + * user mode to succeed, we return to a short trampoline that generates + * SIGTRAP so that the meat of the tests can be ordinary C code in a + * SIGTRAP handler. + * + * The inner workings of each test is documented below. + * + * Do not run on outdated, unpatched kernels at risk of nasty crashes. + */ + +#define _GNU_SOURCE + +#include <sys/time.h> +#include <time.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <asm/ldt.h> +#include <err.h> +#include <setjmp.h> +#include <stddef.h> +#include <stdbool.h> +#include <sys/ptrace.h> +#include <sys/user.h> + +/* + * In principle, this test can run on Linux emulation layers (e.g. + * Illumos "LX branded zones"). Solaris-based kernels reserve LDT + * entries 0-5 for their own internal purposes, so start our LDT + * allocations above that reservation. (The tests don't pass on LX + * branded zones, but at least this lets them run.) + */ +#define LDT_OFFSET 6 + +/* An aligned stack accessible through some of our segments. */ +static unsigned char stack16[65536] __attribute__((aligned(4096))); + +/* + * An aligned int3 instruction used as a trampoline. Some of the tests + * want to fish out their ss values, so this trampoline copies ss to eax + * before the int3. + */ +asm (".pushsection .text\n\t" + ".type int3, @function\n\t" + ".align 4096\n\t" + "int3:\n\t" + "mov %ss,%eax\n\t" + "int3\n\t" + ".size int3, . - int3\n\t" + ".align 4096, 0xcc\n\t" + ".popsection"); +extern char int3[4096]; + +/* + * At startup, we prepapre: + * + * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero + * descriptor or out of bounds). + * - code16_sel: A 16-bit LDT code segment pointing to int3. + * - data16_sel: A 16-bit LDT data segment pointing to stack16. + * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. + * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. + * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. + * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to + * stack16. + * + * For no particularly good reason, xyz_sel is a selector value with the + * RPL and LDT bits filled in, whereas xyz_idx is just an index into the + * descriptor table. These variables will be zero if their respective + * segments could not be allocated. + */ +static unsigned short ldt_nonexistent_sel; +static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; + +static unsigned short gdt_data16_idx, gdt_npdata32_idx; + +static unsigned short GDT3(int idx) +{ + return (idx << 3) | 3; +} + +static unsigned short LDT3(int idx) +{ + return (idx << 3) | 7; +} + +/* Our sigaltstack scratch space. */ +static char altstack_data[SIGSTKSZ]; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void add_ldt(const struct user_desc *desc, unsigned short *var, + const char *name) +{ + if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { + *var = LDT3(desc->entry_number); + } else { + printf("[NOTE]\tFailed to create %s segment\n", name); + *var = 0; + } +} + +static void setup_ldt(void) +{ + if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) + errx(1, "stack16 is too high\n"); + if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) + errx(1, "int3 is too high\n"); + + ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); + + const struct user_desc code16_desc = { + .entry_number = LDT_OFFSET + 0, + .base_addr = (unsigned long)int3, + .limit = 4095, + .seg_32bit = 0, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + add_ldt(&code16_desc, &code16_sel, "code16"); + + const struct user_desc data16_desc = { + .entry_number = LDT_OFFSET + 1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 0, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + add_ldt(&data16_desc, &data16_sel, "data16"); + + const struct user_desc npcode32_desc = { + .entry_number = LDT_OFFSET + 3, + .base_addr = (unsigned long)int3, + .limit = 4095, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); + + const struct user_desc npdata32_desc = { + .entry_number = LDT_OFFSET + 4, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); + + struct user_desc gdt_data16_desc = { + .entry_number = -1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 0, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + + if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { + /* + * This probably indicates vulnerability to CVE-2014-8133. + * Merely getting here isn't definitive, though, and we'll + * diagnose the problem for real later on. + */ + printf("[WARN]\tset_thread_area allocated data16 at index %d\n", + gdt_data16_desc.entry_number); + gdt_data16_idx = gdt_data16_desc.entry_number; + } else { + printf("[OK]\tset_thread_area refused 16-bit data\n"); + } + + struct user_desc gdt_npdata32_desc = { + .entry_number = -1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + + if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { + /* + * As a hardening measure, newer kernels don't allow this. + */ + printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", + gdt_npdata32_desc.entry_number); + gdt_npdata32_idx = gdt_npdata32_desc.entry_number; + } else { + printf("[OK]\tset_thread_area refused 16-bit data\n"); + } +} + +/* State used by our signal handlers. */ +static gregset_t initial_regs, requested_regs, resulting_regs; + +/* Instructions for the SIGUSR1 handler. */ +static volatile unsigned short sig_cs, sig_ss; +static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; + +/* Abstractions for some 32-bit vs 64-bit differences. */ +#ifdef __x86_64__ +# define REG_IP REG_RIP +# define REG_SP REG_RSP +# define REG_AX REG_RAX + +struct selectors { + unsigned short cs, gs, fs, ss; +}; + +static unsigned short *ssptr(ucontext_t *ctx) +{ + struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; + return &sels->ss; +} + +static unsigned short *csptr(ucontext_t *ctx) +{ + struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; + return &sels->cs; +} +#else +# define REG_IP REG_EIP +# define REG_SP REG_ESP +# define REG_AX REG_EAX + +static greg_t *ssptr(ucontext_t *ctx) +{ + return &ctx->uc_mcontext.gregs[REG_SS]; +} + +static greg_t *csptr(ucontext_t *ctx) +{ + return &ctx->uc_mcontext.gregs[REG_CS]; +} +#endif + +/* Number of errors in the current test case. */ +static volatile sig_atomic_t nerrs; + +/* + * SIGUSR1 handler. Sets CS and SS as requested and points IP to the + * int3 trampoline. Sets SP to a large known value so that we can see + * whether the value round-trips back to user mode correctly. + */ +static void sigusr1(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + + *csptr(ctx) = sig_cs; + *ssptr(ctx) = sig_ss; + + ctx->uc_mcontext.gregs[REG_IP] = + sig_cs == code16_sel ? 0 : (unsigned long)&int3; + ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; + ctx->uc_mcontext.gregs[REG_AX] = 0; + + memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */ + + return; +} + +/* + * Called after a successful sigreturn. Restores our state so that + * the original raise(SIGUSR1) returns. + */ +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + sig_err = ctx->uc_mcontext.gregs[REG_ERR]; + sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; + + unsigned short ss; + asm ("mov %%ss,%0" : "=r" (ss)); + + greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX]; + if (asm_ss != sig_ss && sig == SIGTRAP) { + /* Sanity check failure. */ + printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", + ss, *ssptr(ctx), (unsigned long long)asm_ss); + nerrs++; + } + + memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); + + sig_trapped = sig; +} + +/* + * Checks a given selector for its code bitness or returns -1 if it's not + * a usable code segment selector. + */ +int cs_bitness(unsigned short cs) +{ + uint32_t valid = 0, ar; + asm ("lar %[cs], %[ar]\n\t" + "jnz 1f\n\t" + "mov $1, %[valid]\n\t" + "1:" + : [ar] "=r" (ar), [valid] "+rm" (valid) + : [cs] "r" (cs)); + + if (!valid) + return -1; + + bool db = (ar & (1 << 22)); + bool l = (ar & (1 << 21)); + + if (!(ar & (1<<11))) + return -1; /* Not code. */ + + if (l && !db) + return 64; + else if (!l && db) + return 32; + else if (!l && !db) + return 16; + else + return -1; /* Unknown bitness. */ +} + +/* Finds a usable code segment of the requested bitness. */ +int find_cs(int bitness) +{ + unsigned short my_cs; + + asm ("mov %%cs,%0" : "=r" (my_cs)); + + if (cs_bitness(my_cs) == bitness) + return my_cs; + if (cs_bitness(my_cs + (2 << 3)) == bitness) + return my_cs + (2 << 3); + if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) + return my_cs - (2 << 3); + if (cs_bitness(code16_sel) == bitness) + return code16_sel; + + printf("[WARN]\tCould not find %d-bit CS\n", bitness); + return -1; +} + +static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) +{ + int cs = find_cs(cs_bits); + if (cs == -1) { + printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", + cs_bits, use_16bit_ss ? 16 : 32); + return 0; + } + + if (force_ss != -1) { + sig_ss = force_ss; + } else { + if (use_16bit_ss) { + if (!data16_sel) { + printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", + cs_bits); + return 0; + } + sig_ss = data16_sel; + } else { + asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); + } + } + + sig_cs = cs; + + printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", + cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, + (sig_ss & 4) ? "" : ", GDT"); + + raise(SIGUSR1); + + nerrs = 0; + + /* + * Check that each register had an acceptable value when the + * int3 trampoline was invoked. + */ + for (int i = 0; i < NGREG; i++) { + greg_t req = requested_regs[i], res = resulting_regs[i]; + if (i == REG_TRAPNO || i == REG_IP) + continue; /* don't care */ + if (i == REG_SP) { + printf("\tSP: %llx -> %llx\n", (unsigned long long)req, + (unsigned long long)res); + + /* + * In many circumstances, the high 32 bits of rsp + * are zeroed. For example, we could be a real + * 32-bit program, or we could hit any of a number + * of poorly-documented IRET or segmented ESP + * oddities. If this happens, it's okay. + */ + if (res == (req & 0xFFFFFFFF)) + continue; /* OK; not expected to work */ + } + + bool ignore_reg = false; +#if __i386__ + if (i == REG_UESP) + ignore_reg = true; +#else + if (i == REG_CSGSFS) { + struct selectors *req_sels = + (void *)&requested_regs[REG_CSGSFS]; + struct selectors *res_sels = + (void *)&resulting_regs[REG_CSGSFS]; + if (req_sels->cs != res_sels->cs) { + printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", + req_sels->cs, res_sels->cs); + nerrs++; + } + + if (req_sels->ss != res_sels->ss) { + printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", + req_sels->ss, res_sels->ss); + nerrs++; + } + + continue; + } +#endif + + /* Sanity check on the kernel */ + if (i == REG_AX && requested_regs[i] != resulting_regs[i]) { + printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", + (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + continue; + } + + if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { + /* + * SP is particularly interesting here. The + * usual cause of failures is that we hit the + * nasty IRET case of returning to a 16-bit SS, + * in which case bits 16:31 of the *kernel* + * stack pointer persist in ESP. + */ + printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", + i, (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + } + } + + if (nerrs == 0) + printf("[OK]\tall registers okay\n"); + + return nerrs; +} + +static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) +{ + int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; + if (cs == -1) + return 0; + + sig_cs = cs; + sig_ss = ss; + + printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", + cs_bits, sig_cs, sig_ss); + + sig_trapped = 0; + raise(SIGUSR1); + if (sig_trapped) { + char errdesc[32] = ""; + if (sig_err) { + const char *src = (sig_err & 1) ? " EXT" : ""; + const char *table; + if ((sig_err & 0x6) == 0x0) + table = "GDT"; + else if ((sig_err & 0x6) == 0x4) + table = "LDT"; + else if ((sig_err & 0x6) == 0x2) + table = "IDT"; + else + table = "???"; + + sprintf(errdesc, "%s%s index %d, ", + table, src, sig_err >> 3); + } + + char trapname[32]; + if (sig_trapno == 13) + strcpy(trapname, "GP"); + else if (sig_trapno == 11) + strcpy(trapname, "NP"); + else if (sig_trapno == 12) + strcpy(trapname, "SS"); + else if (sig_trapno == 32) + strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ + else + sprintf(trapname, "%d", sig_trapno); + + printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", + trapname, (unsigned long)sig_err, + errdesc, strsignal(sig_trapped)); + return 0; + } else { + printf("[FAIL]\tDid not get SIGSEGV\n"); + return 1; + } +} + +int main() +{ + int total_nerrs = 0; + unsigned short my_cs, my_ss; + + asm volatile ("mov %%cs,%0" : "=r" (my_cs)); + asm volatile ("mov %%ss,%0" : "=r" (my_ss)); + setup_ldt(); + + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + + sethandler(SIGUSR1, sigusr1, 0); + sethandler(SIGTRAP, sigtrap, SA_ONSTACK); + + /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ + total_nerrs += test_valid_sigreturn(64, false, -1); + total_nerrs += test_valid_sigreturn(32, false, -1); + total_nerrs += test_valid_sigreturn(16, false, -1); + + /* + * Test easy espfix cases: return to a 16-bit LDT SS in each possible + * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. + * + * This catches the original missing-espfix-on-64-bit-kernels issue + * as well as CVE-2014-8134. + */ + total_nerrs += test_valid_sigreturn(64, true, -1); + total_nerrs += test_valid_sigreturn(32, true, -1); + total_nerrs += test_valid_sigreturn(16, true, -1); + + if (gdt_data16_idx) { + /* + * For performance reasons, Linux skips espfix if SS points + * to the GDT. If we were able to allocate a 16-bit SS in + * the GDT, see if it leaks parts of the kernel stack pointer. + * + * This tests for CVE-2014-8133. + */ + total_nerrs += test_valid_sigreturn(64, true, + GDT3(gdt_data16_idx)); + total_nerrs += test_valid_sigreturn(32, true, + GDT3(gdt_data16_idx)); + total_nerrs += test_valid_sigreturn(16, true, + GDT3(gdt_data16_idx)); + } + + /* + * We're done testing valid sigreturn cases. Now we test states + * for which sigreturn itself will succeed but the subsequent + * entry to user mode will fail. + * + * Depending on the failure mode and the kernel bitness, these + * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. + */ + clearhandler(SIGTRAP); + sethandler(SIGSEGV, sigtrap, SA_ONSTACK); + sethandler(SIGBUS, sigtrap, SA_ONSTACK); + sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ + + /* Easy failures: invalid SS, resulting in #GP(0) */ + test_bad_iret(64, ldt_nonexistent_sel, -1); + test_bad_iret(32, ldt_nonexistent_sel, -1); + test_bad_iret(16, ldt_nonexistent_sel, -1); + + /* These fail because SS isn't a data segment, resulting in #GP(SS) */ + test_bad_iret(64, my_cs, -1); + test_bad_iret(32, my_cs, -1); + test_bad_iret(16, my_cs, -1); + + /* Try to return to a not-present code segment, triggering #NP(SS). */ + test_bad_iret(32, my_ss, npcode32_sel); + + /* + * Try to return to a not-present but otherwise valid data segment. + * This will cause IRET to fail with #SS on the espfix stack. This + * exercises CVE-2014-9322. + * + * Note that, if espfix is enabled, 64-bit Linux will lose track + * of the actual cause of failure and report #GP(0) instead. + * This would be very difficult for Linux to avoid, because + * espfix64 causes IRET failures to be promoted to #DF, so the + * original exception frame is never pushed onto the stack. + */ + test_bad_iret(32, npdata32_sel, -1); + + /* + * Try to return to a not-present but otherwise valid data + * segment without invoking espfix. Newer kernels don't allow + * this to happen in the first place. On older kernels, though, + * this can trigger CVE-2014-9322. + */ + if (gdt_npdata32_idx) + test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); + + return total_nerrs ? 1 : 0; +} diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c new file mode 100644 index 000000000000..50c26358e8b7 --- /dev/null +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -0,0 +1,181 @@ +/* + * single_step_syscall.c - single-steps various x86 syscalls + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This is a very simple series of tests that makes system calls with + * the TF flag set. This exercises some nasty kernel code in the + * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set + * immediately issues #DB from CPL 0. This requires special handling in + * the kernel. + */ + +#define _GNU_SOURCE + +#include <sys/time.h> +#include <time.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <asm/ldt.h> +#include <err.h> +#include <setjmp.h> +#include <stddef.h> +#include <stdbool.h> +#include <sys/ptrace.h> +#include <sys/user.h> + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static volatile sig_atomic_t sig_traps; + +#ifdef __x86_64__ +# define REG_IP REG_RIP +# define WIDTH "q" +#else +# define REG_IP REG_EIP +# define WIDTH "l" +#endif + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH + : : "rm" (eflags) : "flags"); +} + +#define X86_EFLAGS_TF (1UL << 8) + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (get_eflags() & X86_EFLAGS_TF) { + set_eflags(get_eflags() & ~X86_EFLAGS_TF); + printf("[WARN]\tSIGTRAP handler had TF set\n"); + _exit(1); + } + + sig_traps++; + + if (sig_traps == 10000 || sig_traps == 10001) { + printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n", + (int)sig_traps, + (unsigned long)info->si_addr, + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); + } +} + +static void check_result(void) +{ + unsigned long new_eflags = get_eflags(); + set_eflags(new_eflags & ~X86_EFLAGS_TF); + + if (!sig_traps) { + printf("[FAIL]\tNo SIGTRAP\n"); + exit(1); + } + + if (!(new_eflags & X86_EFLAGS_TF)) { + printf("[FAIL]\tTF was cleared\n"); + exit(1); + } + + printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps); + sig_traps = 0; +} + +int main() +{ + int tmp; + + sethandler(SIGTRAP, sigtrap, 0); + + printf("[RUN]\tSet TF and check nop\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + asm volatile ("nop"); + check_result(); + +#ifdef __x86_64__ + printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + extern unsigned char post_nop[]; + asm volatile ("pushf" WIDTH "\n\t" + "pop" WIDTH " %%r11\n\t" + "nop\n\t" + "post_nop:" + : : "c" (post_nop) : "r11"); + check_result(); +#endif + + printf("[RUN]\tSet TF and check int80\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + check_result(); + + /* + * This test is particularly interesting if fast syscalls use + * SYSENTER: it triggers a nasty design flaw in SYSENTER. + * Specifically, SYSENTER does not clear TF, so either SYSENTER + * or the next instruction traps at CPL0. (Of course, Intel + * mostly forgot to document exactly what happens here.) So we + * get a CPL0 fault with usergs (on 64-bit kernels) and possibly + * no stack. The only sane way the kernel can possibly handle + * it is to clear TF on return from the #DB handler, but this + * happens way too early to set TF in the saved pt_regs, so the + * kernel has to do something clever to avoid losing track of + * the TF bit. + * + * Needless to say, we've had bugs in this area. + */ + syscall(SYS_getpid); /* Force symbol binding without TF set. */ + printf("[RUN]\tSet TF and check a fast syscall\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + syscall(SYS_getpid); + check_result(); + + /* Now make sure that another fast syscall doesn't set TF again. */ + printf("[RUN]\tFast syscall with TF cleared\n"); + fflush(stdout); /* Force a syscall */ + if (get_eflags() & X86_EFLAGS_TF) { + printf("[FAIL]\tTF is now set\n"); + exit(1); + } + if (sig_traps) { + printf("[FAIL]\tGot SIGTRAP\n"); + exit(1); + } + printf("[OK]\tNothing unexpected happened\n"); + + return 0; +} diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c new file mode 100644 index 000000000000..fabdf0f51621 --- /dev/null +++ b/tools/testing/selftests/x86/trivial_32bit_program.c @@ -0,0 +1,18 @@ +/* + * Trivial program to check that we have a valid 32-bit build environment. + * Copyright (c) 2015 Andy Lutomirski + * GPL v2 + */ + +#ifndef __i386__ +# error wrong architecture +#endif + +#include <stdio.h> + +int main() +{ + printf("\n"); + + return 0; +} diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c new file mode 100644 index 000000000000..b994946c40fb --- /dev/null +++ b/tools/testing/selftests/x86/trivial_64bit_program.c @@ -0,0 +1,18 @@ +/* + * Trivial program to check that we have a valid 32-bit build environment. + * Copyright (c) 2015 Andy Lutomirski + * GPL v2 + */ + +#ifndef __x86_64__ +# error wrong architecture +#endif + +#include <stdio.h> + +int main() +{ + printf("\n"); + + return 0; +} diff --git a/tools/thermal/tmon/.gitignore b/tools/thermal/tmon/.gitignore new file mode 100644 index 000000000000..06e96be65276 --- /dev/null +++ b/tools/thermal/tmon/.gitignore @@ -0,0 +1 @@ +/tmon diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile index e775adcbd29f..2e83dd3655a2 100644 --- a/tools/thermal/tmon/Makefile +++ b/tools/thermal/tmon/Makefile @@ -2,8 +2,8 @@ VERSION = 1.0 BINDIR=usr/bin WARNFLAGS=-Wall -Wshadow -W -Wformat -Wimplicit-function-declaration -Wimplicit-int -CFLAGS= -O1 ${WARNFLAGS} -fstack-protector -CC=gcc +CFLAGS+= -O1 ${WARNFLAGS} -fstack-protector +CC=$(CROSS_COMPILE)gcc CFLAGS+=-D VERSION=\"$(VERSION)\" LDFLAGS+= @@ -12,16 +12,21 @@ TARGET=tmon INSTALL_PROGRAM=install -m 755 -p DEL_FILE=rm -f -INSTALL_CONFIGFILE=install -m 644 -p -CONFIG_FILE= -CONFIG_PATH= +# Static builds might require -ltinfo, for instance +ifneq ($(findstring -static, $(LDFLAGS)),) +STATIC := --static +endif +TMON_LIBS=-lm -lpthread +TMON_LIBS += $(shell pkg-config --libs $(STATIC) panelw ncursesw 2> /dev/null || \ + pkg-config --libs $(STATIC) panel ncurses 2> /dev/null || \ + echo -lpanel -lncurses) OBJS = tmon.o tui.o sysfs.o pid.o OBJS += tmon: $(OBJS) Makefile tmon.h - $(CC) ${CFLAGS} $(LDFLAGS) $(OBJS) -o $(TARGET) -lm -lpanel -lncursesw -ltinfo -lpthread + $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $(TARGET) $(TMON_LIBS) valgrind: tmon sudo valgrind -v --track-origins=yes --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes ./$(TARGET) 1> /dev/null @@ -29,13 +34,9 @@ valgrind: tmon install: - mkdir -p $(INSTALL_ROOT)/$(BINDIR) - $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" - - mkdir -p $(INSTALL_ROOT)/$(CONFIG_PATH) - - $(INSTALL_CONFIGFILE) "$(CONFIG_FILE)" "$(INSTALL_ROOT)/$(CONFIG_PATH)" uninstall: $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" - $(CONFIG_FILE) "$(CONFIG_PATH)" - clean: find . -name "*.o" | xargs $(DEL_FILE) diff --git a/tools/thermal/tmon/tmon.8 b/tools/thermal/tmon/tmon.8 index 0be727cb9892..02d5179803aa 100644 --- a/tools/thermal/tmon/tmon.8 +++ b/tools/thermal/tmon/tmon.8 @@ -55,6 +55,8 @@ The \fB-l --log\fP option write data to /var/tmp/tmon.log .PP The \fB-t --time-interval\fP option sets the polling interval in seconds .PP +The \fB-T --target-temp\fP option sets the initial target temperature +.PP The \fB-v --version\fP option shows the version of \fBtmon \fP .PP The \fB-z --zone\fP option sets the target therma zone instance to be controlled diff --git a/tools/thermal/tmon/tmon.c b/tools/thermal/tmon/tmon.c index 09b7c3218334..9aa19652e8e8 100644 --- a/tools/thermal/tmon/tmon.c +++ b/tools/thermal/tmon/tmon.c @@ -64,6 +64,7 @@ void usage() printf(" -h, --help show this help message\n"); printf(" -l, --log log data to /var/tmp/tmon.log\n"); printf(" -t, --time-interval sampling time interval, > 1 sec.\n"); + printf(" -T, --target-temp initial target temperature\n"); printf(" -v, --version show version\n"); printf(" -z, --zone target thermal zone id\n"); @@ -219,6 +220,7 @@ static struct option opts[] = { { "control", 1, NULL, 'c' }, { "daemon", 0, NULL, 'd' }, { "time-interval", 1, NULL, 't' }, + { "target-temp", 1, NULL, 'T' }, { "log", 0, NULL, 'l' }, { "help", 0, NULL, 'h' }, { "version", 0, NULL, 'v' }, @@ -231,7 +233,7 @@ int main(int argc, char **argv) { int err = 0; int id2 = 0, c; - double yk = 0.0; /* controller output */ + double yk = 0.0, temp; /* controller output */ int target_tz_index; if (geteuid() != 0) { @@ -239,7 +241,7 @@ int main(int argc, char **argv) exit(EXIT_FAILURE); } - while ((c = getopt_long(argc, argv, "c:dlht:vgz:", opts, &id2)) != -1) { + while ((c = getopt_long(argc, argv, "c:dlht:T:vgz:", opts, &id2)) != -1) { switch (c) { case 'c': no_control = 0; @@ -254,6 +256,14 @@ int main(int argc, char **argv) if (ticktime < 1) ticktime = 1; break; + case 'T': + temp = strtod(optarg, NULL); + if (temp < 0) { + fprintf(stderr, "error: temperature must be positive\n"); + return 1; + } + target_temp_user = temp; + break; case 'l': printf("Logging data to /var/tmp/tmon.log\n"); logging = 1; diff --git a/tools/thermal/tmon/tui.c b/tools/thermal/tmon/tui.c index 89f8ef0e15c8..b5d1c6b22dd3 100644 --- a/tools/thermal/tmon/tui.c +++ b/tools/thermal/tmon/tui.c @@ -30,6 +30,18 @@ #include "tmon.h" +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) + static PANEL *data_panel; static PANEL *dialogue_panel; static PANEL *top; @@ -98,6 +110,18 @@ void write_status_bar(int x, char *line) wrefresh(status_bar_window); } +/* wrap at 5 */ +#define DIAG_DEV_ROWS 5 +/* + * list cooling devices + "set temp" entry; wraps after 5 rows, if they fit + */ +static int diag_dev_rows(void) +{ + int entries = ptdata.nr_cooling_dev + 1; + int rows = max(DIAG_DEV_ROWS, (entries + 1) / 2); + return min(rows, entries); +} + void setup_windows(void) { int y_begin = 1; @@ -122,7 +146,7 @@ void setup_windows(void) * dialogue window is a pop-up, when needed it lays on top of cdev win */ - dialogue_window = subwin(stdscr, ptdata.nr_cooling_dev+5, maxx-50, + dialogue_window = subwin(stdscr, diag_dev_rows() + 5, maxx-50, DIAG_Y, DIAG_X); thermal_data_window = subwin(stdscr, ptdata.nr_tz_sensor * @@ -258,21 +282,26 @@ void show_cooling_device(void) } const char DIAG_TITLE[] = "[ TUNABLES ]"; -#define DIAG_DEV_ROWS 5 void show_dialogue(void) { int j, x = 0, y = 0; + int rows, cols; WINDOW *w = dialogue_window; if (tui_disabled || !w) return; + getmaxyx(w, rows, cols); + + /* Silence compiler 'unused' warnings */ + (void)cols; + werase(w); box(w, 0, 0); mvwprintw(w, 0, maxx/4, DIAG_TITLE); /* list all the available tunables */ for (j = 0; j <= ptdata.nr_cooling_dev; j++) { - y = j % DIAG_DEV_ROWS; + y = j % diag_dev_rows(); if (y == 0 && j != 0) x += 20; if (j == ptdata.nr_cooling_dev) @@ -283,12 +312,10 @@ void show_dialogue(void) ptdata.cdi[j].type, ptdata.cdi[j].instance); } wattron(w, A_BOLD); - mvwprintw(w, DIAG_DEV_ROWS+1, 1, "Enter Choice [A-Z]?"); + mvwprintw(w, diag_dev_rows()+1, 1, "Enter Choice [A-Z]?"); wattroff(w, A_BOLD); - /* y size of dialogue win is nr cdev + 5, so print legend - * at the bottom line - */ - mvwprintw(w, ptdata.nr_cooling_dev+3, 1, + /* print legend at the bottom line */ + mvwprintw(w, rows - 2, 1, "Legend: A=Active, P=Passive, C=Critical"); wrefresh(dialogue_window); @@ -437,7 +464,7 @@ static void handle_input_choice(int ch) snprintf(buf, sizeof(buf), "New Value for %.10s-%2d: ", ptdata.cdi[cdev_id].type, ptdata.cdi[cdev_id].instance); - write_dialogue_win(buf, DIAG_DEV_ROWS+2, 2); + write_dialogue_win(buf, diag_dev_rows() + 2, 2); handle_input_val(cdev_id); } else { snprintf(buf, sizeof(buf), "Invalid selection %d", ch); diff --git a/tools/usb/ffs-aio-example/multibuff/host_app/test.c b/tools/usb/ffs-aio-example/multibuff/host_app/test.c index daa3abe6bebd..2cbcce6e8dd7 100644 --- a/tools/usb/ffs-aio-example/multibuff/host_app/test.c +++ b/tools/usb/ffs-aio-example/multibuff/host_app/test.c @@ -33,11 +33,6 @@ #define VENDOR 0x1d6b #define PRODUCT 0x0105 -/* endpoints indexes */ - -#define EP_BULK_IN (1 | LIBUSB_ENDPOINT_IN) -#define EP_BULK_OUT (2 | LIBUSB_ENDPOINT_OUT) - #define BUF_LEN 8192 /* @@ -159,14 +154,21 @@ void test_exit(struct test_state *state) int main(void) { struct test_state state; + struct libusb_config_descriptor *conf; + struct libusb_interface_descriptor const *iface; + unsigned char addr; if (test_init(&state)) return 1; + libusb_get_config_descriptor(state.found, 0, &conf); + iface = &conf->interface[0].altsetting[0]; + addr = iface->endpoint[0].bEndpointAddress; + while (1) { static unsigned char buffer[BUF_LEN]; int bytes; - libusb_bulk_transfer(state.handle, EP_BULK_IN, buffer, BUF_LEN, + libusb_bulk_transfer(state.handle, addr, buffer, BUF_LEN, &bytes, 500); } test_exit(&state); diff --git a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c index adc310a6d489..1f44a29818bf 100644 --- a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c +++ b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c @@ -103,12 +103,14 @@ static const struct { .bDescriptorType = USB_DT_ENDPOINT, .bEndpointAddress = 1 | USB_DIR_IN, .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = htole16(512), }, .bulk_source = { .bLength = sizeof(descriptors.hs_descs.bulk_source), .bDescriptorType = USB_DT_ENDPOINT, .bEndpointAddress = 2 | USB_DIR_OUT, .bmAttributes = USB_ENDPOINT_XFER_BULK, + .wMaxPacketSize = htole16(512), }, }, }; diff --git a/tools/usb/ffs-aio-example/simple/host_app/test.c b/tools/usb/ffs-aio-example/simple/host_app/test.c index acd6332811f3..aed86ffff280 100644 --- a/tools/usb/ffs-aio-example/simple/host_app/test.c +++ b/tools/usb/ffs-aio-example/simple/host_app/test.c @@ -33,11 +33,6 @@ #define VENDOR 0x1d6b #define PRODUCT 0x0105 -/* endpoints indexes */ - -#define EP_BULK_IN (1 | LIBUSB_ENDPOINT_IN) -#define EP_BULK_OUT (2 | LIBUSB_ENDPOINT_OUT) - #define BUF_LEN 8192 /* @@ -159,16 +154,24 @@ void test_exit(struct test_state *state) int main(void) { struct test_state state; + struct libusb_config_descriptor *conf; + struct libusb_interface_descriptor const *iface; + unsigned char in_addr, out_addr; if (test_init(&state)) return 1; + libusb_get_config_descriptor(state.found, 0, &conf); + iface = &conf->interface[0].altsetting[0]; + in_addr = iface->endpoint[0].bEndpointAddress; + out_addr = iface->endpoint[1].bEndpointAddress; + while (1) { static unsigned char buffer[BUF_LEN]; int bytes; - libusb_bulk_transfer(state.handle, EP_BULK_IN, buffer, BUF_LEN, + libusb_bulk_transfer(state.handle, in_addr, buffer, BUF_LEN, &bytes, 500); - libusb_bulk_transfer(state.handle, EP_BULK_OUT, buffer, BUF_LEN, + libusb_bulk_transfer(state.handle, out_addr, buffer, BUF_LEN, &bytes, 500); } test_exit(&state); diff --git a/tools/vm/Makefile b/tools/vm/Makefile index ac884b65a072..93aadaf7ff63 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -3,7 +3,7 @@ TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api -LIBS = $(LIB_DIR)/libapikfs.a +LIBS = $(LIB_DIR)/libapi.a CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -I../lib/ diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 264fbc297e0b..8bdf16b8ba60 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -133,6 +133,7 @@ static const char * const page_flag_names[] = { [KPF_KSM] = "x:ksm", [KPF_THP] = "t:thp", [KPF_BALLOON] = "o:balloon", + [KPF_ZERO_PAGE] = "z:zero_page", [KPF_RESERVED] = "r:reserved", [KPF_MLOCKED] = "m:mlocked", |