diff options
Diffstat (limited to 'tools')
59 files changed, 2413 insertions, 142 deletions
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index ea69ce35e902..c3bd294a63d1 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3746,7 +3746,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 8ff7d620d942..33b52eaa39db 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -209,7 +209,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c index eec688041500..e73a79fce015 100644 --- a/tools/power/acpi/common/cmfsize.c +++ b/tools/power/acpi/common/cmfsize.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index efefe309367a..0bd343f136a4 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/oslibcfs.c b/tools/power/acpi/os_specific/service_layers/oslibcfs.c index 6df758302604..11f4aba55aab 100644 --- a/tools/power/acpi/os_specific/service_layers/oslibcfs.c +++ b/tools/power/acpi/os_specific/service_layers/oslibcfs.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index dd5008b0617a..d0e6b857d8d1 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c index e153fcb12b1a..66c4badf03e5 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixdir.c +++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 44ad4889d468..3818fd07e50f 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c index 6858c0893c91..08cb8b2035f2 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixxf.c +++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index eed534481434..025c232e920d 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index 61d0de804b70..da44458d3b6c 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index bbdf9e8e25bc..5fcd9700ac18 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 57620f66ae6c..c3c09152fac6 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -5,7 +5,7 @@ *****************************************************************************/ /* - * Copyright (C) 2000 - 2015, Intel Corp. + * Copyright (C) 2000 - 2016, Intel Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 8f3f5bb9c74e..590d12a25f6e 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -10,6 +10,7 @@ #include <errno.h> #include <stdlib.h> #include <string.h> +#include <limits.h> #include <getopt.h> diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c8edff6803d1..b04afc3295df 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,10 +1,12 @@ TARGETS = breakpoints +TARGETS += capabilities TARGETS += cpu-hotplug TARGETS += efivarfs TARGETS += exec TARGETS += firmware TARGETS += ftrace TARGETS += futex +TARGETS += ipc TARGETS += kcmp TARGETS += lib TARGETS += membarrier diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore new file mode 100644 index 000000000000..9b3193d06608 --- /dev/null +++ b/tools/testing/selftests/breakpoints/.gitignore @@ -0,0 +1 @@ +breakpoint_test diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile index 8c8f0c1f0889..008602aed920 100644 --- a/tools/testing/selftests/capabilities/Makefile +++ b/tools/testing/selftests/capabilities/Makefile @@ -1,18 +1,15 @@ -all: - -include ../lib.mk - -.PHONY: all clean - -TARGETS := validate_cap test_execve +TEST_FILES := validate_cap TEST_PROGS := test_execve -CFLAGS := -O2 -g -std=gnu99 -Wall -lcap-ng +BINARIES := $(TEST_FILES) $(TEST_PROGS) -all: $(TARGETS) +CFLAGS += -O2 -g -std=gnu99 -Wall +LDLIBS += -lcap-ng -lrt -ldl + +all: $(BINARIES) clean: - $(RM) $(TARGETS) + $(RM) $(BINARIES) + +include ../lib.mk -$(TARGETS): %: %.c - $(CC) -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index c4366dc74e01..5c495ad7958a 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -48,8 +48,21 @@ echo "ABCD0123" >"$FW" NAME=$(basename "$FW") +if printf '\000' >"$DIR"/trigger_request; then + echo "$0: empty filename should not succeed" >&2 + exit 1 +fi + +if printf '\000' >"$DIR"/trigger_async_request; then + echo "$0: empty filename should not succeed (async)" >&2 + exit 1 +fi + # Request a firmware that doesn't exist, it should fail. -echo -n "nope-$NAME" >"$DIR"/trigger_request +if echo -n "nope-$NAME" >"$DIR"/trigger_request; then + echo "$0: firmware shouldn't have loaded" >&2 + exit 1 +fi if diff -q "$FW" /dev/test_firmware >/dev/null ; then echo "$0: firmware was not expected to match" >&2 exit 1 @@ -74,4 +87,18 @@ else echo "$0: filesystem loading works" fi +# Try the asynchronous version too +if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then + echo "$0: could not trigger async request" >&2 + exit 1 +fi + +# Verify the contents are what we expect. +if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then + echo "$0: firmware was not loaded (async)" >&2 + exit 1 +else + echo "$0: async filesystem loading works" +fi + exit 0 diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile new file mode 100644 index 000000000000..f5f1a28715ff --- /dev/null +++ b/tools/testing/selftests/intel_pstate/Makefile @@ -0,0 +1,15 @@ +CC := $(CROSS_COMPILE)gcc +CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE +LDFLAGS := $(LDFLAGS) -lm + +TARGETS := msr aperf + +TEST_PROGS := $(TARGETS) run.sh + +.PHONY: all clean +all: $(TARGETS) + +$(TARGETS): $(HEADERS) + +clean: + rm -f $(TARGETS) diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c new file mode 100644 index 000000000000..6046e183f4ad --- /dev/null +++ b/tools/testing/selftests/intel_pstate/aperf.c @@ -0,0 +1,80 @@ +#include <math.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/timeb.h> +#include <sched.h> +#include <errno.h> + +void usage(char *name) { + printf ("Usage: %s cpunum\n", name); +} + +int main(int argc, char **argv) { + int i, cpu, fd; + char msr_file_name[64]; + long long tsc, old_tsc, new_tsc; + long long aperf, old_aperf, new_aperf; + long long mperf, old_mperf, new_mperf; + struct timeb before, after; + long long int start, finish, total; + cpu_set_t cpuset; + + if (argc != 2) { + usage(argv[0]); + return 1; + } + + errno = 0; + cpu = strtol(argv[1], (char **) NULL, 10); + + if (errno) { + usage(argv[0]); + return 1; + } + + sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu); + fd = open(msr_file_name, O_RDONLY); + + if (fd == -1) { + perror("Failed to open"); + return 1; + } + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + if (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset)) { + perror("Failed to set cpu affinity"); + return 1; + } + + ftime(&before); + pread(fd, &old_tsc, sizeof(old_tsc), 0x10); + pread(fd, &old_aperf, sizeof(old_mperf), 0xe7); + pread(fd, &old_mperf, sizeof(old_aperf), 0xe8); + + for (i=0; i<0x8fffffff; i++) { + sqrt(i); + } + + ftime(&after); + pread(fd, &new_tsc, sizeof(new_tsc), 0x10); + pread(fd, &new_aperf, sizeof(new_mperf), 0xe7); + pread(fd, &new_mperf, sizeof(new_aperf), 0xe8); + + tsc = new_tsc-old_tsc; + aperf = new_aperf-old_aperf; + mperf = new_mperf-old_mperf; + + start = before.time*1000 + before.millitm; + finish = after.time*1000 + after.millitm; + total = finish - start; + + printf("runTime: %4.2f\n", 1.0*total/1000); + printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total); + return 0; +} diff --git a/tools/testing/selftests/intel_pstate/msr.c b/tools/testing/selftests/intel_pstate/msr.c new file mode 100644 index 000000000000..abbbfc84d359 --- /dev/null +++ b/tools/testing/selftests/intel_pstate/msr.c @@ -0,0 +1,39 @@ +#include <math.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/timeb.h> +#include <sched.h> +#include <errno.h> + + +int main(int argc, char **argv) { + int cpu, fd; + long long msr; + char msr_file_name[64]; + + if (argc != 2) + return 1; + + errno = 0; + cpu = strtol(argv[1], (char **) NULL, 10); + + if (errno) + return 1; + + sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu); + fd = open(msr_file_name, O_RDONLY); + + if (fd == -1) { + perror("Failed to open"); + return 1; + } + + pread(fd, &msr, sizeof(msr), 0x199); + + printf("msr 0x199: 0x%llx\n", msr); + return 0; +} diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh new file mode 100755 index 000000000000..bdaf37e92684 --- /dev/null +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -0,0 +1,113 @@ +#!/bin/bash +# +# This test runs on Intel x86 based hardware which support the intel_pstate +# driver. The test checks the frequency settings from the maximum turbo +# state to the minimum supported frequency, in decrements of 100MHz. The +# test runs the aperf.c program to put load on each processor. +# +# The results are displayed in a table which indicate the "Target" state, +# or the requested frequency in MHz, the Actual frequency, as read from +# /proc/cpuinfo, the difference between the Target and Actual frequencies, +# and the value of MSR 0x199 (MSR_IA32_PERF_CTL) which indicates what +# pstate the cpu is in, and the value of +# /sys/devices/system/cpu/intel_pstate/max_perf_pct X maximum turbo state +# +# Notes: In some cases several frequency values may be placed in the +# /tmp/result.X files. This is done on purpose in order to catch cases +# where the pstate driver may not be working at all. There is the case +# where, for example, several "similar" frequencies are in the file: +# +# +#/tmp/result.3100:1:cpu MHz : 2899.980 +#/tmp/result.3100:2:cpu MHz : 2900.000 +#/tmp/result.3100:3:msr 0x199: 0x1e00 +#/tmp/result.3100:4:max_perf_pct 94 +# +# and the test will error out in those cases. The result.X file can be checked +# for consistency and modified to remove the extra MHz values. The result.X +# files can be re-evaluated by setting EVALUATE_ONLY to 1 below. + +EVALUATE_ONLY=0 + +max_cpus=$(($(nproc)-1)) + +# compile programs +gcc -o aperf aperf.c -lm +[ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1 +gcc -o msr msr.c -lm +[ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1 + +function run_test () { + + file_ext=$1 + for cpu in `seq 0 $max_cpus` + do + echo "launching aperf load on $cpu" + ./aperf $cpu & + done + + echo "sleeping for 5 seconds" + sleep 5 + num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l) + if [ $num_freqs -le 2 ]; then + cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1 + else + cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1 + fi + ./msr 0 >> /tmp/result.$1 + + max_perf_pct=$(cat /sys/devices/system/cpu/intel_pstate/max_perf_pct) + echo "max_perf_pct $max_perf_pct" >> /tmp/result.$1 + + for job in `jobs -p` + do + echo "waiting for job id $job" + wait $job + done +} + +# +# MAIN (ALL UNITS IN MHZ) +# + +# Get the marketing frequency +_mkt_freq=$(cat /proc/cpuinfo | grep -m 1 "model name" | awk '{print $NF}') +_mkt_freq=$(echo $_mkt_freq | tr -d [:alpha:][:punct:]) +mkt_freq=${_mkt_freq}0 + +# Get the ranges from cpupower +_min_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $1 } ') +min_freq=$(($_min_freq / 1000)) +_max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ') +max_freq=$(($_max_freq / 1000)) + + +for freq in `seq $max_freq -100 $min_freq` +do + echo "Setting maximum frequency to $freq" + cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null + [ $EVALUATE_ONLY -eq 0 ] && run_test $freq +done + +echo "==============================================================================" + +echo "The marketing frequency of the cpu is $mkt_freq MHz" +echo "The maximum frequency of the cpu is $max_freq MHz" +echo "The minimum frequency of the cpu is $min_freq MHz" + +cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null + +# make a pretty table +echo "Target Actual Difference MSR(0x199) max_perf_pct" +for freq in `seq $max_freq -100 $min_freq` +do + result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ') + msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ') + max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' ) + if [ $result_freq -eq $freq ]; then + echo " $freq $result_freq 0 $msr $(($max_perf_pct*3300))" + else + echo " $freq $result_freq $(($result_freq-$freq)) $msr $(($max_perf_pct*$max_freq))" + fi +done +exit 0 diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore index b4709ea588c1..6fa673316ac2 100644 --- a/tools/testing/selftests/powerpc/benchmarks/.gitignore +++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore @@ -1 +1,2 @@ gettimeofday +context_switch diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index 5fa48702070d..912445ff7ce7 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -1,4 +1,4 @@ -TEST_PROGS := gettimeofday +TEST_PROGS := gettimeofday context_switch CFLAGS += -O2 @@ -6,6 +6,9 @@ all: $(TEST_PROGS) $(TEST_PROGS): ../harness.c +context_switch: ../utils.c +context_switch: LDLIBS += -lpthread + include ../../lib.mk clean: diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c new file mode 100644 index 000000000000..7b785941adec --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -0,0 +1,466 @@ +/* + * Context switch microbenchmark. + * + * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <getopt.h> +#include <signal.h> +#include <assert.h> +#include <pthread.h> +#include <limits.h> +#include <sys/time.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/shm.h> +#include <linux/futex.h> + +#include "../utils.h" + +static unsigned int timeout = 30; + +static int touch_vdso; +struct timeval tv; + +static int touch_fp = 1; +double fp; + +static int touch_vector = 1; +typedef int v4si __attribute__ ((vector_size (16))); +v4si a, b, c; + +#ifdef __powerpc__ +static int touch_altivec = 1; + +static void __attribute__((__target__("no-vsx"))) altivec_touch_fn(void) +{ + c = a + b; +} +#endif + +static void touch(void) +{ + if (touch_vdso) + gettimeofday(&tv, NULL); + + if (touch_fp) + fp += 0.1; + +#ifdef __powerpc__ + if (touch_altivec) + altivec_touch_fn(); +#endif + + if (touch_vector) + c = a + b; + + asm volatile("# %0 %1 %2": : "r"(&tv), "r"(&fp), "r"(&c)); +} + +static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu) +{ + pthread_t tid; + cpu_set_t cpuset; + pthread_attr_t attr; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + pthread_attr_init(&attr); + + if (pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset)) { + perror("pthread_attr_setaffinity_np"); + exit(1); + } + + if (pthread_create(&tid, &attr, fn, arg)) { + perror("pthread_create"); + exit(1); + } +} + +static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu) +{ + int pid; + cpu_set_t cpuset; + + pid = fork(); + if (pid == -1) { + perror("fork"); + exit(1); + } + + if (pid) + return; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { + perror("sched_setaffinity"); + exit(1); + } + + fn(arg); + + exit(0); +} + +static unsigned long iterations; +static unsigned long iterations_prev; + +static void sigalrm_handler(int junk) +{ + unsigned long i = iterations; + + printf("%ld\n", i - iterations_prev); + iterations_prev = i; + + if (--timeout == 0) + kill(0, SIGUSR1); + + alarm(1); +} + +static void sigusr1_handler(int junk) +{ + exit(0); +} + +struct actions { + void (*setup)(int, int); + void *(*thread1)(void *); + void *(*thread2)(void *); +}; + +#define READ 0 +#define WRITE 1 + +static int pipe_fd1[2]; +static int pipe_fd2[2]; + +static void pipe_setup(int cpu1, int cpu2) +{ + if (pipe(pipe_fd1) || pipe(pipe_fd2)) + exit(1); +} + +static void *pipe_thread1(void *arg) +{ + signal(SIGALRM, sigalrm_handler); + alarm(1); + + while (1) { + assert(read(pipe_fd1[READ], &c, 1) == 1); + touch(); + + assert(write(pipe_fd2[WRITE], &c, 1) == 1); + touch(); + + iterations += 2; + } + + return NULL; +} + +static void *pipe_thread2(void *arg) +{ + while (1) { + assert(write(pipe_fd1[WRITE], &c, 1) == 1); + touch(); + + assert(read(pipe_fd2[READ], &c, 1) == 1); + touch(); + } + + return NULL; +} + +static struct actions pipe_actions = { + .setup = pipe_setup, + .thread1 = pipe_thread1, + .thread2 = pipe_thread2, +}; + +static void yield_setup(int cpu1, int cpu2) +{ + if (cpu1 != cpu2) { + fprintf(stderr, "Both threads must be on the same CPU for yield test\n"); + exit(1); + } +} + +static void *yield_thread1(void *arg) +{ + signal(SIGALRM, sigalrm_handler); + alarm(1); + + while (1) { + sched_yield(); + touch(); + + iterations += 2; + } + + return NULL; +} + +static void *yield_thread2(void *arg) +{ + while (1) { + sched_yield(); + touch(); + } + + return NULL; +} + +static struct actions yield_actions = { + .setup = yield_setup, + .thread1 = yield_thread1, + .thread2 = yield_thread2, +}; + +static long sys_futex(void *addr1, int op, int val1, struct timespec *timeout, + void *addr2, int val3) +{ + return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3); +} + +static unsigned long cmpxchg(unsigned long *p, unsigned long expected, + unsigned long desired) +{ + unsigned long exp = expected; + + __atomic_compare_exchange_n(p, &exp, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return exp; +} + +static unsigned long xchg(unsigned long *p, unsigned long val) +{ + return __atomic_exchange_n(p, val, __ATOMIC_SEQ_CST); +} + +static int mutex_lock(unsigned long *m) +{ + int c; + + c = cmpxchg(m, 0, 1); + if (!c) + return 0; + + if (c == 1) + c = xchg(m, 2); + + while (c) { + sys_futex(m, FUTEX_WAIT, 2, NULL, NULL, 0); + c = xchg(m, 2); + } + + return 0; +} + +static int mutex_unlock(unsigned long *m) +{ + if (*m == 2) + *m = 0; + else if (xchg(m, 0) == 1) + return 0; + + sys_futex(m, FUTEX_WAKE, 1, NULL, NULL, 0); + + return 0; +} + +static unsigned long *m1, *m2; + +static void futex_setup(int cpu1, int cpu2) +{ + int shmid; + void *shmaddr; + + shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W); + if (shmid < 0) { + perror("shmget"); + exit(1); + } + + shmaddr = shmat(shmid, NULL, 0); + if (shmaddr == (char *)-1) { + perror("shmat"); + shmctl(shmid, IPC_RMID, NULL); + exit(1); + } + + shmctl(shmid, IPC_RMID, NULL); + + m1 = shmaddr; + m2 = shmaddr + sizeof(*m1); + + *m1 = 0; + *m2 = 0; + + mutex_lock(m1); + mutex_lock(m2); +} + +static void *futex_thread1(void *arg) +{ + signal(SIGALRM, sigalrm_handler); + alarm(1); + + while (1) { + mutex_lock(m2); + mutex_unlock(m1); + + iterations += 2; + } + + return NULL; +} + +static void *futex_thread2(void *arg) +{ + while (1) { + mutex_unlock(m2); + mutex_lock(m1); + } + + return NULL; +} + +static struct actions futex_actions = { + .setup = futex_setup, + .thread1 = futex_thread1, + .thread2 = futex_thread2, +}; + +static int processes; + +static struct option options[] = { + { "test", required_argument, 0, 't' }, + { "process", no_argument, &processes, 1 }, + { "timeout", required_argument, 0, 's' }, + { "vdso", no_argument, &touch_vdso, 1 }, + { "no-fp", no_argument, &touch_fp, 0 }, +#ifdef __powerpc__ + { "no-altivec", no_argument, &touch_altivec, 0 }, +#endif + { "no-vector", no_argument, &touch_vector, 0 }, + { 0, }, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: context_switch2 <options> CPU1 CPU2\n\n"); + fprintf(stderr, "\t\t--test=X\tpipe, futex or yield (default)\n"); + fprintf(stderr, "\t\t--process\tUse processes (default threads)\n"); + fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n"); + fprintf(stderr, "\t\t--vdso\t\ttouch VDSO\n"); + fprintf(stderr, "\t\t--fp\t\ttouch FP\n"); +#ifdef __powerpc__ + fprintf(stderr, "\t\t--altivec\ttouch altivec\n"); +#endif + fprintf(stderr, "\t\t--vector\ttouch vector\n"); +} + +int main(int argc, char *argv[]) +{ + signed char c; + struct actions *actions = &yield_actions; + int cpu1; + int cpu2; + static void (*start_fn)(void *(*fn)(void *), void *arg, unsigned long cpu); + + while (1) { + int option_index = 0; + + c = getopt_long(argc, argv, "", options, &option_index); + + if (c == -1) + break; + + switch (c) { + case 0: + if (options[option_index].flag != 0) + break; + + usage(); + exit(1); + break; + + case 't': + if (!strcmp(optarg, "pipe")) { + actions = &pipe_actions; + } else if (!strcmp(optarg, "yield")) { + actions = &yield_actions; + } else if (!strcmp(optarg, "futex")) { + actions = &futex_actions; + } else { + usage(); + exit(1); + } + break; + + case 's': + timeout = atoi(optarg); + break; + + default: + usage(); + exit(1); + } + } + + if (processes) + start_fn = start_process_on; + else + start_fn = start_thread_on; + + if (((argc - optind) != 2)) { + cpu1 = cpu2 = pick_online_cpu(); + } else { + cpu1 = atoi(argv[optind++]); + cpu2 = atoi(argv[optind++]); + } + + printf("Using %s with ", processes ? "processes" : "threads"); + + if (actions == &pipe_actions) + printf("pipe"); + else if (actions == &yield_actions) + printf("yield"); + else + printf("futex"); + + printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n", + cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no", + touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no"); + + /* Create a new process group so we can signal everyone for exit */ + setpgid(getpid(), getpid()); + + signal(SIGUSR1, sigusr1_handler); + + actions->setup(cpu1, cpu2); + + start_fn(actions->thread1, NULL, cpu1); + start_fn(actions->thread2, NULL, cpu2); + + while (1) + sleep(3600); + + return 0; +} diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c index 8265504de571..08a8b95e3bc1 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -60,14 +60,6 @@ int dscr_inherit_exec(void) else set_dscr(dscr); - /* - * XXX: Force a context switch out so that DSCR - * current value is copied into the thread struct - * which is required for the child to inherit the - * changed value. - */ - sleep(1); - pid = fork(); if (pid == -1) { perror("fork() failed"); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c index 4e414caf7f40..3e5a6d195e9a 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c @@ -40,14 +40,6 @@ int dscr_inherit(void) else set_dscr(dscr); - /* - * XXX: Force a context switch out so that DSCR - * current value is copied into the thread struct - * which is required for the child to inherit the - * changed value. - */ - sleep(1); - pid = fork(); if (pid == -1) { perror("fork() failed"); diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index f7997affd143..52f9be7f61f0 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -116,46 +116,3 @@ int test_harness(int (test_function)(void), char *name) return rc; } - -static char auxv[4096]; - -void *get_auxv_entry(int type) -{ - ElfW(auxv_t) *p; - void *result; - ssize_t num; - int fd; - - fd = open("/proc/self/auxv", O_RDONLY); - if (fd == -1) { - perror("open"); - return NULL; - } - - result = NULL; - - num = read(fd, auxv, sizeof(auxv)); - if (num < 0) { - perror("read"); - goto out; - } - - if (num > sizeof(auxv)) { - printf("Overflowed auxv buffer\n"); - goto out; - } - - p = (ElfW(auxv_t) *)auxv; - - while (p->a_type != AT_NULL) { - if (p->a_type == type) { - result = (void *)p->a_un.a_val; - break; - } - - p++; - } -out: - close(fd); - return result; -} diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index a9099d9f8f39..ac41a7177f2e 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -2,7 +2,7 @@ noarg: $(MAKE) -C ../ TEST_PROGS := count_instructions l3_bank_test per_event_excludes -EXTRA_SOURCES := ../harness.c event.c lib.c +EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c all: $(TEST_PROGS) ebb @@ -12,6 +12,8 @@ $(TEST_PROGS): $(EXTRA_SOURCES) count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES) $(CC) $(CFLAGS) -m64 -o $@ $^ +per_event_excludes: ../utils.c + include ../../lib.mk DEFAULT_RUN_TESTS := $(RUN_TESTS) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 5cdc9dbf2b27..8d2279c4bb4b 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -18,7 +18,8 @@ TEST_PROGS := reg_access_test event_attributes_test cycles_test \ all: $(TEST_PROGS) -$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S +$(TEST_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \ + ebb.c ebb_handler.S trace.c busy_loop.S instruction_count_test: ../loop.S diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c index 9729d9f90218..e67452f1bcff 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c @@ -13,7 +13,6 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> -#include <linux/auxvec.h> #include "trace.h" #include "reg.h" @@ -324,7 +323,7 @@ bool ebb_is_supported(void) { #ifdef PPC_FEATURE2_EBB /* EBB requires at least POWER8 */ - return ((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_EBB); + return have_hwcap2(PPC_FEATURE2_EBB); #else return false; #endif diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c index a07104c2afe6..a361ad3334ce 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.c +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -15,32 +15,6 @@ #include "lib.h" -int pick_online_cpu(void) -{ - cpu_set_t mask; - int cpu; - - CPU_ZERO(&mask); - - if (sched_getaffinity(0, sizeof(mask), &mask)) { - perror("sched_getaffinity"); - return -1; - } - - /* We prefer a primary thread, but skip 0 */ - for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8) - if (CPU_ISSET(cpu, &mask)) - return cpu; - - /* Search for anything, but in reverse */ - for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--) - if (CPU_ISSET(cpu, &mask)) - return cpu; - - printf("No cpus in affinity mask?!\n"); - return -1; -} - int bind_to_cpu(int cpu) { cpu_set_t mask; diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h index ca5d72ae3be6..0213af4ff332 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.h +++ b/tools/testing/selftests/powerpc/pmu/lib.h @@ -19,7 +19,6 @@ union pipe { int fds[2]; }; -extern int pick_online_cpu(void); extern int bind_to_cpu(int cpu); extern int kill_child_and_wait(pid_t child_pid); extern int wait_for_child(pid_t child_pid); diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh new file mode 100755 index 000000000000..83fb253ae3bd --- /dev/null +++ b/tools/testing/selftests/powerpc/scripts/hmi.sh @@ -0,0 +1,89 @@ +#!/bin/sh +# +# Copyright 2015, Daniel Axtens, IBM Corporation +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + + +# do we have ./getscom, ./putscom? +if [ -x ./getscom ] && [ -x ./putscom ]; then + GETSCOM=./getscom + PUTSCOM=./putscom +elif which getscom > /dev/null; then + GETSCOM=$(which getscom) + PUTSCOM=$(which putscom) +else + cat <<EOF +Can't find getscom/putscom in . or \$PATH. +See https://github.com/open-power/skiboot. +The tool is in external/xscom-utils +EOF + exit 1 +fi + +# We will get 8 HMI events per injection +# todo: deal with things being offline +expected_hmis=8 +COUNT_HMIS() { + dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt' +} + +# massively expand snooze delay, allowing injection on all cores +ppc64_cpu --smt-snooze-delay=1000000000 + +# when we exit, restore it +trap "ppc64_cpu --smt-snooze-delay=100" 0 1 + +# for each chip+core combination +# todo - less fragile parsing +egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog | +while read chipcore; do + chip=$(echo "$chipcore"|awk '{print $3}') + core=$(echo "$chipcore"|awk '{print $5}') + fir="0x1${core}013100" + + # verify that Core FIR is zero as expected + if [ "$($GETSCOM -c 0x${chip} $fir)" != 0 ]; then + echo "FIR was not zero before injection for chip $chip, core $core. Aborting!" + echo "Result of $GETSCOM -c 0x${chip} $fir:" + $GETSCOM -c 0x${chip} $fir + echo "If you get a -5 error, the core may be in idle state. Try stress-ng." + echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0" + exit 1 + fi + + # keep track of the number of HMIs handled + old_hmis=$(COUNT_HMIS) + + # do injection, adding a marker to dmesg for clarity + echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg + # inject a RegFile recoverable error + if ! $PUTSCOM -c 0x${chip} $fir 2000000000000000 > /dev/null; then + echo "Error injecting. Aborting!" + exit 1 + fi + + # now we want to wait for all the HMIs to be processed + # we expect one per thread on the core + i=0; + new_hmis=$(COUNT_HMIS) + while [ $new_hmis -lt $((old_hmis + expected_hmis)) ] && [ $i -lt 12 ]; do + echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping" + sleep 5; + i=$((i + 1)) + new_hmis=$(COUNT_HMIS) + done + if [ $i = 12 ]; then + echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting." + exit 1 + fi + echo "Processed $expected_hmis events; presumed success. Check dmesg." + echo "" +done diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 2699635d2cd9..7d0f14b8cb2e 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -1,2 +1,5 @@ tm-resched-dscr tm-syscall +tm-signal-msr-resv +tm-signal-stack +tm-vmxcopy diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 4bea62a319dc..737f72c964e6 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,8 +1,8 @@ -TEST_PROGS := tm-resched-dscr tm-syscall +TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack tm-vmxcopy all: $(TEST_PROGS) -$(TEST_PROGS): ../harness.c +$(TEST_PROGS): ../harness.c ../utils.c tm-syscall: tm-syscall-asm.S tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index 42d4c8caad81..8fde93d6021f 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -29,6 +29,7 @@ #include <asm/tm.h> #include "utils.h" +#include "tm.h" #define TBEGIN ".long 0x7C00051D ;" #define TEND ".long 0x7C00055D ;" @@ -42,6 +43,8 @@ int test_body(void) { uint64_t rv, dscr1 = 1, dscr2, texasr; + SKIP_IF(!have_htm()); + printf("Check DSCR TM context switch: "); fflush(stdout); for (;;) { diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c new file mode 100644 index 000000000000..d86653f282b1 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c @@ -0,0 +1,74 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * + * Test the kernel's signal return code to ensure that it doesn't + * crash when both the transactional and suspend MSR bits are set in + * the signal context. + * + * For this test, we send ourselves a SIGUSR1. In the SIGUSR1 handler + * we modify the signal context to set both MSR TM S and T bits (which + * is "reserved" by the PowerISA). When we return from the signal + * handler (implicit sigreturn), the kernel should detect reserved MSR + * value and send us with a SIGSEGV. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <signal.h> +#include <unistd.h> + +#include "utils.h" +#include "tm.h" + +int segv_expected = 0; + +void signal_segv(int signum) +{ + if (segv_expected && (signum == SIGSEGV)) + _exit(0); + _exit(1); +} + +void signal_usr1(int signum, siginfo_t *info, void *uc) +{ + ucontext_t *ucp = uc; + + /* Link tm checkpointed context to normal context */ + ucp->uc_link = ucp; + /* Set all TM bits so that the context is now invalid */ +#ifdef __powerpc64__ + ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32); +#else + ucp->uc_mcontext.regs->gpr[PT_MSR] |= (7ULL); +#endif + /* Should segv on return becuase of invalid context */ + segv_expected = 1; +} + +int tm_signal_msr_resv() +{ + struct sigaction act; + + SKIP_IF(!have_htm()); + + act.sa_sigaction = signal_usr1; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGUSR1, &act, NULL) < 0) { + perror("sigaction sigusr1"); + exit(1); + } + if (signal(SIGSEGV, signal_segv) == SIG_ERR) + exit(1); + + raise(SIGUSR1); + + /* We shouldn't get here as we exit in the segv handler */ + return 1; +} + +int main(void) +{ + return test_harness(tm_signal_msr_resv, "tm_signal_msr_resv"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c new file mode 100644 index 000000000000..e44a238c1d77 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c @@ -0,0 +1,76 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * + * Test the kernel's signal delievery code to ensure that we don't + * trelaim twice in the kernel signal delivery code. This can happen + * if we trigger a signal when in a transaction and the stack pointer + * is bogus. + * + * This test case registers a SEGV handler, sets the stack pointer + * (r1) to NULL, starts a transaction and then generates a SEGV. The + * SEGV should be handled but we exit here as the stack pointer is + * invalid and hance we can't sigreturn. We only need to check that + * this flow doesn't crash the kernel. + */ + +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <stdlib.h> +#include <stdio.h> +#include <signal.h> + +#include "utils.h" +#include "tm.h" + +void signal_segv(int signum) +{ + /* This should never actually run since stack is foobar */ + exit(1); +} + +int tm_signal_stack() +{ + int pid; + + SKIP_IF(!have_htm()); + + pid = fork(); + if (pid < 0) + exit(1); + + if (pid) { /* Parent */ + /* + * It's likely the whole machine will crash here so if + * the child ever exits, we are good. + */ + wait(NULL); + return 0; + } + + /* + * The flow here is: + * 1) register a signal handler (so signal delievery occurs) + * 2) make stack pointer (r1) = NULL + * 3) start transaction + * 4) cause segv + */ + if (signal(SIGSEGV, signal_segv) == SIG_ERR) + exit(1); + asm volatile("li 1, 0 ;" /* stack ptr == NULL */ + "1:" + ".long 0x7C00051D ;" /* tbegin */ + "beq 1b ;" /* retry forever */ + ".long 0x7C0005DD ; ;" /* tsuspend */ + "ld 2, 0(1) ;" /* trigger segv" */ + : : : "memory"); + + /* This should never get here due to above segv */ + return 1; +} + +int main(void) +{ + return test_harness(tm_signal_stack, "tm_signal_stack"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c index e835bf7ec7ae..60560cb20e38 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall.c +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -13,12 +13,11 @@ #include <unistd.h> #include <sys/syscall.h> #include <asm/tm.h> -#include <asm/cputable.h> -#include <linux/auxvec.h> #include <sys/time.h> #include <stdlib.h> #include "utils.h" +#include "tm.h" extern int getppid_tm_active(void); extern int getppid_tm_suspended(void); @@ -77,16 +76,6 @@ pid_t getppid_tm(bool suspend) exit(-1); } -static inline bool have_htm_nosc(void) -{ -#ifdef PPC_FEATURE2_HTM_NOSC - return ((long)get_auxv_entry(AT_HWCAP2) & PPC_FEATURE2_HTM_NOSC); -#else - printf("PPC_FEATURE2_HTM_NOSC not defined, can't check AT_HWCAP2\n"); - return false; -#endif -} - int tm_syscall(void) { unsigned count = 0; diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c new file mode 100644 index 000000000000..0274de7b11f3 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c @@ -0,0 +1,103 @@ +/* + * Copyright 2015, Michael Neuling, IBM Corp. + * Licensed under GPLv2. + * + * Original: Michael Neuling 4/12/2013 + * Edited: Rashmica Gupta 4/12/2015 + * + * See if the altivec state is leaked out of an aborted transaction due to + * kernel vmx copy loops. + * + * When the transaction aborts, VSR values should rollback to the values + * they held before the transaction commenced. Using VSRs while transaction + * is suspended should not affect the checkpointed values. + * + * (1) write A to a VSR + * (2) start transaction + * (3) suspend transaction + * (4) change the VSR to B + * (5) trigger kernel vmx copy loop + * (6) abort transaction + * (7) check that the VSR value is A + */ + +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> +#include <string.h> +#include <assert.h> + +#include "tm.h" +#include "utils.h" + +int test_vmxcopy() +{ + long double vecin = 1.3; + long double vecout; + unsigned long pgsize = getpagesize(); + int i; + int fd; + int size = pgsize*16; + char tmpfile[] = "/tmp/page_faultXXXXXX"; + char buf[pgsize]; + char *a; + uint64_t aborted = 0; + + SKIP_IF(!have_htm()); + + fd = mkstemp(tmpfile); + assert(fd >= 0); + + memset(buf, 0, pgsize); + for (i = 0; i < size; i += pgsize) + assert(write(fd, buf, pgsize) == pgsize); + + unlink(tmpfile); + + a = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + assert(a != MAP_FAILED); + + asm __volatile__( + "lxvd2x 40,0,%[vecinptr];" /* set 40 to initial value*/ + "tbegin.;" + "beq 3f;" + "tsuspend.;" + "xxlxor 40,40,40;" /* set 40 to 0 */ + "std 5, 0(%[map]);" /* cause kernel vmx copy page */ + "tabort. 0;" + "tresume.;" + "tend.;" + "li %[res], 0;" + "b 5f;" + + /* Abort handler */ + "3:;" + "li %[res], 1;" + + "5:;" + "stxvd2x 40,0,%[vecoutptr];" + : [res]"=r"(aborted) + : [vecinptr]"r"(&vecin), + [vecoutptr]"r"(&vecout), + [map]"r"(a) + : "memory", "r0", "r3", "r4", "r5", "r6", "r7"); + + if (aborted && (vecin != vecout)){ + printf("FAILED: vector state leaked on abort %f != %f\n", + (double)vecin, (double)vecout); + return 1; + } + + munmap(a, size); + + close(fd); + + return 0; +} + +int main(void) +{ + return test_harness(test_vmxcopy, "tm_vmxcopy"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h new file mode 100644 index 000000000000..24144b25772c --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm.h @@ -0,0 +1,34 @@ +/* + * Copyright 2015, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#ifndef _SELFTESTS_POWERPC_TM_TM_H +#define _SELFTESTS_POWERPC_TM_TM_H + +#include <stdbool.h> +#include <asm/cputable.h> + +#include "../utils.h" + +static inline bool have_htm(void) +{ +#ifdef PPC_FEATURE2_HTM + return have_hwcap2(PPC_FEATURE2_HTM); +#else + printf("PPC_FEATURE2_HTM not defined, can't check AT_HWCAP2\n"); + return false; +#endif +} + +static inline bool have_htm_nosc(void) +{ +#ifdef PPC_FEATURE2_HTM_NOSC + return have_hwcap2(PPC_FEATURE2_HTM_NOSC); +#else + printf("PPC_FEATURE2_HTM_NOSC not defined, can't check AT_HWCAP2\n"); + return false; +#endif +} + +#endif /* _SELFTESTS_POWERPC_TM_TM_H */ diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c new file mode 100644 index 000000000000..dcf74184bfd0 --- /dev/null +++ b/tools/testing/selftests/powerpc/utils.c @@ -0,0 +1,87 @@ +/* + * Copyright 2013-2015, Michael Ellerman, IBM Corp. + * Licensed under GPLv2. + */ + +#define _GNU_SOURCE /* For CPU_ZERO etc. */ + +#include <elf.h> +#include <errno.h> +#include <fcntl.h> +#include <link.h> +#include <sched.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "utils.h" + +static char auxv[4096]; + +void *get_auxv_entry(int type) +{ + ElfW(auxv_t) *p; + void *result; + ssize_t num; + int fd; + + fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + perror("open"); + return NULL; + } + + result = NULL; + + num = read(fd, auxv, sizeof(auxv)); + if (num < 0) { + perror("read"); + goto out; + } + + if (num > sizeof(auxv)) { + printf("Overflowed auxv buffer\n"); + goto out; + } + + p = (ElfW(auxv_t) *)auxv; + + while (p->a_type != AT_NULL) { + if (p->a_type == type) { + result = (void *)p->a_un.a_val; + break; + } + + p++; + } +out: + close(fd); + return result; +} + +int pick_online_cpu(void) +{ + cpu_set_t mask; + int cpu; + + CPU_ZERO(&mask); + + if (sched_getaffinity(0, sizeof(mask), &mask)) { + perror("sched_getaffinity"); + return -1; + } + + /* We prefer a primary thread, but skip 0 */ + for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8) + if (CPU_ISSET(cpu, &mask)) + return cpu; + + /* Search for anything, but in reverse */ + for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--) + if (CPU_ISSET(cpu, &mask)) + return cpu; + + printf("No cpus in affinity mask?!\n"); + return -1; +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index b7d41086bb0a..175ac6ad10dd 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -8,6 +8,7 @@ #include <stdint.h> #include <stdbool.h> +#include <linux/auxvec.h> /* Avoid headaches with PRI?64 - just use %ll? always */ typedef unsigned long long u64; @@ -21,6 +22,12 @@ typedef uint8_t u8; int test_harness(int (test_function)(void), char *name); extern void *get_auxv_entry(int type); +int pick_online_cpu(void); + +static inline bool have_hwcap2(unsigned long ftr2) +{ + return ((unsigned long)get_auxv_entry(AT_HWCAP2) & ftr2) == ftr2; +} /* Yes, this is evil */ #define FAIL_IF(x) \ diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore new file mode 100644 index 000000000000..b3e59d41fd82 --- /dev/null +++ b/tools/testing/selftests/ptrace/.gitignore @@ -0,0 +1 @@ +peeksiginfo diff --git a/tools/testing/selftests/seccomp/test_harness.h b/tools/testing/selftests/seccomp/test_harness.h index fb2841601f2f..a786c69c7584 100644 --- a/tools/testing/selftests/seccomp/test_harness.h +++ b/tools/testing/selftests/seccomp/test_harness.h @@ -42,6 +42,7 @@ #define TEST_HARNESS_H_ #define _GNU_SOURCE +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -370,8 +371,8 @@ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ if (!(__exp _t __seen)) { \ - unsigned long long __exp_print = (unsigned long long)__exp; \ - unsigned long long __seen_print = (unsigned long long)__seen; \ + unsigned long long __exp_print = (uintptr_t)__exp; \ + unsigned long long __seen_print = (uintptr_t)__seen; \ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ #_expected, __exp_print, #_t, \ #_seen, __seen_print); \ diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore index ced998151bc4..68f3fc71ac44 100644 --- a/tools/testing/selftests/timers/.gitignore +++ b/tools/testing/selftests/timers/.gitignore @@ -16,3 +16,4 @@ set-timer-lat skew_consistency threadtest valid-adjtimex +adjtick diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index ff1bb16cec4f..a937a9d26b60 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -2,3 +2,8 @@ hugepage-mmap hugepage-shm map_hugetlb thuge-gen +compaction_test +mlock2-tests +on-fault-limit +transhuge-stress +userfaultfd diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index 26b7926bda88..ba34f9e96efd 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -1,15 +1,19 @@ #if defined(__i386__) || defined(__x86_64__) #define barrier() asm volatile("" ::: "memory") -#define mb() __sync_synchronize() - -#define smp_mb() mb() -# define dma_rmb() barrier() -# define dma_wmb() barrier() -# define smp_rmb() barrier() -# define smp_wmb() barrier() +#define virt_mb() __sync_synchronize() +#define virt_rmb() barrier() +#define virt_wmb() barrier() +/* Atomic store should be enough, but gcc generates worse code in that case. */ +#define virt_store_mb(var, value) do { \ + typeof(var) virt_store_mb_value = (value); \ + __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \ + __ATOMIC_SEQ_CST); \ + barrier(); \ +} while (0); /* Weak barriers should be used. If not - it's a bug */ -# define rmb() abort() -# define wmb() abort() +# define mb() abort() +# define rmb() abort() +# define wmb() abort() #else #error Please fill in barrier macros #endif diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h new file mode 100644 index 000000000000..845960e1cbf2 --- /dev/null +++ b/tools/virtio/linux/compiler.h @@ -0,0 +1,9 @@ +#ifndef LINUX_COMPILER_H +#define LINUX_COMPILER_H + +#define WRITE_ONCE(var, val) \ + (*((volatile typeof(val) *)(&(var))) = (val)) + +#define READ_ONCE(var) (*((volatile typeof(val) *)(&(var)))) + +#endif diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 4db7d5691ba7..033849948215 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -8,6 +8,7 @@ #include <assert.h> #include <stdarg.h> +#include <linux/compiler.h> #include <linux/types.h> #include <linux/printk.h> #include <linux/bug.h> diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile new file mode 100644 index 000000000000..feaa64ac4630 --- /dev/null +++ b/tools/virtio/ringtest/Makefile @@ -0,0 +1,22 @@ +all: + +all: ring virtio_ring_0_9 virtio_ring_poll + +CFLAGS += -Wall +CFLAGS += -pthread -O2 -ggdb +LDFLAGS += -pthread -O2 -ggdb + +main.o: main.c main.h +ring.o: ring.c main.h +virtio_ring_0_9.o: virtio_ring_0_9.c main.h +virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h +ring: ring.o main.o +virtio_ring_0_9: virtio_ring_0_9.o main.o +virtio_ring_poll: virtio_ring_poll.o main.o +clean: + -rm main.o + -rm ring.o ring + -rm virtio_ring_0_9.o virtio_ring_0_9 + -rm virtio_ring_poll.o virtio_ring_poll + +.PHONY: all clean diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README new file mode 100644 index 000000000000..34e94c46104f --- /dev/null +++ b/tools/virtio/ringtest/README @@ -0,0 +1,2 @@ +Partial implementation of various ring layouts, useful to tune virtio design. +Uses shared memory heavily. diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c new file mode 100644 index 000000000000..3a5ff438bd62 --- /dev/null +++ b/tools/virtio/ringtest/main.c @@ -0,0 +1,366 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Command line processing and common functions for ring benchmarking. + */ +#define _GNU_SOURCE +#include <getopt.h> +#include <pthread.h> +#include <assert.h> +#include <sched.h> +#include "main.h" +#include <sys/eventfd.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <limits.h> + +int runcycles = 10000000; +int max_outstanding = INT_MAX; +int batch = 1; + +bool do_sleep = false; +bool do_relax = false; +bool do_exit = true; + +unsigned ring_size = 256; + +static int kickfd = -1; +static int callfd = -1; + +void notify(int fd) +{ + unsigned long long v = 1; + int r; + + vmexit(); + r = write(fd, &v, sizeof v); + assert(r == sizeof v); + vmentry(); +} + +void wait_for_notify(int fd) +{ + unsigned long long v = 1; + int r; + + vmexit(); + r = read(fd, &v, sizeof v); + assert(r == sizeof v); + vmentry(); +} + +void kick(void) +{ + notify(kickfd); +} + +void wait_for_kick(void) +{ + wait_for_notify(kickfd); +} + +void call(void) +{ + notify(callfd); +} + +void wait_for_call(void) +{ + wait_for_notify(callfd); +} + +void set_affinity(const char *arg) +{ + cpu_set_t cpuset; + int ret; + pthread_t self; + long int cpu; + char *endptr; + + if (!arg) + return; + + cpu = strtol(arg, &endptr, 0); + assert(!*endptr); + + assert(cpu >= 0 || cpu < CPU_SETSIZE); + + self = pthread_self(); + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset); + assert(!ret); +} + +static void run_guest(void) +{ + int completed_before; + int completed = 0; + int started = 0; + int bufs = runcycles; + int spurious = 0; + int r; + unsigned len; + void *buf; + int tokick = batch; + + for (;;) { + if (do_sleep) + disable_call(); + completed_before = completed; + do { + if (started < bufs && + started - completed < max_outstanding) { + r = add_inbuf(0, NULL, "Hello, world!"); + if (__builtin_expect(r == 0, true)) { + ++started; + if (!--tokick) { + tokick = batch; + if (do_sleep) + kick_available(); + } + + } + } else + r = -1; + + /* Flush out completed bufs if any */ + if (get_buf(&len, &buf)) { + ++completed; + if (__builtin_expect(completed == bufs, false)) + return; + r = 0; + } + } while (r == 0); + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + assert(started <= bufs); + if (do_sleep) { + if (enable_call()) + wait_for_call(); + } else { + poll_used(); + } + } +} + +static void run_host(void) +{ + int completed_before; + int completed = 0; + int spurious = 0; + int bufs = runcycles; + unsigned len; + void *buf; + + for (;;) { + if (do_sleep) { + if (enable_kick()) + wait_for_kick(); + } else { + poll_avail(); + } + if (do_sleep) + disable_kick(); + completed_before = completed; + while (__builtin_expect(use_buf(&len, &buf), true)) { + if (do_sleep) + call_used(); + ++completed; + if (__builtin_expect(completed == bufs, false)) + return; + } + if (completed == completed_before) + ++spurious; + assert(completed <= bufs); + if (completed == bufs) + break; + } +} + +void *start_guest(void *arg) +{ + set_affinity(arg); + run_guest(); + pthread_exit(NULL); +} + +void *start_host(void *arg) +{ + set_affinity(arg); + run_host(); + pthread_exit(NULL); +} + +static const char optstring[] = ""; +static const struct option longopts[] = { + { + .name = "help", + .has_arg = no_argument, + .val = 'h', + }, + { + .name = "host-affinity", + .has_arg = required_argument, + .val = 'H', + }, + { + .name = "guest-affinity", + .has_arg = required_argument, + .val = 'G', + }, + { + .name = "ring-size", + .has_arg = required_argument, + .val = 'R', + }, + { + .name = "run-cycles", + .has_arg = required_argument, + .val = 'C', + }, + { + .name = "outstanding", + .has_arg = required_argument, + .val = 'o', + }, + { + .name = "batch", + .has_arg = required_argument, + .val = 'b', + }, + { + .name = "sleep", + .has_arg = no_argument, + .val = 's', + }, + { + .name = "relax", + .has_arg = no_argument, + .val = 'x', + }, + { + .name = "exit", + .has_arg = no_argument, + .val = 'e', + }, + { + } +}; + +static void help(void) +{ + fprintf(stderr, "Usage: <test> [--help]" + " [--host-affinity H]" + " [--guest-affinity G]" + " [--ring-size R (default: %d)]" + " [--run-cycles C (default: %d)]" + " [--batch b]" + " [--outstanding o]" + " [--sleep]" + " [--relax]" + " [--exit]" + "\n", + ring_size, + runcycles); +} + +int main(int argc, char **argv) +{ + int ret; + pthread_t host, guest; + void *tret; + char *host_arg = NULL; + char *guest_arg = NULL; + char *endptr; + long int c; + + kickfd = eventfd(0, 0); + assert(kickfd >= 0); + callfd = eventfd(0, 0); + assert(callfd >= 0); + + for (;;) { + int o = getopt_long(argc, argv, optstring, longopts, NULL); + switch (o) { + case -1: + goto done; + case '?': + help(); + exit(2); + case 'H': + host_arg = optarg; + break; + case 'G': + guest_arg = optarg; + break; + case 'R': + ring_size = strtol(optarg, &endptr, 0); + assert(ring_size && !(ring_size & (ring_size - 1))); + assert(!*endptr); + break; + case 'C': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + runcycles = c; + break; + case 'o': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + max_outstanding = c; + break; + case 'b': + c = strtol(optarg, &endptr, 0); + assert(!*endptr); + assert(c > 0 && c < INT_MAX); + batch = c; + break; + case 's': + do_sleep = true; + break; + case 'x': + do_relax = true; + break; + case 'e': + do_exit = true; + break; + default: + help(); + exit(4); + break; + } + } + + /* does nothing here, used to make sure all smp APIs compile */ + smp_acquire(); + smp_release(); + smp_mb(); +done: + + if (batch > max_outstanding) + batch = max_outstanding; + + if (optind < argc) { + help(); + exit(4); + } + alloc_ring(); + + ret = pthread_create(&host, NULL, start_host, host_arg); + assert(!ret); + ret = pthread_create(&guest, NULL, start_guest, guest_arg); + assert(!ret); + + ret = pthread_join(guest, &tret); + assert(!ret); + ret = pthread_join(host, &tret); + assert(!ret); + return 0; +} diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h new file mode 100644 index 000000000000..16917acb0ade --- /dev/null +++ b/tools/virtio/ringtest/main.h @@ -0,0 +1,119 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Common macros and functions for ring benchmarking. + */ +#ifndef MAIN_H +#define MAIN_H + +#include <stdbool.h> + +extern bool do_exit; + +#if defined(__x86_64__) || defined(__i386__) +#include "x86intrin.h" + +static inline void wait_cycles(unsigned long long cycles) +{ + unsigned long long t; + + t = __rdtsc(); + while (__rdtsc() - t < cycles) {} +} + +#define VMEXIT_CYCLES 500 +#define VMENTRY_CYCLES 500 + +#else +static inline void wait_cycles(unsigned long long cycles) +{ + _Exit(5); +} +#define VMEXIT_CYCLES 0 +#define VMENTRY_CYCLES 0 +#endif + +static inline void vmexit(void) +{ + if (!do_exit) + return; + + wait_cycles(VMEXIT_CYCLES); +} +static inline void vmentry(void) +{ + if (!do_exit) + return; + + wait_cycles(VMENTRY_CYCLES); +} + +/* implemented by ring */ +void alloc_ring(void); +/* guest side */ +int add_inbuf(unsigned, void *, void *); +void *get_buf(unsigned *, void **); +void disable_call(); +bool enable_call(); +void kick_available(); +void poll_used(); +/* host side */ +void disable_kick(); +bool enable_kick(); +bool use_buf(unsigned *, void **); +void call_used(); +void poll_avail(); + +/* implemented by main */ +extern bool do_sleep; +void kick(void); +void wait_for_kick(void); +void call(void); +void wait_for_call(void); + +extern unsigned ring_size; + +/* Compiler barrier - similar to what Linux uses */ +#define barrier() asm volatile("" ::: "memory") + +/* Is there a portable way to do this? */ +#if defined(__x86_64__) || defined(__i386__) +#define cpu_relax() asm ("rep; nop" ::: "memory") +#else +#define cpu_relax() assert(0) +#endif + +extern bool do_relax; + +static inline void busy_wait(void) +{ + if (do_relax) + cpu_relax(); + else + /* prevent compiler from removing busy loops */ + barrier(); +} + +/* + * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized + * with other __ATOMIC_SEQ_CST calls. + */ +#define smp_mb() __sync_synchronize() + +/* + * This abuses the atomic builtins for thread fences, and + * adds a compiler barrier. + */ +#define smp_release() do { \ + barrier(); \ + __atomic_thread_fence(__ATOMIC_RELEASE); \ +} while (0) + +#define smp_acquire() do { \ + __atomic_thread_fence(__ATOMIC_ACQUIRE); \ + barrier(); \ +} while (0) + +#endif diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c new file mode 100644 index 000000000000..c25c8d248b6b --- /dev/null +++ b/tools/virtio/ringtest/ring.c @@ -0,0 +1,272 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Simple descriptor-based ring. virtio 0.9 compatible event index is used for + * signalling, unconditionally. + */ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +/* Next - Where next entry will be written. + * Prev - "Next" value when event triggered previously. + * Event - Peer requested event after writing this entry. + */ +static inline bool need_event(unsigned short event, + unsigned short next, + unsigned short prev) +{ + return (unsigned short)(next - event - 1) < (unsigned short)(next - prev); +} + +/* Design: + * Guest adds descriptors with unique index values and DESC_HW in flags. + * Host overwrites used descriptors with correct len, index, and DESC_HW clear. + * Flags are always set last. + */ +#define DESC_HW 0x1 + +struct desc { + unsigned short flags; + unsigned short index; + unsigned len; + unsigned long long addr; +}; + +/* how much padding is needed to avoid false cache sharing */ +#define HOST_GUEST_PADDING 0x80 + +/* Mostly read */ +struct event { + unsigned short kick_index; + unsigned char reserved0[HOST_GUEST_PADDING - 2]; + unsigned short call_index; + unsigned char reserved1[HOST_GUEST_PADDING - 2]; +}; + +struct data { + void *buf; /* descriptor is writeable, we can't get buf from there */ + void *data; +} *data; + +struct desc *ring; +struct event *event; + +struct guest { + unsigned avail_idx; + unsigned last_used_idx; + unsigned num_free; + unsigned kicked_avail_idx; + unsigned char reserved[HOST_GUEST_PADDING - 12]; +} guest; + +struct host { + /* we do not need to track last avail index + * unless we have more than one in flight. + */ + unsigned used_idx; + unsigned called_used_idx; + unsigned char reserved[HOST_GUEST_PADDING - 4]; +} host; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret; + int i; + + ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring); + if (ret) { + perror("Unable to allocate ring buffer.\n"); + exit(3); + } + event = malloc(sizeof *event); + if (!event) { + perror("Unable to allocate event buffer.\n"); + exit(3); + } + memset(event, 0, sizeof *event); + guest.avail_idx = 0; + guest.kicked_avail_idx = -1; + guest.last_used_idx = 0; + host.used_idx = 0; + host.called_used_idx = -1; + for (i = 0; i < ring_size; ++i) { + struct desc desc = { + .index = i, + }; + ring[i] = desc; + } + guest.num_free = ring_size; + data = malloc(ring_size * sizeof *data); + if (!data) { + perror("Unable to allocate data buffer.\n"); + exit(3); + } + memset(data, 0, ring_size * sizeof *data); +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + unsigned head, index; + + if (!guest.num_free) + return -1; + + guest.num_free--; + head = (ring_size - 1) & (guest.avail_idx++); + + /* Start with a write. On MESI architectures this helps + * avoid a shared state with consumer that is polling this descriptor. + */ + ring[head].addr = (unsigned long)(void*)buf; + ring[head].len = len; + /* read below might bypass write above. That is OK because it's just an + * optimization. If this happens, we will get the cache line in a + * shared state which is unfortunate, but probably not worth it to + * add an explicit full barrier to avoid this. + */ + barrier(); + index = ring[head].index; + data[index].buf = buf; + data[index].data = datap; + /* Barrier A (for pairing) */ + smp_release(); + ring[head].flags = DESC_HW; + + return 0; +} + +void *get_buf(unsigned *lenp, void **bufp) +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + unsigned index; + void *datap; + + if (ring[head].flags & DESC_HW) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + *lenp = ring[head].len; + index = ring[head].index & (ring_size - 1); + datap = data[index].data; + *bufp = data[index].buf; + data[index].buf = NULL; + data[index].data = NULL; + guest.num_free++; + guest.last_used_idx++; + return datap; +} + +void poll_used(void) +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + + while (ring[head].flags & DESC_HW) + busy_wait(); +} + +void disable_call() +{ + /* Doing nothing to disable calls might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_call() +{ + unsigned head = (ring_size - 1) & guest.last_used_idx; + + event->call_index = guest.last_used_idx; + /* Flush call index write */ + /* Barrier D (for pairing) */ + smp_mb(); + return ring[head].flags & DESC_HW; +} + +void kick_available(void) +{ + /* Flush in previous flags write */ + /* Barrier C (for pairing) */ + smp_mb(); + if (!need_event(event->kick_index, + guest.avail_idx, + guest.kicked_avail_idx)) + return; + + guest.kicked_avail_idx = guest.avail_idx; + kick(); +} + +/* host side */ +void disable_kick() +{ + /* Doing nothing to disable kicks might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_kick() +{ + unsigned head = (ring_size - 1) & host.used_idx; + + event->kick_index = host.used_idx; + /* Barrier C (for pairing) */ + smp_mb(); + return !(ring[head].flags & DESC_HW); +} + +void poll_avail(void) +{ + unsigned head = (ring_size - 1) & host.used_idx; + + while (!(ring[head].flags & DESC_HW)) + busy_wait(); +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + unsigned head = (ring_size - 1) & host.used_idx; + + if (!(ring[head].flags & DESC_HW)) + return false; + + /* make sure length read below is not speculated */ + /* Barrier A (for pairing) */ + smp_acquire(); + + /* simple in-order completion: we don't need + * to touch index at all. This also means we + * can just modify the descriptor in-place. + */ + ring[head].len--; + /* Make sure len is valid before flags. + * Note: alternative is to write len and flags in one access - + * possible on 64 bit architectures but wmb is free on Intel anyway + * so I have no way to test whether it's a gain. + */ + /* Barrier B (for pairing) */ + smp_release(); + ring[head].flags = 0; + host.used_idx++; + return true; +} + +void call_used(void) +{ + /* Flush in previous flags write */ + /* Barrier D (for pairing) */ + smp_mb(); + if (!need_event(event->call_index, + host.used_idx, + host.called_used_idx)) + return; + + host.called_used_idx = host.used_idx; + call(); +} diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh new file mode 100755 index 000000000000..52b0f71ffa8d --- /dev/null +++ b/tools/virtio/ringtest/run-on-all.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +#use last CPU for host. Why not the first? +#many devices tend to use cpu0 by default so +#it tends to be busier +HOST_AFFINITY=$(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n|tail -1) + +#run command on all cpus +for cpu in $(cd /dev/cpu; ls|grep -v '[a-z]'|sort -n); +do + #Don't run guest and host on same CPU + #It actually works ok if using signalling + if + (echo "$@" | grep -e "--sleep" > /dev/null) || \ + test $HOST_AFFINITY '!=' $cpu + then + echo "GUEST AFFINITY $cpu" + "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu + fi +done +echo "NO GUEST AFFINITY" +"$@" --host-affinity $HOST_AFFINITY +echo "NO AFFINITY" +"$@" diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c new file mode 100644 index 000000000000..47c9a1a18d36 --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_0_9.c @@ -0,0 +1,316 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. + * Author: Michael S. Tsirkin <mst@redhat.com> + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Partial implementation of virtio 0.9. event index is used for signalling, + * unconditionally. Design roughly follows linux kernel implementation in order + * to be able to judge its performance. + */ +#define _GNU_SOURCE +#include "main.h" +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include <string.h> +#include <linux/virtio_ring.h> + +struct data { + void *data; +} *data; + +struct vring ring; + +/* enabling the below activates experimental ring polling code + * (which skips index reads on consumer in favor of looking at + * high bits of ring id ^ 0x8000). + */ +/* #ifdef RING_POLL */ + +/* how much padding is needed to avoid false cache sharing */ +#define HOST_GUEST_PADDING 0x80 + +struct guest { + unsigned short avail_idx; + unsigned short last_used_idx; + unsigned short num_free; + unsigned short kicked_avail_idx; + unsigned short free_head; + unsigned char reserved[HOST_GUEST_PADDING - 10]; +} guest; + +struct host { + /* we do not need to track last avail index + * unless we have more than one in flight. + */ + unsigned short used_idx; + unsigned short called_used_idx; + unsigned char reserved[HOST_GUEST_PADDING - 4]; +} host; + +/* implemented by ring */ +void alloc_ring(void) +{ + int ret; + int i; + void *p; + + ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000)); + if (ret) { + perror("Unable to allocate ring buffer.\n"); + exit(3); + } + memset(p, 0, vring_size(ring_size, 0x1000)); + vring_init(&ring, ring_size, p, 0x1000); + + guest.avail_idx = 0; + guest.kicked_avail_idx = -1; + guest.last_used_idx = 0; + /* Put everything in free lists. */ + guest.free_head = 0; + for (i = 0; i < ring_size - 1; i++) + ring.desc[i].next = i + 1; + host.used_idx = 0; + host.called_used_idx = -1; + guest.num_free = ring_size; + data = malloc(ring_size * sizeof *data); + if (!data) { + perror("Unable to allocate data buffer.\n"); + exit(3); + } + memset(data, 0, ring_size * sizeof *data); +} + +/* guest side */ +int add_inbuf(unsigned len, void *buf, void *datap) +{ + unsigned head, avail; + struct vring_desc *desc; + + if (!guest.num_free) + return -1; + + head = guest.free_head; + guest.num_free--; + + desc = ring.desc; + desc[head].flags = VRING_DESC_F_NEXT; + desc[head].addr = (unsigned long)(void *)buf; + desc[head].len = len; + /* We do it like this to simulate the way + * we'd have to flip it if we had multiple + * descriptors. + */ + desc[head].flags &= ~VRING_DESC_F_NEXT; + guest.free_head = desc[head].next; + + data[head].data = datap; + +#ifdef RING_POLL + /* Barrier A (for pairing) */ + smp_release(); + avail = guest.avail_idx++; + ring.avail->ring[avail & (ring_size - 1)] = + (head | (avail & ~(ring_size - 1))) ^ 0x8000; +#else + avail = (ring_size - 1) & (guest.avail_idx++); + ring.avail->ring[avail] = head; + /* Barrier A (for pairing) */ + smp_release(); +#endif + ring.avail->idx = guest.avail_idx; + return 0; +} + +void *get_buf(unsigned *lenp, void **bufp) +{ + unsigned head; + unsigned index; + void *datap; + +#ifdef RING_POLL + head = (ring_size - 1) & guest.last_used_idx; + index = ring.used->ring[head].id; + if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + index &= ring_size - 1; +#else + if (ring.used->idx == guest.last_used_idx) + return NULL; + /* Barrier B (for pairing) */ + smp_acquire(); + head = (ring_size - 1) & guest.last_used_idx; + index = ring.used->ring[head].id; +#endif + *lenp = ring.used->ring[head].len; + datap = data[index].data; + *bufp = (void*)(unsigned long)ring.desc[index].addr; + data[index].data = NULL; + ring.desc[index].next = guest.free_head; + guest.free_head = index; + guest.num_free++; + guest.last_used_idx++; + return datap; +} + +void poll_used(void) +{ +#ifdef RING_POLL + unsigned head = (ring_size - 1) & guest.last_used_idx; + + for (;;) { + unsigned index = ring.used->ring[head].id; + + if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) + busy_wait(); + else + break; + } +#else + unsigned head = guest.last_used_idx; + + while (ring.used->idx == head) + busy_wait(); +#endif +} + +void disable_call() +{ + /* Doing nothing to disable calls might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_call() +{ + unsigned short last_used_idx; + + vring_used_event(&ring) = (last_used_idx = guest.last_used_idx); + /* Flush call index write */ + /* Barrier D (for pairing) */ + smp_mb(); +#ifdef RING_POLL + { + unsigned short head = last_used_idx & (ring_size - 1); + unsigned index = ring.used->ring[head].id; + + return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1); + } +#else + return ring.used->idx == last_used_idx; +#endif +} + +void kick_available(void) +{ + /* Flush in previous flags write */ + /* Barrier C (for pairing) */ + smp_mb(); + if (!vring_need_event(vring_avail_event(&ring), + guest.avail_idx, + guest.kicked_avail_idx)) + return; + + guest.kicked_avail_idx = guest.avail_idx; + kick(); +} + +/* host side */ +void disable_kick() +{ + /* Doing nothing to disable kicks might cause + * extra interrupts, but reduces the number of cache misses. + */ +} + +bool enable_kick() +{ + unsigned head = host.used_idx; + + vring_avail_event(&ring) = head; + /* Barrier C (for pairing) */ + smp_mb(); +#ifdef RING_POLL + { + unsigned index = ring.avail->ring[head & (ring_size - 1)]; + + return (index ^ head ^ 0x8000) & ~(ring_size - 1); + } +#else + return head == ring.avail->idx; +#endif +} + +void poll_avail(void) +{ + unsigned head = host.used_idx; +#ifdef RING_POLL + for (;;) { + unsigned index = ring.avail->ring[head & (ring_size - 1)]; + if ((index ^ head ^ 0x8000) & ~(ring_size - 1)) + busy_wait(); + else + break; + } +#else + while (ring.avail->idx == head) + busy_wait(); +#endif +} + +bool use_buf(unsigned *lenp, void **bufp) +{ + unsigned used_idx = host.used_idx; + struct vring_desc *desc; + unsigned head; + +#ifdef RING_POLL + head = ring.avail->ring[used_idx & (ring_size - 1)]; + if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1)) + return false; + /* Barrier A (for pairing) */ + smp_acquire(); + + used_idx &= ring_size - 1; + desc = &ring.desc[head & (ring_size - 1)]; +#else + if (used_idx == ring.avail->idx) + return false; + + /* Barrier A (for pairing) */ + smp_acquire(); + + used_idx &= ring_size - 1; + head = ring.avail->ring[used_idx]; + desc = &ring.desc[head]; +#endif + + *lenp = desc->len; + *bufp = (void *)(unsigned long)desc->addr; + + /* now update used ring */ + ring.used->ring[used_idx].id = head; + ring.used->ring[used_idx].len = desc->len - 1; + /* Barrier B (for pairing) */ + smp_release(); + host.used_idx++; + ring.used->idx = host.used_idx; + + return true; +} + +void call_used(void) +{ + /* Flush in previous flags write */ + /* Barrier D (for pairing) */ + smp_mb(); + if (!vring_need_event(vring_used_event(&ring), + host.used_idx, + host.called_used_idx)) + return; + + host.called_used_idx = host.used_idx; + call(); +} diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c new file mode 100644 index 000000000000..84fc2c557aaa --- /dev/null +++ b/tools/virtio/ringtest/virtio_ring_poll.c @@ -0,0 +1,2 @@ +#define RING_POLL 1 +#include "virtio_ring_0_9.c" |