diff options
Diffstat (limited to 'tools/testing/selftests')
77 files changed, 3456 insertions, 403 deletions
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 242635d79035..c9fa141ebdcc 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -417,6 +417,9 @@ int main(int argc, char *argv[]) /* Register SIGSEGV handler */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(20); + /* Buffer by byte tests */ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR), "Check buffer correctness by byte with sync err mode and mmap memory\n"); diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 97bebdecd29e..43bd94f853ba 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -163,6 +163,9 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler); mte_register_signal(SIGBUS, mte_default_handler); + /* Set test plan */ + ksft_set_plan(12); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), "Check child anonymous memory with private mapping, precise mode and mmap memory\n"); evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index bc41ae630c86..3b23c4d61d38 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -140,6 +140,10 @@ int main(int argc, char *argv[]) /* Register signal handlers */ mte_register_signal(SIGBUS, mte_default_handler); mte_register_signal(SIGSEGV, mte_default_handler); + + /* Set test plan */ + ksft_set_plan(4); + /* Enable KSM */ mte_ksm_setup(); diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 33b13b86199b..a04b12c21ac9 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -205,7 +205,11 @@ int main(int argc, char *argv[]) mte_register_signal(SIGBUS, mte_default_handler); mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(22); + mte_enable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index 94d245a0ed56..deaef1f61076 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -170,6 +170,9 @@ int main(int argc, char *argv[]) /* Register SIGSEGV handler */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(4); + evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR), "Check an included tag value with sync mode\n"); evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR), diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 594e98e76880..4bfa80f2a8c3 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -92,9 +92,13 @@ int main(int argc, char *argv[]) err = mte_default_setup(); if (err) return err; + /* Register signal handlers */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(4); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c new file mode 100644 index 000000000000..14a31109dd0e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */ + +#include <test_progs.h> +#include "test_map_init.skel.h" + +#define TEST_VALUE 0x1234 +#define FILL_VALUE 0xdeadbeef + +static int nr_cpus; +static int duration; + +typedef unsigned long long map_key_t; +typedef unsigned long long map_value_t; +typedef struct { + map_value_t v; /* padding */ +} __bpf_percpu_val_align pcpu_map_value_t; + + +static int map_populate(int map_fd, int num) +{ + pcpu_map_value_t value[nr_cpus]; + int i, err; + map_key_t key; + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value, i) = FILL_VALUE; + + for (key = 1; key <= num; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz, + int *map_fd, int populate) +{ + struct test_map_init *skel; + int err; + + skel = test_map_init__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + err = bpf_map__set_type(skel->maps.hashmap1, map_type); + if (!ASSERT_OK(err, "bpf_map__set_type")) + goto error; + + err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto error; + + err = test_map_init__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto error; + + *map_fd = bpf_map__fd(skel->maps.hashmap1); + if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n")) + goto error; + + err = map_populate(*map_fd, populate); + if (!ASSERT_OK(err, "map_populate")) + goto error_map; + + return skel; + +error_map: + close(*map_fd); +error: + test_map_init__destroy(skel); + return NULL; +} + +/* executes bpf program that updates map with key, value */ +static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key, + map_value_t value) +{ + struct test_map_init__bss *bss; + + bss = skel->bss; + + bss->inKey = key; + bss->inValue = value; + bss->inPid = getpid(); + + if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach")) + return -1; + + /* Let tracepoint trigger */ + syscall(__NR_getpgid); + + test_map_init__detach(skel); + + return 0; +} + +static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected) +{ + int i, nzCnt = 0; + map_value_t val; + + for (i = 0; i < nr_cpus; i++) { + val = bpf_percpu(value, i); + if (val) { + if (CHECK(val != expected, "map value", + "unexpected for cpu %d: 0x%llx\n", i, val)) + return -1; + nzCnt++; + } + } + + if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n", + nzCnt)) + return -1; + + return 0; +} + +/* Add key=1 elem with values set for all CPUs + * Delete elem key=1 + * Run bpf prog that inserts new key=1 elem with value=0x1234 + * (bpf prog can only set value for current CPU) + * Lookup Key=1 and check value is as expected for all CPUs: + * value set by bpf prog for one CPU, 0 for all others + */ +static void test_pcpu_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* max 1 elem in map so insertion is forced to reuse freed entry */ + skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* delete element so the entry can be re-used*/ + key = 1; + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "bpf_map_delete_elem")) + goto cleanup; + + /* run bpf prog that inserts new elem, re-using the slot just freed */ + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=1 was re-created by bpf prog */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +/* Add key=1 and key=2 elems with values set for all CPUs + * Run bpf prog that inserts new key=3 elem + * (only for current cpu; other cpus should have initial value = 0) + * Lookup Key=1 and check value is as expected for all CPUs + */ +static void test_pcpu_lru_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* Set up LRU map with 2 elements, values filled for all CPUs. + * With these 2 elements, the LRU map is full + */ + skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* run bpf prog that inserts new key=3 element, re-using LRU slot */ + key = 3; + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=3 replaced one of earlier elements */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +void test_map_init(void) +{ + nr_cpus = bpf_num_possible_cpus(); + if (nr_cpus <= 1) { + printf("%s:SKIP: >1 cpu needed for this test\n", __func__); + test__skip(); + return; + } + + if (test__start_subtest("pcpu_map_init")) + test_pcpu_map_init(); + if (test__start_subtest("pcpu_lru_map_init")) + test_pcpu_lru_map_init(); +} diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 00578311a423..30982a7e4d0f 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node) } } -int pids_cgrp_id = 1; +extern bool CONFIG_CGROUP_PIDS __kconfig __weak; +enum cgroup_subsys_id___local { + pids_cgrp_id___local = 123, /* value doesn't matter */ +}; static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, struct task_struct* task, @@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); - if (ENABLE_CGROUP_V1_RESOLVER) { + if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { + int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, + pids_cgrp_id___local); #ifdef UNROLL #pragma unroll #endif @@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, cgroups, subsys[i]); if (subsys != NULL) { int subsys_id = BPF_CORE_READ(subsys, ss, id); - if (subsys_id == pids_cgrp_id) { + if (subsys_id == cgrp_id) { proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); break; diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c new file mode 100644 index 000000000000..c89d28ead673 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_init.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u64 inKey = 0; +__u64 inValue = 0; +__u32 inPid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hashmap1 SEC(".maps"); + + +SEC("tp/syscalls/sys_enter_getpgid") +int sysenter_getpgid(const void *ctx) +{ + /* Just do it for once, when called from our own test prog. This + * ensures the map value is only updated for a single CPU. + */ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid == inPid) + bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 55bd387ce7ec..52d3f0364bda 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore) test_clone3_supported(); EXPECT_EQ(getuid(), 0) - XFAIL(return, "Skipping all tests as non-root\n"); + SKIP(return, "Skipping all tests as non-root"); memset(&set_tid, 0, sizeof(set_tid)); diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c99b98b0d461..575b391ddc78 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -44,7 +44,7 @@ TEST(close_range) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -52,7 +52,7 @@ TEST(close_range) EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { if (errno == ENOSYS) - XFAIL(return, "close_range() syscall not supported"); + SKIP(return, "close_range() syscall not supported"); } EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); @@ -108,7 +108,7 @@ TEST(close_range_unshare) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -197,7 +197,7 @@ TEST(close_range_unshare_capped) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh new file mode 100755 index 000000000000..be0c1b5ee6b8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -0,0 +1,436 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the nexthop offload API. It makes use of netdevsim +# which registers a listener to the nexthop notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + nexthop_single_add_test + nexthop_single_add_err_test + nexthop_group_add_test + nexthop_group_add_err_test + nexthop_group_replace_test + nexthop_group_replace_err_test + nexthop_single_replace_test + nexthop_single_replace_err_test + nexthop_single_in_group_replace_test + nexthop_single_in_group_replace_err_test + nexthop_single_in_group_delete_test + nexthop_single_in_group_delete_err_test + nexthop_replay_test + nexthop_replay_err_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +nexthop_check() +{ + local nharg="$1"; shift + local expected="$1"; shift + + out=$($IP nexthop show ${nharg} | sed -e 's/ *$//') + if [[ "$out" != "$expected" ]]; then + return 1 + fi + + return 0 +} + +nexthop_resource_check() +{ + local expected_occ=$1; shift + + occ=$($DEVLINK -jp resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="nexthops") | .["occ"]') + + if [ $expected_occ -ne $occ ]; then + return 1 + fi + + return 0 +} + +nexthop_resource_set() +{ + local size=$1; shift + + $DEVLINK resource set $DEVLINK_DEV path nexthops size $size + $DEVLINK dev reload $DEVLINK_DEV +} + +nexthop_single_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 1 + nexthop_resource_check 0 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Single nexthop add and delete" +} + +nexthop_single_add_err_test() +{ + RET=0 + + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop addition succeeded when should fail" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,20/2,39 + nexthop_check "id 10" "id 10 group 1,20/2,39 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_resource_check 61 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 + nexthop_check "id 10" "id 10 group 1/2/3 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Nexthop group replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_replace_err_test() +{ + RET=0 + + # This is supposed to cause the replace to fail because the new nexthop + # is programmed before deleting the replaced one. + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop replace succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Single nexthop replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_delete_err_test() +{ + RET=0 + + # First, nexthop 1 will be deleted, which will reduce the occupancy to + # 5. Afterwards, a replace notification will be sent for nexthop group + # 10 with only two nexthops. Since the new group is allocated before + # the old is deleted, the replacement will fail as it will result in an + # occupancy of 7. + nexthop_resource_set 6 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 + + $IP nexthop del id 1 + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_replay_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after reload" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop replay" + + $IP nexthop flush &> /dev/null +} + +nexthop_replay_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + # Reduce size of nexthop resource so that reload will fail. + $DEVLINK resource set $DEVLINK_DEV path nexthops size 3 + $DEVLINK dev reload $DEVLINK_DEV &> /dev/null + check_fail $? "Reload succeeded when should fail" + + $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999 + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + log_test "Nexthop replay failure" + + $IP nexthop flush &> /dev/null +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + IP="ip -netns testns1" + DEVLINK="devlink -N testns1" + + $IP link add name dummy1 up type dummy + $IP address add 192.0.2.1/24 dev dummy1 + + set +e +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 1d27f52c61e6..477cbb042f5b 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); EXPECT_EQ(ret, 0) { if (errno == ENODEV) - XFAIL(goto out, "binderfs missing"); + SKIP(goto out, "binderfs missing"); TH_LOG("%s - Failed to mount binderfs", strerror(errno)); goto rmdir; } @@ -475,10 +475,10 @@ TEST(binderfs_stress) TEST(binderfs_test_privileged) { if (geteuid() != 0) - XFAIL(return, "Tests are not run as root. Skipping privileged tests"); + SKIP(return, "Tests are not run as root. Skipping privileged tests"); if (__do_binderfs_test(_metadata)) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); } TEST(binderfs_test_unprivileged) @@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged) ret = wait_for_pid(pid); if (ret) { if (ret == 2) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); ASSERT_EQ(ret, 0) { TH_LOG("wait_for_pid() failed"); } diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index d979ff14775a..8f82f99f7748 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3282,4 +3282,99 @@ TEST(epoll60) close(ctx.epfd); } +struct epoll61_ctx { + int epfd; + int evfd; +}; + +static void *epoll61_write_eventfd(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + int64_t l = 1; + + usleep(10950); + write(ctx->evfd, &l, sizeof(l)); + return NULL; +} + +static void *epoll61_epoll_with_timeout(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + int n; + + n = epoll_wait(ctx->epfd, events, 1, 11); + /* + * If epoll returned the eventfd, write on the eventfd to wake up the + * blocking poller. + */ + if (n == 1) { + int64_t l = 1; + + write(ctx->evfd, &l, sizeof(l)); + } + return NULL; +} + +static void *epoll61_blocking_epoll(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + + epoll_wait(ctx->epfd, events, 1, -1); + return NULL; +} + +TEST(epoll61) +{ + struct epoll61_ctx ctx; + struct epoll_event ev; + int i, r; + + ctx.epfd = epoll_create1(0); + ASSERT_GE(ctx.epfd, 0); + ctx.evfd = eventfd(0, EFD_NONBLOCK); + ASSERT_GE(ctx.evfd, 0); + + ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP; + ev.data.ptr = NULL; + r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev); + ASSERT_EQ(r, 0); + + /* + * We are testing a race. Repeat the test case 1000 times to make it + * more likely to fail in case of a bug. + */ + for (i = 0; i < 1000; i++) { + pthread_t threads[3]; + int n; + + /* + * Start 3 threads: + * Thread 1 sleeps for 10.9ms and writes to the evenfd. + * Thread 2 calls epoll with a timeout of 11ms. + * Thread 3 calls epoll with a timeout of -1. + * + * The eventfd write by Thread 1 should either wakeup Thread 2 + * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the + * eventfd to wake up Thread 3. + * + * If no events are missed, all three threads should eventually + * be joinable. + */ + ASSERT_EQ(pthread_create(&threads[0], NULL, + epoll61_write_eventfd, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[1], NULL, + epoll61_epoll_with_timeout, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[2], NULL, + epoll61_blocking_epoll, &ctx), 0); + + for (n = 0; n < ARRAY_SIZE(threads); ++n) + ASSERT_EQ(pthread_join(threads[n], NULL), 0); + } + + close(ctx.epfd); + close(ctx.evfd); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 3bcd4c3624ee..b4da41d126d5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK echo "p:myevent1 $PLACE" >> dynamic_events echo "r:myevent2 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index 438961971b7e..3a0e2885fff5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index a8603bd23e0d..d3e138e8377f 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index acb17ce543d2..80541964b927 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_notrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 9f0a9687c773..2f7211254529 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 98305d76bd04..191d116b7883 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -4,9 +4,9 @@ # requires: set_ftrace_filter # flags: instance -echo kernel_clone:stacktrace >> set_ftrace_filter +echo $FUNCTION_FORK:stacktrace >> set_ftrace_filter -grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter +grep -q "$FUNCTION_FORK:stacktrace:unlimited" set_ftrace_filter (echo "forked"; sleep 1) diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index c5dec55b7d95..a6fac927ee82 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -133,6 +133,13 @@ yield() { ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1 } +# The fork function in the kernel was renamed from "_do_fork" to +# "kernel_fork". As older tests should still work with older kernels +# as well as newer kernels, check which version of fork is used on this +# kernel so that the tests can use the fork function for the running kernel. +FUNCTION_FORK=`(if grep '\bkernel_clone\b' /proc/kallsyms > /dev/null; then + echo kernel_clone; else echo '_do_fork'; fi)` + # Since probe event command may include backslash, explicitly use printf "%s" # to NOT interpret it. ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index 9737cd0578a7..2428a3ed78c9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - adding and removing # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index f9a40af76888..010a8b1d6c1d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - busy event check # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index eb543d3cfe5f..a96a1dc7014f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -3,13 +3,13 @@ # description: Kprobe dynamic event with arguments # requires: kprobe_events -echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events +echo "p:testprobe $FUNCTION_FORK \$stack \$stack0 +0(\$stack)" > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable ( echo "forked") -grep testprobe trace | grep 'kernel_clone' | \ +grep testprobe trace | grep "$FUNCTION_FORK" | \ grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' echo 0 > events/kprobes/testprobe/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 4e5b63be51c9..a053ee2e7d77 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -5,7 +5,7 @@ grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old -echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events +echo "p:testprobe $FUNCTION_FORK comm=\$comm " > kprobe_events grep testprobe kprobe_events | grep -q 'comm=$comm' test -d events/kprobes/testprobe diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a1d70588ab21..84285a6f60b0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -30,13 +30,13 @@ esac : "Test get argument (1)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index bd25dd0ba0d0..717130ed4feb 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then fi : "Test get basic types symbol argument" -echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events -echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +echo "p:testprobe_u $FUNCTION_FORK arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s $FUNCTION_FORK arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events if grep -q "x8/16/32/64" README; then - echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events + echo "p:testprobe_x $FUNCTION_FORK arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events fi -echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo "p:testprobe_bf $FUNCTION_FORK arg1=@linux_proc_banner:b8@4/32" >> kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable @@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace grep "testprobe_bf:.* arg1=.*" trace : "Test get string symbol argument" -echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events +echo "p:testprobe_str $FUNCTION_FORK arg1=@linux_proc_banner:string" > kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 91fcce1c241c..25b7708eb559 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize - echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" + echo "p:testprobe $FUNCTION_FORK \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" } check_types() { # s-type u-type x-type bf-type width diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index a30a9c07290d..d25d01a19778 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported :;: "user-memory access syntax and ustring working on user memory";: echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \ > kprobe_events +echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \ + >> kprobe_events grep myevent kprobe_events | \ grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string' echo 1 > events/kprobes/myevent/enable +echo 1 > events/kprobes/myevent2/enable echo > /dev/null echo 0 > events/kprobes/myevent/enable +echo 0 > events/kprobes/myevent2/enable grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"' diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 0d179094191f..5556292601a4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,29 +5,29 @@ # prepare echo nop > current_tracer -echo kernel_clone > set_ftrace_filter -echo 'p:testprobe kernel_clone' > kprobe_events +echo $FUNCTION_FORK > set_ftrace_filter +echo "p:testprobe $FUNCTION_FORK" > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -35,11 +35,11 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 45d90b6c763d..f0d5b7777ed7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "Create/append/":README # Choose 2 symbols for target -SYM1=kernel_clone +SYM1=$FUNCTION_FORK SYM2=do_exit EVENT_NAME=kprobes/testevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 1b5550ef8a9b..fa928b431555 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -86,15 +86,15 @@ esac # multiprobe errors if grep -q "Create/append/" README && grep -q "imm-value" README; then -echo 'p:kprobes/testevent kernel_clone' > kprobe_events +echo "p:kprobes/testevent $FUNCTION_FORK" > kprobe_events check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE # Explicitly use printf "%s" to not interpret \1 -printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events -check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE +printf "%s" "p:kprobes/testevent $FUNCTION_FORK abcd=\\1" > kprobe_events +check_error "p:kprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE +check_error "^p:kprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE fi # %return suffix errors diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 7ae492c204a4..197cc2afd404 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,14 +4,14 @@ # requires: kprobe_events # Add new kretprobe event -echo 'r:testprobe2 kernel_clone $retval' > kprobe_events +echo "r:testprobe2 $FUNCTION_FORK \$retval" > kprobe_events grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable ( echo "forked") -cat trace | grep testprobe2 | grep -q '<- kernel_clone' +cat trace | grep testprobe2 | grep -q "<- $FUNCTION_FORK" echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index c4093fc1a773..98166fa3eb91 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -4,7 +4,7 @@ # requires: kprobe_events ! grep -q 'myevent' kprobe_profile -echo p:myevent kernel_clone > kprobe_events +echo "p:myevent $FUNCTION_FORK" > kprobe_events grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile echo 1 > events/kprobes/myevent/enable ( echo "forked" ) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index f19804df244c..edce85420d19 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -126,7 +126,7 @@ snprintf(_metadata->results->reason, \ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \ if (TH_LOG_ENABLED) { \ - fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ + fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ _metadata->results->reason); \ } \ _metadata->passed = 1; \ @@ -432,7 +432,7 @@ */ /** - * ASSERT_EQ(expected, seen) + * ASSERT_EQ() * * @expected: expected value * @seen: measured value @@ -443,7 +443,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 1) /** - * ASSERT_NE(expected, seen) + * ASSERT_NE() * * @expected: expected value * @seen: measured value @@ -454,7 +454,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 1) /** - * ASSERT_LT(expected, seen) + * ASSERT_LT() * * @expected: expected value * @seen: measured value @@ -465,7 +465,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 1) /** - * ASSERT_LE(expected, seen) + * ASSERT_LE() * * @expected: expected value * @seen: measured value @@ -476,7 +476,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 1) /** - * ASSERT_GT(expected, seen) + * ASSERT_GT() * * @expected: expected value * @seen: measured value @@ -487,7 +487,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 1) /** - * ASSERT_GE(expected, seen) + * ASSERT_GE() * * @expected: expected value * @seen: measured value @@ -498,7 +498,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 1) /** - * ASSERT_NULL(seen) + * ASSERT_NULL() * * @seen: measured value * @@ -508,7 +508,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 1) /** - * ASSERT_TRUE(seen) + * ASSERT_TRUE() * * @seen: measured value * @@ -518,7 +518,7 @@ __EXPECT(0, "0", seen, #seen, !=, 1) /** - * ASSERT_FALSE(seen) + * ASSERT_FALSE() * * @seen: measured value * @@ -528,7 +528,7 @@ __EXPECT(0, "0", seen, #seen, ==, 1) /** - * ASSERT_STREQ(expected, seen) + * ASSERT_STREQ() * * @expected: expected value * @seen: measured value @@ -539,7 +539,7 @@ __EXPECT_STR(expected, seen, ==, 1) /** - * ASSERT_STRNE(expected, seen) + * ASSERT_STRNE() * * @expected: expected value * @seen: measured value @@ -550,7 +550,7 @@ __EXPECT_STR(expected, seen, !=, 1) /** - * EXPECT_EQ(expected, seen) + * EXPECT_EQ() * * @expected: expected value * @seen: measured value @@ -561,7 +561,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 0) /** - * EXPECT_NE(expected, seen) + * EXPECT_NE() * * @expected: expected value * @seen: measured value @@ -572,7 +572,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 0) /** - * EXPECT_LT(expected, seen) + * EXPECT_LT() * * @expected: expected value * @seen: measured value @@ -583,7 +583,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 0) /** - * EXPECT_LE(expected, seen) + * EXPECT_LE() * * @expected: expected value * @seen: measured value @@ -594,7 +594,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 0) /** - * EXPECT_GT(expected, seen) + * EXPECT_GT() * * @expected: expected value * @seen: measured value @@ -605,7 +605,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 0) /** - * EXPECT_GE(expected, seen) + * EXPECT_GE() * * @expected: expected value * @seen: measured value @@ -616,7 +616,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 0) /** - * EXPECT_NULL(seen) + * EXPECT_NULL() * * @seen: measured value * @@ -626,7 +626,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 0) /** - * EXPECT_TRUE(seen) + * EXPECT_TRUE() * * @seen: measured value * @@ -636,7 +636,7 @@ __EXPECT(0, "0", seen, #seen, !=, 0) /** - * EXPECT_FALSE(seen) + * EXPECT_FALSE() * * @seen: measured value * @@ -646,7 +646,7 @@ __EXPECT(0, "0", seen, #seen, ==, 0) /** - * EXPECT_STREQ(expected, seen) + * EXPECT_STREQ() * * @expected: expected value * @seen: measured value @@ -657,7 +657,7 @@ __EXPECT_STR(expected, seen, ==, 0) /** - * EXPECT_STRNE(expected, seen) + * EXPECT_STRNE() * * @expected: expected value * @seen: measured value diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 307ceaadbbb9..7a2c242b7152 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,10 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/get-reg-list +/aarch64/get-reg-list-sve /s390x/memop /s390x/resets /s390x/sync_regs_test /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/kvm_pv_test /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test @@ -15,6 +18,7 @@ /x86_64/vmx_preemption_timer_test /x86_64/svm_vmcall_test /x86_64/sync_regs_test +/x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test /x86_64/vmx_set_nested_state_test @@ -23,6 +27,7 @@ /clear_dirty_log_test /demand_paging_test /dirty_log_test +/dirty_log_perf_test /kvm_create_max_vcpus /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 7ebe71fbca53..3d14ef77755e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -34,13 +34,14 @@ ifeq ($(ARCH),s390) endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test @@ -49,6 +50,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test @@ -57,14 +59,15 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test -TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_aarch64 += clear_dirty_log_test +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus @@ -110,14 +113,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option) include ../lib.mk STATIC_LIBS := $(OUTPUT)/libkvm.a -LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) -EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.* +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.* + +x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) -$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) +$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS) $(AR) crs $@ $^ x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c new file mode 100644 index 000000000000..efba76682b4b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define REG_LIST_SVE +#include "get-reg-list.c" diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c new file mode 100644 index 000000000000..33218a395d9f --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -0,0 +1,841 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * Note, the blessed list should be created from the oldest possible + * kernel. We can't go older than v4.15, though, because that's the first + * release to expose the ID system registers in KVM_GET_REG_LIST, see + * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features + * from guests"). Also, one must use the --core-reg-fixup command line + * option when running on an older kernel that doesn't include df205b5c6328 + * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +#ifdef REG_LIST_SVE +#define reg_list_sve() (true) +#else +#define reg_list_sve() (false) +#endif + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + + +static struct kvm_reg_list *reg_list; + +static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; +static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; +static __u64 *blessed_reg, blessed_n; + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +static const char *str_with_index(const char *template, __u64 index) +{ + char *str, *p; + int n; + + str = strdup(template); + p = strstr(str, "##"); + n = sprintf(p, "%lld", index); + strcat(p + n, strstr(template, "##") + 2); + + return (const char *)str; +} + +#define CORE_REGS_XX_NR_WORDS 2 +#define CORE_SPSR_XX_NR_WORDS 2 +#define CORE_FPREGS_XX_NR_WORDS 4 + +static const char *core_id_to_str(__u64 id) +{ + __u64 core_off = id & ~REG_MASK, idx; + + /* + * core_off is the offset into struct kvm_regs + */ + switch (core_off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; + TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + case KVM_REG_ARM_CORE_REG(regs.sp): + return "KVM_REG_ARM_CORE_REG(regs.sp)"; + case KVM_REG_ARM_CORE_REG(regs.pc): + return "KVM_REG_ARM_CORE_REG(regs.pc)"; + case KVM_REG_ARM_CORE_REG(regs.pstate): + return "KVM_REG_ARM_CORE_REG(regs.pstate)"; + case KVM_REG_ARM_CORE_REG(sp_el1): + return "KVM_REG_ARM_CORE_REG(sp_el1)"; + case KVM_REG_ARM_CORE_REG(elr_el1): + return "KVM_REG_ARM_CORE_REG(elr_el1)"; + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; + TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; + TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; + } + + TEST_FAIL("Unknown core reg id: 0x%llx", id); + return NULL; +} + +static const char *sve_id_to_str(__u64 id) +{ + __u64 sve_off, n, i; + + if (id == KVM_REG_ARM64_SVE_VLS) + return "KVM_REG_ARM64_SVE_VLS"; + + sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); + i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); + + TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id); + + switch (sve_off) { + case KVM_REG_ARM64_SVE_ZREG_BASE ... + KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), + "Unexpected bits set in SVE ZREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + case KVM_REG_ARM64_SVE_PREG_BASE ... + KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), + "Unexpected bits set in SVE PREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + case KVM_REG_ARM64_SVE_FFR_BASE: + TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), + "Unexpected bits set in SVE FFR id: 0x%llx", id); + return "KVM_REG_ARM64_SVE_FFR(0)"; + } + + return NULL; +} + +static void print_reg(__u64 id) +{ + unsigned op0, op1, crn, crm, op2; + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, + "KVM_REG_ARM64 missing in reg id: 0x%llx", id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U8: + reg_size = "KVM_REG_SIZE_U8"; + break; + case KVM_REG_SIZE_U16: + reg_size = "KVM_REG_SIZE_U16"; + break; + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; + case KVM_REG_SIZE_U512: + reg_size = "KVM_REG_SIZE_U512"; + break; + case KVM_REG_SIZE_U1024: + reg_size = "KVM_REG_SIZE_U1024"; + break; + case KVM_REG_SIZE_U2048: + reg_size = "KVM_REG_SIZE_U2048"; + break; + default: + TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id)); + break; + case KVM_REG_ARM_DEMUX: + TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), + "Unexpected bits set in DEMUX reg id: 0x%llx", id); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", + reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); + break; + case KVM_REG_ARM64_SYSREG: + op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT; + op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT; + crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT; + crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; + op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; + TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), + "Unexpected bits set in SYSREG reg id: 0x%llx", id); + printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); + break; + case KVM_REG_ARM_FW: + TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), + "Unexpected bits set in FW reg id: 0x%llx", id); + printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); + break; + case KVM_REG_ARM64_SVE: + if (reg_list_sve()) + printf("\t%s,\n", sve_id_to_str(id)); + else + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + break; + default: + TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + } +} + +/* + * Older kernels listed each 32-bit word of CORE registers separately. + * For 64 and 128-bit registers we need to ignore the extra words. We + * also need to fixup the sizes, because the older kernels stated all + * registers were 64-bit, even when they weren't. + */ +static void core_reg_fixup(void) +{ + struct kvm_reg_list *tmp; + __u64 id, core_off; + int i; + + tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); + + for (i = 0; i < reg_list->n; ++i) { + id = reg_list->reg[i]; + + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { + tmp->reg[tmp->n++] = id; + continue; + } + + core_off = id & ~REG_MASK; + + switch (core_off) { + case 0x52: case 0xd2: case 0xd6: + /* + * These offsets are pointing at padding. + * We need to ignore them too. + */ + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + if (core_off & 3) + continue; + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U128; + tmp->reg[tmp->n++] = id; + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U32; + tmp->reg[tmp->n++] = id; + continue; + default: + if (core_off & 1) + continue; + tmp->reg[tmp->n++] = id; + break; + } + } + + free(reg_list); + reg_list = tmp; +} + +static void prepare_vcpu_init(struct kvm_vcpu_init *init) +{ + if (reg_list_sve()) + init->features[0] |= 1 << KVM_ARM_VCPU_SVE; +} + +static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + int feature; + + if (reg_list_sve()) { + feature = KVM_ARM_VCPU_SVE; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + } +} + +static void check_supported(void) +{ + if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) { + fprintf(stderr, "SVE not available, skipping tests\n"); + exit(KSFT_SKIP); + } +} + +int main(int ac, char **av) +{ + struct kvm_vcpu_init init = { .target = -1, }; + int new_regs = 0, missing_regs = 0, i; + int failed_get = 0, failed_set = 0, failed_reject = 0; + bool print_list = false, fixup_core_regs = false; + struct kvm_vm *vm; + __u64 *vec_regs; + + check_supported(); + + for (i = 1; i < ac; ++i) { + if (strcmp(av[i], "--core-reg-fixup") == 0) + fixup_core_regs = true; + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else + fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + } + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + prepare_vcpu_init(&init); + aarch64_vcpu_add_default(vm, 0, &init, NULL); + finalize_vcpu(vm, 0); + + reg_list = vcpu_get_reg_list(vm, 0); + + if (fixup_core_regs) + core_reg_fixup(); + + if (print_list) { + putchar('\n'); + for_each_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + return 0; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + */ + for_each_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + int ret; + + ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); + if (ret) { + puts("Failed to get "); + print_reg(reg.id); + putchar('\n'); + ++failed_get; + } + + /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ + if (find_reg(rejects_set, rejects_set_n, reg.id)) { + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret != -1 || errno != EPERM) { + printf("Failed to reject (ret=%d, errno=%d) ", ret, errno); + print_reg(reg.id); + putchar('\n'); + ++failed_reject; + } + continue; + } + + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret) { + puts("Failed to set "); + print_reg(reg.id); + putchar('\n'); + ++failed_set; + } + } + + if (reg_list_sve()) { + blessed_n = base_regs_n + sve_regs_n; + vec_regs = sve_regs; + } else { + blessed_n = base_regs_n + vregs_n; + vec_regs = vregs; + } + + blessed_reg = calloc(blessed_n, sizeof(__u64)); + for (i = 0; i < base_regs_n; ++i) + blessed_reg[i] = base_regs[i]; + for (i = 0; i < blessed_n - base_regs_n; ++i) + blessed_reg[base_regs_n + i] = vec_regs[i]; + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + printf("Number blessed registers: %5lld\n", blessed_n); + printf("Number registers: %5lld\n", reg_list->n); + } + + if (new_regs) { + printf("\nThere are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", new_regs); + for_each_new_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\nThere are %d missing registers.\n" + "The following lines are missing registers:\n\n", missing_regs); + for_each_missing_reg(i) + print_reg(blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "There are %d missing registers; " + "%d registers failed get; %d registers failed set; %d registers failed reject", + missing_regs, failed_get, failed_set, failed_reject); + + return 0; +} + +/* + * The current blessed list was primed with the output of kernel version + * v4.15 with --core-reg-fixup and then later updated with new registers. + */ +static __u64 base_regs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), + KVM_REG_ARM_FW_REG(0), + KVM_REG_ARM_FW_REG(1), + KVM_REG_ARM_FW_REG(2), + ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 2), + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */ + ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */ + ARM64_SYS_REG(2, 0, 0, 0, 4), + ARM64_SYS_REG(2, 0, 0, 0, 5), + ARM64_SYS_REG(2, 0, 0, 0, 6), + ARM64_SYS_REG(2, 0, 0, 0, 7), + ARM64_SYS_REG(2, 0, 0, 1, 4), + ARM64_SYS_REG(2, 0, 0, 1, 5), + ARM64_SYS_REG(2, 0, 0, 1, 6), + ARM64_SYS_REG(2, 0, 0, 1, 7), + ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 4), + ARM64_SYS_REG(2, 0, 0, 2, 5), + ARM64_SYS_REG(2, 0, 0, 2, 6), + ARM64_SYS_REG(2, 0, 0, 2, 7), + ARM64_SYS_REG(2, 0, 0, 3, 4), + ARM64_SYS_REG(2, 0, 0, 3, 5), + ARM64_SYS_REG(2, 0, 0, 3, 6), + ARM64_SYS_REG(2, 0, 0, 3, 7), + ARM64_SYS_REG(2, 0, 0, 4, 4), + ARM64_SYS_REG(2, 0, 0, 4, 5), + ARM64_SYS_REG(2, 0, 0, 4, 6), + ARM64_SYS_REG(2, 0, 0, 4, 7), + ARM64_SYS_REG(2, 0, 0, 5, 4), + ARM64_SYS_REG(2, 0, 0, 5, 5), + ARM64_SYS_REG(2, 0, 0, 5, 6), + ARM64_SYS_REG(2, 0, 0, 5, 7), + ARM64_SYS_REG(2, 0, 0, 6, 4), + ARM64_SYS_REG(2, 0, 0, 6, 5), + ARM64_SYS_REG(2, 0, 0, 6, 6), + ARM64_SYS_REG(2, 0, 0, 6, 7), + ARM64_SYS_REG(2, 0, 0, 7, 4), + ARM64_SYS_REG(2, 0, 0, 7, 5), + ARM64_SYS_REG(2, 0, 0, 7, 6), + ARM64_SYS_REG(2, 0, 0, 7, 7), + ARM64_SYS_REG(2, 0, 0, 8, 4), + ARM64_SYS_REG(2, 0, 0, 8, 5), + ARM64_SYS_REG(2, 0, 0, 8, 6), + ARM64_SYS_REG(2, 0, 0, 8, 7), + ARM64_SYS_REG(2, 0, 0, 9, 4), + ARM64_SYS_REG(2, 0, 0, 9, 5), + ARM64_SYS_REG(2, 0, 0, 9, 6), + ARM64_SYS_REG(2, 0, 0, 9, 7), + ARM64_SYS_REG(2, 0, 0, 10, 4), + ARM64_SYS_REG(2, 0, 0, 10, 5), + ARM64_SYS_REG(2, 0, 0, 10, 6), + ARM64_SYS_REG(2, 0, 0, 10, 7), + ARM64_SYS_REG(2, 0, 0, 11, 4), + ARM64_SYS_REG(2, 0, 0, 11, 5), + ARM64_SYS_REG(2, 0, 0, 11, 6), + ARM64_SYS_REG(2, 0, 0, 11, 7), + ARM64_SYS_REG(2, 0, 0, 12, 4), + ARM64_SYS_REG(2, 0, 0, 12, 5), + ARM64_SYS_REG(2, 0, 0, 12, 6), + ARM64_SYS_REG(2, 0, 0, 12, 7), + ARM64_SYS_REG(2, 0, 0, 13, 4), + ARM64_SYS_REG(2, 0, 0, 13, 5), + ARM64_SYS_REG(2, 0, 0, 13, 6), + ARM64_SYS_REG(2, 0, 0, 13, 7), + ARM64_SYS_REG(2, 0, 0, 14, 4), + ARM64_SYS_REG(2, 0, 0, 14, 5), + ARM64_SYS_REG(2, 0, 0, 14, 6), + ARM64_SYS_REG(2, 0, 0, 14, 7), + ARM64_SYS_REG(2, 0, 0, 15, 4), + ARM64_SYS_REG(2, 0, 0, 15, 5), + ARM64_SYS_REG(2, 0, 0, 15, 6), + ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ + ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 3), + ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 7), + ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 2), + ARM64_SYS_REG(3, 0, 0, 4, 3), + ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 5), + ARM64_SYS_REG(3, 0, 0, 4, 6), + ARM64_SYS_REG(3, 0, 0, 4, 7), + ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 2), + ARM64_SYS_REG(3, 0, 0, 5, 3), + ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 6), + ARM64_SYS_REG(3, 0, 0, 5, 7), + ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 2), + ARM64_SYS_REG(3, 0, 0, 6, 3), + ARM64_SYS_REG(3, 0, 0, 6, 4), + ARM64_SYS_REG(3, 0, 0, 6, 5), + ARM64_SYS_REG(3, 0, 0, 6, 6), + ARM64_SYS_REG(3, 0, 0, 6, 7), + ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), + ARM64_SYS_REG(3, 0, 0, 7, 4), + ARM64_SYS_REG(3, 0, 0, 7, 5), + ARM64_SYS_REG(3, 0, 0, 7, 6), + ARM64_SYS_REG(3, 0, 0, 7, 7), + ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ + ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ + ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ + ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ + ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 3, 14, 8, 0), + ARM64_SYS_REG(3, 3, 14, 8, 1), + ARM64_SYS_REG(3, 3, 14, 8, 2), + ARM64_SYS_REG(3, 3, 14, 8, 3), + ARM64_SYS_REG(3, 3, 14, 8, 4), + ARM64_SYS_REG(3, 3, 14, 8, 5), + ARM64_SYS_REG(3, 3, 14, 8, 6), + ARM64_SYS_REG(3, 3, 14, 8, 7), + ARM64_SYS_REG(3, 3, 14, 9, 0), + ARM64_SYS_REG(3, 3, 14, 9, 1), + ARM64_SYS_REG(3, 3, 14, 9, 2), + ARM64_SYS_REG(3, 3, 14, 9, 3), + ARM64_SYS_REG(3, 3, 14, 9, 4), + ARM64_SYS_REG(3, 3, 14, 9, 5), + ARM64_SYS_REG(3, 3, 14, 9, 6), + ARM64_SYS_REG(3, 3, 14, 9, 7), + ARM64_SYS_REG(3, 3, 14, 10, 0), + ARM64_SYS_REG(3, 3, 14, 10, 1), + ARM64_SYS_REG(3, 3, 14, 10, 2), + ARM64_SYS_REG(3, 3, 14, 10, 3), + ARM64_SYS_REG(3, 3, 14, 10, 4), + ARM64_SYS_REG(3, 3, 14, 10, 5), + ARM64_SYS_REG(3, 3, 14, 10, 6), + ARM64_SYS_REG(3, 3, 14, 10, 7), + ARM64_SYS_REG(3, 3, 14, 11, 0), + ARM64_SYS_REG(3, 3, 14, 11, 1), + ARM64_SYS_REG(3, 3, 14, 11, 2), + ARM64_SYS_REG(3, 3, 14, 11, 3), + ARM64_SYS_REG(3, 3, 14, 11, 4), + ARM64_SYS_REG(3, 3, 14, 11, 5), + ARM64_SYS_REG(3, 3, 14, 11, 6), + ARM64_SYS_REG(3, 3, 14, 12, 0), + ARM64_SYS_REG(3, 3, 14, 12, 1), + ARM64_SYS_REG(3, 3, 14, 12, 2), + ARM64_SYS_REG(3, 3, 14, 12, 3), + ARM64_SYS_REG(3, 3, 14, 12, 4), + ARM64_SYS_REG(3, 3, 14, 12, 5), + ARM64_SYS_REG(3, 3, 14, 12, 6), + ARM64_SYS_REG(3, 3, 14, 12, 7), + ARM64_SYS_REG(3, 3, 14, 13, 0), + ARM64_SYS_REG(3, 3, 14, 13, 1), + ARM64_SYS_REG(3, 3, 14, 13, 2), + ARM64_SYS_REG(3, 3, 14, 13, 3), + ARM64_SYS_REG(3, 3, 14, 13, 4), + ARM64_SYS_REG(3, 3, 14, 13, 5), + ARM64_SYS_REG(3, 3, 14, 13, 6), + ARM64_SYS_REG(3, 3, 14, 13, 7), + ARM64_SYS_REG(3, 3, 14, 14, 0), + ARM64_SYS_REG(3, 3, 14, 14, 1), + ARM64_SYS_REG(3, 3, 14, 14, 2), + ARM64_SYS_REG(3, 3, 14, 14, 3), + ARM64_SYS_REG(3, 3, 14, 14, 4), + ARM64_SYS_REG(3, 3, 14, 14, 5), + ARM64_SYS_REG(3, 3, 14, 14, 6), + ARM64_SYS_REG(3, 3, 14, 14, 7), + ARM64_SYS_REG(3, 3, 14, 15, 0), + ARM64_SYS_REG(3, 3, 14, 15, 1), + ARM64_SYS_REG(3, 3, 14, 15, 2), + ARM64_SYS_REG(3, 3, 14, 15, 3), + ARM64_SYS_REG(3, 3, 14, 15, 4), + ARM64_SYS_REG(3, 3, 14, 15, 5), + ARM64_SYS_REG(3, 3, 14, 15, 6), + ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, +}; +static __u64 base_regs_n = ARRAY_SIZE(base_regs); + +static __u64 vregs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), +}; +static __u64 vregs_n = ARRAY_SIZE(vregs); + +static __u64 sve_regs[] = { + KVM_REG_ARM64_SVE_VLS, + KVM_REG_ARM64_SVE_ZREG(0, 0), + KVM_REG_ARM64_SVE_ZREG(1, 0), + KVM_REG_ARM64_SVE_ZREG(2, 0), + KVM_REG_ARM64_SVE_ZREG(3, 0), + KVM_REG_ARM64_SVE_ZREG(4, 0), + KVM_REG_ARM64_SVE_ZREG(5, 0), + KVM_REG_ARM64_SVE_ZREG(6, 0), + KVM_REG_ARM64_SVE_ZREG(7, 0), + KVM_REG_ARM64_SVE_ZREG(8, 0), + KVM_REG_ARM64_SVE_ZREG(9, 0), + KVM_REG_ARM64_SVE_ZREG(10, 0), + KVM_REG_ARM64_SVE_ZREG(11, 0), + KVM_REG_ARM64_SVE_ZREG(12, 0), + KVM_REG_ARM64_SVE_ZREG(13, 0), + KVM_REG_ARM64_SVE_ZREG(14, 0), + KVM_REG_ARM64_SVE_ZREG(15, 0), + KVM_REG_ARM64_SVE_ZREG(16, 0), + KVM_REG_ARM64_SVE_ZREG(17, 0), + KVM_REG_ARM64_SVE_ZREG(18, 0), + KVM_REG_ARM64_SVE_ZREG(19, 0), + KVM_REG_ARM64_SVE_ZREG(20, 0), + KVM_REG_ARM64_SVE_ZREG(21, 0), + KVM_REG_ARM64_SVE_ZREG(22, 0), + KVM_REG_ARM64_SVE_ZREG(23, 0), + KVM_REG_ARM64_SVE_ZREG(24, 0), + KVM_REG_ARM64_SVE_ZREG(25, 0), + KVM_REG_ARM64_SVE_ZREG(26, 0), + KVM_REG_ARM64_SVE_ZREG(27, 0), + KVM_REG_ARM64_SVE_ZREG(28, 0), + KVM_REG_ARM64_SVE_ZREG(29, 0), + KVM_REG_ARM64_SVE_ZREG(30, 0), + KVM_REG_ARM64_SVE_ZREG(31, 0), + KVM_REG_ARM64_SVE_PREG(0, 0), + KVM_REG_ARM64_SVE_PREG(1, 0), + KVM_REG_ARM64_SVE_PREG(2, 0), + KVM_REG_ARM64_SVE_PREG(3, 0), + KVM_REG_ARM64_SVE_PREG(4, 0), + KVM_REG_ARM64_SVE_PREG(5, 0), + KVM_REG_ARM64_SVE_PREG(6, 0), + KVM_REG_ARM64_SVE_PREG(7, 0), + KVM_REG_ARM64_SVE_PREG(8, 0), + KVM_REG_ARM64_SVE_PREG(9, 0), + KVM_REG_ARM64_SVE_PREG(10, 0), + KVM_REG_ARM64_SVE_PREG(11, 0), + KVM_REG_ARM64_SVE_PREG(12, 0), + KVM_REG_ARM64_SVE_PREG(13, 0), + KVM_REG_ARM64_SVE_PREG(14, 0), + KVM_REG_ARM64_SVE_PREG(15, 0), + KVM_REG_ARM64_SVE_FFR(0), + ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ +}; +static __u64 sve_regs_n = ARRAY_SIZE(sve_regs); + +static __u64 rejects_set[] = { +#ifdef REG_LIST_SVE + KVM_REG_ARM64_SVE_VLS, +#endif +}; +static __u64 rejects_set_n = ARRAY_SIZE(rejects_set); diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c deleted file mode 100644 index 11672ec6f74e..000000000000 --- a/tools/testing/selftests/kvm/clear_dirty_log_test.c +++ /dev/null @@ -1,6 +0,0 @@ -#define USE_CLEAR_DIRTY_LOG -#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) -#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) -#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ - KVM_DIRTY_LOG_INITIALLY_SET) -#include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 360cd3ea4cd6..3d96a7bfaff3 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -21,20 +21,12 @@ #include <linux/bitops.h> #include <linux/userfaultfd.h> -#include "test_util.h" -#include "kvm_util.h" +#include "perf_test_util.h" #include "processor.h" +#include "test_util.h" #ifdef __NR_userfaultfd -/* The memory slot index demand page */ -#define TEST_MEM_SLOT_INDEX 1 - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - -#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ - #ifdef PRINT_PER_PAGE_UPDATES #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) #else @@ -47,77 +39,17 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif -#define MAX_VCPUS 512 - -/* - * Guest/Host shared variables. Ensure addr_gva2hva() and/or - * sync_global_to/from_guest() are used when accessing from - * the host. READ/WRITE_ONCE() should also be used with anything - * that may change. - */ -static uint64_t host_page_size; -static uint64_t guest_page_size; - static char *guest_data_prototype; -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; - -struct vcpu_args { - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - int vcpu_id; - struct kvm_vm *vm; -}; - -static struct vcpu_args vcpu_args[MAX_VCPUS]; - -/* - * Continuously write to the first 8 bytes of each page in the demand paging - * memory region. - */ -static void guest_code(uint32_t vcpu_id) -{ - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id); - - gva = vcpu_args[vcpu_id].gva; - pages = vcpu_args[vcpu_id].pages; - - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * guest_page_size); - - addr &= ~(host_page_size - 1); - *(uint64_t *)addr = 0x0123456789ABCDEF; - } - - GUEST_SYNC(1); -} - static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *args = (struct vcpu_args *)data; - struct kvm_vm *vm = args->vm; - int vcpu_id = args->vcpu_id; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); @@ -133,52 +65,18 @@ static void *vcpu_worker(void *data) exit_reason_str(run->exit_reason)); } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); return NULL; } -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; - - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - vm = _vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - return vm; -} - static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; struct timespec start; - struct timespec end; + struct timespec ts_diff; struct uffdio_copy copy; int r; @@ -186,7 +84,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) copy.src = (uint64_t)guest_data_prototype; copy.dst = addr; - copy.len = host_page_size; + copy.len = perf_test_args.host_page_size; copy.mode = 0; clock_gettime(CLOCK_MONOTONIC, &start); @@ -198,12 +96,12 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) return r; } - clock_gettime(CLOCK_MONOTONIC, &end); + ts_diff = timespec_diff_now(start); PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, - timespec_to_ns(timespec_sub(end, start))); + timespec_to_ns(ts_diff)); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", - host_page_size, addr, tid); + perf_test_args.host_page_size, addr, tid); return 0; } @@ -223,7 +121,8 @@ static void *uffd_handler_thread_fn(void *arg) int pipefd = uffd_args->pipefd; useconds_t delay = uffd_args->delay; int64_t pages = 0; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; clock_gettime(CLOCK_MONOTONIC, &start); while (!quit_uffd_thread) { @@ -292,8 +191,7 @@ static void *uffd_handler_thread_fn(void *arg) pages++; } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", pages, ts_diff.tv_sec, ts_diff.tv_nsec, pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); @@ -351,99 +249,54 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus, - uint64_t vcpu_memory_bytes) + useconds_t uffd_delay) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; int *pipefds = NULL; struct kvm_vm *vm; - uint64_t guest_num_pages; int vcpu_id; int r; - vm = create_vm(mode, vcpus, vcpu_memory_bytes); - - guest_page_size = vm_get_page_size(vm); - - TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - host_page_size = getpagesize(); - TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0, - "Guest memory size is not host page size aligned."); + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - guest_page_size; - guest_test_phys_mem &= ~(host_page_size - 1); + perf_test_args.wr_fract = 1; -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing demand paging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - guest_data_prototype = malloc(host_page_size); + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); - memset(guest_data_prototype, 0xAB, host_page_size); + memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); - vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + if (use_uffd) { uffd_handler_threads = - malloc(vcpus * sizeof(*uffd_handler_threads)); + malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - uffd_args = malloc(vcpus * sizeof(*uffd_args)); + uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); TEST_ASSERT(uffd_args, "Memory allocation failed"); - pipefds = malloc(sizeof(int) * vcpus * 2); + pipefds = malloc(sizeof(int) * nr_vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - } - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vm_paddr_t vcpu_gpa; - void *vcpu_hva; - vm_vcpu_add_default(vm, vcpu_id, guest_code); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + vm_paddr_t vcpu_gpa; + void *vcpu_hva; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); + PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); - /* Cache the HVA pointer of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); + /* Cache the HVA pointer of the region */ + vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - if (use_uffd) { /* * Set up user fault fd to handle demand paging * requests. @@ -456,53 +309,41 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], uffd_delay, &uffd_args[vcpu_id], - vcpu_hva, vcpu_memory_bytes); + vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); } - -#ifdef __x86_64__ - vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); -#endif - - vcpu_args[vcpu_id].vm = vm; - vcpu_args[vcpu_id].vcpu_id = vcpu_id; - vcpu_args[vcpu_id].gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size; } /* Export the shared variables to the guest */ - sync_global_to_guest(vm, host_page_size); - sync_global_to_guest(vm, guest_page_size); - sync_global_to_guest(vm, vcpu_args); + sync_global_to_guest(vm, perf_test_args); pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &vcpu_args[vcpu_id]); + &perf_test_args.vcpu_args[vcpu_id]); } pr_info("Started all vCPUs\n"); /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_join(vcpu_threads[vcpu_id], NULL); PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } - pr_info("All vCPU threads joined\n"); + ts_diff = timespec_diff_now(start); - clock_gettime(CLOCK_MONOTONIC, &end); + pr_info("All vCPU threads joined\n"); if (use_uffd) { char c; /* Tell the user fault fd handler threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { r = write(pipefds[vcpu_id * 2 + 1], &c, 1); TEST_ASSERT(r == 1, "Unable to write to pipefd"); @@ -510,11 +351,11 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } - ts_diff = timespec_sub(end, start); pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + perf_test_args.vcpu_args[0].pages * nr_vcpus / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); kvm_vm_free(vm); @@ -568,9 +409,8 @@ static void help(char *name) int main(int argc, char *argv[]) { + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); bool mode_selected = false; - uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; - int vcpus = 1; unsigned int mode; int opt, i; bool use_uffd = false; @@ -619,15 +459,12 @@ int main(int argc, char *argv[]) "A negative UFFD delay is not supported."); break; case 'b': - vcpu_memory_bytes = parse_size(optarg); + guest_percpu_mem_size = parse_size(optarg); break; case 'v': - vcpus = atoi(optarg); - TEST_ASSERT(vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -642,7 +479,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes); + run_test(i, use_uffd, uffd_delay); } return 0; diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c new file mode 100644 index 000000000000..85c9b8f73142 --- /dev/null +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging performance test + * + * Based on dirty_log_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2020, Google, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <pthread.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" +#include "test_util.h" + +/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ +#define TEST_HOST_LOOP_N 2UL + +/* Host variables */ +static bool host_quit; +static uint64_t iteration; +static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; + +static void *vcpu_worker(void *data) +{ + int ret; + struct kvm_vm *vm = perf_test_args.vm; + uint64_t pages_count = 0; + struct kvm_run *run; + struct timespec start; + struct timespec ts_diff; + struct timespec total = (struct timespec){0}; + struct timespec avg; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + while (!READ_ONCE(host_quit)) { + uint64_t current_iteration = READ_ONCE(iteration); + + clock_gettime(CLOCK_MONOTONIC, &start); + ret = _vcpu_run(vm, vcpu_id); + ts_diff = timespec_diff_now(start); + + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + + pr_debug("Got sync event from vCPU %d\n", vcpu_id); + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + pr_debug("vCPU %d updated last completed iteration to %lu\n", + vcpu_id, vcpu_last_completed_iteration[vcpu_id]); + + if (current_iteration) { + pages_count += vcpu_args->pages; + total = timespec_add(total, ts_diff); + pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } else { + pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } + + while (current_iteration == READ_ONCE(iteration) && + !READ_ONCE(host_quit)) {} + } + + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); + pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], + total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); + + return NULL; +} + +#ifdef USE_CLEAR_DIRTY_LOG +static u64 dirty_log_manual_caps; +#endif + +static void run_test(enum vm_guest_mode mode, unsigned long iterations, + uint64_t phys_offset, int wr_fract) +{ + pthread_t *vcpu_threads; + struct kvm_vm *vm; + unsigned long *bmap; + uint64_t guest_num_pages; + uint64_t host_num_pages; + int vcpu_id; + struct timespec start; + struct timespec ts_diff; + struct timespec get_dirty_log_total = (struct timespec){0}; + struct timespec vcpu_dirty_total = (struct timespec){0}; + struct timespec avg; +#ifdef USE_CLEAR_DIRTY_LOG + struct kvm_enable_cap cap = {}; + struct timespec clear_dirty_log_total = (struct timespec){0}; +#endif + + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + + perf_test_args.wr_fract = wr_fract; + + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); + bmap = bitmap_alloc(host_num_pages); + +#ifdef USE_CLEAR_DIRTY_LOG + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = dirty_log_manual_caps; + vm_enable_cap(vm, &cap); +#endif + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + + sync_global_to_guest(vm, perf_test_args); + + /* Start the iterations */ + iteration = 0; + host_quit = false; + + clock_gettime(CLOCK_MONOTONIC, &start); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + } + + /* Allow the vCPU to populate memory */ + pr_debug("Starting iteration %lu - Populating\n", iteration); + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n", + iteration); + + ts_diff = timespec_diff_now(start); + pr_info("Populate memory time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Enable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + KVM_MEM_LOG_DIRTY_PAGES); + ts_diff = timespec_diff_now(start); + pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + while (iteration < iterations) { + /* + * Incrementing the iteration number will start the vCPUs + * dirtying memory again. + */ + clock_gettime(CLOCK_MONOTONIC, &start); + iteration++; + + pr_debug("Starting iteration %lu\n", iteration); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n", + vcpu_id, iteration); + } + + ts_diff = timespec_diff_now(start); + vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff); + pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + + ts_diff = timespec_diff_now(start); + get_dirty_log_total = timespec_add(get_dirty_log_total, + ts_diff); + pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + host_num_pages); + + ts_diff = timespec_diff_now(start); + clear_dirty_log_total = timespec_add(clear_dirty_log_total, + ts_diff); + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); +#endif + } + + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + /* Disable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + ts_diff = timespec_diff_now(start); + pr_info("Disabling dirty logging time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + avg = timespec_div(get_dirty_log_total, iterations); + pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, get_dirty_log_total.tv_sec, + get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + avg = timespec_div(clear_dirty_log_total, iterations); + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, clear_dirty_log_total.tv_sec, + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); +#endif + + free(bmap); + free(vcpu_threads); + ucall_uninit(vm); + kvm_vm_free(vm); +} + +struct guest_mode { + bool supported; + bool enabled; +}; +static struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_init(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-i iterations] [-p offset] " + "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -m: specify the guest mode ID to test " + "(default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -f: specify the fraction of pages which should be written to\n" + " as opposed to simply read, in the form\n" + " 1/<fraction of pages to write>.\n" + " (default: 1 i.e. all pages are written to.)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + unsigned long iterations = TEST_HOST_LOOP_N; + bool mode_selected = false; + uint64_t phys_offset = 0; + unsigned int mode; + int opt, i; + int wr_fract = 1; + +#ifdef USE_CLEAR_DIRTY_LOG + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + if (!dirty_log_manual_caps) { + print_skip("KVM_CLEAR_DIRTY_LOG not available"); + exit(KSFT_SKIP); + } + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); +#endif + +#ifdef __x86_64__ + guest_mode_init(VM_MODE_PXXV48_4K, true, true); +#endif +#ifdef __aarch64__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_64K, true, true); + + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + if (limit >= 52) + guest_mode_init(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_init(VM_MODE_P48V48_4K, true, true); + guest_mode_init(VM_MODE_P48V48_64K, true, true); + } + } +#endif +#ifdef __s390x__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); +#endif + + while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'p': + phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'f': + wr_fract = atoi(optarg); + TEST_ASSERT(wr_fract >= 1, + "Write fraction cannot be less than one"); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0, + "Must have a positive number of vCPUs"); + TEST_ASSERT(nr_vcpus <= MAX_VCPUS, + "This test does not currently support\n" + "more than %d vCPUs.", MAX_VCPUS); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); + + pr_info("Test iterations: %"PRIu64"\n", iterations); + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + run_test(i, iterations, phys_offset, wr_fract); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 752ec158ac59..54da9cc20db4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -128,6 +128,78 @@ static uint64_t host_dirty_count; static uint64_t host_clear_count; static uint64_t host_track_next_count; +enum log_mode_t { + /* Only use KVM_GET_DIRTY_LOG for logging */ + LOG_MODE_DIRTY_LOG = 0, + + /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */ + LOG_MODE_CLEAR_LOG = 1, + + LOG_MODE_NUM, + + /* Run all supported modes */ + LOG_MODE_ALL = LOG_MODE_NUM, +}; + +/* Mode of logging to test. Default is to run all supported modes */ +static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; +/* Logging mode for current run */ +static enum log_mode_t host_log_mode; + +static bool clear_log_supported(void) +{ + return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); +} + +static void clear_log_create_vm_done(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = {}; + u64 manual_caps; + + manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!"); + manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = manual_caps; + vm_enable_cap(vm, &cap); +} + +static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); +} + +static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); + kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); +} + +struct log_mode { + const char *name; + /* Return true if this mode is supported, otherwise false */ + bool (*supported)(void); + /* Hook when the vm creation is done (before vcpu creation) */ + void (*create_vm_done)(struct kvm_vm *vm); + /* Hook to collect the dirty pages into the bitmap provided */ + void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages); +} log_modes[LOG_MODE_NUM] = { + { + .name = "dirty-log", + .collect_dirty_pages = dirty_log_collect_dirty_pages, + }, + { + .name = "clear-log", + .supported = clear_log_supported, + .create_vm_done = clear_log_create_vm_done, + .collect_dirty_pages = clear_log_collect_dirty_pages, + }, +}; + /* * We use this bitmap to track some pages that should have its dirty * bit set in the _next_ iteration. For example, if we detected the @@ -137,6 +209,44 @@ static uint64_t host_track_next_count; */ static unsigned long *host_bmap_track; +static void log_modes_dump(void) +{ + int i; + + printf("all"); + for (i = 0; i < LOG_MODE_NUM; i++) + printf(", %s", log_modes[i].name); + printf("\n"); +} + +static bool log_mode_supported(void) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->supported) + return mode->supported(); + + return true; +} + +static void log_mode_create_vm_done(struct kvm_vm *vm) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->create_vm_done) + mode->create_vm_done(vm); +} + +static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + TEST_ASSERT(mode->collect_dirty_pages != NULL, + "collect_dirty_pages() is required for any log mode!"); + mode->collect_dirty_pages(vm, slot, bitmap, num_pages); +} + static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -195,7 +305,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_bit_le(page, bmap)) { + if (test_and_clear_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto @@ -252,11 +362,12 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); #endif + log_mode_create_vm_done(vm); vm_vcpu_add_default(vm, vcpuid, guest_code); return vm; } @@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -#ifdef USE_CLEAR_DIRTY_LOG -static u64 dirty_log_manual_caps; -#endif - static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_vm *vm; unsigned long *bmap; + if (!log_mode_supported()) { + print_skip("Log mode '%s' not supported", + log_modes[host_log_mode].name); + return; + } + /* * We reserve page table for 2 times of extra dirty mem which * will definitely cover the original (1G+) test range. Here @@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); -#ifdef USE_CLEAR_DIRTY_LOG - struct kvm_enable_cap cap = {}; - - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); -#endif - /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_test_phys_mem, @@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, while (iteration < iterations) { /* Give the vcpu thread some time to dirty some pages */ usleep(interval * 1000); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); -#ifdef USE_CLEAR_DIRTY_LOG - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - host_num_pages); -#endif + log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, + bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); iteration++; sync_global_to_guest(vm, iteration); @@ -410,6 +512,9 @@ static void help(char *name) TEST_HOST_LOOP_INTERVAL); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -M: specify the host logging mode " + "(default: run all log modes). Supported modes: \n\t"); + log_modes_dump(); printf(" -m: specify the guest mode ID to test " "(default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -429,18 +534,7 @@ int main(int argc, char *argv[]) bool mode_selected = false; uint64_t phys_offset = 0; unsigned int mode; - int opt, i; - -#ifdef USE_CLEAR_DIRTY_LOG - dirty_log_manual_caps = - kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - if (!dirty_log_manual_caps) { - print_skip("KVM_CLEAR_DIRTY_LOG not available"); - exit(KSFT_SKIP); - } - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); -#endif + int opt, i, j; #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); @@ -464,7 +558,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { + while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) { switch (opt) { case 'i': iterations = strtol(optarg, NULL, 10); @@ -486,6 +580,26 @@ int main(int argc, char *argv[]) "Guest mode ID %d too big", mode); guest_modes[mode].enabled = true; break; + case 'M': + if (!strcmp(optarg, "all")) { + host_log_mode_option = LOG_MODE_ALL; + break; + } + for (i = 0; i < LOG_MODE_NUM; i++) { + if (!strcmp(optarg, log_modes[i].name)) { + pr_info("Setting log mode to: '%s'\n", + optarg); + host_log_mode_option = i; + break; + } + } + if (i == LOG_MODE_NUM) { + printf("Log mode '%s' invalid. Please choose " + "from: ", optarg); + log_modes_dump(); + exit(1); + } + break; case 'h': default: help(argv[0]); @@ -507,7 +621,18 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (j = 0; j < LOG_MODE_NUM; j++) { + pr_info("Testing Log Mode '%s'\n", + log_modes[j].name); + host_log_mode = j; + run_test(i, iterations, interval, phys_offset); + } + } else { + host_log_mode = host_log_mode_option; + run_test(i, iterations, interval, phys_offset); + } } return 0; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 919e161dd289..7d29aa786959 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -63,9 +63,11 @@ enum vm_mem_backing_src_type { int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap); +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); @@ -149,6 +151,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_guest_debug *debug); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); @@ -294,6 +297,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); memcpy(&(g), _p, sizeof(g)); \ }) +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); + /* Common ucalls */ enum { UCALL_NONE, diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h new file mode 100644 index 000000000000..2618052057b1 --- /dev/null +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/perf_test_util.h + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H +#define SELFTEST_KVM_PERF_TEST_UTIL_H + +#include "kvm_util.h" +#include "processor.h" + +#define MAX_VCPUS 512 + +#define PAGE_SHIFT_4K 12 +#define PTES_PER_4K_PT 512 + +#define TEST_MEM_SLOT_INDEX 1 + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + +/* Number of VCPUs for the test */ +static int nr_vcpus = 1; + +struct vcpu_args { + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + int vcpu_id; +}; + +struct perf_test_args { + struct kvm_vm *vm; + uint64_t host_page_size; + uint64_t guest_page_size; + int wr_fract; + + struct vcpu_args vcpu_args[MAX_VCPUS]; +}; + +static struct perf_test_args perf_test_args; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); + } +} + +static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES; + uint64_t guest_num_pages; + + /* Account for a few pages per-vCPU for stacks */ + pages += DEFAULT_STACK_PGS * vcpus; + + /* + * Reserve twice the ammount of memory needed to map the test region and + * the page table / stacks region, at 4k, for page tables. Do the + * calculation with 4K page size: the smallest of all archs. (e.g., 64K + * page size guest will need even less memory for page tables). + */ + pages += (2 * pages) / PTES_PER_4K_PT; + pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / + PTES_PER_4K_PT; + pages = vm_adjust_num_guest_pages(mode, pages); + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + vm = vm_create(mode, pages, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + + perf_test_args.vm = vm; + perf_test_args.guest_page_size = vm_get_page_size(vm); + perf_test_args.host_page_size = getpagesize(); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + guest_num_pages = (vcpus * vcpu_memory_bytes) / + perf_test_args.guest_page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vm_vcpu_add_default(vm, vcpu_id, guest_code); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); +#endif + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} + +#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 5eb01bf51b86..ffffa560436b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -64,5 +64,7 @@ int64_t timespec_to_ns(struct timespec ts); struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); +struct timespec timespec_diff_now(struct timespec start); +struct timespec timespec_div(struct timespec ts, int divisor); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 82b7fe16a824..8e61340b3911 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -36,6 +36,8 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) +#define UNEXPECTED_VECTOR_PORT 0xfff0u + /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; @@ -59,7 +61,7 @@ struct gpr64_regs { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; @@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void) return idt; } +static inline void outl(uint16_t port, uint32_t value) +{ + __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -338,6 +345,35 @@ uint32_t kvm_get_cpuid_max_basic(void); uint32_t kvm_get_cpuid_max_extended(void); void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); +struct ex_regs { + uint64_t rax, rcx, rdx, rbx; + uint64_t rbp, rsi, rdi; + uint64_t r8, r9, r10, r11; + uint64_t r12, r13, r14, r15; + uint64_t vector; + uint64_t error_code; + uint64_t rip; + uint64_t cs; + uint64_t rflags; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)); + +/* + * set_cpuid() - overwrites a matching cpuid entry with the provided value. + * matches based on ent->function && ent->index. returns true + * if a match was found and successfully overwritten. + * @cpuid: the kvm cpuid list to modify. + * @ent: cpuid entry to insert + */ +bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 54d624dd6c10..e78d7e26ba61 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -573,6 +573,10 @@ struct vmx_pages { void *eptp_hva; uint64_t eptp_gpa; void *eptp; + + void *apic_access_hva; + uint64_t apic_access_gpa; + void *apic_access; }; union vmx_basic { @@ -615,5 +619,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot, uint32_t eptp_memslot); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 2afa6618b396..d6c32c328e9a 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index c8e0ec20d3bf..2f37b90ee1a9 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { vm_vaddr_t gva; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 74776ee228f2..126c6727a6b0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -14,6 +14,7 @@ #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> +#include <unistd.h> #include <linux/kernel.h> #define KVM_UTIL_PGS_PER_HUGEPG 512 @@ -85,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } +/* VCPU Enable Capability + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - VCPU + * cap - Capability + * + * Output Args: None + * + * Return: On success, 0. On failure a TEST_ASSERT failure is produced. + * + * Enables a capability (KVM_CAP_*) on the VCPU. + */ +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpu_id); + int r; + + TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id); + + r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap); + TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n" + " rc: %i, errno: %i", r, errno); + + return r; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); @@ -151,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; @@ -242,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) -{ - return _vm_create(mode, phy_pages, perm); -} - /* * VM Restart * @@ -664,13 +688,21 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { - ret = madvise(region->host_mem, npages * vm->page_size, - src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); - TEST_ASSERT(ret == 0, "madvise failed,\n" - " addr: %p\n" - " length: 0x%lx\n" - " src_type: %x", - region->host_mem, npages * vm->page_size, src_type); + struct stat statbuf; + + ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), + "stat /sys/kernel/mm/transparent_hugepage"); + + TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP, + "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel"); + + if (ret == 0) { + ret = madvise(region->host_mem, npages * vm->page_size, + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); + TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x", + region->host_mem, npages * vm->page_size, src_type); + } } region->unused_phy_pages = sparsebit_alloc(); @@ -1195,6 +1227,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) do { rc = ioctl(vcpu->fd, KVM_RUN, NULL); } while (rc == -1 && errno == EINTR); + + assert_on_unhandled_exception(vm, vcpuid); + return rc; } @@ -1252,6 +1287,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, } /* + * VM VCPU Get Reg List + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * None + * + * Return: + * A pointer to an allocated struct kvm_reg_list + * + * Get the list of guest registers which are supported for + * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls + */ +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, ®_list_n); + TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); + reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); + reg_list->n = reg_list_n.n; + vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list); + return reg_list; +} + +/* * VM VCPU Regs Get * * Input Args: diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 2ef446520748..f07d383d03a1 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -50,6 +50,8 @@ struct kvm_vm { vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; + vm_vaddr_t idt; + vm_vaddr_t handlers; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index a88c5d665725..7349bb2e1a24 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index fd589dc9bfab..9d3b0f15249a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 689e97c27ee2..8e04c0b1608e 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,10 +4,13 @@ * * Copyright (C) 2020, Google LLC. */ -#include <stdlib.h> + +#include <assert.h> #include <ctype.h> #include <limits.h> -#include <assert.h> +#include <stdlib.h> +#include <time.h> + #include "test_util.h" /* @@ -81,6 +84,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) return timespec_add_ns((struct timespec){0}, ns1 - ns2); } +struct timespec timespec_diff_now(struct timespec start) +{ + struct timespec end; + + clock_gettime(CLOCK_MONOTONIC, &end); + return timespec_sub(end, start); +} + +struct timespec timespec_div(struct timespec ts, int divisor) +{ + int64_t ns = timespec_to_ns(ts) / divisor; + + return timespec_add_ns((struct timespec){0}, ns); +} + void print_skip(const char *fmt, ...) { va_list ap; diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S new file mode 100644 index 000000000000..aaf7bc7d2ce1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -0,0 +1,81 @@ +handle_exception: + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + + push %rdi + push %rsi + push %rbp + push %rbx + push %rdx + push %rcx + push %rax + mov %rsp, %rdi + + call route_exception + + pop %rax + pop %rcx + pop %rdx + pop %rbx + pop %rbp + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + /* Discard vector and error code. */ + add $16, %rsp + iretq + +/* + * Build the handle_exception wrappers which push the vector/error code on the + * stack and an array of pointers to those wrappers. + */ +.pushsection .rodata +.globl idt_handlers +idt_handlers: +.popsection + +.macro HANDLERS has_error from to + vector = \from + .rept \to - \from + 1 + .align 8 + + /* Fetch current address and append it to idt_handlers. */ + current_handler = . +.pushsection .rodata +.quad current_handler +.popsection + + .if ! \has_error + pushq $0 + .endif + pushq $vector + jmp handle_exception + vector = vector + 1 + .endr +.endm + +.global idt_handler_code +idt_handler_code: + HANDLERS has_error=0 from=0 to=7 + HANDLERS has_error=1 from=8 to=8 + HANDLERS has_error=0 from=9 to=9 + HANDLERS has_error=1 from=10 to=14 + HANDLERS has_error=0 from=15 to=16 + HANDLERS has_error=1 from=17 to=17 + HANDLERS has_error=0 from=18 to=255 + +.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f6eb34eaa0d2..d10c5c05bdf0 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -12,9 +12,18 @@ #include "../kvm_util_internal.h" #include "processor.h" +#ifndef NUM_INTERRUPTS +#define NUM_INTERRUPTS 256 +#endif + +#define DEFAULT_CODE_SELECTOR 0x8 +#define DEFAULT_DATA_SELECTOR 0x10 + /* Minimum physical address used for virtual translation tables. */ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 +vm_vaddr_t exception_handlers; + /* Virtual translation table structure declarations */ struct pageMapL4Entry { uint64_t present:1; @@ -392,11 +401,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->limit0 = segp->limit & 0xFFFF; desc->base0 = segp->base & 0xFFFF; desc->base1 = segp->base >> 16; - desc->s = segp->s; desc->type = segp->type; + desc->s = segp->s; desc->dpl = segp->dpl; desc->p = segp->present; desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; desc->l = segp->l; desc->db = segp->db; desc->g = segp->g; @@ -556,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); + kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); break; @@ -1118,3 +1128,131 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) *va_bits = (entry->eax >> 8) & 0xff; } } + +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +void kvm_exit_unexpected_vector(uint32_t value) +{ + outl(UNEXPECTED_VECTOR_PORT, value); +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; + } + + kvm_exit_unexpected_vector(regs->vector); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + int i; + + vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0); + vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0); + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, + DEFAULT_CODE_SELECTOR); +} + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.idt.base = vm->idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->gdt; + sregs.gdt.limit = getpagesize() - 1; + kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); + vcpu_sregs_set(vm, vcpuid, &sregs); + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ + if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO + && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT + && vcpu_state(vm, vcpuid)->io.size == 4) { + /* Grab pointer to io data */ + uint32_t *data = (void *)vcpu_state(vm, vcpuid) + + vcpu_state(vm, vcpuid)->io.data_offset; + + TEST_ASSERT(false, + "Unexpected vectored event in guest (vector:0x%x)", + *data); + } +} + +bool set_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 *ent) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function != ent->function || cur->index != ent->index) + continue; + + memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2)); + return true; + } + + return false; +} + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3) +{ + uint64_t r; + + asm volatile("vmcall" + : "=a"(r) + : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + return r; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index da4d89ad5419..a3489973e290 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index f1e00d43eea2..2448b30e8efa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -542,3 +542,12 @@ void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); } + +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); + vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c new file mode 100644 index 000000000000..b10a27485bad --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include <asm/kvm_para.h> +#include <linux/kvm_para.h> +#include <stdint.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +extern unsigned char rdmsr_start; +extern unsigned char rdmsr_end; + +static u64 do_rdmsr(u32 idx) +{ + u32 lo, hi; + + asm volatile("rdmsr_start: rdmsr;" + "rdmsr_end:" + : "=a"(lo), "=c"(hi) + : "c"(idx)); + + return (((u64) hi) << 32) | lo; +} + +extern unsigned char wrmsr_start; +extern unsigned char wrmsr_end; + +static void do_wrmsr(u32 idx, u64 val) +{ + u32 lo, hi; + + lo = val; + hi = val >> 32; + + asm volatile("wrmsr_start: wrmsr;" + "wrmsr_end:" + : : "a"(lo), "c"(idx), "d"(hi)); +} + +static int nr_gp; + +static void guest_gp_handler(struct ex_regs *regs) +{ + unsigned char *rip = (unsigned char *)regs->rip; + bool r, w; + + r = rip == &rdmsr_start; + w = rip == &wrmsr_start; + GUEST_ASSERT(r || w); + + nr_gp++; + + if (r) + regs->rip = (uint64_t)&rdmsr_end; + else + regs->rip = (uint64_t)&wrmsr_end; +} + +struct msr_data { + uint32_t idx; + const char *name; +}; + +#define TEST_MSR(msr) { .idx = msr, .name = #msr } +#define UCALL_PR_MSR 0xdeadbeef +#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr) + +/* + * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or + * written, as the KVM_CPUID_FEATURES leaf is cleared. + */ +static struct msr_data msrs_to_test[] = { + TEST_MSR(MSR_KVM_SYSTEM_TIME), + TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW), + TEST_MSR(MSR_KVM_WALL_CLOCK), + TEST_MSR(MSR_KVM_WALL_CLOCK_NEW), + TEST_MSR(MSR_KVM_ASYNC_PF_EN), + TEST_MSR(MSR_KVM_STEAL_TIME), + TEST_MSR(MSR_KVM_PV_EOI_EN), + TEST_MSR(MSR_KVM_POLL_CONTROL), + TEST_MSR(MSR_KVM_ASYNC_PF_INT), + TEST_MSR(MSR_KVM_ASYNC_PF_ACK), +}; + +static void test_msr(struct msr_data *msr) +{ + PR_MSR(msr); + do_rdmsr(msr->idx); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + + nr_gp = 0; + do_wrmsr(msr->idx, 0); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + nr_gp = 0; +} + +struct hcall_data { + uint64_t nr; + const char *name; +}; + +#define TEST_HCALL(hc) { .nr = hc, .name = #hc } +#define UCALL_PR_HCALL 0xdeadc0de +#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc) + +/* + * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding + * features have been cleared in KVM_CPUID_FEATURES. + */ +static struct hcall_data hcalls_to_test[] = { + TEST_HCALL(KVM_HC_KICK_CPU), + TEST_HCALL(KVM_HC_SEND_IPI), + TEST_HCALL(KVM_HC_SCHED_YIELD), +}; + +static void test_hcall(struct hcall_data *hc) +{ + uint64_t r; + + PR_HCALL(hc); + r = kvm_hypercall(hc->nr, 0, 0, 0, 0); + GUEST_ASSERT(r == -KVM_ENOSYS); +} + +static void guest_main(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { + test_msr(&msrs_to_test[i]); + } + + for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) { + test_hcall(&hcalls_to_test[i]); + } + + GUEST_DONE(); +} + +static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid) +{ + struct kvm_cpuid_entry2 ent = {0}; + + ent.function = KVM_CPUID_FEATURES; + TEST_ASSERT(set_cpuid(cpuid, &ent), + "failed to clear KVM_CPUID_FEATURES leaf"); +} + +static void pr_msr(struct ucall *uc) +{ + struct msr_data *msr = (struct msr_data *)uc->args[0]; + + pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx); +} + +static void pr_hcall(struct ucall *uc) +{ + struct hcall_data *hc = (struct hcall_data *)uc->args[0]; + + pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); +} + +static void handle_abort(struct ucall *uc) +{ + TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], + __FILE__, uc->args[1]); +} + +#define VCPU_ID 0 + +static void enter_guest(struct kvm_vm *vm) +{ + struct kvm_run *run; + struct ucall uc; + int r; + + run = vcpu_state(vm, VCPU_ID); + + while (true) { + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_PR_MSR: + pr_msr(&uc); + break; + case UCALL_PR_HCALL: + pr_hcall(&uc); + break; + case UCALL_ABORT: + handle_abort(&uc); + return; + case UCALL_DONE: + return; + } + } +} + +int main(void) +{ + struct kvm_enable_cap cap = {0}; + struct kvm_cpuid2 *best; + struct kvm_vm *vm; + + if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { + pr_info("will skip kvm paravirt restriction tests.\n"); + return 0; + } + + vm = vm_create_default(VCPU_ID, 0, guest_main); + + cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID; + cap.args[0] = 1; + vcpu_enable_cap(vm, VCPU_ID, &cap); + + best = kvm_get_supported_cpuid(); + clear_kvm_cpuid_features(best); + vcpu_set_cpuid(vm, VCPU_ID, best); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + enter_guest(vm); + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c new file mode 100644 index 000000000000..1f65342d6cb7 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_apic_access_test + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * The first subtest simply checks to see that an L2 guest can be + * launched with a valid APIC-access address that is backed by a + * page of L1 physical memory. + * + * The second subtest sets the APIC-access address to a (valid) L1 + * physical address that is not backed by memory. KVM can't handle + * this situation, so resuming L2 should result in a KVM exit for + * internal error (emulation). This is not an architectural + * requirement. It is just a shortcoming of KVM. The internal error + * is unfortunate, but it's better than what used to happen! + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#include "kselftest.h" + +#define VCPU_ID 0 + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); + control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); + vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa); + + /* Try to launch L2 with the memory-backed APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + vmwrite(APIC_ACCESS_ADDR, high_gpa); + + /* Try to resume L2 with the unbacked APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + unsigned long apic_access_addr = ~0ul; + unsigned int paddr_width; + unsigned int vaddr_width; + vm_vaddr_t vmx_pages_gva; + unsigned long high_gpa; + struct vmx_pages *vmx; + bool done = false; + + nested_vmx_check_supported(); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + kvm_get_cpu_address_width(&paddr_width, &vaddr_width); + high_gpa = (1ul << paddr_width) - getpagesize(); + if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) { + print_skip("No unbacked physical page available"); + exit(KSFT_SKIP); + } + + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + prepare_virtualize_apic_accesses(vmx, vm, 0); + vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa); + + while (!done) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + if (apic_access_addr == high_gpa) { + TEST_ASSERT(run->exit_reason == + KVM_EXIT_INTERNAL_ERROR, + "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->internal.suberror == + KVM_INTERNAL_ERROR_EMULATION, + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", + run->internal.suberror); + break; + } + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + apic_access_addr = uc.args[1]; + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + } + } + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 30848ca36555..a5ce26d548e4 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -136,7 +136,7 @@ endif ifeq ($(OVERRIDE_TARGETS),) LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h $(OUTPUT)/%:%.c $(LOCAL_HDRS) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ $(OUTPUT)/%.o:%.S $(COMPILE.S) $^ -o $@ diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index c6fe22de7d0e..f366cadbc5e8 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -234,6 +234,14 @@ setup_overlay_ipv4() ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 + + # The intermediate namespaces don't have routes for the reverse path, + # as it will be handled by tc. So we need to ensure that rp_filter is + # not going to block the traffic. + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 } setup_overlay_ipv6() diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 6bbf69a28e12..464e31eabc73 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -355,7 +355,7 @@ setup_fou_or_gue() { encap="${3}" if [ "${outer}" = "4" ]; then - modprobe fou || return 2 + modprobe fou || return $ksft_skip a_addr="${prefix4}.${a_r1}.1" b_addr="${prefix4}.${b_r1}.1" if [ "${inner}" = "4" ]; then @@ -366,7 +366,7 @@ setup_fou_or_gue() { ipproto="41" fi else - modprobe fou6 || return 2 + modprobe fou6 || return $ksft_skip a_addr="${prefix6}:${a_r1}::1" b_addr="${prefix6}:${b_r1}::1" if [ "${inner}" = "4" ]; then @@ -380,8 +380,8 @@ setup_fou_or_gue() { fi fi - run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 - run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip + run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto} run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 @@ -455,7 +455,7 @@ setup_ipvX_over_ipvY() { fi fi - run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2 + run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode} run_cmd ${ns_a} ip link set ip_a up @@ -713,7 +713,7 @@ setup_routing() { } setup_bridge() { - run_cmd ${ns_a} ip link add br0 type bridge || return 2 + run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip run_cmd ${ns_a} ip link set br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -765,7 +765,7 @@ setup_ovs_vxlan6() { } setup_ovs_bridge() { - run_cmd ovs-vsctl add-br ovs_br0 || return 2 + run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip run_cmd ip link set ovs_br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -887,7 +887,7 @@ check_pmtu_value() { test_pmtu_ipvX() { family=${1} - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -985,11 +985,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ${type}4 || return 2 + setup namespaces routing ${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ${type}6 || return 2 + setup namespaces routing ${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1060,11 +1060,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing bridge bridged_${type}4 || return 2 + setup namespaces routing bridge bridged_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing bridge bridged_${type}6 || return 2 + setup namespaces routing bridge bridged_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1144,11 +1144,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ovs_bridge ovs_${type}4 || return 2 + setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ovs_bridge ovs_${type}6 || return 2 + setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1230,7 +1230,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() { encap=${3} ll_mtu=4000 - setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 + setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1309,7 +1309,7 @@ test_pmtu_ipvX_over_ipvY_exception() { outer=${2} ll_mtu=4000 - setup namespaces routing ip${inner}ip${outer} || return 2 + setup namespaces routing ip${inner}ip${outer} || return $ksft_skip trace "${ns_a}" ip_a "${ns_b}" ip_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ @@ -1363,7 +1363,7 @@ test_pmtu_ipv6_ipv6_exception() { } test_pmtu_vti4_exception() { - setup namespaces veth vti4 xfrm4 || return 2 + setup namespaces veth vti4 xfrm4 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti4_a "${ns_b}" vti4_b @@ -1393,7 +1393,7 @@ test_pmtu_vti4_exception() { } test_pmtu_vti6_exception() { - setup namespaces veth vti6 xfrm6 || return 2 + setup namespaces veth vti6 xfrm6 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti6_a "${ns_b}" vti6_b fail=0 @@ -1423,7 +1423,7 @@ test_pmtu_vti6_exception() { } test_pmtu_vti4_default_mtu() { - setup namespaces veth vti4 || return 2 + setup namespaces veth vti4 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv4 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1435,7 +1435,7 @@ test_pmtu_vti4_default_mtu() { } test_pmtu_vti6_default_mtu() { - setup namespaces veth vti6 || return 2 + setup namespaces veth vti6 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv6 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1447,10 +1447,10 @@ test_pmtu_vti6_default_mtu() { } test_pmtu_vti4_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 - [ $? -ne 0 ] && err " vti not supported" && return 2 + [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti4_a fail=0 @@ -1485,10 +1485,10 @@ test_pmtu_vti4_link_add_mtu() { } test_pmtu_vti6_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 - [ $? -ne 0 ] && err " vti6 not supported" && return 2 + [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti6_a fail=0 @@ -1523,10 +1523,10 @@ test_pmtu_vti6_link_add_mtu() { } test_pmtu_vti6_link_change_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy - [ $? -ne 0 ] && err " dummy not supported" && return 2 + [ $? -ne 0 ] && err " dummy not supported" && return $ksft_skip run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy run_cmd ${ns_a} ip link set dummy0 up run_cmd ${ns_a} ip link set dummy1 up @@ -1579,10 +1579,10 @@ test_cleanup_vxlanX_exception() { encap="vxlan" ll_mtu=4000 - check_command taskset || return 2 + check_command taskset || return $ksft_skip cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) - setup namespaces routing ${encap}${outer} || return 2 + setup namespaces routing ${encap}${outer} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1644,7 +1644,7 @@ run_test() { fi err_flush exit 1 - elif [ $ret -eq 2 ]; then + elif [ $ret -eq $ksft_skip ]; then printf "TEST: %-60s [SKIP]\n" "${tdesc}" err_flush fi @@ -1652,7 +1652,19 @@ run_test() { return $ret ) ret=$? - [ $ret -ne 0 ] && exitcode=1 + case $ret in + 0) + all_skipped=false + [ $exitcode=$ksft_skip ] && exitcode=0 + ;; + $ksft_skip) + [ $all_skipped = true ] && exitcode=$ksft_skip + ;; + *) + all_skipped=false + exitcode=1 + ;; + esac return $ret } @@ -1667,7 +1679,7 @@ run_test_nh() { } test_list_flush_ipv4_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1721,7 +1733,7 @@ test_list_flush_ipv4_exception() { } test_list_flush_ipv6_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1786,6 +1798,7 @@ usage() { # exitcode=0 desc=0 +all_skipped=true while getopts :ptv o do @@ -1840,7 +1853,7 @@ for t in ${tests}; do if [ $run_this -eq 1 ]; then run_test "${name}" "${desc}" # if test was skipped no need to retry with nexthop objects - [ $? -eq 2 ] && rerun_nh=0 + [ $? -eq $ksft_skip ] && rerun_nh=0 if [ "${rerun_nh}" = "1" ]; then run_test_nh "${name}" "${desc}" diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 2c522f7a0aec..db4521335722 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -56,12 +56,15 @@ #define RING_NUM_FRAMES 20 +static uint32_t cfg_max_num_members; + /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) { struct sockaddr_ll addr = {0}; - int fd, val; + struct fanout_args args; + int fd, val, err; fd = socket(PF_PACKET, SOCK_RAW, 0); if (fd < 0) { @@ -83,8 +86,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) exit(1); } - val = (((int) typeflags) << 16) | group_id; - if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { + if (cfg_max_num_members) { + args.id = group_id; + args.type_flags = typeflags; + args.max_num_members = cfg_max_num_members; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, + sizeof(args)); + } else { + val = (((int) typeflags) << 16) | group_id; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, + sizeof(val)); + } + if (err) { if (close(fd)) { perror("close packet"); exit(1); @@ -286,6 +299,56 @@ static void test_control_group(void) } } +/* Test illegal max_num_members values */ +static void test_control_group_max_num_members(void) +{ + int fds[3]; + + fprintf(stderr, "test: control multiple sockets, max_num_members\n"); + + /* expected failure on greater than PACKET_FANOUT_MAX */ + cfg_max_num_members = (1 << 16) + 1; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n"); + exit(1); + } + + cfg_max_num_members = 256; + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed open\n"); + exit(1); + } + + /* expected failure on joining group with different max_num_members */ + cfg_max_num_members = 257; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: set different max_num_members\n"); + exit(1); + } + + /* success on joining group with same max_num_members */ + cfg_max_num_members = 256; + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[1] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + /* success on joining group with max_num_members unspecified */ + cfg_max_num_members = 0; + fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[2] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + if (close(fds[2]) || close(fds[1]) || close(fds[0])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + /* Test creating a unique fanout group ids */ static void test_unique_fanout_group_ids(void) { @@ -426,8 +489,11 @@ int main(int argc, char **argv) test_control_single(); test_control_group(); + test_control_group_max_num_members(); test_unique_fanout_group_ids(); + /* PACKET_FANOUT_MAX */ + cfg_max_num_members = 1 << 16; /* find a set of ports that do not collide onto the same socket */ ret = test_datapath(PACKET_FANOUT_HASH, port_off, expect_hash[0], expect_hash[1]); diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index bb11de90c0c9..f6f2965e17af 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,3 +4,4 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 7758c98be015..0930e2411dfb 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -204,7 +204,10 @@ TEST_F(child, fetch_fd) fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); ASSERT_GE(fd, 0); - EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd)); + ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd); + if (ret < 0 && errno == ENOSYS) + SKIP(return, "kcmp() syscall not supported"); + EXPECT_EQ(ret, 0); ret = fcntl(fd, F_GETFD); ASSERT_GE(ret, 0); diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index b9fe75fc3e51..8a59438ccc78 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -6,7 +6,6 @@ #include <inttypes.h> #include <limits.h> #include <linux/types.h> -#include <linux/wait.h> #include <sched.h> #include <signal.h> #include <stdbool.h> diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 4b115444dfe9..610811275357 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #include <errno.h> #include <linux/types.h> -#include <linux/wait.h> #include <poll.h> #include <signal.h> #include <stdbool.h> diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 1f085b922c6e..6e2f2cd400ca 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,7 +16,6 @@ #include <unistd.h> #include <sys/socket.h> #include <sys/stat.h> -#include <linux/kcmp.h> #include "pidfd.h" #include "../clone3/clone3_selftests.h" diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c585aaa2acd8..529eb700ac26 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void) ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n", test_name, PID_RECYCLE); case PIDFD_SKIP: - ksft_print_msg("%s test: Skipping test\n", test_name); + ksft_test_result_skip("%s test: Skipping test\n", test_name); ret = 0; break; case PIDFD_XFAIL: diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index 471e2aa28077..fb4fe9188806 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include <errno.h> #include <sched.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 9f6d000c0245..8511dcfe67c7 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..e7ceabed7f51 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include <assert.h> #include <unistd.h> diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index bb543bf69d69..361235ad574b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -100,7 +100,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower.*handle", "matchCount": "1", "teardown": [ @@ -119,7 +119,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": " dst_mac e4:11:22:11:4a:51", "matchCount": "0", "teardown": [ diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index d77f4829f1e0..74c69b75f6f5 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -316,6 +316,14 @@ pp sleep 3 n2 ping -W 1 -c 1 192.168.241.1 n1 wg set wg0 peer "$pub2" persistent-keepalive 0 +# Test that sk_bound_dev_if works +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 +# What about when the mark changes and the packet must be rerouted? +n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1 +n1 ping -c 1 -W 1 192.168.241.2 # First the boring case +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case +n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1 + # Test that onion routing works, even when it loops n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 ip1 addr add 192.168.242.1/24 dev wg0 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index d531de13c95b..4eecb432a66c 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -18,10 +18,12 @@ CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MARK=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_NAT=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y |