summaryrefslogtreecommitdiff
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c7
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh13
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh2
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile5
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c479
-rw-r--r--tools/testing/selftests/kvm/aarch64/debug-exceptions.c30
-rw-r--r--tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c2
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_init.c369
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/arch_timer.h142
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/delay.h25
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic.h21
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h90
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/spinlock.h13
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/vgic.h20
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h3
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic.c95
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_private.h21
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.c240
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.h70
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c24
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/spinlock.c27
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c70
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c22
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c56
-rw-r--r--tools/testing/selftests/kvm/steal_time.c16
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmio_warning_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c15
-rw-r--r--tools/testing/selftests/lib.mk1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c5
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat_zones.sh309
-rwxr-xr-xtools/testing/selftests/netfilter/nft_zones_many.sh156
35 files changed, 2147 insertions, 215 deletions
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 6836510a522f..22722abc9dfa 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -266,16 +266,19 @@ int test_init(struct tdescr *td)
td->feats_supported |= FEAT_SSBS;
if (getauxval(AT_HWCAP) & HWCAP_SVE)
td->feats_supported |= FEAT_SVE;
- if (feats_ok(td))
+ if (feats_ok(td)) {
fprintf(stderr,
"Required Features: [%s] supported\n",
feats_to_string(td->feats_required &
td->feats_supported));
- else
+ } else {
fprintf(stderr,
"Required Features: [%s] NOT supported\n",
feats_to_string(td->feats_required &
~td->feats_supported));
+ td->result = KSFT_SKIP;
+ return 0;
+ }
}
/* Perform test specific additional initialization */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 866531c08e4f..799b88152e9e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -375,7 +375,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
+ $(wildcard $(BPFDIR)/bpf_*.h) \
+ | $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS))
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index 59ea56945e6c..b497bb85b667 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -112,6 +112,14 @@ setup()
ip netns add "${NS2}"
ip netns add "${NS3}"
+ # rp_filter gets confused by what these tests are doing, so disable it
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
@@ -236,11 +244,6 @@ setup()
ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
sleep 1 # reduce flakiness
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index beee0d5646a6..f7d84549cc3e 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Copyright 2020 NXP Semiconductors
+# Copyright 2020 NXP
WAIT_TIME=1
NUM_NETIFS=4
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index fdd13fbdcc8f..d4a830139683 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
+/aarch64/arch_timer
/aarch64/debug-exceptions
/aarch64/get-reg-list
/aarch64/psci_cpu_on_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index fd20f271aac0..c23e89dea0b6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -35,7 +35,7 @@ endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
@@ -88,6 +88,7 @@ TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
TEST_GEN_PROGS_x86_64 += system_counter_offset_test
+TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
@@ -97,6 +98,8 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += kvm_page_table_test
+TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
+TEST_GEN_PROGS_aarch64 += memslot_perf_test
TEST_GEN_PROGS_aarch64 += rseq_test
TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
new file mode 100644
index 000000000000..bf6a45b0b8dc
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the aarch64 timer IRQ functionality
+ *
+ * The test validates both the virtual and physical timer IRQs using
+ * CVAL and TVAL registers. This consitutes the four stages in the test.
+ * The guest's main thread configures the timer interrupt for a stage
+ * and waits for it to fire, with a timeout equal to the timer period.
+ * It asserts that the timeout doesn't exceed the timer period.
+ *
+ * On the other hand, upon receipt of an interrupt, the guest's interrupt
+ * handler validates the interrupt by checking if the architectural state
+ * is in compliance with the specifications.
+ *
+ * The test provides command-line options to configure the timer's
+ * period (-p), number of vCPUs (-n), and iterations per stage (-i).
+ * To stress-test the timer stack even more, an option to migrate the
+ * vCPUs across pCPUs (-m), at a particular rate, is also provided.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+#define NR_VCPUS_DEF 4
+#define NR_TEST_ITERS_DEF 5
+#define TIMER_TEST_PERIOD_MS_DEF 10
+#define TIMER_TEST_ERR_MARGIN_US 100
+#define TIMER_TEST_MIGRATION_FREQ_MS 2
+
+struct test_args {
+ int nr_vcpus;
+ int nr_iter;
+ int timer_period_ms;
+ int migration_freq_ms;
+};
+
+static struct test_args test_args = {
+ .nr_vcpus = NR_VCPUS_DEF,
+ .nr_iter = NR_TEST_ITERS_DEF,
+ .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
+ .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
+};
+
+#define msecs_to_usecs(msec) ((msec) * 1000LL)
+
+#define GICD_BASE_GPA 0x8000000ULL
+#define GICR_BASE_GPA 0x80A0000ULL
+
+enum guest_stage {
+ GUEST_STAGE_VTIMER_CVAL = 1,
+ GUEST_STAGE_VTIMER_TVAL,
+ GUEST_STAGE_PTIMER_CVAL,
+ GUEST_STAGE_PTIMER_TVAL,
+ GUEST_STAGE_MAX,
+};
+
+/* Shared variables between host and guest */
+struct test_vcpu_shared_data {
+ int nr_iter;
+ enum guest_stage guest_stage;
+ uint64_t xcnt;
+};
+
+struct test_vcpu {
+ uint32_t vcpuid;
+ pthread_t pt_vcpu_run;
+ struct kvm_vm *vm;
+};
+
+static struct test_vcpu test_vcpu[KVM_MAX_VCPUS];
+static struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
+
+static int vtimer_irq, ptimer_irq;
+
+static unsigned long *vcpu_done_map;
+static pthread_mutex_t vcpu_done_map_lock;
+
+static void
+guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
+{
+ switch (shared_data->guest_stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_VTIMER_TVAL:
+ timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_PTIMER_TVAL:
+ timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_ENABLE);
+ break;
+ default:
+ GUEST_ASSERT(0);
+ }
+}
+
+static void guest_validate_irq(unsigned int intid,
+ struct test_vcpu_shared_data *shared_data)
+{
+ enum guest_stage stage = shared_data->guest_stage;
+ uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
+ unsigned long xctl = 0;
+ unsigned int timer_irq = 0;
+
+ if (stage == GUEST_STAGE_VTIMER_CVAL ||
+ stage == GUEST_STAGE_VTIMER_TVAL) {
+ xctl = timer_get_ctl(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ xcnt = timer_get_cntct(VIRTUAL);
+ cval = timer_get_cval(VIRTUAL);
+ timer_irq = vtimer_irq;
+ } else if (stage == GUEST_STAGE_PTIMER_CVAL ||
+ stage == GUEST_STAGE_PTIMER_TVAL) {
+ xctl = timer_get_ctl(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+ xcnt = timer_get_cntct(PHYSICAL);
+ cval = timer_get_cval(PHYSICAL);
+ timer_irq = ptimer_irq;
+ } else {
+ GUEST_ASSERT(0);
+ }
+
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq);
+
+ /* Basic 'timer condition met' check */
+ GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
+ GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid = gic_get_and_ack_irq();
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ guest_validate_irq(intid, shared_data);
+
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+
+ gic_set_eoi(intid);
+}
+
+static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
+ enum guest_stage stage)
+{
+ uint32_t irq_iter, config_iter;
+
+ shared_data->guest_stage = stage;
+ shared_data->nr_iter = 0;
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup the next interrupt */
+ guest_configure_timer_action(shared_data);
+
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(msecs_to_usecs(test_args.timer_period_ms) +
+ TIMER_TEST_ERR_MARGIN_US);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ GUEST_ASSERT_2(config_iter + 1 == irq_iter,
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+
+ gic_init(GIC_V3, test_args.nr_vcpus,
+ (void *)GICD_BASE_GPA, (void *)GICR_BASE_GPA);
+
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+ gic_irq_enable(vtimer_irq);
+ gic_irq_enable(ptimer_irq);
+ local_irq_enable();
+
+ guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
+
+ GUEST_DONE();
+}
+
+static void *test_vcpu_run(void *arg)
+{
+ struct ucall uc;
+ struct test_vcpu *vcpu = arg;
+ struct kvm_vm *vm = vcpu->vm;
+ uint32_t vcpuid = vcpu->vcpuid;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpuid];
+
+ vcpu_run(vm, vcpuid);
+
+ /* Currently, any exit from guest is an indication of completion */
+ pthread_mutex_lock(&vcpu_done_map_lock);
+ set_bit(vcpuid, vcpu_done_map);
+ pthread_mutex_unlock(&vcpu_done_map_lock);
+
+ switch (get_ucall(vm, vcpuid, &uc)) {
+ case UCALL_SYNC:
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ sync_global_from_guest(vm, *shared_data);
+ TEST_FAIL("%s at %s:%ld\n\tvalues: %lu, %lu; %lu, vcpu: %u; stage: %u; iter: %u",
+ (const char *)uc.args[0], __FILE__, uc.args[1],
+ uc.args[2], uc.args[3], uc.args[4], vcpuid,
+ shared_data->guest_stage, shared_data->nr_iter);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit\n");
+ }
+
+ return NULL;
+}
+
+static uint32_t test_get_pcpu(void)
+{
+ uint32_t pcpu;
+ unsigned int nproc_conf;
+ cpu_set_t online_cpuset;
+
+ nproc_conf = get_nprocs_conf();
+ sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset);
+
+ /* Randomly find an available pCPU to place a vCPU on */
+ do {
+ pcpu = rand() % nproc_conf;
+ } while (!CPU_ISSET(pcpu, &online_cpuset));
+
+ return pcpu;
+}
+
+static int test_migrate_vcpu(struct test_vcpu *vcpu)
+{
+ int ret;
+ cpu_set_t cpuset;
+ uint32_t new_pcpu = test_get_pcpu();
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(new_pcpu, &cpuset);
+
+ pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu->vcpuid, new_pcpu);
+
+ ret = pthread_setaffinity_np(vcpu->pt_vcpu_run,
+ sizeof(cpuset), &cpuset);
+
+ /* Allow the error where the vCPU thread is already finished */
+ TEST_ASSERT(ret == 0 || ret == ESRCH,
+ "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n",
+ vcpu->vcpuid, new_pcpu, ret);
+
+ return ret;
+}
+
+static void *test_vcpu_migration(void *arg)
+{
+ unsigned int i, n_done;
+ bool vcpu_done;
+
+ do {
+ usleep(msecs_to_usecs(test_args.migration_freq_ms));
+
+ for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) {
+ pthread_mutex_lock(&vcpu_done_map_lock);
+ vcpu_done = test_bit(i, vcpu_done_map);
+ pthread_mutex_unlock(&vcpu_done_map_lock);
+
+ if (vcpu_done) {
+ n_done++;
+ continue;
+ }
+
+ test_migrate_vcpu(&test_vcpu[i]);
+ }
+ } while (test_args.nr_vcpus != n_done);
+
+ return NULL;
+}
+
+static void test_run(struct kvm_vm *vm)
+{
+ int i, ret;
+ pthread_t pt_vcpu_migration;
+
+ pthread_mutex_init(&vcpu_done_map_lock, NULL);
+ vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
+ TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n");
+
+ for (i = 0; i < test_args.nr_vcpus; i++) {
+ ret = pthread_create(&test_vcpu[i].pt_vcpu_run, NULL,
+ test_vcpu_run, &test_vcpu[i]);
+ TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i);
+ }
+
+ /* Spawn a thread to control the vCPU migrations */
+ if (test_args.migration_freq_ms) {
+ srand(time(NULL));
+
+ ret = pthread_create(&pt_vcpu_migration, NULL,
+ test_vcpu_migration, NULL);
+ TEST_ASSERT(!ret, "Failed to create the migration pthread\n");
+ }
+
+
+ for (i = 0; i < test_args.nr_vcpus; i++)
+ pthread_join(test_vcpu[i].pt_vcpu_run, NULL);
+
+ if (test_args.migration_freq_ms)
+ pthread_join(pt_vcpu_migration, NULL);
+
+ bitmap_free(vcpu_done_map);
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm)
+{
+ /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
+ int vcpu0_fd = vcpu_get_fd(vm, 0);
+
+ kvm_device_access(vcpu0_fd, KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq, false);
+ kvm_device_access(vcpu0_fd, KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq, false);
+
+ sync_global_to_guest(vm, ptimer_irq);
+ sync_global_to_guest(vm, vtimer_irq);
+
+ pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ unsigned int i;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
+
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ for (i = 0; i < nr_vcpus; i++) {
+ vcpu_init_descriptor_tables(vm, i);
+
+ test_vcpu[i].vcpuid = i;
+ test_vcpu[i].vm = vm;
+ }
+
+ ucall_init(vm, NULL);
+ test_init_timer_irq(vm);
+ vgic_v3_setup(vm, nr_vcpus, GICD_BASE_GPA, GICR_BASE_GPA);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n",
+ name);
+ pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n",
+ NR_VCPUS_DEF, KVM_MAX_VCPUS);
+ pr_info("\t-i: Number of iterations per stage (default: %u)\n",
+ NR_TEST_ITERS_DEF);
+ pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n",
+ TIMER_TEST_PERIOD_MS_DEF);
+ pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
+ TIMER_TEST_MIGRATION_FREQ_MS);
+ pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) {
+ switch (opt) {
+ case 'n':
+ test_args.nr_vcpus = atoi(optarg);
+ if (test_args.nr_vcpus <= 0) {
+ pr_info("Positive value needed for -n\n");
+ goto err;
+ } else if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
+ pr_info("Max allowed vCPUs: %u\n",
+ KVM_MAX_VCPUS);
+ goto err;
+ }
+ break;
+ case 'i':
+ test_args.nr_iter = atoi(optarg);
+ if (test_args.nr_iter <= 0) {
+ pr_info("Positive value needed for -i\n");
+ goto err;
+ }
+ break;
+ case 'p':
+ test_args.timer_period_ms = atoi(optarg);
+ if (test_args.timer_period_ms <= 0) {
+ pr_info("Positive value needed for -p\n");
+ goto err;
+ }
+ break;
+ case 'm':
+ test_args.migration_freq_ms = atoi(optarg);
+ if (test_args.migration_freq_ms < 0) {
+ pr_info("0 or positive value needed for -m\n");
+ goto err;
+ }
+ break;
+ case 'h':
+ default:
+ goto err;
+ }
+ }
+
+ return true;
+
+err:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ if (test_args.migration_freq_ms && get_nprocs() < 2) {
+ print_skip("At least two physical CPUs needed for vCPU migration");
+ exit(KSFT_SKIP);
+ }
+
+ vm = test_vm_create();
+ test_run(vm);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index e5e6c92b60da..ea189d83abf7 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -34,16 +34,16 @@ static void reset_debug_state(void)
{
asm volatile("msr daifset, #8");
- write_sysreg(osdlr_el1, 0);
- write_sysreg(oslar_el1, 0);
+ write_sysreg(0, osdlr_el1);
+ write_sysreg(0, oslar_el1);
isb();
- write_sysreg(mdscr_el1, 0);
+ write_sysreg(0, mdscr_el1);
/* This test only uses the first bp and wp slot. */
- write_sysreg(dbgbvr0_el1, 0);
- write_sysreg(dbgbcr0_el1, 0);
- write_sysreg(dbgwcr0_el1, 0);
- write_sysreg(dbgwvr0_el1, 0);
+ write_sysreg(0, dbgbvr0_el1);
+ write_sysreg(0, dbgbcr0_el1);
+ write_sysreg(0, dbgwcr0_el1);
+ write_sysreg(0, dbgwvr0_el1);
isb();
}
@@ -53,14 +53,14 @@ static void install_wp(uint64_t addr)
uint32_t mdscr;
wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
- write_sysreg(dbgwcr0_el1, wcr);
- write_sysreg(dbgwvr0_el1, addr);
+ write_sysreg(wcr, dbgwcr0_el1);
+ write_sysreg(addr, dbgwvr0_el1);
isb();
asm volatile("msr daifclr, #8");
mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
- write_sysreg(mdscr_el1, mdscr);
+ write_sysreg(mdscr, mdscr_el1);
isb();
}
@@ -70,14 +70,14 @@ static void install_hw_bp(uint64_t addr)
uint32_t mdscr;
bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
- write_sysreg(dbgbcr0_el1, bcr);
- write_sysreg(dbgbvr0_el1, addr);
+ write_sysreg(bcr, dbgbcr0_el1);
+ write_sysreg(addr, dbgbvr0_el1);
isb();
asm volatile("msr daifclr, #8");
mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
- write_sysreg(mdscr_el1, mdscr);
+ write_sysreg(mdscr, mdscr_el1);
isb();
}
@@ -88,7 +88,7 @@ static void install_ss(void)
asm volatile("msr daifclr, #8");
mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
- write_sysreg(mdscr_el1, mdscr);
+ write_sysreg(mdscr, mdscr_el1);
isb();
}
@@ -190,7 +190,7 @@ static int debug_version(struct kvm_vm *vm)
{
uint64_t id_aa64dfr0;
- get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0);
+ get_reg(vm, VCPU_ID, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0);
return id_aa64dfr0 & 0xf;
}
diff --git a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
index 018c269990e1..4c5f6814030f 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
@@ -91,7 +91,7 @@ int main(void)
init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
- get_reg(vm, VCPU_ID_TARGET, ARM64_SYS_REG(MPIDR_EL1), &target_mpidr);
+ get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
vcpu_run(vm, VCPU_ID_SOURCE);
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
index 623f31a14326..34379c98d2f4 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_init.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c
@@ -13,25 +13,28 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "vgic.h"
#define NR_VCPUS 4
-#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) (((uint64_t)(count) << 52) | \
- ((uint64_t)((base) >> 16) << 16) | ((uint64_t)(flags) << 12) | index)
#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
#define GICR_TYPER 0x8
+#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
+#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
+
struct vm_gic {
struct kvm_vm *vm;
int gic_fd;
+ uint32_t gic_dev_type;
};
-static int max_ipa_bits;
+static uint64_t max_phys_size;
/* helper to access a redistributor register */
-static int access_redist_reg(int gicv3_fd, int vcpu, int offset,
- uint32_t *val, bool write)
+static int access_v3_redist_reg(int gicv3_fd, int vcpu, int offset,
+ uint32_t *val, bool write)
{
uint64_t attr = REG_OFFSET(vcpu, offset);
@@ -58,12 +61,13 @@ static int run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
return 0;
}
-static struct vm_gic vm_gic_create(void)
+static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, uint32_t nr_vcpus)
{
struct vm_gic v;
- v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL);
- v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ v.gic_dev_type = gic_dev_type;
+ v.vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false);
return v;
}
@@ -74,78 +78,129 @@ static void vm_gic_destroy(struct vm_gic *v)
kvm_vm_free(v->vm);
}
+struct vgic_region_attr {
+ uint64_t attr;
+ uint64_t size;
+ uint64_t alignment;
+};
+
+struct vgic_region_attr gic_v3_dist_region = {
+ .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
+ .size = 0x10000,
+ .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v3_redist_region = {
+ .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
+ .size = NR_VCPUS * 0x20000,
+ .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v2_dist_region = {
+ .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
+ .size = 0x1000,
+ .alignment = 0x1000,
+};
+
+struct vgic_region_attr gic_v2_cpu_region = {
+ .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
+ .size = 0x2000,
+ .alignment = 0x1000,
+};
+
/**
- * Helper routine that performs KVM device tests in general and
- * especially ARM_VGIC_V3 ones. Eventually the ARM_VGIC_V3
- * device gets created, a legacy RDIST region is set at @0x0
- * and a DIST region is set @0x60000
+ * Helper routine that performs KVM device tests in general. Eventually the
+ * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping
+ * DIST/REDIST (or DIST/CPUIF for GICv2). Assumption is 4 vcpus are going to be
+ * used hence the overlap. In the case of GICv3, A RDIST region is set at @0x0
+ * and a DIST region is set @0x70000. The GICv2 case sets a CPUIF @0x0 and a
+ * DIST region @0x1000.
*/
static void subtest_dist_rdist(struct vm_gic *v)
{
int ret;
uint64_t addr;
+ struct vgic_region_attr rdist; /* CPU interface in GICv2*/
+ struct vgic_region_attr dist;
+
+ rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
+ : gic_v2_cpu_region;
+ dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
+ : gic_v2_dist_region;
/* Check existing group/attributes */
kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST);
+ dist.attr);
kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ rdist.attr);
/* check non existing attribute */
- ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, 0);
+ ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
/* misaligned DIST and REDIST address settings */
- addr = 0x1000;
+ addr = dist.alignment / 0x10;
ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
- TEST_ASSERT(ret && errno == EINVAL, "GICv3 dist base not 64kB aligned");
+ dist.attr, &addr, true);
+ TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
+ addr = rdist.alignment / 0x10;
ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
- TEST_ASSERT(ret && errno == EINVAL, "GICv3 redist base not 64kB aligned");
+ rdist.attr, &addr, true);
+ TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
/* out of range address */
- if (max_ipa_bits) {
- addr = 1ULL << max_ipa_bits;
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
- TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
+ addr = max_phys_size;
+ ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr, true);
+ TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
- TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
- }
+ ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr, true);
+ TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
+
+ /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
+ addr = max_phys_size - dist.alignment;
+ ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr, true);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "half of the redist is beyond IPA limit");
/* set REDIST base address @0x0*/
addr = 0x00000;
kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
+ rdist.attr, &addr, true);
/* Attempt to create a second legacy redistributor region */
addr = 0xE0000;
ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
- TEST_ASSERT(ret && errno == EEXIST, "GICv3 redist base set again");
+ rdist.attr, &addr, true);
+ TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
- /* Attempt to mix legacy and new redistributor regions */
- addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
- ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
- TEST_ASSERT(ret && errno == EINVAL, "attempt to mix GICv3 REDIST and REDIST_REGION");
+ ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ if (!ret) {
+ /* Attempt to mix legacy and new redistributor regions */
+ addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
+ ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION,
+ &addr, true);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "attempt to mix GICv3 REDIST and REDIST_REGION");
+ }
/*
* Set overlapping DIST / REDIST, cannot be detected here. Will be detected
* on first vcpu run instead.
*/
- addr = 3 * 2 * 0x10000;
- kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST,
- &addr, true);
+ addr = rdist.size - rdist.alignment;
+ kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr, true);
}
/* Test the new REDIST region API */
-static void subtest_redist_regions(struct vm_gic *v)
+static void subtest_v3_redist_regions(struct vm_gic *v)
{
uint64_t addr, expected_addr;
int ret;
@@ -199,12 +254,19 @@ static void subtest_redist_regions(struct vm_gic *v)
kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
- addr = REDIST_REGION_ATTR_ADDR(1, 1ULL << max_ipa_bits, 0, 2);
+ addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
TEST_ASSERT(ret && errno == E2BIG,
"register redist region with base address beyond IPA range");
+ /* The last redist is above the pa range. */
+ addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
+ ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register redist region with top address beyond IPA range");
+
addr = 0x260000;
ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
@@ -249,13 +311,12 @@ static void subtest_redist_regions(struct vm_gic *v)
* VGIC KVM device is created and initialized before the secondary CPUs
* get created
*/
-static void test_vgic_then_vcpus(void)
+static void test_vgic_then_vcpus(uint32_t gic_dev_type)
{
struct vm_gic v;
int ret, i;
- v.vm = vm_create_default(0, 0, guest_code);
- v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ v = vm_gic_create_with_vcpus(gic_dev_type, 1);
subtest_dist_rdist(&v);
@@ -270,12 +331,12 @@ static void test_vgic_then_vcpus(void)
}
/* All the VCPUs are created before the VGIC KVM device gets initialized */
-static void test_vcpus_then_vgic(void)
+static void test_vcpus_then_vgic(uint32_t gic_dev_type)
{
struct vm_gic v;
int ret;
- v = vm_gic_create();
+ v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS);
subtest_dist_rdist(&v);
@@ -285,15 +346,15 @@ static void test_vcpus_then_vgic(void)
vm_gic_destroy(&v);
}
-static void test_new_redist_regions(void)
+static void test_v3_new_redist_regions(void)
{
void *dummy = NULL;
struct vm_gic v;
uint64_t addr;
int ret;
- v = vm_gic_create();
- subtest_redist_regions(&v);
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+ subtest_v3_redist_regions(&v);
kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
@@ -303,8 +364,8 @@ static void test_new_redist_regions(void)
/* step2 */
- v = vm_gic_create();
- subtest_redist_regions(&v);
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+ subtest_v3_redist_regions(&v);
addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
@@ -317,8 +378,8 @@ static void test_new_redist_regions(void)
/* step 3 */
- v = vm_gic_create();
- subtest_redist_regions(&v);
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+ subtest_v3_redist_regions(&v);
_kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy, true);
@@ -338,7 +399,7 @@ static void test_new_redist_regions(void)
vm_gic_destroy(&v);
}
-static void test_typer_accesses(void)
+static void test_v3_typer_accesses(void)
{
struct vm_gic v;
uint64_t addr;
@@ -351,12 +412,12 @@ static void test_typer_accesses(void)
vm_vcpu_add_default(v.vm, 3, guest_code);
- ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
TEST_ASSERT(ret && errno == EINVAL, "attempting to read GICR_TYPER of non created vcpu");
vm_vcpu_add_default(v.vm, 1, guest_code);
- ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
TEST_ASSERT(ret && errno == EBUSY, "read GICR_TYPER before GIC initialized");
vm_vcpu_add_default(v.vm, 2, guest_code);
@@ -365,7 +426,7 @@ static void test_typer_accesses(void)
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
for (i = 0; i < NR_VCPUS ; i++) {
- ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && !val, "read GICR_TYPER before rdist region setting");
}
@@ -374,10 +435,10 @@ static void test_typer_accesses(void)
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
/* The 2 first rdists should be put there (vcpu 0 and 3) */
- ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && !val, "read typer of rdist #0");
- ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #1");
addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
@@ -385,11 +446,11 @@ static void test_typer_accesses(void)
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
- ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x100,
"no redist region attached to vcpu #1 yet, last cannot be returned");
- ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x200,
"no redist region attached to vcpu #2, last cannot be returned");
@@ -397,10 +458,10 @@ static void test_typer_accesses(void)
kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
- ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1");
- ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x210,
"read typer of rdist #1, last properly returned");
@@ -417,7 +478,7 @@ static void test_typer_accesses(void)
* rdist region #2 @0x200000 2 rdist capacity
* rdists: 1, 2
*/
-static void test_last_bit_redist_regions(void)
+static void test_v3_last_bit_redist_regions(void)
{
uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
struct vm_gic v;
@@ -444,29 +505,29 @@ static void test_last_bit_redist_regions(void)
kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
- ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0");
- ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1");
- ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x200, "read typer of rdist #2");
- ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #3");
- ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #5");
- ret = access_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x410, "read typer of rdist #4");
vm_gic_destroy(&v);
}
/* Test last bit with legacy region */
-static void test_last_bit_single_rdist(void)
+static void test_v3_last_bit_single_rdist(void)
{
uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
struct vm_gic v;
@@ -485,28 +546,106 @@ static void test_last_bit_single_rdist(void)
kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
- ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0");
- ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x300, "read typer of rdist #1");
- ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #2");
- ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #3");
- ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
+ ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
TEST_ASSERT(!ret && val == 0x210, "read typer of rdist #3");
vm_gic_destroy(&v);
}
-void test_kvm_device(void)
+/* Uses the legacy REDIST region API. */
+static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
+{
+ struct vm_gic v;
+ int ret, i;
+ uint64_t addr;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1);
+
+ /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
+ addr = max_phys_size - (3 * 2 * 0x10000);
+ kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
+
+ addr = 0x00000;
+ kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
+
+ /* Add the rest of the VCPUs */
+ for (i = 1; i < NR_VCPUS; ++i)
+ vm_vcpu_add_default(v.vm, i, guest_code);
+
+ kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+
+ /* Attempt to run a vcpu without enough redist space. */
+ ret = run_vcpu(v.vm, 2);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "redist base+size above PA range detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_its_region(void)
+{
+ struct vm_gic v;
+ uint64_t addr;
+ int its_fd, ret;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+ its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS, false);
+
+ addr = 0x401000;
+ ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "ITS region with misaligned address");
+
+ addr = max_phys_size;
+ ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register ITS region with base address beyond IPA range");
+
+ addr = max_phys_size - 0x10000;
+ ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "Half of ITS region is beyond IPA range");
+
+ /* This one succeeds setting the ITS base */
+ addr = 0x400000;
+ kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+
+ addr = 0x300000;
+ ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+ TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
+
+ close(its_fd);
+ vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
{
struct vm_gic v;
int ret, fd;
+ uint32_t other;
v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL);
@@ -514,38 +653,70 @@ void test_kvm_device(void)
ret = _kvm_create_device(v.vm, 0, true, &fd);
TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
- /* trial mode with VGIC_V3 device */
- ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true, &fd);
- if (ret) {
- print_skip("GICv3 not supported");
- exit(KSFT_SKIP);
- }
- v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ /* trial mode */
+ ret = _kvm_create_device(v.vm, gic_dev_type, true, &fd);
+ if (ret)
+ return ret;
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false);
+
+ ret = _kvm_create_device(v.vm, gic_dev_type, false, &fd);
+ TEST_ASSERT(ret && errno == EEXIST, "create GIC device twice");
- ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false, &fd);
- TEST_ASSERT(ret && errno == EEXIST, "create GICv3 device twice");
+ kvm_create_device(v.vm, gic_dev_type, true);
- kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true);
+ /* try to create the other gic_dev_type */
+ other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
+ : KVM_DEV_TYPE_ARM_VGIC_V2;
- if (!_kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, true, &fd)) {
- ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, false, &fd);
- TEST_ASSERT(ret && errno == EINVAL, "create GICv2 while v3 exists");
+ if (!_kvm_create_device(v.vm, other, true, &fd)) {
+ ret = _kvm_create_device(v.vm, other, false, &fd);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "create GIC device while other version exists");
}
vm_gic_destroy(&v);
+
+ return 0;
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+ test_vcpus_then_vgic(gic_dev_type);
+ test_vgic_then_vcpus(gic_dev_type);
+
+ if (VGIC_DEV_IS_V3(gic_dev_type)) {
+ test_v3_new_redist_regions();
+ test_v3_typer_accesses();
+ test_v3_last_bit_redist_regions();
+ test_v3_last_bit_single_rdist();
+ test_v3_redist_ipa_range_check_at_vcpu_run();
+ test_v3_its_region();
+ }
}
int main(int ac, char **av)
{
- max_ipa_bits = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+ int ret;
+ int pa_bits;
+
+ pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+ max_phys_size = 1ULL << pa_bits;
- test_kvm_device();
- test_vcpus_then_vgic();
- test_vgic_then_vcpus();
- test_new_redist_regions();
- test_typer_accesses();
- test_last_bit_redist_regions();
- test_last_bit_single_rdist();
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (!ret) {
+ pr_info("Running GIC_v3 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
+ return 0;
+ }
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+ if (!ret) {
+ pr_info("Running GIC_v2 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
+ return 0;
+ }
+ print_skip("No GICv2 nor GICv3 support");
+ exit(KSFT_SKIP);
return 0;
}
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
new file mode 100644
index 000000000000..cb7c03de3a21
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Timer specific interface
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+enum arch_timer {
+ VIRTUAL,
+ PHYSICAL,
+};
+
+#define CTL_ENABLE (1 << 0)
+#define CTL_IMASK (1 << 1)
+#define CTL_ISTATUS (1 << 2)
+
+#define msec_to_cycles(msec) \
+ (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec) \
+ (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+ ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
+
+static inline uint32_t timer_get_cntfrq(void)
+{
+ return read_sysreg(cntfrq_el0);
+}
+
+static inline uint64_t timer_get_cntct(enum arch_timer timer)
+{
+ isb();
+
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntvct_el0);
+ case PHYSICAL:
+ return read_sysreg(cntpct_el0);
+ default:
+ GUEST_ASSERT_1(0, timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(cval, cntv_cval_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(cval, cntp_cval_el0);
+ break;
+ default:
+ GUEST_ASSERT_1(0, timer);
+ }
+
+ isb();
+}
+
+static inline uint64_t timer_get_cval(enum arch_timer timer)
+{
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_cval_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_cval_el0);
+ default:
+ GUEST_ASSERT_1(0, timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(tval, cntv_tval_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(tval, cntp_tval_el0);
+ break;
+ default:
+ GUEST_ASSERT_1(0, timer);
+ }
+
+ isb();
+}
+
+static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(ctl, cntv_ctl_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(ctl, cntp_ctl_el0);
+ break;
+ default:
+ GUEST_ASSERT_1(0, timer);
+ }
+
+ isb();
+}
+
+static inline uint32_t timer_get_ctl(enum arch_timer timer)
+{
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_ctl_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_ctl_el0);
+ default:
+ GUEST_ASSERT_1(0, timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
+{
+ uint64_t now_ct = timer_get_cntct(timer);
+ uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+ timer_set_cval(timer, next_ct);
+}
+
+static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
+{
+ timer_set_tval(timer, msec_to_cycles(msec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/aarch64/delay.h
new file mode 100644
index 000000000000..329e4f5079ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/delay.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM simple delay routines
+ */
+
+#ifndef SELFTEST_KVM_ARM_DELAY_H
+#define SELFTEST_KVM_ARM_DELAY_H
+
+#include "arch_timer.h"
+
+static inline void __delay(uint64_t cycles)
+{
+ enum arch_timer timer = VIRTUAL;
+ uint64_t start = timer_get_cntct(timer);
+
+ while ((timer_get_cntct(timer) - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARM_DELAY_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
new file mode 100644
index 000000000000..85dd1e53048e
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/gic.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) specific defines
+ */
+
+#ifndef SELFTEST_KVM_GIC_H
+#define SELFTEST_KVM_GIC_H
+
+enum gic_type {
+ GIC_V3,
+ GIC_TYPE_MAX,
+};
+
+void gic_init(enum gic_type type, unsigned int nr_cpus,
+ void *dist_base, void *redist_base);
+void gic_irq_enable(unsigned int intid);
+void gic_irq_disable(unsigned int intid);
+unsigned int gic_get_and_ack_irq(void);
+void gic_set_eoi(unsigned int intid);
+
+#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index c0273aefa63d..27d8e1bb5b36 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -9,20 +9,24 @@
#include "kvm_util.h"
#include <linux/stringify.h>
+#include <linux/types.h>
+#include <asm/sysreg.h>
#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-#define CPACR_EL1 3, 0, 1, 0, 2
-#define TCR_EL1 3, 0, 2, 0, 2
-#define MAIR_EL1 3, 0, 10, 2, 0
-#define MPIDR_EL1 3, 0, 0, 0, 5
-#define TTBR0_EL1 3, 0, 2, 0, 0
-#define SCTLR_EL1 3, 0, 1, 0, 0
-#define VBAR_EL1 3, 0, 12, 0, 0
-
-#define ID_AA64DFR0_EL1 3, 0, 0, 5, 0
+/*
+ * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
+ * SYS_* register definitions in asm/sysreg.h to use in KVM
+ * calls such as get_reg() and set_reg().
+ */
+#define KVM_ARM64_SYS_REG(sys_reg_id) \
+ ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \
+ sys_reg_Op1(sys_reg_id), \
+ sys_reg_CRn(sys_reg_id), \
+ sys_reg_CRm(sys_reg_id), \
+ sys_reg_Op2(sys_reg_id))
/*
* Default MAIR
@@ -59,7 +63,7 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint
vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
}
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init);
+void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init);
void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_init *init, void *guest_code);
@@ -118,18 +122,64 @@ void vm_install_exception_handler(struct kvm_vm *vm,
void vm_install_sync_handler(struct kvm_vm *vm,
int vector, int ec, handler_fn handler);
-#define write_sysreg(reg, val) \
-({ \
- u64 __val = (u64)(val); \
- asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \
-})
+static inline void cpu_relax(void)
+{
+ asm volatile("yield" ::: "memory");
+}
-#define read_sysreg(reg) \
-({ u64 val; \
- asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\
- val; \
+#define isb() asm volatile("isb" : : : "memory")
+#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+
+#define dma_wmb() dmb(oshst)
+#define __iowmb() dma_wmb()
+
+#define dma_rmb() dmb(oshld)
+
+#define __iormb(v) \
+({ \
+ unsigned long tmp; \
+ \
+ dma_rmb(); \
+ \
+ /* \
+ * Courtesy of arch/arm64/include/asm/io.h: \
+ * Create a dummy control dependency from the IO read to any \
+ * later instructions. This ensures that a subsequent call \
+ * to udelay() will be ordered due to the ISB in __delay(). \
+ */ \
+ asm volatile("eor %0, %1, %1\n" \
+ "cbnz %0, ." \
+ : "=r" (tmp) : "r" ((unsigned long)(v)) \
+ : "memory"); \
})
-#define isb() asm volatile("isb" : : : "memory")
+static __always_inline void __raw_writel(u32 val, volatile void *addr)
+{
+ asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u32 __raw_readl(const volatile void *addr)
+{
+ u32 val;
+ asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
+#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+
+#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));})
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+
+static inline void local_irq_enable(void)
+{
+ asm volatile("msr daifclr, #3" : : : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+ asm volatile("msr daifset, #3" : : : "memory");
+}
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/aarch64/spinlock.h
new file mode 100644
index 000000000000..cf0984106d14
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/spinlock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
+#define SELFTEST_KVM_ARM64_SPINLOCK_H
+
+struct spinlock {
+ int v;
+};
+
+extern void spin_lock(struct spinlock *lock);
+extern void spin_unlock(struct spinlock *lock);
+
+#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
new file mode 100644
index 000000000000..0ecfb253893c
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) host specific defines
+ */
+
+#ifndef SELFTEST_KVM_VGIC_H
+#define SELFTEST_KVM_VGIC_H
+
+#include <linux/kvm.h>
+
+#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
+ (((uint64_t)(count) << 52) | \
+ ((uint64_t)((base) >> 16) << 16) | \
+ ((uint64_t)(flags) << 12) | \
+ index)
+
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+ uint64_t gicd_base_gpa, uint64_t gicr_base_gpa);
+
+#endif /* SELFTEST_KVM_VGIC_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 1b3ef5757819..f6b3794f306b 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -411,4 +411,6 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
int vm_get_stats_fd(struct kvm_vm *vm);
int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
+uint32_t guest_get_vcpuid(void);
+
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 4fa1db32c05e..f8fddc84c0d3 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -97,6 +97,8 @@ struct vm_mem_backing_src_alias {
uint32_t flag;
};
+#define MIN_RUN_DELAY_NS 200000UL
+
bool thp_configured(void);
size_t get_trans_hugepagesz(void);
size_t get_def_hugetlb_pagesz(void);
@@ -104,6 +106,7 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
size_t get_backing_src_pagesz(uint32_t i);
void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+long get_run_delay(void);
/*
* Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
new file mode 100644
index 000000000000..fff4fc27504d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) support
+ */
+
+#include <errno.h>
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+
+#include <gic.h>
+#include "gic_private.h"
+#include "processor.h"
+#include "spinlock.h"
+
+static const struct gic_common_ops *gic_common_ops;
+static struct spinlock gic_lock;
+
+static void gic_cpu_init(unsigned int cpu, void *redist_base)
+{
+ gic_common_ops->gic_cpu_init(cpu, redist_base);
+}
+
+static void
+gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
+{
+ const struct gic_common_ops *gic_ops = NULL;
+
+ spin_lock(&gic_lock);
+
+ /* Distributor initialization is needed only once per VM */
+ if (gic_common_ops) {
+ spin_unlock(&gic_lock);
+ return;
+ }
+
+ if (type == GIC_V3)
+ gic_ops = &gicv3_ops;
+
+ GUEST_ASSERT(gic_ops);
+
+ gic_ops->gic_init(nr_cpus, dist_base);
+ gic_common_ops = gic_ops;
+
+ /* Make sure that the initialized data is visible to all the vCPUs */
+ dsb(sy);
+
+ spin_unlock(&gic_lock);
+}
+
+void gic_init(enum gic_type type, unsigned int nr_cpus,
+ void *dist_base, void *redist_base)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ GUEST_ASSERT(type < GIC_TYPE_MAX);
+ GUEST_ASSERT(dist_base);
+ GUEST_ASSERT(redist_base);
+ GUEST_ASSERT(nr_cpus);
+
+ gic_dist_init(type, nr_cpus, dist_base);
+ gic_cpu_init(cpu, redist_base);
+}
+
+void gic_irq_enable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_enable(intid);
+}
+
+void gic_irq_disable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_disable(intid);
+}
+
+unsigned int gic_get_and_ack_irq(void)
+{
+ uint64_t irqstat;
+ unsigned int intid;
+
+ GUEST_ASSERT(gic_common_ops);
+
+ irqstat = gic_common_ops->gic_read_iar();
+ intid = irqstat & GENMASK(23, 0);
+
+ return intid;
+}
+
+void gic_set_eoi(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_eoir(intid);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
new file mode 100644
index 000000000000..d81d739433dc
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) private defines that's only
+ * shared among the GIC library code.
+ */
+
+#ifndef SELFTEST_KVM_GIC_PRIVATE_H
+#define SELFTEST_KVM_GIC_PRIVATE_H
+
+struct gic_common_ops {
+ void (*gic_init)(unsigned int nr_cpus, void *dist_base);
+ void (*gic_cpu_init)(unsigned int cpu, void *redist_base);
+ void (*gic_irq_enable)(unsigned int intid);
+ void (*gic_irq_disable)(unsigned int intid);
+ uint64_t (*gic_read_iar)(void);
+ void (*gic_write_eoir)(uint32_t irq);
+};
+
+extern const struct gic_common_ops gicv3_ops;
+
+#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
new file mode 100644
index 000000000000..2dbf3339b62e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 support
+ */
+
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+
+#include "gic_v3.h"
+#include "gic_private.h"
+
+struct gicv3_data {
+ void *dist_base;
+ void *redist_base[GICV3_MAX_CPUS];
+ unsigned int nr_cpus;
+ unsigned int nr_spis;
+};
+
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+
+enum gicv3_intid_range {
+ SGI_RANGE,
+ PPI_RANGE,
+ SPI_RANGE,
+ INVALID_RANGE,
+};
+
+static struct gicv3_data gicv3_data;
+
+static void gicv3_gicd_wait_for_rwp(void)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_gicr_wait_for_rwp(void *redist_base)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static enum gicv3_intid_range get_intid_range(unsigned int intid)
+{
+ switch (intid) {
+ case 0 ... 15:
+ return SGI_RANGE;
+ case 16 ... 31:
+ return PPI_RANGE;
+ case 32 ... 1019:
+ return SPI_RANGE;
+ }
+
+ /* We should not be reaching here */
+ GUEST_ASSERT(0);
+
+ return INVALID_RANGE;
+}
+
+static uint64_t gicv3_read_iar(void)
+{
+ uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+
+ dsb(sy);
+ return irqstat;
+}
+
+static void gicv3_write_eoir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+ isb();
+}
+
+static void
+gicv3_config_irq(unsigned int intid, unsigned int offset)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ uint32_t mask = 1 << (intid % 32);
+ enum gicv3_intid_range intid_range = get_intid_range(intid);
+ void *reg;
+
+ /* We care about 'cpu' only for SGIs or PPIs */
+ if (intid_range == SGI_RANGE || intid_range == PPI_RANGE) {
+ GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+ reg = sgi_base_from_redist(gicv3_data.redist_base[cpu]) +
+ offset;
+ writel(mask, reg);
+ gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu]);
+ } else if (intid_range == SPI_RANGE) {
+ reg = gicv3_data.dist_base + offset + (intid / 32) * 4;
+ writel(mask, reg);
+ gicv3_gicd_wait_for_rwp();
+ } else {
+ GUEST_ASSERT(0);
+ }
+}
+
+static void gicv3_irq_enable(unsigned int intid)
+{
+ gicv3_config_irq(intid, GICD_ISENABLER);
+}
+
+static void gicv3_irq_disable(unsigned int intid)
+{
+ gicv3_config_irq(intid, GICD_ICENABLER);
+}
+
+static void gicv3_enable_redist(void *redist_base)
+{
+ uint32_t val = readl(redist_base + GICR_WAKER);
+ unsigned int count = 100000; /* 1s */
+
+ val &= ~GICR_WAKER_ProcessorSleep;
+ writel(val, redist_base + GICR_WAKER);
+
+ /* Wait until the processor is 'active' */
+ while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu)
+{
+ /* Align all the redistributors sequentially */
+ return redist_base + cpu * SZ_64K * 2;
+}
+
+static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
+{
+ void *sgi_base;
+ unsigned int i;
+ void *redist_base_cpu;
+
+ GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+ redist_base_cpu = gicr_base_cpu(redist_base, cpu);
+ sgi_base = sgi_base_from_redist(redist_base_cpu);
+
+ gicv3_enable_redist(redist_base_cpu);
+
+ /*
+ * Mark all the SGI and PPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ writel(~0, sgi_base + GICR_IGROUPR0);
+ writel(~0, sgi_base + GICR_ICACTIVER0);
+ writel(~0, sgi_base + GICR_ICENABLER0);
+
+ /* Set a default priority for all the SGIs and PPIs */
+ for (i = 0; i < 32; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ sgi_base + GICR_IPRIORITYR0 + i);
+
+ gicv3_gicr_wait_for_rwp(redist_base_cpu);
+
+ /* Enable the GIC system register (ICC_*) access */
+ write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
+ SYS_ICC_SRE_EL1);
+
+ /* Set a default priority threshold */
+ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+
+ /* Enable non-secure Group-1 interrupts */
+ write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1);
+
+ gicv3_data.redist_base[cpu] = redist_base_cpu;
+}
+
+static void gicv3_dist_init(void)
+{
+ void *dist_base = gicv3_data.dist_base;
+ unsigned int i;
+
+ /* Disable the distributor until we set things up */
+ writel(0, dist_base + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+
+ /*
+ * Mark all the SPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ for (i = 32; i < gicv3_data.nr_spis; i += 32) {
+ writel(~0, dist_base + GICD_IGROUPR + i / 8);
+ writel(~0, dist_base + GICD_ICACTIVER + i / 8);
+ writel(~0, dist_base + GICD_ICENABLER + i / 8);
+ }
+
+ /* Set a default priority for all the SPIs */
+ for (i = 32; i < gicv3_data.nr_spis; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ dist_base + GICD_IPRIORITYR + i);
+
+ /* Wait for the settings to sync-in */
+ gicv3_gicd_wait_for_rwp();
+
+ /* Finally, enable the distributor globally with ARE */
+ writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
+ GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+}
+
+static void gicv3_init(unsigned int nr_cpus, void *dist_base)
+{
+ GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
+
+ gicv3_data.nr_cpus = nr_cpus;
+ gicv3_data.dist_base = dist_base;
+ gicv3_data.nr_spis = GICD_TYPER_SPIS(
+ readl(gicv3_data.dist_base + GICD_TYPER));
+ if (gicv3_data.nr_spis > 1020)
+ gicv3_data.nr_spis = 1020;
+
+ /*
+ * Initialize only the distributor for now.
+ * The redistributor and CPU interfaces are initialized
+ * later for every PE.
+ */
+ gicv3_dist_init();
+}
+
+const struct gic_common_ops gicv3_ops = {
+ .gic_init = gicv3_init,
+ .gic_cpu_init = gicv3_cpu_init,
+ .gic_irq_enable = gicv3_irq_enable,
+ .gic_irq_disable = gicv3_irq_disable,
+ .gic_read_iar = gicv3_read_iar,
+ .gic_write_eoir = gicv3_write_eoir,
+};
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h
new file mode 100644
index 000000000000..b51536d469a6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 specific defines
+ */
+
+#ifndef SELFTEST_KVM_GICV3_H
+#define SELFTEST_KVM_GICV3_H
+
+#include <asm/sysreg.h>
+
+/*
+ * Distributor registers
+ */
+#define GICD_CTLR 0x0000
+#define GICD_TYPER 0x0004
+#define GICD_IGROUPR 0x0080
+#define GICD_ISENABLER 0x0100
+#define GICD_ICENABLER 0x0180
+#define GICD_ICACTIVER 0x0380
+#define GICD_IPRIORITYR 0x0400
+
+/*
+ * The assumption is that the guest runs in a non-secure mode.
+ * The following bits of GICD_CTLR are defined accordingly.
+ */
+#define GICD_CTLR_RWP (1U << 31)
+#define GICD_CTLR_nASSGIreq (1U << 8)
+#define GICD_CTLR_ARE_NS (1U << 4)
+#define GICD_CTLR_ENABLE_G1A (1U << 1)
+#define GICD_CTLR_ENABLE_G1 (1U << 0)
+
+#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32)
+#define GICD_INT_DEF_PRI_X4 0xa0a0a0a0
+
+/*
+ * Redistributor registers
+ */
+#define GICR_CTLR 0x000
+#define GICR_WAKER 0x014
+
+#define GICR_CTLR_RWP (1U << 3)
+
+#define GICR_WAKER_ProcessorSleep (1U << 1)
+#define GICR_WAKER_ChildrenAsleep (1U << 2)
+
+/*
+ * Redistributor registers, offsets from SGI base
+ */
+#define GICR_IGROUPR0 GICD_IGROUPR
+#define GICR_ISENABLER0 GICD_ISENABLER
+#define GICR_ICENABLER0 GICD_ICENABLER
+#define GICR_ICACTIVER0 GICD_ICACTIVER
+#define GICR_IPRIORITYR0 GICD_IPRIORITYR
+
+/* CPU interface registers */
+#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
+#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
+#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
+#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+
+#define ICC_PMR_DEF_PRIO 0xf0
+
+#define ICC_SRE_EL1_SRE (1U << 0)
+
+#define ICC_IGRPEN1_EL1_ENABLE (1U << 0)
+
+#define GICV3_MAX_CPUS 512
+
+#endif /* SELFTEST_KVM_GICV3_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 632b74d6b3ca..b4eeeafd2a70 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -212,7 +212,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
+void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
uint64_t sctlr_el1, tcr_el1;
@@ -232,10 +232,10 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
* registers, which the variable argument list macros do.
*/
- set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+ set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
- get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
- get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+ get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
+ get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
switch (vm->mode) {
case VM_MODE_P52V48_4K:
@@ -273,10 +273,11 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
- set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
+ set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
+ set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
+ set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd);
+ set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpuid);
}
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
@@ -362,7 +363,7 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
{
extern char vectors;
- set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors);
+ set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
}
void route_exception(struct ex_regs *regs, int vector)
@@ -426,3 +427,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
assert(vector < VECTOR_NUM);
handlers->exception_handlers[vector][0] = handler;
}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return read_sysreg(tpidr_el1);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
new file mode 100644
index 000000000000..a076e780be5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 Spinlock support
+ */
+#include <stdint.h>
+
+#include "spinlock.h"
+
+void spin_lock(struct spinlock *lock)
+{
+ int val, res;
+
+ asm volatile(
+ "1: ldaxr %w0, [%2]\n"
+ " cbnz %w0, 1b\n"
+ " mov %w0, #1\n"
+ " stxr %w1, %w0, [%2]\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (val), "=&r" (res)
+ : "r" (&lock->v)
+ : "memory");
+}
+
+void spin_unlock(struct spinlock *lock)
+{
+ asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
new file mode 100644
index 000000000000..b9b271ff520d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 host support
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "vgic.h"
+
+/*
+ * vGIC-v3 default host setup
+ *
+ * Input args:
+ * vm - KVM VM
+ * nr_vcpus - Number of vCPUs supported by this VM
+ * gicd_base_gpa - Guest Physical Address of the Distributor region
+ * gicr_base_gpa - Guest Physical Address of the Redistributor region
+ *
+ * Output args: None
+ *
+ * Return: GIC file-descriptor or negative error code upon failure
+ *
+ * The function creates a vGIC-v3 device and maps the distributor and
+ * redistributor regions of the guest. Since it depends on the number of
+ * vCPUs for the VM, it must be called after all the vCPUs have been created.
+ */
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+ uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
+{
+ int gic_fd;
+ uint64_t redist_attr;
+ struct list_head *iter;
+ unsigned int nr_gic_pages, nr_vcpus_created = 0;
+
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty\n");
+
+ /*
+ * Make sure that the caller is infact calling this
+ * function after all the vCPUs are added.
+ */
+ list_for_each(iter, &vm->vcpus)
+ nr_vcpus_created++;
+ TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)\n",
+ nr_vcpus, nr_vcpus_created);
+
+ /* Distributor setup */
+ gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
+ virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages);
+
+ /* Redistributor setup */
+ redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0);
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr, true);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
+ KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
+ virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages);
+
+ kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+
+ return gic_fd;
+}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index e487f798e095..b72429108993 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <linux/mman.h>
#include "linux/kernel.h"
@@ -129,13 +130,16 @@ size_t get_trans_hugepagesz(void)
{
size_t size;
FILE *f;
+ int ret;
TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
- fscanf(f, "%ld", &size);
+ ret = fscanf(f, "%ld", &size);
+ ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
fclose(f);
return size;
@@ -309,3 +313,19 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
TEST_FAIL("Unknown backing src type: %s", type_name);
return -1;
}
+
+long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ /* Return MIN_RUN_DELAY_NS upon failure just to be safe */
+ if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) < 2)
+ val[1] = MIN_RUN_DELAY_NS;
+ fclose(fp);
+
+ return val[1];
+}
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index d6e381e01db7..1727f75e0c2c 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -127,43 +127,54 @@ static bool verbose;
pr_info(__VA_ARGS__); \
} while (0)
+static void check_mmio_access(struct vm_data *vm, struct kvm_run *run)
+{
+ TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
+ TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
+ TEST_ASSERT(run->mmio.len == 8,
+ "Unexpected exit mmio size = %u", run->mmio.len);
+ TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
+ run->mmio.phys_addr <= vm->mmio_gpa_max,
+ "Unexpected exit mmio address = 0x%llx",
+ run->mmio.phys_addr);
+}
+
static void *vcpu_worker(void *data)
{
struct vm_data *vm = data;
struct kvm_run *run;
struct ucall uc;
- uint64_t cmd;
run = vcpu_state(vm->vm, VCPU_ID);
while (1) {
vcpu_run(vm->vm, VCPU_ID);
- if (run->exit_reason == KVM_EXIT_IO) {
- cmd = get_ucall(vm->vm, VCPU_ID, &uc);
- if (cmd != UCALL_SYNC)
- break;
-
+ switch (get_ucall(vm->vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == 0,
+ "Unexpected sync ucall, got %lx",
+ (ulong)uc.args[1]);
sem_post(&vcpu_ready);
continue;
- }
-
- if (run->exit_reason != KVM_EXIT_MMIO)
+ case UCALL_NONE:
+ if (run->exit_reason == KVM_EXIT_MMIO)
+ check_mmio_access(vm, run);
+ else
+ goto done;
break;
-
- TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
- TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
- TEST_ASSERT(run->mmio.len == 8,
- "Unexpected exit mmio size = %u", run->mmio.len);
- TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
- run->mmio.phys_addr <= vm->mmio_gpa_max,
- "Unexpected exit mmio address = 0x%llx",
- run->mmio.phys_addr);
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld, val = %lu",
+ (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
}
- if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
- TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2]);
-
+done:
return NULL;
}
@@ -268,6 +279,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
TEST_ASSERT(data->hva_slots, "malloc() fail");
data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
+ ucall_init(data->vm, NULL);
pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
max_mem_slots - 1, data->pages_per_slot, rempages);
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index aafaa8e38b7c..62f2eb9ee3d5 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -10,7 +10,6 @@
#include <sched.h>
#include <pthread.h>
#include <linux/kernel.h>
-#include <sys/syscall.h>
#include <asm/kvm.h>
#include <asm/kvm_para.h>
@@ -20,7 +19,6 @@
#define NR_VCPUS 4
#define ST_GPA_BASE (1 << 30)
-#define MIN_RUN_DELAY_NS 200000UL
static void *st_gva[NR_VCPUS];
static uint64_t guest_stolen_time[NR_VCPUS];
@@ -217,20 +215,6 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
#endif
-static long get_run_delay(void)
-{
- char path[64];
- long val[2];
- FILE *fp;
-
- sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
- fp = fopen(path, "r");
- fscanf(fp, "%ld %ld ", &val[0], &val[1]);
- fclose(fp);
-
- return val[1];
-}
-
static void *do_steal_time(void *arg)
{
struct timespec ts, stop;
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
index e6480fd5c4bd..8039e1eff938 100644
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -82,7 +82,8 @@ int get_warnings_count(void)
FILE *f;
f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
- fscanf(f, "%d", &warnings);
+ if (fscanf(f, "%d", &warnings) < 1)
+ warnings = 0;
fclose(f);
return warnings;
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 117bf49a3d79..eda0d2a51224 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -14,7 +14,6 @@
#include <stdint.h>
#include <time.h>
#include <sched.h>
-#include <sys/syscall.h>
#define VCPU_ID 5
@@ -98,20 +97,6 @@ static void guest_code(void)
GUEST_DONE();
}
-static long get_run_delay(void)
-{
- char path[64];
- long val[2];
- FILE *fp;
-
- sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
- fp = fopen(path, "r");
- fscanf(fp, "%ld %ld ", &val[0], &val[1]);
- fclose(fp);
-
- return val[1];
-}
-
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
if (a->tv_sec > b->tv_sec)
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index fa2ac0e56b43..fe7ee2b0f29c 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -48,6 +48,7 @@ ARCH ?= $(SUBARCH)
# When local build is done, headers are installed in the default
# INSTALL_HDR_PATH usr/include.
.PHONY: khdr
+.NOTPARALLEL:
khdr:
ifndef KSFT_KHDR_INSTALL_DONE
ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index cfc7f4f97fd1..df341648f818 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,5 +1,2 @@
-##TEST_GEN_FILES := test_unix_oob
-TEST_PROGS := test_unix_oob
+TEST_GEN_PROGS := test_unix_oob
include ../../lib.mk
-
-all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index 0f3e3763f4f8..3dece8b29253 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -271,8 +271,9 @@ main(int argc, char **argv)
read_oob(pfd, &oob);
if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
- fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
- "atmark %d\n", signal_recvd, len, oob, atmark);
+ fprintf(stderr,
+ "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+ signal_recvd, len, oob, atmark);
die(1);
}
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..b9ab37380f33
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+# veth1-.
+# |
+# NAT Gateway --veth0--> Server
+# | |
+# veth2-' |
+# client2---' |
+# .... |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces. Each client connects to Server, each with colliding tuples:
+# clientsaddr:10000 -> serveraddr:dport
+# NAT Gateway is supposed to do port reallocation for each of the
+# connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+ ip netns del $gw
+ ip netns del $srv
+ for i in $(seq 1 $maxclients); do
+ ip netns del ns-cl$i-$sfx 2>/dev/null
+ done
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+ have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create server netns $srv"
+ exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+
+ ip netns add "$cl"
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not create client netns $cl"
+ exit $ksft_skip
+ fi
+ ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+ fi
+done
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+ echo netns exec "$cl" ip link set lo up
+ echo netns exec "$cl" ip link set eth0 up
+ echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+ echo netns exec "$gw" ip link set veth$i up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+ # clients have same IP addresses.
+ echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+ echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+ echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+ # NB: same addresses on client-facing interfaces.
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+ # gw: policy routing
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+ map iiftomark {
+ type ifname : mark
+ }
+
+ map iiftozone {
+ typeof iifname : ct zone
+ }
+
+ set inicmp {
+ flags dynamic
+ type ipv4_addr . ifname . ipv4_addr
+ }
+ set inflows {
+ flags dynamic
+ type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+ }
+
+ set inflows6 {
+ flags dynamic
+ type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -64000; policy accept;
+ ct original zone set meta iifname map @iiftozone
+ meta mark set meta iifname map @iiftomark
+
+ tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+ add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+ ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+ }
+
+ chain nat_postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ct mark set meta mark meta oifname veth0 masquerade
+ }
+
+ chain mangle_prerouting {
+ type filter hook prerouting priority -100; policy accept;
+ ct direction reply meta mark set ct mark
+ }
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+ echo add element inet raw iiftozone \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 $maxclients); do
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+ if [ $? -ne 0 ]; then
+ echo FAIL: Ping failure from $cl 1>&2
+ ret=1
+ break
+ fi
+done
+
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ break
+ fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+ echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+ echo "SKIP: iperf3 not installed"
+ if [ $ret -ne 0 ];then
+ exit $ret
+ fi
+ exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+ if [ $ret -ne 0 ]; then
+ break
+ fi
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo FAIL: Failure to connect for $cl 1>&2
+ ip netns exec $gw conntrack -S 1>&2
+ ret=1
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+ break
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..ac646376eb01
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=20000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+ ip netns del $ns
+}
+
+ip netns add $ns
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+ have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+ local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+ map rndzone {
+ typeof numgen inc mod $max_zones : ct zone
+ }
+
+ chain output {
+ type filter hook output priority -64000; policy accept;
+ udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
+ }
+}
+EOF
+ (
+ echo "add element inet raw rndzone {"
+ for i in $(seq 1 $max_zones);do
+ echo -n "$i : $i"
+ if [ $i -lt $max_zones ]; then
+ echo ","
+ else
+ echo "}"
+ fi
+ done
+ ) | ip netns exec $ns nft -f /dev/stdin
+
+ local i=0
+ local j=0
+ local outerstart=$(date +%s%3N)
+ local stop=$outerstart
+
+ while [ $i -lt $max_zones ]; do
+ local start=$(date +%s%3N)
+ i=$((i + 10000))
+ j=$((j + 1))
+ dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
+ if [ $? -ne 0 ] ;then
+ ret=1
+ break
+ fi
+
+ stop=$(date +%s%3N)
+ local duration=$((stop-start))
+ echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
+ done
+
+ if [ $have_ct_tool -eq 1 ]; then
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries from packet path in $duration ms total"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+ ret=1
+ fi
+ fi
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: insert $max_zones entries from packet path" 1>&2
+ fi
+}
+
+test_conntrack_tool() {
+ local max_zones=$1
+
+ ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+ local outerstart=$(date +%s%3N)
+ local start=$(date +%s%3N)
+ local stop=$start
+ local i=0
+ while [ $i -lt $max_zones ]; do
+ i=$((i + 1))
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+ echo "FAIL: conntrack -I returned an error"
+ ret=1
+ break
+ fi
+
+ if [ $((i%10000)) -eq 0 ];then
+ stop=$(date +%s%3N)
+
+ local duration=$((stop-start))
+ echo "PASS: added 10000 entries in $duration ms (now $i total)"
+ start=$stop
+ fi
+ done
+
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+ ret=1
+ fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+ test_conntrack_tool $zones
+else
+ echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+ if [ $ret -eq 0 ];then
+ exit $ksft_skip
+ fi
+fi
+
+exit $ret