summaryrefslogtreecommitdiff
path: root/tools/testing
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2021-10-28 07:59:38 +0300
committerDave Airlie <airlied@redhat.com>2021-10-28 07:59:38 +0300
commit970eae15600a883e4ad27dd0757b18871cc983ab (patch)
tree7aeb639a35ca2ab8b54a5987837f8ac5f96ee5f4 /tools/testing
parent27f4432577e4f78bbdf15c104748cc738db8eead (diff)
parent3906fe9bb7f1a2c8667ae54e967dc8690824f4ea (diff)
downloadlinux-970eae15600a883e4ad27dd0757b18871cc983ab.tar.xz
BackMerge tag 'v5.15-rc7' into drm-next
The msm next tree is based on rc3, so let's just backmerge rc7 before pulling it in. Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'tools/testing')
-rwxr-xr-xtools/testing/kunit/kunit.py24
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py8
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c7
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh13
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c5
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc54
-rw-r--r--tools/testing/selftests/kvm/.gitignore2
-rw-r--r--tools/testing/selftests/kvm/Makefile4
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c6
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c15
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c62
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h7
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h34
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c7
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c39
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c286
-rw-r--r--tools/testing/selftests/kvm/steal_time.c20
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmio_warning_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c128
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c15
-rw-r--r--tools/testing/selftests/lib.mk1
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c5
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh60
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh172
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh8
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh26
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c164
-rw-r--r--tools/testing/selftests/net/nettest.c28
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh1
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh145
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat_zones.sh309
-rwxr-xr-xtools/testing/selftests/netfilter/nft_zones_many.sh156
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c23
-rw-r--r--tools/testing/vsock/vsock_diag_test.c2
40 files changed, 1622 insertions, 231 deletions
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 5a931456e718..ac35c61f65f5 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -16,7 +16,7 @@ assert sys.version_info >= (3, 7), "Python version is too old"
from collections import namedtuple
from enum import Enum, auto
-from typing import Iterable
+from typing import Iterable, Sequence
import kunit_config
import kunit_json
@@ -186,6 +186,26 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
exec_result.elapsed_time))
return parse_result
+# Problem:
+# $ kunit.py run --json
+# works as one would expect and prints the parsed test results as JSON.
+# $ kunit.py run --json suite_name
+# would *not* pass suite_name as the filter_glob and print as json.
+# argparse will consider it to be another way of writing
+# $ kunit.py run --json=suite_name
+# i.e. it would run all tests, and dump the json to a `suite_name` file.
+# So we hackily automatically rewrite --json => --json=stdout
+pseudo_bool_flag_defaults = {
+ '--json': 'stdout',
+ '--raw_output': 'kunit',
+}
+def massage_argv(argv: Sequence[str]) -> Sequence[str]:
+ def massage_arg(arg: str) -> str:
+ if arg not in pseudo_bool_flag_defaults:
+ return arg
+ return f'{arg}={pseudo_bool_flag_defaults[arg]}'
+ return list(map(massage_arg, argv))
+
def add_common_opts(parser) -> None:
parser.add_argument('--build_dir',
help='As in the make command, it specifies the build '
@@ -303,7 +323,7 @@ def main(argv, linux=None):
help='Specifies the file to read results from.',
type=str, nargs='?', metavar='input_file')
- cli_args = parser.parse_args(argv)
+ cli_args = parser.parse_args(massage_argv(argv))
if get_kernel_root_path():
os.chdir(get_kernel_root_path())
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 619c4554cbff..1edcc8373b4e 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -408,6 +408,14 @@ class KUnitMainTest(unittest.TestCase):
self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
+ def test_run_raw_output_does_not_take_positional_args(self):
+ # --raw_output is a string flag, but we don't want it to consume
+ # any positional arguments, only ones after an '='
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ kunit.main(['run', '--raw_output', 'filter_glob'], self.linux_source_mock)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300)
+
def test_exec_timeout(self):
timeout = 3453
kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 6836510a522f..22722abc9dfa 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -266,16 +266,19 @@ int test_init(struct tdescr *td)
td->feats_supported |= FEAT_SSBS;
if (getauxval(AT_HWCAP) & HWCAP_SVE)
td->feats_supported |= FEAT_SVE;
- if (feats_ok(td))
+ if (feats_ok(td)) {
fprintf(stderr,
"Required Features: [%s] supported\n",
feats_to_string(td->feats_required &
td->feats_supported));
- else
+ } else {
fprintf(stderr,
"Required Features: [%s] NOT supported\n",
feats_to_string(td->feats_required &
~td->feats_supported));
+ td->result = KSFT_SKIP;
+ return 0;
+ }
}
/* Perform test specific additional initialization */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 866531c08e4f..799b88152e9e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -375,7 +375,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
+ $(wildcard $(BPFDIR)/bpf_*.h) \
+ | $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS))
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index 59ea56945e6c..b497bb85b667 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -112,6 +112,14 @@ setup()
ip netns add "${NS2}"
ip netns add "${NS3}"
+ # rp_filter gets confused by what these tests are doing, so disable it
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
@@ -236,11 +244,6 @@ setup()
ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
sleep 1 # reduce flakiness
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index 4de902ea14d8..de1c4e6de0b2 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
#include <malloc.h>
#include <sys/ioctl.h>
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index beee0d5646a6..f7d84549cc3e 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Copyright 2020 NXP Semiconductors
+# Copyright 2020 NXP
WAIT_TIME=1
NUM_NETIFS=4
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
index 5f5b2ba3e557..60c02b482be8 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
@@ -11,8 +11,8 @@ SYSTEM="syscalls"
EVENT="sys_enter_openat"
FIELD="filename"
EPROBE="eprobe_open"
-
-echo "e:$EPROBE $SYSTEM/$EVENT file=+0(\$filename):ustring" >> dynamic_events
+OPTIONS="file=+0(\$filename):ustring"
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
grep -q "$EPROBE" dynamic_events
test -d events/eprobes/$EPROBE
@@ -37,4 +37,54 @@ echo "-:$EPROBE" >> dynamic_events
! grep -q "$EPROBE" dynamic_events
! test -d events/eprobes/$EPROBE
+# test various ways to remove the probe (already tested with just event name)
+
+# With group name
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With group name and system/event
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With just event name and system/event
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:$EPROBE $SYSTEM/$EVENT" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With just event name and system/event and options
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With group name and system/event and options
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# Finally make sure what is in the dynamic_events file clears it too
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+LINE=`sed -e '/$EPROBE/s/^e/-/' < dynamic_events`
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
clear_trace
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 98053d3afbda..b8dbabe24ac2 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -24,6 +24,7 @@
/x86_64/smm_test
/x86_64/state_test
/x86_64/svm_vmcall_test
+/x86_64/svm_int_ctl_test
/x86_64/sync_regs_test
/x86_64/tsc_msrs_test
/x86_64/userspace_msr_exit_test
@@ -48,6 +49,7 @@
/kvm_page_table_test
/memslot_modification_stress_test
/memslot_perf_test
+/rseq_test
/set_memory_region_test
/steal_time
/kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 5d05801ab816..d1774f461393 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
@@ -80,6 +81,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += kvm_page_table_test
TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
TEST_GEN_PROGS_x86_64 += memslot_perf_test
+TEST_GEN_PROGS_x86_64 += rseq_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
@@ -93,6 +95,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += kvm_page_table_test
+TEST_GEN_PROGS_aarch64 += rseq_test
TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
@@ -104,6 +107,7 @@ TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS_s390x += kvm_page_table_test
+TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 71e277c7c3f3..5d95113c7b7c 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -371,9 +371,7 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n\n");
- backing_src_help();
+ backing_src_help("-s");
puts("");
exit(0);
}
@@ -381,7 +379,7 @@ static void help(char *name)
int main(int argc, char *argv[])
{
struct test_params params = {
- .backing_src = VM_MEM_SRC_ANONYMOUS,
+ .backing_src = DEFAULT_VM_MEM_SRC,
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
.vcpus = 1,
};
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index e79c1b64977f..1510b21e6306 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -179,7 +179,7 @@ static void *uffd_handler_thread_fn(void *arg)
return NULL;
}
- if (!pollfd[0].revents & POLLIN)
+ if (!(pollfd[0].revents & POLLIN))
continue;
r = read(uffd, &msg, sizeof(msg));
@@ -416,7 +416,7 @@ static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
- " [-b memory] [-t type] [-v vcpus] [-o]\n", name);
+ " [-b memory] [-s type] [-v vcpus] [-o]\n", name);
guest_modes_help();
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" UFFD registration mode: 'MISSING' or 'MINOR'.\n");
@@ -426,8 +426,7 @@ static void help(char *name)
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
- printf(" -t: The type of backing memory to use. Default: anonymous\n");
- backing_src_help();
+ backing_src_help("-s");
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
@@ -439,14 +438,14 @@ int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = {
- .src_type = VM_MEM_SRC_ANONYMOUS,
+ .src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true,
};
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -465,7 +464,7 @@ int main(int argc, char *argv[])
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
- case 't':
+ case 's':
p.src_type = parse_backing_src_type(optarg);
break;
case 'v':
@@ -485,7 +484,7 @@ int main(int argc, char *argv[])
if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
!backing_src_is_shared(p.src_type)) {
- TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
+ TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
}
for_each_guest_mode(run_test, &p);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 479868570d59..7ffab5bd5ce5 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -118,42 +118,64 @@ static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
toggle_dirty_logging(vm, slots, false);
}
-static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
- uint64_t nr_pages)
+static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
{
- uint64_t slot_pages = nr_pages / slots;
int i;
for (i = 0; i < slots; i++) {
int slot = PERF_TEST_MEM_SLOT_INDEX + i;
- unsigned long *slot_bitmap = bitmap + i * slot_pages;
- kvm_vm_get_dirty_log(vm, slot, slot_bitmap);
+ kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
}
}
-static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
- uint64_t nr_pages)
+static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot)
{
- uint64_t slot_pages = nr_pages / slots;
int i;
for (i = 0; i < slots; i++) {
int slot = PERF_TEST_MEM_SLOT_INDEX + i;
- unsigned long *slot_bitmap = bitmap + i * slot_pages;
- kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages);
+ kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
}
}
+static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+ unsigned long **bitmaps;
+ int i;
+
+ bitmaps = malloc(slots * sizeof(bitmaps[0]));
+ TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+ for (i = 0; i < slots; i++) {
+ bitmaps[i] = bitmap_zalloc(pages_per_slot);
+ TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+ }
+
+ return bitmaps;
+}
+
+static void free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++)
+ free(bitmaps[i]);
+
+ free(bitmaps);
+}
+
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
pthread_t *vcpu_threads;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long **bitmaps;
uint64_t guest_num_pages;
uint64_t host_num_pages;
+ uint64_t pages_per_slot;
int vcpu_id;
struct timespec start;
struct timespec ts_diff;
@@ -171,7 +193,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
- bmap = bitmap_zalloc(host_num_pages);
+ pages_per_slot = host_num_pages / p->slots;
+
+ bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
if (dirty_log_manual_caps) {
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
@@ -239,7 +263,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
clock_gettime(CLOCK_MONOTONIC, &start);
- get_dirty_log(vm, p->slots, bmap, host_num_pages);
+ get_dirty_log(vm, bitmaps, p->slots);
ts_diff = timespec_elapsed(start);
get_dirty_log_total = timespec_add(get_dirty_log_total,
ts_diff);
@@ -248,7 +272,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (dirty_log_manual_caps) {
clock_gettime(CLOCK_MONOTONIC, &start);
- clear_dirty_log(vm, p->slots, bmap, host_num_pages);
+ clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
ts_diff = timespec_elapsed(start);
clear_dirty_log_total = timespec_add(clear_dirty_log_total,
ts_diff);
@@ -281,7 +305,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}
- free(bmap);
+ free_bitmaps(bitmaps, p->slots);
free(vcpu_threads);
perf_test_destroy_vm(vm);
}
@@ -308,11 +332,9 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n\n");
+ backing_src_help("-s");
printf(" -x: Split the memory region into this number of memslots.\n"
- " (default: 1)");
- backing_src_help();
+ " (default: 1)\n");
puts("");
exit(0);
}
@@ -324,7 +346,7 @@ int main(int argc, char *argv[])
.iterations = TEST_HOST_LOOP_N,
.wr_fract = 1,
.partition_vcpu_memory_access = true,
- .backing_src = VM_MEM_SRC_ANONYMOUS,
+ .backing_src = DEFAULT_VM_MEM_SRC,
.slots = 1,
};
int opt;
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index d79be15dd3d2..f8fddc84c0d3 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -90,18 +90,23 @@ enum vm_mem_backing_src_type {
NUM_SRC_TYPES,
};
+#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
+
struct vm_mem_backing_src_alias {
const char *name;
uint32_t flag;
};
+#define MIN_RUN_DELAY_NS 200000UL
+
bool thp_configured(void);
size_t get_trans_hugepagesz(void);
size_t get_def_hugetlb_pagesz(void);
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
size_t get_backing_src_pagesz(uint32_t i);
-void backing_src_help(void);
+void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+long get_run_delay(void);
/*
* Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 242ae8e09a65..05e65ca1c30c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -312,37 +312,37 @@ static inline void set_xmm(int n, unsigned long val)
}
}
-typedef unsigned long v1di __attribute__ ((vector_size (8)));
+#define GET_XMM(__xmm) \
+({ \
+ unsigned long __val; \
+ asm volatile("movq %%"#__xmm", %0" : "=r"(__val)); \
+ __val; \
+})
+
static inline unsigned long get_xmm(int n)
{
assert(n >= 0 && n <= 7);
- register v1di xmm0 __asm__("%xmm0");
- register v1di xmm1 __asm__("%xmm1");
- register v1di xmm2 __asm__("%xmm2");
- register v1di xmm3 __asm__("%xmm3");
- register v1di xmm4 __asm__("%xmm4");
- register v1di xmm5 __asm__("%xmm5");
- register v1di xmm6 __asm__("%xmm6");
- register v1di xmm7 __asm__("%xmm7");
switch (n) {
case 0:
- return (unsigned long)xmm0;
+ return GET_XMM(xmm0);
case 1:
- return (unsigned long)xmm1;
+ return GET_XMM(xmm1);
case 2:
- return (unsigned long)xmm2;
+ return GET_XMM(xmm2);
case 3:
- return (unsigned long)xmm3;
+ return GET_XMM(xmm3);
case 4:
- return (unsigned long)xmm4;
+ return GET_XMM(xmm4);
case 5:
- return (unsigned long)xmm5;
+ return GET_XMM(xmm5);
case 6:
- return (unsigned long)xmm6;
+ return GET_XMM(xmm6);
case 7:
- return (unsigned long)xmm7;
+ return GET_XMM(xmm7);
}
+
+ /* never reached */
return 0;
}
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 0d04a7db7f24..36407cb0ec85 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -456,10 +456,7 @@ static void help(char *name)
" (default: 1G)\n");
printf(" -v: specify the number of vCPUs to run\n"
" (default: 1)\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n"
- " (default: anonymous)\n\n");
- backing_src_help();
+ backing_src_help("-s");
puts("");
}
@@ -468,7 +465,7 @@ int main(int argc, char *argv[])
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = {
.test_mem_size = DEFAULT_TEST_MEM_SIZE,
- .src_type = VM_MEM_SRC_ANONYMOUS,
+ .src_type = DEFAULT_VM_MEM_SRC,
};
int opt;
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index af1031fed97f..b72429108993 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <linux/mman.h>
#include "linux/kernel.h"
@@ -129,13 +130,16 @@ size_t get_trans_hugepagesz(void)
{
size_t size;
FILE *f;
+ int ret;
TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
- fscanf(f, "%ld", &size);
+ ret = fscanf(f, "%ld", &size);
+ ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
fclose(f);
return size;
@@ -279,13 +283,22 @@ size_t get_backing_src_pagesz(uint32_t i)
}
}
-void backing_src_help(void)
+static void print_available_backing_src_types(const char *prefix)
{
int i;
- printf("Available backing src types:\n");
+ printf("%sAvailable backing src types:\n", prefix);
+
for (i = 0; i < NUM_SRC_TYPES; i++)
- printf("\t%s\n", vm_mem_backing_src_alias(i)->name);
+ printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name);
+}
+
+void backing_src_help(const char *flag)
+{
+ printf(" %s: specify the type of memory that should be used to\n"
+ " back the guest data region. (default: %s)\n",
+ flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
+ print_available_backing_src_types(" ");
}
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
@@ -296,7 +309,23 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
return i;
- backing_src_help();
+ print_available_backing_src_types("");
TEST_FAIL("Unknown backing src type: %s", type_name);
return -1;
}
+
+long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ /* Return MIN_RUN_DELAY_NS upon failure just to be safe */
+ if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) < 2)
+ val[1] = MIN_RUN_DELAY_NS;
+ fclose(fp);
+
+ return val[1];
+}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
new file mode 100644
index 000000000000..4158da0da2bb
--- /dev/null
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syscall.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <asm/barrier.h>
+#include <linux/atomic.h>
+#include <linux/rseq.h>
+#include <linux/unistd.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define VCPU_ID 0
+
+static __thread volatile struct rseq __rseq = {
+ .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+};
+
+/*
+ * Use an arbitrary, bogus signature for configuring rseq, this test does not
+ * actually enter an rseq critical section.
+ */
+#define RSEQ_SIG 0xdeadbeef
+
+/*
+ * Any bug related to task migration is likely to be timing-dependent; perform
+ * a large number of migrations to reduce the odds of a false negative.
+ */
+#define NR_TASK_MIGRATIONS 100000
+
+static pthread_t migration_thread;
+static cpu_set_t possible_mask;
+static int min_cpu, max_cpu;
+static bool done;
+
+static atomic_t seq_cnt;
+
+static void guest_code(void)
+{
+ for (;;)
+ GUEST_SYNC(0);
+}
+
+static void sys_rseq(int flags)
+{
+ int r;
+
+ r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG);
+ TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
+}
+
+static int next_cpu(int cpu)
+{
+ /*
+ * Advance to the next CPU, skipping those that weren't in the original
+ * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
+ * data storage is considered as opaque. Note, if this task is pinned
+ * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will
+ * burn a lot cycles and the test will take longer than normal to
+ * complete.
+ */
+ do {
+ cpu++;
+ if (cpu > max_cpu) {
+ cpu = min_cpu;
+ TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
+ "Min CPU = %d must always be usable", cpu);
+ break;
+ }
+ } while (!CPU_ISSET(cpu, &possible_mask));
+
+ return cpu;
+}
+
+static void *migration_worker(void *ign)
+{
+ cpu_set_t allowed_mask;
+ int r, i, cpu;
+
+ CPU_ZERO(&allowed_mask);
+
+ for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
+ CPU_SET(cpu, &allowed_mask);
+
+ /*
+ * Bump the sequence count twice to allow the reader to detect
+ * that a migration may have occurred in between rseq and sched
+ * CPU ID reads. An odd sequence count indicates a migration
+ * is in-progress, while a completely different count indicates
+ * a migration occurred since the count was last read.
+ */
+ atomic_inc(&seq_cnt);
+
+ /*
+ * Ensure the odd count is visible while sched_getcpu() isn't
+ * stable, i.e. while changing affinity is in-progress.
+ */
+ smp_wmb();
+ r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
+ errno, strerror(errno));
+ smp_wmb();
+ atomic_inc(&seq_cnt);
+
+ CPU_CLR(cpu, &allowed_mask);
+
+ /*
+ * Wait 1-10us before proceeding to the next iteration and more
+ * specifically, before bumping seq_cnt again. A delay is
+ * needed on three fronts:
+ *
+ * 1. To allow sched_setaffinity() to prompt migration before
+ * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
+ * (or TIF_NEED_RESCHED, which indirectly leads to handling
+ * NOTIFY_RESUME) is handled in KVM context.
+ *
+ * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
+ * the guest, the guest will trigger a IO/MMIO exit all the
+ * way to userspace and the TIF flags will be handled by
+ * the generic "exit to userspace" logic, not by KVM. The
+ * exit to userspace is necessary to give the test a chance
+ * to check the rseq CPU ID (see #2).
+ *
+ * Alternatively, guest_code() could include an instruction
+ * to trigger an exit that is handled by KVM, but any such
+ * exit requires architecture specific code.
+ *
+ * 2. To let ioctl(KVM_RUN) make its way back to the test
+ * before the next round of migration. The test's check on
+ * the rseq CPU ID must wait for migration to complete in
+ * order to avoid false positive, thus any kernel rseq bug
+ * will be missed if the next migration starts before the
+ * check completes.
+ *
+ * 3. To ensure the read-side makes efficient forward progress,
+ * e.g. if sched_getcpu() involves a syscall. Stalling the
+ * read-side means the test will spend more time waiting for
+ * sched_getcpu() to stabilize and less time trying to hit
+ * the timing-dependent bug.
+ *
+ * Because any bug in this area is likely to be timing-dependent,
+ * run with a range of delays at 1us intervals from 1us to 10us
+ * as a best effort to avoid tuning the test to the point where
+ * it can hit _only_ the original bug and not detect future
+ * regressions.
+ *
+ * The original bug can reproduce with a delay up to ~500us on
+ * x86-64, but starts to require more iterations to reproduce
+ * as the delay creeps above ~10us, and the average runtime of
+ * each iteration obviously increases as well. Cap the delay
+ * at 10us to keep test runtime reasonable while minimizing
+ * potential coverage loss.
+ *
+ * The lower bound for reproducing the bug is likely below 1us,
+ * e.g. failures occur on x86-64 with nanosleep(0), but at that
+ * point the overhead of the syscall likely dominates the delay.
+ * Use usleep() for simplicity and to avoid unnecessary kernel
+ * dependencies.
+ */
+ usleep((i % 10) + 1);
+ }
+ done = true;
+ return NULL;
+}
+
+static int calc_min_max_cpu(void)
+{
+ int i, cnt, nproc;
+
+ if (CPU_COUNT(&possible_mask) < 2)
+ return -EINVAL;
+
+ /*
+ * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
+ * this task is affined to in order to reduce the time spent querying
+ * unusable CPUs, e.g. if this task is pinned to a small percentage of
+ * total CPUs.
+ */
+ nproc = get_nprocs_conf();
+ min_cpu = -1;
+ max_cpu = -1;
+ cnt = 0;
+
+ for (i = 0; i < nproc; i++) {
+ if (!CPU_ISSET(i, &possible_mask))
+ continue;
+ if (min_cpu == -1)
+ min_cpu = i;
+ max_cpu = i;
+ cnt++;
+ }
+
+ return (cnt < 2) ? -EINVAL : 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int r, i, snapshot;
+ struct kvm_vm *vm;
+ u32 cpu, rseq_cpu;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+ TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
+ strerror(errno));
+
+ if (calc_min_max_cpu()) {
+ print_skip("Only one usable CPU, task migration not possible");
+ exit(KSFT_SKIP);
+ }
+
+ sys_rseq(0);
+
+ /*
+ * Create and run a dummy VM that immediately exits to userspace via
+ * GUEST_SYNC, while concurrently migrating the process by setting its
+ * CPU affinity.
+ */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ pthread_create(&migration_thread, NULL, migration_worker, 0);
+
+ for (i = 0; !done; i++) {
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ "Guest failed?");
+
+ /*
+ * Verify rseq's CPU matches sched's CPU. Ensure migration
+ * doesn't occur between sched_getcpu() and reading the rseq
+ * cpu_id by rereading both if the sequence count changes, or
+ * if the count is odd (migration in-progress).
+ */
+ do {
+ /*
+ * Drop bit 0 to force a mismatch if the count is odd,
+ * i.e. if a migration is in-progress.
+ */
+ snapshot = atomic_read(&seq_cnt) & ~1;
+
+ /*
+ * Ensure reading sched_getcpu() and rseq.cpu_id
+ * complete in a single "no migration" window, i.e. are
+ * not reordered across the seq_cnt reads.
+ */
+ smp_rmb();
+ cpu = sched_getcpu();
+ rseq_cpu = READ_ONCE(__rseq.cpu_id);
+ smp_rmb();
+ } while (snapshot != atomic_read(&seq_cnt));
+
+ TEST_ASSERT(rseq_cpu == cpu,
+ "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
+ }
+
+ /*
+ * Sanity check that the test was able to enter the guest a reasonable
+ * number of times, e.g. didn't get stalled too often/long waiting for
+ * sched_getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a
+ * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs
+ * than migrations given the 1us+ delay in the migration task.
+ */
+ TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
+ "Only performed %d KVM_RUNs, task stalled too much?\n", i);
+
+ pthread_join(migration_thread, NULL);
+
+ kvm_vm_free(vm);
+
+ sys_rseq(RSEQ_FLAG_UNREGISTER);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index ecec30865a74..62f2eb9ee3d5 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -10,7 +10,6 @@
#include <sched.h>
#include <pthread.h>
#include <linux/kernel.h>
-#include <sys/syscall.h>
#include <asm/kvm.h>
#include <asm/kvm_para.h>
@@ -20,7 +19,6 @@
#define NR_VCPUS 4
#define ST_GPA_BASE (1 << 30)
-#define MIN_RUN_DELAY_NS 200000UL
static void *st_gva[NR_VCPUS];
static uint64_t guest_stolen_time[NR_VCPUS];
@@ -118,12 +116,12 @@ struct st_time {
uint64_t st_time;
};
-static int64_t smccc(uint32_t func, uint32_t arg)
+static int64_t smccc(uint32_t func, uint64_t arg)
{
unsigned long ret;
asm volatile(
- "mov x0, %1\n"
+ "mov w0, %w1\n"
"mov x1, %2\n"
"hvc #0\n"
"mov %0, x0\n"
@@ -217,20 +215,6 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
#endif
-static long get_run_delay(void)
-{
- char path[64];
- long val[2];
- FILE *fp;
-
- sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
- fp = fopen(path, "r");
- fscanf(fp, "%ld %ld ", &val[0], &val[1]);
- fclose(fp);
-
- return val[1];
-}
-
static void *do_steal_time(void *arg)
{
struct timespec ts, stop;
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
index e6480fd5c4bd..8039e1eff938 100644
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -82,7 +82,8 @@ int get_warnings_count(void)
FILE *f;
f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
- fscanf(f, "%d", &warnings);
+ if (fscanf(f, "%d", &warnings) < 1)
+ warnings = 0;
fclose(f);
return warnings;
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
new file mode 100644
index 000000000000..df04f56ce859
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+#define VCPU_ID 0
+
+static struct kvm_vm *vm;
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+ vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+ x2apic_write_reg(APIC_EOI, 0x00);
+ intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
+ * and since L1 didn't enable virtual interrupt masking,
+ * L2 should receive it and not L1.
+ *
+ * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+ * so it should also receive it after the following 'sti'.
+ */
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ GUEST_ASSERT(vintr_irq_called);
+ GUEST_ASSERT(intr_irq_called);
+
+ __asm__ __volatile__(
+ "vmcall\n"
+ );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* No virtual interrupt masking */
+ vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ /* No intercepts for real and virtual interrupts */
+ vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
+
+ /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+ vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t svm_gva;
+
+ nested_svm_check_supported();
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+ vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s", (const char *)uc.args[0]);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 117bf49a3d79..eda0d2a51224 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -14,7 +14,6 @@
#include <stdint.h>
#include <time.h>
#include <sched.h>
-#include <sys/syscall.h>
#define VCPU_ID 5
@@ -98,20 +97,6 @@ static void guest_code(void)
GUEST_DONE();
}
-static long get_run_delay(void)
-{
- char path[64];
- long val[2];
- FILE *fp;
-
- sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
- fp = fopen(path, "r");
- fscanf(fp, "%ld %ld ", &val[0], &val[1]);
- fclose(fp);
-
- return val[1];
-}
-
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
if (a->tv_sec > b->tv_sec)
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index fa2ac0e56b43..fe7ee2b0f29c 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -48,6 +48,7 @@ ARCH ?= $(SUBARCH)
# When local build is done, headers are installed in the default
# INSTALL_HDR_PATH usr/include.
.PHONY: khdr
+.NOTPARALLEL:
khdr:
ifndef KSFT_KHDR_INSTALL_DONE
ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index cfc7f4f97fd1..df341648f818 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,5 +1,2 @@
-##TEST_GEN_FILES := test_unix_oob
-TEST_PROGS := test_unix_oob
+TEST_GEN_PROGS := test_unix_oob
include ../../lib.mk
-
-all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index 0f3e3763f4f8..3dece8b29253 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -271,8 +271,9 @@ main(int argc, char **argv)
read_oob(pfd, &oob);
if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
- fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
- "atmark %d\n", signal_recvd, len, oob, atmark);
+ fprintf(stderr,
+ "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+ signal_recvd, len, oob, atmark);
die(1);
}
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 21b646d10b88..86ab429fe7f3 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -43,3 +43,4 @@ CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_BAREUDP=m
CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_CRYPTO_SM4=y
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 13350cd5c8ac..8e67a252b672 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -289,6 +289,12 @@ set_sysctl()
run_cmd sysctl -q -w $*
}
+# get sysctl values in NS-A
+get_sysctl()
+{
+ ${NSA_CMD} sysctl -n $*
+}
+
################################################################################
# Setup for tests
@@ -1003,6 +1009,60 @@ ipv4_tcp_md5()
run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET}
log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
+ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex
+ test_ipv4_md5_vrf__global_server__bind_ifindex0
+}
+
+test_ipv4_md5_vrf__vrf_server__no_bind_ifindex()
+{
+ log_start
+ show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX"
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection"
+
+ log_start
+ show_hint "Binding both the socket and the key is not required but it works"
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection"
+}
+
+test_ipv4_md5_vrf__global_server__bind_ifindex0()
+{
+ # This particular test needs tcp_l3mdev_accept=1 for Global server to accept VRF connections
+ local old_tcp_l3mdev_accept
+ old_tcp_l3mdev_accept=$(get_sysctl net.ipv4.tcp_l3mdev_accept)
+ set_sysctl net.ipv4.tcp_l3mdev_accept=1
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection"
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection"
+ log_start
+
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection"
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection"
+
+ # restore value
+ set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept"
}
ipv4_tcp_novrf()
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index d97bd6889446..72ee644d47bf 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -9,6 +9,7 @@ TEST_PROGS = bridge_igmp.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
gre_multipath.sh \
+ ip6_forward_instats_vrf.sh \
ip6gre_inner_v4_multipath.sh \
ip6gre_inner_v6_multipath.sh \
ipip_flat_gre_key.sh \
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index b802c14d2950..e5e2fbeca22e 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -39,3 +39,5 @@ NETIF_CREATE=yes
# Timeout (in seconds) before ping exits regardless of how many packets have
# been sent or received
PING_TIMEOUT=5
+# IPv6 traceroute utility name.
+TROUTE6=traceroute6
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
new file mode 100755
index 000000000000..9f5b3e2e5e95
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test ipv6 stats on the incoming if when forwarding with VRF
+
+ALL_TESTS="
+ ipv6_ping
+ ipv6_in_too_big_err
+ ipv6_in_hdr_err
+ ipv6_in_addr_err
+ ipv6_in_discard
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:1:1::2/64
+ ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1
+ simple_if_fini $h1 2001:1:1::2/64
+}
+
+router_create()
+{
+ vrf_create router
+ __simple_if_init $rtr1 router 2001:1:1::1/64
+ __simple_if_init $rtr2 router 2001:1:2::1/64
+ mtu_set $rtr2 1280
+}
+
+router_destroy()
+{
+ mtu_restore $rtr2
+ __simple_if_fini $rtr2 2001:1:2::1/64
+ __simple_if_fini $rtr1 2001:1:1::1/64
+ vrf_destroy router
+}
+
+h2_create()
+{
+ simple_if_init $h2 2001:1:2::2/64
+ ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1
+ mtu_set $h2 1280
+}
+
+h2_destroy()
+{
+ mtu_restore $h2
+ ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1
+ simple_if_fini $h2 2001:1:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rtr1=${NETIFS[p2]}
+
+ rtr2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ h1_create
+ router_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ router_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ipv6_in_too_big_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Send too big packets
+ ip vrf exec $vrf_name \
+ $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InTooBigErrors"
+}
+
+ipv6_in_hdr_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Send packets with hop limit 1, easiest with traceroute6 as some ping6
+ # doesn't allow hop limit to be specified
+ ip vrf exec $vrf_name \
+ $TROUTE6 2001:1:2::2 &> /dev/null
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InHdrErrors"
+}
+
+ipv6_in_addr_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Disable forwarding temporary while sending the packet
+ sysctl -qw net.ipv6.conf.all.forwarding=0
+ ip vrf exec $vrf_name \
+ $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ sysctl -qw net.ipv6.conf.all.forwarding=1
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InAddrErrors"
+}
+
+ipv6_in_discard()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards)
+ local vrf_name=$(master_name_get $h1)
+
+ # Add a policy to discard
+ ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block
+ ip vrf exec $vrf_name \
+ $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ ip xfrm policy del dst 2001:1:2::2/128 dir fwd
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InDiscards"
+}
+ipv6_ping()
+{
+ RET=0
+
+ ping6_test $h1 2001:1:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index e7fc5c35b569..92087d423bcf 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -751,6 +751,14 @@ qdisc_parent_stats_get()
| jq '.[] | select(.parent == "'"$parent"'") | '"$selector"
}
+ipv6_stats_get()
+{
+ local dev=$1; shift
+ local stat=$1; shift
+
+ cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2
+}
+
humanize()
{
local speed=$1; shift
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index 3caf72bb9c6a..a2489ec398fe 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -468,10 +468,26 @@ out_bits()
for i in {0..22}
do
ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
- prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
-
- run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
- db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
+ dev veth0 &>/dev/null
+
+ local cmd_res=$?
+ local descr="${desc/<n>/$i}"
+
+ if [[ $i -ge 12 && $i -le 21 ]]
+ then
+ if [ $cmd_res != 0 ]
+ then
+ npassed=$((npassed+1))
+ log_test_passed "$descr"
+ else
+ nfailed=$((nfailed+1))
+ log_test_failed "$descr"
+ fi
+ else
+ run_test "out_bit$i" "$descr" ioam-node-alpha ioam-node-beta \
+ db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ fi
done
bit2size[22]=$tmp
@@ -544,7 +560,7 @@ in_bits()
local tmp=${bit2size[22]}
bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
- for i in {0..22}
+ for i in {0..11} {22..22}
do
ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
index d376cb2c383c..8f6997d35816 100644
--- a/tools/testing/selftests/net/ioam6_parser.c
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -94,16 +94,6 @@ enum {
TEST_OUT_BIT9,
TEST_OUT_BIT10,
TEST_OUT_BIT11,
- TEST_OUT_BIT12,
- TEST_OUT_BIT13,
- TEST_OUT_BIT14,
- TEST_OUT_BIT15,
- TEST_OUT_BIT16,
- TEST_OUT_BIT17,
- TEST_OUT_BIT18,
- TEST_OUT_BIT19,
- TEST_OUT_BIT20,
- TEST_OUT_BIT21,
TEST_OUT_BIT22,
TEST_OUT_FULL_SUPP_TRACE,
@@ -125,16 +115,6 @@ enum {
TEST_IN_BIT9,
TEST_IN_BIT10,
TEST_IN_BIT11,
- TEST_IN_BIT12,
- TEST_IN_BIT13,
- TEST_IN_BIT14,
- TEST_IN_BIT15,
- TEST_IN_BIT16,
- TEST_IN_BIT17,
- TEST_IN_BIT18,
- TEST_IN_BIT19,
- TEST_IN_BIT20,
- TEST_IN_BIT21,
TEST_IN_BIT22,
TEST_IN_FULL_SUPP_TRACE,
@@ -199,30 +179,6 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
ioam6h->nodelen != 2 ||
ioam6h->remlen;
- case TEST_OUT_BIT12:
- case TEST_IN_BIT12:
- case TEST_OUT_BIT13:
- case TEST_IN_BIT13:
- case TEST_OUT_BIT14:
- case TEST_IN_BIT14:
- case TEST_OUT_BIT15:
- case TEST_IN_BIT15:
- case TEST_OUT_BIT16:
- case TEST_IN_BIT16:
- case TEST_OUT_BIT17:
- case TEST_IN_BIT17:
- case TEST_OUT_BIT18:
- case TEST_IN_BIT18:
- case TEST_OUT_BIT19:
- case TEST_IN_BIT19:
- case TEST_OUT_BIT20:
- case TEST_IN_BIT20:
- case TEST_OUT_BIT21:
- case TEST_IN_BIT21:
- return ioam6h->overflow ||
- ioam6h->nodelen ||
- ioam6h->remlen != 1;
-
case TEST_OUT_BIT22:
case TEST_IN_BIT22:
return ioam6h->overflow ||
@@ -326,6 +282,66 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
*p += sizeof(__u32);
}
+ if (ioam6h->type.bit12) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit13) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit14) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit15) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit16) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit17) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit18) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit19) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit20) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit21) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
if (ioam6h->type.bit22) {
len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
@@ -455,26 +471,6 @@ static int str2id(const char *tname)
return TEST_OUT_BIT10;
if (!strcmp("out_bit11", tname))
return TEST_OUT_BIT11;
- if (!strcmp("out_bit12", tname))
- return TEST_OUT_BIT12;
- if (!strcmp("out_bit13", tname))
- return TEST_OUT_BIT13;
- if (!strcmp("out_bit14", tname))
- return TEST_OUT_BIT14;
- if (!strcmp("out_bit15", tname))
- return TEST_OUT_BIT15;
- if (!strcmp("out_bit16", tname))
- return TEST_OUT_BIT16;
- if (!strcmp("out_bit17", tname))
- return TEST_OUT_BIT17;
- if (!strcmp("out_bit18", tname))
- return TEST_OUT_BIT18;
- if (!strcmp("out_bit19", tname))
- return TEST_OUT_BIT19;
- if (!strcmp("out_bit20", tname))
- return TEST_OUT_BIT20;
- if (!strcmp("out_bit21", tname))
- return TEST_OUT_BIT21;
if (!strcmp("out_bit22", tname))
return TEST_OUT_BIT22;
if (!strcmp("out_full_supp_trace", tname))
@@ -509,26 +505,6 @@ static int str2id(const char *tname)
return TEST_IN_BIT10;
if (!strcmp("in_bit11", tname))
return TEST_IN_BIT11;
- if (!strcmp("in_bit12", tname))
- return TEST_IN_BIT12;
- if (!strcmp("in_bit13", tname))
- return TEST_IN_BIT13;
- if (!strcmp("in_bit14", tname))
- return TEST_IN_BIT14;
- if (!strcmp("in_bit15", tname))
- return TEST_IN_BIT15;
- if (!strcmp("in_bit16", tname))
- return TEST_IN_BIT16;
- if (!strcmp("in_bit17", tname))
- return TEST_IN_BIT17;
- if (!strcmp("in_bit18", tname))
- return TEST_IN_BIT18;
- if (!strcmp("in_bit19", tname))
- return TEST_IN_BIT19;
- if (!strcmp("in_bit20", tname))
- return TEST_IN_BIT20;
- if (!strcmp("in_bit21", tname))
- return TEST_IN_BIT21;
if (!strcmp("in_bit22", tname))
return TEST_IN_BIT22;
if (!strcmp("in_full_supp_trace", tname))
@@ -606,16 +582,6 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
[TEST_OUT_BIT9] = check_ioam_header_and_data,
[TEST_OUT_BIT10] = check_ioam_header_and_data,
[TEST_OUT_BIT11] = check_ioam_header_and_data,
- [TEST_OUT_BIT12] = check_ioam_header,
- [TEST_OUT_BIT13] = check_ioam_header,
- [TEST_OUT_BIT14] = check_ioam_header,
- [TEST_OUT_BIT15] = check_ioam_header,
- [TEST_OUT_BIT16] = check_ioam_header,
- [TEST_OUT_BIT17] = check_ioam_header,
- [TEST_OUT_BIT18] = check_ioam_header,
- [TEST_OUT_BIT19] = check_ioam_header,
- [TEST_OUT_BIT20] = check_ioam_header,
- [TEST_OUT_BIT21] = check_ioam_header,
[TEST_OUT_BIT22] = check_ioam_header_and_data,
[TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data,
[TEST_IN_UNDEF_NS] = check_ioam_header,
@@ -633,16 +599,6 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
[TEST_IN_BIT9] = check_ioam_header_and_data,
[TEST_IN_BIT10] = check_ioam_header_and_data,
[TEST_IN_BIT11] = check_ioam_header_and_data,
- [TEST_IN_BIT12] = check_ioam_header,
- [TEST_IN_BIT13] = check_ioam_header,
- [TEST_IN_BIT14] = check_ioam_header,
- [TEST_IN_BIT15] = check_ioam_header,
- [TEST_IN_BIT16] = check_ioam_header,
- [TEST_IN_BIT17] = check_ioam_header,
- [TEST_IN_BIT18] = check_ioam_header,
- [TEST_IN_BIT19] = check_ioam_header,
- [TEST_IN_BIT20] = check_ioam_header,
- [TEST_IN_BIT21] = check_ioam_header,
[TEST_IN_BIT22] = check_ioam_header_and_data,
[TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data,
[TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data,
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index bd6288302094..b599003eb5ba 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -28,6 +28,7 @@
#include <unistd.h>
#include <time.h>
#include <errno.h>
+#include <getopt.h>
#include <linux/xfrm.h>
#include <linux/ipsec.h>
@@ -101,6 +102,8 @@ struct sock_args {
struct sockaddr_in6 v6;
} md5_prefix;
unsigned int prefix_len;
+ /* 0: default, -1: force off, +1: force on */
+ int bind_key_ifindex;
/* expected addresses and device index for connection */
const char *expected_dev;
@@ -271,11 +274,14 @@ static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args
}
memcpy(&md5sig.tcpm_addr, addr, alen);
- if (args->ifindex) {
+ if ((args->ifindex && args->bind_key_ifindex >= 0) || args->bind_key_ifindex >= 1) {
opt = TCP_MD5SIG_EXT;
md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
md5sig.tcpm_ifindex = args->ifindex;
+ log_msg("TCP_MD5SIG_FLAG_IFINDEX set tcpm_ifindex=%d\n", md5sig.tcpm_ifindex);
+ } else {
+ log_msg("TCP_MD5SIG_FLAG_IFINDEX off\n", md5sig.tcpm_ifindex);
}
rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig));
@@ -1822,6 +1828,14 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
}
#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq"
+#define OPT_FORCE_BIND_KEY_IFINDEX 1001
+#define OPT_NO_BIND_KEY_IFINDEX 1002
+
+static struct option long_opts[] = {
+ {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX},
+ {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX},
+ {0, 0, 0, 0}
+};
static void print_usage(char *prog)
{
@@ -1858,6 +1872,10 @@ static void print_usage(char *prog)
" -M password use MD5 sum protection\n"
" -X password MD5 password for client mode\n"
" -m prefix/len prefix and length to use for MD5 key\n"
+ " --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n"
+ " --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n"
+ " (default: only if -I is passed)\n"
+ "\n"
" -g grp multicast group (e.g., 239.1.1.1)\n"
" -i interactive mode (default is echo and terminate)\n"
"\n"
@@ -1893,7 +1911,7 @@ int main(int argc, char *argv[])
* process input args
*/
- while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) {
+ while ((rc = getopt_long(argc, argv, GETOPT_STR, long_opts, NULL)) != -1) {
switch (rc) {
case 'B':
both_mode = 1;
@@ -1966,6 +1984,12 @@ int main(int argc, char *argv[])
case 'M':
args.password = optarg;
break;
+ case OPT_FORCE_BIND_KEY_IFINDEX:
+ args.bind_key_ifindex = 1;
+ break;
+ case OPT_NO_BIND_KEY_IFINDEX:
+ args.bind_key_ifindex = -1;
+ break;
case 'X':
args.client_pw = optarg;
break;
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 427d94816f2d..d4ffebb989f8 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -199,7 +199,6 @@ fi
# test basic connectivity
if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: ns1 cannot reach ns2" 1>&2
- bash
exit 1
fi
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index d7e07f4c3d7f..da1c1e4b6c86 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -741,6 +741,149 @@ EOF
return $lret
}
+# test port shadowing.
+# create two listening services, one on router (ns0), one
+# on client (ns2), which is masqueraded from ns1 point of view.
+# ns2 sends udp packet coming from service port to ns1, on a highport.
+# Later, if n1 uses same highport to connect to ns0:service, packet
+# might be port-forwarded to ns2 instead.
+
+# second argument tells if we expect the 'fake-entry' to take effect
+# (CLIENT) or not (ROUTER).
+test_port_shadow()
+{
+ local test=$1
+ local expect=$2
+ local daddrc="10.0.1.99"
+ local daddrs="10.0.1.1"
+ local result=""
+ local logmsg=""
+
+ echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
+ nc_r=$!
+
+ echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
+ nc_c=$!
+
+ # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+ echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+
+ # ns1 tries to connect to ns0:1405. With default settings this should connect
+ # to client, it matches the conntrack entry created above.
+
+ result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
+
+ if [ "$result" = "$expect" ] ;then
+ echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
+ else
+ echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
+ ret=1
+ fi
+
+ kill $nc_r $nc_c 2>/dev/null
+
+ # flush udp entries for next test round, if any
+ ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
+}
+
+# This prevents port shadow of router service via packet filter,
+# packets claiming to originate from service port from internal
+# network are dropped.
+test_port_shadow_filter()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta iif veth1 udp sport 1405 drop
+ }
+}
+EOF
+ test_port_shadow "port-filter" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family filter
+}
+
+# This prevents port shadow of router service via notrack.
+test_port_shadow_notrack()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family raw {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 udp dport 1405 notrack
+ udp dport 1405 notrack
+ }
+ chain output {
+ type filter hook output priority -300; policy accept;
+ udp sport 1405 notrack
+ }
+}
+EOF
+ test_port_shadow "port-notrack" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family raw
+}
+
+# This prevents port shadow of router service via sport remap.
+test_port_shadow_pat()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family pat {
+ chain postrouting {
+ type nat hook postrouting priority -1; policy accept;
+ meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
+ }
+}
+EOF
+ test_port_shadow "pat" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family pat
+}
+
+test_port_shadowing()
+{
+ local family="ip"
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ # test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+ test_port_shadow "default" "CLIENT"
+
+ # test packet filter based mitigation: prevent forwarding of
+ # packets claiming to come from the service port.
+ test_port_shadow_filter "$family"
+
+ # test conntrack based mitigation: connections going or coming
+ # from router:service bypass connection tracking.
+ test_port_shadow_notrack "$family"
+
+ # test nat based mitigation: fowarded packets coming from service port
+ # are masqueraded with random highport.
+ test_port_shadow_pat "$family"
+
+ ip netns exec "$ns0" nft delete table $family nat
+}
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
@@ -861,6 +1004,8 @@ reset_counters
$test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
+test_port_shadowing
+
if [ $ret -ne 0 ];then
echo -n "FAIL: "
nft --version
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..b9ab37380f33
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+# veth1-.
+# |
+# NAT Gateway --veth0--> Server
+# | |
+# veth2-' |
+# client2---' |
+# .... |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces. Each client connects to Server, each with colliding tuples:
+# clientsaddr:10000 -> serveraddr:dport
+# NAT Gateway is supposed to do port reallocation for each of the
+# connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+ ip netns del $gw
+ ip netns del $srv
+ for i in $(seq 1 $maxclients); do
+ ip netns del ns-cl$i-$sfx 2>/dev/null
+ done
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+ have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create server netns $srv"
+ exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+
+ ip netns add "$cl"
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not create client netns $cl"
+ exit $ksft_skip
+ fi
+ ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+ fi
+done
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+ echo netns exec "$cl" ip link set lo up
+ echo netns exec "$cl" ip link set eth0 up
+ echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+ echo netns exec "$gw" ip link set veth$i up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+ # clients have same IP addresses.
+ echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+ echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+ echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+ # NB: same addresses on client-facing interfaces.
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+ # gw: policy routing
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+ map iiftomark {
+ type ifname : mark
+ }
+
+ map iiftozone {
+ typeof iifname : ct zone
+ }
+
+ set inicmp {
+ flags dynamic
+ type ipv4_addr . ifname . ipv4_addr
+ }
+ set inflows {
+ flags dynamic
+ type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+ }
+
+ set inflows6 {
+ flags dynamic
+ type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -64000; policy accept;
+ ct original zone set meta iifname map @iiftozone
+ meta mark set meta iifname map @iiftomark
+
+ tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+ add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+ ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+ }
+
+ chain nat_postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ct mark set meta mark meta oifname veth0 masquerade
+ }
+
+ chain mangle_prerouting {
+ type filter hook prerouting priority -100; policy accept;
+ ct direction reply meta mark set ct mark
+ }
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+ echo add element inet raw iiftozone \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 $maxclients); do
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+ if [ $? -ne 0 ]; then
+ echo FAIL: Ping failure from $cl 1>&2
+ ret=1
+ break
+ fi
+done
+
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ break
+ fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+ echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+ echo "SKIP: iperf3 not installed"
+ if [ $ret -ne 0 ];then
+ exit $ret
+ fi
+ exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+ if [ $ret -ne 0 ]; then
+ break
+ fi
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo FAIL: Failure to connect for $cl 1>&2
+ ip netns exec $gw conntrack -S 1>&2
+ ret=1
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+ break
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..ac646376eb01
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=20000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+ ip netns del $ns
+}
+
+ip netns add $ns
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+ have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+ local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+ map rndzone {
+ typeof numgen inc mod $max_zones : ct zone
+ }
+
+ chain output {
+ type filter hook output priority -64000; policy accept;
+ udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
+ }
+}
+EOF
+ (
+ echo "add element inet raw rndzone {"
+ for i in $(seq 1 $max_zones);do
+ echo -n "$i : $i"
+ if [ $i -lt $max_zones ]; then
+ echo ","
+ else
+ echo "}"
+ fi
+ done
+ ) | ip netns exec $ns nft -f /dev/stdin
+
+ local i=0
+ local j=0
+ local outerstart=$(date +%s%3N)
+ local stop=$outerstart
+
+ while [ $i -lt $max_zones ]; do
+ local start=$(date +%s%3N)
+ i=$((i + 10000))
+ j=$((j + 1))
+ dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
+ if [ $? -ne 0 ] ;then
+ ret=1
+ break
+ fi
+
+ stop=$(date +%s%3N)
+ local duration=$((stop-start))
+ echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
+ done
+
+ if [ $have_ct_tool -eq 1 ]; then
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries from packet path in $duration ms total"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+ ret=1
+ fi
+ fi
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: insert $max_zones entries from packet path" 1>&2
+ fi
+}
+
+test_conntrack_tool() {
+ local max_zones=$1
+
+ ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+ local outerstart=$(date +%s%3N)
+ local start=$(date +%s%3N)
+ local stop=$start
+ local i=0
+ while [ $i -lt $max_zones ]; do
+ i=$((i + 1))
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+ echo "FAIL: conntrack -I returned an error"
+ ret=1
+ break
+ fi
+
+ if [ $((i%10000)) -eq 0 ];then
+ stop=$(date +%s%3N)
+
+ local duration=$((stop-start))
+ echo "PASS: added 10000 entries in $duration ms (now $i total)"
+ start=$stop
+ fi
+ done
+
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+ ret=1
+ fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+ test_conntrack_tool $zones
+else
+ echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+ if [ $ret -eq 0 ];then
+ exit $ksft_skip
+ fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 10ab56c2484a..60aa1a4fc69b 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -414,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
uffd_test_ops->allocate_area((void **)&area_src);
uffd_test_ops->allocate_area((void **)&area_dst);
- uffd_test_ops->release_pages(area_src);
- uffd_test_ops->release_pages(area_dst);
-
userfaultfd_open(features);
count_verify = malloc(nr_pages * sizeof(unsigned long long));
@@ -437,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
*(area_count(area_src, nr) + 1) = 1;
}
+ /*
+ * After initialization of area_src, we must explicitly release pages
+ * for area_dst to make sure it's fully empty. Otherwise we could have
+ * some area_dst pages be errornously initialized with zero pages,
+ * hence we could hit memory corruption later in the test.
+ *
+ * One example is when THP is globally enabled, above allocate_area()
+ * calls could have the two areas merged into a single VMA (as they
+ * will have the same VMA flags so they're mergeable). When we
+ * initialize the area_src above, it's possible that some part of
+ * area_dst could have been faulted in via one huge THP that will be
+ * shared between area_src and area_dst. It could cause some of the
+ * area_dst won't be trapped by missing userfaults.
+ *
+ * This release_pages() will guarantee even if that happened, we'll
+ * proactively split the thp and drop any accidentally initialized
+ * pages within area_dst.
+ */
+ uffd_test_ops->release_pages(area_dst);
+
pipefd = malloc(sizeof(int) * nr_cpus * 2);
if (!pipefd)
err("pipefd");
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c
index cec6f5a738e1..fa927ad16f8a 100644
--- a/tools/testing/vsock/vsock_diag_test.c
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -332,8 +332,6 @@ static void test_no_sockets(const struct test_opts *opts)
read_vsock_stat(&sockets);
check_no_sockets(&sockets);
-
- free_sock_stat(&sockets);
}
static void test_listen_socket_server(const struct test_opts *opts)