diff options
Diffstat (limited to 'tools/testing')
38 files changed, 1537 insertions, 94 deletions
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index be8d8d4a4e08..6276ce0c0196 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -12,6 +12,8 @@ import sys import os import time +assert sys.version_info >= (3, 7), "Python version is too old" + from collections import namedtuple from enum import Enum, auto diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 90bc007f1f93..2c6f916ccbaf 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,15 +6,13 @@ # Author: Felix Guo <felixguoxiuping@gmail.com> # Author: Brendan Higgins <brendanhiggins@google.com> -from __future__ import annotations import importlib.util import logging import subprocess import os import shutil import signal -from typing import Iterator -from typing import Optional +from typing import Iterator, Optional, Tuple from contextlib import ExitStack @@ -208,7 +206,7 @@ def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceT raise ConfigError(arch + ' is not a valid arch') def get_source_tree_ops_from_qemu_config(config_path: str, - cross_compile: Optional[str]) -> tuple[ + cross_compile: Optional[str]) -> Tuple[ str, LinuxSourceTreeOperations]: # The module name/path has very little to do with where the actual file # exists (I learned this through experimentation and could not find it diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index c3c524b79db8..b88db3f51dc5 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -338,9 +338,11 @@ def bubble_up_suite_errors(test_suites: Iterable[TestSuite]) -> TestStatus: def parse_test_result(lines: LineStream) -> TestResult: consume_non_diagnostic(lines) if not lines or not parse_tap_header(lines): - return TestResult(TestStatus.NO_TESTS, [], lines) + return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) expected_test_suite_num = parse_test_plan(lines) - if not expected_test_suite_num: + if expected_test_suite_num == 0: + return TestResult(TestStatus.NO_TESTS, [], lines) + elif expected_test_suite_num is None: return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) test_suites = [] for i in range(1, expected_test_suite_num + 1): diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index bdae0e5f6197..75045aa0f8a1 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -157,8 +157,18 @@ class KUnitParserTest(unittest.TestCase): kunit_parser.TestStatus.FAILURE, result.status) + def test_no_header(self): + empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log') + with open(empty_log) as file: + result = kunit_parser.parse_run_tests( + kunit_parser.extract_tap_lines(file.readlines())) + self.assertEqual(0, len(result.suites)) + self.assertEqual( + kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, + result.status) + def test_no_tests(self): - empty_log = test_data_path('test_is_test_passed-no_tests_run.log') + empty_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') with open(empty_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) @@ -173,7 +183,7 @@ class KUnitParserTest(unittest.TestCase): with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) - print_mock.assert_any_call(StrContains('no tests run!')) + print_mock.assert_any_call(StrContains('could not parse test results!')) print_mock.stop() file.close() @@ -309,7 +319,7 @@ class KUnitJsonTest(unittest.TestCase): result["sub_groups"][1]["test_cases"][0]) def test_no_tests_json(self): - result = self._json_for('test_is_test_passed-no_tests_run.log') + result = self._json_for('test_is_test_passed-no_tests_run_with_header.log') self.assertEqual(0, len(result['sub_groups'])) class StrContains(str): diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log index ba69f5c94b75..ba69f5c94b75 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_no_header.log diff --git a/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log new file mode 100644 index 000000000000..5f48ee659d40 --- /dev/null +++ b/tools/testing/kunit/test_data/test_is_test_passed-no_tests_run_with_header.log @@ -0,0 +1,2 @@ +TAP version 14 +1..0 diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 54f367cbadae..b1bff5fb0f65 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -434,7 +434,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t, dev_dbg(dev, "%s: transition out verify\n", __func__); fw->state = FW_STATE_UPDATED; fw->missed_activate = false; - /* fall through */ + fallthrough; case FW_STATE_UPDATED: nd_cmd->status = 0; /* bogus test version */ diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index ee27d68d2a1c..b5940e6ca67c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -715,6 +715,8 @@ out: bpf_object__close(obj); } +#include "tailcall_bpf2bpf4.skel.h" + /* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved * across tailcalls combined with bpf2bpf calls. for making sure that tailcall * counter behaves correctly, bpf program will go through following flow: @@ -727,10 +729,15 @@ out: * the loop begins. At the end of the test make sure that the global counter is * equal to 31, because tailcall counter includes the first two tailcalls * whereas global counter is incremented only on loop presented on flow above. + * + * The noise parameter is used to insert bpf_map_update calls into the logic + * to force verifier to patch instructions. This allows us to ensure jump + * logic remains correct with instruction movement. */ -static void test_tailcall_bpf2bpf_4(void) +static void test_tailcall_bpf2bpf_4(bool noise) { - int err, map_fd, prog_fd, main_fd, data_fd, i, val; + int err, map_fd, prog_fd, main_fd, data_fd, i; + struct tailcall_bpf2bpf4__bss val; struct bpf_map *prog_array, *data_map; struct bpf_program *prog; struct bpf_object *obj; @@ -774,11 +781,6 @@ static void test_tailcall_bpf2bpf_4(void) goto out; } - err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, - &duration, &retval, NULL); - CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", - err, errno, retval); - data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) return; @@ -788,9 +790,21 @@ static void test_tailcall_bpf2bpf_4(void) return; i = 0; + val.noise = noise; + val.count = 0; + err = bpf_map_update_elem(data_fd, &i, &val, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 0; err = bpf_map_lookup_elem(data_fd, &i, &val); - CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n", - err, errno, val); + CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n", + err, errno, val.count); out: bpf_object__close(obj); @@ -815,5 +829,7 @@ void test_tailcalls(void) if (test__start_subtest("tailcall_bpf2bpf_3")) test_tailcall_bpf2bpf_3(); if (test__start_subtest("tailcall_bpf2bpf_4")) - test_tailcall_bpf2bpf_4(); + test_tailcall_bpf2bpf_4(false); + if (test__start_subtest("tailcall_bpf2bpf_5")) + test_tailcall_bpf2bpf_4(true); } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c index 77df6d4db895..e89368a50b97 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -3,6 +3,13 @@ #include <bpf/bpf_helpers.h> struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} nop_table SEC(".maps"); + +struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __uint(max_entries, 3); __uint(key_size, sizeof(__u32)); @@ -10,10 +17,21 @@ struct { } jmp_table SEC(".maps"); int count = 0; +int noise = 0; + +__always_inline int subprog_noise(void) +{ + __u32 key = 0; + + bpf_map_lookup_elem(&nop_table, &key); + return 0; +} __noinline int subprog_tail_2(struct __sk_buff *skb) { + if (noise) + subprog_noise(); bpf_tail_call_static(skb, &jmp_table, 2); return skb->len * 3; } diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index 2c8935b3e65d..ee454327e5c6 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -159,3 +159,15 @@ .result = ACCEPT, .retval = 2, }, +{ + "dead code: zero extension", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index a3e593ddfafc..2debba4e8a3a 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1,4 +1,233 @@ { + "map access: known scalar += value_ptr unknown vs const", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs unknown", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (ne)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (lt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (gt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ "map access: known scalar += value_ptr from different maps", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 06a351b4f93b..0709af0144c8 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -38,6 +38,7 @@ /x86_64/xen_vmcall_test /x86_64/xss_msr_test /x86_64/vmx_pmu_msrs_test +/access_tracking_perf_test /demand_paging_test /dirty_log_test /dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b853be2ae3c6..5832f510a16c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index a16c8f05366c..cc898181faab 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -1019,7 +1019,8 @@ static __u64 sve_rejects_set[] = { #define VREGS_SUBLIST \ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } #define PMU_SUBLIST \ - { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } + { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \ + .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } #define SVE_SUBLIST \ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c new file mode 100644 index 000000000000..e2baa187a21e --- /dev/null +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * access_tracking_perf_test + * + * Copyright (C) 2021, Google, Inc. + * + * This test measures the performance effects of KVM's access tracking. + * Access tracking is driven by the MMU notifiers test_young, clear_young, and + * clear_flush_young. These notifiers do not have a direct userspace API, + * however the clear_young notifier can be triggered by marking a pages as idle + * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to + * enable access tracking on guest memory. + * + * To measure performance this test runs a VM with a configurable number of + * vCPUs that each touch every page in disjoint regions of memory. Performance + * is measured in the time it takes all vCPUs to finish touching their + * predefined region. + * + * Note that a deterministic correctness test of access tracking is not possible + * by using page_idle as it exists today. This is for a few reasons: + * + * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This + * means subsequent guest accesses are not guaranteed to see page table + * updates made by KVM until some time in the future. + * + * 2. page_idle only operates on LRU pages. Newly allocated pages are not + * immediately allocated to LRU lists. Instead they are held in a "pagevec", + * which is drained to LRU lists some time in the future. There is no + * userspace API to force this drain to occur. + * + * These limitations are worked around in this test by using a large enough + * region of memory for each vCPU such that the number of translations cached in + * the TLB and the number of pages held in pagevecs are a small fraction of the + * overall workload. And if either of those conditions are not true this test + * will fail rather than silently passing. + */ +#include <inttypes.h> +#include <limits.h> +#include <pthread.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "kvm_util.h" +#include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" + +/* Global variable used to synchronize all of the vCPU threads. */ +static int iteration = -1; + +/* Defines what vCPU threads should do during a given iteration. */ +static enum { + /* Run the vCPU to access all its memory. */ + ITERATION_ACCESS_MEMORY, + /* Mark the vCPU's memory idle in page_idle. */ + ITERATION_MARK_IDLE, +} iteration_work; + +/* Set to true when vCPU threads should exit. */ +static bool done; + +/* The iteration that was last completed by each vCPU. */ +static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; + +/* Whether to overlap the regions of memory vCPUs access. */ +static bool overlap_memory_access; + +struct test_params { + /* The backing source for the region of memory. */ + enum vm_mem_backing_src_type backing_src; + + /* The amount of memory to allocate for each vCPU. */ + uint64_t vcpu_memory_bytes; + + /* The number of vCPUs to create in the VM. */ + int vcpus; +}; + +static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) +{ + uint64_t value; + off_t offset = index * sizeof(value); + + TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value), + "pread from %s offset 0x%" PRIx64 " failed!", + filename, offset); + + return value; + +} + +#define PAGEMAP_PRESENT (1ULL << 63) +#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1) + +static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) +{ + uint64_t hva = (uint64_t) addr_gva2hva(vm, gva); + uint64_t entry; + uint64_t pfn; + + entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize()); + if (!(entry & PAGEMAP_PRESENT)) + return 0; + + pfn = entry & PAGEMAP_PFN_MASK; + if (!pfn) { + print_skip("Looking up PFNs requires CAP_SYS_ADMIN"); + exit(KSFT_SKIP); + } + + return pfn; +} + +static bool is_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64); + + return !!((bits >> (pfn % 64)) & 1); +} + +static void mark_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = 1ULL << (pfn % 64); + + TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8, + "Set page_idle bits for PFN 0x%" PRIx64, pfn); +} + +static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) +{ + uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva; + uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages; + uint64_t page; + uint64_t still_idle = 0; + uint64_t no_pfn = 0; + int page_idle_fd; + int pagemap_fd; + + /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */ + if (overlap_memory_access && vcpu_id) + return; + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle."); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); + + for (page = 0; page < pages; page++) { + uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); + + if (!pfn) { + no_pfn++; + continue; + } + + if (is_page_idle(page_idle_fd, pfn)) { + still_idle++; + continue; + } + + mark_page_idle(page_idle_fd, pfn); + } + + /* + * Assumption: Less than 1% of pages are going to be swapped out from + * under us during this test. + */ + TEST_ASSERT(no_pfn < pages / 100, + "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.", + vcpu_id, no_pfn, pages); + + /* + * Test that at least 90% of memory has been marked idle (the rest might + * not be marked idle because the pages have not yet made it to an LRU + * list or the translations are still cached in the TLB). 90% is + * arbitrary; high enough that we ensure most memory access went through + * access tracking but low enough as to not make the test too brittle + * over time and across architectures. + */ + TEST_ASSERT(still_idle < pages / 10, + "vCPU%d: Too many pages still idle (%"PRIu64 " out of %" + PRIu64 ").\n", + vcpu_id, still_idle, pages); + + close(page_idle_fd); + close(pagemap_fd); +} + +static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id, + uint64_t expected_ucall) +{ + struct ucall uc; + uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc); + + TEST_ASSERT(expected_ucall == actual_ucall, + "Guest exited unexpectedly (expected ucall %" PRIu64 + ", got %" PRIu64 ")", + expected_ucall, actual_ucall); +} + +static bool spin_wait_for_next_iteration(int *current_iteration) +{ + int last_iteration = *current_iteration; + + do { + if (READ_ONCE(done)) + return false; + + *current_iteration = READ_ONCE(iteration); + } while (last_iteration == *current_iteration); + + return true; +} + +static void *vcpu_thread_main(void *arg) +{ + struct perf_test_vcpu_args *vcpu_args = arg; + struct kvm_vm *vm = perf_test_args.vm; + int vcpu_id = vcpu_args->vcpu_id; + int current_iteration = -1; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + while (spin_wait_for_next_iteration(¤t_iteration)) { + switch (READ_ONCE(iteration_work)) { + case ITERATION_ACCESS_MEMORY: + vcpu_run(vm, vcpu_id); + assert_ucall(vm, vcpu_id, UCALL_SYNC); + break; + case ITERATION_MARK_IDLE: + mark_vcpu_memory_idle(vm, vcpu_id); + break; + }; + + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + } + + return NULL; +} + +static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) +{ + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + target_iteration) { + continue; + } +} + +/* The type of memory accesses to perform in the VM. */ +enum access_type { + ACCESS_READ, + ACCESS_WRITE, +}; + +static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + int next_iteration; + int vcpu_id; + + /* Kick off the vCPUs by incrementing iteration. */ + next_iteration = ++iteration; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + + /* Wait for all vCPUs to finish the iteration. */ + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) + spin_wait_for_vcpu(vcpu_id, next_iteration); + + ts_elapsed = timespec_elapsed(ts_start); + pr_info("%-30s: %ld.%09lds\n", + description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec); +} + +static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, + const char *description) +{ + perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1; + sync_global_to_guest(vm, perf_test_args); + iteration_work = ITERATION_ACCESS_MEMORY; + run_iteration(vm, vcpus, description); +} + +static void mark_memory_idle(struct kvm_vm *vm, int vcpus) +{ + /* + * Even though this parallelizes the work across vCPUs, this is still a + * very slow operation because page_idle forces the test to mark one pfn + * at a time and the clear_young notifier serializes on the KVM MMU + * lock. + */ + pr_debug("Marking VM memory idle (slow)...\n"); + iteration_work = ITERATION_MARK_IDLE; + run_iteration(vm, vcpus, "Mark memory idle"); +} + +static pthread_t *create_vcpu_threads(int vcpus) +{ + pthread_t *vcpu_threads; + int i; + + vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0])); + TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads."); + + for (i = 0; i < vcpus; i++) { + vcpu_last_completed_iteration[i] = iteration; + pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main, + &perf_test_args.vcpu_args[i]); + } + + return vcpu_threads; +} + +static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus) +{ + int i; + + /* Set done to signal the vCPU threads to exit */ + done = true; + + for (i = 0; i < vcpus; i++) + pthread_join(vcpu_threads[i], NULL); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *params = arg; + struct kvm_vm *vm; + pthread_t *vcpu_threads; + int vcpus = params->vcpus; + + vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, + params->backing_src); + + perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes, + !overlap_memory_access); + + vcpu_threads = create_vcpu_threads(vcpus); + + pr_info("\n"); + access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); + + /* As a control, read and write to the populated memory first. */ + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory"); + access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory"); + + /* Repeat on memory that has been marked as idle. */ + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory"); + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); + + terminate_vcpu_threads(vcpu_threads, vcpus); + free(vcpu_threads); + perf_test_destroy_vm(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n", + name); + puts(""); + printf(" -h: Display this help message."); + guest_modes_help(); + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); + printf(" -s: specify the type of memory that should be used to\n" + " back the guest data region.\n\n"); + backing_src_help(); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct test_params params = { + .backing_src = VM_MEM_SRC_ANONYMOUS, + .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, + .vcpus = 1, + }; + int page_idle_fd; + int opt; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 'b': + params.vcpu_memory_bytes = parse_size(optarg); + break; + case 'v': + params.vcpus = atoi(optarg); + break; + case 'o': + overlap_memory_access = true; + break; + case 's': + params.backing_src = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + if (page_idle_fd < 0) { + print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled"); + exit(KSFT_SKIP); + } + close(page_idle_fd); + + for_each_guest_mode(run_test, ¶ms); + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 04a2641261be..80cbd3a748c0 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -312,6 +312,7 @@ int main(int argc, char *argv[]) break; case 'o': p.partition_vcpu_memory_access = false; + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 615ab254899d..010b59b13917 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -45,6 +45,7 @@ enum vm_guest_mode { VM_MODE_P40V48_64K, VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, NUM_VM_MODES, }; @@ -62,7 +63,7 @@ enum vm_guest_mode { #elif defined(__s390x__) -#define VM_MODE_DEFAULT VM_MODE_P47V64_4K +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K #define MIN_PAGE_SHIFT 12U #define ptes_per_page(page_size) ((page_size) / 16) diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index 412eaee7884a..b66910702c0a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -117,7 +117,7 @@ #define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) #define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) #define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) @@ -182,4 +182,7 @@ #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* hypercall options */ +#define HV_HYPERCALL_FAST_BIT BIT(16) + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 9f49f6caafe5..632b74d6b3ca 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -401,7 +401,7 @@ unexpected_exception: void vm_init_descriptor_tables(struct kvm_vm *vm) { vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers), - vm->page_size, 0, 0); + vm->page_size); *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 25bff307c71f..c330f414ef96 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -22,6 +22,22 @@ void guest_modes_append_default(void) } } #endif +#ifdef __s390x__ + { + int kvm_fd, vm_fd; + struct kvm_s390_vm_cpu_processor info; + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0); + kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info, false); + close(vm_fd); + close(kvm_fd); + /* Starting with z13 we have 47bits of physical address */ + if (info.ibc >= 0x30) + guest_mode_append(VM_MODE_P47V64_4K, true, true); + } +#endif } void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg) diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 5b56b57b3c20..10a8ed691c66 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -176,6 +176,7 @@ const char *vm_guest_mode_string(uint32_t i) [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages", [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages", [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages", + [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages", }; _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -194,6 +195,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { { 40, 48, 0x10000, 16 }, { 0, 0, 0x1000, 12 }, { 47, 64, 0x1000, 12 }, + { 44, 64, 0x1000, 12 }, }; _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); @@ -282,6 +284,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) case VM_MODE_P47V64_4K: vm->pgtable_levels = 5; break; + case VM_MODE_P44V64_4K: + vm->pgtable_levels = 5; + break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 85b18bb8f762..72a1c9b4882c 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -377,7 +377,8 @@ static void test_add_max_memory_regions(void) (max_mem_slots - 1), MEM_REGION_SIZE >> 10); mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment, - PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0); TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host"); mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1)); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index b0031f2d38fd..ecec30865a74 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -320,7 +320,7 @@ int main(int ac, char **av) run_delay = get_run_delay(); pthread_create(&thread, &attr, do_steal_time, NULL); do - pthread_yield(); + sched_yield(); while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS); pthread_join(thread, NULL); run_delay = get_run_delay() - run_delay; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index bab10ae787b6..e0b2bb1339b1 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -215,7 +215,7 @@ int main(void) vcpu_set_hv_cpuid(vm, VCPU_ID); tsc_page_gva = vm_vaddr_alloc_page(vm); - memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned\n"); vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 42bd658f52a8..91d88aaa9899 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val) } static int nr_gp; +static int nr_ud; static inline u64 hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address) @@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs) regs->rip = (uint64_t)&wrmsr_end; } +static void guest_ud_handler(struct ex_regs *regs) +{ + nr_ud++; + regs->rip += 3; +} + struct msr_data { uint32_t idx; bool available; @@ -90,6 +97,7 @@ struct msr_data { struct hcall_data { uint64_t control; uint64_t expect; + bool ud_expected; }; static void guest_msr(struct msr_data *msr) @@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr) static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) { int i = 0; + u64 res, input, output; wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); while (hcall->control) { - GUEST_ASSERT(hypercall(hcall->control, pgs_gpa, - pgs_gpa + 4096) == hcall->expect); + nr_ud = 0; + if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { + input = pgs_gpa; + output = pgs_gpa + 4096; + } else { + input = output = 0; + } + + res = hypercall(hcall->control, input, output); + if (hcall->ud_expected) + GUEST_ASSERT(nr_ud == 1); + else + GUEST_ASSERT(res == hcall->expect); + GUEST_SYNC(i++); } @@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall recomm.ebx = 0xfff; hcall->expect = HV_STATUS_SUCCESS; break; - case 17: + /* XMM fast hypercall */ + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; + hcall->ud_expected = true; + break; + case 18: + feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + hcall->ud_expected = false; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 19: /* END */ hcall->control = 0; break; @@ -615,7 +646,7 @@ int main(void) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); pr_info("Testing access to Hyper-V specific MSRs\n"); guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva), @@ -625,6 +656,10 @@ int main(void) /* Test hypercalls */ vm = vm_create_default(VCPU_ID, 0, guest_hcall); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c index 523371cf8e8f..da2325fcad87 100644 --- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c @@ -71,7 +71,7 @@ static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val) /* Set up a #PF handler to eat the RSVD #PF and signal all done! */ vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_handle_exception(vm, PF_VECTOR, guest_pf_handler); + vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler); r = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index c1f831803ad2..d0fe2fdce58c 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -53,15 +53,28 @@ static inline void sync_with_host(uint64_t phase) : "+a" (phase)); } -void self_smi(void) +static void self_smi(void) { x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI); } -void guest_code(void *arg) +static void l2_guest_code(void) { + sync_with_host(8); + + sync_with_host(10); + + vmcall(); +} + +static void guest_code(void *arg) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; uint64_t apicbase = rdmsr(MSR_IA32_APICBASE); + struct svm_test_data *svm = arg; + struct vmx_pages *vmx_pages = arg; sync_with_host(1); @@ -74,21 +87,50 @@ void guest_code(void *arg) sync_with_host(4); if (arg) { - if (cpu_has_svm()) - generic_svm_setup(arg, NULL, NULL); - else - GUEST_ASSERT(prepare_for_vmx_operation(arg)); + if (cpu_has_svm()) { + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + } else { + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + } sync_with_host(5); self_smi(); sync_with_host(7); + + if (cpu_has_svm()) { + run_guest(svm->vmcb, svm->vmcb_gpa); + svm->vmcb->save.rip += 3; + run_guest(svm->vmcb, svm->vmcb_gpa); + } else { + vmlaunch(); + vmresume(); + } + + /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */ + sync_with_host(12); } sync_with_host(DONE); } +void inject_smi(struct kvm_vm *vm) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vm, VCPU_ID, &events); + + events.smi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_SMM; + + vcpu_events_set(vm, VCPU_ID, &events); +} + int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; @@ -147,6 +189,22 @@ int main(int argc, char *argv[]) "Unexpected stage: #%x, got %x", stage, stage_reported); + /* + * Enter SMM during L2 execution and check that we correctly + * return from it. Do not perform save/restore while in SMM yet. + */ + if (stage == 8) { + inject_smi(vm); + continue; + } + + /* + * Perform save/restore while the guest is in SMM triggered + * during L2 execution. + */ + if (stage == 10) + inject_smi(vm); + state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); kvm_vm_restart(vm, O_RDWR); diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index b37585e6aa38..46a97f318f58 100755 --- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh @@ -282,7 +282,9 @@ done # echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error for memory in `hotpluggable_online_memory`; do - offline_memory_expect_fail $memory + if [ $((RANDOM % 100)) -lt $ratio ]; then + offline_memory_expect_fail $memory + fi done echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index c19ecc6a8614..ecbf57f264ed 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -313,9 +313,10 @@ check_exception() fi log_test $? 0 "IPv4: ${desc}" - if [ "$with_redirect" = "yes" ]; then + # No PMTU info for test "redirect" and "mtu exception plus redirect" + if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ - grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}" + grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0" elif [ -n "${mtu}" ]; then ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -q "${mtu}" diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index f23438d512c5..3d7dde2c321b 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -484,13 +484,16 @@ enum desc_type { MONITOR_ACQUIRE, EXPIRE_STATE, EXPIRE_POLICY, + SPDINFO_ATTRS, }; const char *desc_name[] = { "create tunnel", "alloc spi", "monitor acquire", "expire state", - "expire policy" + "expire policy", + "spdinfo attributes", + "" }; struct xfrm_desc { enum desc_type type; @@ -1593,6 +1596,155 @@ out_close: return ret; } +static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq, + unsigned thresh4_l, unsigned thresh4_r, + unsigned thresh6_l, unsigned thresh6_r, + bool add_bad_attr) + +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrmu_spdhthresh thresh; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + thresh.lbits = thresh4_l; + thresh.rbits = thresh4_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + thresh.lbits = thresh6_l; + thresh.rbits = thresh6_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + if (add_bad_attr) { + BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) { + pr_err("adding attribute failed: no space"); + return -1; + } + } + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return -1; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + return 0; +} + +static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq) +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) { + pr_err("Can't set SPD HTHRESH"); + return KSFT_FAIL; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) { + size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused)); + struct rtattr *attr = (void *)req.attrbuf; + int got_thresh = 0; + + for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 32 || t->rbits != 31) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 120 || t->rbits != 16) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + } + if (got_thresh != 2) { + pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh); + return KSFT_FAIL; + } + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return KSFT_FAIL; + } else { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + /* Restore the default */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) { + pr_err("Can't restore SPD HTHRESH"); + return KSFT_FAIL; + } + + /* + * At this moment xfrm uses nlmsg_parse_deprecated(), which + * implies NL_VALIDATE_LIBERAL - ignoring attributes with + * (type > maxtype). nla_parse_depricated_strict() would enforce + * it. Or even stricter nla_parse(). + * Right now it's not expected to fail, but to be ignored. + */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true)) + return KSFT_PASS; + + return KSFT_PASS; +} + static int child_serv(int xfrm_sock, uint32_t *seq, unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) { @@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) case EXPIRE_POLICY: ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); break; + case SPDINFO_ATTRS: + ret = xfrm_spdinfo_attrs(xfrm_sock, &seq); + break; default: printk("Unknown desc type %d", desc.type); exit(KSFT_FAIL); @@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto) * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 * * Check the affected by the UABI difference structures. + * Also, check translation for xfrm_set_spdinfo: it has it's own attributes + * which needs to be correctly copied, but not translated. */ -const unsigned int compat_plan = 4; +const unsigned int compat_plan = 5; static int write_compat_struct_tests(int test_desc_fd) { struct xfrm_desc desc = {}; @@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd) if (__write_desc(test_desc_fd, &desc)) return -1; + desc.type = SPDINFO_ATTRS; + if (__write_desc(test_desc_fd, &desc)) + return -1; + return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9a191c1a5de8..f02f4de2f3a0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1409,7 +1409,7 @@ syncookies_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr "subflows limited by server w cookies" 2 2 1 + chk_join_nr "subflows limited by server w cookies" 2 1 1 # test signal address with cookies reset_with_cookies diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 6365c7fd1262..bd6288302094 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -11,9 +11,11 @@ #include <sys/socket.h> #include <sys/wait.h> #include <linux/tcp.h> +#include <linux/udp.h> #include <arpa/inet.h> #include <net/if.h> #include <netinet/in.h> +#include <netinet/ip.h> #include <netdb.h> #include <fcntl.h> #include <libgen.h> @@ -27,6 +29,10 @@ #include <time.h> #include <errno.h> +#include <linux/xfrm.h> +#include <linux/ipsec.h> +#include <linux/pfkeyv2.h> + #ifndef IPV6_UNICAST_IF #define IPV6_UNICAST_IF 76 #endif @@ -114,6 +120,9 @@ struct sock_args { struct in_addr in; struct in6_addr in6; } expected_raddr; + + /* ESP in UDP encap test */ + int use_xfrm; }; static int server_mode; @@ -1346,6 +1355,41 @@ static int bind_socket(int sd, struct sock_args *args) return 0; } +static int config_xfrm_policy(int sd, struct sock_args *args) +{ + struct xfrm_userpolicy_info policy = {}; + int type = UDP_ENCAP_ESPINUDP; + int xfrm_af = IP_XFRM_POLICY; + int level = SOL_IP; + + if (args->type != SOCK_DGRAM) { + log_error("Invalid socket type. Only DGRAM could be used for XFRM\n"); + return 1; + } + + policy.action = XFRM_POLICY_ALLOW; + policy.sel.family = args->version; + if (args->version == AF_INET6) { + xfrm_af = IPV6_XFRM_POLICY; + level = SOL_IPV6; + } + + policy.dir = XFRM_POLICY_OUT; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + policy.dir = XFRM_POLICY_IN; + if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0) + return 1; + + if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) { + log_err_errno("Failed to set xfrm encap"); + return 1; + } + + return 0; +} + static int lsock_init(struct sock_args *args) { long flags; @@ -1389,6 +1433,11 @@ static int lsock_init(struct sock_args *args) if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) log_err_errno("Failed to set close-on-exec flag"); + if (args->use_xfrm && config_xfrm_policy(sd, args)) { + log_err_errno("Failed to set xfrm policy"); + goto err; + } + out: return sd; @@ -1772,7 +1821,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq" +#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" static void print_usage(char *prog) { @@ -1795,6 +1844,7 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" " -I dev bind socket to given device name - server mode\n" @@ -1966,6 +2016,9 @@ int main(int argc, char *argv[]) case 'q': quiet = 1; break; + case 'x': + args.use_xfrm = 1; + break; default: print_usage(argv[0]); return 1; diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 64cd2e23c568..543ad7513a8e 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -118,6 +118,16 @@ # below for IPv6 doesn't apply here, because, on IPv4, administrative MTU # changes alone won't affect PMTU # +# - pmtu_vti4_udp_exception +# Same as pmtu_vti4_exception, but using ESP-in-UDP +# +# - pmtu_vti4_udp_routed_exception +# Set up vti tunnel on top of veth connected through routing namespace and +# add xfrm states and policies with ESP-in-UDP encapsulation. Check that +# route exception is not created if link layer MTU is not exceeded, then +# lower MTU on second part of routed environment and check that exception +# is created with the expected PMTU. +# # - pmtu_vti6_exception # Set up vti6 tunnel on top of veth, with xfrm states and policies, in two # namespaces with matching endpoints. Check that route exception is @@ -125,6 +135,13 @@ # decrease and increase MTU of tunnel, checking that route exception PMTU # changes accordingly # +# - pmtu_vti6_udp_exception +# Same as pmtu_vti6_exception, but using ESP-in-UDP +# +# - pmtu_vti6_udp_routed_exception +# Same as pmtu_vti6_udp_routed_exception but with routing between vti +# endpoints +# # - pmtu_vti4_default_mtu # Set up vti4 tunnel on top of veth, in two namespaces with matching # endpoints. Check that MTU assigned to vti interface is the MTU of the @@ -224,6 +241,10 @@ tests=" pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1 pmtu_vti6_exception vti6: PMTU exceptions 0 pmtu_vti4_exception vti4: PMTU exceptions 0 + pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0 + pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0 + pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0 pmtu_vti4_default_mtu vti4: default MTU assignment 0 pmtu_vti6_default_mtu vti6: default MTU assignment 0 pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0 @@ -246,7 +267,6 @@ ns_b="ip netns exec ${NS_B}" ns_c="ip netns exec ${NS_C}" ns_r1="ip netns exec ${NS_R1}" ns_r2="ip netns exec ${NS_R2}" - # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). @@ -279,7 +299,6 @@ routes=" A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2 B default ${prefix6}:${b_r1}::2 " - USE_NH="no" # ns family nh id destination gateway nexthops=" @@ -326,6 +345,7 @@ dummy6_mask="64" err_buf= tcpdump_pids= +nettest_pids= err() { err_buf="${err_buf}${1} @@ -548,6 +568,14 @@ setup_vti6() { setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} } +setup_vti4routed() { + setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask} +} + +setup_vti6routed() { + setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask} +} + setup_vxlan_or_geneve() { type="${1}" a_addr="${2}" @@ -619,18 +647,36 @@ setup_xfrm() { proto=${1} veth_a_addr="${2}" veth_b_addr="${3}" + encap=${4} - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1 - run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1 + run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel - run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} + run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel } +setup_nettest_xfrm() { + which nettest >/dev/null + if [ $? -ne 0 ]; then + echo "'nettest' command not found; skipping tests" + return 1 + fi + + [ ${1} -eq 6 ] && proto="-6" || proto="" + port=${2} + + run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" + + run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 & + nettest_pids="${nettest_pids} $!" +} + setup_xfrm4() { setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} } @@ -639,6 +685,26 @@ setup_xfrm6() { setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} } +setup_xfrm4udp() { + setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udp() { + setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + +setup_xfrm4udprouted() { + setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 4 4500 +} + +setup_xfrm6udprouted() { + setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" + setup_nettest_xfrm 6 4500 +} + setup_routing_old() { for i in ${routes}; do [ "${ns}" = "" ] && ns="${i}" && continue @@ -823,6 +889,11 @@ cleanup() { done tcpdump_pids= + for pid in ${nettest_pids}; do + kill ${pid} + done + nettest_pids= + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do ip netns del ${n} 2> /dev/null done @@ -1432,6 +1503,135 @@ test_pmtu_vti6_exception() { return ${fail} } +test_pmtu_vti4_udp_exception() { + setup namespaces veth vti4 xfrm4udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_a ${veth_mtu} + mtu "${ns_b}" veth_b ${veth_mtu} + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now exceed link layer MTU by one byte, check that exception is created + # with the right PMTU value + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))" +} + +test_pmtu_vti6_udp_exception() { + setup namespaces veth vti6 xfrm6udp || return $ksft_skip + trace "${ns_a}" veth_a "${ns_b}" veth_b \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + fail=0 + + # Create route exception by exceeding link layer MTU + mtu "${ns_a}" veth_a 4000 + mtu "${ns_b}" veth_b 4000 + mtu "${ns_a}" vti6_a 5000 + mtu "${ns_b}" vti6_b 5000 + run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr} + + # Check that exception was created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1 + + # Decrease tunnel MTU, check for PMTU decrease in route exception + mtu "${ns_a}" vti6_a 3000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1 + + # Increase tunnel MTU, check for PMTU increase in route exception + mtu "${ns_a}" vti6_a 9000 + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1 + + return ${fail} +} + +test_pmtu_vti4_udp_routed_exception() { + setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti4_a "${ns_b}" vti4_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 20)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 28)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + mtu "${ns_a}" vti4_a ${vti_mtu} + mtu "${ns_b}" vti4_b ${vti_mtu} + + # Send DF packet without exceeding link layer MTU, check that no + # exception is created + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" +} + +test_pmtu_vti6_udp_routed_exception() { + setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip + trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \ + "${ns_a}" vti6_a "${ns_b}" vti6_b + + veth_mtu=1500 + vti_mtu=$((veth_mtu - 40)) + + # UDP SPI SN IV ICV pad length next header + esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1)) + ping_payload=$((esp_payload_rfc4106 - 48)) + + mtu "${ns_a}" veth_A-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-A ${veth_mtu} + mtu "${ns_b}" veth_B-R1 ${veth_mtu} + mtu "${ns_r1}" veth_R1-B ${veth_mtu} + + # mtu "${ns_a}" vti6_a ${vti_mtu} + # mtu "${ns_b}" vti6_b ${vti_mtu} + + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr} + + # Check that exception was not created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1 + + # Now decrease link layer MTU by 8 bytes on R1, check that exception is created + # with the right PMTU value + mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8)) + run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr} + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})" + check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))" + +} + test_pmtu_vti4_default_mtu() { setup namespaces veth vti4 || return $ksft_skip diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index 21091be70688..aee631c5284e 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -47,7 +47,7 @@ static void usage(const char *error) { if (error) printf("invalid option: %s\n", error); - printf("timestamping interface option*\n\n" + printf("timestamping <interface> [bind_phc_index] [option]*\n\n" "Options:\n" " IP_MULTICAST_LOOP - looping outgoing multicasts\n" " SO_TIMESTAMP - normal software time stamping, ms resolution\n" @@ -58,6 +58,7 @@ static void usage(const char *error) " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " SOF_TIMESTAMPING_BIND_PHC - request to bind a PHC of PTP vclock\n" " SIOCGSTAMP - check last socket time stamp\n" " SIOCGSTAMPNS - more accurate socket time stamp\n" " PTPV2 - use PTPv2 messages\n"); @@ -311,7 +312,6 @@ static void recvpacket(int sock, int recvmsg_flags, int main(int argc, char **argv) { - int so_timestamping_flags = 0; int so_timestamp = 0; int so_timestampns = 0; int siocgstamp = 0; @@ -325,6 +325,8 @@ int main(int argc, char **argv) struct ifreq device; struct ifreq hwtstamp; struct hwtstamp_config hwconfig, hwconfig_requested; + struct so_timestamping so_timestamping_get = { 0, -1 }; + struct so_timestamping so_timestamping = { 0, -1 }; struct sockaddr_in addr; struct ip_mreq imr; struct in_addr iaddr; @@ -342,7 +344,12 @@ int main(int argc, char **argv) exit(1); } - for (i = 2; i < argc; i++) { + if (argc >= 3 && sscanf(argv[2], "%d", &so_timestamping.bind_phc) == 1) + val = 3; + else + val = 2; + + for (i = val; i < argc; i++) { if (!strcasecmp(argv[i], "SO_TIMESTAMP")) so_timestamp = 1; else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) @@ -356,17 +363,19 @@ int main(int argc, char **argv) else if (!strcasecmp(argv[i], "PTPV2")) ptpv2 = 1; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_TX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_TX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RX_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_SOFTWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) - so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + so_timestamping.flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_BIND_PHC")) + so_timestamping.flags |= SOF_TIMESTAMPING_BIND_PHC; else usage(argv[i]); } @@ -385,10 +394,10 @@ int main(int argc, char **argv) hwtstamp.ifr_data = (void *)&hwconfig; memset(&hwconfig, 0, sizeof(hwconfig)); hwconfig.tx_type = - (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + (so_timestamping.flags & SOF_TIMESTAMPING_TX_HARDWARE) ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; hwconfig.rx_filter = - (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + (so_timestamping.flags & SOF_TIMESTAMPING_RX_HARDWARE) ? ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC : HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; hwconfig_requested = hwconfig; @@ -413,6 +422,9 @@ int main(int argc, char **argv) sizeof(struct sockaddr_in)) < 0) bail("bind"); + if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, interface, if_len)) + bail("bind device"); + /* set multicast group for outgoing packets */ inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ addr.sin_addr = iaddr; @@ -444,10 +456,9 @@ int main(int argc, char **argv) &enabled, sizeof(enabled)) < 0) bail("setsockopt SO_TIMESTAMPNS"); - if (so_timestamping_flags && - setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, - &so_timestamping_flags, - sizeof(so_timestamping_flags)) < 0) + if (so_timestamping.flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping, + sizeof(so_timestamping)) < 0) bail("setsockopt SO_TIMESTAMPING"); /* request IP_PKTINFO for debugging purposes */ @@ -468,14 +479,18 @@ int main(int argc, char **argv) else printf("SO_TIMESTAMPNS %d\n", val); - if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + len = sizeof(so_timestamping_get); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping_get, + &len) < 0) { printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", strerror(errno)); } else { - printf("SO_TIMESTAMPING %d\n", val); - if (val != so_timestamping_flags) - printf(" not the expected value %d\n", - so_timestamping_flags); + printf("SO_TIMESTAMPING flags %d, bind phc %d\n", + so_timestamping_get.flags, so_timestamping_get.bind_phc); + if (so_timestamping_get.flags != so_timestamping.flags || + so_timestamping_get.bind_phc != so_timestamping.bind_phc) + printf(" not expected, flags %d, bind phc %d\n", + so_timestamping.flags, so_timestamping.bind_phc); } /* send packets forever every five seconds */ diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index cd6430b39982..8748199ac109 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -5,7 +5,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ - ipip-conntrack-mtu.sh + ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh new file mode 100755 index 000000000000..e7d7bf13cff5 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh @@ -0,0 +1,167 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that UNREPLIED tcp conntrack will eventually timeout. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +waittime=20 +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup() { + ip netns pids $ns1 | xargs kill 2>/dev/null + ip netns pids $ns2 | xargs kill 2>/dev/null + + ip netns del $ns1 + ip netns del $ns2 +} + +ipv4() { + echo -n 192.168.$1.2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect") + if [ $? -ne 0 ]; then + echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2 + ip netns exec $ns2 nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +# Create test namespaces +ip netns add $ns1 || exit 1 + +trap cleanup EXIT + +ip netns add $ns2 || exit 1 + +# Connect the namespace to the host using a veth pair +ip -net $ns1 link add name veth1 type veth peer name veth2 +ip -net $ns1 link set netns $ns2 dev veth2 + +ip -net $ns1 link set up dev lo +ip -net $ns2 link set up dev lo +ip -net $ns1 link set up dev veth1 +ip -net $ns2 link set up dev veth2 + +ip -net $ns2 addr add 10.11.11.2/24 dev veth2 +ip -net $ns2 route add default via 10.11.11.1 + +ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1 + +# add a rule inside NS so we enable conntrack +ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT + +ip -net $ns1 addr add 10.11.11.1/24 dev veth1 +ip -net $ns1 route add 10.99.99.99 via 10.11.11.2 + +# Check connectivity works +ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1 + +ip netns exec $ns2 nc -l -p 8080 < /dev/null & + +# however, conntrack entries are there + +ip netns exec $ns2 nft -f - <<EOF +table inet filter { + counter connreq { } + counter redir { } + chain input { + type filter hook input priority 0; policy accept; + ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept + ct state new ct status dnat tcp dport 8080 counter name "redir" accept + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nft rules" + exit 1 +fi + +ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10 + +echo "INFO: connect $ns1 -> $ns2 to the virtual ip" +ip netns exec $ns1 bash -c 'while true ; do + nc -p 60000 10.99.99.99 80 + sleep 1 + done' & + +sleep 1 + +ip netns exec $ns2 nft -f - <<EOF +table inet nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + ip daddr 10.99.99.99 tcp dport 80 redirect to :8080 + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nat redirect" + exit 1 +fi + +count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l) +if [ $count -eq 0 ]; then + echo "ERROR: $ns2 did not pick up tcp connection from peer" + exit 1 +fi + +echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect" +for i in $(seq 1 $waittime); do + echo -n "." + + sleep 1 + + count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l) + if [ $count -gt 0 ]; then + echo + echo "PASS: redirection took effect after $i seconds" + break + fi + + m=$((i%20)) + if [ $m -eq 0 ]; then + echo " waited for $i seconds" + fi +done + +expect="packets 1 bytes 60" +check_counter "$ns2" "redir" "$expect" +if [ $? -ne 0 ]; then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: redirection counter has expected values" +else + echo "ERROR: no tcp connection was redirected" +fi + +exit $ret diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index dee7a3d6c5a5..92bbc5a15c39 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -55,10 +55,27 @@ static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m, return true; } +static void reverse_bytes(void *data, int length) +{ + int i = 0; + int j = length - 1; + uint8_t temp; + uint8_t *ptr = data; + + while (i < j) { + temp = ptr[i]; + ptr[i] = ptr[j]; + ptr[j] = temp; + i++; + j--; + } +} + static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1, uint8_t *q2) { struct q1q2_ctx ctx; + int len; if (!alloc_q1q2_ctx(s, m, &ctx)) { fprintf(stderr, "Not enough memory for Q1Q2 calculation\n"); @@ -89,8 +106,10 @@ static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1, goto out; } - BN_bn2bin(ctx.q1, q1); - BN_bn2bin(ctx.q2, q2); + len = BN_bn2bin(ctx.q1, q1); + reverse_bytes(q1, len); + len = BN_bn2bin(ctx.q2, q2); + reverse_bytes(q2, len); free_q1q2_ctx(&ctx); return true; @@ -152,22 +171,6 @@ static RSA *gen_sign_key(void) return key; } -static void reverse_bytes(void *data, int length) -{ - int i = 0; - int j = length - 1; - uint8_t temp; - uint8_t *ptr = data; - - while (i < j) { - temp = ptr[i]; - ptr[i] = ptr[j]; - ptr[j] = temp; - i++; - j--; - } -} - enum mrtags { MRECREATE = 0x0045544145524345, MREADD = 0x0000000044444145, @@ -367,8 +370,6 @@ bool encl_measure(struct encl *encl) /* BE -> LE */ reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE); reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE); - reverse_bytes(sigstruct->q1, SGX_MODULUS_SIZE); - reverse_bytes(sigstruct->q2, SGX_MODULUS_SIZE); EVP_MD_CTX_destroy(ctx); RSA_free(key); diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index e363bdaff59d..2ea438e6b8b1 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area) static void anon_allocate_area(void **alloc_area) { - if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) - err("posix_memalign() failed"); + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (*alloc_area == MAP_FAILED) + err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) |